import pandas as pd
import numpy as np
%pylab inline
Populating the interactive namespace from numpy and matplotlib
import urllib2
import json
# Fetch the first page of ratings for professor tid=1765657 from the
# RateMyProfessors pagination endpoint and decode the JSON payload.
url = 'http://www.ratemyprofessors.com/paginate/professors/ratings?tid=1765657&page=0'
urldat = urllib2.urlopen(url)
try:
    # Bound to `ratdata` (not `ratdat`): the display below and the later cells
    # read `ratdata['ratings']`, while the paging loop reuses `ratdat` as a
    # scratch name and would overwrite the first page.
    ratdata = json.load(urldat)
finally:
    urldat.close()  # release the HTTP connection even if decoding fails
ratdata
{u'ratings': [{u'attendance': u'N/A', u'clarityColor': u'good', u'easyColor': u'poor', u'helpColor': u'good', u'helpCount': 0, u'id': 24820908, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u"This class was tough, but I enjoyed it despite my grade. You do need to start projects early (easier said than done) and study hard for the exams. Bad grades on projects will hurt your final grade, but good grades won't save you if you perform poorly on the exams. I found the material to be very useful for my future career.", u'rDate': u'05/13/2015', u'rEasy': 1, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u'Really into it', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Inspirational', u'Expect homework', u'Tests are tough'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'Not Mandatory', u'clarityColor': u'good', u'easyColor': u'average', u'helpColor': u'good', u'helpCount': 0, u'id': 24789629, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u"I dropped 280 once after taking it with somebody else, retook it with Deorio and got an A. Deorio made the concepts more understandable. When you take the class a second time, it's a breeze. 
Tip: If you have to retake the class, ask for permission to reuse projects you did well on VIA EMAIL so you have it in writing.", u'rDate': u'05/10/2015', u'rEasy': 2, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u'Really into it', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'A', u'teacherRatingTags': [u'Amazing lectures', u'Expect homework', u'Tests are tough'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'N/A', u'clarityColor': u'good', u'easyColor': u'average', u'helpColor': u'good', u'helpCount': 0, u'id': 24789222, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u'280 is a really hard class, but for those who have to take it, Deorio is the guy to take it with. He is really clear in his lecture and does want students to succeed. You will learn a lot from him. However, 280 has to be a weeder class. People do well in 101 and say they wanna be a CS major, many of them give up after 280. Start projects early.', u'rDate': u'05/10/2015', u'rEasy': 2, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u'Really into it', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Hilarious', u'Expect homework', u'Tests are tough'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'N/A', u'clarityColor': u'good', u'easyColor': u'average', u'helpColor': u'good', u'helpCount': 0, u'id': 24746624, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u'I took this class because it was required for my major (EE), left very interested in programming. Take this class with Deorio and he will make you very interested in the subject. 
After taking 280, you have enough knowledge to do a lot of things with programming, and it prepares you quite well for job interviews. Hard work, but it pays off.', u'rDate': u'05/05/2015', u'rEasy': 2, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u"It's my life", u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Respected by students', u'Expect homework', u'Tests are tough'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'Not Mandatory', u'clarityColor': u'good', u'easyColor': u'average', u'helpColor': u'good', u'helpCount': 0, u'id': 24717554, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u"Deorio is probably the best person you could possibly take EECS 280 with. Even if you don't particularly enjoy programming, you will still have a good experience with the class. It is extremely hard to get an A range grade, but a B range grade is very doable. 
You gotta start projects early as once the autograder closes, you get a 0 for the project.", u'rDate': u'05/01/2015', u'rEasy': 2, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u'Sorta interested', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'B', u'teacherRatingTags': [u'Tests are tough', u'Inspirational', u'Expect homework'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'Not Mandatory', u'clarityColor': u'good', u'easyColor': u'average', u'helpColor': u'average', u'helpCount': 2, u'id': 24716268, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u'Unlike Berkeley where all the past exams (even from 1993) of a programming course along with solutions are posted online and available to everybody, here you only got 2 practice exams without solutions. That makes getting AA+ pretty hard. GSIs can make crazy exam questions (e.g. static member function) which is impossible if you just go to lecture', u'rDate': u'05/01/2015', u'rEasy': 2, u'rErrorMsg': None, u'rHelpful': 3, u'rInterest': u"It's my life", u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'N/A', u'clarityColor': u'good', u'easyColor': u'average', u'helpColor': u'good', u'helpCount': 1, u'id': 24714435, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u"DeOrio is definitely one of the best profs in the EECS department, if not the university. He has an excellent knack of making complicated computer science concepts clear in his lectures, while adding some humor to it. 
280 as a whole is great except you don't get answers to practice exams, which is personally a huge bummer coming from 183101.", u'rDate': u'05/01/2015', u'rEasy': 3, u'rErrorMsg': None, u'rHelpful': 4, u'rInterest': u'Really into it', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Amazing lectures', u'Respected by students'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'person'}, {u'attendance': u'Not Mandatory', u'clarityColor': u'good', u'easyColor': u'poor', u'helpColor': u'good', u'helpCount': 0, u'id': 24699406, u'notHelpCount': 1, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u"I took this class because it was required for my major and I'm not a huge fan of programming, but Deorio made my experience a good one. I found the class to be extremely difficult, but I did learn a lot. Lectures can feel slow and you might have a hard time paying attention, but you can watch the recordings at 1.5 speed and it's really helpful.", u'rDate': u'04/29/2015', u'rEasy': 1, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u'Sorta interested', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'B-', u'teacherRatingTags': [u'Lectures are long', u'Respected by students', u'Tests are tough'], u'unUsefulGrouping': u'person', u'usefulGrouping': u'people'}, {u'attendance': u'Not Mandatory', u'clarityColor': u'good', u'easyColor': u'poor', u'helpColor': u'good', u'helpCount': 0, u'id': 24689190, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u"The reputation of EECS 280 being a tough class is true, especially if you are not in the habit of starting homework early. 
Deorio is great, but you won't magically understand the material just because you go to or watch his lectures. Exams are hard and acing every project doesn't guarantee that you will do well. No solutions to practice exams sucks", u'rDate': u'04/28/2015', u'rEasy': 1, u'rErrorMsg': None, u'rHelpful': 4, u'rInterest': u'Sorta interested', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'B', u'teacherRatingTags': [u'Expect homework', u'Respected by students', u'Tests are tough'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'N/A', u'clarityColor': u'good', u'easyColor': u'average', u'helpColor': u'good', u'helpCount': 0, u'id': 24683472, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u'Projects are fun, lectures are amazing, and I had an awesome time overall in 280. That said, the exams seemed to be the main "grade maker" this semester. Everyone did well on (most of) the projects, so the only thing separating everyone\'s grades seemed to be the midterm and the final. I though Professor DeOrio did a great job preparing us though.', u'rDate': u'04/27/2015', u'rEasy': 3, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u"It's my life", u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Would take again', u'Respected by students', u'Inspirational'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'Not Mandatory', u'clarityColor': u'average', u'easyColor': u'average', u'helpColor': u'average', u'helpCount': 0, u'id': 24679871, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'average', u'rClarity': 3, u'rClass': u'EECS280', u'rComments': u"He's a good lecturer. 
I just want to warn everyone, if you are not already an expert in programming, take those exams seriously! Even if you get full scores on projects, you still have to study for a decent amount of time. Practice writing code on the paper! Class policy is ridiculous. No solutions published. And they won't reveal the curve.", u'rDate': u'04/27/2015', u'rEasy': 2, u'rErrorMsg': None, u'rHelpful': 3, u'rInterest': u'Sorta interested', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Tests are tough'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'N/A', u'clarityColor': u'good', u'easyColor': u'average', u'helpColor': u'good', u'helpCount': 1, u'id': 24671045, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u"This was quite a tough class for me. However, I couldn't think of anybody better than Deorio to teach it. Don't take this class if you are not willing to put in the work for it. Projects won't really save your grade if you do them, but can hurt your grade if you don't do them. Exams are also extremely hard and those ultimately determine your grade.", u'rDate': u'04/26/2015', u'rEasy': 2, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u'Sorta interested', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Amazing lectures', u'Tests are tough', u'Would take again'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'person'}, {u'attendance': u'N/A', u'clarityColor': u'good', u'easyColor': u'average', u'helpColor': u'good', u'helpCount': 0, u'id': 24668940, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u'1 short project + 4 partner projects + 2 exams. 
Projects are easy to ace if you spend enough time, exams are tougher. Deorio is a great teacher, and keeps the class engaged.', u'rDate': u'04/25/2015', u'rEasy': 3, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u'Really into it', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'N/A', u'clarityColor': u'good', u'easyColor': u'good', u'helpColor': u'good', u'helpCount': 0, u'id': 24666382, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u'Deorio is a spectacular professor. Best lectures that I have gone to at Umich', u'rDate': u'04/24/2015', u'rEasy': 4, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u"It's my life", u'rStatus': 1, u'rTextBookUse': u'You need it sometimes', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Inspirational', u'Would take again', u'Amazing lectures'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'Not Mandatory', u'clarityColor': u'good', u'easyColor': u'poor', u'helpColor': u'good', u'helpCount': 1, u'id': 24660124, u'notHelpCount': 1, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u"DeOrio knows his stuff, and he's definitely the prof to take 280 with. Lectures are incredibly clear and helpful, along with the in class examples. 
That said, he does seem a bit full of himself.", u'rDate': u'04/23/2015', u'rEasy': 1, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u'Really into it', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Participation matters'], u'unUsefulGrouping': u'person', u'usefulGrouping': u'person'}, {u'attendance': u'N/A', u'clarityColor': u'good', u'easyColor': u'poor', u'helpColor': u'good', u'helpCount': 2, u'id': 24651417, u'notHelpCount': 1, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u"Deorio is awesome and you will learn a lot with him. Don't expect the class to be easy though, and don't listen to anybody that tells you it's a joke. Expect to work very hard and devote a lot of time if you are not great at programming. Also, don't expect to do well on the exams by only doing the projects. You need to practice using practice exams", u'rDate': u'04/22/2015', u'rEasy': 1, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u'Really into it', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Amazing lectures', u'Expect homework', u'Tests are tough'], u'unUsefulGrouping': u'person', u'usefulGrouping': u'people'}, {u'attendance': u'Not Mandatory', u'clarityColor': u'good', u'easyColor': u'poor', u'helpColor': u'good', u'helpCount': 0, u'id': 24558298, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u"This is a very difficult class. Deorio is probably the best person you could possibly take this class with though. The autograder doesn't tell you how you actually did on projects until after the due date and the unpublished test cases are very thorough so start projects early. Don't overlook the importance of studying for exams. 
They are tough.", u'rDate': u'04/07/2015', u'rEasy': 1, u'rErrorMsg': None, u'rHelpful': 4, u'rInterest': u'Sorta interested', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'C', u'teacherRatingTags': [u'Expect homework', u'Tests are tough', u'Tough Grader'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'Not Mandatory', u'clarityColor': u'good', u'easyColor': u'average', u'helpColor': u'good', u'helpCount': 0, u'id': 24527704, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS280', u'rComments': u"Greatest professor I've ever had. He explains everything very clearly, and really knows his stuff. Also, his chicken stories are the best.", u'rDate': u'03/31/2015', u'rEasy': 3, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u"It's my life", u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'A-', u'teacherRatingTags': [], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'N/A', u'clarityColor': u'good', u'easyColor': u'poor', u'helpColor': u'good', u'helpCount': 0, u'id': 24513476, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 5, u'rClass': u'EECS 280', u'rComments': u'This is a very difficult class, especially for non-CS majors. It is a very heavy workload and very difficult to get a good grade in this class. However, I must say that I enjoyed having Deorio as a professor. Attendance at lectures is not mandatory, but I must say that he really kept me interested in going. 
No need to get the book for this class.', u'rDate': u'03/28/2015', u'rEasy': 1, u'rErrorMsg': None, u'rHelpful': 5, u'rInterest': u'Sorta interested', u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'N/A', u'teacherRatingTags': [u'Tests are tough', u'Expect homework', u'Amazing lectures'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}, {u'attendance': u'Not Mandatory', u'clarityColor': u'good', u'easyColor': u'good', u'helpColor': u'good', u'helpCount': 0, u'id': 24509955, u'notHelpCount': 0, u'onlineClass': u'', u'quality': u'good', u'rClarity': 4, u'rClass': u'EECS 280', u'rComments': u'A fun and charismatic lecturer. This class was easy enough to get an A with only watching the recordings. You still learn a lot though. Would entirely recommend Deorio for EECS 280.', u'rDate': u'03/27/2015', u'rEasy': 4, u'rErrorMsg': None, u'rHelpful': 4, u'rInterest': u"It's my life", u'rStatus': 1, u'rTextBookUse': u'What textbook?', u'sId': 1258, u'takenForCredit': u'Yes', u'teacher': None, u'teacherGrade': u'A+', u'teacherRatingTags': [u'Gives good feedback', u'Clear grading criteria', u'Hilarious'], u'unUsefulGrouping': u'people', u'usefulGrouping': u'people'}], u'remaining': 81}
# Collect the ratings from result pages 0-5 into one flat list of dicts.
# NOTE(review): the page count is hard-coded; presumably it was sized from the
# 'remaining' field of the first response -- confirm before reusing this for
# another professor id.
allratings = []
for pagenum in range(6):
    url = 'http://www.ratemyprofessors.com/paginate/professors/ratings?tid=1765657&page=' + str(pagenum)
    urldat = urllib2.urlopen(url)
    try:
        ratdat = json.load(urldat)
    finally:
        urldat.close()  # do not leave one open connection behind per page
    # extend() appends in place; `allratings + ...` rebuilt the whole list
    # on every iteration.
    allratings.extend(ratdat['ratings'])
len(allratings)
101
ratings = pd.DataFrame(allratings)
ratings.rHelpful
0 5 1 5 2 5 3 5 4 5 5 3 6 4 7 5 8 4 9 5 10 3 11 5 12 5 13 5 14 5 15 5 16 4 17 5 18 5 19 4 20 5 21 5 22 5 23 5 24 5 25 3 26 4 27 5 28 4 29 5 .. 71 5 72 5 73 5 74 5 75 5 76 3 77 5 78 5 79 5 80 5 81 5 82 5 83 5 84 3 85 4 86 5 87 5 88 5 89 5 90 5 91 5 92 5 93 5 94 5 95 5 96 5 97 5 98 5 99 5 100 5 Name: rHelpful, dtype: int64
ratings.rHelpful.value_counts()
5 71 4 23 3 7 dtype: int64
ratings.rEasy.value_counts()
2 32 3 28 1 25 4 14 5 2 dtype: int64
# Average every numeric column per easiness score (rEasy), then plot mean
# helpfulness against easiness.
# The frame also holds text and list columns (comments, tags, ...); restricting
# to numeric dtypes first avoids relying on GroupBy.mean() silently dropping
# them, which recent pandas versions no longer do (they raise instead).
by_easy = ratings.select_dtypes(include=[np.number]).groupby('rEasy').mean()
plt.plot(by_easy.index, by_easy.rHelpful)
[<matplotlib.lines.Line2D at 0x7f1018478450>]
len(ratdata['ratings'])
20
ratdata['ratings'][0].keys()
[u'rErrorMsg', u'rClass', u'rTextBookUse', u'easyColor', u'teacherGrade', u'rEasy', u'rClarity', u'usefulGrouping', u'rInterest', u'sId', u'quality', u'id', u'rStatus', u'attendance', u'teacherRatingTags', u'rComments', u'rDate', u'takenForCredit', u'onlineClass', u'notHelpCount', u'rHelpful', u'teacher', u'helpColor', u'unUsefulGrouping', u'helpCount', u'clarityColor']
# Rebuild `ratings` from the first page only, replacing the frame built from
# `allratings` above: one row per rating dict, one column per JSON key.
ratings = pd.DataFrame(ratdata['ratings'])
ratings
attendance | clarityColor | easyColor | helpColor | helpCount | id | notHelpCount | onlineClass | quality | rClarity | ... | rInterest | rStatus | rTextBookUse | sId | takenForCredit | teacher | teacherGrade | teacherRatingTags | unUsefulGrouping | usefulGrouping | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | N/A | good | poor | good | 0 | 24820908 | 0 | good | 5 | ... | Really into it | 1 | What textbook? | 1258 | Yes | None | N/A | [Inspirational, Expect homework, Tests are tough] | people | people | |
1 | Not Mandatory | good | average | good | 0 | 24789629 | 0 | good | 5 | ... | Really into it | 1 | What textbook? | 1258 | Yes | None | A | [Amazing lectures, Expect homework, Tests are ... | people | people | |
2 | N/A | good | average | good | 0 | 24789222 | 0 | good | 5 | ... | Really into it | 1 | What textbook? | 1258 | Yes | None | N/A | [Hilarious, Expect homework, Tests are tough] | people | people | |
3 | N/A | good | average | good | 0 | 24746624 | 0 | good | 5 | ... | It's my life | 1 | What textbook? | 1258 | Yes | None | N/A | [Respected by students, Expect homework, Tests... | people | people | |
4 | Not Mandatory | good | average | good | 0 | 24717554 | 0 | good | 5 | ... | Sorta interested | 1 | What textbook? | 1258 | Yes | None | B | [Tests are tough, Inspirational, Expect homework] | people | people | |
5 | Not Mandatory | good | average | average | 2 | 24716268 | 0 | good | 5 | ... | It's my life | 1 | What textbook? | 1258 | Yes | None | N/A | [] | people | people | |
6 | N/A | good | average | good | 1 | 24714435 | 0 | good | 5 | ... | Really into it | 1 | What textbook? | 1258 | Yes | None | N/A | [Amazing lectures, Respected by students] | people | person | |
7 | Not Mandatory | good | poor | good | 0 | 24699406 | 1 | good | 5 | ... | Sorta interested | 1 | What textbook? | 1258 | Yes | None | B- | [Lectures are long, Respected by students, Tes... | person | people | |
8 | Not Mandatory | good | poor | good | 0 | 24689190 | 0 | good | 5 | ... | Sorta interested | 1 | What textbook? | 1258 | Yes | None | B | [Expect homework, Respected by students, Tests... | people | people | |
9 | N/A | good | average | good | 0 | 24683472 | 0 | good | 5 | ... | It's my life | 1 | What textbook? | 1258 | Yes | None | N/A | [Would take again, Respected by students, Insp... | people | people | |
10 | Not Mandatory | average | average | average | 0 | 24679871 | 0 | average | 3 | ... | Sorta interested | 1 | What textbook? | 1258 | Yes | None | N/A | [Tests are tough] | people | people | |
11 | N/A | good | average | good | 1 | 24671045 | 0 | good | 5 | ... | Sorta interested | 1 | What textbook? | 1258 | Yes | None | N/A | [Amazing lectures, Tests are tough, Would take... | people | person | |
12 | N/A | good | average | good | 0 | 24668940 | 0 | good | 5 | ... | Really into it | 1 | What textbook? | 1258 | Yes | None | N/A | [] | people | people | |
13 | N/A | good | good | good | 0 | 24666382 | 0 | good | 5 | ... | It's my life | 1 | You need it sometimes | 1258 | Yes | None | N/A | [Inspirational, Would take again, Amazing lect... | people | people | |
14 | Not Mandatory | good | poor | good | 1 | 24660124 | 1 | good | 5 | ... | Really into it | 1 | What textbook? | 1258 | Yes | None | N/A | [Participation matters] | person | person | |
15 | N/A | good | poor | good | 2 | 24651417 | 1 | good | 5 | ... | Really into it | 1 | What textbook? | 1258 | Yes | None | N/A | [Amazing lectures, Expect homework, Tests are ... | person | people | |
16 | Not Mandatory | good | poor | good | 0 | 24558298 | 0 | good | 5 | ... | Sorta interested | 1 | What textbook? | 1258 | Yes | None | C | [Expect homework, Tests are tough, Tough Grader] | people | people | |
17 | Not Mandatory | good | average | good | 0 | 24527704 | 0 | good | 5 | ... | It's my life | 1 | What textbook? | 1258 | Yes | None | A- | [] | people | people | |
18 | N/A | good | poor | good | 0 | 24513476 | 0 | good | 5 | ... | Sorta interested | 1 | What textbook? | 1258 | Yes | None | N/A | [Tests are tough, Expect homework, Amazing lec... | people | people | |
19 | Not Mandatory | good | good | good | 0 | 24509955 | 0 | good | 4 | ... | It's my life | 1 | What textbook? | 1258 | Yes | None | A+ | [Gives good feedback, Clear grading criteria, ... | people | people |
20 rows × 26 columns
ratings.rComments
0 This class was tough, but I enjoyed it despite... 1 I dropped 280 once after taking it with somebo... 2 280 is a really hard class, but for those who ... 3 I took this class because it was required for ... 4 Deorio is probably the best person you could p... 5 Unlike Berkeley where all the past exams (even... 6 DeOrio is definitely one of the best profs in ... 7 I took this class because it was required for ... 8 The reputation of EECS 280 being a tough class... 9 Projects are fun, lectures are amazing, and I ... 10 He's a good lecturer. I just want to warn ever... 11 This was quite a tough class for me. However, ... 12 1 short project + 4 partner projects + 2 exams... 13 Deorio is a spectacular professor. Best lectur... 14 DeOrio knows his stuff, and he's definitely th... 15 Deorio is awesome and you will learn a lot wit... 16 This is a very difficult class. Deorio is prob... 17 Greatest professor I've ever had. He explains ... 18 This is a very difficult class, especially for... 19 A fun and charismatic lecturer. This class was... Name: rComments, dtype: object
ls
Jake Practice 2.ipynb Untitled18.ipynb
JakePractice.ipynb Untitled19.ipynb
JakePrepLec3.ipynb Untitled2.ipynb
Kevin.ipynb Untitled20.ipynb
PlayingAround.ipynb Untitled3.ipynb
README Untitled4.ipynb
RateMyProfessors data.ipynb Untitled5.ipynb
SANITIZED_final_grade_data.csv Untitled6.ipynb
SANITIZED_final_gradebook.csv Untitled7.ipynb
The Real Deal.ipynb Untitled8.ipynb
The Real Real Deal.ipynb Untitled9.ipynb
Untitled.ipynb holland_seawater.dat
Untitled1.ipynb holland_temperature.dat
Untitled10.ipynb py_exploratory_comp_1_sol.ipynb
Untitled11.ipynb ratings_subset_bigdata.csv
Untitled12.ipynb rmp_ratings_subset.zip
Untitled13.ipynb temp.py
Untitled14.ipynb untitled.txt
Untitled15.ipynb untitled1.txt
Untitled16.ipynb untitled2.txt
Untitled17.ipynb xypoints.dat
# Replace `ratings` with the contents of the local CSV file (relative path, so
# it is resolved against the current working directory), then print the
# column / dtype / non-null summary.
# NOTE(review): the file appears to carry two saved index columns
# ('Unnamed: 0' and 'index') -- confirm whether they should be dropped.
ratings = pd.read_csv('ratings_subset_bigdata.csv')
ratings.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 93796 entries, 0 to 93795 Data columns (total 31 columns): Unnamed: 0 93796 non-null int64 index 93796 non-null int64 attendance 7322 non-null object clarityColor 93796 non-null object easyColor 93796 non-null object helpColor 93796 non-null object helpCount 93796 non-null int64 id 93796 non-null int64 notHelpCount 93796 non-null int64 onlineClass 168 non-null object quality 93796 non-null object rClarity 93796 non-null int64 rClass 59188 non-null object rComments 93661 non-null object rDate 93796 non-null object rEasy 93796 non-null int64 rErrorMsg 0 non-null float64 rHelpful 93796 non-null int64 rInterest 89532 non-null object rStatus 93796 non-null int64 rTextBookUse 73433 non-null object sId 93796 non-null int64 takenForCredit 5542 non-null object teacher 0 non-null float64 teacherGrade 4838 non-null object teacherRatingTags 93796 non-null object unUsefulGrouping 93796 non-null object usefulGrouping 93796 non-null object tid 93796 non-null int64 bingender 93796 non-null int64 ishot 93796 non-null int64 dtypes: float64(2), int64(13), object(16) memory usage: 22.9+ MB
# 2-D histogram of easiness vs. helpfulness (5 x 5 bins), drawn as a heat map.
heatmap, xedges, yedges = np.histogram2d(ratings.rEasy, ratings.rHelpful, bins=[5, 5])
# histogram2d bins x (rEasy) along the FIRST array axis, whereas pcolor draws
# the first axis vertically. Transpose so rEasy runs along the horizontal axis,
# and pass the bin edges so the axes show rating values instead of bin indices.
plt.xlabel('rEasy')
plt.ylabel('rHelpful')
plt.pcolor(xedges, yedges, heatmap.T, cmap=plt.cm.Reds)
<matplotlib.collections.PolyCollection at 0x7f1018e7c650>
# Average every numeric column per easiness score (rEasy) and plot mean
# clarity against it (the Series index, rEasy, is used as the x axis).
# Non-numeric columns are excluded up front: recent pandas versions raise in
# GroupBy.mean() on text columns instead of silently dropping them.
by_easy = ratings.select_dtypes(include=[np.number]).groupby('rEasy').mean()
plt.plot(by_easy.rClarity)
[<matplotlib.lines.Line2D at 0x7f1018da35d0>]
plt.plot(by_easy.rHelpful)
[<matplotlib.lines.Line2D at 0x7f1018d4ddd0>]
# Pearson correlation between easiness and clarity. corrcoef returns the 2x2
# correlation matrix; [0, 1] picks the off-diagonal (cross) entry.
np.corrcoef(ratings.rEasy,ratings.rClarity)[0,1]
0.38490525108326107
# Pearson correlation between helpfulness and clarity. corrcoef returns the
# 2x2 correlation matrix; [0, 1] picks the off-diagonal (cross) entry.
np.corrcoef(ratings.rHelpful,ratings.rClarity)[0,1]
0.80210093883336542
ratings.head()
Unnamed: 0 | index | attendance | clarityColor | easyColor | helpColor | helpCount | id | notHelpCount | onlineClass | ... | sId | takenForCredit | teacher | teacherGrade | teacherRatingTags | unUsefulGrouping | usefulGrouping | tid | bingender | ishot | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | NaN | good | average | good | 0 | 24228248 | 10 | NaN | ... | 135 | Yes | NaN | NaN | ["Would take again", "Hilarious", "Tests are t... | people | people | 916674 | 0 | 0 |
1 | 1 | 1 | Mandatory | good | average | good | 0 | 24218909 | 1 | NaN | ... | 135 | Yes | NaN | A | ["Skip class? You won't pass.", "Tests are tou... | person | people | 916674 | 0 | 0 |
2 | 2 | 2 | NaN | good | average | good | 1 | 24215795 | 2 | NaN | ... | 135 | Yes | NaN | NaN | ["Hilarious", "Would take again", "Skip class?... | people | person | 916674 | 0 | 0 |
3 | 3 | 3 | Not Mandatory | poor | average | average | 18 | 24204179 | 6 | NaN | ... | 135 | Yes | NaN | NaN | ["Tests are tough", "Get ready to read"] | people | people | 916674 | 0 | 0 |
4 | 4 | 4 | Not Mandatory | good | good | good | 1 | 24198463 | 0 | NaN | ... | 135 | Yes | NaN | A | ["Inspirational", "Hilarious", "Skip class? Yo... | people | person | 916674 | 0 | 0 |
5 rows × 31 columns
ratings.rComments.head()
0 Great Professor My wife took this class twice ... 1 Great Professor Study the notes from class and... 2 Brother Brau is a great guy He gives great spi... 3 People rave about Brau but I personally dont g... 4 This class doesnt have much homework which was... Name: rComments, dtype: object
import sklearn
from sklearn import linear_model
# Ridge classifier constructed with its default settings; it is only created
# here, not fitted.
cl = linear_model.RidgeClassifier()
from sklearn.feature_extraction.text import CountVectorizer, ENGLISH_STOP_WORDS
len(ENGLISH_STOP_WORDS)
318
# Display the full stop-word set imported from sklearn.feature_extraction.text.
ENGLISH_STOP_WORDS
frozenset({'a', 'about', 'above', 'across', 'after', 'afterwards', 'again', 'against', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'am', 'among', 'amongst', 'amoungst', 'amount', 'an', 'and', 'another', 'any', 'anyhow', 'anyone', 'anything', 'anyway', 'anywhere', 'are', 'around', 'as', 'at', 'back', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been', 'before', 'beforehand', 'behind', 'being', 'below', 'beside', 'besides', 'between', 'beyond', 'bill', 'both', 'bottom', 'but', 'by', 'call', 'can', 'cannot', 'cant', 'co', 'con', 'could', 'couldnt', 'cry', 'de', 'describe', 'detail', 'do', 'done', 'down', 'due', 'during', 'each', 'eg', 'eight', 'either', 'eleven', 'else', 'elsewhere', 'empty', 'enough', 'etc', 'even', 'ever', 'every', 'everyone', 'everything', 'everywhere', 'except', 'few', 'fifteen', 'fify', 'fill', 'find', 'fire', 'first', 'five', 'for', 'former', 'formerly', 'forty', 'found', 'four', 'from', 'front', 'full', 'further', 'get', 'give', 'go', 'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her', 'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'however', 'hundred', 'i', 'ie', 'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'it', 'its', 'itself', 'keep', 'last', 'latter', 'latterly', 'least', 'less', 'ltd', 'made', 'many', 'may', 'me', 'meanwhile', 'might', 'mill', 'mine', 'more', 'moreover', 'most', 'mostly', 'move', 'much', 'must', 'my', 'myself', 'name', 'namely', 'neither', 'never', 'nevertheless', 'next', 'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not', 'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on', 'once', 'one', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 'part', 'per', 'perhaps', 'please', 'put', 'rather', 're', 'same', 'see', 'seem', 'seemed', 'seeming', 'seems', 'serious', 'several', 'she', 'should', 'show', 'side', 'since', 'sincere', 'six', 'sixty', 'so', 
'some', 'somehow', 'someone', 'something', 'sometime', 'sometimes', 'somewhere', 'still', 'such', 'system', 'take', 'ten', 'than', 'that', 'the', 'their', 'them', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', 'therefore', 'therein', 'thereupon', 'these', 'they', 'thick', 'thin', 'third', 'this', 'those', 'though', 'three', 'through', 'throughout', 'thru', 'thus', 'to', 'together', 'too', 'top', 'toward', 'towards', 'twelve', 'twenty', 'two', 'un', 'under', 'until', 'up', 'upon', 'us', 'very', 'via', 'was', 'we', 'well', 'were', 'what', 'whatever', 'when', 'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whither', 'who', 'whoever', 'whole', 'whom', 'whose', 'why', 'will', 'with', 'within', 'without', 'would', 'yet', 'you', 'your', 'yours', 'yourself', 'yourselves'})
# Bag-of-words featurizer over unigrams and bigrams. Terms that occur in
# fewer than 120 comments are ignored. 'english' selects scikit-learn's
# built-in stop-word list (the same words as ENGLISH_STOP_WORDS).
count_vect = CountVectorizer(min_df=120, stop_words='english', ngram_range=(1, 2))

# Drop reviews without comment text BEFORE sizing the split. Otherwise
# numtrain is computed from rows that are about to be discarded, the training
# share ends up larger than the intended 75%, and np.random.choice can raise
# if numtrain exceeds the number of remaining rows.
ratings = ratings[ratings.rComments.notnull()]

# Use three quarters of the usable rows for training.
numtrain = int(3.0 * len(ratings.index) / 4)
numtrain

# Sample training labels without replacement; the rest is the test set.
# NOTE(review): no seed is set in the visible code, so the split (and every
# number derived from it) presumably changes from run to run.
traininds = np.random.choice(ratings.index, size=numtrain, replace=False)
testinds = ratings.index.difference(traininds)

# Label-based selection with .loc (the .ix indexer was removed from pandas).
# The vocabulary is learned from the training comments only and then reused
# to encode the test comments.
Xtrain = count_vect.fit_transform(ratings.loc[traininds, 'rComments'])
Xtest = count_vect.transform(ratings.loc[testinds, 'rComments'])
Ytrain = ratings.loc[traininds, 'ishot']

cl.fit(Xtrain, np.array(Ytrain))
RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True, fit_intercept=True, max_iter=None, normalize=False, solver='auto', tol=0.001)
# True 'ishot' labels for the held-out rows, selected by label with .loc
# (the .ix indexer was removed from pandas).
Ytest = ratings.loc[testinds, 'ishot']

# Predicted labels for the held-out comments.
Ypreds = cl.predict(Xtest)
Ypreds
array([0, 0, 0, ..., 0, 1, 0])
# Total of the predicted labels, i.e. how many test rows were predicted as 1
# (the displayed predictions are 0/1 valued).
Ypreds.sum()
836
# One row per vocabulary term with its fitted coefficient and magnitude.
# NOTE(review): newer scikit-learn releases rename get_feature_names() to
# get_feature_names_out(); the old name is kept because this notebook targets
# a Python 2 environment (it imports urllib2).
coefs = cl.coef_.flatten()
df = pd.DataFrame(data={'words': count_vect.get_feature_names(),
                        'coef': coefs,
                        'abscoef': np.abs(coefs)})

# Rank terms by absolute coefficient, largest first. DataFrame.sort() has
# been removed from pandas; sort_values() is its replacement.
df = df.sort_values('abscoef', ascending=False)
df
abscoef | coef | words | |
---|---|---|---|
842 | 0.713850 | 0.713850 | hot |
1874 | 0.620711 | 0.620711 | young |
1151 | 0.565271 | 0.565271 | nelson |
571 | 0.440758 | 0.440758 | eyes |
374 | 0.422829 | 0.422829 | dance |
371 | 0.388632 | 0.388632 | cute |
1085 | 0.388631 | -0.388631 | marsh |
1153 | 0.386088 | -0.386088 | new testament |
828 | 0.376755 | -0.376755 | holyoak |
199 | 0.345287 | 0.345287 | chill |
147 | 0.334509 | -0.334509 | brau |
1520 | 0.320579 | 0.320579 | speaking |
922 | 0.319725 | -0.319725 | kearl |
348 | 0.283771 | 0.283771 | coolest |
1214 | 0.279600 | -0.279600 | parker |
664 | 0.263533 | 0.263533 | german |
1240 | 0.258119 | 0.258119 | phenomenal |
1330 | 0.254259 | 0.254259 | public |
847 | 0.246795 | 0.246795 | human |
822 | 0.244276 | 0.244276 | highly recommended |
1159 | 0.233863 | -0.233863 | nice man |
137 | 0.231801 | 0.231801 | bomb |
475 | 0.231608 | 0.231608 | easy going |
479 | 0.227222 | 0.227222 | easy learn |
379 | 0.223648 | 0.223648 | dc |
265 | 0.222726 | -0.222726 | class love |
1362 | 0.221242 | 0.221242 | reading quizzes |
1045 | 0.221000 | 0.221000 | loved going |
1129 | 0.220336 | -0.220336 | monotone |
1106 | 0.216171 | 0.216171 | mention |
... | ... | ... | ... |
1701 | 0.001462 | -0.001462 | textbook |
528 | 0.001455 | 0.001455 | example |
252 | 0.001419 | 0.001419 | class isnt |
575 | 0.001377 | -0.001377 | facts |
539 | 0.001356 | -0.001356 | excellent teacher |
1029 | 0.001343 | -0.001343 | lot |
427 | 0.001341 | 0.001341 | does good |
708 | 0.001213 | 0.001213 | grades |
1192 | 0.001029 | 0.001029 | opportunity |
205 | 0.000952 | -0.000952 | class |
450 | 0.000785 | 0.000785 | dont miss |
860 | 0.000783 | 0.000783 | important |
574 | 0.000778 | 0.000778 | fact |
481 | 0.000771 | 0.000771 | easy study |
29 | 0.000719 | -0.000719 | actually |
1609 | 0.000650 | -0.000650 | taken |
412 | 0.000611 | -0.000611 | difficult class |
39 | 0.000538 | 0.000538 | allows |
915 | 0.000432 | -0.000432 | just dont |
1492 | 0.000423 | -0.000423 | shouldnt |
1800 | 0.000376 | 0.000376 | wants succeed |
559 | 0.000369 | -0.000369 | explains things |
836 | 0.000302 | -0.000302 | honest |
1336 | 0.000277 | 0.000277 | questions class |
1461 | 0.000245 | -0.000245 | schedule |
1187 | 0.000171 | 0.000171 | open |
1475 | 0.000160 | -0.000160 | semester |
1695 | 0.000093 | -0.000093 | tests tough |
456 | 0.000061 | 0.000061 | dont want |
1469 | 0.000019 | 0.000019 | scriptures |
1878 rows × 3 columns
# Re-rank the terms by signed coefficient: most positive first, most negative
# last. DataFrame.sort() has been removed from pandas; sort_values() is its
# replacement.
df = df.sort_values('coef', ascending=False)
df
abscoef | coef | words | |
---|---|---|---|
842 | 0.713850 | 0.713850 | hot |
1874 | 0.620711 | 0.620711 | young |
1151 | 0.565271 | 0.565271 | nelson |
571 | 0.440758 | 0.440758 | eyes |
374 | 0.422829 | 0.422829 | dance |
371 | 0.388632 | 0.388632 | cute |
199 | 0.345287 | 0.345287 | chill |
1520 | 0.320579 | 0.320579 | speaking |
348 | 0.283771 | 0.283771 | coolest |
664 | 0.263533 | 0.263533 | german |
1240 | 0.258119 | 0.258119 | phenomenal |
1330 | 0.254259 | 0.254259 | public |
847 | 0.246795 | 0.246795 | human |
822 | 0.244276 | 0.244276 | highly recommended |
137 | 0.231801 | 0.231801 | bomb |
475 | 0.231608 | 0.231608 | easy going |
479 | 0.227222 | 0.227222 | easy learn |
379 | 0.223648 | 0.223648 | dc |
1362 | 0.221242 | 0.221242 | reading quizzes |
1045 | 0.221000 | 0.221000 | loved going |
1106 | 0.216171 | 0.216171 | mention |
1025 | 0.215086 | 0.215086 | looked forward |
322 | 0.211760 | 0.211760 | comfortable |
788 | 0.207359 | 0.207359 | help learn |
636 | 0.206991 | 0.206991 | french |
531 | 0.206084 | 0.206084 | exams arent |
158 | 0.205526 | 0.205526 | brown |
23 | 0.203678 | 0.203678 | academic |
1673 | 0.203621 | 0.203621 | testament |
1524 | 0.202615 | 0.202615 | spend time |
... | ... | ... | ... |
415 | 0.150019 | -0.150019 | difficult understand |
122 | 0.150697 | -0.150697 | best math |
1178 | 0.151531 | -0.151531 | office |
991 | 0.151532 | -0.151532 | lecturing |
445 | 0.161251 | -0.161251 | dont buy |
1715 | 0.165290 | -0.165290 | think hes |
1854 | 0.165374 | -0.165374 | wouldnt recommend |
1644 | 0.168190 | -0.168190 | teacher loved |
12 | 0.168641 | -0.168641 | 30 |
434 | 0.170138 | -0.170138 | doesnt care |
3 | 0.176178 | -0.176178 | 105 |
1418 | 0.177775 | -0.177775 | relatively easy |
1607 | 0.187225 | -0.187225 | ta lab |
1125 | 0.188734 | -0.188734 | mission |
619 | 0.190885 | -0.190885 | finance |
751 | 0.192054 | -0.192054 | hansen |
926 | 0.193562 | -0.193562 | kelly |
699 | 0.195853 | -0.195853 | gotten |
1271 | 0.200579 | -0.200579 | power points |
1530 | 0.202177 | -0.202177 | spiritual thoughts |
904 | 0.204705 | -0.204705 | ive met |
1129 | 0.220336 | -0.220336 | monotone |
265 | 0.222726 | -0.222726 | class love |
1159 | 0.233863 | -0.233863 | nice man |
1214 | 0.279600 | -0.279600 | parker |
922 | 0.319725 | -0.319725 | kearl |
147 | 0.334509 | -0.334509 | brau |
828 | 0.376755 | -0.376755 | holyoak |
1153 | 0.386088 | -0.386088 | new testament |
1085 | 0.388631 | -0.388631 | marsh |
1878 rows × 3 columns