In [123]:
import pandas as pd
import numpy as np
%pylab inline
Populating the interactive namespace from numpy and matplotlib
In [124]:
import urllib2
import json
In [125]:
# Fetch the first page (page=0) of the paginated ratings JSON for one professor id.
url = 'http://www.ratemyprofessors.com/paginate/professors/ratings?tid=1765657&page=0'
urldat = urllib2.urlopen(url)
try:
    # Bind the parsed payload to `ratdata`: that is the name the inspection
    # cells further down read (the original bound `ratdat`, so a fresh
    # kernel hit a NameError on the very next cell).
    ratdata = json.load(urldat)
finally:
    # Release the HTTP connection even if the body is not valid JSON.
    urldat.close()
In [126]:
# Display the whole parsed payload: a dict holding a 'ratings' list and a 'remaining' count.
ratdata
Out[126]:
{u'ratings': [{u'attendance': u'N/A',
   u'clarityColor': u'good',
   u'easyColor': u'poor',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24820908,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u"This class was tough, but I enjoyed it despite my grade. You do need to start projects early (easier said than done) and study hard for the exams. Bad grades on projects will hurt your final grade, but good grades won't save you if you perform poorly on the exams. I found the material to be very useful for my future career.",
   u'rDate': u'05/13/2015',
   u'rEasy': 1,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u'Really into it',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Inspirational',
    u'Expect homework',
    u'Tests are tough'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'Not Mandatory',
   u'clarityColor': u'good',
   u'easyColor': u'average',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24789629,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u"I dropped 280 once after taking it with somebody else, retook it with Deorio and got an A. Deorio made the concepts more understandable. When you take the class a second time, it's a breeze. Tip: If you have to retake the class, ask for permission to reuse projects you did well on VIA EMAIL so you have it in writing.",
   u'rDate': u'05/10/2015',
   u'rEasy': 2,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u'Really into it',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'A',
   u'teacherRatingTags': [u'Amazing lectures',
    u'Expect homework',
    u'Tests are tough'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'N/A',
   u'clarityColor': u'good',
   u'easyColor': u'average',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24789222,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u'280 is a really hard class, but for those who have to take it, Deorio is the guy to take it with. He is really clear in his lecture and does want students to succeed. You will learn a lot from him. However, 280 has to be a weeder class. People do well in 101 and say they wanna be a CS major, many of them give up after 280. Start projects early.',
   u'rDate': u'05/10/2015',
   u'rEasy': 2,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u'Really into it',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Hilarious',
    u'Expect homework',
    u'Tests are tough'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'N/A',
   u'clarityColor': u'good',
   u'easyColor': u'average',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24746624,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u'I took this class because it was required for my major (EE), left very interested in programming. Take this class with Deorio and he will make you very interested in the subject. After taking 280, you have enough knowledge to do a lot of things with programming, and it prepares you quite well for job interviews. Hard work, but it pays off.',
   u'rDate': u'05/05/2015',
   u'rEasy': 2,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u"It's my life",
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Respected by students',
    u'Expect homework',
    u'Tests are tough'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'Not Mandatory',
   u'clarityColor': u'good',
   u'easyColor': u'average',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24717554,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u"Deorio is probably the best person you could possibly take EECS 280 with. Even if you don't particularly enjoy programming, you will still have a good experience with the class. It is extremely hard to get an A range grade, but a B range grade is very doable. You gotta start projects early as once the autograder closes, you get a 0 for the project.",
   u'rDate': u'05/01/2015',
   u'rEasy': 2,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u'Sorta interested',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'B',
   u'teacherRatingTags': [u'Tests are tough',
    u'Inspirational',
    u'Expect homework'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'Not Mandatory',
   u'clarityColor': u'good',
   u'easyColor': u'average',
   u'helpColor': u'average',
   u'helpCount': 2,
   u'id': 24716268,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u'Unlike Berkeley where all the past exams (even from 1993) of a programming course along with solutions are posted online and available to everybody, here you only got 2 practice exams without solutions. That makes getting AA+ pretty hard. GSIs can make crazy exam questions (e.g. static member function) which is impossible if you just go to lecture',
   u'rDate': u'05/01/2015',
   u'rEasy': 2,
   u'rErrorMsg': None,
   u'rHelpful': 3,
   u'rInterest': u"It's my life",
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'N/A',
   u'clarityColor': u'good',
   u'easyColor': u'average',
   u'helpColor': u'good',
   u'helpCount': 1,
   u'id': 24714435,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u"DeOrio is definitely one of the best profs in the EECS department, if not the university. He has an excellent knack of making complicated computer science concepts clear in his lectures, while adding some humor to it. 280 as a whole is great except you don't get answers to practice exams, which is personally a huge bummer coming from 183101.",
   u'rDate': u'05/01/2015',
   u'rEasy': 3,
   u'rErrorMsg': None,
   u'rHelpful': 4,
   u'rInterest': u'Really into it',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Amazing lectures', u'Respected by students'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'person'},
  {u'attendance': u'Not Mandatory',
   u'clarityColor': u'good',
   u'easyColor': u'poor',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24699406,
   u'notHelpCount': 1,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u"I took this class because it was required for my major and I'm not a huge fan of programming, but Deorio made my experience a good one. I found the class to be extremely difficult, but I did learn a lot. Lectures can feel slow and you might have a hard time paying attention, but you can watch the recordings at 1.5 speed and it's really helpful.",
   u'rDate': u'04/29/2015',
   u'rEasy': 1,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u'Sorta interested',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'B-',
   u'teacherRatingTags': [u'Lectures are long',
    u'Respected by students',
    u'Tests are tough'],
   u'unUsefulGrouping': u'person',
   u'usefulGrouping': u'people'},
  {u'attendance': u'Not Mandatory',
   u'clarityColor': u'good',
   u'easyColor': u'poor',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24689190,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u"The reputation of EECS 280 being a tough class is true, especially if you are not in the habit of starting homework early. Deorio is great, but you won't magically understand the material just because you go to or watch his lectures. Exams are hard and acing every project doesn't guarantee that you will do well. No solutions to practice exams sucks",
   u'rDate': u'04/28/2015',
   u'rEasy': 1,
   u'rErrorMsg': None,
   u'rHelpful': 4,
   u'rInterest': u'Sorta interested',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'B',
   u'teacherRatingTags': [u'Expect homework',
    u'Respected by students',
    u'Tests are tough'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'N/A',
   u'clarityColor': u'good',
   u'easyColor': u'average',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24683472,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u'Projects are fun, lectures are amazing, and I had an awesome time overall in 280. That said, the exams seemed to be the main "grade maker" this semester. Everyone did well on (most of) the projects, so the only thing separating everyone\'s grades seemed to be the midterm and the final. I though Professor DeOrio did a great job preparing us though.',
   u'rDate': u'04/27/2015',
   u'rEasy': 3,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u"It's my life",
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Would take again',
    u'Respected by students',
    u'Inspirational'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'Not Mandatory',
   u'clarityColor': u'average',
   u'easyColor': u'average',
   u'helpColor': u'average',
   u'helpCount': 0,
   u'id': 24679871,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'average',
   u'rClarity': 3,
   u'rClass': u'EECS280',
   u'rComments': u"He's a good lecturer. I just want to warn everyone, if you are not already an expert in programming, take those exams seriously! Even if you get full scores on projects, you still have to study for a decent amount of time. Practice writing code on the paper! Class policy is ridiculous. No solutions published. And they won't reveal the curve.",
   u'rDate': u'04/27/2015',
   u'rEasy': 2,
   u'rErrorMsg': None,
   u'rHelpful': 3,
   u'rInterest': u'Sorta interested',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Tests are tough'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'N/A',
   u'clarityColor': u'good',
   u'easyColor': u'average',
   u'helpColor': u'good',
   u'helpCount': 1,
   u'id': 24671045,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u"This was quite a tough class for me. However, I couldn't think of anybody better than Deorio to teach it. Don't take this class if you are not willing to put in the work for it. Projects won't really save your grade if you do them, but can hurt your grade if you don't do them. Exams are also extremely hard and those ultimately determine your grade.",
   u'rDate': u'04/26/2015',
   u'rEasy': 2,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u'Sorta interested',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Amazing lectures',
    u'Tests are tough',
    u'Would take again'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'person'},
  {u'attendance': u'N/A',
   u'clarityColor': u'good',
   u'easyColor': u'average',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24668940,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u'1 short project + 4 partner projects + 2 exams. Projects are easy to ace if you spend enough time, exams are tougher. Deorio is a great teacher, and keeps the class engaged.',
   u'rDate': u'04/25/2015',
   u'rEasy': 3,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u'Really into it',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'N/A',
   u'clarityColor': u'good',
   u'easyColor': u'good',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24666382,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u'Deorio is a spectacular professor. Best lectures that I have gone to at Umich',
   u'rDate': u'04/24/2015',
   u'rEasy': 4,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u"It's my life",
   u'rStatus': 1,
   u'rTextBookUse': u'You need it sometimes',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Inspirational',
    u'Would take again',
    u'Amazing lectures'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'Not Mandatory',
   u'clarityColor': u'good',
   u'easyColor': u'poor',
   u'helpColor': u'good',
   u'helpCount': 1,
   u'id': 24660124,
   u'notHelpCount': 1,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u"DeOrio knows his stuff, and he's definitely the prof to take 280 with. Lectures are incredibly clear and helpful, along with the in class examples. That said, he does seem a bit full of himself.",
   u'rDate': u'04/23/2015',
   u'rEasy': 1,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u'Really into it',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Participation matters'],
   u'unUsefulGrouping': u'person',
   u'usefulGrouping': u'person'},
  {u'attendance': u'N/A',
   u'clarityColor': u'good',
   u'easyColor': u'poor',
   u'helpColor': u'good',
   u'helpCount': 2,
   u'id': 24651417,
   u'notHelpCount': 1,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u"Deorio is awesome and you will learn a lot with him. Don't expect the class to be easy though, and don't listen to anybody that tells you it's a joke. Expect to work very hard and devote a lot of time if you are not great at programming. Also, don't expect to do well on the exams by only doing the projects. You need to practice using practice exams",
   u'rDate': u'04/22/2015',
   u'rEasy': 1,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u'Really into it',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Amazing lectures',
    u'Expect homework',
    u'Tests are tough'],
   u'unUsefulGrouping': u'person',
   u'usefulGrouping': u'people'},
  {u'attendance': u'Not Mandatory',
   u'clarityColor': u'good',
   u'easyColor': u'poor',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24558298,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u"This is a very difficult class. Deorio is probably the best person you could possibly take this class with though. The autograder doesn't tell you how you actually did on projects until after the due date and the unpublished test cases are very thorough so start projects early. Don't overlook the importance of studying for exams. They are tough.",
   u'rDate': u'04/07/2015',
   u'rEasy': 1,
   u'rErrorMsg': None,
   u'rHelpful': 4,
   u'rInterest': u'Sorta interested',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'C',
   u'teacherRatingTags': [u'Expect homework',
    u'Tests are tough',
    u'Tough Grader'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'Not Mandatory',
   u'clarityColor': u'good',
   u'easyColor': u'average',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24527704,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS280',
   u'rComments': u"Greatest professor I've ever had. He explains everything very clearly, and really knows his stuff. Also, his chicken stories are the best.",
   u'rDate': u'03/31/2015',
   u'rEasy': 3,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u"It's my life",
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'A-',
   u'teacherRatingTags': [],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'N/A',
   u'clarityColor': u'good',
   u'easyColor': u'poor',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24513476,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 5,
   u'rClass': u'EECS 280',
   u'rComments': u'This is a very difficult class, especially for non-CS majors. It is a very heavy workload and very difficult to get a good grade in this class. However, I must say that I enjoyed having Deorio as a professor. Attendance at lectures is not mandatory, but I must say that he really kept me interested in going. No need to get the book for this class.',
   u'rDate': u'03/28/2015',
   u'rEasy': 1,
   u'rErrorMsg': None,
   u'rHelpful': 5,
   u'rInterest': u'Sorta interested',
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'N/A',
   u'teacherRatingTags': [u'Tests are tough',
    u'Expect homework',
    u'Amazing lectures'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'},
  {u'attendance': u'Not Mandatory',
   u'clarityColor': u'good',
   u'easyColor': u'good',
   u'helpColor': u'good',
   u'helpCount': 0,
   u'id': 24509955,
   u'notHelpCount': 0,
   u'onlineClass': u'',
   u'quality': u'good',
   u'rClarity': 4,
   u'rClass': u'EECS 280',
   u'rComments': u'A fun and charismatic lecturer. This class was easy enough to get an A with only watching the recordings. You still learn a lot though. Would entirely recommend Deorio for EECS 280.',
   u'rDate': u'03/27/2015',
   u'rEasy': 4,
   u'rErrorMsg': None,
   u'rHelpful': 4,
   u'rInterest': u"It's my life",
   u'rStatus': 1,
   u'rTextBookUse': u'What textbook?',
   u'sId': 1258,
   u'takenForCredit': u'Yes',
   u'teacher': None,
   u'teacherGrade': u'A+',
   u'teacherRatingTags': [u'Gives good feedback',
    u'Clear grading criteria',
    u'Hilarious'],
   u'unUsefulGrouping': u'people',
   u'usefulGrouping': u'people'}],
 u'remaining': 81}
In [127]:
# Collect the rating records from result pages 0-5 into one flat list of dicts.
# NOTE(review): the page count is hard-coded; the payload's 'remaining' field
# looks like it could drive the loop instead - confirm before relying on it.
allratings = []
for pagenum in range(6):
    url = 'http://www.ratemyprofessors.com/paginate/professors/ratings?tid=1765657&page=' + str(pagenum)
    urldat = urllib2.urlopen(url)
    try:
        ratdat = json.load(urldat)
    finally:
        # Close each response so connections are not leaked across iterations.
        urldat.close()
    # Append in place; `allratings + ...` rebuilt the whole list on every page.
    allratings.extend(ratdat['ratings'])
In [128]:
# Total number of rating records gathered across the fetched pages.
len(allratings)
Out[128]:
101
In [129]:
# Tabulate the collected rating dicts: one row per rating, one column per JSON key.
ratings = pd.DataFrame(allratings)
In [130]:
# Helpfulness score column of the ratings frame.
ratings['rHelpful']
Out[130]:
0      5
1      5
2      5
3      5
4      5
5      3
6      4
7      5
8      4
9      5
10     3
11     5
12     5
13     5
14     5
15     5
16     4
17     5
18     5
19     4
20     5
21     5
22     5
23     5
24     5
25     3
26     4
27     5
28     4
29     5
      ..
71     5
72     5
73     5
74     5
75     5
76     3
77     5
78     5
79     5
80     5
81     5
82     5
83     5
84     3
85     4
86     5
87     5
88     5
89     5
90     5
91     5
92     5
93     5
94     5
95     5
96     5
97     5
98     5
99     5
100    5
Name: rHelpful, dtype: int64
In [131]:
# How many ratings fall on each helpfulness score.
ratings['rHelpful'].value_counts()
Out[131]:
5    71
4    23
3     7
dtype: int64
In [132]:
# How many ratings fall on each easiness score.
ratings['rEasy'].value_counts()
Out[132]:
2    32
3    28
1    25
4    14
5     2
dtype: int64
In [133]:
# Mean of every numeric column for each easiness score.
# The frame also carries string and list columns; restrict to numeric dtypes
# explicitly instead of relying on them being silently dropped, which raises
# a TypeError on newer pandas releases.
by_easy = ratings.select_dtypes(include=[np.number]).groupby('rEasy').mean()
In [134]:
# Mean helpfulness as a function of the easiness score it was grouped by.
plt.plot(by_easy.index, by_easy.rHelpful)
plt.xlabel('rEasy')
plt.ylabel('mean rHelpful')
# Trailing semicolon keeps the matplotlib object repr out of the cell output.
plt.title('Mean helpfulness by easiness score');
Out[134]:
[<matplotlib.lines.Line2D at 0x7f1018478450>]
In [135]:
# Number of rating records contained in the single-page payload `ratdata`.
len(ratdata['ratings'])
Out[135]:
20
In [136]:
# Field names carried by the first rating record of the payload.
ratdata['ratings'][0].keys()
Out[136]:
[u'rErrorMsg',
 u'rClass',
 u'rTextBookUse',
 u'easyColor',
 u'teacherGrade',
 u'rEasy',
 u'rClarity',
 u'usefulGrouping',
 u'rInterest',
 u'sId',
 u'quality',
 u'id',
 u'rStatus',
 u'attendance',
 u'teacherRatingTags',
 u'rComments',
 u'rDate',
 u'takenForCredit',
 u'onlineClass',
 u'notHelpCount',
 u'rHelpful',
 u'teacher',
 u'helpColor',
 u'unUsefulGrouping',
 u'helpCount',
 u'clarityColor']
In [137]:
# Build a frame from the single-page payload only.
# NOTE: this rebinds `ratings`, replacing the frame built earlier from `allratings`.
ratings = pd.DataFrame(ratdata['ratings'])
In [138]:
# Render the frame in full (rich display of the last expression).
ratings
Out[138]:
attendance clarityColor easyColor helpColor helpCount id notHelpCount onlineClass quality rClarity ... rInterest rStatus rTextBookUse sId takenForCredit teacher teacherGrade teacherRatingTags unUsefulGrouping usefulGrouping
0 N/A good poor good 0 24820908 0 good 5 ... Really into it 1 What textbook? 1258 Yes None N/A [Inspirational, Expect homework, Tests are tough] people people
1 Not Mandatory good average good 0 24789629 0 good 5 ... Really into it 1 What textbook? 1258 Yes None A [Amazing lectures, Expect homework, Tests are ... people people
2 N/A good average good 0 24789222 0 good 5 ... Really into it 1 What textbook? 1258 Yes None N/A [Hilarious, Expect homework, Tests are tough] people people
3 N/A good average good 0 24746624 0 good 5 ... It's my life 1 What textbook? 1258 Yes None N/A [Respected by students, Expect homework, Tests... people people
4 Not Mandatory good average good 0 24717554 0 good 5 ... Sorta interested 1 What textbook? 1258 Yes None B [Tests are tough, Inspirational, Expect homework] people people
5 Not Mandatory good average average 2 24716268 0 good 5 ... It's my life 1 What textbook? 1258 Yes None N/A [] people people
6 N/A good average good 1 24714435 0 good 5 ... Really into it 1 What textbook? 1258 Yes None N/A [Amazing lectures, Respected by students] people person
7 Not Mandatory good poor good 0 24699406 1 good 5 ... Sorta interested 1 What textbook? 1258 Yes None B- [Lectures are long, Respected by students, Tes... person people
8 Not Mandatory good poor good 0 24689190 0 good 5 ... Sorta interested 1 What textbook? 1258 Yes None B [Expect homework, Respected by students, Tests... people people
9 N/A good average good 0 24683472 0 good 5 ... It's my life 1 What textbook? 1258 Yes None N/A [Would take again, Respected by students, Insp... people people
10 Not Mandatory average average average 0 24679871 0 average 3 ... Sorta interested 1 What textbook? 1258 Yes None N/A [Tests are tough] people people
11 N/A good average good 1 24671045 0 good 5 ... Sorta interested 1 What textbook? 1258 Yes None N/A [Amazing lectures, Tests are tough, Would take... people person
12 N/A good average good 0 24668940 0 good 5 ... Really into it 1 What textbook? 1258 Yes None N/A [] people people
13 N/A good good good 0 24666382 0 good 5 ... It's my life 1 You need it sometimes 1258 Yes None N/A [Inspirational, Would take again, Amazing lect... people people
14 Not Mandatory good poor good 1 24660124 1 good 5 ... Really into it 1 What textbook? 1258 Yes None N/A [Participation matters] person person
15 N/A good poor good 2 24651417 1 good 5 ... Really into it 1 What textbook? 1258 Yes None N/A [Amazing lectures, Expect homework, Tests are ... person people
16 Not Mandatory good poor good 0 24558298 0 good 5 ... Sorta interested 1 What textbook? 1258 Yes None C [Expect homework, Tests are tough, Tough Grader] people people
17 Not Mandatory good average good 0 24527704 0 good 5 ... It's my life 1 What textbook? 1258 Yes None A- [] people people
18 N/A good poor good 0 24513476 0 good 5 ... Sorta interested 1 What textbook? 1258 Yes None N/A [Tests are tough, Expect homework, Amazing lec... people people
19 Not Mandatory good good good 0 24509955 0 good 4 ... It's my life 1 What textbook? 1258 Yes None A+ [Gives good feedback, Clear grading criteria, ... people people

20 rows × 26 columns

In [139]:
# Free-text comment column of the ratings frame.
ratings['rComments']
Out[139]:
0     This class was tough, but I enjoyed it despite...
1     I dropped 280 once after taking it with somebo...
2     280 is a really hard class, but for those who ...
3     I took this class because it was required for ...
4     Deorio is probably the best person you could p...
5     Unlike Berkeley where all the past exams (even...
6     DeOrio is definitely one of the best profs in ...
7     I took this class because it was required for ...
8     The reputation of EECS 280 being a tough class...
9     Projects are fun, lectures are amazing, and I ...
10    He's a good lecturer. I just want to warn ever...
11    This was quite a tough class for me. However, ...
12    1 short project + 4 partner projects + 2 exams...
13    Deorio is a spectacular professor. Best lectur...
14    DeOrio knows his stuff, and he's definitely th...
15    Deorio is awesome and you will learn a lot wit...
16    This is a very difficult class. Deorio is prob...
17    Greatest professor I've ever had. He explains ...
18    This is a very difficult class, especially for...
19    A fun and charismatic lecturer. This class was...
Name: rComments, dtype: object
In [140]:
ls  # list the working directory via IPython's automagic alias
Jake Practice 2.ipynb           Untitled18.ipynb
JakePractice.ipynb              Untitled19.ipynb
JakePrepLec3.ipynb              Untitled2.ipynb
Kevin.ipynb                     Untitled20.ipynb
PlayingAround.ipynb             Untitled3.ipynb
README                          Untitled4.ipynb
RateMyProfessors data.ipynb     Untitled5.ipynb
SANITIZED_final_grade_data.csv  Untitled6.ipynb
SANITIZED_final_gradebook.csv   Untitled7.ipynb
The Real Deal.ipynb             Untitled8.ipynb
The Real Real Deal.ipynb        Untitled9.ipynb
Untitled.ipynb                  holland_seawater.dat
Untitled1.ipynb                 holland_temperature.dat
Untitled10.ipynb                py_exploratory_comp_1_sol.ipynb
Untitled11.ipynb                ratings_subset_bigdata.csv
Untitled12.ipynb                rmp_ratings_subset.zip
Untitled13.ipynb                temp.py
Untitled14.ipynb                untitled.txt
Untitled15.ipynb                untitled1.txt
Untitled16.ipynb                untitled2.txt
Untitled17.ipynb                xypoints.dat
In [141]:
# Load the pre-collected ratings subset from a CSV next to the notebook.
# NOTE: this rebinds `ratings` once more; the frames built from the web payload are discarded.
# NOTE(review): the loaded frame has 'Unnamed: 0' and 'index' columns that look like
# row indices saved into the file - confirm whether they should be dropped or used as the index.
ratings = pd.read_csv('ratings_subset_bigdata.csv')
In [142]:
# Summarise column names, non-null counts and dtypes of the loaded frame.
ratings.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 93796 entries, 0 to 93795
Data columns (total 31 columns):
Unnamed: 0           93796 non-null int64
index                93796 non-null int64
attendance           7322 non-null object
clarityColor         93796 non-null object
easyColor            93796 non-null object
helpColor            93796 non-null object
helpCount            93796 non-null int64
id                   93796 non-null int64
notHelpCount         93796 non-null int64
onlineClass          168 non-null object
quality              93796 non-null object
rClarity             93796 non-null int64
rClass               59188 non-null object
rComments            93661 non-null object
rDate                93796 non-null object
rEasy                93796 non-null int64
rErrorMsg            0 non-null float64
rHelpful             93796 non-null int64
rInterest            89532 non-null object
rStatus              93796 non-null int64
rTextBookUse         73433 non-null object
sId                  93796 non-null int64
takenForCredit       5542 non-null object
teacher              0 non-null float64
teacherGrade         4838 non-null object
teacherRatingTags    93796 non-null object
unUsefulGrouping     93796 non-null object
usefulGrouping       93796 non-null object
tid                  93796 non-null int64
bingender            93796 non-null int64
ishot                93796 non-null int64
dtypes: float64(2), int64(13), object(16)
memory usage: 22.9+ MB
In [143]:
# 2-D histogram of (rEasy, rHelpful) pairs on a 5x5 grid.
# heatmap[i, j] counts ratings in rEasy bin i and rHelpful bin j; xedges/yedges
# hold the bin edges, which numpy derives from each column's min and max.
# NOTE(review): presumably both scores span 1-5 so each bin holds exactly one
# score - confirm, otherwise pass explicit bin edges.
heatmap, xedges, yedges = np.histogram2d(ratings.rEasy, ratings.rHelpful,bins=[5,5])
In [144]:
# Draw the 2-D histogram as a heat map.
# pcolor maps array rows to the y axis and columns to the x axis, and
# histogram2d puts its first input (rEasy) along rows, so rEasy runs vertically.
plt.pcolor(heatmap,cmap=plt.cm.Reds)
plt.xlabel('rHelpful (bin index)')
plt.ylabel('rEasy (bin index)')
# Colour scale for the counts; the semicolon suppresses the object repr.
plt.colorbar();
Out[144]:
<matplotlib.collections.PolyCollection at 0x7f1018e7c650>
In [145]:
# Mean of every numeric column for each easiness score.
# Most columns of the loaded frame are text; restrict to numeric dtypes
# explicitly instead of relying on them being silently dropped, which raises
# a TypeError on newer pandas releases.
by_easy = ratings.select_dtypes(include=[np.number]).groupby('rEasy').mean()
In [146]:
# Mean clarity per easiness score (the Series index, rEasy, supplies the x values).
plt.plot(by_easy.rClarity)
plt.xlabel('rEasy')
plt.ylabel('mean rClarity')
# Trailing semicolon keeps the matplotlib object repr out of the cell output.
plt.title('Mean clarity by easiness score');
Out[146]:
[<matplotlib.lines.Line2D at 0x7f1018da35d0>]
In [147]:
# Mean helpfulness per easiness score (the Series index, rEasy, supplies the x values).
plt.plot(by_easy.rHelpful)
plt.xlabel('rEasy')
plt.ylabel('mean rHelpful')
# Trailing semicolon keeps the matplotlib object repr out of the cell output.
plt.title('Mean helpfulness by easiness score');
Out[147]:
[<matplotlib.lines.Line2D at 0x7f1018d4ddd0>]
In [148]:
# Correlation coefficient between easiness and clarity: the off-diagonal
# entry of the 2x2 matrix returned by np.corrcoef.
np.corrcoef(ratings['rEasy'], ratings['rClarity'])[0][1]
Out[148]:
0.38490525108326107
In [149]:
# Correlation coefficient between helpfulness and clarity: the off-diagonal
# entry of the 2x2 matrix returned by np.corrcoef.
np.corrcoef(ratings['rHelpful'], ratings['rClarity'])[0][1]
Out[149]:
0.80210093883336542
In [150]:
# Preview the first five rows of the loaded frame.
ratings.head()
Out[150]:
Unnamed: 0 index attendance clarityColor easyColor helpColor helpCount id notHelpCount onlineClass ... sId takenForCredit teacher teacherGrade teacherRatingTags unUsefulGrouping usefulGrouping tid bingender ishot
0 0 0 NaN good average good 0 24228248 10 NaN ... 135 Yes NaN NaN ["Would take again", "Hilarious", "Tests are t... people people 916674 0 0
1 1 1 Mandatory good average good 0 24218909 1 NaN ... 135 Yes NaN A ["Skip class? You won't pass.", "Tests are tou... person people 916674 0 0
2 2 2 NaN good average good 1 24215795 2 NaN ... 135 Yes NaN NaN ["Hilarious", "Would take again", "Skip class?... people person 916674 0 0
3 3 3 Not Mandatory poor average average 18 24204179 6 NaN ... 135 Yes NaN NaN ["Tests are tough", "Get ready to read"] people people 916674 0 0
4 4 4 Not Mandatory good good good 1 24198463 0 NaN ... 135 Yes NaN A ["Inspirational", "Hilarious", "Skip class? Yo... people person 916674 0 0

5 rows × 31 columns

In [151]:
# Preview the first five free-text comments.
ratings['rComments'].head()
Out[151]:
0    Great Professor My wife took this class twice ...
1    Great Professor Study the notes from class and...
2    Brother Brau is a great guy He gives great spi...
3    People rave about Brau but I personally dont g...
4    This class doesnt have much homework which was...
Name: rComments, dtype: object
In [152]:
import sklearn
In [153]:
from sklearn import linear_model
In [154]:
# Instantiate scikit-learn's ridge classifier with its default settings.
cl = linear_model.RidgeClassifier()
In [155]:
from sklearn.feature_extraction.text import CountVectorizer, ENGLISH_STOP_WORDS
In [156]:
# Number of entries in scikit-learn's built-in English stop-word set.
len(ENGLISH_STOP_WORDS)
Out[156]:
318
In [157]:
# Echo the full stop-word set for inspection (these words are removed from the
# comments by the CountVectorizer configured below).
ENGLISH_STOP_WORDS
Out[157]:
frozenset({'a',
           'about',
           'above',
           'across',
           'after',
           'afterwards',
           'again',
           'against',
           'all',
           'almost',
           'alone',
           'along',
           'already',
           'also',
           'although',
           'always',
           'am',
           'among',
           'amongst',
           'amoungst',
           'amount',
           'an',
           'and',
           'another',
           'any',
           'anyhow',
           'anyone',
           'anything',
           'anyway',
           'anywhere',
           'are',
           'around',
           'as',
           'at',
           'back',
           'be',
           'became',
           'because',
           'become',
           'becomes',
           'becoming',
           'been',
           'before',
           'beforehand',
           'behind',
           'being',
           'below',
           'beside',
           'besides',
           'between',
           'beyond',
           'bill',
           'both',
           'bottom',
           'but',
           'by',
           'call',
           'can',
           'cannot',
           'cant',
           'co',
           'con',
           'could',
           'couldnt',
           'cry',
           'de',
           'describe',
           'detail',
           'do',
           'done',
           'down',
           'due',
           'during',
           'each',
           'eg',
           'eight',
           'either',
           'eleven',
           'else',
           'elsewhere',
           'empty',
           'enough',
           'etc',
           'even',
           'ever',
           'every',
           'everyone',
           'everything',
           'everywhere',
           'except',
           'few',
           'fifteen',
           'fify',
           'fill',
           'find',
           'fire',
           'first',
           'five',
           'for',
           'former',
           'formerly',
           'forty',
           'found',
           'four',
           'from',
           'front',
           'full',
           'further',
           'get',
           'give',
           'go',
           'had',
           'has',
           'hasnt',
           'have',
           'he',
           'hence',
           'her',
           'here',
           'hereafter',
           'hereby',
           'herein',
           'hereupon',
           'hers',
           'herself',
           'him',
           'himself',
           'his',
           'how',
           'however',
           'hundred',
           'i',
           'ie',
           'if',
           'in',
           'inc',
           'indeed',
           'interest',
           'into',
           'is',
           'it',
           'its',
           'itself',
           'keep',
           'last',
           'latter',
           'latterly',
           'least',
           'less',
           'ltd',
           'made',
           'many',
           'may',
           'me',
           'meanwhile',
           'might',
           'mill',
           'mine',
           'more',
           'moreover',
           'most',
           'mostly',
           'move',
           'much',
           'must',
           'my',
           'myself',
           'name',
           'namely',
           'neither',
           'never',
           'nevertheless',
           'next',
           'nine',
           'no',
           'nobody',
           'none',
           'noone',
           'nor',
           'not',
           'nothing',
           'now',
           'nowhere',
           'of',
           'off',
           'often',
           'on',
           'once',
           'one',
           'only',
           'onto',
           'or',
           'other',
           'others',
           'otherwise',
           'our',
           'ours',
           'ourselves',
           'out',
           'over',
           'own',
           'part',
           'per',
           'perhaps',
           'please',
           'put',
           'rather',
           're',
           'same',
           'see',
           'seem',
           'seemed',
           'seeming',
           'seems',
           'serious',
           'several',
           'she',
           'should',
           'show',
           'side',
           'since',
           'sincere',
           'six',
           'sixty',
           'so',
           'some',
           'somehow',
           'someone',
           'something',
           'sometime',
           'sometimes',
           'somewhere',
           'still',
           'such',
           'system',
           'take',
           'ten',
           'than',
           'that',
           'the',
           'their',
           'them',
           'themselves',
           'then',
           'thence',
           'there',
           'thereafter',
           'thereby',
           'therefore',
           'therein',
           'thereupon',
           'these',
           'they',
           'thick',
           'thin',
           'third',
           'this',
           'those',
           'though',
           'three',
           'through',
           'throughout',
           'thru',
           'thus',
           'to',
           'together',
           'too',
           'top',
           'toward',
           'towards',
           'twelve',
           'twenty',
           'two',
           'un',
           'under',
           'until',
           'up',
           'upon',
           'us',
           'very',
           'via',
           'was',
           'we',
           'well',
           'were',
           'what',
           'whatever',
           'when',
           'whence',
           'whenever',
           'where',
           'whereafter',
           'whereas',
           'whereby',
           'wherein',
           'whereupon',
           'wherever',
           'whether',
           'which',
           'while',
           'whither',
           'who',
           'whoever',
           'whole',
           'whom',
           'whose',
           'why',
           'will',
           'with',
           'within',
           'without',
           'would',
           'yet',
           'you',
           'your',
           'yours',
           'yourself',
           'yourselves'})
In [158]:
# Bag-of-words extractor over unigrams and bigrams, with English stop words
# removed and any term that appears in fewer than 120 comments discarded.
count_vect = CountVectorizer(
    min_df=120,
    stop_words=ENGLISH_STOP_WORDS,
    ngram_range=(1, 2),
)
In [159]:
# Size of the training split: three quarters of the rows that actually have a
# comment. Counting every row would overshoot, because rows with a missing
# rComments are dropped from `ratings` before the split is drawn, and the draw
# (without replacement) fails if it asks for more rows than remain.
numtrain = int(3.0 * ratings.rComments.notnull().sum() / 4)
In [160]:
# Drop ratings with no comment text before they are handed to the vectorizer.
ratings = ratings[ratings.rComments.notnull()]
# Echo the training-set size. A bare name is only displayed when it is the
# last line of the cell, so it is placed here rather than first.
numtrain
In [161]:
# Draw the training rows without replacement from a fixed-seed generator, so
# the train/test split (and everything fitted on it) is identical on every run.
traininds = np.random.RandomState(0).choice(ratings.index,size = numtrain,replace=False)
In [162]:
# Held-out rows: every index label that was not drawn for training.
testinds = ratings.index.difference(pd.Index(traininds))
In [163]:
# Learn the vocabulary on the training comments and build their term-count
# matrix. traininds holds index labels, so the label-based .loc is used
# (.ix is deprecated and absent from current pandas).
Xtrain = count_vect.fit_transform(ratings.loc[traininds,'rComments'])
In [164]:
# Encode the held-out comments with the vocabulary learned on the training
# set (transform only, no refit). testinds holds index labels, so the
# label-based .loc is used (.ix is deprecated and absent from current pandas).
Xtest = count_vect.transform(ratings.loc[testinds,'rComments'])
In [165]:
# Training labels: the `ishot` flag for the training rows, selected by index
# label with .loc (.ix is deprecated and absent from current pandas).
Ytrain = ratings.loc[traininds,'ishot']
In [166]:
# Fit the classifier on the training term counts against the ishot labels,
# passed as a plain NumPy array.
cl.fit(Xtrain, Ytrain.values)
Out[166]:
RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True, fit_intercept=True,
        max_iter=None, normalize=False, solver='auto', tol=0.001)
In [167]:
# Held-out labels: the `ishot` flag for the test rows, selected by index
# label with .loc (.ix is deprecated and absent from current pandas).
Ytest = ratings.loc[testinds,'ishot']
In [168]:
# Predicted ishot label for every held-out comment.
Ypreds = cl.predict(Xtest)
In [169]:
# Echo the prediction array (NumPy abbreviates long arrays on display).
Ypreds
Out[169]:
array([0, 0, 0, ..., 0, 1, 0])
In [170]:
# Number of held-out comments predicted as ishot == 1 (labels are 0/1, so the
# sum of the array is the count of positives).
Ypreds.sum()
Out[170]:
836
In [171]:
# One row per vocabulary term: the term itself, its fitted coefficient, and
# the coefficient's magnitude (used for ranking terms by influence).
df = pd.DataFrame(data=dict(
    words=count_vect.get_feature_names(),
    coef=cl.coef_.flatten(),
    abscoef=np.abs(cl.coef_.flatten()),
))
In [172]:
# Rank terms by coefficient magnitude, largest first. DataFrame.sort has been
# removed from pandas; sort_values is its replacement, and rebinding the name
# avoids in-place mutation.
df = df.sort_values('abscoef',ascending=False)
In [173]:
# Show the terms ordered by coefficient magnitude (pandas truncates the middle
# rows of long frames on display).
df
Out[173]:
abscoef coef words
842 0.713850 0.713850 hot
1874 0.620711 0.620711 young
1151 0.565271 0.565271 nelson
571 0.440758 0.440758 eyes
374 0.422829 0.422829 dance
371 0.388632 0.388632 cute
1085 0.388631 -0.388631 marsh
1153 0.386088 -0.386088 new testament
828 0.376755 -0.376755 holyoak
199 0.345287 0.345287 chill
147 0.334509 -0.334509 brau
1520 0.320579 0.320579 speaking
922 0.319725 -0.319725 kearl
348 0.283771 0.283771 coolest
1214 0.279600 -0.279600 parker
664 0.263533 0.263533 german
1240 0.258119 0.258119 phenomenal
1330 0.254259 0.254259 public
847 0.246795 0.246795 human
822 0.244276 0.244276 highly recommended
1159 0.233863 -0.233863 nice man
137 0.231801 0.231801 bomb
475 0.231608 0.231608 easy going
479 0.227222 0.227222 easy learn
379 0.223648 0.223648 dc
265 0.222726 -0.222726 class love
1362 0.221242 0.221242 reading quizzes
1045 0.221000 0.221000 loved going
1129 0.220336 -0.220336 monotone
1106 0.216171 0.216171 mention
... ... ... ...
1701 0.001462 -0.001462 textbook
528 0.001455 0.001455 example
252 0.001419 0.001419 class isnt
575 0.001377 -0.001377 facts
539 0.001356 -0.001356 excellent teacher
1029 0.001343 -0.001343 lot
427 0.001341 0.001341 does good
708 0.001213 0.001213 grades
1192 0.001029 0.001029 opportunity
205 0.000952 -0.000952 class
450 0.000785 0.000785 dont miss
860 0.000783 0.000783 important
574 0.000778 0.000778 fact
481 0.000771 0.000771 easy study
29 0.000719 -0.000719 actually
1609 0.000650 -0.000650 taken
412 0.000611 -0.000611 difficult class
39 0.000538 0.000538 allows
915 0.000432 -0.000432 just dont
1492 0.000423 -0.000423 shouldnt
1800 0.000376 0.000376 wants succeed
559 0.000369 -0.000369 explains things
836 0.000302 -0.000302 honest
1336 0.000277 0.000277 questions class
1461 0.000245 -0.000245 schedule
1187 0.000171 0.000171 open
1475 0.000160 -0.000160 semester
1695 0.000093 -0.000093 tests tough
456 0.000061 0.000061 dont want
1469 0.000019 0.000019 scriptures

1878 rows × 3 columns

In [174]:
# Rank terms by signed coefficient: strongest positive weights first,
# strongest negative weights last. DataFrame.sort has been removed from
# pandas; sort_values is its replacement, and rebinding the name avoids
# in-place mutation.
df = df.sort_values('coef',ascending=False)
In [175]:
# Show the terms ordered by signed coefficient (pandas truncates the middle
# rows of long frames on display).
df
Out[175]:
abscoef coef words
842 0.713850 0.713850 hot
1874 0.620711 0.620711 young
1151 0.565271 0.565271 nelson
571 0.440758 0.440758 eyes
374 0.422829 0.422829 dance
371 0.388632 0.388632 cute
199 0.345287 0.345287 chill
1520 0.320579 0.320579 speaking
348 0.283771 0.283771 coolest
664 0.263533 0.263533 german
1240 0.258119 0.258119 phenomenal
1330 0.254259 0.254259 public
847 0.246795 0.246795 human
822 0.244276 0.244276 highly recommended
137 0.231801 0.231801 bomb
475 0.231608 0.231608 easy going
479 0.227222 0.227222 easy learn
379 0.223648 0.223648 dc
1362 0.221242 0.221242 reading quizzes
1045 0.221000 0.221000 loved going
1106 0.216171 0.216171 mention
1025 0.215086 0.215086 looked forward
322 0.211760 0.211760 comfortable
788 0.207359 0.207359 help learn
636 0.206991 0.206991 french
531 0.206084 0.206084 exams arent
158 0.205526 0.205526 brown
23 0.203678 0.203678 academic
1673 0.203621 0.203621 testament
1524 0.202615 0.202615 spend time
... ... ... ...
415 0.150019 -0.150019 difficult understand
122 0.150697 -0.150697 best math
1178 0.151531 -0.151531 office
991 0.151532 -0.151532 lecturing
445 0.161251 -0.161251 dont buy
1715 0.165290 -0.165290 think hes
1854 0.165374 -0.165374 wouldnt recommend
1644 0.168190 -0.168190 teacher loved
12 0.168641 -0.168641 30
434 0.170138 -0.170138 doesnt care
3 0.176178 -0.176178 105
1418 0.177775 -0.177775 relatively easy
1607 0.187225 -0.187225 ta lab
1125 0.188734 -0.188734 mission
619 0.190885 -0.190885 finance
751 0.192054 -0.192054 hansen
926 0.193562 -0.193562 kelly
699 0.195853 -0.195853 gotten
1271 0.200579 -0.200579 power points
1530 0.202177 -0.202177 spiritual thoughts
904 0.204705 -0.204705 ive met
1129 0.220336 -0.220336 monotone
265 0.222726 -0.222726 class love
1159 0.233863 -0.233863 nice man
1214 0.279600 -0.279600 parker
922 0.319725 -0.319725 kearl
147 0.334509 -0.334509 brau
828 0.376755 -0.376755 holyoak
1153 0.386088 -0.386088 new testament
1085 0.388631 -0.388631 marsh

1878 rows × 3 columns

In [ ]: