diff --git a/Student Detail Schedule.html b/schedule.html similarity index 100% rename from Student Detail Schedule.html rename to schedule.html diff --git a/scraper.py b/scraper.py index f5f8c47..cd31101 100644 --- a/scraper.py +++ b/scraper.py @@ -19,22 +19,21 @@ def datetime2date_time(dtime,mode): def seconds_from_midnight(t): return t.hour*60**2+ t.minute*60+t.second class Class: - def __init__(self,title,abrv,session,days,location,time_range,date_range,lab): + def __init__(self,title,abrv,session,days,location,time_range,date_range,lab=None): self.title = title self.abrv = abrv self.session = session self.days = days self.location = location self.time_range = time_range - self.lab = None + self.lab = lab self.date_range = date_range # data is a list of two html tables def scrape(self,data): info,times = data # info title,abrv,sesession = info.find('caption').text.split(' - ') - self.lab = None - self.session = int(self.session) + session = int(self.session) rows = info.find_all('tr') for row in rows: name = row.find('th').text.rstrip(':') @@ -64,11 +63,13 @@ class Class: headers = (header.text.lower() for header in headers.find_all('th')) if len(data) > 1: data,lab = map(lambda row: parse_horz_row(headers,row),data[:2]) + lab = Class(title + " - Lab",abrv,session,**lab) - else + else: lab = None data = data[0] + return Class(title,abrv,session,lab=lab,**data) def parse_horz_row(headers,row): data = (col.text for col in row.find_all('td')) @@ -109,5 +110,5 @@ def get_classes(page): if __name__ == "__main__": with open('schedule.html') as file: - page = BS(file.read(),'lxml') + page = BS(file.read()) class1,*classes = get_classes(page)