diff --git a/scraper.py b/scraper.py index ca7555e..f5f8c47 100644 --- a/scraper.py +++ b/scraper.py @@ -19,8 +19,9 @@ def datetime2date_time(dtime,mode): def seconds_from_midnight(t): return t.hour*60**2+ t.minute*60+t.second class Class: - def __init__(self,title,session,days,location,time_range,date_range): + def __init__(self,title,abrv,session,days,location,time_range,date_range,lab): self.title = title + self.abrv = abrv self.session = session self.days = days self.location = location @@ -31,7 +32,7 @@ class Class: def scrape(self,data): info,times = data # info - self.title,self.abrv,self.session = info.find('caption').text.split(' - ') + title,abrv,sesession = info.find('caption').text.split(' - ') self.lab = None self.session = int(self.session) rows = info.find_all('tr') @@ -60,15 +61,17 @@ class Class: # time headers,*data = times.find_all('tr') + headers = (header.text.lower() for header in headers.find_all('th')) if len(data) > 1: - data,lab = data[:2] + data,lab = map(lambda row: parse_horz_row(headers,row),data[:2]) + else lab = None data = data[0] - data = (col.text for col in data.find_all('td')) - headers = (header.text.lower() for header in headers.find_all('th')) + def parse_horz_row(headers,row): + data = (col.text for col in row.find_all('td')) ret = {} time_data = dict(zip(headers,data)) if time_data['time'] == 'TBA':