From c6f27b48fdb140b071d10edd9b98af71a9003d7a Mon Sep 17 00:00:00 2001 From: Raphael Roberts Date: Wed, 9 Jan 2019 13:26:56 -0600 Subject: [PATCH] Fixed bug where all classes end up on monday Removed liter from git merge process --- scraper.py | 42 ++++++++++-------------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/scraper.py b/scraper.py index 4aaf80a..1e8f447 100644 --- a/scraper.py +++ b/scraper.py @@ -40,12 +40,17 @@ def parse_horz_row(headers,row): datetime2date_time(e,'time'), ) s,e = map(dateparse,time_data['date range'].split(' - ')) + time_data['days'] = re.sub('[^{}]'.format(''.join(filter(bool,days))),'',time_data['days']) + ret['days'] = sorted((days.index(time_data['days'][i]) for i in range(len(time_data['days'])))) + if len(ret['days']) > 0: + class_start = (s.weekday()+1)%7 + start = ret['days'][0] + s += datetime.timedelta(days=(start - class_start)) + ret['date_range'] = ( datetime2date_time(s,'date'), datetime2date_time(e,'date'), ) - time_data['days'] = re.sub('[^{}]'.format(''.join(filter(bool,days))),'',time_data['days']) - ret['days'] = list(days.index(time_data['days'][i]) for i in range(len(time_data['days']))) ret['location'] = time_data['where'] return ret @@ -121,38 +126,9 @@ class Class: lab.update(params) lab = Class(title + " - Lab",abrv,session,**lab) -<<<<<<< HEAD else: lab = None data = parse_horz_row(headers,data[0]) -======= - else: - lab = None - data = data[0] - - - def parse_horz_row(headers,row): - data = (col.text for col in row.find_all('td')) - ret = {} - time_data = dict(zip(headers,data)) - if time_data['time'] == 'TBA': - ret['time_range'] = None - else: - s,e = map(dateparse,time_data['time'].split(' - ')) - ret['time_range'] = ( - datetime2date_time(s,'time'), - datetime2date_time(e,'time'), - ) - s,e = map(dateparse,time_data['date range'].split(' - ')) - ret['date_range'] = ( - datetime2date_time(s,'date'), - datetime2date_time(e,'date'), - ) - time_data['days'] = re.sub('[^{}]'.format(''.join(filter(bool,days))),'',time_data['days']) - ret['days'] = list(days.index(time_data['days'][i]) for i in range(len(time_data['days']))) - ret['location'] = time_data['where'] - return ret ->>>>>>> local params.update(data) return Class(title,abrv,session,lab=lab,**params) @@ -176,4 +152,6 @@ def get_classes(page): if __name__ == "__main__": with open('schedule.html') as file: page = BS(file.read(),'lxml') - class1,*classes = get_classes(page) + classes = list(get_classes(page)) + for _class in classes: + print(repr(_class),_class.date_range) \ No newline at end of file