diff --git a/body_create.py b/body_create.py index c7abd02..971d858 100644 --- a/body_create.py +++ b/body_create.py @@ -68,4 +68,18 @@ def create_body(class_obj,is_lab = False): def json_dump(obj): with open('classes.json','w') as file: - json.dump(obj,file) \ No newline at end of file + json.dump(obj,file) + + +def create_event(class_list): + for class_obj in class_list: + body,lab_body = create_body(class_obj) + yield body + if lab_body: + yield lab_body +if __name__ == "__main__": + from scraper import get_classes + with open('schedule.html') as file: + classes = get_classes(file.read()) + l = list(create_event(classes)) + json_dump(l) diff --git a/scraper.py b/scraper.py index efe6b8b..4aaf80a 100644 --- a/scraper.py +++ b/scraper.py @@ -121,9 +121,38 @@ class Class: lab.update(params) lab = Class(title + " - Lab",abrv,session,**lab) +<<<<<<< HEAD else: lab = None data = parse_horz_row(headers,data[0]) +======= + else: + lab = None + data = data[0] + + + def parse_horz_row(headers,row): + data = (col.text for col in row.find_all('td')) + ret = {} + time_data = dict(zip(headers,data)) + if time_data['time'] == 'TBA': + ret['time_range'] = None + else: + s,e = map(dateparse,time_data['time'].split(' - ')) + ret['time_range'] = ( + datetime2date_time(s,'time'), + datetime2date_time(e,'time'), + ) + s,e = map(dateparse,time_data['date range'].split(' - ')) + ret['date_range'] = ( + datetime2date_time(s,'date'), + datetime2date_time(e,'date'), + ) + time_data['days'] = re.sub('[^{}]'.format(''.join(filter(bool,days))),'',time_data['days']) + ret['days'] = list(days.index(time_data['days'][i]) for i in range(len(time_data['days']))) + ret['location'] = time_data['where'] + return ret +>>>>>>> local params.update(data) return Class(title,abrv,session,lab=lab,**params) @@ -142,7 +171,7 @@ def get_classes(page): page = BS(page,'lxml') tables = page.find_all('table',attrs= {'class':'datadisplaytable'}) groups = ((tables[i],tables[i+1]) for i in range(0,len(tables),2)) - return list(map(Class.scrape,groups)) + return map(Class.scrape,groups) if __name__ == "__main__": with open('schedule.html') as file: