Browse Source

finished body_create

master
Raphael Roberts 7 years ago
parent
commit
fcb4a704bf
  1. 16
      body_create.py
  2. 31
      scraper.py

16
body_create.py

@ -68,4 +68,18 @@ def create_body(class_obj,is_lab = False):
def json_dump(obj):
    """Serialize *obj* as JSON into ``classes.json`` in the working directory.

    The file is opened in write mode, so any existing content is replaced.
    Exactly one JSON document is written: calling ``json.dump`` more than once
    on the same handle would concatenate documents and leave a file that no
    JSON parser can load.

    Args:
        obj: Any value ``json.dump`` can serialize.

    Raises:
        TypeError: If *obj* contains values that are not JSON serializable.
        OSError: If ``classes.json`` cannot be opened for writing.
    """
    with open('classes.json', 'w') as file:
        json.dump(obj, file)
def create_event(class_list):
    """Lazily produce the event bodies for every class in *class_list*.

    ``create_body`` is called once per class and returns a pair; the first
    element is always yielded, the second (the lab body) only when it is
    truthy. The lab body therefore directly follows its class body.
    """
    for entry in class_list:
        main_body, lab_part = create_body(entry)
        yield main_body
        if not lab_part:
            # No lab attached to this class; nothing more to emit for it.
            continue
        yield lab_part
if __name__ == "__main__":
    # Script entry point: read the saved schedule page, turn every scraped
    # class into its event bodies and write them out via json_dump.
    from scraper import get_classes

    with open('schedule.html') as schedule_page:
        html = schedule_page.read()
    scraped = get_classes(html)
    # Materialize the generator so the result can be serialized.
    events = list(create_event(scraped))
    json_dump(events)

31
scraper.py

@ -121,9 +121,38 @@ class Class:
lab.update(params)
lab = Class(title + " - Lab",abrv,session,**lab)
<<<<<<< HEAD
else:
lab = None
data = parse_horz_row(headers,data[0])
=======
else:
lab = None
data = data[0]
# Parse one schedule-table row into a dict with the keys 'time_range',
# 'date_range', 'days' and 'location'.
#   headers: column names paired positionally with the row's <td> cells; the
#            code reads the 'time', 'date range', 'days' and 'where' columns.
#   row:     object exposing find_all('td') whose results have a .text
#            attribute (presumably a BeautifulSoup row tag -- confirm against
#            the caller).
# NOTE(review): leading indentation was lost in this rendering, so whether the
# statements after the if/else belong to the else branch or to the function
# body cannot be confirmed from here.
def parse_horz_row(headers,row):
# Generator over the text of every <td> cell in the row.
data = (col.text for col in row.find_all('td'))
ret = {}
# Map each header name to the text of the cell in the same position.
time_data = dict(zip(headers,data))
# A time of 'TBA' is recorded as no time range at all.
if time_data['time'] == 'TBA':
ret['time_range'] = None
else:
# The time cell is split on ' - ' into start and end, each parsed by dateparse.
s,e = map(dateparse,time_data['time'].split(' - '))
ret['time_range'] = (
datetime2date_time(s,'time'),
datetime2date_time(e,'time'),
)
# The 'date range' cell uses the same ' - ' separator for start and end.
s,e = map(dateparse,time_data['date range'].split(' - '))
ret['date_range'] = (
datetime2date_time(s,'date'),
datetime2date_time(e,'date'),
)
# Drop every character that does not occur in a truthy entry of `days`
# (`days` is defined outside this block; its entries are joined unescaped into
# a regex character class).
time_data['days'] = re.sub('[^{}]'.format(''.join(filter(bool,days))),'',time_data['days'])
# Translate each remaining character into its position within `days`.
ret['days'] = list(days.index(time_data['days'][i]) for i in range(len(time_data['days'])))
ret['location'] = time_data['where']
return ret
>>>>>>> local
params.update(data)
return Class(title,abrv,session,lab=lab,**params)
@ -142,7 +171,7 @@ def get_classes(page):
page = BS(page,'lxml')
tables = page.find_all('table',attrs= {'class':'datadisplaytable'})
groups = ((tables[i],tables[i+1]) for i in range(0,len(tables),2))
return list(map(Class.scrape,groups))
return map(Class.scrape,groups)
if __name__ == "__main__":
with open('schedule.html') as file:

Loading…
Cancel
Save