Browse Source

Fixed bug where all classes end up on monday

Removed litter from git merge process
master
Raphael Roberts 7 years ago
parent
commit
c6f27b48fd
  1. 42
      scraper.py

42
scraper.py

@ -40,12 +40,17 @@ def parse_horz_row(headers,row):
datetime2date_time(e,'time'),
)
s,e = map(dateparse,time_data['date range'].split(' - '))
time_data['days'] = re.sub('[^{}]'.format(''.join(filter(bool,days))),'',time_data['days'])
ret['days'] = sorted((days.index(time_data['days'][i]) for i in range(len(time_data['days']))))
if len(ret['days']) > 0:
class_start = (s.weekday()+1)%7
start = ret['days'][0]
s += datetime.timedelta(days=(start - class_start))
ret['date_range'] = (
datetime2date_time(s,'date'),
datetime2date_time(e,'date'),
)
time_data['days'] = re.sub('[^{}]'.format(''.join(filter(bool,days))),'',time_data['days'])
ret['days'] = list(days.index(time_data['days'][i]) for i in range(len(time_data['days'])))
ret['location'] = time_data['where']
return ret
@ -121,38 +126,9 @@ class Class:
lab.update(params)
lab = Class(title + " - Lab",abrv,session,**lab)
<<<<<<< HEAD
else:
lab = None
data = parse_horz_row(headers,data[0])
=======
else:
lab = None
data = data[0]
def parse_horz_row(headers, row):
    """Turn one schedule table row into a dict of meeting details.

    Args:
        headers: Column names, in the same order as the row's ``td`` cells.
            The names 'time', 'date range', 'days' and 'where' are read.
        row: Object exposing ``find_all('td')``; each cell's ``.text`` is used.

    Returns:
        dict with the keys:
            'time_range': ``None`` when the time cell is 'TBA', otherwise a
                (start, end) pair built with ``datetime2date_time(..., 'time')``.
            'date_range': (start, end) pair built with
                ``datetime2date_time(..., 'date')``. When at least one
                meeting day is listed, the start is moved forward to the
                first meeting day on or after the listed start date.
            'days': sorted list of indices into the module-level ``days``.
            'location': raw text of the 'where' cell.
    """
    cells = (col.text for col in row.find_all('td'))
    time_data = dict(zip(headers, cells))
    ret = {}

    if time_data['time'] == 'TBA':
        ret['time_range'] = None
    else:
        s, e = map(dateparse, time_data['time'].split(' - '))
        ret['time_range'] = (
            datetime2date_time(s, 'time'),
            datetime2date_time(e, 'time'),
        )

    # Keep only characters that are known day codes, then map each code to
    # its position in ``days``.
    day_codes = re.sub(
        '[^{}]'.format(''.join(filter(bool, days))), '', time_data['days']
    )
    ret['days'] = sorted(days.index(code) for code in day_codes)

    s, e = map(dateparse, time_data['date range'].split(' - '))
    if ret['days']:
        # Without this shift every class starts on the weekday the date range
        # happens to begin on, regardless of when it actually meets.
        # weekday() counts Monday as 0; the +1 % 7 converts to Sunday as 0.
        # NOTE(review): assumes ``days`` is ordered Sunday-first -- confirm
        # against the module-level definition.
        range_start_day = (s.weekday() + 1) % 7
        # Modulo keeps the shift in 0..6 so the start never moves before the
        # listed range; min() picks whichever meeting day comes first.
        offset = min((day - range_start_day) % 7 for day in ret['days'])
        s += datetime.timedelta(days=offset)
    ret['date_range'] = (
        datetime2date_time(s, 'date'),
        datetime2date_time(e, 'date'),
    )

    ret['location'] = time_data['where']
    return ret
>>>>>>> local
params.update(data)
return Class(title,abrv,session,lab=lab,**params)
@ -176,4 +152,6 @@ def get_classes(page):
if __name__ == "__main__":
    # Manual run: parse a saved schedule page and print every class found
    # together with its date range.
    with open('schedule.html') as file:
        page = BS(file.read(), 'lxml')
    # A single call is enough; collecting into a list also copes with a page
    # that yields no classes at all.
    classes = list(get_classes(page))
    for _class in classes:
        print(repr(_class), _class.date_range)
Loading…
Cancel
Save