|
|
|
@ -18,23 +18,77 @@ def datetime2date_time(dtime,mode): |
|
|
|
return datetime.time(dtime.hour,dtime.minute,dtime.second) |
|
|
|
def seconds_from_midnight(t):
    """Return the number of whole seconds between midnight and ``t``.

    ``t`` may be any object exposing ``hour``, ``minute`` and ``second``
    attributes; nothing finer than the ``second`` attribute is read.
    """
    minutes_elapsed = t.hour * 60 + t.minute
    return minutes_elapsed * 60 + t.second
|
|
|
|
|
|
|
def parse_horz_row(headers, row):
    """Convert one row of the times table into a dict of schedule fields.

    Args:
        headers: Column names, paired positionally with the row's ``td`` cells.
        row: Parsed table row; the text of each ``td`` it contains supplies
            one value.

    Returns:
        A dict with the keys ``time_range`` (``None`` when the ``time`` cell
        is ``'TBA'``, otherwise a ``(start, end)`` pair), ``date_range`` (a
        ``(start, end)`` pair), ``days`` (a list of indices into the
        externally defined ``days`` sequence) and ``location`` (the text of
        the ``where`` cell).

    Raises:
        KeyError: If a required column is absent. When the missing column is
            ``time`` the row is printed before the error propagates.
    """
    cells = dict(zip(headers, (cell.text for cell in row.find_all('td'))))

    # Dump the row before failing so a table lacking a 'time' column can be
    # inspected.
    try:
        time_text = cells['time']
    except KeyError:
        print(row)
        raise

    if time_text == 'TBA':
        time_range = None
    else:
        start, end = (dateparse(part) for part in time_text.split(' - '))
        time_range = (
            datetime2date_time(start, 'time'),
            datetime2date_time(end, 'time'),
        )

    first_day, last_day = (
        dateparse(part) for part in cells['date range'].split(' - ')
    )
    date_range = (
        datetime2date_time(first_day, 'date'),
        datetime2date_time(last_day, 'date'),
    )

    # Strip every character that is not one of the non-empty entries of
    # ``days``, then translate each remaining character to its index.
    known_letters = ''.join(letter for letter in days if letter)
    day_letters = re.sub('[^{}]'.format(known_letters), '', cells['days'])

    return {
        'time_range': time_range,
        'date_range': date_range,
        'days': [days.index(letter) for letter in day_letters],
        'location': cells['where'],
    }
|
|
|
|
|
|
|
class Class: |
|
|
|
def __init__(self,title,abrv,session,days,location,time_range,date_range,lab=None): |
|
|
|
def __init__(self, title, abrv, session, |
|
|
|
term, |
|
|
|
crn, |
|
|
|
instructor, |
|
|
|
grade_mode, |
|
|
|
credits, |
|
|
|
level, |
|
|
|
campus, |
|
|
|
time_range, |
|
|
|
date_range, |
|
|
|
days, |
|
|
|
location, |
|
|
|
lab=None): |
|
|
|
|
|
|
|
#name |
|
|
|
self.title = title |
|
|
|
self.abrv = abrv |
|
|
|
self.session = session |
|
|
|
#time |
|
|
|
self.date_range = date_range |
|
|
|
self.days = days |
|
|
|
self.location = location |
|
|
|
self.time_range = time_range |
|
|
|
#location |
|
|
|
self.location = location |
|
|
|
self.campus = campus |
|
|
|
#other |
|
|
|
self.session = session |
|
|
|
self.term = term |
|
|
|
self.crn = crn |
|
|
|
self.instructor = instructor |
|
|
|
self.grade_mode = grade_mode |
|
|
|
self.credits = credits |
|
|
|
self.level = level |
|
|
|
self.lab = lab |
|
|
|
self.date_range = date_range |
|
|
|
# data is a pair of HTML tables: the class-info table first, then the meeting-times table |
|
|
|
def scrape(self,data): |
|
|
|
@classmethod |
|
|
|
def scrape(cls,data): |
|
|
|
info,times = data |
|
|
|
# info |
|
|
|
title,abrv,sesession = info.find('caption').text.split(' - ') |
|
|
|
session = int(self.session) |
|
|
|
title,abrv,session = info.find('caption').text.split(' - ') |
|
|
|
session = int(session) |
|
|
|
rows = info.find_all('tr') |
|
|
|
params = {} |
|
|
|
for row in rows: |
|
|
|
name = row.find('th').text.rstrip(':') |
|
|
|
data = re.sub(r'^ +|[\n\r\t]','',row.find('td').text) |
|
|
|
@ -42,8 +96,7 @@ class Class: |
|
|
|
if name == 'Status': |
|
|
|
type,date = data.split(' on ') |
|
|
|
type = type.replace('*','') |
|
|
|
self.type = type |
|
|
|
self.registration_date = dateparse(date) |
|
|
|
registration_date = dateparse(date) |
|
|
|
else: |
|
|
|
if name in simp_exceptions: |
|
|
|
name = name.lower().replace(' ','_') |
|
|
|
@ -56,43 +109,23 @@ class Class: |
|
|
|
except: |
|
|
|
|
|
|
|
pass |
|
|
|
self.__dict__[name] = data |
|
|
|
|
|
|
|
params[name] = data |
|
|
|
# time |
|
|
|
headers,*data = times.find_all('tr') |
|
|
|
headers = (header.text.lower() for header in headers.find_all('th')) |
|
|
|
headers = list(header.text.lower() for header in headers.find_all('th')) |
|
|
|
if len(data) > 1: |
|
|
|
data,lab = map(lambda row: parse_horz_row(headers,row),data[:2]) |
|
|
|
lab.update(params) |
|
|
|
lab = Class(title + " - Lab",abrv,session,**lab) |
|
|
|
|
|
|
|
else: |
|
|
|
lab = None |
|
|
|
data = data[0] |
|
|
|
|
|
|
|
return Class(title,abrv,session,lab=lab,**data) |
|
|
|
|
|
|
|
def parse_horz_row(headers,row): |
|
|
|
data = (col.text for col in row.find_all('td')) |
|
|
|
ret = {} |
|
|
|
time_data = dict(zip(headers,data)) |
|
|
|
if time_data['time'] == 'TBA': |
|
|
|
ret['time_range'] = None |
|
|
|
else: |
|
|
|
s,e = map(dateparse,time_data['time'].split(' - ')) |
|
|
|
ret['time_range'] = ( |
|
|
|
datetime2date_time(s,'time'), |
|
|
|
datetime2date_time(e,'time'), |
|
|
|
) |
|
|
|
s,e = map(dateparse,time_data['date range'].split(' - ')) |
|
|
|
ret['date_range'] = ( |
|
|
|
datetime2date_time(s,'date'), |
|
|
|
datetime2date_time(e,'date'), |
|
|
|
) |
|
|
|
time_data['days'] = re.sub('[^{}]'.format(''.join(filter(bool,days))),'',time_data['days']) |
|
|
|
ret['days'] = list(days.index(time_data['days'][i]) for i in range(len(time_data['days']))) |
|
|
|
ret['location'] = time_data['where'] |
|
|
|
return ret |
|
|
|
data = parse_horz_row(headers,data[0]) |
|
|
|
|
|
|
|
params.update(data) |
|
|
|
return Class(title,abrv,session,lab=lab,**params) |
|
|
|
def __repr__(self):
    """Return ``'<title> on <days>'``.

    Each index stored in ``self.days`` is looked up in the externally
    defined ``days`` sequence and the results are concatenated.
    """
    day_labels = [days[index] for index in self.days]
    return '{} on {}'.format(self.title, ''.join(day_labels))
|
|
|
@property |
|
|
|
def length(self): |
|
|
|
return datetime.timedelta(seconds = sub( |
|
|
|
@ -110,5 +143,5 @@ def get_classes(page): |
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Read the saved schedule page and parse it exactly once. The parser is
    # named explicitly ('lxml') rather than left for the library to guess;
    # a second parse would also call file.read() on an exhausted handle.
    with open('schedule.html') as file:
        page = BS(file.read(), 'lxml')

    # The first class is split off from the remaining ones.
    class1, *classes = get_classes(page)