|
|
|
@ -19,22 +19,21 @@ def datetime2date_time(dtime,mode): |
|
|
|
def seconds_from_midnight(t): |
|
|
|
return t.hour*60**2+ t.minute*60+t.second |
|
|
|
class Class: |
|
|
|
def __init__(self,title,abrv,session,days,location,time_range,date_range,lab): |
|
|
|
def __init__(self,title,abrv,session,days,location,time_range,date_range,lab=None): |
|
|
|
self.title = title |
|
|
|
self.abrv = abrv |
|
|
|
self.session = session |
|
|
|
self.days = days |
|
|
|
self.location = location |
|
|
|
self.time_range = time_range |
|
|
|
self.lab = None |
|
|
|
self.lab = lab |
|
|
|
self.date_range = date_range |
|
|
|
# data is a list of two html tables |
|
|
|
def scrape(self,data): |
|
|
|
info,times = data |
|
|
|
# info |
|
|
|
title,abrv,sesession = info.find('caption').text.split(' - ') |
|
|
|
self.lab = None |
|
|
|
self.session = int(self.session) |
|
|
|
session = int(self.session) |
|
|
|
rows = info.find_all('tr') |
|
|
|
for row in rows: |
|
|
|
name = row.find('th').text.rstrip(':') |
|
|
|
@ -64,11 +63,13 @@ class Class: |
|
|
|
headers = (header.text.lower() for header in headers.find_all('th')) |
|
|
|
if len(data) > 1: |
|
|
|
data,lab = map(lambda row: parse_horz_row(headers,row),data[:2]) |
|
|
|
lab = Class(title + " - Lab",abrv,session,**lab) |
|
|
|
|
|
|
|
else |
|
|
|
else: |
|
|
|
lab = None |
|
|
|
data = data[0] |
|
|
|
|
|
|
|
return Class(title,abrv,session,lab=lab,**data) |
|
|
|
|
|
|
|
def parse_horz_row(headers,row): |
|
|
|
data = (col.text for col in row.find_all('td')) |
|
|
|
@ -109,5 +110,5 @@ def get_classes(page): |
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
with open('schedule.html') as file: |
|
|
|
page = BS(file.read(),'lxml') |
|
|
|
page = BS(file.read()) |
|
|
|
class1,*classes = get_classes(page) |