You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
180 lines
5.0 KiB
180 lines
5.0 KiB
import datetime
|
|
import itertools
|
|
from operator import sub
|
|
import re
|
|
|
|
import bs4
|
|
from bs4 import BeautifulSoup as BS
|
|
|
|
|
|
def dateparse(datetime_str):
    """Parse a schedule string as either a calendar date or a clock time.

    The string is first tried against the date layout ``"%b %d, %Y"``; if
    that does not match, it is parsed with the 12-hour time layout
    ``"%I:%M %p"`` instead.

    Args:
        datetime_str: Text holding a date or a time in one of the layouts
            above.

    Returns:
        The ``datetime.datetime`` produced by ``strptime`` for whichever
        layout matched.

    Raises:
        ValueError: If the string matches neither layout.
    """
    date_layout, time_layout = "%b %d, %Y", "%I:%M %p"
    parse = datetime.datetime.strptime
    try:
        parsed = parse(datetime_str, date_layout)
    except ValueError:
        # Not a date, so it has to be a clock time; a second failure propagates.
        parsed = parse(datetime_str, time_layout)
    return parsed
|
|
|
|
|
|
# Day letters by weekday index. The file's weekday arithmetic treats index 0 as
# Sunday and index 1 ("M") as Monday; None marks the two slots with no letter.
days = [None] + list("MTWRF") + [None]
# Row labels that are turned into snake_case as a whole instead of being
# reduced to their last word.
simp_exceptions = ["Grade Mode"]
|
|
|
|
|
|
def datetime2date_time(dtime: datetime.datetime, mode):
    """Extract either the date part or the time part of a datetime.

    Args:
        dtime: The datetime to split.
        mode: ``"date"`` to get a ``datetime.date``, ``"time"`` to get a
            ``datetime.time``.

    Returns:
        A ``datetime.date`` built from year/month/day, or a ``datetime.time``
        built from hour/minute/second only. ``None`` for any other ``mode``.
    """
    if mode == "date":
        return datetime.date(year=dtime.year, month=dtime.month, day=dtime.day)
    if mode == "time":
        # Only whole-second fields are copied over.
        return datetime.time(
            hour=dtime.hour, minute=dtime.minute, second=dtime.second
        )
    return None
|
|
|
|
|
|
def seconds_from_midnight(t: datetime.time):
    """Return the number of whole seconds between midnight and ``t``.

    Only the hour, minute and second fields of ``t`` are used.
    """
    minutes = t.hour * 60 + t.minute
    return minutes * 60 + t.second
|
|
|
|
|
|
def parse_horz_row(headers, row: bs4.element.Tag):
    """Turn one meeting-time table row into a dict of schedule fields.

    The text of the row's ``<td>`` cells is paired positionally with
    ``headers``; the columns ``"time"``, ``"date range"``, ``"days"`` and
    ``"where"`` are then read from that pairing.

    Args:
        headers: Column names, in the same order as the row's cells.
        row: The ``<tr>`` element to read.

    Returns:
        A dict with these keys:

        * ``"time_range"``: ``(start, end)`` tuple of ``datetime.time``, or
          ``None`` when the time column is ``"TBA"``.
        * ``"days"``: sorted list of indices into the module-level ``days``
          list, one per recognised day letter.
        * ``"date_range"``: ``(start, end)`` tuple of ``datetime.date``. When
          at least one day is listed, the start is shifted by the difference
          between the first listed day index and the start date's own index,
          which can move it earlier as well as later.
        * ``"location"``: text of the ``"where"`` column.

    Raises:
        KeyError: If a required column is absent. For a missing ``"time"``
            column the row is printed before the error is re-raised.
    """
    cells = (cell.text for cell in row.find_all("td"))
    fields = dict(zip(headers, cells))
    result = {}

    try:
        clock_span = fields["time"]
    except KeyError:
        # Dump the offending row to help diagnose unexpected table layouts.
        print(row)
        raise

    if clock_span == "TBA":
        result["time_range"] = None
    else:
        begin, finish = (dateparse(part) for part in clock_span.split(" - "))
        result["time_range"] = (
            datetime2date_time(begin, "time"),
            datetime2date_time(finish, "time"),
        )

    first_day, last_day = (
        dateparse(part) for part in fields["date range"].split(" - ")
    )

    # Drop every character that is not one of the known day letters.
    known_letters = "".join(letter for letter in days if letter)
    meeting_letters = re.sub("[^{}]".format(known_letters), "", fields["days"])
    result["days"] = sorted(days.index(letter) for letter in meeting_letters)

    if result["days"]:
        # weekday() counts from Monday = 0; re-base so it lines up with the
        # indices of the ``days`` list (Monday = 1).
        listed_start_index = (first_day.weekday() + 1) % 7
        offset = result["days"][0] - listed_start_index
        first_day += datetime.timedelta(days=offset)

    result["date_range"] = (
        datetime2date_time(first_day, "date"),
        datetime2date_time(last_day, "date"),
    )
    result["location"] = fields["where"]
    return result
|
|
|
|
|
|
class Class:
    """A single course section read from a schedule page.

    Instances are plain records of the section's fields. Use :meth:`scrape`
    to build one from the pair of HTML tables describing a section; a section
    with a second meeting row carries that row as a nested ``Class`` in
    ``lab``.
    """

    def __init__(
        self,
        title,
        abrv,
        session,
        term,
        crn,
        instructor,
        grade_mode,
        credits,
        level,
        campus,
        time_range,
        date_range,
        days,
        location,
        lab=None,
    ):
        """Store the given fields on the instance without modification.

        Args:
            title: Course title.
            abrv: Course abbreviation taken from the table caption.
            session: Session number taken from the table caption.
            term, crn, instructor, grade_mode, credits, level, campus:
                Values read from the info table rows of the same name.
            time_range: ``(start, end)`` pair of ``datetime.time``, or
                ``None`` when no meeting time is set.
            date_range: ``(start, end)`` pair of ``datetime.date``.
            days: Indices into the module-level ``days`` list.
            location: Where the section meets.
            lab: Optional ``Class`` describing an attached lab meeting.
        """
        # name
        self.title = title
        self.abrv = abrv
        # time
        self.date_range = date_range
        self.days = days
        self.time_range = time_range
        # location
        self.location = location
        self.campus = campus
        # other
        self.session = session
        self.term = term
        self.crn = crn
        self.instructor = instructor
        self.grade_mode = grade_mode
        self.credits = credits
        self.level = level
        self.lab = lab

    @classmethod
    def scrape(cls, info: "bs4.element.Tag", times: "bs4.element.Tag"):
        """Build a ``Class`` from a section's info table and meeting table.

        Args:
            info: Table whose ``<caption>`` reads
                ``"<title> - <abbreviation> - <session>"`` and whose rows
                each hold a ``<th>`` label and a ``<td>`` value.
            times: Table whose first ``<tr>`` holds the column headers and
                whose remaining rows are meeting times. When there is more
                than one meeting row, the second is treated as a lab.

        Returns:
            The scraped ``Class``; its ``lab`` attribute is another ``Class``
            titled ``"<title> - Lab"`` when a second meeting row exists,
            otherwise ``None``.

        Raises:
            ValueError: If the caption does not contain three
                ``" - "``-separated parts or the session is not an integer.
        """
        # info
        caption = info.find("caption").text
        # Split from the right so a title that itself contains " - " stays
        # in one piece.
        title, abrv, session = caption.rsplit(" - ", 2)
        session = int(session)

        params = {}
        for row in info.find_all("tr"):
            name = row.find("th").text.rstrip(":")
            data = re.sub(r"^ +|[\n\r\t]", "", row.find("td").text)

            if name == "Status":
                # No Class field corresponds to the Status row, so it is
                # skipped instead of being parsed and thrown away.
                continue

            if name in simp_exceptions:
                name = name.lower().replace(" ", "_")
            else:
                # Keep only the last word, e.g. a multi-word label collapses
                # to its final token.
                name = name.lower().split(" ")[-1]

            if name != "instructor":
                data = data.lower()

            try:
                # Numeric values are stored as ints; a trailing decimal part
                # is dropped first.
                data = int(re.sub(r"\.\d+", "", data))
            except ValueError:
                # Not a number: keep the text as it is.
                pass
            params[name] = data

        # time
        header_row, *meeting_rows = times.find_all("tr")
        headers = [header.text.lower() for header in header_row.find_all("th")]

        if len(meeting_rows) > 1:
            meeting = parse_horz_row(headers, meeting_rows[0])
            lab_fields = parse_horz_row(headers, meeting_rows[1])
            lab_fields.update(params)
            lab = Class(title + " - Lab", abrv, session, **lab_fields)
        else:
            lab = None
            meeting = parse_horz_row(headers, meeting_rows[0])

        params.update(meeting)
        return Class(title, abrv, session, lab=lab, **params)

    def __repr__(self):
        """Return ``"<title> on <day letters>"`` for this section."""
        return "{} on {}".format(self.title, "".join(days[i] for i in self.days))

    @property
    def length(self):
        """Duration of one meeting as a ``datetime.timedelta``.

        Returns ``None`` when ``time_range`` is ``None`` (no meeting time
        set), since there is nothing to measure.
        """
        if self.time_range is None:
            return None
        return datetime.timedelta(
            seconds=sub(
                seconds_from_midnight(self.time_range[1]),
                seconds_from_midnight(self.time_range[0]),
            )
        )
|
|
|
|
|
|
def get_classes(page):
    """Lazily scrape every section found on a schedule page.

    Args:
        page: Either an already-parsed ``BeautifulSoup`` document or raw
            markup, which is parsed with the ``"lxml"`` parser.

    Returns:
        An ``itertools.starmap`` iterator yielding one ``Class`` per
        consecutive pair of ``datadisplaytable`` tables (info table, then
        meeting table). Nothing is scraped until the iterator is consumed;
        an unpaired trailing table raises ``IndexError`` when it is reached.
    """
    soup = page if isinstance(page, BS) else BS(page, "lxml")
    tables = soup.find_all("table", attrs={"class": "datadisplaytable"})

    def table_pairs():
        # Tables come in (info, times) pairs, one pair per section.
        for first in range(0, len(tables), 2):
            yield tables[first], tables[first + 1]

    return itertools.starmap(Class.scrape, table_pairs())
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: scrape ./schedule.html and list every section with
    # its date range.
    with open("schedule.html") as handle:
        markup = handle.read()
    soup = BS(markup, "lxml")
    # Scrape everything first so no output appears before parsing succeeds.
    scraped = list(get_classes(soup))
    for course in scraped:
        print(repr(course), course.date_range)
|