Compare commits

...

7 Commits

  1. 4
      .gitignore
  2. 3
      .gitmodules
  3. 1
      MANIFEST.in
  4. 16
      __init__.py
  5. 46
      bot.py
  6. 5
      gcalendar.py
  7. 1
      google_api_wrapper
  8. 645
      schedule.html
  9. 175
      scraper.py
  10. 50
      upload.py

4
.gitignore

@ -2,3 +2,7 @@
*.json
__pycache__
/api_info
*.html
.dir-locals.el
*.whl

3
.gitmodules

@ -1,3 +0,0 @@
[submodule "google_api_wrapper"]
path = google_api_wrapper
url = https://rlbrhost.ddns.net/git/rlbr/google_api_wrapper.git

1
MANIFEST.in

@ -0,0 +1 @@
include requirements.txt

16
__init__.py

@ -1,16 +0,0 @@
"""Upload every event body stored in ``classes.json`` to the school calendar.

The whole module runs at import time: it builds the calendar API client,
looks up the target calendar and creates one event per JSON entry.
"""
import json
import os
import pprint

from gcalendar import calendar_api

# The calendar is addressed by its lower-cased name, both as a key of
# ``api.ids`` and as the first argument of ``api.create_event``.
CALENDAR_NAME = "Raphael's School Schedule".lower()

# os.path.join keeps the client-secret path valid on every platform; the
# previous hard-coded backslash separator only resolved on Windows.
api = calendar_api(
    "class", os.path.join("api_info", "client_secret.json"), "api_info"
)

cals = api.get_calendars()
# ``cal`` is only consumed by the disabled colorId line below; the lookup still
# raises StopIteration when the calendar does not exist, as it did before.
cal = next(c for c in cals if c["id"] == api.ids[CALENDAR_NAME])

# JSON is UTF-8 by specification, so do not depend on the locale's default.
with open("classes.json", encoding="utf-8") as file:
    bodies = json.load(file)

for body in bodies:
    # body['colorId'] = cal['colorId']
    # pprint.pprint(body)
    # input()
    api.create_event(CALENDAR_NAME, body)

46
bot.py

@ -1,46 +0,0 @@
from pyppeteer import launch
import asyncio
import time
import scraper
# JavaScript snippet that selects the first option of the ``term_in`` control.
set_semester = "document.getElementsByName('term_in')[0].selectedIndex = 0"

# XPath expressions for the portal elements the bot interacts with, keyed by a
# short role name.
xpaths = dict(
    tab=".//a[text()='Current Student']",
    schedule=".//a[text()='Student Detail Schedule']",
    submit="//input[@value='Submit']",
    frame="//frame[@src='/cp/ip/login?sys=sctssb&url=https://ssb.neiu.edu/mercury_neiuprod/bwskfshd.P_CrseSchdDetl']",
)
async def xpath_single_element(xpath, page):
    """Wait for ``xpath`` to match inside ``page`` and return the first match.

    ``page`` is any object offering awaitable ``waitForXPath`` and ``xpath``
    methods; this file passes both a page and a frame.

    Raises IndexError if the follow-up query returns no elements.
    """
    await page.waitForXPath(xpath)
    return (await page.xpath(xpath))[0]
async def main_loop(login):
    """Drive a browser through the portal and return the parsed class schedule.

    Steps: open the login page, evaluate ``login`` in it, open the
    "Current Student" tab, follow "Student Detail Schedule", submit the form
    inside the last frame and hand that frame's HTML to
    ``scraper.get_classes``.

    Parameters:
        login: JavaScript source evaluated on the login page.

    Returns:
        Whatever ``scraper.get_classes`` returns for the schedule HTML.

    The browser is closed even when a step fails, so a failed run does not
    leave a browser process behind.
    """
    browser = await launch(headless=False)
    try:
        page = (await browser.pages())[0]
        await page.goto('https://neiuport.neiu.edu/cp/home/displaylogin')
        await page.evaluate(login)
        await page.waitFor('#tab')

        student_tab = await xpath_single_element(xpaths['tab'], page)
        await student_tab.click()

        await page.waitForXPath(xpaths['schedule'])
        schedule = await xpath_single_element(xpaths['schedule'], page)
        await schedule.click()

        # This wait used to be created but never awaited, so it did not
        # actually block until the schedule frame appeared.
        await page.waitForXPath(xpaths['frame'])
        # The fixed pauses are kept to give the frame's content time to load.
        await asyncio.sleep(3)
        frame = page.frames[-1]
        submit = await xpath_single_element(xpaths['submit'], frame)
        await submit.click()
        await asyncio.sleep(1)
        # Re-read page.frames: the frame list may have changed after submit.
        content = await page.frames[-1].content()
    finally:
        await browser.close()
    return scraper.get_classes(content)
def get_classes(user, password):
    """Log in with the given credentials and return the scraped classes.

    Builds the JavaScript that fills the ``user`` and ``pass`` inputs and calls
    the page's ``login()`` function, then runs ``main_loop`` to completion on
    the current event loop.

    Parameters:
        user: Portal user name.
        password: Portal password.

    Returns:
        The result of ``main_loop``.
    """
    # Local import so this block does not depend on the file's import section.
    import json

    # json.dumps emits a quoted, fully escaped string literal that is also
    # valid JavaScript, so quotes or backslashes in the credentials can no
    # longer break (or inject code into) the generated script.
    login = (
        "document.getElementById('user').value={}\n"
        "document.getElementById('pass').value={}\n"
        "login()"
    ).format(json.dumps(str(user)), json.dumps(str(password)))
    loop = asyncio.get_event_loop()
    return loop.run_until_complete(main_loop(login))
if __name__ == "__main__":
    # Credentials must never live in source control: read them from the
    # environment and prompt for anything that is missing.
    # NOTE(review): a real password was previously committed on this line; it
    # remains in the repository history and should be changed.
    import getpass
    import os

    user = os.environ.get("NEIU_USER") or input("NEIU username: ")
    password = os.environ.get("NEIU_PASSWORD") or getpass.getpass("NEIU password: ")
    cl = get_classes(user, password)

5
gcalendar.py

@ -1,5 +0,0 @@
"""Shim that re-exports ``gapi.calendar_api`` from the ``google_api_wrapper`` directory next to this file."""
import os
import sys

# Resolve to an absolute directory first: ``__file__`` can be relative, and a
# relative sys.path entry stops working once the working directory changes.
parent = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.join(parent, 'google_api_wrapper'))

# Deliberate star import: this module exists only to re-export the wrapper's
# calendar API names.
from gapi.calendar_api import *

1
google_api_wrapper

@ -1 +0,0 @@
Subproject commit de63a3871564e2b3f1faaa5f9c211e388358bc1f

645
schedule.html

@ -1,645 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/transitional.dtd"><html lang="en"><head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta http-equiv="Pragma" name="Cache-Control" content="no-cache">
<meta http-equiv="Cache-Control" name="Cache-Control" content="no-cache">
<link rel="stylesheet" href="/css/web_defaultapp.css" type="text/css">
<link rel="stylesheet" href="/css/web_defaultprint.css" type="text/css" media="print">
<title>Student Detail Schedule</title>
<meta http-equiv="Content-Script-Type" name="Default_Script_Language" content="text/javascript">
<script language="JavaScript" type="text/javascript">
<!-- Hide JavaScript from older browsers
window.onunload = function() {submitcount=0;}
var submitcount=0;
// Double-submit guard: returns true (and increments submitcount) on the first
// call; on later calls shows an alert and returns false. submitcount is set
// back to 0 by the window.onunload handler defined above.
function checkSubmit() {
if (submitcount == 0)
{
submitcount++;
return true;
}
else
{
alert("Your changes have already been submitted.");
return false;
}
}
// End script hiding -->
</script>
<script language="JavaScript" type="text/javascript">
<!-- Hide JavaScript from older browsers
// Function to open a window
function windowOpen(window_url) {
// Open window_url in a new 350x400 window; helpWin is assigned without a
// declaration, so it becomes an implicit global.
helpWin = window.open(window_url,'','toolbar=yes,status=no,scrollbars=yes,menubar=yes,resizable=yes,directories=no,location=no,width=350,height=400');
// Focus the new window only if document.images is truthy and window.open
// actually returned a handle.
if (document.images) {
if (helpWin) helpWin.focus()
}
}
// End script hiding -->
</script>
</head>
<body>
<div class="headerwrapperdiv">
<div class="pageheaderdiv1">
<a href="#main_content" onmouseover="window.status='Go to Main Content'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Go to Main Content'; return true" onblur="window.status=''; return true" class="skiplinks">Go to Main Content</a>
<h1>SCT WWW Information System</h1></div><div class="headerlinksdiv">
<span class="pageheaderlinks2">
<map name="Module_Navigation_Links_H" title="Module Navigation Links">
<p>
<a href="#skip_Module_Navigation_Links_H" onmouseover="window.status='Skip Module Navigation Links'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Skip Module Navigation Links'; return true" onblur="window.status=''; return true" class="skiplinks">Skip Module Navigation Links</a>
</p><table class="plaintable" summary="This is main table for displaying Tab Items." width="100%" cellspacing="0" cellpadding="0" border="0">
<tbody><tr>
<td class="pldefault">
<table class="plaintable" summary="This table displays Tab Items." cellspacing="0" cellpadding="0" border="0">
<tbody><tr>
<td class="taboff" height="22">
<a href="/neiuprod/twbkwbis.P_GenMenu?name=bmenu.P_GenMnu" onmouseover="window.status='Personal Information'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Personal Information'; return true" onblur="window.status=''; return true">Personal Information</a>
</td>
<td class="bgtaboff" height="22" valign="top" align="right">
<img src="/wtlgifs/web_tab_corner_right.gif" alt="Tab Corner Right" class="headerImg" title="Tab Corner Right" name="web_tab_corner_right" hspace="0" vspace="0" border="0" height="20" width="8">
</td>
<td class="tabon" height="22">
<a href="/neiuprod/twbkwbis.P_GenMenu?name=bmenu.P_StuMainMnu" onmouseover="window.status='Student'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Student'; return true" onblur="window.status=''; return true">Student</a>
</td>
<td class="bgtabon" height="22" valign="top" align="right">
<img src="/wtlgifs/web_tab_corner_right.gif" alt="Tab Corner Right" class="headerImg" title="Tab Corner Right" name="web_tab_corner_right" hspace="0" vspace="0" border="0" height="20" width="8">
</td>
<td class="taboff" height="22">
<a href="/neiuprod/twbkwbis.P_GenMenu?name=bmenu.P_FinAidMainMnu" onmouseover="window.status='Financial Aid'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Financial Aid'; return true" onblur="window.status=''; return true">Financial Aid</a>
</td>
<td class="bgtaboff" height="22" valign="top" align="right">
<img src="/wtlgifs/web_tab_corner_right.gif" alt="Tab Corner Right" class="headerImg" title="Tab Corner Right" name="web_tab_corner_right" hspace="0" vspace="0" border="0" height="20" width="8">
</td>
<td class="taboff" height="22">
<a href="/neiuprod/twbkwbis.P_GenMenu?name=bmenu.P_FacMainMnu" onmouseover="window.status='Faculty Services'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Faculty Services'; return true" onblur="window.status=''; return true">Faculty Services</a>
</td>
<td class="bgtaboff" height="22" valign="top" align="right">
<img src="/wtlgifs/web_tab_corner_right.gif" alt="Tab Corner Right" class="headerImg" title="Tab Corner Right" name="web_tab_corner_right" hspace="0" vspace="0" border="0" height="20" width="8">
</td>
</tr>
</tbody></table>
</td>
</tr>
<tr>
<td class="bgtabon" width="100%" colspan="2"><img src="/wtlgifs/web_transparent.gif" alt="Transparent Image" class="headerImg" title="Transparent Image" name="web_transparent" hspace="0" vspace="0" border="0" height="3" width="10"></td></tr></tbody></table>
</map>
</span>
<a name="skip_Module_Navigation_Links_H"></a>
</div>
<table class="plaintable" summary="This table displays Menu Items and Banner Search textbox." width="100%">
<tbody><tr>
<td class="pldefault">
<div class="headerlinksdiv2">
<form action="/neiuprod/twbksrch.P_ShowResults" method="post">
Search
<span class="fieldlabeltextinvisible"><label for="keyword_in_id"><span class="fieldlabeltext">Search</span></label></span>
<input type="text" name="KEYWRD_IN" size="20" maxlength="65" id="keyword_in_id">
<input type="submit" value="Go">
</form>
</div>
</td>
<td class="pldefault"><p class="rightaligntext" p="">
<span class="pageheaderlinks">
<a href="/neiuprod/twbkwbis.P_GenMenu?name=bmenu.P_RegMnu" class="submenulinktext2" id="ssbbackurl">RETURN TO MENU</a>
|
<a href="/neiuprod/twbksite.P_DispSiteMap?menu_name_in=bmenu.P_MainMnu&amp;depth_in=2&amp;columns_in=3" accesskey="2" class="submenulinktext2">SITE MAP</a>
|
<a href="/neiuprod/twbkfrmt.P_DispHelp?pagename_in=bwskfshd.P_CrseSchdDetl" accesskey="H" onclick="popup = window.open('/neiuprod/twbkfrmt.P_DispHelp?pagename_in=bwskfshd.P_CrseSchdDetl', 'PopupPage','height=500,width=450,scrollbars=yes,resizable=yes'); return false" target="_blank" onmouseover="window.status=''; return true" onmouseout="window.status=''; return true" onfocus="window.status=''; return true" onblur="window.status=''; return true" class="submenulinktext2">HELP</a>
|
<a href="twbkwbis.P_Logout" accesskey="3" class="submenulinktext2">EXIT</a>
</span>
</p></td>
</tr>
</tbody></table>
</div>
<div class="pagetitlediv">
<table class="plaintable" summary="This table displays title and static header displays." width="100%">
<tbody><tr>
<td class="pldefault">
<h2>Student Detail Schedule</h2>
</td>
<td class="pldefault">
&nbsp;
</td>
<td class="pldefault"><p class="rightaligntext" p="">
</p><div class="staticheaders">
000645225 Raphael L. Roberts<br>
Fall 2019<br>
Aug 26, 2019 03:02 am<br>
</div>
</td>
</tr>
<tr>
<td class="bg3" width="100%" colspan="3"><img src="/wtlgifs/web_transparent.gif" alt="Transparent Image" class="headerImg" title="Transparent Image" name="web_transparent" hspace="0" vspace="0" border="0" height="3" width="10"></td>
</tr>
</tbody></table>
<a name="main_content"></a>
</div>
<div class="pagebodydiv">
<!-- ** END OF twbkwbis.P_OpenDoc ** -->
<div class="infotextdiv"><table class="infotexttable" summary="This layout table contains information that may be helpful in understanding the content and functionality of this page. It could be a brief set of instructions, a description of error messages, or other special information."><tbody><tr><td class="indefault"><img src="/wtlgifs/web_info_cascade.png" alt="Information" class="headerImg" title="Information" name="web_info" hspace="0" vspace="0" border="0" height="12" width="14"></td><td class="indefault"><span class="infotext"> <b>Class Schedule</b><br>
The <a href="https://www.neiu.edu/academics/registrar-services/class-schedules-and-registration"> Schedule of Classes </a> contains important information you should know including but not limited to: registering for classes, add/drop dates, semester calendars, final exam schedule, tuition/fees rates, and how to make payments or request a payment plan. If there are questions we can answer for you, please contact the <a href="mailto:registration@neiu.edu"> Registration Office.</a><br><br>
Remember to review your class schedule prior to the first day of classes for possible changes in class location and times.<br><br></span></td></tr></tbody></table><p></p></div>
Total Credit Hours: 16.000
<br>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Computer Organization - CS 301 - 2</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">10335</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Aug 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Pericles Prezas<a href="mailto:P-Prezas@neiu.edu" target="Pericles Prezas"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 3.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">4:15 pm - 5:30 pm</td>
<td class="dddefault">TR</td>
<td class="dddefault">Lech Walesa Hall 3046</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Pericles Prezas (<abbr title="Primary">P</abbr>)<a href="mailto:P-Prezas@neiu.edu" target="Pericles Prezas"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Data Structures - CS 304 - 2</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">10337</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Apr 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Xiwei Wang<a href="mailto:X-Wang9@neiu.edu" target="Xiwei Wang"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 3.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">5:40 pm - 6:55 pm</td>
<td class="dddefault">MW</td>
<td class="dddefault">Lech Walesa Hall 3046</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Xiwei Wang (<abbr title="Primary">P</abbr>)<a href="mailto:X-Wang9@neiu.edu" target="Xiwei Wang"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Applied Music: Tuba - MUS 151K - 1</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">11522</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Aug 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
William R. Russell<a href="mailto:W-Russell1@neiu.edu" target="William R. Russell"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 1.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault"><abbr title="To Be Announced">TBA</abbr></td>
<td class="dddefault">&nbsp;</td>
<td class="dddefault"><abbr title="To Be Announced">TBA</abbr></td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">William R Russell (<abbr title="Primary">P</abbr>)<a href="mailto:W-Russell1@neiu.edu" target="William R. Russell"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Band - MUS 231 - 1</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">13387</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Aug 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Travis M. Heath<a href="mailto:T-Heath@neiu.edu" target="Travis M. Heath"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 1.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">7:05 pm - 9:45 pm</td>
<td class="dddefault">T</td>
<td class="dddefault">Fine Arts Center 144</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Travis M Heath (<abbr title="Primary">P</abbr>)<a href="mailto:T-Heath@neiu.edu" target="Travis M. Heath"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Instrumental Ensemble: Brass - MUS 234A - 1</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">14884</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Aug 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Anna F. Mayne<a href="mailto:A-Mayne@neiu.edu" target="Anna F. Mayne"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 1.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">10:50 am - 12:05 pm</td>
<td class="dddefault">TR</td>
<td class="dddefault">Fine Arts Center 144</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Anna F Mayne (<abbr title="Primary">P</abbr>)<a href="mailto:A-Mayne@neiu.edu" target="Anna F. Mayne"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Instrumental Ensemble:Jazz Band - MUS 235A - 1</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">11563</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Aug 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Steven T. Duncan<a href="mailto:S-Duncan@neiu.edu" target="Steven T. Duncan"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 1.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">1:40 pm - 2:55 pm</td>
<td class="dddefault">TR</td>
<td class="dddefault">Fine Arts Center 144</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Steven T Duncan (<abbr title="Primary">P</abbr>)<a href="mailto:S-Duncan@neiu.edu" target="Steven T. Duncan"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Music After Beethoven - MUS 345 - 1</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">14890</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Apr 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Peter M. Chang<a href="mailto:P-Chang1@neiu.edu" target="Peter M. Chang"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 3.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">11:30 am - 12:20 pm</td>
<td class="dddefault">MWF</td>
<td class="dddefault">Fine Arts Center 145</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Peter M Chang (<abbr title="Primary">P</abbr>)<a href="mailto:P-Chang1@neiu.edu" target="Peter M. Chang"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">The Universe:Past, Present And Future - PHYS 103 - 3</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">11008</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Apr 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Orin M. Harris<a href="mailto:O-Harris1@neiu.edu" target="Orin M. Harris"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 3.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">9:25 am - 10:40 am</td>
<td class="dddefault">TR</td>
<td class="dddefault">Bernard J Brommel Hall 101</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Orin M Harris (<abbr title="Primary">P</abbr>)<a href="mailto:O-Harris1@neiu.edu" target="Orin M. Harris"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<a href="javascript:history.go(-1)" onmouseover="window.status='Return to Previous'; return true" onfocus="window.status='Return to Previous'; return true" onmouseout="window.status=''; return true" onblur="window.status=''; return true">Return to Previous</a>
<!-- ** START OF twbkwbis.P_CloseDoc ** -->
<table class="plaintable" summary="This is table displays line separator at end of the page." width="100%" cellspacing="0" cellpadding="0" border="0"><tbody><tr><td class="bgtabon" width="100%" colspan="2"><img src="/wtlgifs/web_transparent.gif" alt="Transparent Image" class="headerImg" title="Transparent Image" name="web_transparent" hspace="0" vspace="0" border="0" height="3" width="10"></td></tr></tbody></table>
<a href="#top" onmouseover="window.status='Skip to top of page'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Skip to top of page'; return true" onblur="window.status=''; return true" class="skiplinks">Skip to top of page</a>
</div>
<div class="footerbeforediv">
</div>
<div class="footerlinksdiv">
<span class="pagefooterlinks">
<map name="Student_Detail_Schedule_Links_F" title="Student Detail Schedule Links">
<p>
<a href="#skip_Student_Detail_Schedule_Links_F" onmouseover="window.status='Skip Student Detail Schedule Links'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Skip Student Detail Schedule Links'; return true" onblur="window.status=''; return true" class="skiplinks">Skip Student Detail Schedule Links</a>
</p><p>[ <a href="/neiuprod/bwskhreg.p_reg_hist" onmouseover="window.status='Show Registration History'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Show Registration History'; return true" onblur="window.status=''; return true">Show Registration History</a>
| <a href="/neiuprod/bwskfreg.P_AltPin" onmouseover="window.status='Add or Drop Classes'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Add or Drop Classes'; return true" onblur="window.status=''; return true">Add or Drop Classes</a>
| <a href="/neiuprod/bwskfcls.p_sel_crse_search" onmouseover="window.status='Look Up Classes'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Look Up Classes'; return true" onblur="window.status=''; return true">Look Up Classes</a>
]
<a name="skip_Student_Detail_Schedule_Links_F"></a>
</p></map></span></div>
<div class="footerafterdiv">
</div>
<div class="globalafterdiv">
</div>
<div class="globalfooterdiv">
</div>
<div class="pagefooterdiv">
<span class="releasetext">Release: 8.7.1</span>
</div>
<div class="poweredbydiv">
</div>
<div class="div1"></div>
<div class="div2"></div>
<div class="div3"></div>
<div class="div4"></div>
<div class="div5"></div>
<div class="div6"></div>
<div class="banner_copyright"> <br><h5>© 2019 Ellucian Company L.P. and its affiliates.<br></h5></div>
</body></html>

175
scraper.py

@ -1,61 +1,79 @@
from bs4 import BeautifulSoup as BS
import datetime
import re
import itertools
from operator import sub
import re
import bs4
from bs4 import BeautifulSoup as BS
def dateparse(datetime_str):
    """Parse a schedule date (``"Jan 14, 2019"``) or clock time (``"9:25 am"``).

    The date layout is tried first; when the text does not match it, the text
    is parsed as a 12-hour clock time instead. Surrounding whitespace is
    ignored.

    Args:
        datetime_str: Text in ``%b %d, %Y`` or ``%I:%M %p`` layout.

    Returns:
        A ``datetime.datetime``. For a time-only string the date part is the
        ``strptime`` default of 1900-01-01.

    Raises:
        ValueError: If the text matches neither layout.
    """
    date_format = "%b %d, %Y"
    time_format = "%I:%M %p"
    text = datetime_str.strip()
    try:
        return datetime.datetime.strptime(text, date_format)
    except ValueError:
        # Not a calendar date, so it has to be a clock time.
        return datetime.datetime.strptime(text, time_format)
# Weekday letters indexed by day number, counting Sunday as 0; positions 0 and
# 6 (Sunday, Saturday) hold None because they have no letter.
days = [None, "M", "T", "W", "R", "F", None]
# Row labels that keep all of their words when turned into a key (lower-cased,
# spaces replaced by underscores) instead of being cut down to the last word.
simp_exceptions = ["Grade Mode"]
def datetime2date_time(dtime: datetime.datetime, mode):
    """Reduce a ``datetime`` to just its date or just its time of day.

    Args:
        dtime: The value to split.
        mode: ``"date"`` for the calendar date, ``"time"`` for the clock time
            (hour, minute and second; microseconds are dropped).

    Returns:
        A ``datetime.date`` or a ``datetime.time``, depending on ``mode``.

    Raises:
        ValueError: If ``mode`` is neither ``"date"`` nor ``"time"``.
    """
    if mode == "date":
        return datetime.date(dtime.year, dtime.month, dtime.day)
    if mode == "time":
        return datetime.time(dtime.hour, dtime.minute, dtime.second)
    # Falling through used to return None silently, which only surfaced later
    # as a confusing error far from the bad call.
    raise ValueError('mode must be "date" or "time", got {!r}'.format(mode))
def seconds_from_midnight(t: datetime.time):
    """Return the number of whole seconds between 00:00:00 and ``t``.

    Args:
        t: A time of day; its microseconds are ignored.

    Returns:
        An ``int`` in the range 0..86399.
    """
    return t.hour * 3600 + t.minute * 60 + t.second
def parse_horz_row(headers, row: "bs4.element.Tag"):
    """Turn one row of a meeting-times table into keyword data for a class.

    Args:
        headers: Lower-cased column titles of the table, in column order. The
            columns ``"time"``, ``"days"``, ``"date range"`` and ``"where"``
            are read.
        row: The table row whose ``td`` cells hold the values.

    Returns:
        A dict with these keys:

        * ``"time_range"``: ``(start, end)`` as ``datetime.time`` values, or
          ``None`` when the time is listed as ``"TBA"``.
        * ``"days"``: sorted day numbers (indexes into ``days``).
        * ``"date_range"``: ``(first, last)`` as ``datetime.date`` values. When
          meeting days are listed, ``first`` is the first meeting day on or
          after the start of the listed date range.
        * ``"location"``: the text of the ``"where"`` column.

    Raises:
        KeyError: If the row has no ``"time"`` column (the message includes the
            offending row), or another required column is missing.
        ValueError: If a time or date cell cannot be parsed.
    """
    cells = (col.text for col in row.find_all("td"))
    time_data = dict(zip(headers, cells))
    try:
        time_text = time_data["time"]
    except KeyError as err:
        # Carry the row in the error instead of printing it as a side effect.
        raise KeyError(
            "schedule row has no 'time' column: {}".format(row)
        ) from err

    ret = {}
    if time_text == "TBA":
        ret["time_range"] = None
    else:
        start_time, end_time = map(dateparse, time_text.split(" - "))
        ret["time_range"] = (
            datetime2date_time(start_time, "time"),
            datetime2date_time(end_time, "time"),
        )

    first_day, last_day = map(dateparse, time_data["date range"].split(" - "))

    # Keep only recognised weekday letters, then map each to its day number.
    known_letters = "".join(filter(bool, days))
    meeting_letters = re.sub("[^{}]".format(known_letters), "", time_data["days"])
    ret["days"] = sorted(days.index(letter) for letter in meeting_letters)

    if ret["days"]:
        # datetime counts Monday as 0; ``days`` counts Sunday as 0.
        range_weekday = (first_day.weekday() + 1) % 7
        # Always step forward to the nearest meeting day. Subtracting the
        # weekday numbers directly could go negative and place the first
        # meeting before the date range even begins.
        offset = min((day - range_weekday) % 7 for day in ret["days"])
        first_day += datetime.timedelta(days=offset)

    ret["date_range"] = (
        datetime2date_time(first_day, "date"),
        datetime2date_time(last_day, "date"),
    )
    ret["location"] = time_data["where"]
    return ret
class Class:
def __init__(self, title, abrv, session,
def __init__(
self,
title,
abrv,
session,
term,
crn,
instructor,
@ -67,19 +85,20 @@ class Class:
date_range,
days,
location,
lab=None):
lab=None,
):
#name
# name
self.title = title
self.abrv = abrv
#time
# time
self.date_range = date_range
self.days = days
self.time_range = time_range
#location
# location
self.location = location
self.campus = campus
#other
# other
self.session = session
self.term = term
self.crn = crn
@ -88,70 +107,74 @@ class Class:
self.credits = credits
self.level = level
self.lab = lab
@classmethod
def scrape(cls, info: "bs4.element.Tag", times: "bs4.element.Tag"):
    """Build a class entry from one course's pair of schedule tables.

    Args:
        info: The course detail table. Its caption has the form
            ``"<title> - <abbreviation> - <session number>"`` and each row
            holds a label (``th``) and a value (``td``).
        times: The meeting-times table: a header row followed by one row per
            meeting pattern. When there is more than one pattern, the second
            one is treated as the lab.

    Returns:
        A new instance for the course; its ``lab`` is another instance built
        from the second meeting row, or ``None`` when there is only one row.

    Raises:
        ValueError: If the caption does not split into exactly three parts or
            the session is not an integer.
    """
    # info
    title, abrv, session = info.find("caption").text.split(" - ")
    session = int(session)
    params = {}
    for row in info.find_all("tr"):
        name = row.find("th").text.rstrip(":")
        data = re.sub(r"^ +|[\n\r\t]", "", row.find("td").text)
        if name == "Status":
            # Registration status and date are not stored on the class.
            continue
        if name in simp_exceptions:
            name = name.lower().replace(" ", "_")
        else:
            # Keep only the last word of the label, e.g. "... Term" -> "term".
            name = name.lower().split(" ")[-1]
        if name != "instructor":
            data = data.lower()
        try:
            # Numeric values become ints with any decimal part dropped.
            data = int(re.sub(r"\.\d+", "", data))
        except ValueError:
            pass
        params[name] = data

    # time
    header_row, *meeting_rows = times.find_all("tr")
    headers = [header.text.lower() for header in header_row.find_all("th")]
    main_fields = parse_horz_row(headers, meeting_rows[0])
    lab = None
    if len(meeting_rows) > 1:
        lab_fields = parse_horz_row(headers, meeting_rows[1])
        lab_fields.update(params)
        lab = cls(title + " - Lab", abrv, session, **lab_fields)
    params.update(main_fields)
    return cls(title, abrv, session, lab=lab, **params)
def __repr__(self):
    """Return ``"<title> on <weekday letters>"``, e.g. ``"Calculus on MWF"``."""
    return "{} on {}".format(self.title, "".join(days[i] for i in self.days))
@property
def length(self):
    """Duration of one meeting, as a ``datetime.timedelta``.

    Raises:
        TypeError: If ``time_range`` is ``None`` (no meeting time is set).
    """
    start, end = self.time_range
    return datetime.timedelta(
        seconds=seconds_from_midnight(end) - seconds_from_midnight(start)
    )
def get_classes(page):
    """Lazily build a class entry for every course on a schedule page.

    Args:
        page: A parsed ``BeautifulSoup`` document, or raw markup (``str`` or
            ``bytes``), which is parsed with the ``lxml`` parser.

    Returns:
        An iterator of ``Class`` objects, one per consecutive pair of
        ``datadisplaytable`` tables (course details followed by meeting
        times). Parsing happens as the iterator is consumed.
    """
    if not isinstance(page, BS):
        page = BS(page, "lxml")
    tables = page.find_all("table", attrs={"class": "datadisplaytable"})
    # Pairing with zip ignores a trailing unpaired table instead of failing
    # with IndexError part-way through iteration.
    return itertools.starmap(Class.scrape, zip(tables[::2], tables[1::2]))
if __name__ == "__main__":
    # Manual check: parse a saved schedule page and list each class with the
    # dates it runs between.
    with open("schedule.html") as file:
        page = BS(file.read(), "lxml")
    classes = list(get_classes(page))
    for _class in classes:
        print(repr(_class), _class.date_range)

50
upload.py

@ -0,0 +1,50 @@
import datetime
from dateutil import rrule
from gapi.apis.calendar_api import get_calendars_from_api, calendar_api
from gapi.apis.calendar_api.models import Event
from scraper import get_classes, Class
# Street address passed as the `location` of every event this script creates.
LOCATION = "5500 St Louis Ave, Chicago, IL 60625"
def rrule_former(class_obj):
    """Build the weekly recurrence rule covering every meeting of a class.

    Args:
        class_obj: A scraped class providing ``date_range`` (first and last
            date), ``time_range`` (start and end time) and ``days`` (day
            numbers where Monday is 1).

    Returns:
        A ``dateutil.rrule.rrule`` that repeats weekly on the class's days,
        from the first meeting's start until the end time on the last date.
    """
    # astimezone() on a naive datetime interprets it as system local time.
    first_start = datetime.datetime.combine(
        class_obj.date_range[0], class_obj.time_range[0]
    ).astimezone()
    final_end = datetime.datetime.combine(
        class_obj.date_range[1], class_obj.time_range[1]
    ).astimezone()
    # rrule numbers weekdays from Monday = 0, one less than the class numbering.
    weekdays = [(day - 1) % 7 for day in class_obj.days]
    return rrule.rrule(
        freq=rrule.WEEKLY,
        dtstart=first_start,
        wkst=rrule.SU,
        until=final_end,
        byweekday=weekdays,
    )
# Script entry point: read a saved schedule page, build one recurring event per
# class that has a meeting time, then hand the events to a calendar.
if __name__ == "__main__":
with open("schedule.html", "rb") as file:
data = file.read()
_class: Class
my_api = calendar_api(
"Class upload", scopes=["https://www.googleapis.com/auth/calendar"]
)
calendars = get_calendars_from_api(my_api)
classes = []
for _class in get_classes(data):
# Classes whose time is listed as "TBA" have time_range None and are skipped.
if _class.time_range is not None:
# Both datetimes use the first date of the range, so they describe the first
# meeting only (presumably the event's start and end — confirm against Event);
# the recurrence rule supplies the later meetings.
e = Event(
datetime.datetime.combine(_class.date_range[0], _class.time_range[0]),
datetime.datetime.combine(_class.date_range[0], _class.time_range[1]),
_class.title,
"location: {}".format(_class.location),
[rrule_former(_class)],
location=LOCATION,
)
classes.append(e)
# From here on `_class` is an Event taken from `classes`, not a scraped Class.
for _class in classes:
# NOTE(review): `school_calendar` is never assigned in this script (only
# `calendars` is), so this line raises NameError as written. Presumably the
# target calendar should be selected from `calendars` first — confirm how
# against the calendar API wrapper.
school_calendar.update_or_add_event(_class)
Loading…
Cancel
Save