Compare commits

...

7 Commits

  1. 4
      .gitignore
  2. 3
      .gitmodules
  3. 1
      MANIFEST.in
  4. 16
      __init__.py
  5. 46
      bot.py
  6. 5
      gcalendar.py
  7. 1
      google_api_wrapper
  8. 645
      schedule.html
  9. 175
      scraper.py
  10. 50
      upload.py

4
.gitignore

@ -2,3 +2,7 @@
*.json *.json
__pycache__ __pycache__
/api_info /api_info
*.html
.dir-locals.el
*.whl

3
.gitmodules

@ -1,3 +0,0 @@
[submodule "google_api_wrapper"]
path = google_api_wrapper
url = https://rlbrhost.ddns.net/git/rlbr/google_api_wrapper.git

1
MANIFEST.in

@ -0,0 +1 @@
include requirements.txt

16
__init__.py

@ -1,16 +0,0 @@
from gcalendar import calendar_api
import json
import pprint
# Build the calendar client from the stored client secret / token directory.
api = calendar_api("class", r"api_info\client_secret.json", "api_info")

# Key under which the target calendar is registered in ``api.ids``.
calendar_key = "Raphael's School Schedule".lower()

cals = api.get_calendars()
# First calendar whose id matches the target; raises StopIteration when no
# calendar matches.  ``api.ids`` is looked up per candidate, as before.
cal = next(entry for entry in cals if entry["id"] == api.ids[calendar_key])

# ``classes.json`` holds one event body per class meeting.
with open("classes.json") as file:
    bodies = json.load(file)

# Create one calendar event per stored body.
for body in bodies:
    api.create_event(calendar_key, body)

46
bot.py

@ -1,46 +0,0 @@
from pyppeteer import launch
import asyncio
import time
import scraper
# JavaScript snippet that selects the first option of the element named
# 'term_in'.  NOTE(review): not referenced by the code visible in this module.
set_semester = "document.getElementsByName('term_in')[0].selectedIndex = 0"
# XPath expressions used by main_loop to locate the elements it clicks or
# waits for, keyed by a short role name.
xpaths = {
# Link whose text is 'Current Student'.
'tab':".//a[text()='Current Student']",
# Link whose text is 'Student Detail Schedule'.
'schedule':".//a[text()='Student Detail Schedule']",
# Input element whose value attribute is 'Submit'.
'submit':"//input[@value='Submit']",
# <frame> element whose src points at the detail-schedule page.
'frame':"//frame[@src='/cp/ip/login?sys=sctssb&url=https://ssb.neiu.edu/mercury_neiuprod/bwskfshd.P_CrseSchdDetl']"
}
async def xpath_single_element(xpath,page):
    """Wait for ``xpath`` to match on ``page`` and return the first match.

    ``page`` is any object exposing awaitable ``waitForXPath`` and ``xpath``
    methods (main_loop passes both a page and a frame).  Raises IndexError
    if the query comes back empty after the wait.
    """
    # Block until at least one node matches, then run the actual query.
    await page.waitForXPath(xpath)
    return (await page.xpath(xpath))[0]
async def main_loop(login):
    """Log in to the portal, open the detail schedule and parse its HTML.

    Args:
        login: JavaScript source evaluated on the login page; it is expected
            to fill in the credentials and submit the form.

    Returns:
        The result of ``scraper.get_classes`` applied to the HTML content of
        the page's last frame after the schedule form has been submitted.

    The browser is always closed, even when a navigation step fails.
    """
    browser = await launch(headless = False)
    try:
        page_list = await browser.pages()
        page = page_list[0]
        await page.goto('https://neiuport.neiu.edu/cp/home/displaylogin')
        await page.evaluate(login)
        await page.waitFor('#tab')

        # xpath_single_element waits for the node itself, so no separate
        # waitForXPath call is needed before it.
        student_tab = await xpath_single_element(xpaths['tab'],page)
        await student_tab.click()
        schedule = await xpath_single_element(xpaths['schedule'],page)
        await schedule.click()

        # This wait must be awaited: a bare call only creates a coroutine
        # object that never runs.
        await page.waitForXPath(xpaths['frame'])
        # The frame element existing does not mean its document has loaded;
        # keep the original grace period before looking inside it.
        await asyncio.sleep(3)
        frame = page.frames[-1]
        submit = await xpath_single_element(xpaths['submit'],frame)
        await submit.click()
        await asyncio.sleep(1)
        content = await page.frames[-1].content()
    finally:
        # Do not leave a browser process behind on failure.
        await browser.close()
    return scraper.get_classes(content)
def get_classes(user,password):
    """Run the browser automation for the given credentials.

    Args:
        user: Portal user name.
        password: Portal password.

    Returns:
        Whatever ``main_loop`` returns for the generated login script.
    """
    # Local import: this module's import block does not provide json.
    import json

    # json.dumps yields a correctly escaped JavaScript string literal, so
    # quotes, backslashes or newlines in the credentials can neither break
    # the script nor inject code into it.
    login = "\n".join((
        "document.getElementById('user').value={}".format(json.dumps(user)),
        "document.getElementById('pass').value={}".format(json.dumps(password)),
        "login()",
    ))
    loop = asyncio.get_event_loop()
    return loop.run_until_complete(main_loop(login))
if __name__ == "__main__":
    # Credentials must not live in source control.  Read them from the
    # environment, falling back to an interactive prompt.  Any password that
    # was previously committed here should be treated as compromised and
    # rotated.
    import getpass
    import os

    user = os.environ.get("NEIU_USER") or input("NEIU username: ")
    password = os.environ.get("NEIU_PASSWORD") or getpass.getpass("NEIU password: ")
    cl = get_classes(user, password)

5
gcalendar.py

@ -1,5 +0,0 @@
import os
import sys
# Directory that contains this module.
parent = os.path.dirname(__file__)
# Put the google_api_wrapper directory at the front of sys.path so it is
# searched first by the import that follows.
sys.path.insert(0,os.path.join(parent,'google_api_wrapper'))
from gapi.calendar_api import *

1
google_api_wrapper

@ -1 +0,0 @@
Subproject commit de63a3871564e2b3f1faaa5f9c211e388358bc1f

645
schedule.html

@ -1,645 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/transitional.dtd"><html lang="en"><head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta http-equiv="Pragma" name="Cache-Control" content="no-cache">
<meta http-equiv="Cache-Control" name="Cache-Control" content="no-cache">
<link rel="stylesheet" href="/css/web_defaultapp.css" type="text/css">
<link rel="stylesheet" href="/css/web_defaultprint.css" type="text/css" media="print">
<title>Student Detail Schedule</title>
<meta http-equiv="Content-Script-Type" name="Default_Script_Language" content="text/javascript">
<script language="JavaScript" type="text/javascript">
<!-- Hide JavaScript from older browsers
window.onunload = function() {submitcount=0;}
var submitcount=0;
function checkSubmit() {
if (submitcount == 0)
{
submitcount++;
return true;
}
else
{
alert("Your changes have already been submitted.");
return false;
}
}
// End script hiding -->
</script>
<script language="JavaScript" type="text/javascript">
<!-- Hide JavaScript from older browsers
// Function to open a window
function windowOpen(window_url) {
helpWin = window.open(window_url,'','toolbar=yes,status=no,scrollbars=yes,menubar=yes,resizable=yes,directories=no,location=no,width=350,height=400');
if (document.images) {
if (helpWin) helpWin.focus()
}
}
// End script hiding -->
</script>
</head>
<body>
<div class="headerwrapperdiv">
<div class="pageheaderdiv1">
<a href="#main_content" onmouseover="window.status='Go to Main Content'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Go to Main Content'; return true" onblur="window.status=''; return true" class="skiplinks">Go to Main Content</a>
<h1>SCT WWW Information System</h1></div><div class="headerlinksdiv">
<span class="pageheaderlinks2">
<map name="Module_Navigation_Links_H" title="Module Navigation Links">
<p>
<a href="#skip_Module_Navigation_Links_H" onmouseover="window.status='Skip Module Navigation Links'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Skip Module Navigation Links'; return true" onblur="window.status=''; return true" class="skiplinks">Skip Module Navigation Links</a>
</p><table class="plaintable" summary="This is main table for displaying Tab Items." width="100%" cellspacing="0" cellpadding="0" border="0">
<tbody><tr>
<td class="pldefault">
<table class="plaintable" summary="This table displays Tab Items." cellspacing="0" cellpadding="0" border="0">
<tbody><tr>
<td class="taboff" height="22">
<a href="/neiuprod/twbkwbis.P_GenMenu?name=bmenu.P_GenMnu" onmouseover="window.status='Personal Information'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Personal Information'; return true" onblur="window.status=''; return true">Personal Information</a>
</td>
<td class="bgtaboff" height="22" valign="top" align="right">
<img src="/wtlgifs/web_tab_corner_right.gif" alt="Tab Corner Right" class="headerImg" title="Tab Corner Right" name="web_tab_corner_right" hspace="0" vspace="0" border="0" height="20" width="8">
</td>
<td class="tabon" height="22">
<a href="/neiuprod/twbkwbis.P_GenMenu?name=bmenu.P_StuMainMnu" onmouseover="window.status='Student'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Student'; return true" onblur="window.status=''; return true">Student</a>
</td>
<td class="bgtabon" height="22" valign="top" align="right">
<img src="/wtlgifs/web_tab_corner_right.gif" alt="Tab Corner Right" class="headerImg" title="Tab Corner Right" name="web_tab_corner_right" hspace="0" vspace="0" border="0" height="20" width="8">
</td>
<td class="taboff" height="22">
<a href="/neiuprod/twbkwbis.P_GenMenu?name=bmenu.P_FinAidMainMnu" onmouseover="window.status='Financial Aid'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Financial Aid'; return true" onblur="window.status=''; return true">Financial Aid</a>
</td>
<td class="bgtaboff" height="22" valign="top" align="right">
<img src="/wtlgifs/web_tab_corner_right.gif" alt="Tab Corner Right" class="headerImg" title="Tab Corner Right" name="web_tab_corner_right" hspace="0" vspace="0" border="0" height="20" width="8">
</td>
<td class="taboff" height="22">
<a href="/neiuprod/twbkwbis.P_GenMenu?name=bmenu.P_FacMainMnu" onmouseover="window.status='Faculty Services'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Faculty Services'; return true" onblur="window.status=''; return true">Faculty Services</a>
</td>
<td class="bgtaboff" height="22" valign="top" align="right">
<img src="/wtlgifs/web_tab_corner_right.gif" alt="Tab Corner Right" class="headerImg" title="Tab Corner Right" name="web_tab_corner_right" hspace="0" vspace="0" border="0" height="20" width="8">
</td>
</tr>
</tbody></table>
</td>
</tr>
<tr>
<td class="bgtabon" width="100%" colspan="2"><img src="/wtlgifs/web_transparent.gif" alt="Transparent Image" class="headerImg" title="Transparent Image" name="web_transparent" hspace="0" vspace="0" border="0" height="3" width="10"></td></tr></tbody></table>
</map>
</span>
<a name="skip_Module_Navigation_Links_H"></a>
</div>
<table class="plaintable" summary="This table displays Menu Items and Banner Search textbox." width="100%">
<tbody><tr>
<td class="pldefault">
<div class="headerlinksdiv2">
<form action="/neiuprod/twbksrch.P_ShowResults" method="post">
Search
<span class="fieldlabeltextinvisible"><label for="keyword_in_id"><span class="fieldlabeltext">Search</span></label></span>
<input type="text" name="KEYWRD_IN" size="20" maxlength="65" id="keyword_in_id">
<input type="submit" value="Go">
</form>
</div>
</td>
<td class="pldefault"><p class="rightaligntext" p="">
<span class="pageheaderlinks">
<a href="/neiuprod/twbkwbis.P_GenMenu?name=bmenu.P_RegMnu" class="submenulinktext2" id="ssbbackurl">RETURN TO MENU</a>
|
<a href="/neiuprod/twbksite.P_DispSiteMap?menu_name_in=bmenu.P_MainMnu&amp;depth_in=2&amp;columns_in=3" accesskey="2" class="submenulinktext2">SITE MAP</a>
|
<a href="/neiuprod/twbkfrmt.P_DispHelp?pagename_in=bwskfshd.P_CrseSchdDetl" accesskey="H" onclick="popup = window.open('/neiuprod/twbkfrmt.P_DispHelp?pagename_in=bwskfshd.P_CrseSchdDetl', 'PopupPage','height=500,width=450,scrollbars=yes,resizable=yes'); return false" target="_blank" onmouseover="window.status=''; return true" onmouseout="window.status=''; return true" onfocus="window.status=''; return true" onblur="window.status=''; return true" class="submenulinktext2">HELP</a>
|
<a href="twbkwbis.P_Logout" accesskey="3" class="submenulinktext2">EXIT</a>
</span>
</p></td>
</tr>
</tbody></table>
</div>
<div class="pagetitlediv">
<table class="plaintable" summary="This table displays title and static header displays." width="100%">
<tbody><tr>
<td class="pldefault">
<h2>Student Detail Schedule</h2>
</td>
<td class="pldefault">
&nbsp;
</td>
<td class="pldefault"><p class="rightaligntext" p="">
</p><div class="staticheaders">
000645225 Raphael L. Roberts<br>
Fall 2019<br>
Aug 26, 2019 03:02 am<br>
</div>
</td>
</tr>
<tr>
<td class="bg3" width="100%" colspan="3"><img src="/wtlgifs/web_transparent.gif" alt="Transparent Image" class="headerImg" title="Transparent Image" name="web_transparent" hspace="0" vspace="0" border="0" height="3" width="10"></td>
</tr>
</tbody></table>
<a name="main_content"></a>
</div>
<div class="pagebodydiv">
<!-- ** END OF twbkwbis.P_OpenDoc ** -->
<div class="infotextdiv"><table class="infotexttable" summary="This layout table contains information that may be helpful in understanding the content and functionality of this page. It could be a brief set of instructions, a description of error messages, or other special information."><tbody><tr><td class="indefault"><img src="/wtlgifs/web_info_cascade.png" alt="Information" class="headerImg" title="Information" name="web_info" hspace="0" vspace="0" border="0" height="12" width="14"></td><td class="indefault"><span class="infotext"> <b>Class Schedule</b><br>
The <a href="https://www.neiu.edu/academics/registrar-services/class-schedules-and-registration"> Schedule of Classes </a> contains important information you should know including but not limited to: registering for classes, add/drop dates, semester calendars, final exam schedule, tuition/fees rates, and how to make payments or request a payment plan. If there are questions we can answer for you, please contact the <a href="mailto:registration@neiu.edu"> Registration Office.</a><br><br>
Remember to review your class schedule prior to the first day of classes for possible changes in class location and times.<br><br></span></td></tr></tbody></table><p></p></div>
Total Credit Hours: 16.000
<br>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Computer Organization - CS 301 - 2</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">10335</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Aug 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Pericles Prezas<a href="mailto:P-Prezas@neiu.edu" target="Pericles Prezas"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 3.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">4:15 pm - 5:30 pm</td>
<td class="dddefault">TR</td>
<td class="dddefault">Lech Walesa Hall 3046</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Pericles Prezas (<abbr title="Primary">P</abbr>)<a href="mailto:P-Prezas@neiu.edu" target="Pericles Prezas"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Data Structures - CS 304 - 2</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">10337</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Apr 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Xiwei Wang<a href="mailto:X-Wang9@neiu.edu" target="Xiwei Wang"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 3.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">5:40 pm - 6:55 pm</td>
<td class="dddefault">MW</td>
<td class="dddefault">Lech Walesa Hall 3046</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Xiwei Wang (<abbr title="Primary">P</abbr>)<a href="mailto:X-Wang9@neiu.edu" target="Xiwei Wang"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Applied Music: Tuba - MUS 151K - 1</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">11522</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Aug 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
William R. Russell<a href="mailto:W-Russell1@neiu.edu" target="William R. Russell"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 1.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault"><abbr title="To Be Announced">TBA</abbr></td>
<td class="dddefault">&nbsp;</td>
<td class="dddefault"><abbr title="To Be Announced">TBA</abbr></td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">William R Russell (<abbr title="Primary">P</abbr>)<a href="mailto:W-Russell1@neiu.edu" target="William R. Russell"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Band - MUS 231 - 1</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">13387</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Aug 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Travis M. Heath<a href="mailto:T-Heath@neiu.edu" target="Travis M. Heath"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 1.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">7:05 pm - 9:45 pm</td>
<td class="dddefault">T</td>
<td class="dddefault">Fine Arts Center 144</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Travis M Heath (<abbr title="Primary">P</abbr>)<a href="mailto:T-Heath@neiu.edu" target="Travis M. Heath"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Instrumental Ensemble: Brass - MUS 234A - 1</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">14884</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Aug 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Anna F. Mayne<a href="mailto:A-Mayne@neiu.edu" target="Anna F. Mayne"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 1.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">10:50 am - 12:05 pm</td>
<td class="dddefault">TR</td>
<td class="dddefault">Fine Arts Center 144</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Anna F Mayne (<abbr title="Primary">P</abbr>)<a href="mailto:A-Mayne@neiu.edu" target="Anna F. Mayne"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Instrumental Ensemble:Jazz Band - MUS 235A - 1</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">11563</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Aug 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Steven T. Duncan<a href="mailto:S-Duncan@neiu.edu" target="Steven T. Duncan"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 1.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">1:40 pm - 2:55 pm</td>
<td class="dddefault">TR</td>
<td class="dddefault">Fine Arts Center 144</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Steven T Duncan (<abbr title="Primary">P</abbr>)<a href="mailto:S-Duncan@neiu.edu" target="Steven T. Duncan"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">Music After Beethoven - MUS 345 - 1</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">14890</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Apr 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Peter M. Chang<a href="mailto:P-Chang1@neiu.edu" target="Peter M. Chang"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 3.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">11:30 am - 12:20 pm</td>
<td class="dddefault">MWF</td>
<td class="dddefault">Fine Arts Center 145</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Peter M Chang (<abbr title="Primary">P</abbr>)<a href="mailto:P-Chang1@neiu.edu" target="Peter M. Chang"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<table class="datadisplaytable" summary="This layout table is used to present the schedule course detail"><caption class="captiontext">The Universe:Past, Present And Future - PHYS 103 - 3</caption>
<tbody><tr>
<th colspan="2" class="ddlabel" scope="row">Associated Term:</th>
<td class="dddefault">Fall 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row"><acronym title="Course Reference Number">CRN</acronym>:</th>
<td class="dddefault">11008</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Status:</th>
<td class="dddefault">**Web Registered** on Apr 25, 2019</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Assigned Instructor:</th>
<td class="dddefault">
Orin M. Harris<a href="mailto:O-Harris1@neiu.edu" target="Orin M. Harris"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a>
</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Grade Mode:</th>
<td class="dddefault">Standard</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Credits:</th>
<td class="dddefault"> 3.000</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Level:</th>
<td class="dddefault">Undergraduate</td>
</tr>
<tr>
<th colspan="2" class="ddlabel" scope="row">Campus:</th>
<td class="dddefault">Main Campus</td>
</tr>
</tbody></table>
<table class="datadisplaytable" summary="This table lists the scheduled meeting times and assigned instructors for this class.."><caption class="captiontext">Scheduled Meeting Times</caption>
<tbody><tr>
<th class="ddheader" scope="col">Type</th>
<th class="ddheader" scope="col">Time</th>
<th class="ddheader" scope="col">Days</th>
<th class="ddheader" scope="col">Where</th>
<th class="ddheader" scope="col">Date Range</th>
<th class="ddheader" scope="col">Schedule Type</th>
<th class="ddheader" scope="col">Instructors</th>
</tr>
<tr>
<td class="dddefault">Class</td>
<td class="dddefault">9:25 am - 10:40 am</td>
<td class="dddefault">TR</td>
<td class="dddefault">Bernard J Brommel Hall 101</td>
<td class="dddefault">Aug 26, 2019 - Dec 14, 2019</td>
<td class="dddefault">Lecture</td>
<td class="dddefault">Orin M Harris (<abbr title="Primary">P</abbr>)<a href="mailto:O-Harris1@neiu.edu" target="Orin M. Harris"><img src="/wtlgifs/web_email.gif" align="middle" alt="E-mail" class="headerImg" title="E-mail" name="web_email" hspace="0" vspace="0" border="0" height="28" width="28"></a></td>
</tr>
</tbody></table>
<br>
<a href="javascript:history.go(-1)" onmouseover="window.status='Return to Previous'; return true" onfocus="window.status='Return to Previous'; return true" onmouseout="window.status=''; return true" onblur="window.status=''; return true">Return to Previous</a>
<!-- ** START OF twbkwbis.P_CloseDoc ** -->
<table class="plaintable" summary="This is table displays line separator at end of the page." width="100%" cellspacing="0" cellpadding="0" border="0"><tbody><tr><td class="bgtabon" width="100%" colspan="2"><img src="/wtlgifs/web_transparent.gif" alt="Transparent Image" class="headerImg" title="Transparent Image" name="web_transparent" hspace="0" vspace="0" border="0" height="3" width="10"></td></tr></tbody></table>
<a href="#top" onmouseover="window.status='Skip to top of page'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Skip to top of page'; return true" onblur="window.status=''; return true" class="skiplinks">Skip to top of page</a>
</div>
<div class="footerbeforediv">
</div>
<div class="footerlinksdiv">
<span class="pagefooterlinks">
<map name="Student_Detail_Schedule_Links_F" title="Student Detail Schedule Links">
<p>
<a href="#skip_Student_Detail_Schedule_Links_F" onmouseover="window.status='Skip Student Detail Schedule Links'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Skip Student Detail Schedule Links'; return true" onblur="window.status=''; return true" class="skiplinks">Skip Student Detail Schedule Links</a>
</p><p>[ <a href="/neiuprod/bwskhreg.p_reg_hist" onmouseover="window.status='Show Registration History'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Show Registration History'; return true" onblur="window.status=''; return true">Show Registration History</a>
| <a href="/neiuprod/bwskfreg.P_AltPin" onmouseover="window.status='Add or Drop Classes'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Add or Drop Classes'; return true" onblur="window.status=''; return true">Add or Drop Classes</a>
| <a href="/neiuprod/bwskfcls.p_sel_crse_search" onmouseover="window.status='Look Up Classes'; return true" onmouseout="window.status=''; return true" onfocus="window.status='Look Up Classes'; return true" onblur="window.status=''; return true">Look Up Classes</a>
]
<a name="skip_Student_Detail_Schedule_Links_F"></a>
</p></map></span></div>
<div class="footerafterdiv">
</div>
<div class="globalafterdiv">
</div>
<div class="globalfooterdiv">
</div>
<div class="pagefooterdiv">
<span class="releasetext">Release: 8.7.1</span>
</div>
<div class="poweredbydiv">
</div>
<div class="div1"></div>
<div class="div2"></div>
<div class="div3"></div>
<div class="div4"></div>
<div class="div5"></div>
<div class="div6"></div>
<div class="banner_copyright"> <br><h5>© 2019 Ellucian Company L.P. and its affiliates.<br></h5></div>
</body></html>

175
scraper.py

@ -1,61 +1,79 @@
from bs4 import BeautifulSoup as BS
import datetime import datetime
import re
import itertools
from operator import sub from operator import sub
import re
import bs4
from bs4 import BeautifulSoup as BS
def dateparse(datetime_str):
    """Parse a schedule date or time string into a ``datetime.datetime``.

    The text is tried first as a date such as ``"Jan 14, 2019"`` and, if that
    fails, as a 12-hour clock time such as ``"09:25 AM"``.  A time-only string
    comes back on ``strptime``'s default date (1900-01-01).

    Args:
        datetime_str: the text to parse; surrounding whitespace is ignored.

    Returns:
        The parsed ``datetime.datetime``.

    Raises:
        ValueError: if the text matches neither format.
    """
    date = "%b %d, %Y"
    time = "%I:%M %p"
    # strptime rejects leading/trailing padding, which scraped cell text can carry.
    datetime_str = datetime_str.strip()
    try:
        return datetime.datetime.strptime(datetime_str, date)
    except ValueError:
        return datetime.datetime.strptime(datetime_str, time)


# Day letters by weekday number as used by parse_horz_row (Sunday = 0);
# None marks the positions that have no letter code.
days = [None, "M", "T", "W", "R", "F", None]
# Row labels that Class.scrape turns into a key using the whole label
# (lower-cased, spaces -> underscores) instead of only its last word.
simp_exceptions = ["Grade Mode"]
def datetime2date_time(dtime: datetime.datetime, mode):
    """Return either the date part or the time part of *dtime*.

    Args:
        dtime: the datetime to split.
        mode: ``"date"`` to get a ``datetime.date``, ``"time"`` to get a
            ``datetime.time`` built from hour, minute and second only.

    Returns:
        The requested ``datetime.date`` or ``datetime.time``.

    Raises:
        ValueError: if *mode* is neither ``"date"`` nor ``"time"``.
    """
    if mode == "date":
        return datetime.date(dtime.year, dtime.month, dtime.day)
    if mode == "time":
        return datetime.time(dtime.hour, dtime.minute, dtime.second)
    # Falling through used to return None, which only failed later and far away.
    raise ValueError("mode must be 'date' or 'time', got {!r}".format(mode))
def seconds_from_midnight(t: datetime.time):
    """Return the number of whole seconds between 00:00:00 and *t*.

    Only the hour, minute and second fields are used; microseconds are ignored.
    """
    return t.hour * 3600 + t.minute * 60 + t.second
def parse_horz_row(headers, row: bs4.element.Tag):
    """Convert one row of a meeting-times table into a dict of parsed values.

    Args:
        headers: column titles in column order; the row must supply values for
            ``"time"``, ``"date range"``, ``"days"`` and ``"where"``.
        row: the ``<tr>`` whose ``<td>`` cells line up with *headers*.

    Returns:
        A dict with these keys:

        * ``"time_range"`` - ``(start, end)`` as ``datetime.time``, or ``None``
          when the time cell reads ``"TBA"``.
        * ``"days"`` - sorted indexes into the module-level ``days`` list.
        * ``"date_range"`` - ``(first, last)`` as ``datetime.date``; when the
          class has meeting days, ``first`` is moved forward to the first
          meeting day on or after the listed start date.
        * ``"location"`` - the text of the ``"where"`` cell.

    Raises:
        KeyError: if a required column is missing (the row is printed first
            when it is the ``"time"`` column).
        ValueError: if a time or date range cannot be parsed.
    """
    cells = (col.text for col in row.find_all("td"))
    time_data = dict(zip(headers, cells))
    ret = {}

    try:
        time_text = time_data["time"]
    except KeyError:
        # Show the offending row before propagating, to ease debugging.
        print(row)
        raise

    if time_text == "TBA":
        ret["time_range"] = None
    else:
        s, e = map(dateparse, time_text.split(" - "))
        ret["time_range"] = (
            datetime2date_time(s, "time"),
            datetime2date_time(e, "time"),
        )

    s, e = map(dateparse, time_data["date range"].split(" - "))

    # Keep only the recognised day letters, then map each to its index.
    valid_letters = "".join(filter(bool, days))
    day_letters = re.sub("[^{}]".format(valid_letters), "", time_data["days"])
    ret["days"] = sorted(days.index(letter) for letter in day_letters)

    if ret["days"]:
        # Convert Python's Monday=0 numbering to the Sunday=0 numbering of `days`.
        class_start = (s.weekday() + 1) % 7
        # Advance to the nearest meeting day on or after the listed start.
        # Taking the offset modulo 7 keeps it non-negative, so a term that
        # begins mid-week never yields a date before the term starts.
        offset = min((day - class_start) % 7 for day in ret["days"])
        s += datetime.timedelta(days=offset)

    ret["date_range"] = (
        datetime2date_time(s, "date"),
        datetime2date_time(e, "date"),
    )
    ret["location"] = time_data["where"]
    return ret
class Class: class Class:
def __init__(self, title, abrv, session,
def __init__(
self,
title,
abrv,
session,
term, term,
crn, crn,
instructor, instructor,
@ -67,19 +85,20 @@ class Class:
date_range, date_range,
days, days,
location, location,
lab=None):
lab=None,
):
#name
# name
self.title = title self.title = title
self.abrv = abrv self.abrv = abrv
#time
# time
self.date_range = date_range self.date_range = date_range
self.days = days self.days = days
self.time_range = time_range self.time_range = time_range
#location
# location
self.location = location self.location = location
self.campus = campus self.campus = campus
#other
# other
self.session = session self.session = session
self.term = term self.term = term
self.crn = crn self.crn = crn
@ -88,70 +107,74 @@ class Class:
self.credits = credits self.credits = credits
self.level = level self.level = level
self.lab = lab self.lab = lab
# data is a list of two html tables
@classmethod
def scrape(cls, info: bs4.element.Tag, times: bs4.element.Tag):
    """Build an instance from the two HTML tables that describe one class.

    Args:
        info: table whose caption reads ``"<title> - <abrv> - <session>"`` and
            whose rows each pair a ``<th>`` label with a ``<td>`` value.
        times: table whose first row holds the column headers and whose
            remaining rows hold meeting times.

    Returns:
        The scraped class.  When *times* has more than one data row, the
        second row becomes a separate ``"<title> - Lab"`` instance stored as
        ``lab``; otherwise ``lab`` is ``None``.

    Raises:
        ValueError: if the caption does not split into exactly three parts or
            the session is not an integer.
    """
    # info
    title, abrv, session = info.find("caption").text.split(" - ")
    session = int(session)

    params = {}
    for row in info.find_all("tr"):
        name = row.find("th").text.rstrip(":")
        data = re.sub(r"^ +|[\n\r\t]", "", row.find("td").text)

        if name == "Status":
            # The status text and registration date were parsed here but never
            # used, so the row is skipped instead.
            continue

        if name in simp_exceptions:
            name = name.lower().replace(" ", "_")
        else:
            # Only the last word of the label is used as the key.
            name = name.lower().split(" ")[-1]
        if name != "instructor":
            data = data.lower()
        try:
            # Numeric values lose any decimal part ("3.000" -> 3).
            data = int(re.sub(r"\.\d+", "", data))
        except ValueError:
            # Not a number: keep the text as is.
            pass
        params[name] = data

    # time
    header_row, *time_rows = times.find_all("tr")
    headers = [header.text.lower() for header in header_row.find_all("th")]
    if len(time_rows) > 1:
        data, lab = (parse_horz_row(headers, row) for row in time_rows[:2])
        lab.update(params)
        lab = cls(title + " - Lab", abrv, session, **lab)
    else:
        lab = None
        data = parse_horz_row(headers, time_rows[0])
    params.update(data)
    return cls(title, abrv, session, lab=lab, **params)
def __repr__(self):
    """Return ``"<title> on <day letters>"``, e.g. a title followed by ``MW``."""
    # self.days holds indexes into the module-level `days` letter list.
    return "{} on {}".format(self.title, "".join(days[i] for i in self.days))
@property
def length(self):
    """Duration of one meeting as a ``datetime.timedelta``.

    Computed as end minus start of ``time_range`` in whole seconds.

    Raises:
        TypeError: if ``time_range`` is ``None`` (set for "TBA" times).
    """
    start, end = self.time_range
    return datetime.timedelta(
        seconds=seconds_from_midnight(end) - seconds_from_midnight(start)
    )
def get_classes(page):
    """Lazily scrape every class found in a schedule page.

    Args:
        page: a ``BeautifulSoup`` document, or markup that is parsed with the
            ``lxml`` parser.

    Returns:
        An iterator of ``Class`` objects, one per consecutive pair of
        ``datadisplaytable`` tables (info table, then times table).  A trailing
        table without a partner is ignored.
    """
    if not isinstance(page, BS):
        page = BS(page, "lxml")
    tables = page.find_all("table", attrs={"class": "datadisplaytable"})
    # Pair tables as (0, 1), (2, 3), ...; zip stops cleanly on an odd count,
    # where indexing tables[i + 1] would raise IndexError mid-iteration.
    groups = zip(tables[::2], tables[1::2])
    return itertools.starmap(Class.scrape, groups)
if __name__ == "__main__":
    # Manual check: parse a saved copy of the schedule page and print each
    # class next to its date range.
    with open("schedule.html") as file:
        page = BS(file.read(), "lxml")
    for _class in get_classes(page):
        print(repr(_class), _class.date_range)

50
upload.py

@ -0,0 +1,50 @@
import datetime
from dateutil import rrule
from gapi.apis.calendar_api import get_calendars_from_api, calendar_api
from gapi.apis.calendar_api.models import Event
from scraper import get_classes, Class
LOCATION = "5500 St Louis Ave, Chicago, IL 60625"
def rrule_former(class_obj):
    """Build the weekly recurrence rule for a class's meetings.

    Args:
        class_obj: object exposing ``days``, ``date_range`` and ``time_range``
            as produced by the scraper.

    Returns:
        A ``dateutil.rrule.rrule`` repeating weekly on the class's meeting
        days, from the first date at the start time until the last date at
        the end time.
    """
    scraped_days = class_obj.days
    # Naive datetimes are made timezone-aware via astimezone().
    first_meeting = datetime.datetime.combine(
        class_obj.date_range[0], class_obj.time_range[0]
    ).astimezone()
    last_meeting = datetime.datetime.combine(
        class_obj.date_range[1], class_obj.time_range[1]
    ).astimezone()
    # Shift the scraper's numbering (where "M" is 1) down by one, wrapping 0 to 6.
    weekdays = [(number - 1) % 7 for number in scraped_days]
    return rrule.rrule(
        freq=rrule.WEEKLY,
        dtstart=first_meeting,
        wkst=rrule.SU,
        until=last_meeting,
        byweekday=weekdays,
    )
if __name__ == "__main__":
    # Script entry point: read a saved schedule page, turn every class that
    # has a meeting time into an Event with a weekly recurrence rule, then
    # hand each event to the calendar.
    with open("schedule.html", "rb") as file:
        # Raw bytes are passed on; get_classes parses any non-BeautifulSoup input.
        data = file.read()
    _class: Class
    my_api = calendar_api(
        "Class upload", scopes=["https://www.googleapis.com/auth/calendar"]
    )
    calendars = get_calendars_from_api(my_api)
    classes = []
    for _class in get_classes(data):
        # time_range is None for classes whose time cell reads "TBA".
        if _class.time_range is not None:
            # NOTE(review): both datetimes use date_range[0]; presumably the
            # Event describes the first meeting and the rrule supplies the
            # repeats - confirm against the Event signature.
            e = Event(
                datetime.datetime.combine(_class.date_range[0], _class.time_range[0]),
                datetime.datetime.combine(_class.date_range[0], _class.time_range[1]),
                _class.title,
                "location: {}".format(_class.location),
                [rrule_former(_class)],
                location=LOCATION,
            )
            classes.append(e)
    # From here on `_class` is an Event, not a scraper Class.
    for _class in classes:
        # NOTE(review): `school_calendar` is never assigned in this script, so
        # this line raises NameError. It presumably should be picked out of
        # `calendars` (otherwise unused) - confirm against the gapi API.
        school_calendar.update_or_add_event(_class)
Loading…
Cancel
Save