Browse Source

Added start_page to browser to make getting the page quicker

master
Raphael Roberts 7 years ago
parent
commit
fb15530eeb
  1. 20
      restscrape/browser.py

20
restscrape/browser.py

@ -11,8 +11,9 @@ class browser:
def __init__(self, **launch_opts):
    """Create an idle wrapper; both the browser and page handles start empty.

    Args:
        **launch_opts: Keyword options kept on the instance as
            ``launch_opts``.
    """
    # No browser or page exists until one is started explicitly.
    self.page = None
    self.browser = None
    self.launch_opts = launch_opts
def restart_browser(self,proxy=None,use_adblock=True):
def restart_browser(self,proxy=None,use_adblock=True,start_page='about:blank'):
if self.browser is not None:
self.close()
opts = {}
@ -25,18 +26,27 @@ class browser:
opts.setdefault('args',[]).extend(['--disable-extensions-except='+ext, '--load-extension='+ext])
if proxy is not None:
opts.setdefault('args',[]).extend(['--proxy-server='+proxy])
opts.setdefault('args',[]).append(start_page)
self.browser = run(pyppeteer.launch(**opts))
self.page = run(self.browser.pages())[0]
run(self.page.waitForNavigation())
def open(self, url, wait_for=0):
    """Navigate the browser's page to *url* and return its content.

    Args:
        url: Address to load.
        wait_for: Seconds to sleep after navigation before the content
            is read (passed to ``time.sleep``).

    Returns:
        Whatever ``get_source()`` returns for the page after the wait.
    """
    # Reuse the cached page when there is one; only ask the browser for
    # its first page when nothing has been cached yet.  Navigating the
    # cached handle (rather than a separate local) keeps ``self.page``
    # and the page that was actually loaded in agreement.
    if self.page is None:
        self.page = run(self.browser.pages())[0]
    run(self.page.goto(url))
    time.sleep(wait_for)
    return self.get_source()
def close(self):
    """Shut down the browser, if one is running, and drop its handles.

    Calling this when no browser has been launched, or a second time
    after a previous ``close()``, is a no-op.
    """
    live = self.browser
    # Clear the handles first so that even a failed shutdown cannot leave
    # a stale browser/page behind for later calls to reuse.
    self.browser = None
    self.page = None
    if live is not None:
        run(live.close())
def get_source(self):
    """Return the content of the current page.

    When no page has been cached yet, the browser's first page is looked
    up and remembered in ``self.page`` before its content is read.
    """
    current = self.page
    if current is None:
        # Nothing cached: take the first page the browser reports.
        current = run(self.browser.pages())[0]
        self.page = current
    return run(current.content())
if __name__ == "__main__":
    # Manual smoke test: start the browser directly on the target page,
    # read the page content once, and always close the browser afterwards.
    b = browser(headless=False)
    b.restart_browser(start_page='https://www.google.com')
    try:
        source = b.get_source()
    finally:
        # Close even if reading the page failed, so the browser process
        # started above is not left running.
        b.close()
Loading…
Cancel
Save