From fb15530eeb3be8468161a15ded5fd13019fa7966 Mon Sep 17 00:00:00 2001 From: Raphael Roberts Date: Sat, 29 Dec 2018 02:49:51 -0600 Subject: [PATCH] added start_page to browser to make getting page quicker --- restscrape/browser.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/restscrape/browser.py b/restscrape/browser.py index 372d5fa..d51881c 100644 --- a/restscrape/browser.py +++ b/restscrape/browser.py @@ -11,8 +11,9 @@ class browser: def __init__(self,**launch_opts): self.browser = None self.launch_opts = launch_opts + self.page = None - def restart_browser(self,proxy=None,use_adblock=True): + def restart_browser(self,proxy=None,use_adblock=True,start_page='about:blank'): if self.browser is not None: self.close() opts = {} @@ -25,18 +26,27 @@ class browser: opts.setdefault('args',[]).extend(['--disable-extensions-except='+ext, '--load-extension='+ext]) if proxy is not None: opts.setdefault('args',[]).extend(['--proxy-server='+proxy]) + opts.setdefault('args',[]).append(start_page) self.browser = run(pyppeteer.launch(**opts)) + self.page = run(self.browser.pages())[0] + run(self.page.waitForNavigation()) def open(self,url,wait_for = 0): - page = run(self.browser.pages())[0] + if self.page is None: + page = run(self.browser.pages())[0] run(page.goto(url)) time.sleep(wait_for) - return run(page.content()) + return self.get_source() def close(self): run(self.browser.close()) + def get_source(self): + if self.page is None: + self.page = run(self.browser.pages())[0] + return run(self.page.content()) if __name__ == "__main__": b = browser(headless=False) - b.restart_browser() - page = b.open('https://www.google.com') \ No newline at end of file + b.restart_browser(start_page='https://www.google.com') + source = b.get_source() + b.close() \ No newline at end of file