From e8fe97b642bae65febd3807e663a29176a090136 Mon Sep 17 00:00:00 2001 From: Raphael Roberts Date: Fri, 31 May 2019 13:41:53 -0500 Subject: [PATCH] Added connect method to browser model and browser cleanup --- restscrape/models.py | 5 ++++- restscrape/scraping/browser.py | 4 ++++ restscrape/utils.py | 20 ++++++++++++++++++-- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/restscrape/models.py b/restscrape/models.py index 8bd9d3b..64b866d 100644 --- a/restscrape/models.py +++ b/restscrape/models.py @@ -48,9 +48,12 @@ class Browser(models.Model): proxy = models.URLField(max_length=None) use_adblock = models.BooleanField() + def connect(self): + return BrowserConnection(self.address) + def delete(self): try: - browser_handle = BrowserConnection(self.address) + browser_handle = self.connect() browser_handle.close() except Exception: pass diff --git a/restscrape/scraping/browser.py b/restscrape/scraping/browser.py index 0259612..1b52060 100644 --- a/restscrape/scraping/browser.py +++ b/restscrape/scraping/browser.py @@ -26,6 +26,10 @@ class BrowserConnection: self.address = self.browser_handle.wsEndpoint + @property + def tabs(self): + return run(self.browser_handle.pages()) + def create_tab(self): return Tab(self.browser_handle) diff --git a/restscrape/utils.py b/restscrape/utils.py index 7621f57..4e4ad3e 100644 --- a/restscrape/utils.py +++ b/restscrape/utils.py @@ -4,8 +4,10 @@ from restscrape.scraping.browser import start_browser, BrowserConnection def get_tab(proxy, use_adblock=True): try: - matching_browser = Browser.objects.get(proxy=proxy, use_adblock=use_adblock) - browser_connection = BrowserConnection(address=matching_browser.address) + + browser_connection = Browser.objects.get( + proxy=proxy, use_adblock=use_adblock + ).connect() except Browser.DoesNotExist: browser_connection = start_browser(proxy=proxy, use_adblock=use_adblock) created_browser = Browser( @@ -13,3 +15,17 @@ def get_tab(proxy, use_adblock=True): ) created_browser.save() return browser_connection.create_tab() + + +def cleanup_browsers(max_browsers=5): + alive = 0 + for browser in Browser.objects.all(): + if alive < max_browsers: + try: + browser.connect() + alive += 1 + except Exception: + browser.delete() + else: + if len(browser.connect().tabs) == 1: + browser.delete()