From 0f59684575fe7b105f5f8506c377b1edadc5075f Mon Sep 17 00:00:00 2001 From: Raphael Roberts Date: Fri, 31 May 2019 05:30:50 -0500 Subject: [PATCH] Hooking up models to the browser connection --- requirements.txt | 1 + restscrape/models.py | 25 ++++++++++++++++++++++++- restscrape/utils.py | 15 +++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 restscrape/utils.py diff --git a/requirements.txt b/requirements.txt index d199a34..52a5dcd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ requests lxml django django-rest-framework +psycopg2 diff --git a/restscrape/models.py b/restscrape/models.py index 1e8844e..8bd9d3b 100644 --- a/restscrape/models.py +++ b/restscrape/models.py @@ -1,10 +1,13 @@ -import datetime from urllib.parse import quote_plus +import datetime from django.core.files.base import ContentFile from django.db import models +import django.contrib.postgres.fields as extended_fields import pytz +from restscrape.scraping.browser import BrowserConnection + # Create your models here. 
@@ -38,3 +41,54 @@ class Page(models.Model):
     def delete(self):
         self.page_content.delete()
         super().delete()
+
+
+class Browser(models.Model):
+    """Database record of a browser that can be reached at ``address``.
+
+    ``proxy`` and ``use_adblock`` store the settings the browser was
+    started with, so a browser with matching settings can be looked up.
+    """
+
+    # NOTE(review): ``max_length=None`` is only accepted by Django 4.2+ on
+    # PostgreSQL; older releases reject it in system checks (fields.E120).
+    # Confirm the targeted Django version or set a bounded length.
+    address = models.CharField(max_length=None)
+    proxy = models.URLField(max_length=None)
+    use_adblock = models.BooleanField()
+
+    def delete(self, *args, **kwargs):
+        """Close the browser at ``address`` (best effort), then delete the row.
+
+        Accepts and forwards the arguments of ``Model.delete`` and returns
+        its result. Django does not call this method for bulk
+        ``QuerySet.delete()``, so browsers are not closed on that path.
+        """
+        # Local import: keeps this change self-contained without editing
+        # the module's import block.
+        import logging
+
+        try:
+            BrowserConnection(self.address).close()
+        except Exception:
+            # A browser that cannot be reached or closed must not block
+            # removal of its row, so log the failure and carry on.
+            logging.getLogger(__name__).warning(
+                "Could not close browser at %s", self.address, exc_info=True
+            )
+        return super().delete(*args, **kwargs)
+
+
+class ScrapeRequet(models.Model):
+    """Stored parameters of a scrape request.
+
+    NOTE(review): the class name looks like a typo of ``ScrapeRequest``; it
+    is kept unchanged so existing references keep working.
+    """
+
+    # PostgreSQL hstore mapping; presumably label -> XPath (confirm).
+    xpath_labels = extended_fields.HStoreField()
+    blocking = models.BooleanField()
+    # auto_now_add stamps the row once at creation; auto_now would
+    # overwrite the submit time on every later save().
+    submit_time = models.DateTimeField(auto_now_add=True)
diff --git a/restscrape/utils.py b/restscrape/utils.py
new file mode 100644
index 0000000..7621f57
--- /dev/null
+++ b/restscrape/utils.py
@@ -0,0 +1,31 @@
+from restscrape.models import Browser
+from restscrape.scraping.browser import start_browser, BrowserConnection
+
+
+def get_tab(proxy, use_adblock=True):
+    """Return a new tab from a browser configured with the given settings.
+
+    Reuses a browser recorded in the ``Browser`` table with the same
+    ``proxy`` and ``use_adblock``; when none is recorded, starts one with
+    ``start_browser`` and records its address.
+
+    Args:
+        proxy: Proxy setting to match or to start the browser with.
+        use_adblock: Ad-block setting to match or to start the browser with.
+
+    Returns:
+        The result of ``create_tab()`` on the browser connection.
+    """
+    # filter().first() instead of get(): several rows may share the same
+    # settings, and get() would raise MultipleObjectsReturned for them.
+    matching_browser = Browser.objects.filter(
+        proxy=proxy, use_adblock=use_adblock
+    ).first()
+    if matching_browser is not None:
+        browser_connection = BrowserConnection(address=matching_browser.address)
+    else:
+        browser_connection = start_browser(proxy=proxy, use_adblock=use_adblock)
+        Browser.objects.create(
+            address=browser_connection.address, proxy=proxy, use_adblock=use_adblock
+        )
+    return browser_connection.create_tab()