Browse Source

Hooking up models to the browser connection

master
Raphael Roberts 7 years ago
parent
commit
0f59684575
  1. 1
      requirements.txt
  2. 25
      restscrape/models.py
  3. 15
      restscrape/utils.py

1
requirements.txt

@ -4,3 +4,4 @@ requests
lxml
django
django-rest-framework
psycopg2

25
restscrape/models.py

@ -1,10 +1,13 @@
import datetime
from urllib.parse import quote_plus
import datetime
from django.core.files.base import ContentFile
from django.db import models
import django.contrib.postgres.fields as extended_fields
import pytz
from restscrape.scraping.browser import BrowserConnection
# Create your models here.
@ -38,3 +41,23 @@ class Page(models.Model):
def delete(self, *args, **kwargs):
    """Delete the attached ``page_content``, then the model row itself.

    Positional and keyword arguments (Django passes ``using`` and
    ``keep_parents``) are forwarded to ``Model.delete`` so that callers
    relying on the standard signature keep working.

    Returns:
        Whatever ``Model.delete`` returns (Django documents this as the
        number of objects deleted plus a per-type breakdown).
    """
    # Remove the stored content first; the row is deleted afterwards.
    self.page_content.delete()
    return super().delete(*args, **kwargs)
class Browser(models.Model):
    """Database record describing a browser that can be connected to.

    ``address`` is the value handed to ``BrowserConnection`` to reach the
    browser; ``proxy`` and ``use_adblock`` record its configuration.
    """

    # NOTE(review): an unbounded ``max_length=None`` is only accepted by
    # newer Django releases on PostgreSQL -- confirm the targeted Django
    # version, otherwise the system checks reject these two fields.
    address = models.CharField(max_length=None)
    proxy = models.URLField(max_length=None)
    use_adblock = models.BooleanField()

    def delete(self, *args, **kwargs):
        """Try to close the browser, then delete this record.

        Closing is best effort: a failure to connect to or close the
        browser is logged and does not prevent the row from being deleted.
        Arguments are forwarded to ``Model.delete`` and its result is
        returned, matching the standard Django signature.
        """
        # Imported locally so this block does not depend on a module-level
        # import being present.
        import logging

        try:
            BrowserConnection(self.address).close()
        except Exception:
            # Deliberately not re-raised: the record must still be removed
            # even when the browser is unreachable or already gone.
            logging.getLogger(__name__).warning(
                "Could not close browser at %s before deleting its record",
                self.address,
                exc_info=True,
            )
        return super().delete(*args, **kwargs)
class ScrapeRequet(models.Model):
    """A scrape request stored in the database.

    NOTE(review): the class name looks like a typo of ``ScrapeRequest``; it
    is kept as-is because renaming would break existing references.
    """

    # PostgreSQL hstore mapping of strings to strings.
    # Presumably label -> XPath expression -- TODO confirm against callers.
    xpath_labels = extended_fields.HStoreField()
    # NOTE(review): presumably whether the requester waits for the result
    # -- confirm.
    blocking = models.BooleanField()
    # Stamped once when the row is first created. ``auto_now`` would
    # overwrite the submission time on every later save.
    submit_time = models.DateTimeField(auto_now_add=True)

15
restscrape/utils.py

@ -0,0 +1,15 @@
from restscrape.models import Browser
from restscrape.scraping.browser import start_browser, BrowserConnection
def get_tab(proxy, use_adblock=True):
    """Return a new tab on a browser configured with the given settings.

    A stored ``Browser`` row matching ``proxy`` and ``use_adblock`` is
    reused by connecting to its address. When no such row exists, a new
    browser is started and recorded so later calls can reuse it.

    Args:
        proxy: Proxy setting the browser must have been created with.
        use_adblock: Ad-block setting the browser must have been created
            with.

    Returns:
        The result of ``create_tab()`` on the browser connection.
    """
    # ``filter().first()`` instead of ``get()``: nothing enforces that the
    # (proxy, use_adblock) pair is unique, and ``get()`` would raise
    # ``MultipleObjectsReturned`` as soon as two matching rows exist.
    matching_browser = Browser.objects.filter(
        proxy=proxy, use_adblock=use_adblock
    ).first()

    if matching_browser is not None:
        browser_connection = BrowserConnection(address=matching_browser.address)
    else:
        browser_connection = start_browser(proxy=proxy, use_adblock=use_adblock)
        # Persist the new browser so it can be found on the next call.
        Browser.objects.create(
            address=browser_connection.address,
            proxy=proxy,
            use_adblock=use_adblock,
        )

    return browser_connection.create_tab()
Loading…
Cancel
Save