Browse Source

Fixed some issues with the model fields (explicit max_length values, field defaults, auto-generated UUID primary key) and added migrations 0006 and 0007

master
Raphael Roberts 7 years ago
parent
commit
ed8757bbf7
  1. 68
      restscrape/migrations/0006_browser_completedscrapingresponse_pagerequesttask_pendingscrapingresponse_scraperequest_scrapingresp.py
  2. 35
      restscrape/migrations/0007_auto_20190618_1014.py
  3. 15
      restscrape/models.py

68
restscrape/migrations/0006_browser_completedscrapingresponse_pagerequesttask_pendingscrapingresponse_scraperequest_scrapingresp.py

@ -0,0 +1,68 @@
# Generated by Django 2.2.1 on 2019-06-18 02:52
import django.contrib.postgres.fields.hstore
import django.contrib.postgres.fields.jsonb
from django.db import migrations, models
import django.db.models.deletion
import uuid
class Migration(migrations.Migration):
    """Create the browser, task, scrape-request and scraping-response tables.

    Adds six models to the ``restscrape`` app: ``Browser``,
    ``PageRequestTask``, ``ScrapeRequest``, ``ScrapingResponse`` and the two
    ``ScrapingResponse`` children ``CompletedScrapingResponse`` and
    ``PendingScrapingResponse``.
    """

    dependencies = [
        ('restscrape', '0005_auto_20190531_0136'),
    ]

    operations = [
        migrations.CreateModel(
            name='Browser',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('address', models.CharField(max_length=255)),
                ('proxy', models.URLField(max_length=255)),
                ('use_adblock', models.BooleanField()),
            ],
        ),
        migrations.CreateModel(
            name='PageRequestTask',
            fields=[
                # The UUID itself is the primary key; no default is set here.
                (
                    'task_id',
                    models.UUIDField(primary_key=True, serialize=False),
                ),
            ],
        ),
        migrations.CreateModel(
            name='ScrapeRequest',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('submit_time', models.DateTimeField(editable=False)),
                ('blocking', models.BooleanField()),
                ('url', models.URLField()),
                ('use_adblock', models.BooleanField()),
                ('wait_for', models.IntegerField()),
                (
                    'xpath_labels',
                    django.contrib.postgres.fields.hstore.HStoreField(),
                ),
            ],
        ),
        migrations.CreateModel(
            name='ScrapingResponse',
            fields=[
                # Primary key is a non-editable UUID generated by uuid.uuid4.
                (
                    'id',
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
            ],
        ),
        # The two models below declare ScrapingResponse as their base and use
        # a parent-link one-to-one field to it as their own primary key.
        migrations.CreateModel(
            name='CompletedScrapingResponse',
            fields=[
                (
                    'scrapingresponse_ptr',
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to='restscrape.ScrapingResponse',
                    ),
                ),
                (
                    'xpath_labels',
                    django.contrib.postgres.fields.jsonb.JSONField(),
                ),
                ('completion_time', models.DateTimeField()),
            ],
            bases=('restscrape.scrapingresponse',),
        ),
        migrations.CreateModel(
            name='PendingScrapingResponse',
            fields=[
                (
                    'scrapingresponse_ptr',
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to='restscrape.ScrapingResponse',
                    ),
                ),
                (
                    'request',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to='restscrape.ScrapeRequest',
                    ),
                ),
                (
                    'task',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to='restscrape.PageRequestTask',
                    ),
                ),
            ],
            bases=('restscrape.scrapingresponse',),
        ),
    ]

35
restscrape/migrations/0007_auto_20190618_1014.py

@ -0,0 +1,35 @@
# Generated by Django 2.2.1 on 2019-06-18 15:14
from django.db import migrations, models
import django.db.models.deletion
import uuid
class Migration(migrations.Migration):
    """Alter four fields on the models created by migration 0006.

    Changes ``PageRequestTask.task_id``, ``PendingScrapingResponse.task``,
    ``ScrapeRequest.submit_time`` and ``ScrapeRequest.wait_for``.
    """

    dependencies = [
        (
            'restscrape',
            '0006_browser_completedscrapingresponse_pagerequesttask_pendingscrapingresponse_scraperequest_scrapingresp',
        ),
    ]

    operations = [
        # task_id gains a uuid.uuid4 default and becomes non-editable.
        migrations.AlterField(
            model_name='pagerequesttask',
            name='task_id',
            field=models.UUIDField(
                default=uuid.uuid4,
                editable=False,
                primary_key=True,
                serialize=False,
            ),
        ),
        # The task foreign key may now be left blank / stored as NULL.
        migrations.AlterField(
            model_name='pendingscrapingresponse',
            name='task',
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                to='restscrape.PageRequestTask',
            ),
        ),
        # submit_time is now declared with auto_now=True.
        migrations.AlterField(
            model_name='scraperequest',
            name='submit_time',
            field=models.DateTimeField(auto_now=True),
        ),
        # wait_for now defaults to 0.
        migrations.AlterField(
            model_name='scraperequest',
            name='wait_for',
            field=models.IntegerField(default=0),
        ),
    ]

15
restscrape/models.py

@ -47,8 +47,8 @@ class Page(models.Model):
class Browser(models.Model):
address = models.CharField(max_length=None)
proxy = models.URLField(max_length=None)
address = models.CharField(max_length=255)
proxy = models.URLField(max_length=255)
use_adblock = models.BooleanField()
def connect(self):
@ -90,8 +90,10 @@ def cleanup_browsers(max_browsers=5):
else:
if len(browser.connect().tabs) == 1:
browser.delete()
class PageRequestTask(models.Model):
task_id = models.UUIDField(primary_key=True)
task_id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
_result = None
@property
@ -111,13 +113,14 @@ class PageRequestTask(models.Model):
class ScrapeRequest(models.Model):
# internal
submit_time = models.DateTimeField(editable=False)
submit_time = models.DateTimeField(auto_now=True)
# parameters
blocking = models.BooleanField()
url = models.URLField()
use_adblock = models.BooleanField()
wait_for = models.IntegerField()
use_adblock = models.BooleanField(default=True)
wait_for = models.IntegerField(default=0)
xpath_labels = extended_fields.HStoreField()
proxy = models.URLField()
def submit(self):
self.save()

Loading…
Cancel
Save