diff --git a/restscrape/migrations/0006_browser_completedscrapingresponse_pagerequesttask_pendingscrapingresponse_scraperequest_scrapingresp.py b/restscrape/migrations/0006_browser_completedscrapingresponse_pagerequesttask_pendingscrapingresponse_scraperequest_scrapingresp.py new file mode 100644 index 0000000..a079169 --- /dev/null +++ b/restscrape/migrations/0006_browser_completedscrapingresponse_pagerequesttask_pendingscrapingresponse_scraperequest_scrapingresp.py @@ -0,0 +1,68 @@ +# Generated by Django 2.2.1 on 2019-06-18 02:52 + +import django.contrib.postgres.fields.hstore +import django.contrib.postgres.fields.jsonb +from django.db import migrations, models +import django.db.models.deletion +import uuid + + +class Migration(migrations.Migration): + + dependencies = [ + ('restscrape', '0005_auto_20190531_0136'), + ] + + operations = [ + migrations.CreateModel( + name='Browser', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('address', models.CharField(max_length=255)), + ('proxy', models.URLField(max_length=255)), + ('use_adblock', models.BooleanField()), + ], + ), + migrations.CreateModel( + name='PageRequestTask', + fields=[ + ('task_id', models.UUIDField(primary_key=True, serialize=False)), + ], + ), + migrations.CreateModel( + name='ScrapeRequest', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('submit_time', models.DateTimeField(editable=False)), + ('blocking', models.BooleanField()), + ('url', models.URLField()), + ('use_adblock', models.BooleanField()), + ('wait_for', models.IntegerField()), + ('xpath_labels', django.contrib.postgres.fields.hstore.HStoreField()), + ], + ), + migrations.CreateModel( + name='ScrapingResponse', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ], + ), + migrations.CreateModel( + name='CompletedScrapingResponse', + fields=[ + ('scrapingresponse_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='restscrape.ScrapingResponse')), + ('xpath_labels', django.contrib.postgres.fields.jsonb.JSONField()), + ('completion_time', models.DateTimeField()), + ], + bases=('restscrape.scrapingresponse',), + ), + migrations.CreateModel( + name='PendingScrapingResponse', + fields=[ + ('scrapingresponse_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='restscrape.ScrapingResponse')), + ('request', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='restscrape.ScrapeRequest')), + ('task', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='restscrape.PageRequestTask')), + ], + bases=('restscrape.scrapingresponse',), + ), + ] diff --git a/restscrape/migrations/0007_auto_20190618_1014.py b/restscrape/migrations/0007_auto_20190618_1014.py new file mode 100644 index 0000000..4acdfa3 --- /dev/null +++ b/restscrape/migrations/0007_auto_20190618_1014.py @@ -0,0 +1,35 @@ +# Generated by Django 2.2.1 on 2019-06-18 15:14 + +from django.db import migrations, models +import django.db.models.deletion +import uuid + + +class Migration(migrations.Migration): + + dependencies = [ + ('restscrape', '0006_browser_completedscrapingresponse_pagerequesttask_pendingscrapingresponse_scraperequest_scrapingresp'), + ] + + operations = [ + migrations.AlterField( + model_name='pagerequesttask', + name='task_id', + field=models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False), + ), + migrations.AlterField( + model_name='pendingscrapingresponse', + name='task', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='restscrape.PageRequestTask'), + ), + migrations.AlterField( + model_name='scraperequest', + name='submit_time', + field=models.DateTimeField(auto_now=True), + ), + migrations.AlterField( + model_name='scraperequest', + name='wait_for', + field=models.IntegerField(default=0), + ), + ] diff --git a/restscrape/models.py b/restscrape/models.py index 923a890..fea3e82 100644 --- a/restscrape/models.py +++ b/restscrape/models.py @@ -47,8 +47,8 @@ class Page(models.Model): class Browser(models.Model): - address = models.CharField(max_length=None) - proxy = models.URLField(max_length=None) + address = models.CharField(max_length=255) + proxy = models.URLField(max_length=255) use_adblock = models.BooleanField() def connect(self): @@ -90,8 +90,10 @@ def cleanup_browsers(max_browsers=5): else: if len(browser.connect().tabs) == 1: browser.delete() + + class PageRequestTask(models.Model): - task_id = models.UUIDField(primary_key=True) + task_id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) _result = None @property @@ -111,13 +113,14 @@ class PageRequestTask(models.Model): class ScrapeRequest(models.Model): # internal - submit_time = models.DateTimeField(editable=False) + submit_time = models.DateTimeField(auto_now=True) # parameters blocking = models.BooleanField() url = models.URLField() - use_adblock = models.BooleanField() - wait_for = models.IntegerField() + use_adblock = models.BooleanField(default=True) + wait_for = models.IntegerField(default=0) xpath_labels = extended_fields.HStoreField() + proxy = models.URLField() def submit(self): self.save()