|
|
|
@ -82,12 +82,30 @@ class PageRequestTask(models.Model): |
|
|
|
return res |
|
|
|
|
|
|
|
|
|
|
|
class ScrapeRequest(models.Model):
    """Stored description of one scrape request: a target URL plus options.

    Call ``submit`` to save the request and obtain the
    ``PendingScrapingResponse`` row that refers to it.
    """

    # internal
    # No default / auto_now is declared on this field, so a value has to be
    # assigned before the row is saved; ``submit`` does that when it is unset.
    submit_time = models.DateTimeField(editable=False)

    # parameters
    blocking = models.BooleanField()
    url = models.URLField()
    use_adblock = models.BooleanField()
    wait_for = models.IntegerField()
    xpath_labels = extended_fields.HStoreField()

    def submit(self):
        """Save this request and create a pending response linked to it.

        If ``submit_time`` has not been assigned yet it is set to the current
        UTC time first; a value assigned by the caller is left untouched.

        Returns:
            The saved ``PendingScrapingResponse`` whose ``request`` is this
            object.
        """
        # Without this, an unset submit_time would be written as NULL and the
        # save would be rejected, since the column is not declared nullable.
        if self.submit_time is None:
            self.submit_time = datetime.datetime.now(pytz.UTC)
        self.save()

        # NOTE(review): PendingScrapingResponse also declares a non-nullable
        # ``task`` foreign key that is not supplied here -- confirm where the
        # task is expected to be attached before this save can succeed.
        pending = PendingScrapingResponse(request=self)
        pending.save()
        return pending
|
|
|
|
|
|
|
|
|
|
|
class ScrapingResponse(models.Model):
    """Base model for scraping responses, identified by a UUID primary key.

    ``PendingScrapingResponse`` and ``CompletedScrapingResponse`` both
    subclass it.
    """

    # Primary key; defaults to a freshly generated uuid4 and is not editable.
    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
    )
|
|
|
|
|
|
|
|
|
|
|
class PendingScrapingResponse(ScrapingResponse): |
|
|
|
task = models.ForeignKey(PageRequestTask, on_delete=models.CASCADE) |
|
|
|
request = models.ForeignKey(ScrapeRequest, on_delete=models.CASCADE) |
|
|
|
|
|
|
|
def mark_complete(self, xpath_labels): |
|
|
|
completion_time = datetime.datetime.now(pytz.UTC) |
|
|
|
@ -95,6 +113,7 @@ class PendingScrapingResponse(ScrapingResponse): |
|
|
|
xpath_labels=xpath_labels, completion_time=completion_time, id=self.id |
|
|
|
) |
|
|
|
completed_response.save() |
|
|
|
self.request.delete() |
|
|
|
self.delete() |
|
|
|
return completed_response |
|
|
|
|
|
|
|
@ -102,14 +121,3 @@ class PendingScrapingResponse(ScrapingResponse): |
|
|
|
class CompletedScrapingResponse(ScrapingResponse):
    """Scraping response that carries result labels and a completion time."""

    # Result labels, stored in a JSON field.
    xpath_labels = extended_fields.JSONField()

    # When the response was completed. No default is declared, so the code
    # creating the row has to supply the value.
    completion_time = models.DateTimeField()
|
|
|
|
|
|
|
|
|
|
|
class ScrapeRequest(models.Model):
    """Stored description of one scrape request: a target URL plus options.

    NOTE(review): a class with this same name also appears earlier in this
    text (there without ``auto_now``). The surrounding hunk markers suggest
    this copy is the pre-change version -- confirm which definition should
    remain.
    """

    # internal
    # auto_now=True makes Django overwrite this with the current time on
    # every save of the row.
    submit_time = models.DateTimeField(
        auto_now=True,
        editable=False,
    )

    # parameters
    blocking = models.BooleanField()
    url = models.URLField()
    use_adblock = models.BooleanField()
    wait_for = models.IntegerField()
    xpath_labels = extended_fields.HStoreField()