From c85da5458384c5e4fd1aa433b48f794ac9463355 Mon Sep 17 00:00:00 2001 From: Raphael Roberts Date: Mon, 27 May 2019 15:07:41 -0500 Subject: [PATCH] Added content_size to Page model --- restscrape/__init__.py | 2 -- .../migrations/0003_page_content_size.py | 19 +++++++++++++++++++ restscrape/models.py | 14 ++++++++++++-- 3 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 restscrape/migrations/0003_page_content_size.py diff --git a/restscrape/__init__.py b/restscrape/__init__.py index fe18976..e69de29 100644 --- a/restscrape/__init__.py +++ b/restscrape/__init__.py @@ -1,2 +0,0 @@ -from restscrape import migrations -from restscrape import scraping diff --git a/restscrape/migrations/0003_page_content_size.py b/restscrape/migrations/0003_page_content_size.py new file mode 100644 index 0000000..19dd751 --- /dev/null +++ b/restscrape/migrations/0003_page_content_size.py @@ -0,0 +1,19 @@ +# Generated by Django 2.2.1 on 2019-05-17 20:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('restscrape', '0002_auto_20190517_1311'), + ] + + operations = [ + migrations.AddField( + model_name='page', + name='content_size', + field=models.PositiveIntegerField(default=0), + preserve_default=False, + ), + ] diff --git a/restscrape/models.py b/restscrape/models.py index b48c8fa..6efb6cf 100644 --- a/restscrape/models.py +++ b/restscrape/models.py @@ -1,7 +1,10 @@ -from django.db import models -from django.core.files.base import ContentFile +import datetime from urllib.parse import quote_plus +from django.core.files.base import ContentFile +from django.db import models +import pytz + # Create your models here. 
@@ -9,14 +12,32 @@ class Page(models.Model):
     url = models.CharField(max_length=300, primary_key=True)
     access_time = models.DateTimeField()
     page_content = models.FileField(upload_to='page_cache')
+    # Size of the stored content as given by len() at write time.
+    content_size = models.PositiveIntegerField()
 
     @property
     def filename(self):
         return quote_plus(self.url)
 
     def write(self, page_content):
+        """Store page_content in the file field and refresh size/timestamp."""
+        # Set size and timestamp first: FieldFile.save() below also saves
+        # the model instance by default, so one save persists everything.
+        self.content_size = len(page_content)
         file = ContentFile(page_content)
+        self.access_time = datetime.datetime.now(pytz.utc)
         self.page_content.save(name=self.filename, content=file)
 
     def read(self):
         return self.page_content.read()
+
+    def delete(self, *args, **kwargs):
+        """Delete the stored file, then the database row.
+
+        Accepts and forwards the arguments of Model.delete() (using,
+        keep_parents) and returns its result.
+        """
+        # save=False: the row is about to be removed, so re-saving it after
+        # clearing the file field would be a wasted write.
+        self.page_content.delete(save=False)
+        return super().delete(*args, **kwargs)