Browse Source

Added encoding to Page model

master
Raphael Roberts 7 years ago
parent
commit
2c25811c60
  1. 18
      restscrape/migrations/0004_page_encoding.py
  2. 12
      restscrape/models.py

18
restscrape/migrations/0004_page_encoding.py

@ -0,0 +1,18 @@
# Generated by Django 2.2.1 on 2019-05-27 21:00
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``encoding`` column to the ``page`` model.

    The new column is a 15-character ``CharField`` whose default is
    ``utf-8``; it is applied on top of migration
    ``0003_page_content_size`` of the ``restscrape`` app.
    """

    # Must be applied after the migration that introduced ``content_size``.
    dependencies = [("restscrape", "0003_page_content_size")]

    operations = [
        migrations.AddField(
            model_name="page",
            name="encoding",
            field=models.CharField(max_length=15, default="utf-8"),
        ),
    ]

12
restscrape/models.py

@ -10,8 +10,9 @@ import pytz
class Page(models.Model):
url = models.CharField(max_length=300, primary_key=True)
encoding = models.CharField(max_length=15, default="utf-8")
access_time = models.DateTimeField()
page_content = models.FileField(upload_to='page_cache')
page_content = models.FileField(upload_to="page_cache")
content_size = models.PositiveIntegerField()
@property
@ -19,13 +20,20 @@ class Page(models.Model):
return quote_plus(self.url)
def write(self, page_content):
    """Store *page_content* as this page's cached file and refresh metadata.

    Args:
        page_content: The page body. ``bytes`` are stored verbatim and
            ``self.encoding`` is left untouched, because the codec of raw
            bytes is not known here. Any other value is encoded via its
            ``encode`` method (UTF-8 first, UTF-16 as a fallback) and the
            codec that was used is recorded in ``self.encoding``.

    Side effects:
        Sets ``self.content_size`` to the number of stored bytes, sets
        ``self.access_time`` to the current UTC time, and saves the bytes
        through ``self.page_content.save`` under ``self.filename``.

    Raises:
        UnicodeEncodeError: if the text can be encoded with neither codec.
    """
    if not isinstance(page_content, bytes):
        try:
            encoded = page_content.encode("utf-8")
        except UnicodeEncodeError:
            # NOTE(review): the usual cause of a UTF-8 encode failure is a
            # lone surrogate, which strict UTF-16 appears to reject as well,
            # so this fallback may raise too -- confirm intended handling.
            encoded = page_content.encode("utf-16")
            self.encoding = "utf-16"
        else:
            # Always record the codec actually used; otherwise a stale
            # "utf-16" left by an earlier write would make read() decode
            # this UTF-8 payload with the wrong codec.
            self.encoding = "utf-8"
        page_content = encoded

    self.content_size = len(page_content)
    content_file = ContentFile(page_content)
    self.access_time = datetime.datetime.now(pytz.utc)
    self.page_content.save(name=self.filename, content=content_file)
def read(self):
    """Return the cached page content decoded to text.

    Returns:
        str: the bytes stored in ``self.page_content`` decoded with the
        codec named by ``self.encoding``.

    Raises:
        UnicodeDecodeError: if the stored bytes are not valid in
            ``self.encoding``.
    """
    # Open explicitly instead of calling ``.read()`` on the field file
    # directly: this starts from the beginning of the file on every call
    # and closes the handle once the bytes have been read.
    with self.page_content.open("rb") as stored:
        raw: bytes = stored.read()
    return raw.decode(self.encoding)
def delete(self):
self.page_content.delete()

Loading…
Cancel
Save