9 changed files with 107 additions and 44 deletions
-
4requirements.txt
-
2restscrape/__init__.py
-
22restscrape/migrations/0001_initial.py
-
18restscrape/migrations/0002_auto_20190517_1311.py
-
19restscrape/models.py
-
10restscrape/scraping/__init__.py
-
68restscrape/scraping/browser.py
-
4restscrape/scraping/proxy.py
-
4restscrape/scraping/scraper.py
@ -1,3 +1,5 @@ |
|||||
|
websockets==6.0 |
||||
pyppeteer |
pyppeteer |
||||
requests |
requests |
||||
lxml |
|
||||
|
lxml |
||||
|
django |
||||
@ -0,0 +1,2 @@ |
|||||
|
from restscrape import migrations |
||||
|
from restscrape import scraping |
||||
@ -0,0 +1,22 @@ |
|||||
|
# Generated by Django 2.2.1 on 2019-05-17 18:04 |
||||
|
|
||||
|
from django.db import migrations, models |
||||
|
|
||||
|
|
||||
|
class Migration(migrations.Migration):
    """Initial migration for the app: creates the ``Page`` model."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Page',
            fields=[
                (
                    'url',
                    models.CharField(
                        max_length=300,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                # NOTE: 'acess_time' is the historical (misspelled) field
                # name; migration 0002_auto_20190517_1311 renames it, so it
                # must stay exactly as written here.
                ('acess_time', models.DateTimeField()),
                ('page_content', models.FileField(upload_to='page_cache')),
            ],
        ),
    ]
||||
@ -0,0 +1,18 @@ |
|||||
|
# Generated by Django 2.2.1 on 2019-05-17 18:11 |
||||
|
|
||||
|
from django.db import migrations |
||||
|
|
||||
|
|
||||
|
class Migration(migrations.Migration):
    """Rename the misspelled ``acess_time`` field on ``page`` to ``access_time``."""

    # Must run after the migration that created the model.
    dependencies = [('restscrape', '0001_initial')]

    operations = [
        migrations.RenameField(
            model_name='page',
            old_name='acess_time',
            new_name='access_time',
        ),
    ]
||||
@ -1,3 +1,22 @@ |
|||||
from django.db import models |
from django.db import models |
||||
|
from django.core.files.base import ContentFile |
||||
|
from urllib.parse import quote_plus |
||||
|
|
||||
# Create your models here. |
# Create your models here. |
||||
|
|
||||
|
|
||||
|
class Page(models.Model):
    """A stored copy of a page's content, keyed by the page URL.

    The content lives in the ``page_content`` file field (uploaded under
    ``page_cache``); ``access_time`` is a required timestamp that callers
    must set themselves.
    """

    url = models.CharField(max_length=300, primary_key=True)
    access_time = models.DateTimeField()
    page_content = models.FileField(upload_to='page_cache')

    @property
    def filename(self):
        """Return the URL encoded with ``quote_plus`` for use as a file name."""
        return quote_plus(self.url)

    def write(self, page_content):
        """Save ``page_content`` into the ``page_content`` file field.

        The raw content is wrapped in a ``ContentFile`` and stored under
        ``self.filename``.
        """
        content = ContentFile(page_content)
        self.page_content.save(name=self.filename, content=content)

    def read(self):
        """Return the complete stored content of the page.

        The file is explicitly (re)opened in binary mode and closed again
        afterwards, so repeated calls always read from the start instead of
        returning an empty result from an exhausted, still-open handle.
        """
        with self.page_content.open('rb') as stored:
            return stored.read()
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue