From de2cbb46b715ef0ef06c3d7f193bbb3bd9ecde04 Mon Sep 17 00:00:00 2001
From: Raphael Roberts
Date: Mon, 27 May 2019 15:56:03 -0500
Subject: [PATCH] Blackened codebase

---
 restscrape/apps.py              |  2 +-
 restscrape/scraping/__init__.py | 14 ++++----------
 restscrape/scraping/proxy.py    | 12 ++++++------
 restscrape/scraping/scraper.py  | 17 +++++++++--------
 setup.py                        |  4 ++--
 5 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/restscrape/apps.py b/restscrape/apps.py
index 9fc7a09..e0b6e82 100644
--- a/restscrape/apps.py
+++ b/restscrape/apps.py
@@ -2,4 +2,4 @@ from django.apps import AppConfig
 
 
 class RestscrapeConfig(AppConfig):
-    name = 'restscrape'
+    name = "restscrape"
diff --git a/restscrape/scraping/__init__.py b/restscrape/scraping/__init__.py
index 40e332f..868aeeb 100644
--- a/restscrape/scraping/__init__.py
+++ b/restscrape/scraping/__init__.py
@@ -7,14 +7,7 @@ from restscrape.scraping.proxy import ProxyIter, create_proxy_iter
 US_PROXY_ITER = create_proxy_iter()
 
 
-def scrape(
-    url,
-    labels,
-    proxy_iter=None,
-    wait_for=0,
-    max_tries=4,
-    raw_tags=True
-):
+def scrape(url, labels, proxy_iter=None, wait_for=0, max_tries=4, raw_tags=True):
     browser = Browser(headless=False)
     if proxy_iter is not None:
         for trial in range(max_tries):
@@ -47,5 +40,6 @@ def scrape(
 
 
 if __name__ == "__main__":
-    ret, browser = scrape('http://rlbrhost.ddns.net/',
-                          {'links': '//a'}, wait_for=10, raw_tags=False)
+    ret, browser = scrape(
+        "http://rlbrhost.ddns.net/", {"links": "//a"}, wait_for=10, raw_tags=False
+    )
diff --git a/restscrape/scraping/proxy.py b/restscrape/scraping/proxy.py
index 6555422..57f1b05 100644
--- a/restscrape/scraping/proxy.py
+++ b/restscrape/scraping/proxy.py
@@ -2,13 +2,13 @@ import requests
 
 from restscrape.scraping.scraper import proxy_scraper
 
-US_PROXY_URL = 'https://www.us-proxy.org/'
+US_PROXY_URL = "https://www.us-proxy.org/"
 
 
 class ProxyIter:
-    '''Like itertools.cycle but
+    """Like itertools.cycle but
 uses a set underneath the hood and adds a method to remove an item from
-iteration (if proxy doesn't work etc)'''
+iteration (if proxy doesn't work etc)"""
 
     def __init__(self, proxy_list):
         self.proxy_set = set(proxy_list)
@@ -34,9 +34,9 @@ iteration (if proxy doesn't work etc)'''
 
 
 def create_proxy_iter(url=US_PROXY_URL):
-    '''Create a proxy_iter from proxy_webpage'''
+    """Create a proxy_iter from proxy_webpage"""
     resp = requests.get(url)
     resp.raise_for_status()
     return ProxyIter(
-        '{ip address}:{port}'.format(**row)
-        for row in proxy_scraper(resp.text))
+        "{ip address}:{port}".format(**row) for row in proxy_scraper(resp.text)
+    )
diff --git a/restscrape/scraping/scraper.py b/restscrape/scraping/scraper.py
index dca1e68..f05ef41 100644
--- a/restscrape/scraping/scraper.py
+++ b/restscrape/scraping/scraper.py
@@ -2,7 +2,6 @@ import lxml.etree
 
 
 class Scraper:
-
     def __init__(self, page_source):
         if not isinstance(page_source, lxml.etree._Element):
             page_source = lxml.etree.HTML(page_source)
@@ -15,18 +14,18 @@ class Scraper:
         if not isinstance(table, lxml.etree._Element):
             table = self.xpath(table)[0]
         header = table.xpath(header_xpath)[0]
-        headers = list(element.text.lower()
-                       for element in header.findall('th'))
-        for row in table.xpath(rows_xpath)[0].findall('tr'):
-            yield dict(zip(headers, (data.text for data in row.findall('td'))))
+        headers = list(element.text.lower() for element in header.findall("th"))
+        for row in table.xpath(rows_xpath)[0].findall("tr"):
+            yield dict(zip(headers, (data.text for data in row.findall("td"))))
 
     def label_convert(self, labels, raw_tags=False):
         ret = {}
         for label, xpath in labels.items():
             res = self.xpath(xpath)
             if raw_tags:
-                ret[label] = list(lxml.etree.tostring(
-                    element, pretty_print=True) for element in res)
+                ret[label] = list(
+                    lxml.etree.tostring(element, pretty_print=True) for element in res
+                )
             else:
                 ret[label] = list(element.text for element in res)
 
@@ -37,4 +36,6 @@ def proxy_scraper(page_source):
     page = Scraper(page_source)
     yield from page.extract_table(
         table="//table[@id='proxylisttable']",
-        header_xpath="./thead/tr", rows_xpath="./tbody")
+        header_xpath="./thead/tr",
+        rows_xpath="./tbody",
+    )
diff --git a/setup.py b/setup.py
index b5e91c3..6c0a7e0 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages
 
-with open('requirements.txt') as file:
-    INSTALL_REQUIRES = file.read().rstrip().split('\n')
+with open("requirements.txt") as file:
+    INSTALL_REQUIRES = file.read().rstrip().split("\n")
 
 setup(
     author="Raphael Roberts",
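
Usage note: the diff above is consistent with running black at its default
88-column line length. As a minimal sketch of how the reformatted helpers
compose, not part of the commit: scrape(), create_proxy_iter(), and the
label-to-XPath mapping shape all come from this patch; the target URL is a
placeholder, and the cleanup call assumes the Browser object constructed
inside scrape() is a splinter-style browser exposing quit().

from restscrape.scraping import scrape
from restscrape.scraping.proxy import create_proxy_iter

proxies = create_proxy_iter()  # ProxyIter over "{ip address}:{port}" strings
results, browser = scrape(
    "http://example.com/",     # placeholder target URL
    {"links": "//a"},          # label -> XPath mapping, as in the __main__ block
    proxy_iter=proxies,        # rotate through the scraped US proxies on retry
    wait_for=10,
    raw_tags=False,            # element.text instead of serialized tags
)
print(results["links"])        # text of each matched <a> element
browser.quit()                 # assumed splinter-style cleanup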