You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
42 lines
1.2 KiB
42 lines
1.2 KiB
from bs4 import UnicodeDammit
|
|
from lxml import html
|
|
import json
|
|
# Load the per-site XPath table once at import time. It is keyed by each
# scraper's ``base_url``. The path is relative to the current working directory.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open('xpaths.json', encoding='utf-8') as file:
    XPATHS = json.load(file)
|
|
class __price_scraper__:
    """Base class for site-specific price scrapers.

    Subclasses are expected to provide a ``base_url`` class attribute (used
    as the key into the module-level ``XPATHS`` mapping) and to implement
    ``in_stock()``.
    """

    def __init__(self, url, page_source):
        """Parse *page_source* and select the XPath table for this site.

        Args:
            url: Stored as ``self.url``; not otherwise used by this class.
            page_source: HTML markup passed to ``lxml.html.fromstring``.

        Raises:
            KeyError: If ``XPATHS`` has no entry for ``self.base_url``.
        """
        self.url = url
        self.page_source = page_source
        self.xpaths = XPATHS[self.base_url]
        self.etree = html.fromstring(self.page_source)

    def in_stock(self):
        """Report whether the product is available; subclasses must override."""
        raise NotImplementedError(
            f"{type(self).__name__} must implement in_stock()"
        )

    def scrape(self):
        """Populate ``self.price`` and ``self.name`` when the item is in stock.

        Each attribute receives the raw result of evaluating the matching
        XPath expression. Nothing is set when ``in_stock()`` is falsy.
        """
        scrape_for = ['price', 'name']
        if self.in_stock():
            for cat in scrape_for:
                # eval_xpath is a method, so it has to be reached through self.
                setattr(self, cat, self.eval_xpath(cat))

    def eval_xpath(self, name):
        """Evaluate the XPath registered under *name* against the parsed page.

        Returns:
            Whatever ``self.etree.xpath`` returns for that expression.

        Raises:
            KeyError: If *name* is not present in ``self.xpaths``.
        """
        return self.etree.xpath(self.xpaths[name])

    def __get_stock__(self):
        """Return the raw result of the ``'stock'`` XPath query."""
        return self.eval_xpath('stock')
|
|
|
|
class amazon_scraper(__price_scraper__):
    """Scraper bound to the ``www.amazon.com`` entry of the XPath table."""

    base_url = "www.amazon.com"

    def in_stock(self):
        """Return True unless the stock query result equals the sentinel string."""
        # NOTE(review): 'poop' looks like a placeholder sentinel — confirm the
        # real out-of-stock value for this site.
        return self.__get_stock__() != 'poop'
|
|
|
|
class banggood_scraper(__price_scraper__):
    """Scraper bound to the ``www.banggood.com`` entry of the XPath table."""

    base_url = "www.banggood.com"

    def in_stock(self):
        """Return False only when the stock label reads 'out of stock'.

        The comparison is case-insensitive and ignores surrounding
        whitespace. The stock query result may be a list of matches, a
        single element, or a plain string; the first match is used.
        """
        stock_val = self.__get_stock__()

        # lxml's xpath() yields a list for node-set queries, so unwrap the
        # first match before reading its text.
        if isinstance(stock_val, (list, tuple)):
            if not stock_val:
                # NOTE(review): no stock label matched, so the page is not
                # marked 'out of stock' and is treated as available — confirm.
                return True
            stock_val = stock_val[0]

        # text()/string() queries produce strings; element queries produce
        # nodes whose .text may be None.
        if isinstance(stock_val, str):
            label = stock_val
        else:
            label = getattr(stock_val, 'text', None)

        return (label or '').strip().lower() != 'out of stock'
|
|
|
|
if __name__ == "__main__":
    # Ad-hoc smoke test: decode a saved page and construct a scraper from it.
    # Only the constructor runs here; scrape() is not invoked.
    import os

    # Build the fixture path portably instead of hard-coding a backslash,
    # which only resolves on Windows.
    test = os.path.join('test', 'bg.html')
    with open(test, 'rb') as file:
        # UnicodeDammit detects the encoding of the raw bytes and decodes them.
        source = UnicodeDammit(file.read()).unicode_markup

    res = banggood_scraper('idek', source)
|