You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
import json

from bs4 import UnicodeDammit
from lxml import html

# XPath expressions loaded once at import time. Indexed first by a scraper's
# ``base_url`` and then by field name ('price', 'name', 'stock').
with open('xpaths.json') as file:
    XPATHS = json.load(file)


class __price_scraper__:
    """Base class for site-specific product page scrapers.

    Subclasses are expected to provide:

    * ``base_url`` -- class attribute used as the key into ``XPATHS``.
    * ``in_stock()`` -- method whose truthiness decides whether ``scrape``
      extracts anything.

    Neither is defined here, so instantiating this class directly raises
    ``AttributeError``.
    """

    def __init__(self, url, page_source):
        """Store the inputs and parse ``page_source`` into an lxml tree.

        Args:
            url: Address of the page; stored as-is on ``self.url``.
            page_source: HTML markup handed to ``lxml.html.fromstring``.

        Raises:
            KeyError: If ``XPATHS`` has no entry for ``self.base_url``.
        """
        self.url = url
        self.page_source = page_source
        self.xpaths = XPATHS[self.base_url]
        self.etree = html.fromstring(self.page_source)

    def scrape(self):
        """Populate ``self.price`` and ``self.name`` when the item is in stock.

        Each attribute receives the raw result of evaluating the matching
        XPath. Nothing is set (and ``None`` is returned) when ``in_stock()``
        is falsy.
        """
        scrape_for = ['price', 'name']
        if self.in_stock():
            for cat in scrape_for:
                # Must go through ``self``: ``eval_xpath`` is a method, and
                # the bare name raised NameError before.
                setattr(self, cat, self.eval_xpath(cat))

    def eval_xpath(self, name):
        """Evaluate the XPath registered under ``name`` against the page.

        Args:
            name: Key into this site's XPath table (e.g. 'price').

        Returns:
            Whatever ``etree.xpath`` yields for that expression.
        """
        return self.etree.xpath(self.xpaths[name])

    def __get_stock__(self):
        """Return the result of evaluating the 'stock' XPath."""
        return self.eval_xpath('stock')
class amazon_scraper(__price_scraper__):
    """Scraper whose XPath table is the ``XPATHS`` entry for www.amazon.com."""

    base_url = "www.amazon.com"

    def in_stock(self):
        """Return True when the 'stock' XPath result differs from 'poop'.

        NOTE(review): 'poop' looks like a placeholder sentinel -- confirm the
        real out-of-stock marker before relying on this check.
        """
        return self.__get_stock__() != 'poop'
class banggood_scraper(__price_scraper__):
    """Scraper whose XPath table is the ``XPATHS`` entry for www.banggood.com."""

    base_url = "www.banggood.com"

    def in_stock(self):
        """Return True unless the stock label reads 'out of stock'.

        The comparison is case-insensitive. The 'stock' XPath result may be a
        list of matches (elements or strings) or a single scalar; the first
        match is used. A missing match or empty text is treated as "not out
        of stock", i.e. this returns True.
        """
        stock_val = self.__get_stock__()
        # lxml's xpath() returns a list for node-set expressions, and a list
        # has no ``.text`` -- unwrap the first match (if any) before reading.
        if isinstance(stock_val, list):
            stock_val = stock_val[0] if stock_val else None
        # Elements carry their label in ``.text``; string results are the
        # label themselves.
        label = getattr(stock_val, 'text', stock_val)
        # ``.text`` may be None for an element without leading text.
        return str(label or '').lower() != 'out of stock'
if __name__ == "__main__":
    # Manual smoke test: build a Banggood scraper from a saved HTML page.
    test = 'test\\bg.html'
    with open(test, 'rb') as file:
        raw_bytes = file.read()
    # UnicodeDammit detects the encoding and exposes the decoded markup.
    source = UnicodeDammit(raw_bytes).unicode_markup
    res = banggood_scraper('idek', source)
|