diff --git a/quad_parts/price_scraper.py b/quad_parts/price_scraper.py index cdf9162..f3973e7 100644 --- a/quad_parts/price_scraper.py +++ b/quad_parts/price_scraper.py @@ -1,9 +1,14 @@ -from lxml.etree import etree +from bs4 import UnicodeDammit +from lxml import html +import json +with open('xpaths.json') as file: + XPATHS = json.load(file) class __price_scraper__: def __init__(self,url,page_source): self.url = url self.page_source = page_source - self.etree = etree.fromstring(self.page_source) + self.xpaths = XPATHS[self.base_url] + self.etree = html.fromstring(self.page_source) def scrape(self): scrape_for = ['price','name'] @@ -11,15 +16,27 @@ class __price_scraper__: for cat in scrape_for: - self.__setattr__(cat,eval_xpath(cat)) + self.__setattr__(cat,self.eval_xpath(cat)) - def eval_xpath(name): + def eval_xpath(self,name): return self.etree.xpath(self.xpaths[name]) def __get_stock__(self): - return self.eval_xpath('in_stock') + return self.eval_xpath('stock') -def amazon_scraper(__price_scraper__): - xpaths = { - } +class amazon_scraper(__price_scraper__): + base_url = "www.amazon.com" def in_stock(self): stock_val = self.__get_stock__() - return stock_val != 'poop' \ No newline at end of file + return stock_val != 'poop' + +class banggood_scraper(__price_scraper__): + base_url = "www.banggood.com" + def in_stock(self): + stock_val = self.__get_stock__() + return not any((el.text or '').strip().lower() == 'out of stock' for el in stock_val) + +if __name__ == "__main__": + test = 'test\\bg.html' + with open(test,'rb') as file: + source = UnicodeDammit(file.read()).unicode_markup + + res = banggood_scraper('idek',source) \ No newline at end of file diff --git a/quad_parts/xpaths.json b/quad_parts/xpaths.json new file mode 100644 index 0000000..ef8690c --- /dev/null +++ b/quad_parts/xpaths.json @@ -0,0 +1,34 @@ +{ + "www.banggood.com": + { + "name": "//div[@class='title_hd']/h2 | //h1[@itemprop='name']", + "price": "//div[contains(@class,'item_now_price')] | //div[@class='now']", + "stock": "//strong[contains(text(),'stock')]" + }, + 
"www.gearbest.com": + { + "name": "//h1[@class='goodsIntro_title']", + "price": "//span[contains(@class,'goodsIntro_price')]", + "outofstock": "//div[@class='goodsIntro_noticeSubmit']" + }, + "www.amazon.com": + { + "name": "//span[@id='priceblock_dealprice' or @id='priceblock_ourprice']", + "price": "//span[@id='productTitle']" + }, + "www.getfpv.com": + { + "name": "//div[@class='product-name']/span", + "price": "//div[@class='price-box']/p[@class='special-price']/span[@class='price'] | //div[@class='price-box']/span[@class='regular-price']/span" + }, + "www.dalprops.com": + { + "name": "//h1[@itemprop='name']", + "price": "//*[@id='product-price']" + }, + "hobbyking.com": + { + "name": "//h1[contains(@class,'product-name')]", + "price": "//p[@class='special-price']/span[@class='price'] | //span[@class='regular-price']/span[@class='price']" + } +}