You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

41 lines
1.2 KiB

  1. from bs4 import UnicodeDammit
  2. from lxml import html
  3. import json
  4. with open('xpaths.json') as file:
  5. XPATHS = json.load(file)
  6. class __price_scraper__:
  7. def __init__(self,url,page_source):
  8. self.url = url
  9. self.page_source = page_source
  10. self.xpaths = XPATHS[self.base_url]
  11. self.etree = html.fromstring(self.page_source)
  12. def scrape(self):
  13. scrape_for = ['price','name']
  14. if self.in_stock():
  15. for cat in scrape_for:
  16. self.__setattr__(cat,eval_xpath(cat))
  17. def eval_xpath(self,name):
  18. return self.etree.xpath(self.xpaths[name])
  19. def __get_stock__(self):
  20. return self.eval_xpath('stock')
  21. class amazon_scraper(__price_scraper__):
  22. base_url = "www.amazon.com"
  23. def in_stock(self):
  24. stock_val = self.__get_stock__()
  25. return stock_val != 'poop'
  26. class banggood_scraper(__price_scraper__):
  27. base_url = "www.banggood.com"
  28. def in_stock(self):
  29. stock_val = self.__get_stock__()
  30. return stock_val.text.lower() != 'out of stock'
  31. if __name__ == "__main__":
  32. test = 'test\\bg.html'
  33. with open(test,'rb') as file:
  34. source = UnicodeDammit(file.read()).unicode_markup
  35. res = banggood_scraper('idek',source)