1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
import json
import time

from fake_useragent import UserAgent
from selenium import webdriver
class DouyuSpider(object):
    """Scrape room cover images and titles from Douyu's LOL listing page.

    A Chrome browser is started when an instance is created and is shut
    down at the end of :meth:`run`, so an instance is meant to be run once.

    NOTE(review): ``executable_path`` and the ``find_element(s)_by_xpath``
    helpers are Selenium 3 style APIs and were kept as the original code
    used them -- revisit if the project moves to Selenium 4.
    """

    # Listing page that gets scraped.
    start_url = 'https://www.douyu.com/g_LOL'
    # Location of the chromedriver binary used to launch Chrome.
    driver_path = r"C:\Programs\chromedriver\chromedriver.exe"

    def __init__(self):
        """Build the Chrome options and start the browser.

        The browser is created here rather than in the class body so that
        merely importing the module does not launch Chrome. The options
        are built *before* the driver and passed to it; otherwise the
        random user agent would never reach the browser.
        """
        self.ua = UserAgent().chrome
        self.options = webdriver.ChromeOptions()
        self.options.add_argument('user-agent=%s' % self.ua)
        self.driver = webdriver.Chrome(
            executable_path=self.driver_path,
            options=self.options,
        )

    def run(self, output_path='result.json'):
        """Load the listing page and append every scraped cover to a file.

        Each item is printed and written as one JSON object per line.

        Args:
            output_path: File receiving the results. It is opened in
                append mode, so earlier content is kept.
        """
        try:
            with open(output_path, 'a', encoding='utf-8') as f:
                self.driver.set_window_size(1920, 1080)
                self.driver.get(self.start_url)
                self._scroll_down()
                for item in self._iter_items():
                    print(item)
                    f.write(self._to_json_line(item))
        finally:
            # Always release the browser, even when scraping fails.
            self.driver.quit()

    def _scroll_down(self):
        """Scroll the page down in growing steps, pausing between steps.

        Presumably this gives lazily loaded covers time to appear --
        TODO confirm against the live page.
        """
        for step in range(20):
            self.driver.execute_script(
                "window.scrollBy(0, %d)" % (step * 100)
            )
            time.sleep(.3)
        time.sleep(1)

    def _iter_items(self):
        """Yield a ``{'src': ..., 'title': ...}`` dict per cover link."""
        covers = self.driver.find_elements_by_xpath(
            '//a[@class="DyListCover-wrap"]'
        )
        for cover in covers:
            image = cover.find_element_by_xpath(
                './/picture[contains(@class,"DyImg-content")]/img'
            )
            heading = cover.find_element_by_xpath(
                './/h3[contains(@class,"DyListCover-intro")]'
            )
            yield {
                'src': image.get_attribute('src'),
                'title': heading.get_attribute('title'),
            }

    @staticmethod
    def _to_json_line(item):
        """Return *item* serialized as one newline-terminated JSON line.

        ``ensure_ascii=False`` keeps non-ASCII titles readable in the
        UTF-8 output file instead of escaping them.
        """
        return json.dumps(item, ensure_ascii=False) + '\n'
def main():
    """Create a DouyuSpider and run it once."""
    DouyuSpider().run()
# Start the scraper only when this file is executed as a script, so that
# importing the module does not launch a browser session.
if __name__ == "__main__":
    main()
|