Fetch only the first page of a single Douyu category (the LOL section).

# -*- coding: utf-8 -*-
import json
import time

from selenium import webdriver
from fake_useragent import UserAgent


class DouyuSpider(object):
    """Scrape streamer cover images and titles from the first page of the
    Douyu LOL category and append them as JSON lines to ``result.json``.
    """

    # Only the first page of this one category is fetched.
    start_url = 'https://www.douyu.com/g_LOL'

    def __init__(self):
        # Build ChromeOptions BEFORE creating the driver. In the original,
        # the options (random User-Agent) were constructed after
        # webdriver.Chrome() had already started, so they were never applied.
        options = webdriver.ChromeOptions()
        ua = UserAgent().chrome
        options.add_argument('user-agent=%s' % ua)
        # NOTE(review): `executable_path` is the selenium 3 API; selenium 4+
        # requires a Service object instead — confirm the installed version.
        # Creating the driver here (not at class level) avoids launching a
        # browser as a side effect of merely importing this module.
        self.driver = webdriver.Chrome(
            executable_path=r"C:\Programs\chromedriver\chromedriver.exe",
            options=options,
        )

    def run(self):
        """Load the page, scroll to trigger lazy-loading, then extract each
        card's cover image URL and title and append them to result.json.
        """
        try:
            self.driver.set_window_size(1920, 1080)
            self.driver.get(self.start_url)
            # Scroll down in small steps so lazily loaded covers render.
            for i in range(20):
                self.driver.execute_script("window.scrollBy(0, %d)" % (i * 100))
                time.sleep(.3)
            time.sleep(1)
            outers = self.driver.find_elements_by_xpath('//a[@class="DyListCover-wrap"]')
            with open('result.json', 'a', encoding='utf-8') as f:
                for outer in outers:
                    src = outer.find_element_by_xpath('.//picture[contains(@class,"DyImg-content")]/img').get_attribute('src')
                    title = outer.find_element_by_xpath('.//h3[contains(@class,"DyListCover-intro")]').get_attribute('title')
                    item = {
                        'src': src,
                        'title': title,
                    }
                    print(item)
                    # json.dumps yields valid JSON lines; the original wrote
                    # str(dict), which is Python repr, not parseable JSON.
                    f.write(json.dumps(item, ensure_ascii=False) + '\n')
                # No explicit f.close(): the `with` block closes the file.
        finally:
            # Always release the browser, even if scraping raises.
            self.driver.quit()


def main():
    """Entry point: create one spider instance and run a single scrape."""
    spider = DouyuSpider()
    spider.run()


if __name__ == "__main__":
    # Guard so importing this module does not launch a browser.
    main()