1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class SeleniumDownloadMiddleware(object):
    """Downloader middleware that fetches each request through Chrome.

    A single Selenium-driven Chrome instance is created per middleware
    instance and reused for every request. ``process_request`` loads the
    URL in the browser, scrolls the page in steps, and returns the rendered
    page source as an ``HtmlResponse``.
    """

    def __init__(self):
        """Start Chrome with a randomly chosen Chrome user-agent string."""
        # The options must be fully built *before* the driver is created and
        # must be handed to it; otherwise the user-agent argument is ignored.
        options = webdriver.ChromeOptions()
        ua = UserAgent().chrome
        options.add_argument('user-agent=%s' % ua)
        self.driver = webdriver.Chrome(
            executable_path=r"C:\Programs\chromedriver\chromedriver.exe",
            options=options,
        )

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and register browser cleanup with the crawler.

        Args:
            crawler: the object whose ``signals.connect`` is used to hook
                ``spider_closed``.

        Returns:
            A new middleware instance whose ``spider_closed`` method is
            connected to the ``spider_closed`` signal.
        """
        # Imported here so the block does not rely on a module-level import
        # that may not exist in the file.
        from scrapy import signals

        middleware = cls()
        crawler.signals.connect(
            middleware.spider_closed, signal=signals.spider_closed
        )
        return middleware

    def spider_closed(self, spider):
        """Quit the browser so the Chrome/chromedriver processes do not leak."""
        self.driver.quit()

    def process_request(self, request, spider):
        """Load ``request.url`` in the browser and return the rendered page.

        Args:
            request: the request to render; only ``request.url`` is read, and
                the request itself is attached to the returned response.
            spider: unused.

        Returns:
            An ``HtmlResponse`` built from the browser's current URL and page
            source, encoded as UTF-8.
        """
        self.driver.set_window_size(3000, 30000)
        self.driver.get(request.url)
        # Fixed pause after navigation before the page is scrolled and read.
        time.sleep(3)
        # Scroll down in growing steps (0, 200, 400, ... pixels), pausing
        # briefly after each step.
        for i in range(10):
            self.driver.execute_script("window.scrollBy(0, %d)" % (i * 200))
            time.sleep(.3)
        source = self.driver.page_source
        response = HtmlResponse(
            url=self.driver.current_url,
            body=source,
            request=request,
            encoding='utf-8',
        )
        return response

    def process_response(self, request, response, spider):
        """Pass the response through unchanged.

        The browser is deliberately not closed here because the same driver
        instance is reused by later requests; it is shut down in
        ``spider_closed`` instead.
        """
        print('SeleniumDownloadMiddleware,process_response')
        return response