|
# -*- coding: utf-8 -*-
# version: Python 3.7.0
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import parsel

# Browser options: run Chrome without a visible window, with the GPU
# disabled and with the Blink "imagesEnabled" setting turned off.
opt = webdriver.ChromeOptions()
opt.add_argument("--headless")
opt.add_argument('--disable-gpu')
opt.add_argument('--blink-settings=imagesEnabled=false')

# Module-level browser session and a shared explicit wait (60 s timeout);
# both are used by get_content() below.
driver = webdriver.Chrome(options=opt)
wait = WebDriverWait(driver, 60)

# NOTE(review): the URL embeds a sessionKey query parameter; presumably it
# is tied to one session and may stop working -- confirm before reuse.
driver.get(
    'http://1s1k.eduyun.cn/portal/redesign/index/index.jsp?sdResIdCaseId=8aee80cd63a9fbd80163aea6926314cb&t=2&sessionKey=L7SODwD6KR1UHab26g3v'
)
def get_content():
    """Extract and print the text shown in the page's embedded reader.

    Clicks the element with id ``sheji1``, switches into the ``xreader``
    frame, waits until ``div.reader-container-inner`` is present, then
    joins every text node found under that container.

    Relies on the module-level ``driver`` and ``wait`` objects. The driver
    is quit in the ``finally`` clause whether or not extraction succeeds,
    so it cannot be reused after this call.

    Returns:
        str: The concatenated text (also printed to stdout).

    Raises:
        selenium.common.exceptions.TimeoutException: If one of the awaited
            conditions is not met within the wait's timeout.
    """
    try:
        wait.until(EC.element_to_be_clickable((By.ID, 'sheji1'))).click()
        # The reader lives in a separate frame; this condition also switches
        # the driver's context into it once it is available.
        wait.until(EC.frame_to_be_available_and_switch_to_it((By.ID, 'xreader')))
        wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'div.reader-container-inner')))
        sel = parsel.Selector(driver.page_source)
        # NOTE(review): this XPath requires the class attribute to equal
        # "reader-container-inner" exactly, whereas the CSS wait above also
        # matches elements carrying additional classes -- confirm on the page.
        res = sel.xpath('//div[@class="reader-container-inner"]//text()').getall()
        text = ''.join(res)
        # The text could be written to a .txt or Word file here instead;
        # that step is omitted for brevity.
        print(text)
        return text
    finally:
        driver.quit()
# Trigger the extraction only when this file is executed as a script.
if __name__ == '__main__':
    get_content()
复制代码
|
|