|
- # version: Python 3.7.0
- import requests,parsel,os
# Root address of the target site; relative page paths are appended to it.
host_url = 'https://www.ddshubao.com'

# HTTP headers sent with every request made by this script.
headers = {
    'User-Agent': '(KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
}
def get_total_url():
    """Fetch the book's index page and download every chapter it lists.

    Requests ``{host_url}/book/1221/``, decodes the response body as
    GB18030, and calls ``get_Article`` with the ``href`` of the link found
    in each ``#allchapter li`` element, in document order.
    """
    # NOTE(review): verify=False turns off TLS certificate verification and
    # no timeout is set, so a stalled connection blocks indefinitely.
    r = requests.get(url=f'{host_url}/book/1221/', headers=headers, verify=False)
    chapter_items = parsel.Selector(r.content.decode('gb18030')).css('#allchapter li')
    for item in chapter_items:
        href = item.css('a::attr(href)').get()
        # .get() returns None when the <li> holds no link; passing that on
        # would build a bogus URL ending in "None", so skip the entry.
        if href is None:
            continue
        get_Article(href)
def get_Article(url):
    """Download one chapter and every continuation page that follows it.

    Each page is fetched from ``{host_url}{url}``, decoded as GB18030 and
    handed to ``parse_Article``. While the last link inside the
    ``content-nav down`` block is labelled "下一页" (next page), its href
    is followed, so chapters split over any number of pages are captured
    in full rather than only the first two.

    Args:
        url: Site-relative path of the chapter's first page. ``None`` is
            ignored.
    """
    nav_last_link = '//div[@class="content-nav down"]/a[last()]'
    visited = set()  # guards against a page whose "next" link loops back
    while url is not None and url not in visited:
        visited.add(url)
        r = requests.get(url=f'{host_url}{url}', headers=headers, verify=False)
        page_text = r.content.decode('gb18030')
        parse_Article(page_text)

        page = parsel.Selector(page_text)
        if page.xpath(f'{nav_last_link}/text()').get() != "下一页":
            # Last link is not "next page", so this chapter is finished.
            break
        # A missing href yields None, which ends the loop.
        url = page.xpath(f'{nav_last_link}/@href').get()
def parse_Article(text):
    """Extract a page's heading and body text and append them to the output.

    Collects the ``h1`` text nodes and the direct text nodes of
    ``div.content`` found under ``div.readbox``, joins them with newlines,
    and writes the result plus a trailing newline to the module-level
    ``file_pointer``.

    Args:
        text: Decoded HTML of one chapter page.
    """
    readbox = parsel.Selector(text).css('div.readbox')
    pieces = [
        *readbox.xpath("h1/text()").getall(),
        *readbox.xpath("./div[@class='content']/text()").getall(),
    ]
    file_pointer.write('\n'.join(pieces) + '\n')
if __name__ == '__main__':
    # Destination text file -- edit this path to choose where the book is saved.
    path_file = r'D:\result.txt'
    # Mode 'w' truncates any existing file, so no separate delete step is
    # needed. The handle is bound to the module-level name `file_pointer`
    # because parse_Article writes through that global; the `with` block
    # guarantees it is flushed and closed even if the download fails midway.
    with open(path_file, 'w', encoding='utf-8') as file_pointer:
        file_pointer.write('绝魔之地狱之门\n')
        get_total_url()
复制代码
|
|