|
"""Collect editorial-office and e-mail contact lines for journals on eshukan.com.

The script runs in two passes:

1. Walk search-result pages ``start_page`` .. ``end_page`` (inclusive) and
   collect ``[detail_url, journal_name]`` pairs from the anchors found under
   ``#allclass`` into ``urls``.
2. Visit every detail page and, from the ``.sjcon p`` paragraphs, keep the
   last paragraph containing '编辑部:' and the last one containing '邮箱'.
   One ``[name, editorial_office, email, url]`` row per page goes into ``res``.

``res`` is printed at the end.

NOTE(review): the listing this was recovered from had lost its indentation;
the nesting below (row appended only for successfully fetched detail pages,
a single ``print`` after both loops) is the reconstruction that keeps every
name defined before use -- confirm against the author's intent.
"""
import sys
from urllib.parse import urljoin

from bs4 import BeautifulSoup
import requests

BASE_URL = 'https://www.eshukan.com'
# Search URL with every filter left at the value the original query used;
# only the ``page`` number varies. (``method=Email%E6%8A%95%E7%A8%BF`` is the
# percent-encoded form of "Email投稿".)
LIST_URL_TEMPLATE = 'https://www.eshukan.com/SuperSearchList.aspx?keyword=&classify=0&wenZhong=-1&kanQi=0&area=0&level=0&heXin=0&puKan=0&first=0&countrySupport=0&college=0&yxyz=0&hornor=0&contentIncluded=0&doubleAnonymous=0&comment=0&gaoFei=0&banMianFei=0&banMianFeiArea=0&shenGaoTime=-1&hot=-1&method=Email%E6%8A%95%E7%A8%BF&page={page}'
# Seconds to wait for a response before giving up on one request, so a
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 10
# Placeholder stored when a detail page has no matching paragraph
# ("please look it up on the detail page yourself").
NOT_FOUND = '请自行前往详情页查找'

start_page, end_page = 1, 5
urls, res = [], []


def _fetch_soup(session, url):
    """Return the parsed document at *url*, or ``None`` if it is unavailable.

    A page counts as unavailable when the request raises a ``requests``
    exception (reported on stderr) or the response status is not 200.
    """
    try:
        response = session.get(url=url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One unreachable page should not discard everything gathered so far.
        print(f'request failed for {url}: {exc}', file=sys.stderr)
        return None
    if response.status_code != 200:
        return None
    # Name the parser explicitly: otherwise bs4 picks whichever parser is
    # installed, warns about it, and may build a different tree per machine.
    return BeautifulSoup(response.text, 'html.parser')


# A single session reuses the connection for all requests to the same host.
with requests.Session() as session:
    # Pass 1: gather detail-page links from each search-result page.
    for i in range(start_page, end_page + 1):
        soup = _fetch_soup(session, LIST_URL_TEMPLATE.format(page=i))
        if soup is None:
            continue
        for link in soup.select('#allclass a'):
            href = link.get('href')
            if not href:
                # Anchors without a target would otherwise yield '...comNone'.
                continue
            urls.append([urljoin(BASE_URL, href), link.text.strip()])

    # Pass 2: pull the contact paragraphs out of every detail page.
    for url, name in urls:
        soup = _fetch_soup(session, url)
        if soup is None:
            continue
        bjb, email = NOT_FOUND, NOT_FOUND
        for info in soup.select('.sjcon p'):
            text = info.text
            # Later matches overwrite earlier ones, so the last one wins.
            if '编辑部:' in text:
                bjb = text.strip()
            if '邮箱' in text:
                email = text.strip()
        res.append([name, bjb, email, url])

print(res)
复制代码 |
评分
-
1
查看全部评分
-
|