|
"""Log in to wcaworld.com and print one member's profile text and table cells.

The script fetches the landing page, reads the login form's ``action`` URL,
posts the credentials to it, then downloads a member page and prints:

1. the list of "memberprofile_detail" text blocks,
2. a separator line of 30 asterisks,
3. every ``<td>`` cell of the first ``<table>`` on the page, stripped.

The values in ``LOGIN_DATA`` are placeholders and must be replaced with real
ones before running.
"""
import re
from typing import List
from urllib.parse import urljoin

BASE_URL = 'https://www.wcaworld.com/'
MEMBER_URL = 'https://www.wcainterglobal.com/directory/members/85798'

# Seconds to wait on each HTTP request; without a timeout a stalled server
# would block the script forever.
REQUEST_TIMEOUT = 30

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.7.0.18838',
}

# Form fields sent with the login POST.  The three Chinese literals are
# placeholders ("your ip", "username", "password") to be filled in by the user.
LOGIN_DATA = {
    'referer': 'https://www.wcaworld.com',
    'REMOTE_ADDR': '你的ip',
    'returnurl': '',
    'verifyurl': 'https://www.wcaworld.com/Account/SsoLoginResult/',
    'username': '用户名',
    'password': '密码',
}

# No DOTALL here (as in the original): the id and action attributes must be
# on the same line for this pattern to match.
LOGIN_ACTION_RE = re.compile(r'id="login-form".*?action="(.*?)"')
PROFILE_TEXT_RE = re.compile(
    r'<div class="memberprofile_row memberprofile_detail" style="white-space:pre-wrap">(.*?)</div',
    re.S,
)
TABLE_RE = re.compile(r' <table .*?">(.*?)</table', re.S)
CELL_RE = re.compile(r'<td.*?">(.*?)</td', re.S)


class PageFormatError(IndexError):
    """An expected element was not found in the downloaded HTML.

    Subclasses ``IndexError`` because that is what the unguarded
    ``re.findall(...)[0]`` lookups used to raise in the same situations.
    """


def find_login_action(html: str) -> str:
    """Return the ``action`` attribute of the form with ``id="login-form"``.

    Raises:
        PageFormatError: if no such form is present in *html*.
    """
    match = LOGIN_ACTION_RE.search(html)
    if match is None:
        raise PageFormatError('login form (id="login-form") not found in page')
    return match.group(1)


def extract_profile_text(html: str) -> List[str]:
    """Return the inner text of every "memberprofile_detail" div in *html*."""
    return PROFILE_TEXT_RE.findall(html)


def extract_table_cells(html: str) -> List[str]:
    """Return the stripped contents of each ``<td>`` in the first table.

    Raises:
        PageFormatError: if *html* contains no matching ``<table>``.
    """
    table = TABLE_RE.search(html)
    if table is None:
        raise PageFormatError('no <table> found in page (login may have failed)')
    return [cell.strip() for cell in CELL_RE.findall(table.group(1))]


def main() -> None:
    """Run the login + scrape sequence and print the results."""
    # Third-party dependency; imported here so the parsing helpers above can
    # be imported and tested without it.
    import requests

    with requests.Session() as sess:
        sess.headers.update(HEADERS)
        # Fetch the landing page through the session so any cookies it sets
        # are sent along with the login POST.
        landing_html = sess.get(BASE_URL, timeout=REQUEST_TIMEOUT).text
        # urljoin leaves an absolute action URL untouched and resolves a
        # relative one against the landing page.
        login_url = urljoin(BASE_URL, find_login_action(landing_html))
        sess.post(login_url, data=LOGIN_DATA, timeout=REQUEST_TIMEOUT)
        member_html = sess.get(MEMBER_URL, timeout=REQUEST_TIMEOUT).text

    print(extract_profile_text(member_html))
    print('*' * 30)
    for cell in extract_table_cells(member_html):
        print(cell)


if __name__ == '__main__':
    main()
复制代码 |
|