|
- # -*- coding: utf-8 -*-
- # version: Python 3.7.0
- import requests,os,csv,parsel
def get_html(data, timeout=10):
    """Fetch one listing page and pass its decoded HTML to ``parse_html``.

    Reads the module-level ``base_url`` and ``headers`` that the script
    entry point defines before calling this function.

    Args:
        data: Query-string parameters for the GET request (the script
            passes ``{'page': <page number>}``).
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises. Defaults to 10.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the whole scrape.
    r = requests.get(base_url, params=data, headers=headers, timeout=timeout)
    # Decode the raw bytes explicitly as UTF-8 instead of relying on the
    # encoding requests guesses for ``r.text``.
    parse_html(r.content.decode('utf-8'))
def parse_html(text):
    """Extract the listing rows from one page of HTML and write them as CSV.

    Selects every ``tr`` beneath the element with id ``div_listing`` that has
    no ``class`` attribute and no ``td`` containing a ``div`` child, then
    writes the string value of each of its ``td`` cells as one CSV row to the
    module-level file object ``f_point``.

    Args:
        text: HTML source of a listing page.
    """
    tr_list = parsel.Selector(text).xpath(
        '//*[@id="div_listing"]//tr[not(@class)][not(td[div])]'
    )
    # Build the writer once per page rather than once per row.
    writer = csv.writer(f_point)
    writer.writerows(
        [td.xpath('string(.)').get() for td in tr.xpath('td')]
        for tr in tr_list
    )
if __name__ == '__main__':
    # Script entry point: scrape the first pages of the sold-listing site and
    # save every row to a CSV file.
    file_path = r'D:\result.csv'  # destination CSV file; change as needed

    # Module-level names read by get_html().
    headers = {'User-Agent': '(KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}
    base_url = 'http://www.kshome.com.cn:8087/soldlist.aspx'

    # Mode 'w' truncates any previous result, replacing the earlier
    # delete-then-append sequence. The ``with`` block guarantees the file is
    # closed even if a request or parse step raises.
    # ``f_point`` stays a module-level name because parse_html() writes to it.
    with open(file_path, 'w', encoding='utf-8-sig', newline='') as f_point:
        csv.writer(f_point).writerow(['挂牌编号','行政区','房屋座落','房屋用途','建筑面积','售价(万)','上架时间'])
        # Number of pages to scrape; the total page count could also be
        # read dynamically from the site.
        for i in range(5):
            get_html({'page': i + 1})
复制代码
|
|