Original content takes effort; please keep a link to this article when re-posting.

A crawler that scrapes the detailed data of every warehouse listed on Kufangwuyou (库房无忧).
# coding: utf-8
import os
import pandas as pd
import requests
from utils.geolocataion_converting import converting_bd09_wgs84
from utils.read_write import writeOneJSON, readJson, eachFile
from utils.time_change import timestamp_datetime
'''
This file scrapes the warehouse listings from the Kufangwuyou site.
'''
# Work from the project root (the author's local path).
os.chdir(r'D:\project\jianguiyuan\\')

# A desktop Chrome User-Agent so the site serves the requests normally.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
                  ' Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'}

def query(district, street, houseType):
    '''POST one district/street/houseType query and save non-empty results as JSON.'''
    try:
        url = 'http://sz.kufangwuyou.com/house/query'
        # "regin" (sic) is the field name the site's API expects;
        # 440300 is Shenzhen's city code.
        data = {"regin": district, "street": street, "level": 5, "houseType": houseType,
                "city": '440300', "start": "0", "pageSize": '10000'}
        r = requests.post(url, headers=headers, json=data)
        response = r.json()
        if response['result']['totalCount'] > 0:
            file = str(houseType) + '_' + str(district) + '_' + str(street) + '_'
            writeOneJSON(response['result'], json_dir + file + '.json')
    except Exception as e:
        print(e)
        print(district)
        print(street)
        print(houseType)
        # Retry just this request rather than restarting the whole sweep.
        query(district, street, houseType)

def main_find():
    '''Walk every district/street pair for each of the three listing types.'''
    child = readJson(child_json)
    result = child['result']
    for houseType in [1, 2, 5]:
        for one in result:
            district = one['areaId']
            for street in one['child']:
                street_id = street['areaId']
                query(district, street_id, houseType)
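
# data/child.json is not shown in the post; from the loop above it should be
# roughly this shape (placeholders, not real IDs):
# {"result": [{"areaId": <district id>, "child": [{"areaId": <street id>}, ...]}, ...]}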

def json_csv():
    '''Flatten every saved JSON result into one CSV per houseType.'''
    json_files = eachFile(json_dir)
    for file in json_files:
        # File names start with the houseType ('1_...'), so group CSVs by it.
        path = file.split('_')[0]
        json_data = readJson(json_dir + file)
        data = json_data['data']
        df = pd.DataFrame.from_dict(data, orient='columns')
        df['createTime'] = df['createTime'].map(timestamp_datetime)
        df['lastUpdateTime'] = df['lastUpdateTime'].map(timestamp_datetime)
        # Append headerless so repeated runs keep extending the same CSV.
        df.to_csv(csv_dir + path + '.csv', index=False, mode='a', header=False)

if __name__ == "__main__":
    json_dir = 'D:\\data\\query\\json无空\\'
    csv_dir = 'D:\\详细信息\\'
    child_json = 'data/child.json'
    kufang = '厂房.csv'
    # main_find() is expected to have populated json_dir already; here the
    # factory CSV is re-projected from Baidu BD-09 to WGS-84 and the saved
    # JSON is flattened.  (pandas >= 1.3 spells error_bad_lines=False as
    # on_bad_lines='skip'.)
    converting_bd09_wgs84(pd.read_csv(csv_dir + kufang, error_bad_lines=False),
                          csv_dir + '厂房BD09-WGS84.csv')
    json_csv()
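
The script leans on a small local `utils` package that the post doesn't include. Below is a minimal sketch of what `utils.read_write` and `utils.time_change` would need to provide for the code above to run; the bodies are my guesses (the flat directory layout and millisecond timestamps are assumptions, not something the post confirms).

# Hypothetical reconstruction of utils.read_write and utils.time_change.
import json
import os
import time


def readJson(path):
    # Parse one JSON file (the saved query results, data/child.json).
    with open(path, encoding='utf-8') as f:
        return json.load(f)


def writeOneJSON(obj, path):
    # Dump a single object to disk, keeping Chinese text readable.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(obj, f, ensure_ascii=False)


def eachFile(dir_path):
    # List the file names in a directory (assumes a flat layout).
    return os.listdir(dir_path)


def timestamp_datetime(ts):
    # Format a Unix timestamp; assumes milliseconds -- drop the
    # division if the API returns seconds.
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts / 1000))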
If you'd like help scraping data, message me privately; for now I only have the Shenzhen data.
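
As an appendix, `converting_bd09_wgs84` from `utils.geolocataion_converting` isn't shown either. The usual route from Baidu's BD-09 coordinates to WGS-84 is the published BD-09 → GCJ-02 inverse followed by subtracting the standard GCJ-02 offset model once; here is a sketch under those assumptions, with the `lng`/`lat` column names guessed rather than taken from the post.

# Hypothetical sketch of utils.geolocataion_converting.  The constants are
# the standard published ones; the DataFrame column names are assumptions.
import math

X_PI = math.pi * 3000.0 / 180.0
A = 6378245.0                   # GCJ-02 reference ellipsoid semi-major axis
EE = 0.00669342162296594323     # eccentricity squared


def _transform_lat(x, y):
    ret = (-100.0 + 2.0 * x + 3.0 * y + 0.2 * y * y + 0.1 * x * y
           + 0.2 * math.sqrt(abs(x)))
    ret += (20.0 * math.sin(6.0 * x * math.pi)
            + 20.0 * math.sin(2.0 * x * math.pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(y * math.pi)
            + 40.0 * math.sin(y / 3.0 * math.pi)) * 2.0 / 3.0
    ret += (160.0 * math.sin(y / 12.0 * math.pi)
            + 320.0 * math.sin(y * math.pi / 30.0)) * 2.0 / 3.0
    return ret


def _transform_lng(x, y):
    ret = (300.0 + x + 2.0 * y + 0.1 * x * x + 0.1 * x * y
           + 0.1 * math.sqrt(abs(x)))
    ret += (20.0 * math.sin(6.0 * x * math.pi)
            + 20.0 * math.sin(2.0 * x * math.pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(x * math.pi)
            + 40.0 * math.sin(x / 3.0 * math.pi)) * 2.0 / 3.0
    ret += (150.0 * math.sin(x / 12.0 * math.pi)
            + 300.0 * math.sin(x / 30.0 * math.pi)) * 2.0 / 3.0
    return ret


def bd09_to_wgs84(lng, lat):
    # Step 1: Baidu's published inverse, BD-09 -> GCJ-02.
    x, y = lng - 0.0065, lat - 0.006
    z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * X_PI)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * X_PI)
    gcj_lng, gcj_lat = z * math.cos(theta), z * math.sin(theta)
    # Step 2: subtract the GCJ-02 offset once (approximate inverse).
    dlat = _transform_lat(gcj_lng - 105.0, gcj_lat - 35.0)
    dlng = _transform_lng(gcj_lng - 105.0, gcj_lat - 35.0)
    radlat = gcj_lat / 180.0 * math.pi
    magic = 1 - EE * math.sin(radlat) ** 2
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((A * (1 - EE)) / (magic * sqrtmagic) * math.pi)
    dlng = (dlng * 180.0) / (A / sqrtmagic * math.cos(radlat) * math.pi)
    return gcj_lng - dlng, gcj_lat - dlat


def converting_bd09_wgs84(df, out_path):
    # Interface matched to the call in the main script: take the DataFrame,
    # convert its coordinate columns, write a new CSV.
    df['lng'], df['lat'] = zip(*df.apply(
        lambda row: bd09_to_wgs84(row['lng'], row['lat']), axis=1))
    df.to_csv(out_path, index=False)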