This project is simpler than the Food and Drug Administration one, but when I crawled the URL http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx, each output turned out to be quite long — a lesson learned.
The second point: the results can be written to a text file as JSON objects.
# -*- coding: utf-8 -*-
import requests
import json
if __name__ == '__main__':
    # Query the KFC store-locator API (paged POST endpoint) for a
    # user-supplied address keyword and dump all result pages to
    # "<keyword>.json".
    all_data_list = []
    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    path = input('请输入要查询的地址:')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
    }
    # Request parameters: fetch pages 1-4, 20 stores per page.
    for page_index in range(1, 5):
        data = {
            'cname': '',
            'pid': '',
            'keyword': path,                # address keyword to search for
            'pageIndex': str(page_index),   # page number (API expects a string)
            'pageSize': '20',               # stores per page
        }
        response = requests.post(url=url, headers=headers, data=data)
        # Show the status code for each page request.
        print(response.status_code)
        # Fail fast on HTTP errors *before* parsing the body as JSON;
        # an error page would otherwise raise a confusing decode error
        # (or get silently appended to the results).
        response.raise_for_status()
        all_data_list.append(response.json())

    file_name = path + '.json'
    # The JSON contains Chinese text, so ensure_ascii=False keeps it
    # human-readable; `with` guarantees the file is closed on all paths.
    with open(file_name, 'w', encoding='utf-8') as fp:
        json.dump(all_data_list, fp=fp, ensure_ascii=False)
    print(file_name, "完成")