Demostración básica de cómo escribir archivos desde un rastreador web (crawler) en Python

Ejemplo de cómo guardar los datos obtenidos en un archivo txt, un archivo json, un archivo csv, etc.:

import requests,json,csv
from retrying import retry

# Endpoint that receives the paging form as a POST request.
url = 'http://www.cninfo.com.cn/new/disclosure'

# Request headers. No ``Cookie`` entry is kept here on purpose: requests does
# not regenerate a ``Cookie`` header that is already present, so a hard-coded
# one in this dict would silently override the ``cookies`` mapping below
# whenever both are passed to the same request.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10134) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
}

# Single source of cookies for the request.
# NOTE(review): these look like values captured from one browser session and
# presumably expire -- confirm and refresh before relying on them.
cookies = {
    'JSESSIONID': 'F17AE1B1A669C109B80E291DC8E01260',
    'insert_cookie': '37836164',
    'routeId': '.uc1',
    '_sp_ses.2141': '*',
    '_sp_id.2141': 'd269a1fb-8640-498a-a381-8633cb7bbe47.1665656359.1.1665656359.1665656359.20c72bb8-6534-47a6-9698-3a39e640286c',
}

# Columns copied from every announcement record, in output order.
_FIELDS = ('announcementTitle', 'announcementTypeName', 'secName')


@retry(stop_max_attempt_number=3)
def _fetch_page(page):
    """Fetch one listing page and return its announcement records as a flat list.

    Only this network call is retried (up to three attempts). Retrying the
    whole multi-page loop would re-append rows of pages that had already been
    written before the failure.

    Args:
        page: Page number sent as the ``pageNum`` form field.

    Returns:
        A list with every element of every group found under the
        ``classifiedAnnouncements`` key of the JSON response.

    Raises:
        requests.RequestException: on timeout, connection failure or an HTTP
            error status, once the retry attempts are exhausted.
    """
    data = {
        'column': 'szse_latest',
        'pageNum': page,
        'pageSize': '30',
        'sortName': '',
        'sortType': '',
        'clusterFlag': 'true',
    }
    res = requests.post(url, data=data, cookies=cookies, headers=headers, timeout=1)
    # Turn 4xx/5xx answers into exceptions so they are retried instead of
    # failing later while decoding an error page as JSON.
    res.raise_for_status()
    return [record for group in res.json()['classifiedAnnouncements'] for record in group]


def main():
    """Download pages 1-3 of the listing and append the records to ``./data.csv``.

    The header row is written only when the file is still empty, so neither
    the per-page appends nor repeated runs insert extra header rows in the
    middle of the data.
    """
    for page in range(1, 4):
        print(page)
        records = _fetch_page(page)

        # Write to a CSV file, row by row.
        with open('./data.csv', 'a', encoding='utf-8', newline='') as f:
            writer = csv.writer(f)
            # In append mode the stream starts at the end of the file, so a
            # position of 0 means the file is new or empty.
            if f.tell() == 0:
                writer.writerow(_FIELDS)
            for record in records:
                writer.writerow([record[name] for name in _FIELDS])

        # --- Alternative outputs (disabled); swap in for the CSV block above ---
        #
        # Write to a txt file, one announcement per line:
        # with open('./data.txt', 'a+', encoding='utf-8') as f:
        #     for record in records:
        #         f.write('  '.join(record[name] for name in _FIELDS) + '\n')
        #
        # Write to a JSON file. Mode 'w' truncates the file, so inside this
        # loop each page replaces the previous one; collect the records of
        # all pages first if every page must be kept:
        # with open('./data.json', 'w', encoding='utf-8') as f:
        #     items = [{name: record[name] for name in _FIELDS} for record in records]
        #     json.dump(items, f, ensure_ascii=False, indent=2)
        #
        # Write to a CSV file from dictionaries:
        # with open('./data.csv', 'a', encoding='utf-8', newline='') as f:
        #     writer = csv.DictWriter(f, fieldnames=_FIELDS, extrasaction='ignore')
        #     if f.tell() == 0:
        #         writer.writeheader()
        #     writer.writerows(records)

# Run the download only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Supongo que te gusta

Origen: blog.csdn.net/weixin_45387160/article/details/127417368
Recomendado
Clasificación