# 16. ajax_case01

# 抓取北京市2018年积分落户公示名单
# 'http://www.bjrbj.gov.cn/integralpublic/settlePerson'

import csv
import json
import requests

# Open the output CSV once at import time; `fw` and `writer` are module-level
# handles shared with the functions below.
# newline='' lets the csv module control line endings itself (without it,
# Windows output gets an empty line after every row), and an explicit UTF-8
# encoding keeps the Chinese text writable regardless of the platform's
# default locale encoding.
fw = open('luohu.csv', 'w', newline='', encoding='utf-8')
writer = csv.writer(fw)
# Header row; the column order matches the data rows appended later.
writer.writerow(['id', 'name', 'birthday', 'company', 'score'])

def get_publicity(page_number):
    """Fetch one page of the publicity list and append its records to the CSV.

    Requests the JSON endpoint, then prints every record under the payload's
    ``rows`` key and writes it through the module-level ``writer``.

    Args:
        page_number: Page index supplied by the caller; it is multiplied by
            100 and substituted into the ``page`` query parameter.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or when the
            5-second timeout expires.
        ValueError: If the response body is not valid JSON.
        KeyError: If the payload lacks ``rows`` or a record lacks a field.
    """
    # NOTE(review): `offset` is hard-coded to 0 while `page` receives
    # page_number * 100 -- this looks like the two values may be swapped.
    # Confirm against the endpoint's paging contract before changing it.
    url = 'http://www.bjrbj.gov.cn/integralpublic/settlePerson/settlePersonJson?sort=pxid&order=asc&limit=100&offset=0&name=&rows=100&page={}'.format(page_number * 100)

    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    }

    response = requests.get(url, headers=header, timeout=5)
    # Fail loudly on an HTTP error instead of trying to parse an error page
    # as JSON and surfacing a misleading decode error.
    response.raise_for_status()

    # Decode from the raw bytes rather than from the guessed text encoding.
    result = response.json()

    for item in result['rows']:
        # Same column order as the CSV header: id, name, birthday, company,
        # score. Built as a list so the builtin `id` is not shadowed.
        row = [
            item['pxid'],
            item['name'],
            item['csrq'],
            item['unit'],
            item['score'],
        ]
        print(*row)
        writer.writerow(row)

def main():
    """Download result pages 0 through 60, then close the output file.

    Calls ``get_publicity`` once per page index. The module-level file
    ``fw`` is closed afterwards, so ``main`` is meant to be run once per
    process.
    """
    try:
        for i in range(61):
            get_publicity(i)
    finally:
        # Close (and thereby flush) the CSV even when a page request fails,
        # rather than relying on interpreter shutdown to do it.
        fw.close()

# Run the scrape only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

# (Blog footer: "You may also like")

# Reposted from www.cnblogs.com/hankleo/p/10646539.html