Crawling job and resume data from the Teddy internal-referral platform with Python

1. Target website

Website: https://www.5iai.com/#/jobList
target data
target link

2. Python program

import requests
import json
import pandas as pd

def getEnterprise(url, dataframe):
    """Fetch one page of job postings and append its rows to ``dataframe``.

    Sends a GET request to ``url`` with the module-level ``headers`` dict,
    reads the records stored under ``data -> content`` of the JSON reply and
    keeps the posting id, the enterprise short name and the position name.

    Args:
        url: Full URL of the page to fetch.
        dataframe: Rows collected so far, with the columns
            '招聘信息ID', '企业名称', '招聘岗位'.

    Returns:
        A new DataFrame with the rows of ``dataframe`` followed by the rows of
        this page, re-indexed from 0. ``dataframe`` itself is returned when
        the page carries no records.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Without a timeout a single stalled connection blocks the crawl forever.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    js = json.loads(response.text)
    js_data = (js.get('data') or {}).get('content') or []
    if not js_data:
        # A page past the last one has no records; normalizing it would give
        # a frame without the expected columns.
        return dataframe
    data = pd.json_normalize(js_data)
    # reindex (unlike DataFrame.get, which returns None on a missing key)
    # always yields the three columns, filling an absent one with NaN.
    data = data.reindex(columns=['id', 'enterpriseExtInfo.shortName', 'positionName'])
    data.columns = ['招聘信息ID', '企业名称', '招聘岗位']
    return pd.concat([dataframe, data], ignore_index=True)

def getResume(url, dataframe2):
    """Fetch one page of resumes and append its rows to ``dataframe2``.

    Sends a GET request to ``url`` with the module-level ``headers`` dict,
    reads the records stored under ``data -> content`` of the JSON reply and
    keeps the resume id, the user name and the expected position.

    Args:
        url: Full URL of the page to fetch.
        dataframe2: Rows collected so far, with the columns
            '求职者ID', '姓名', '预期岗位'.

    Returns:
        A new DataFrame with the rows of ``dataframe2`` followed by the rows
        of this page, re-indexed from 0. ``dataframe2`` itself is returned
        when the page carries no records.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Without a timeout a single stalled connection blocks the crawl forever.
    response2 = requests.get(url, headers=headers, timeout=10)
    response2.raise_for_status()
    js2 = json.loads(response2.text)
    js_data2 = (js2.get('data') or {}).get('content') or []
    if not js_data2:
        # A page past the last one has no records; normalizing it would give
        # a frame without the expected columns.
        return dataframe2
    data2 = pd.json_normalize(js_data2)
    # reindex (unlike DataFrame.get, which returns None on a missing key)
    # always yields the three columns, filling an absent one with NaN.
    data2 = data2.reindex(columns=['id', 'username', 'expectPosition'])
    data2.columns = ['求职者ID', '姓名', '预期岗位']
    return pd.concat([dataframe2, data2], ignore_index=True)

def _clean_expect_position(value):
    """Drop the first and last character and every double quote of ``value``.

    A missing value (``None`` or a float NaN) becomes an empty string; slicing
    its string form would otherwise leave the fragments 'on' or 'a'.
    """
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return ''
    return str(value)[1:-1].replace('"', '')


if __name__ == '__main__':
    # Module-level name: getEnterprise and getResume read `headers` when they
    # send their requests, so it must be defined before they are called.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'}

    # The page number is appended to each of these URL prefixes.
    url_enterprise = 'https://www.5iai.com/api/enterprise/job/public/es?pageSize=10&pageNumber='
    url_resume = 'https://www.5iai.com/api/resume/baseInfo/public/es?pageSize=10&pageNumber='

    # --- Job postings: pages 1..159 (the page count is hard-coded) ---
    dataframe = pd.DataFrame(columns=['招聘信息ID', '企业名称', '招聘岗位'])
    for i in range(1, 160):
        dataframe = getEnterprise(url_enterprise + str(i), dataframe)
    dataframe.index = dataframe.index + 1  # number the rows from 1
    print(dataframe)
    # astype(str) keeps the concatenation valid even if the ids are numeric.
    # NOTE(review): the trailing tab presumably stops spreadsheet programs
    # from reformatting long ids as numbers — confirm.
    dataframe['招聘信息ID'] = dataframe['招聘信息ID'].astype(str) + '\t'
    dataframe = dataframe.rename_axis('序号')
    dataframe.to_csv('找工作.csv', encoding='gb18030')

    # --- Resumes: pages 1..1094 (the page count is hard-coded) ---
    dataframe2 = pd.DataFrame(columns=['求职者ID', '姓名', '预期岗位'])
    for i in range(1, 1095):
        dataframe2 = getResume(url_resume + str(i), dataframe2)
    dataframe2['预期岗位'] = dataframe2['预期岗位'].apply(_clean_expect_position)
    dataframe2.index = dataframe2.index + 1  # number the rows from 1
    print(dataframe2)
    dataframe2['求职者ID'] = dataframe2['求职者ID'].astype(str) + '\t'
    dataframe2 = dataframe2.rename_axis('序号')
    dataframe2.to_csv('找人才.csv', encoding='gb18030')

3. Results

Effect
Job postings (找工作.csv, "find a job")
Job seekers (找人才.csv, "looking for talent")

Guess you like

Origin blog.csdn.net/m0_67790374/article/details/130376607