爬虫(6):爬取岗位数量

import re

import urllib.request
import urllib.parse

# Request headers sent with every search request: a desktop Chrome
# User-Agent string (presumably so the site does not reject urllib's
# default agent -- confirm against the site's behavior).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"}

# Sample of the markup the pattern below is written against:
#   <div class="rt">
#       共3491条职位
#   </div>
#
# Regex capturing the digits of the "共N条职位" summary text. It is a raw
# string so that ``\d`` is passed to the regex engine verbatim instead of
# being treated as an (invalid) string escape sequence.
jobNumre = r"共(\d+)条职位"


def getJobNum(job):
    '''
    Fetch the number of job postings reported for a keyword.

    Requests the 51job search-result page for ``job`` (all other search
    filters are hard-coded in the URL), decodes the body as GBK and
    extracts the number from the "共N条职位" summary text using the
    module-level ``jobNumre`` pattern.

    :param job: job keyword to search for, e.g. ``"python"``
    :return: posting count as a string of digits
    :raises IndexError: if the page contains no "共N条职位" summary
    :raises urllib.error.URLError: if the HTTP request fails
    '''

    # Percent-encode the keyword for use inside the URL path. The earlier
    # urlencode({'': job}) trick produced "=<keyword>", leaking a stray "="
    # into the search term.
    keyword = urllib.parse.quote(job, safe="")
    url = "https://search.51job.com/list/030200,000000,0000,00,9,99," + keyword + ",2,9.html"

    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(req) as resp:
        # Only the digits of the summary are needed, so a stray undecodable
        # byte elsewhere in the page should not abort the lookup.
        page = resp.read().decode('gbk', errors='replace')

    found = re.search(jobNumre, page)
    if found is None:
        # Same exception type the former ``findall(...)[0]`` raised, but
        # with a message that says what went wrong.
        raise IndexError("job count not found in search page for %r" % (job,))
    return found.group(1)


if __name__ == '__main__':
    # Keywords to look up; one "<keyword> = <count>" line is printed each.
    jobList = ["python", 'java', 'php', '项目经理', 'c#']

    for keyword in jobList:
        print(f"{keyword} = {getJobNum(keyword)}")

猜你喜欢

转载自blog.csdn.net/yx1179109710/article/details/80900072