import re
import urllib.parse
import urllib.request

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"}

# Sample of the markup the pattern below is matched against:
#   <div class="rt"> 共3491条职位 </div>
# Raw string so that "\d" reaches the regex engine untouched instead of
# being treated as an (invalid) string escape.
jobNumre = r"共(\d+)条职位"


def buildSearchUrl(job):
    """
    Build the 51job search-result URL for one keyword.

    :param job: job keyword to search for
    :return: the URL as a string
    """
    # urlencode() with an empty key yields "=<percent-encoded keyword>".
    # NOTE(review): the leading "=" therefore ends up in the URL path;
    # kept as-is to preserve the request the script has always sent —
    # confirm the site expects it.
    keyword = urllib.parse.urlencode({'': job})
    return "https://search.51job.com/list/030200,000000,0000,00,9,99," + keyword + ",2,9.html"


def parseJobNum(html):
    """
    Extract the job count from a search-result page.

    :param html: decoded page text
    :return: the count as a string of digits (first match of ``jobNumre``)
    :raises IndexError: if the page does not contain the count marker
    """
    match = re.search(jobNumre, html)
    if match is None:
        # IndexError is what indexing an empty findall() result raised
        # before; keep the type but say what actually went wrong.
        raise IndexError("job count not found in response (pattern %r)" % jobNumre)
    return match.group(1)


def getJobNum(job, timeout=10):
    """
    Get the number of job postings for a keyword.

    :param job: job keyword to search for
    :param timeout: seconds to wait on the network before giving up
    :return: number of postings, as a string of digits
    :raises IndexError: if the count cannot be found in the page
    """
    req = urllib.request.Request(buildSearchUrl(job), headers=headers)
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(req, timeout=timeout) as response:
        # Only ASCII digits next to a fixed marker are needed, so a stray
        # undecodable byte elsewhere in the page should not abort the run.
        html = response.read().decode('gbk', errors='replace')
    return parseJobNum(html)


def main():
    """Print the posting count for a fixed list of keywords."""
    jobList = ["python", 'java', 'php', '项目经理', 'c#']
    for job in jobList:
        jobNum = getJobNum(job)
        print(job, "=", jobNum)


if __name__ == '__main__':
    main()
爬虫(6):爬取岗位数量
猜你喜欢
转载自blog.csdn.net/yx1179109710/article/details/80900072
今日推荐
周排行