python3拉勾网爬虫之(您操作太频繁,请稍后访问)

你是否经历过这个:
在这里插入图片描述
那就对了~
这是因为职位接口需要用 POST 方式请求,并且要带上先访问职位列表页时获得的 cookie~
所以,下面用一段简单的代码来爬拉勾~~~

 1 import requests
 2 import time
 3 import json
 4 
 5 
 6 def main():
 7     url_start = "https://www.lagou.com/jobs/list_运维?city=%E6%88%90%E9%83%BD&cl=false&fromSearch=true&labelWords=&suginput="
 8     url_parse = "https://www.lagou.com/jobs/positionAjax.json?city=成都&needAddtionalResult=false"
 9     headers = {
10         'Accept': 'application/json, text/javascript, */*; q=0.01',
11         'Referer': 'https://www.lagou.com/jobs/list_%E8%BF%90%E7%BB%B4?city=%E6%88%90%E9%83%BD&cl=false&fromSearch=true&labelWords=&suginput=',
12         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
13     }
14     for x in range(1, 5):
15         data = {
16             'first': 'true',
17             'pn': str(x),
18             'kd': '运维'
19                 }
20         s = requests.Session() # 创建一个session对象
21         s.get(url_start, headers=headers, timeout=3)  # 用session对象发出get请求,请求首页获取cookies
22         cookie = s.cookies  # 为此次获取的cookies
23         response = s.post(url_parse, data=data, headers=headers, cookies=cookie, timeout=3)  # 获取此次文本
24         time.sleep(5)
25         response.encoding = response.apparent_encoding
26         text = json.loads(response.text)
27         info = text["content"]["positionResult"]["result"]
28         for i in info:
29             print(i["companyFullName"])
30             companyFullName = i["companyFullName"]
31             print(i["positionName"])
32             positionName = i["positionName"]
33             print(i["salary"])
34             salary = i["salary"]
35             print(i["companySize"])
36             companySize = i["companySize"]
37             print(i["skillLables"])
38             skillLables = i["skillLables"]
39             print(i["createTime"])
40             createTime = i["createTime"]
41             print(i["district"])
42             district = i["district"]
43             print(i["stationname"])
44             stationname = i["stationname"]
45 
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

猜你喜欢

转载自www.cnblogs.com/kuba8/p/10808023.html