爬取 爱笔智能 招聘职位

爬取爱笔智能招聘职位

http://aibee.com/cn/joinus.aspx 

 1 import requests
 2 from urllib.parse import urlencode
 3 from pyquery import PyQuery as pq
 4 from pymongo import MongoClient
 5 import json
 6 
 7 
 8 base_url = 'http://aibee.com/cn/joinus.aspx?action=jobinfo&'
 9 
10 headers = {
11     'Host': 'aibee.com',
12     'Referer': 'http://aibee.com/cn/joinus.aspx',
13     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
14     'X-Requested-With': 'XMLHttpRequest',
15 } 
16 
17 client = MongoClient()
18 db = client['aibee']
19 collection = db['aibee']
20 max_id = 50
21 
22 
23 
def get_page(id, timeout=10):
    """Fetch the job-info JSON for a single job id.

    Args:
        id: Job id, sent as the ``id`` query parameter. The name shadows the
            builtin but is kept so existing keyword callers keep working.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled server.

    Returns:
        The decoded JSON body when the server answers with HTTP 200, or
        ``None`` when the request fails, the status is not 200, or the body
        is not valid JSON.
    """
    query = urlencode({'id': id})
    url = base_url + query
    try:
        # NOTE(review): the encoded id is also sent as the request body,
        # exactly as before. Presumably the server only reads the query
        # string -- confirm before dropping ``data=``.
        response = requests.get(
            url, data=query, headers=headers, timeout=timeout
        )
    except requests.RequestException as e:
        # Covers connection errors, timeouts and other transport failures,
        # so one bad request does not abort the whole crawl.
        print('Error', e.args)
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError as e:
        # A 200 response whose body is not JSON (e.g. an HTML error page).
        print('Error', e.args)
        return None
40 
41 
def parse_page(json_1, page_id=None):
    """Yield one flattened job record per entry of a job-info response.

    Args:
        json_1: Decoded response from ``get_page``; may be ``None`` or empty,
            in which case nothing is yielded. Entries are read from its
            ``'shuzu'`` key.
        page_id: Id of the page being parsed. When it equals 1 the whole page
            is skipped. If omitted, the module-level name ``id`` is consulted,
            which is what this function implicitly did before: the script's
            main loop stores the current id in a global named ``id``.

    Yields:
        dict with the keys ``id``, ``title``, ``zhize``, ``yaoqiu``, ``dtt``
        and ``emailaddr``. ``zhize`` and ``yaoqiu`` are passed through PyQuery
        and reduced to their text (presumably HTML fragments for job
        responsibilities and requirements -- confirm against the API).
    """
    if not json_1:
        return

    if page_id is None:
        # Backward compatibility: fall back to the global loop variable set
        # by the ``__main__`` block. When the module is imported there is no
        # such global, and nothing is skipped (same as before).
        page_id = globals().get('id')
    if page_id == 1:
        return

    # A missing or null 'shuzu' means "no entries" rather than a TypeError.
    for item in json_1.get('shuzu') or []:
        yield {
            'id': item.get('id'),
            'title': item.get('title'),
            'zhize': pq(item.get('zhize')).text(),
            'yaoqiu': pq(item.get('yaoqiu')).text(),
            'dtt': item.get('dtt'),
            'emailaddr': item.get('emailaddr'),
        }
58 
59 
def write_to_file(content):
    """Append ``content`` to ``aibee.json`` as one JSON document per line.

    Args:
        content: JSON-serialisable object (the script passes one job dict).

    The file is opened in append mode with UTF-8 encoding, and
    ``ensure_ascii=False`` keeps non-ASCII text readable instead of
    escaping it. The ``with`` block closes the file, so no explicit
    ``close()`` is needed.
    """
    with open('aibee.json', 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
64 
def save_to_mongo(result):
    """Insert one job record into the module-level Mongo ``collection``.

    Args:
        result: dict describing a single job. PyMongo adds an ``_id`` key to
            it in place when the document has none.

    Uses ``insert_one`` because ``Collection.insert`` is deprecated and was
    removed in PyMongo 4. Prints a confirmation when the server acknowledged
    the write.
    """
    outcome = collection.insert_one(result)
    if outcome.acknowledged:
        print('Saved to Mongo')
68 
69 
if __name__ == '__main__':
    # Crawl job ids 1..max_id, then print, log to file and store each record.
    # NOTE: the loop variable must stay named ``id``. It is a module-level
    # global here, and parse_page consults that global to decide whether to
    # skip a page.
    for id in range(1, max_id + 1):
        for result in parse_page(get_page(id)):
            print(result)
            write_to_file(result)
            save_to_mongo(result)

猜你喜欢

转载自www.cnblogs.com/wanglinjie/p/9226880.html