Python爬虫案例Demo——Boss直聘信息的爬取

这是我学习爬虫第三天写的一个案例。请注意:cookie 过期后代码将无法正常抓取数据,届时请替换成自己浏览器中的 cookie 即可;截至发文时,代码可以正常运行。
代码如下:

# 延时防止Ip被封,同时写入CSV文件中
import requests
import time,csv,pprint,re,json

# Search-results URL; the page number is appended as a `page` query
# parameter, e.g.
# https://www.zhipin.com/c100010000/?query=Web%E5%89%8D%E7%AB%AF&page=4
url = "https://www.zhipin.com/c100010000/?query=Web%E5%89%8D%E7%AB%AF"

# Session cookie copied from a browser. Once it expires the scraper stops
# working; replace it with a fresh value from your own browser session.
cookie_str = "lastCity=101280100; __c=1579014251; __g=-; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1578649034,1578765623,1579014251; __l=l=%2Fwww.zhipin.com%2Fguangzhou%2F&r=https%3A%2F%2Fcn.bing.com%2F&friend_source=0&friend_source=0; __a=58611270.1578649034.1578765623.1579014251.48.3.4.48; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1579014289; __zp_stoken__=9af4tzqlX58fbH%2Bf%2BsKBO%2ByL6anOmthiJ48g04bm4JIOvKd1PXCwoRlNJy0uVQIfwBjvkXSjsL1CbtGIyRY%2FSB7o98c8vMTZRvd3zeXK7Db2qjIEDsJQJl%2FbBjSKMCSZw6lI; __zp_sseed__=23gHsZTrFQC+PcxKy/4q0mEXGXEhThTPP4I9fYPdK84=; __zp_sname__=c4f19eee; __zp_sts__=1579014301444"

headers = {
    "origin": "https://www.zhipin.com",
    "referer": "https://www.zhipin.com/c100010000/?query=Web%E5%89%8D%E7%AB%AF&ka=sel-city-100010000",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
    "cookie": cookie_str,
}

# Output file and its header row (address, experience, education).
OUTPUT_PATH = "Boss招聘信息.csv"
CSV_HEADER = ["公司地址", "经验", "学历"]

# Captures three fields separated by `<em class="vline"></em>` inside a <p>:
#   1. address    - Chinese characters and spaces
#   2. experience - digits, '-', and Chinese characters ("1-3年", "经验不限")
#   3. education  - Chinese characters
# The hyphen is escaped on purpose: an unescaped `+-年` inside a character
# class is a *range* from '+' to '年' and would also accept ASCII letters
# and punctuation.
_JOB_INFO_RE = re.compile(
    r'<p>([\u4e00-\u9fa5 ]+)<em class="vline"></em>'
    r'([\d\-\u4e00-\u9fa5]+)<em class="vline"></em>'
    r'([\u4e00-\u9fa5]+)'
)


def page_url(page):
    """Return the search-results URL for the given page number."""
    return "{}&page={}".format(url, page)


def parse_jobs(html):
    """Extract (address, experience, education) tuples from a results page.

    Returns an empty list when nothing in ``html`` matches.
    """
    return _JOB_INFO_RE.findall(html)


def append_rows(path, rows):
    """Append ``rows`` to the CSV file at ``path``.

    The header row is written only when the file is new or empty, so
    repeated runs do not scatter duplicate headers through the data.
    """
    # newline="" lets the csv module control line endings; without it,
    # Windows output gets a blank line after every row.
    with open(path, mode="a", encoding="gb18030", newline="") as f:
        writer = csv.writer(f)
        # In append mode the stream starts at end-of-file, so a position
        # of 0 means there is no content yet.
        if f.tell() == 0:
            writer.writerow(CSV_HEADER)
        writer.writerows(rows)


def scrape(page_count=1, delay=5):
    """Fetch ``page_count`` result pages and append their rows to the CSV.

    Pages are numbered from 1. ``delay`` is the pause in seconds between
    consecutive requests, used to avoid getting the IP banned.
    """
    for page in range(1, page_count + 1):
        if page > 1:
            time.sleep(delay)
        form = {
            "query": "Web前端",
            "page": str(page),
            "ka": "page-" + str(page),
        }
        # The timeout keeps a stalled connection from hanging the script.
        response = requests.post(
            url=page_url(page), headers=headers, data=form, timeout=10
        )
        response.encoding = response.apparent_encoding
        html = response.text
        # Dump the raw page: if no rows are extracted, this shows what the
        # server actually returned.
        pprint.pprint(html)
        rows = parse_jobs(html)
        pprint.pprint(rows)
        for address, work_year, education in rows:
            print(address, work_year, education)
        append_rows(OUTPUT_PATH, rows)


if __name__ == "__main__":
    scrape()

Pycharm中代码的运行效果是:
在这里插入图片描述
最后生成的文件的效果是:
在这里插入图片描述
依旧有点丑哈!

发布了84 篇原创文章 · 获赞 69 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/weixin_43862765/article/details/103981450