Scraping Lagou.com job data with Python

import requests
import re  # regular-expression matching
from bs4 import BeautifulSoup

# Spoof a browser by building a request header
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
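# Optional sanity check (my addition, not in the original post): the homepage
# should return HTTP 200 when the spoofed User-Agent is accepted.
# print(requests.get('https://www.lagou.com/', headers=headers).status_code)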
def local():
    url = "https://www.lagou.com/"
    response = requests.get(url, headers=headers)
    result = response.text
    # Regex that captures each job-category link and its title on the homepage
    rep = r' <a href="(.*?)" data-lg-tj-id="4A00" data-lg-tj-no=".*?" data-lg-tj-cid="idnull">(.*?)</a>'
    return re.findall(rep, result)
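local() fetches the Lagou homepage and regex-matches every job-category anchor, so it returns (url, title) pairs. To get a feel for what it yields, a quick check like the one below works (the output shape is illustrative and depends on the live page markup):

for url, title in local()[:5]:
    print(title, url)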

def position(url):
    response = requests.get(url, headers=headers)
    ggg = []
    soup = BeautifulSoup(response.text, 'html.parser')
    for news in soup.select('.default_list'):  # locate each job-listing block
        place = news.find_all(class_='add')[0].text  # work location
        ggg.append(place)
        companyName = news.select('a')[1].text  # company name
        ggg.append(companyName)
        companyClass = news.find_all(class_='industry')[0].text.replace(' ', '')  # industry
        ggg.append(companyClass)
        companySpeak = news.find_all(class_='li_b_r')[0].text  # company tagline
        ggg.append(companySpeak)
        workMoney = news.find_all(class_='money')[0].text  # salary range
        ggg.append(workMoney)
        workNeed = news.find_all(class_='li_b_l')[0].text.split('k')[-1]  # experience/education requirements
        ggg.append(workNeed)
        link = news.find_all(class_='position_link')[0]['href']  # job detail page
        ggg.append(link)

    return ggg
for url, title in local():
    result = position(url)
    for item in result:
        print(item)
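The loop above only prints each scraped field. To persist the results instead, here is a minimal sketch (my own extension using the standard-library csv module; the column names and output filename are assumptions) that regroups position()'s flat return list, which carries seven fields per listing, into one CSV row per job:

import csv

FIELDS = ['location', 'company', 'industry', 'review', 'salary', 'requirements', 'link']

def save_csv(flat, path='lagou.csv'):
    # position() appends 7 fields per listing; regroup the flat list into rows
    rows = [flat[i:i + 7] for i in range(0, len(flat), 7)]
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(FIELDS)
        writer.writerows(rows)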

There are still some shortcomings here, which I will work to improve; this is offered for your reference only!

Reposted from blog.csdn.net/liuzemeeting/article/details/79212404