利用Beautiful Soup爬取招聘网站数据

import requests
from bs4 import BeautifulSoup
import pandas as pd
from pandas import DataFrame

# 51job search-results page; the path segment is the double-percent-encoded
# keyword "前端开发".
# NOTE(review): the trailing '=' after ".html" looks like a paste artifact --
# confirm against the live site before removing it.
url='https://search.51job.com/list/120300,000000,0000,32,9,99,%25E5%2589%258D%25E7%25AB%25AF%25E5%25BC%2580%25E5%258F%2591,2,1.html='


def fetch_html(page_url: str, timeout: float = 10) -> str:
    """Download *page_url* and return its body decoded as GBK.

    Args:
        page_url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (instead of silently parsing an error page).
    """
    res = requests.get(page_url, timeout=timeout)
    res.raise_for_status()
    # The script decodes the page as GBK rather than trusting the guessed
    # encoding.
    res.encoding = 'gbk'
    return res.text


def link_titles(tags) -> list:
    """Return the ``title`` attribute of the first ``<a>`` inside each tag."""
    return [tag.a['title'] for tag in tags]


def cell_texts(tags) -> list:
    """Return the text content of each tag."""
    return [tag.get_text() for tag in tags]


def build_job_table(position, company, place, salary) -> DataFrame:
    """Combine the four scraped lists into one table, one row per posting.

    The lists are stacked as rows and transposed, so lists of unequal length
    are padded with missing values rather than raising.

    Args:
        position: Job titles.
        company: Company names.
        place: Locations.
        salary: Salary descriptions.

    Returns:
        A DataFrame with the columns 职位名, 公司, 地区, 薪酬.
    """
    jobinfo = DataFrame([position, company, place, salary]).T
    # Must be ``columns``: assigning to a misspelled attribute leaves the
    # default 0..3 column labels in place.
    jobinfo.columns = ['职位名', '公司', '地区', '薪酬']
    return jobinfo


if __name__ == '__main__':
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(fetch_html(url), 'html.parser')

    # Job titles: every <p class="t1"> is used.
    position = link_titles(soup.find_all('p', class_='t1'))
    # Company, location and salary: the first match of each <span> class is
    # skipped (presumably the table's header row -- verify against the page).
    company = link_titles(soup.find_all('span', class_='t2')[1:])
    place = cell_texts(soup.find_all('span', class_='t3')[1:])
    salary = cell_texts(soup.find_all('span', class_='t4')[1:])

    jobinfo = build_job_table(position, company, place, salary)
    print(jobinfo)
    # Print the summary; a bare expression is discarded when run as a script.
    print(jobinfo.describe())

猜你喜欢

转载自www.cnblogs.com/tiankong-blue/p/11610490.html