# The code is as follows:
import time

from openpyxl import Workbook
from selenium import webdriver
from selenium.webdriver.common.by import By
# Module-level accumulators filled in by Parse(): one entry per job posting.
# Three independent empty lists, created in a single unpacking assignment.
jobList, workList, hrefList = [], [], []
def Parse():
    """Scrape job postings from zhipin.com and save them to an Excel workbook.

    Opens Firefox, searches the site for a hard-coded keyword, and reads the
    title, company and salary of every ``<li>`` in the result list. It then
    visits each posting's detail page for the header paragraph, publish time
    and description text. One row per posting is written to
    '招聘信息表.xlsx' (relative path).

    Side effects:
        Appends to the module-level ``jobList``, ``hrefList`` and
        ``workList``; drives a real browser; writes a file.

    Raises:
        Nothing is caught here, so any Selenium lookup failure propagates.
        The browser session is shut down in every case.

    Note:
        The accumulators are module globals and the detail loop walks all of
        ``hrefList``, so a second call in the same process revisits links
        gathered by the first.
    """
    browser = webdriver.Firefox()
    try:
        browser.get('https://www.zhipin.com/')
        browser.maximize_window()
        time.sleep(2)

        # Type the job keyword into the search box.
        search_box = browser.find_element(
            By.CLASS_NAME, 'search-form-con'
        ).find_element(By.CLASS_NAME, 'ipt-search')
        search_box.send_keys('AI大数据师')
        time.sleep(2)

        # Submit the search form to reach the result list.
        browser.find_element(
            By.CLASS_NAME, 'search-form '
        ).find_element(By.TAG_NAME, 'button').click()

        # Keep driving the first window handle.
        browser.switch_to.window(browser.window_handles[0])

        postings = browser.find_element(
            By.CLASS_NAME, 'job-list'
        ).find_elements(By.TAG_NAME, 'li')
        for posting in postings:
            title = posting.find_element(By.CLASS_NAME, 'job-title').text
            company = posting.find_element(
                By.CLASS_NAME, 'company-text'
            ).find_element(By.TAG_NAME, 'a').text
            salary = posting.find_element(By.CLASS_NAME, 'red').text
            jobList.append([title, company, salary])

            # Collect the detail-page links first; navigating away now would
            # invalidate the remaining list elements.
            link = posting.find_element(
                By.CLASS_NAME, 'info-primary'
            ).find_element(By.TAG_NAME, 'a').get_attribute('href')
            hrefList.append(link)

        for link in hrefList:
            # Pause between page loads.
            time.sleep(1)
            browser.get(link)

            header = browser.find_element(By.CLASS_NAME, 'info-primary')
            # First <p> of the header; the original comment labels it as the
            # required years of experience.
            experience = header.find_element(By.TAG_NAME, 'p').text
            published = header.find_element(By.CLASS_NAME, 'time').text
            description = browser.find_element(
                By.CLASS_NAME, 'detail-content'
            ).find_element(By.CLASS_NAME, 'text').text
            workList.append([experience, published, description])

        # Merge the list-page and detail-page fields into one row per posting.
        wb = Workbook()
        ws = wb.active
        for row, details in zip(jobList, workList):
            row.extend(details)
            ws.append(row)
        wb.save('招聘信息表.xlsx')
    finally:
        # quit() ends the whole driver session, not just the current window,
        # so no browser/driver process is left behind on success or failure.
        browser.quit()
# Kick off the scrape. There is no __main__ guard, so this call also runs
# whenever the file is imported.
Parse()