用 Python 实现腾讯招聘信息爬取

import requests
from lxml import etree
from day5 import helper
def _join_node_text(nodes):
    """Concatenate the ``text`` of every element in *nodes*, skipping ``None``.

    An element's ``text`` is ``None`` when it has no leading text, and
    ``''.join`` rejects ``None`` items, so they are filtered out here.
    """
    return ''.join(node.text for node in nodes if node.text is not None)


def tx_zhaoping(num):
    """Crawl Tencent job postings for "python" and store them in MySQL.

    For every listing page the function follows each posting link, extracts
    location, job type, head count, title, duties and requirements from the
    detail page, prints the title/duties/requirements and inserts one row
    into the ``tx_zhaoping`` table.

    :param num: exclusive upper bound for the ``start`` offset in the listing
        URL. The caller passes ``10 * pages``, so offsets advance in steps of
        10 (assumes the site shows 10 postings per listing page -- confirm).
    """
    headers = {
        'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    x_url = 'https://hr.tencent.com/position.php?keywords=python&lid=0&tid=0&start={}#a'
    # Common prefix of every detail-page XPath used below.
    detail_table = '//div[@ id ="position_detail"]/div[1]/table'
    sql = 'insert into tx_zhaoping(didian,leixing,renshu,position,duty,requirement)values (%s,%s,%s,%s,%s,%s)'
    # Without a timeout a stalled connection would block the crawl forever.
    timeout = 10

    # One helper (one DB connection) for the whole crawl, not one per page.
    myhelper = helper.MysqlHelper()

    # Step by 10 so consecutive requests do not fetch overlapping listing
    # pages and insert the same postings again.
    for start in range(0, num, 10):
        response = requests.get(x_url.format(start), headers=headers, timeout=timeout)
        html_ele = etree.HTML(response.text)
        # Relative URLs of the detail pages linked from this listing page.
        href_list = html_ele.xpath('//div[@id="position"]/div[1]/table/tr/td/a/@href')

        for href in href_list:
            next_url = 'https://hr.tencent.com/' + href
            detail_response = requests.get(next_url, headers=headers, timeout=timeout)
            xiangqing_ele = etree.HTML(detail_response.text)

            # Row 2 holds, in order: location, job type, head count.
            summary = xiangqing_ele.xpath(detail_table + '/tr[2]/td/text()')
            # Row 1 holds the position title.
            title_cells = xiangqing_ele.xpath(detail_table + '/tr[1]/td')
            if len(summary) < 3 or not title_cells:
                # Layout differs from what the XPaths expect; skip this
                # posting instead of aborting the whole crawl.
                print('skipping {}: unexpected page layout'.format(next_url))
                continue

            didian, leixing, renshu = summary[:3]
            position = title_cells[0].text
            print(position)

            # Row 3: duties, row 4: requirements, each a <ul> of <li> items.
            duty_type = _join_node_text(
                xiangqing_ele.xpath(detail_table + '/tr[3]/td/ul/li'))
            print(duty_type)

            requirement_type = _join_node_text(
                xiangqing_ele.xpath(detail_table + '/tr[4]/td/ul/li'))
            print(requirement_type)

            # Persist the posting.
            data = (didian, leixing, renshu, position, duty_type, requirement_type)
            myhelper.execute_modify_sql(sql, data)


if __name__ == '__main__':
    # Ask how many listing pages to crawl; the crawler takes an offset bound,
    # so the page count is scaled by 10 before being handed over.
    page_count = int(input('请输入你要爬取的招聘信息页数'))
    tx_zhaoping(10 * page_count)

下面是上面脚本通过 `from day5 import helper` 导入的数据库辅助模块(helper.py)的代码:

import pymysql

class MysqlHelper(object):
    """Thin wrapper around a single PyMySQL connection and cursor.

    The connection is opened in ``__init__`` and released by ``close()``,
    which is also invoked from ``__del__`` as a last resort.
    """

    def __init__(self):
        # Pre-set both attributes so close()/__del__ stay safe even when
        # connecting fails part-way through.
        self.db = None
        self.cursor = None
        # NOTE(review): credentials are hard-coded; consider loading them
        # from configuration or the environment instead.
        self.db = pymysql.connect(host='localhost', port=3306, user='root', password='123456', database='lianjia', charset='utf8')
        self.cursor = self.db.cursor()

    def execute_modify_sql(self, sql, data):
        """Run one modifying statement and commit it.

        :param sql: statement with ``%s`` placeholders.
        :param data: parameters bound to the placeholders.
        :raises Exception: whatever the driver raises; the transaction is
            rolled back before the error is re-raised.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-finished transaction on a reused connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Clear first so a second call (or __del__) becomes a no-op.
        self.cursor = None
        self.db = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if db is not None:
                db.close()

    def __del__(self):
        # A finalizer must never raise; errors while closing are ignored
        # here because nobody is left to handle them.
        try:
            self.close()
        except Exception:
            pass

# if __name__ == '__main__':
#     conn = MysqlHelper()
#     conn.execute_modify_sql('insert into lianjiaxinxi(title) VALUE (%s)', data=('huzeqi hehehe'))

猜你喜欢

转载自blog.csdn.net/majiexiong/article/details/81838582