爬虫:腾讯招聘

#mysqlhelper.py
import pymysql

class MysqlHelper(object):
    """Small wrapper around one PyMySQL connection and one cursor.

    The connection is opened in ``__init__`` and released by ``close()``,
    which is also called from ``__del__`` as a last resort.
    """

    def __init__(self, host='127.0.0.1', port=3306, user='root',
                 password='123456', database='py1011', charset='utf8'):
        """Open the connection and create a cursor.

        Every parameter is forwarded to ``pymysql.connect``. The defaults are
        the values that were previously hard-coded, so ``MysqlHelper()`` keeps
        connecting to the same database as before.
        """
        # Set both attributes first: if pymysql.connect() raises, the
        # half-built instance must still be safe for close()/__del__.
        self.db = None
        self.cursor = None
        self.db = pymysql.connect(host=host, port=port, user=user,
                                  password=password, database=database,
                                  charset=charset)
        self.cursor = self.db.cursor()

    def execute_modify_sql(self, sql, data=None):
        """Execute one data-modifying statement and commit it.

        :param sql: statement text, using ``%s`` placeholders for ``data``.
        :param data: values for the placeholders; ``None`` when the statement
            has no placeholders.
        :raises Exception: whatever ``execute``/``commit`` raised; the
            transaction is rolled back before the error is re-raised.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        # getattr: __del__ may run on an instance whose __init__ never ran.
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Drop the references first so a second call becomes a no-op.
        self.cursor = None
        self.db = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            if db is not None:
                db.close()

    def __del__(self):
        """Release the database resources when the helper is collected."""
        self.close()

if __name__ == '__main__':
    # Manual check when this module is run directly: constructing the helper
    # opens the database connection, so connection problems surface here.
    conn = MysqlHelper()
    # Example insert, left disabled:
    # conn.execute_modify_sql('insert into lianjiaxinxi(title) VALUE (%s)', data=('huzeqi hehehe'))
import requests
import re
from lxml import etree
import mysqlhelper

# Crawl the Tencent recruitment listing pages, open every linked detail page
# and store one row per position through the shared MysqlHelper.
myhelper = mysqlhelper.MysqlHelper()
sql = 'INSERT INTO tengxun (title, location_t, type_t, renshu_t,zhize,yaoqiu) VALUES (%s, %s, %s, %s, %s, %s)'

BASE_URL = 'https://hr.tencent.com/'
# The paging offset is appended by plain concatenation: the query string
# already contains percent-escapes (%E8...), so %-formatting it would fail.
LIST_URL_PREFIX = BASE_URL + 'position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start='
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'
}
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10
DETAIL_LINK_RE = re.compile(r'<a target="_blank" href="(.*?)">')
# Stored instead of the requirement list when that list cannot be read.
DEFAULT_YAOQIU = '本科学历' + '\n' + '相关工作经验1年以上'


def _labelled_cell(page, column):
    """Return the <span> text plus the first text node of a summary cell.

    ``column`` is the 1-based ``td`` index inside ``//tr[2]``. The <span>
    presumably holds the field label and the text node its value.

    :raises IndexError: if the cell, its <span> or its text node is missing.
    :raises TypeError: if the <span> has no text.
    """
    label = page.xpath('//tr[2]/td[%d]/span' % column)[0].text
    value = page.xpath('//tr[2]/td[%d]/text()' % column)[0]
    return label + value


def _join_items(items, fallback=None):
    """Join the ``.text`` of each element, every entry followed by a newline.

    lxml reports ``None`` for an element without leading text, which cannot
    be concatenated. When that happens ``fallback`` is returned if one was
    given; otherwise the element is skipped.
    """
    texts = [item.text for item in items]
    if fallback is not None and any(text is None for text in texts):
        return fallback
    return ''.join(text + '\n' for text in texts if text is not None)


def scrape_position(detail_url):
    """Fetch one position page, print its fields and return the row to store.

    :returns: ``(title, location_t, type_t, renshu_t, zhize, yaoqiu)`` in the
        column order expected by ``sql``.
    :raises IndexError: if the page lacks one of the expected table nodes.
    :raises TypeError: if a required node has no text.
    """
    response = requests.get(detail_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    page = etree.HTML(response.text)

    title = page.xpath('//tr[@class="h"]/td')[0].text
    print(title)

    location_t = _labelled_cell(page, 1)
    print(location_t)

    type_t = _labelled_cell(page, 2)
    print(type_t)

    renshu_t = _labelled_cell(page, 3)
    print(renshu_t)

    # Row 3: heading <div> followed by the duty list.
    print(page.xpath('//tr[3]/td/div')[0].text)
    zhize = _join_items(page.xpath('//tr[3]/td/ul/li'))
    print(zhize)

    # Row 4: heading <div> followed by the requirement list.
    print(page.xpath('//tr[4]/td/div')[0].text)
    yaoqiu = _join_items(page.xpath('//tr[4]/td/ul/li'), fallback=DEFAULT_YAOQIU)
    print(yaoqiu)
    print('-' * 50)

    return (title, location_t, type_t, renshu_t, zhize, yaoqiu)


for i in range(0, 40, 10):
    # ``i`` is the paging offset of the listing (0, 10, 20, 30).
    list_url = LIST_URL_PREFIX + str(i) + '#a'
    html = requests.get(list_url, headers=HEADERS, timeout=REQUEST_TIMEOUT).text

    for zhiwei_item in DETAIL_LINK_RE.findall(html):
        zhiwei_url = BASE_URL + zhiwei_item
        try:
            data = scrape_position(zhiwei_url)
        except (IndexError, TypeError) as exc:
            # The link pattern is loose and may match pages that are not
            # position details; skip those instead of aborting the crawl.
            print('skipped %s: %r' % (zhiwei_url, exc))
            continue
        myhelper.execute_modify_sql(sql, data)

猜你喜欢

转载自blog.csdn.net/cheng535/article/details/81837501