# Scrape 51job job-posting counts, plot them as a chart, and email the result

"""
Scrape job-posting counts from 51job, plot them, and send the chart by email.
"""
import re
import matplotlib.pyplot as plt
from my_email import Email

import requests
from lxml import etree


def get_job_data():
    """Fetch the 51job search-result count for a fixed list of keywords.

    For every keyword the first search-results page is requested, the first
    text node under ``<div class="rt">`` is read, and the first run of digits
    in that text is taken as the job count.

    Returns:
        list[tuple[str, str | None]]: ``(keyword, job_count)`` pairs in
        keyword order. ``job_count`` is the digit string found on the page,
        or ``None`` when no digits were found. A keyword whose request or
        parsing raised (including an HTTP error status) is reported on
        stdout and left out of the list.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
        'Referer': 'https://search.51job.com/list/040000,000000,0000,00,9,99,java,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=',
    }
    keywords = ['java', 'php', 'c++', 'web 前端', 'ios', 'Android', '产品经理', '软件测试', 'UI设计师', 'Python', 'Python web',
                'Python 测试', 'Python 数据', 'Python 爬虫']
    # Compiled once; the same pattern is applied to every page.
    count_pattern = re.compile(r'\d+')
    data_list = []
    for keyword in keywords:
        try:
            # Note: the keyword is URL-encoded twice.
            keyword_quote = requests.utils.quote(requests.utils.quote(keyword))
            url = f'https://search.51job.com/list/040000,000000,0000,00,9,99,{keyword_quote},2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
            # Without a timeout a single stalled connection blocks forever.
            response = requests.get(url, headers=headers, timeout=10)
            # Do not parse an HTTP error page as if it were search results.
            response.raise_for_status()
            # Only ASCII digits are extracted below, so replacing a stray
            # undecodable byte is harmless and avoids losing the whole page.
            response_str = response.content.decode('gbk', errors='replace')

            html = etree.HTML(response_str)
            job_count_elements = html.xpath('//div[@class="rt"]/text()') if html is not None else []
            job_count_text = job_count_elements[0] if job_count_elements else ''
            count_match = count_pattern.search(job_count_text)
            job_count = count_match.group() if count_match else None
            data_list.append((keyword, job_count))
        except Exception as e:
            # Deliberately best-effort: one failing keyword must not abort
            # the remaining ones. Include the keyword so the failure is
            # traceable.
            print(f'{keyword}: {e}')
    return data_list


if __name__ == '__main__':
    data_list = get_job_data()
    print(data_list)

    # Plot a bar chart of the job count per keyword.
    # get_job_data() stores None as the count when no number was found on
    # the page; int(None) raises TypeError, so those entries are left off
    # the chart instead of aborting the run.
    plotted = [(job, int(count)) for job, count in data_list if count is not None]
    jobs = [job for job, _ in plotted]
    counts = [count for _, count in plotted]
    x = range(len(jobs))
    plt.figure(figsize=(20, 8), dpi=100)
    plt.bar(x, counts, width=0.5)
    plt.xticks(x, jobs)
    plt.grid(linestyle='--', alpha=0.5)
    plt.title('51job职位搜索')
    plt.savefig('51job.png')
    # The image is on disk now; release the figure.
    plt.close()

    # Send the email with the chart attached.
    # NOTE(review): the addresses and the empty second argument look like
    # placeholders - confirm real values before running.
    email = Email("[email protected]", "")
    subject = '51job每周职位数量统计'
    receivers = ['[email protected]', '[email protected]']
    email.send_email_img(subject, data_list, receivers, '51job.png')

# "You may also like" (blog page footer, not part of the script)

# Reposted from blog.csdn.net/zhu6201976/article/details/105408343