Coleta de dados (web scraping) com Python e armazenamento no banco de dados MySQL

from urllib.parse import urljoin

import lxml
import pymysql
import requests
from bs4 import BeautifulSoup
# Module-level accumulator: get_path() appends the href values it finds here,
# and data_store() iterates over them.
message_list = []

def get_content():
    """Fetch the list page and return its HTML as text.

    Returns:
        str: Body of ``http://www.scetc.cn/reList`` decoded as UTF-8.

    Raises:
        requests.RequestException: If the connection fails or the server
            does not answer within the timeout.
    """
    url = "http://www.scetc.cn/reList"
    headers = {"User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"}
    # Without a timeout requests waits indefinitely on a stalled server.
    response = requests.request(url=url, headers=headers, method="GET", timeout=10)
    # Decode as UTF-8 explicitly instead of relying on requests' guess.
    response.encoding = 'utf-8'
    return response.text

def get_path():
    """Collect link targets from the list page into ``message_list``.

    Parses the HTML returned by ``get_content()`` with the ``lxml`` parser,
    selects every ``<a>`` matched by ``div[class="newsbox"] ul li a`` and
    appends its ``href`` value to the module-level ``message_list``.
    Anchors without an ``href`` attribute are skipped. Returns ``None``.
    """
    html = get_content()
    soup = BeautifulSoup(html, 'lxml')
    anchors = soup.select('div[class="newsbox"] ul li a')
    for anchor in anchors:
        # .get() avoids a KeyError on anchors that carry no href attribute.
        href = anchor.get('href')
        if href:
            message_list.append(href)

def add(name, site, time, place, major, remark):
    """Insert one record into MySQL and print a confirmation message.

    Opens a new connection to the ``test`` database on ``localhost``, runs a
    parameterized INSERT with the six arguments, commits, and closes the
    connection.

    Args:
        name: Value for the ``name`` column.
        site: Value for the ``site`` column.
        time: Value for the ``time`` column.
        place: Value for the ``place`` column.
        major: Value for the ``major`` column.
        remark: Value for the ``remark`` column.

    Raises:
        pymysql.MySQLError: If connecting or executing the statement fails.
    """
    # NOTE(review): credentials are hard-coded, as in the original; consider
    # loading them from configuration.
    con = pymysql.connect(host='localhost', user='root', password='123456', database='test')
    try:
        with con.cursor() as cursor:
            # NOTE(review): the table name `job` and the column names were
            # reconstructed from a garbled statement - confirm against the
            # actual schema.
            sql = ("insert into job(name,site,time,place,major,remark) "
                   "values (%s,%s,%s,%s,%s,%s)")
            infor_list = [name, site, time, place, major, remark]
            cursor.execute(sql, infor_list)
        # PyMySQL does not autocommit by default; without this the row is
        # discarded when the connection closes.
        con.commit()
    finally:
        # Release the connection even when the insert raises.
        con.close()
    print("数据存储成功!")

def data_store():
    """Fetch every collected detail page and print its table-cell contents.

    Calls ``get_path()`` to fill ``message_list``, then for each entry
    downloads the page, selects the ``<td>`` cells matched by
    ``div[class="flat-wrapper"] table tr td`` and prints the list of their
    ``.string`` values (``None`` for a cell that does not contain exactly
    one child node).

    Raises:
        requests.RequestException: If a page cannot be fetched within the
            timeout.
    """
    get_path()
    # Loop-invariant, so built once rather than per page.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"}
    for path in message_list:
        # urljoin copes with hrefs that start with "/" or are already
        # absolute. The host matches the list page fetched by get_content().
        url = urljoin("http://www.scetc.cn/", path)
        response = requests.request(url=url, headers=headers, method="GET", timeout=10)
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'lxml')
        cells = soup.select('div[class="flat-wrapper"] table tr td')
        job = [cell.string for cell in cells]
        print(job)
        # Persisting is left disabled, as in the original. To store a row,
        # pass the cells at indices 1, 3, 5, 7, 9 and 11 to add():
        #   add(job[1], job[3], job[5], job[7], job[9], job[11])

# Script entry point: run the crawl only when executed directly, not on import.
if __name__ == '__main__':
    data_store()

Acho que você vai gostar

Origin blog.csdn.net/weixin_57803787/article/details/124873903
Recomendado
Classificação