爬取《红楼梦》章节目录并写入 MySQL


import requests
import re
import pymysql
from bs4 import BeautifulSoup

# Scrape the chapter index page at purepen.com/hlm/ and store one
# (section, title, url) row per chapter in the MySQL table `content`.
conn = pymysql.Connect(host='127.0.0.1', user='root', password='123123', database='hlm')
cursor = conn.cursor()

url = 'http://www.purepen.com/hlm/'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
}

# `content` is the table name. The values are bound by the driver rather than
# interpolated into the SQL text, so titles containing quotes cannot break
# (or inject into) the statement.
insert_sql = "insert into content (section,title,url) values (%s,%s,%s)"

try:
    # A timeout keeps the script from hanging forever on an unresponsive server.
    res = requests.get(url, headers=headers, timeout=30)
    # Decode the response body as GBK before parsing.
    res.encoding = 'gbk'
    soup = BeautifulSoup(res.text, 'lxml')

    for tr_list in soup.find_all(name='tr'):
        td_data = list(tr_list.find_all(name='td'))
        # Only rows with exactly four cells are treated as index rows; they
        # are read as two (section, title) pairs side by side.
        if len(td_data) != 4:
            continue

        for section_td, title_td in ((td_data[0], td_data[1]),
                                     (td_data[2], td_data[3])):
            link = title_td.find('a', href=True)
            if link is None:
                # No chapter link in this cell: skip it instead of crashing.
                continue

            section = section_td.text
            title = title_td.text
            chapter_url = url + link['href']
            print(section, title, chapter_url)

            cursor.execute(insert_sql, (section, title, chapter_url))

        # Remember to commit: rows are persisted after each table row.
        conn.commit()
finally:
    # Release the database resources even if fetching, parsing or an insert fails.
    cursor.close()
    conn.close()


猜你喜欢

转载自www.cnblogs.com/kai-/p/12662845.html