python使用mysql

连接 MySQL 做了个小练习:爬取 http://wufazhuce.com 上的问题、描述和答案,存到本地的数据库里。

数据表结构:

-- Storage for the crawled question pages: one row per page number visited.
-- Pages without a question block are stored with '404' in title, description
-- and answers.
-- NOTE(review): MySQL's `utf8` is the 3-byte utf8mb3 charset; confirm whether
-- 4-byte characters (e.g. emoji) can occur in the crawled text.
CREATE TABLE `questions` (
-- Question title (text of the page's <h4> heading).
`title` varchar(2000) DEFAULT NULL,
-- Question description (first 'cuestion-contenido' block on the page).
`description` varchar(200) DEFAULT NULL,
-- Answer text; the crawler clips it to 1800 characters before inserting.
`answers` varchar(2000) DEFAULT NULL,
-- URL of the crawled page.
`url` varchar(2000) DEFAULT NULL,
-- Page number taken from the URL (http://wufazhuce.com/question/<daynum>).
`daynum` varchar(20) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
View Code

代码:

# author:

import pymysql.cursors
import requests
from bs4 import BeautifulSoup

# Crawl http://wufazhuce.com/question/<n> for every page number in the range
# below and store one row per page in the `questions` table. Pages that have
# no question block are recorded with '404' placeholders so every page number
# ends up with a row.
con = pymysql.connect(host='192.168.86.130', user='root', password='letmein', db='0603simon', port=3306,
                      charset='utf8')

# A single parameterized statement serves both real rows and '404' rows; the
# driver quotes every value, so no manual escaping or string building is needed.
insert_sql = 'insert into questions (title,description,answers,url,daynum) values(%s,%s,%s,%s,%s)'

# Maximum characters stored per column. Title and description follow the
# varchar sizes of the table; answers keeps the script's long-standing
# 1800-character clip, which sits below the column's varchar(2000).
title_max = 2000
desc_max = 200
answer_max = 1800

try:
    with con.cursor() as cur:
        for p_num in range(1, 1872):
            url = 'http://wufazhuce.com/question/%s' % p_num
            # A timeout keeps one stalled request from hanging the whole crawl.
            response = requests.get(url=url, timeout=10)
            response.encoding = response.apparent_encoding
            soup = BeautifulSoup(response.text, features="html.parser")

            tar = soup.find('div', class_='one-cuestion')
            if tar is None:
                # No question block on this page: store a placeholder row.
                print('not tar')
                row = ('404', '404', '404', url, str(p_num))
            else:
                heading = tar.find('h4')
                title = heading.text.strip() if heading is not None else ''

                # The first 'cuestion-contenido' block is the question
                # description; every later block is answer text. Joining the
                # later blocks keeps all of them and leaves the answer empty
                # (instead of repeating the description) when there is none.
                contents = [div.text.strip()
                            for div in soup.find_all('div', class_='cuestion-contenido')]
                desc = contents[0] if contents else ''
                answer = '\n'.join(contents[1:])

                # Clip each value so an over-long text cannot make the insert fail.
                row = (title[:title_max], desc[:desc_max], answer[:answer_max],
                       url, str(p_num))

            # mogrify() returns the exact statement the driver will send.
            print(cur.mogrify(insert_sql, row))
            result = cur.execute(insert_sql, row)
            # Commit per page so progress survives a failure later in the crawl.
            con.commit()
            print('执行结果:' + str(result))
finally:
    # Always release the connection, even if a request or insert raised.
    con.close()
View Code

猜你喜欢

转载自www.cnblogs.com/Simonsun002/p/9152944.html