python爬虫10--文件存储之关系型数据库存储

关系型数据库基于关系模型,而关系模型通过二维表存储,关系型数据库有:SQLite、MySQL、Oracle、SQL Server、DB2等。

MySQL数据库存储

1.连接数据库

import pymysql
#用connect()方法申明一个mysql连接对象db
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306)
#获得mysql操作游标,利用游标来执行sql语句
cursor = db.cursor()
#新建数据库,并调用execute()方法执行
cursor.execute('CREATE DATABASE spider DEFAULT CHARACTER SET utf8')
#关闭数据库
db.close()

2.创建表

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
#创建表
sql = 'CREATE TABLE IF NOT EXISTS user (id VARCHAR(255) NOT NULL,name VARCHAR(255) NOT NULL,age INT NOT NULL,PRIMARY KEY (id))'
cursor.execute(sql)
db.close()

3.插入数据

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
sql = 'INSERT INTO user(id,name,age) values("001","xxx",18)'
try:
    cursor.execute(sql)
    db.commit()    #真正将语句提交到数据执行的方法,对数据库插入、更新、删除都需调用
except:
    db.rollback()
db.close()

动态插入:

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
data = {
    "id":"002",
    "name":'ss',
    "age":20
}
table = 'user'
keys = ','.join(data.keys())
values = ','.join(['%s']*len(data))
sql = 'INSERT INTO {table}({keys}) values({values})'.format(table=table,keys=keys,values=values)
try:
    if cursor.execute(sql,tuple(data.values())):
        print('成功')
        db.commit()
except:
    print('失败')
    db.rollback()
db.close()

4.更新数据

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
sql = 'UPDATE  user SET age=25 WHERE id="001"'
try:
    cursor.execute(sql)
    db.commit()
except:
    db.rollback()
db.close()

动态更新并去重,如果数据存在,则更新,如果数据不存在则插入:

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
data = {
    "id":"002",
    "name":'ss',
    "age":40
}
table = 'user'
keys = ','.join(data.keys())
values = ','.join(['%s']*len(data))
sql = 'INSERT INTO {table}({keys}) values({values}) ON DUPLICATE KEY UPDATE'.format(table=table,keys=keys,values=values)
update = ','.join([" {key}=%s".format(key=key) for key in data])
sql += update
try:
    if cursor.execute(sql,tuple(data.values())*2):
        print('成功')
        db.commit()
except:
    print('失败')
    db.rollback()
db.close()

5.删除

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
table = 'user'
cond = 'age > 30'
sql = 'DELETE FROM {table} WHERE {cond}'.format(table=table,cond=cond)
try:
    cursor.execute(sql)
    db.commit()
except:
    db.rollback()
db.close()

6.查询

import pymysql
db = pymysql.connect(host='localhost',user='root',password='rr123456',port=3306,db='spider')
cursor = db.cursor()
table = 'user'
cond = 'age > 20'
sql = 'SELECT * FROM {table} WHERE {cond}'.format(table=table,cond=cond)
try:
    cursor.execute(sql)
    print(cursor.rowcount)    #获取查询条数
    print(cursor.fetchone())   #查询一条,以元组形式展示结果
    print(cursor.fetchall())   #查询所有,不含上面已查询的,fetchall()内部实现偏移指针用来指向查询结果,获取一条后,指针指向下一条,以二元组形式全部展示结果。若数量大开销高,可用while加fetchone()代替,循环一次,指针偏移一次
except:
    db.rollback()
db.close()

7.案例

 

猜你喜欢

转载自www.cnblogs.com/rong1111/p/12174380.html