pandas中的to_sql()函数将csv文件写入到MySQL数据库

掌握了利用pandas中的to_sql函数将csv文件存储到MySQL数据库中.具体方法如下:

首先,在数据库中建立表格,这里是使用数据库操作语言,并非python.

-- Target table for the CSV import performed by the Python code below.
-- The column names are all-digit / dotted / non-ASCII (presumably they mirror
-- the CSV header row -- confirm against cars.csv), so they must be quoted
-- with backticks: MySQL rejects unquoted identifiers that consist solely of
-- digits or contain a '.'.
CREATE TABLE `cars` (
  `1` bigint(20) DEFAULT NULL,
  `0` bigint(20) DEFAULT NULL,
  `宝马` text,
  `1.1` bigint(20) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

其中,‘cars’是所要建立的表的名字。上面的代码是在pycharm中登录数据库后,双击某个具体的数据库打开的编辑界面中输入的。

然后,利用python中pandas库中的to_sql函数。

def create_table(self, table_name, csv_filename):
    """Append the rows of a CSV file to the MySQL table ``table_name``.

    The CSV is read with pandas (comma separated, UTF-8) and written with
    ``DataFrame.to_sql(if_exists='append')``, so rows are added to the table
    when it already exists.

    Args:
        table_name: Name of the destination table.
        csv_filename: Path of the CSV file to load.

    Any exception raised while reading or writing is printed, not re-raised.
    """
    # Connection parameters come from the instance this method belongs to.
    engine = create_engine(
        'mysql+mysqldb://%s:%s@%s/%s?charset=utf8'
        % (self.user, self.password, self.host, self.db))
    try:
        data = pd.read_csv(csv_filename, sep=',', encoding='utf-8')  # or sep='\t'
        data.to_sql(table_name, con=engine, if_exists='append', index=False)
    except Exception as e:
        print(e)

对于数据库的操作来说,整体写为一个类(宇轩教的):

# -*- coding: UTF-8 -*-

import pandas as pd
from sqlalchemy import create_engine
import pymysql
from pymysql import IntegrityError
import settings
import uuid
from sqlalchemy import create_engine

class Data_Mysql():
    """Thin helper around a pymysql connection plus a pandas CSV loader.

    Connection parameters that are not passed explicitly fall back to the
    attributes of the ``settings`` module.  The connection is opened lazily
    by ``read``/``write``.  Instances can be used as context managers;
    leaving the ``with`` block closes the connection.
    """

    def __init__(self, MYSQL_DB=None, MYSQL_HOST=None,
                 MYSQL_USER=None, MYSQL_PASSWORD=None, mult=False, port=3306, connect_timeout=10):
        """Store the connection parameters; no connection is opened here.

        Args:
            MYSQL_DB: Database name; falls back to ``settings.MYSQL_DB``.
            MYSQL_HOST: Server host; falls back to ``settings.MYSQL_HOST``.
            MYSQL_USER: User name; falls back to ``settings.MYSQL_USER``.
            MYSQL_PASSWORD: Password; falls back to ``settings.MYSQL_PASSWORD``.
            mult: When true, no database is selected so that queries can
                join across several databases.
            port: Server port.
            connect_timeout: Timeout handed to ``pymysql.connect``.
        """
        self.host = self._setting_or(MYSQL_HOST, 'MYSQL_HOST')
        self.user = self._setting_or(MYSQL_USER, 'MYSQL_USER')
        self.password = self._setting_or(MYSQL_PASSWORD, 'MYSQL_PASSWORD')
        self.db = self._setting_or(MYSQL_DB, 'MYSQL_DB')
        self.port = port
        self.connect_timeout = connect_timeout
        if mult:
            # Multi-database mode: do not bind the connection to one database.
            self.db = None
        self.conn = None
        self.cursor = None

    @staticmethod
    def _setting_or(explicit, name):
        """Return ``explicit`` if truthy, else the ``settings`` attribute ``name``.

        ``settings`` is only consulted when ``explicit`` is falsy; if the
        settings value is falsy too, ``explicit`` itself is returned.
        """
        if explicit:
            return explicit
        return getattr(settings, name) or explicit

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type, value, trace):
        """Close the connection when leaving the ``with`` block."""
        try:
            self.close()
        except pymysql.Error:
            # Cleanup is best-effort: a driver error while closing must not
            # replace an exception that is already propagating.
            pass

    def read(self, table, column='*', LIMIT=' '):
        """Run ``select <column> from <table> <LIMIT>`` and return a DataFrame.

        Connects first if there is no open connection, and always closes the
        connection afterwards, whether or not the query succeeded.

        Args:
            table: Table name (or any FROM expression).
            column: Column list placed after ``select``.
            LIMIT: Trailing SQL text appended to the statement.

        Returns:
            The query result as returned by ``pandas.read_sql``.
        """
        if not self.conn:
            self.connect()
        # NOTE(security): table, column and LIMIT are concatenated into the
        # statement verbatim -- only pass trusted values here.
        sql = "select " + str(column) + " from " + str(table) + ' ' + LIMIT
        print('sql = ', sql)
        try:
            return pd.read_sql(sql, self.conn)
        finally:
            self.close()

    def write(self, sql, values):
        """Execute a parameterized statement and commit it.

        Connects first if there is no open connection.  If execution or the
        commit fails the transaction is rolled back and the error re-raised.

        Args:
            sql: Statement with driver-style placeholders.
            values: Parameters bound to the placeholders.

        Returns:
            Whatever ``cursor.execute`` returns.
        """
        if not self.conn:
            self.connect()
        print(sql)
        print(values)
        try:
            sta = self.cursor.execute(sql, values)
            self.conn.commit()
        except Exception:
            self.conn.rollback()
            raise
        return sta

    def connect(self):
        """Open the pymysql connection and create a cursor on it."""
        self.conn = pymysql.connect(host=self.host, user=self.user,
                                    password=self.password, db=self.db,
                                    charset='utf8', use_unicode=True, port=self.port,
                                    connect_timeout=self.connect_timeout)
        self.cursor = self.conn.cursor()

    def _engine_url(self):
        """Build the SQLAlchemy URL for the configured server.

        User name and password are URL-escaped so that characters such as
        ``@`` or ``=`` in a password do not corrupt the URL, and the
        configured port is included so the engine targets the same server as
        ``connect``.
        """
        from urllib.parse import quote_plus

        # NOTE(review): this uses the MySQLdb driver while the rest of the
        # class uses pymysql -- confirm which driver is intended.
        return 'mysql+mysqldb://%s:%s@%s:%s/%s?charset=utf8' % (
            quote_plus(str(self.user)), quote_plus(str(self.password)),
            self.host, self.port, self.db)

    def create_table(self, table_name, csv_filename):
        """Load a CSV file into ``table_name`` of the configured database.

        The CSV is read with pandas (comma separated, UTF-8) and written with
        ``DataFrame.to_sql(if_exists='append')``, so rows are appended when
        the table already exists.  Exceptions raised while reading or writing
        are printed, not re-raised.

        Args:
            table_name: Name of the destination table.
            csv_filename: Path of the CSV file to import.
        """
        engine = create_engine(self._engine_url())
        try:
            data = pd.read_csv(csv_filename, sep=',', encoding='utf-8')  # or sep='\t'
            data.to_sql(table_name, con=engine, if_exists='append', index=False)
        except Exception as e:
            print(e)
        finally:
            # Release the engine's pooled connections.
            engine.dispose()

    def close(self):
        """Close the connection if one is open; safe to call repeatedly.

        ``conn`` and ``cursor`` are reset to ``None`` so the next ``read`` or
        ``write`` reconnects instead of reusing a closed connection.
        """
        conn, self.conn, self.cursor = self.conn, None, None
        if conn is not None:
            conn.close()

if __name__ == "__main__":
    # Demo: import cars.csv into the `cars` table.
    # NOTE(review): the credentials are hard-coded here; consider moving
    # them into the settings module.
    readMysql = Data_Mysql(MYSQL_DB='KnowledgeGraph', MYSQL_HOST='192.168.100.244',
                           MYSQL_PASSWORD='pcm=pwd@2016', MYSQL_USER='pcm')
    readMysql.create_table('cars', 'cars.csv')

需要注意的是:data.to_sql 的参数中 if_exists='append' 这个选项。因为第一步中已经建好了表,所以这里要用 append。如果换成 replace,to_sql 会先删除已有的表,再按 pandas 推断的列类型和数据库默认的字符集自动重新建表;如果数据库的默认字符集不是 utf8,这样建出来的表就无法正确存储中文字符。所以,这里采用先手动建表(指定 CHARSET=utf8),再 append 的方式。

本文是为了个人工作记录使用,所以格式上没有好好规范。

猜你喜欢

转载自blog.csdn.net/ljyt2/article/details/78438293