Pandas operates excel to import MySQL database

Table of contents

foreword

1. Connect to the MySQL database

2. Use Pandas to read Excel files

3. Create table and insert statement

Fourth, traversal insertion

Five, complete code

Summarize


foreword

Use Pandas to read the data in Excel, create a data table with a specified structure in MySQL according to the Excel name, and finally insert the data into the corresponding data table

This article takes the hotel id of a certain platform as an example, and imports the hotel id in Excel in different cities into the corresponding city hotel table in MySQL


1. Connect to the MySQL database

# 填写必要的参数——>host:数据库地址,默认写本地localhost,user:用户名,password:密码,db:连接的数据库名,port端口:3306
db = pymysql.connect(host="localhost", user="root", password="***", db="***", port=3306)
# 创建一个游标对象
cur = db.cursor()

2. Use Pandas to read Excel files

path = r"****"
fileList = os.listdir(path=path)
for filename in fileList:
    if '.xlsx' in filename:
        df = read_excel(os.path.join(path,filename),usecols=['酒店id'])
        # 错误值处理
        df = df.dropna()

3. Create table and insert statement

# 将酒店与城市名拼接 结果如 酒店-上海
name = '酒店-' + filename.replace('酒店id.xlsx','')
print(name)
# 先执行删除语句:如果存在该表则删除。
cur.execute("drop table if exists `{}`".format(name))
# 执行sql语句:创建表名为name的数据表
cur.execute("CREATE TABLE `{}`(`酒店id` int(0) NOT NULL)".format(name)
# 插入语句
query = """insert into `{}` (`酒店id`) values (%s)""".format(name)

Fourth, traversal insertion

        for i in range(len(df)):
            id = df['酒店id'].iloc[i]
            cur.execute(query,id)

Five, complete code

import os
from pandas import read_excel
import pymysql

# 填写必要的参数——>host:数据库地址,默认写本地localhost,user:用户名,password:密码,db:连接的数据库名,port端口:3306
db = pymysql.connect(host="localhost", user="root", password="***", db="***", port=3306)
# 创建一个游标对象
cur = db.cursor()
# 设置文件读取路径
path = r"***"
fileList = os.listdir(path=path)
for filename in fileList:
    if '.xlsx' in filename:
        # print(filename)
        name = '酒店-' + filename.replace('酒店id.xlsx','')
        print(name)
        # 先执行删除语句:如果存在该表则删除。
        cur.execute("drop table if exists `{}`".format(name))
        # 执行sql语句:创建表名为name的数据表
        cur.execute(
            "CREATE TABLE `{}`(`酒店id` int(0) NOT NULL,`酒店名称` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,`品牌` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,`档次` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,`省份` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,`城市` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,`商圈` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,`地址` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,`经度` decimal(10, 7) NULL DEFAULT NULL,`纬度` decimal(10, 7) NULL DEFAULT NULL,`房间数` int(0) NULL DEFAULT NULL,`开业时间` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,`装修时间` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,`简介` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,`评分` float(5, 1) NULL DEFAULT NULL,`房型数` int(0) NULL DEFAULT NULL,`电话` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,`房型详情` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL,PRIMARY KEY (`酒店id`) USING BTREE)ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;".format(
                name)
        )
        # 插入语句
        query = """insert into `{}` (`酒店id`) values (%s)""".format(name)
        # 读取Excel保存为Df对象
        df = read_excel(os.path.join(path,filename),usecols=['酒店id'])
        # 错误值处理
        df = df.dropna()
        # 遍历插入
        for i in range(len(df)):
            id = df['酒店id'].iloc[i]
            cur.execute(query,id)
    # 提交事务
    db.commit()
# 关闭连接
cur.close()
db.close()

Summarize

If you need national hotel data, you can private message

Guess you like

Origin blog.csdn.net/Achernar0208/article/details/125721678