Due to business requirements we needed to add more MySQL databases, so the data in the old database had to be redistributed across the new MySQL databases according to sharding rules. An ETL tool could have been used, but the company's aging VM could not support one. Since I had recently been learning Python, I used this project to practice it.
Features: 1, many to many data migration
2, resumable transfer (if the migration fails, it resumes from where it stopped)
3, data-count comparison: verifies that the source and target databases contain the same number of rows.
4, the data filtering table. Filter out unwanted data table.
Note: this tool requires that the id column come first when reading data, because the script rewrites the id. Of course, if you do not need id rewriting, you can simply remove that part.
Master File DataTanslate.py
import pymysql
import hashlib
import hascode
import nev_dataTransferSql
import datetime
import configparser
import math
import sys
# Load runtime parameters from the ini file in the working directory.
conf = configparser.ConfigParser()
conf.read("./dataBaseConfig.ini")
# Source database parameters.  Every option is a comma-separated list with one
# entry per source database; the lists are index-aligned with each other.
# NOTE(review): the section name 'soureDB' is misspelled, but it matches the
# spelling used inside dataBaseConfig.ini — both sides must stay in sync.
sourceDBUrl = (conf.get('soureDB', 'sourceDBUrl')).split(',')
sourceDBUser = (conf.get('soureDB', 'sourceDBUser')).split(',')
sourceDBKey = (conf.get('soureDB', 'sourceDBKey')).split(',')
sourceDataBse = (conf.get('soureDB', 'sourceDataBse')).split(',')
# Target database parameters (same comma-separated, index-aligned convention).
targetDBUrl = (conf.get('targetDB', 'targetDBUrl')).split(',')
targetDBUser = (conf.get('targetDB', 'targetDBUser')).split(',')
targetDBKey = (conf.get('targetDB', 'targetDBKey')).split(',')
targetDBs = (conf.get('targetDB', 'targetDBs')).split(',')
# Flags identifying each source database, plus the flag of the database whose
# migration is currently in progress (used for resuming after a failure).
sourcedatabseFlg =(conf.get('soureDB', 'sourcedatabseFlg')).split(',')
currentSourcedatabseFlg = conf.get('progress', 'currentSourcedatabseFlg')
# Id spacing between source databases so rewritten ids never collide.
dataBaseInterval = int(conf.get('config', 'dataBaseInterval'))
# Tables that must NOT be migrated.
filterTables = (conf.get('filter', 'tables')).split(',')
# Index (into the lists above) of the source database being processed.
dataBaseNo = 0
def connectDB(url, user, key, dataBase):
    """Open a connection to *dataBase* on server *url* and return
    ``(connection, cursor)``.  The caller is responsible for closing both.
    """
    # BUG FIX: pass connection parameters by keyword — PyMySQL >= 1.0 removed
    # the positional (host, user, password, database) signature, so the old
    # positional call crashes on current PyMySQL releases.
    db = pymysql.connect(host=url, user=user, password=key, database=dataBase)
    cursor = db.cursor()
    return db, cursor
def getDataFromTarget(sourceDBUrl,sourceDBUser, sourceDBKey,sql,dataBase):
    """Run *sql* against *dataBase* on the source server and return all rows
    (whatever ``cursor.fetchall()`` yields — a tuple of row tuples).
    """
    db, cursor = connectDB(sourceDBUrl, sourceDBUser, sourceDBKey, dataBase)
    try:
        cursor.execute(sql)
        data = cursor.fetchall()
    finally:
        # BUG FIX: the original closed the cursor/connection only on success,
        # leaking both whenever execute() raised.
        cursor.close()
        db.close()
    return data
def insertDataToTarget(index,sql,args):
    """Execute *sql* once per row of *args* (``executemany``) against target
    database number *index* and commit the batch.
    """
    db, cursor = connectDB(targetDBUrl[index], targetDBUser[index], targetDBKey[index], targetDBs[index])
    try:
        cursor.executemany(sql, args)
        # Commit while the cursor is still open (the original committed after
        # closing the cursor, which works but reads backwards).
        db.commit()
    finally:
        # BUG FIX: close the cursor and connection even when executemany or
        # commit raises — the original leaked both on failure.
        cursor.close()
        db.close()
# Sharding rule: map one row to a target-database index.
def formatVin(args):
    """Return the index of the target database that row *args* belongs to,
    derived from the MD5 of its VIN (column 2) via a Java-style hashCode."""
    digest = hashlib.md5(args[2].encode(encoding='UTF-8')).hexdigest()
    bucket = (hascode.getHashCode(digest) & 2147483647) % len(targetDBs)
    # The modulo already guarantees 0 <= bucket < len(targetDBs), so the
    # fallback to 0 below can never trigger; kept for exact parity.
    return bucket if bucket <= len(targetDBs) else 0
# Group rows into per-target-database buckets according to the sharding rule.
def shunt(args):
    """Split *args* (a sequence of row tuples) into one bucket per entry of
    ``targetDBs``, using ``formatVin`` to pick the bucket for each row.

    Returns a list of tuples, index-aligned with ``targetDBs``.  A bucket that
    received no rows stays as ``((),)`` — a tuple holding one empty tuple —
    matching the placeholder shape the original produced.
    """
    # BUG FIX: the original used ``[[()]] * len(targetDBs)``, which makes every
    # slot alias the SAME inner list; any in-place mutation before the first
    # rebind would corrupt all buckets.  Build independent placeholders.
    buckets = [[()] for _ in range(len(targetDBs))]
    for row in args:
        target = formatVin(row)
        if len(buckets[target][0]) == 0:
            # First row for this bucket: replace the placeholder outright.
            buckets[target] = [row]
        else:
            buckets[target].append(row)
    return [tuple(bucket) for bucket in buckets]
def tables(sourceDBUrl,sourceDBUser, sourceDBKey,sourceDataBse,pageIndex,size,getData_sql,insert_sql,currentPage,currentTable):
    """Migrate one page of *currentTable*: read a page from the source DB,
    rewrite the row ids, split the rows per target DB, and insert each bucket.

    Returns the number of buckets processed, or 0 when the source page was
    empty (the caller stops paginating on 0).
    """
    # Append the LIMIT offset and page size to the prepared SELECT.
    getData_sql = getData_sql + str(pageIndex) + " ," + str(size)
    # Fetch the rows that need to be inserted.
    args = getDataFromTarget(sourceDBUrl,sourceDBUser, sourceDBKey,getData_sql,sourceDataBse)
    if(len(args) == 0):
        # BUG FIX: the original fell out with a bare ``return`` (None); the
        # caller compares the result against 0 to stop paging, and
        # ``None == 0`` is False, so the stop condition never fired.
        return 0
    args = editResultID(args, pageIndex,currentPage,currentTable)
    result = shunt(args)
    for index in range(len(result)):
        # BUG FIX: the original tested ``len(result) > 0`` (always true, since
        # result always has one slot per target DB).  Test the individual
        # bucket instead, so placeholder/empty buckets are skipped.
        if(len(result[index][0]) > 0):
            print("插入数据:")
            # insertDataToTarget(index,insert_sql,result[index])
    return len(result)
# Record the current insert batch so that a failed run can be resumed.
def saveCurrentData(data,currentPage,currentTable):
    """Persist the current table, page number and id range of *data* (a
    non-empty sequence of row tuples, id in column 0) to dataBaseConfig.ini.
    """
    print("当前源数据库",sourcedatabseFlg[dataBaseNo])
    print("当前表",currentTable)
    print("总页数",pageCount)  # module global, set by start()
    print("页码", currentPage)
    print("开始ID",data[0][0])
    print("结束ID",data[-1][0])
    conf = configparser.ConfigParser()
    conf.read("./dataBaseConfig.ini")
    conf.set("progress", "table", currentTable)
    conf.set("progress", "pageNo", str(currentPage))
    conf.set("progress", "beginId", str(data[0][0]))
    conf.set("progress", "endId", str(data[-1][0]))
    # BUG FIX: the original passed a bare ``open(...)`` to conf.write and
    # leaked the file handle; close it deterministically.
    with open("./dataBaseConfig.ini", "w") as ini_file:
        conf.write(ini_file)
def editResultID(result,pageIndex,currentPage,currentTable):
    """Rewrite column 0 (the id) of every row so that ids coming from
    different source databases land in disjoint ranges (offset by
    ``dataBaseInterval`` per database), then record progress for resume.

    Returns the rewritten rows as a tuple of tuples.
    """
    base = pageIndex + 1 + dataBaseInterval * dataBaseNo
    rewritten = [
        (base + offset,) + tuple(row)[1:]
        for offset, row in enumerate(result)
    ]
    saveCurrentData(rewritten, currentPage, currentTable)
    return tuple(rewritten)
def getPageCount(sourceDBUrl,sourceDBUser, sourceDBKey,sourceDb,currentTable,pageSize):
    """Return the exclusive upper page bound for *currentTable*: the row count
    divided by *pageSize*, rounded up, plus one (callers iterate with
    ``range(beginPageNo, pageCount)``)."""
    count_sql = "select count(*) from " + currentTable
    total_rows = getDataFromTarget(sourceDBUrl, sourceDBUser, sourceDBKey, count_sql, sourceDb)[0][0]
    page_bound = math.ceil(total_rows / pageSize) + 1
    print("总页数为:" + str(page_bound))
    return page_bound
def saveCurrentProcess(currentTable,currentPage):
    """Reset the resume bookmark to *currentTable*/*currentPage* and advance
    the current-source-database flag to the database at index ``dataBaseNo``.
    Called when one source database finishes and the next one should start.
    """
    global currentSourcedatabseFlg
    currentSourcedatabseFlg = sourcedatabseFlg[dataBaseNo]
    conf = configparser.ConfigParser()
    conf.read("./dataBaseConfig.ini")
    conf.set("progress", "table", currentTable)
    conf.set("progress", "pageNo", str(currentPage))
    conf.set("progress", "beginId", str(1))
    conf.set("progress", "endId", str(0))
    conf.set("progress", "currentsourcedatabseflg", str(sourcedatabseFlg[dataBaseNo]))
    # BUG FIX: the original handed a bare ``open(...)`` to conf.write and
    # never closed the handle; use a context manager instead.
    with open("./dataBaseConfig.ini", "w") as ini_file:
        conf.write(ini_file)
def start(thread_num):
    """Main migration driver.

    Resumes from the bookmark stored in the [progress] section of the ini
    file, walks every source database whose flag matches the bookmark, and
    migrates every non-filtered table page by page via ``tables()``.
    *thread_num* is only used for progress logging.
    """
    try:
        # Read resume information from the config file.
        beginPageNo = int(conf.get("progress", "pageNo"))
        beginTable = conf.get("progress", "table")
        # Data-transfer configuration: rows fetched per page.
        pageSize = int(conf.get('config', 'pageSize'))
        print("开始页码:",beginPageNo)
        starttime = datetime.datetime.now()
        # Build the SELECT/INSERT SQL pairs and the raw table list.
        # NOTE(review): the SQL is generated from the FIRST source database's
        # schema and reused for all of them — assumes identical schemas.
        sqlList,tablesList = nev_dataTransferSql.getDataTransferSql(sourceDBUrl[0], sourceDBUser[0], sourceDBKey[0],sourceDataBse[0], filterTables, [], " ORDER BY id desc ", " LIMIT ")
        tableList = []
        for table in tablesList:
            if(table[0] not in filterTables):
                tableList.append(table[0])
        print("要迁移的表:%s"% tableList)
        for dataBaseIndex in range(len(sourcedatabseFlg)):
            # Skip source databases until we reach the one the resume flag
            # points at.
            if(currentSourcedatabseFlg != sourcedatabseFlg[dataBaseIndex]):
                continue
            # Number of tables completed so far in this database.
            amount = 0
            print("第%d个库"%dataBaseIndex)
            for value in sqlList:
                currentTable = str(tableList[amount])
                print(currentTable,"开始")
                # Only the bookmarked table resumes mid-way; every other
                # table starts from page 1.
                if(beginTable != currentTable):
                    beginPageNo = 1
                global pageCount
                # Total number of pages for the current table (exclusive
                # bound, read by saveCurrentData for logging).
                pageCount = getPageCount(sourceDBUrl[dataBaseIndex],sourceDBUser[dataBaseIndex], sourceDBKey[dataBaseIndex],sourceDataBse[dataBaseIndex],currentTable,pageSize)
                for i in range(beginPageNo,pageCount):
                    # LIMIT offset for page i (pages are 1-based).
                    index = (i-1)*pageSize
                    # tables() returns 0 when the source page is empty.
                    # NOTE(review): tables() as written returns None (not 0)
                    # on an empty page, so this break never fires — confirm.
                    if(tables(sourceDBUrl[dataBaseIndex],sourceDBUser[dataBaseIndex], sourceDBKey[dataBaseIndex],sourceDataBse[dataBaseIndex],index,pageSize,value[0] ,value[1],i,currentTable)==0):
                        break
                    print("表 %s 已完成 %s" % (str(tableList[amount]), str(((i / (pageCount - 1)) * 100)) + "%"))
                print("%d-----整体已完成:%s" % (thread_num, str((amount / (len(tableList) - 1)) * 100) + "%"))
                # Last table of this database done: point the bookmark at the
                # next source database (if any) so a re-run continues there.
                if(amount == len(tableList)-1):
                    global dataBaseNo
                    if(dataBaseIndex +1 < len(sourcedatabseFlg)):
                        dataBaseNo = dataBaseIndex + 1
                        saveCurrentProcess(tableList[0],0)
                amount += 1
        endtime = datetime.datetime.now()
        print("总耗时:%s"% (endtime - starttime).seconds)
    except OSError as err:
        print("OS error: {0}".format(err))
    except ValueError:
        print("Could not convert data to an integer.")
    except:
        # Bare except, but the exception is re-raised below, so nothing is
        # silently swallowed.
        print("Unexpected error:", sys.exc_info()[0])
        raise
# Entry point: run the migration (the argument is just a log label).
start(1)
Used to generate sql file
# nev_dataTransferSql: generates the per-table SELECT/INSERT statements.
import pymysql
import configparser
# Load parameters from the shared ini file.
conf = configparser.ConfigParser()
conf.read("./dataBaseConfig.ini")
def connectDB(url, user, key, table):
    """Open a connection to schema *table* on server *url* and return
    ``(connection, cursor)``.  The caller closes both.
    """
    # BUG FIX: pass connection parameters by keyword — PyMySQL >= 1.0 removed
    # the positional (host, user, password, database) signature.  The old
    # commented-out localhost line with hard-coded credentials was dropped.
    db = pymysql.connect(host=url, user=user, password=key, database=table)
    cursor = db.cursor()
    return db, cursor
def getDataFromTarget(sourceDBUrl, sourceDBUser, sourceDBKey,sql,table):
    """Run *sql* against schema *table* and return all rows
    (a tuple of row tuples from ``cursor.fetchall()``).
    """
    db, cursor = connectDB(sourceDBUrl, sourceDBUser, sourceDBKey, table)
    try:
        cursor.execute(sql)
        data = cursor.fetchall()
    finally:
        # BUG FIX: the original leaked the cursor and connection whenever
        # execute() raised; close them unconditionally.
        cursor.close()
        db.close()
    return data
def editeCloumn(cloumn,reType,reCloumn):
    """Return the SELECT expression for one column description row.

    *cloumn* is one row of a ``desc <table>`` result: (name, type, ...).
    ``datetime`` columns are wrapped in date_format so they serialize with a
    fixed layout; everything else passes through unchanged.  *reType* and
    *reCloumn* are accepted for interface compatibility but unused.
    """
    name, col_type = cloumn[0], cloumn[1]
    if col_type == "datetime":
        return "date_format(" + name + ",'%Y-%m-%d %H:%i:%S') AS " + name
    return name
def editeSql(table,cloumns,_cloumns,orderBy,limit):
    """Build the paired SELECT/INSERT statements for one table.

    table: table name; cloumns: SELECT expression list (comma-joined);
    _cloumns: plain column-name list (comma-joined); orderBy: ORDER BY clause,
    skipped for the ``user`` table; limit: any non-empty string appends a
    trailing `` limit `` so the caller can add offset/size for pagination.
    """
    select_sql = "select " + cloumns + " from " + table
    if orderBy and table != "user":
        select_sql += orderBy
    if limit:
        select_sql += " limit "
    placeholders = "%s," * (len(_cloumns.split(",")) - 1) + "%s )"
    insert_sql = "insert into " + table + "(" + _cloumns + ")" + "values( " + placeholders
    return select_sql, insert_sql
# Read a table's structure and build the column strings used in the SQL.
def getTableStructure(sourceDBUrl, sourceDBUser, sourceDBKey,table,filterCloumn,reType,reCloumn,dataBase="toyota"):
    """Return ``(select_columns, insert_columns)`` for *table*.

    table: one row of ``show tables`` output (name at index 0);
    filterCloumn: column names to exclude; reType/reCloumn: forwarded to
    ``editeCloumn`` (currently unused there); dataBase: schema to run ``desc``
    against.

    FIX: the schema name was hard-coded to "toyota"; it is now a parameter
    defaulting to the old value, so existing callers are unaffected.
    """
    structure = getDataFromTarget(sourceDBUrl, sourceDBUser, sourceDBKey, 'desc ' + table[0], dataBase)
    cloumns = ""
    _cloumns = ""
    for cloumn in structure:
        if cloumn[0] not in filterCloumn:
            value = editeCloumn(cloumn, reType, reCloumn)
            if len(cloumns) > 0:
                cloumns += "," + value
                _cloumns += "," + cloumn[0]
            else:
                cloumns = value
                _cloumns = cloumn[0]
    return cloumns, _cloumns
def getDataTransferSql(sourceDBUrl, sourceDBUser, sourceDBKey,dataBase,filterTable,filterCloumn,orderBy,limit):
    """Build the (select, insert) SQL pair for every non-filtered table.

    Returns ``(sql_pairs, raw_tables)`` where *raw_tables* is the UNfiltered
    ``show tables`` result — callers re-apply the filter when aligning the
    two lists.
    """
    all_tables = getDataFromTarget(sourceDBUrl, sourceDBUser, sourceDBKey, 'show tables', dataBase)
    # FIX (idiom): the original named this accumulator ``list``, shadowing
    # the builtin of the same name.
    sql_pairs = []
    for table in all_tables:
        if table[0] not in filterTable:
            cloumns, _cloumns = getTableStructure(sourceDBUrl, sourceDBUser, sourceDBKey, table, filterCloumn, "", "")
            sql_pairs.append(editeSql(table[0], cloumns, _cloumns, orderBy, limit))
    return sql_pairs, all_tables
hascode related
def convert_n_bytes(n, b):
    """Wrap integer *n* into the signed two's-complement range of *b* bytes
    (e.g. b=4 emulates Java's 32-bit int overflow behaviour)."""
    span = 2 ** (b * 8)   # number of distinct b-byte values
    half = span // 2      # the sign boundary, 2**(bits-1)
    return (n + half) % span - half
def convert_4_bytes(n):
    """Truncate *n* to a signed 32-bit integer (Java ``int`` semantics)."""
    return convert_n_bytes(n, b=4)
def getHashCode(s):
    """Python port of Java's ``String.hashCode()``:
    ``s[0]*31**(n-1) + s[1]*31**(n-2) + ... + s[n-1]``, truncated to a signed
    32-bit integer."""
    h = 0
    for ch in s:
        # Horner's-rule form of sum(ord(c) * 31**(n-1-i)) — exact integer
        # arithmetic, so the result is identical before truncation.
        h = h * 31 + ord(ch)
    return convert_4_bytes(h)
Delete data from the target database deleteDataFromTables.py
import pymysql
import configparser
# Load database parameters from the shared ini file.
conf = configparser.ConfigParser()
conf.read("./dataBaseConfig.ini")
# Target databases (comma-separated, index-aligned lists).
targetDBUrl = (conf.get('targetDB', 'targetDBUrl')).split(',')
targetDBUser = (conf.get('targetDB', 'targetDBUser')).split(',')
targetDBKey = (conf.get('targetDB', 'targetDBKey')).split(',')
targetDBList = (conf.get('targetDB', 'targetDBs')).split(',')
# Resume bookmark written by DataTanslate.py: the table and id range of the
# last (possibly failed) batch — this is exactly what gets deleted here.
beginPageNo = int(conf.get("progress", "pageNo"))
beginid = int(conf.get("progress", "beginid"))
endid = int(conf.get("progress", "endid"))
targetTables = conf.get("progress", "table")
def connectDB(url, user, key, table):
    """Open a connection to schema *table* on server *url* and return
    ``(connection, cursor)``.  The caller closes both.
    """
    # BUG FIX: pass connection parameters by keyword — PyMySQL >= 1.0 removed
    # the positional (host, user, password, database) signature.
    db = pymysql.connect(host=url, user=user, password=key, database=table)
    cursor = db.cursor()
    return db, cursor
# Delete the partially-migrated id range from every target database, so a
# failed migration can be re-run cleanly.
def deletData(beginId, endId):
    """Delete rows with ``beginId <= id <= endId`` from *targetTables* in
    every target database, committing per database and rolling back the one
    that fails.
    """
    for DBIndex in range(len(targetDBUrl)):
        db, cursor = connectDB(targetDBUrl[DBIndex], targetDBUser[DBIndex], targetDBKey[DBIndex], targetDBList[DBIndex])
        # BUG FIX: the original banner printed the *user name* as the database
        # label; print the database name instead.
        print("<-------------------------正在删除" + targetDBList[DBIndex] + "库----------------------------->")
        print("正在删除" + targetTables + "表")
        sql = "delete from " + targetTables + " where id>= " + str(beginId) + " and id<= " + str(endId) + " ;"
        print(sql)
        try:
            cursor.execute(sql)
            db.commit()
        except Exception as err:
            # BUG FIX: the bare ``except:`` swallowed every detail (and would
            # even catch KeyboardInterrupt); keep the best-effort semantics
            # but report the cause.
            print("操作异常", err)
            db.rollback()
        finally:
            cursor.close()
            db.close()


deletData(beginid, endid)
Compare the number of pieces of data in Table
import pymysql
import configparser
# Load parameters from the shared ini file.
conf = configparser.ConfigParser()
conf.read("./dataBaseConfig.ini")
# Source database parameters (comma-separated, index-aligned lists).
sourceDBUrl = (conf.get('soureDB', 'sourceDBUrl')).split(',')
sourceDBUser = (conf.get('soureDB', 'sourceDBUser')).split(',')
sourceDBKey = (conf.get('soureDB', 'sourceDBKey')).split(',')
sourceDataBase = (conf.get('soureDB', 'sourceDataBse')).split(',')
# Target database parameters (same convention).
targetDBUrl = (conf.get('targetDB', 'targetDBUrl')).split(',')
targetDBUser = (conf.get('targetDB', 'targetDBUser')).split(',')
targetDBKey = (conf.get('targetDB', 'targetDBKey')).split(',')
targetDBs = (conf.get('targetDB', 'targetDBs')).split(',')
# Tables excluded from the comparison.
filterTables = (conf.get('filter', 'tables')).split(',')
# Optional id range restricting the target-side counts; empty string means
# no bound on that side.
beginId = conf.get('compare', 'beginId')
enId = conf.get('compare', 'enId')
def connectDB(url, user, key, table):
    """Open a connection to schema *table* on server *url* and return
    ``(connection, cursor)``.  The caller closes both.
    """
    # BUG FIX: pass connection parameters by keyword — PyMySQL >= 1.0 removed
    # the positional (host, user, password, database) signature.
    db = pymysql.connect(host=url, user=user, password=key, database=table)
    cursor = db.cursor()
    return db, cursor
def getDataFromTarget(sourceDBUrl,sourceDBUser,sourceDBKey,sql,dataBase):
    """Run *sql* against *dataBase* and return all rows
    (a tuple of row tuples from ``cursor.fetchall()``).
    """
    db, cursor = connectDB(sourceDBUrl, sourceDBUser, sourceDBKey, dataBase)
    try:
        cursor.execute(sql)
        data = cursor.fetchall()
    finally:
        # BUG FIX: the original leaked the cursor and connection whenever
        # execute() raised; close them unconditionally.
        cursor.close()
        db.close()
    return data
def editeCloumn(cloumn,reType,reCloumn):
    """Return the SELECT expression for one ``desc`` row (name, type, ...).

    ``datetime`` columns are wrapped in date_format for a fixed layout; other
    columns pass through.  *reType*/*reCloumn* are unused but kept for
    interface compatibility with the migration script.
    """
    name, col_type = cloumn[0], cloumn[1]
    if col_type == "datetime":
        return "date_format(" + name + ",'%Y-%m-%d %H:%i:%S') AS " + name
    return name
def editeSql(table):
    """Build the two count() statements for *table*.

    The target-side count is optionally restricted to the [beginId, enId]
    range taken from the module-level config (empty string disables a bound);
    the source-side count is always unrestricted.
    """
    target_parts = ["select count(*)" + " from " + table + " where 1=1"]
    if beginId:
        target_parts.append(" and id >= " + str(beginId))
    if enId:
        target_parts.append(" and id <= " + str(enId))
    source_sql = "select count(*)" + " from " + table
    return "".join(target_parts), source_sql
def getTableRecodAmount(sourceDBUrl,sourceDBUser,sourceDBKey,sql,dataBase):
    """Run a count() query *sql* against *dataBase* and return the raw
    ``fetchall()`` result (callers index ``[0][0]`` for the count).
    """
    db, cursor = connectDB(sourceDBUrl, sourceDBUser, sourceDBKey, dataBase)
    try:
        cursor.execute(sql)
        amount = cursor.fetchall()
    finally:
        # BUG FIX: the original leaked the cursor and connection whenever
        # execute() raised; close them unconditionally.
        cursor.close()
        db.close()
    return amount
def getDataTransferSql(targetDBUrl,targetDBUser,targetDBKey,targetDBs):
    """Build the count() SQL for every non-filtered table of *targetDBs*.

    Returns ``(target_count_sqls, source_count_sqls, tables)``; the three
    lists are index-aligned.
    """
    all_tables = getDataFromTarget(targetDBUrl, targetDBUser, targetDBKey, 'show tables', targetDBs)
    # FIX (idiom): the original named one accumulator ``list``, shadowing the
    # builtin, and the other ``list2``.
    target_sqls = []
    source_sqls = []
    newTables = []
    for table in all_tables:
        if table[0] not in filterTables:
            sql_selectTarget, sql_selectSource = editeSql(table[0])
            target_sqls.append(sql_selectTarget)
            source_sqls.append(sql_selectSource)
            newTables.append(table)
    return target_sqls, source_sqls, newTables
def start():
    """For every migrated table, sum its row count across all target DBs and
    across all source DBs, print both totals and their difference.

    The count SQL is generated from the schema of the first target database.
    """
    target_sqls, source_sqls, table_rows = getDataTransferSql(
        targetDBUrl[0], targetDBUser[0], targetDBKey[0], targetDBs[0])
    for table_no, table_row in enumerate(table_rows):
        table_name = table_row[0]
        target_total = 0
        for db_no in range(len(targetDBs)):
            target_total += getTableRecodAmount(
                targetDBUrl[db_no], targetDBUser[db_no], targetDBKey[db_no],
                target_sqls[table_no], targetDBs[db_no])[0][0]
        print("目标库 " + "表:" + table_name + "数据总数为:" + str(target_total))
        source_total = 0
        for db_no in range(len(sourceDataBase)):
            source_total += getTableRecodAmount(
                sourceDBUrl[db_no], sourceDBUser[db_no], sourceDBKey[db_no],
                source_sqls[table_no], sourceDataBase[db_no])[0][0]
        print("源数据库 " + "表:" + table_name + "数据总数为:" + str(source_total))
        print("表:" + table_name + "相差" + str(source_total - target_total))


start()
Profile dataBaseConfig.ini
[soureDB]
sourcedburl =
sourcedbuser =
sourcedbkey =
sourcedatabse =
sourcedatabseflg =
[targetDB]
targetdburl =
targetdbuser =
targetdbkey =
targetdbs =
[progress]
currentsourcedatabseflg =
table =
pageno = 1
beginid =
endid =
[config]
pagesize =
databaseinterval = 10000000
[filter]
tables =
[compare]
beginId =
enId =
Configuration parameters (dataBaseConfig.ini) | |||||||||
Source Database Configuration | [soureDB] | ||||||||
sourcedburl = | Source database link | The source database, used between a plurality of databases "," separated | |||||||
sourcedbuser = | username | ||||||||
sourcedbkey = | password | ||||||||
sourcedatabse = | data storage name | ||||||||
sourcedatabseflg = | Source database flag (required, a flag bit corresponding to each database, and is the only) | ||||||||
Target database configuration | [targetDB] | ||||||||
targetdburl = | Target database link, use multiple databases, "" interval | ||||||||
targetdbuser = | username | ||||||||
targetdbkey = | password | ||||||||
targetdbs = | data storage name | ||||||||
Configure schedule | [progress] | ||||||||
currentsourcedatabseflg = | The current source database flag (required) | Initially all set to be empty, the script will automatically update the data; setting a general re-start data transfer | |||||||
table = alarm | Current table | ||||||||
pageno = 1 | current page | ||||||||
beginid = | Id start | ||||||||
endid = | End id
customize | |||||||||
[config] | |||||||||
pagesize = 5 | Each time the number of pieces of data extraction (optional) | ||||||||
databaseinterval = 10000000 | Different source database, ID spacing between the same form. For example, starting from the first database ID 1, the second database with a table ID from 1 + 10 million start, avoid ID conflicts | ||||||||
[filter] | |||||||||
tables = user | When you need to migrate data and migration statistics table | ||||||||
Access range data from the target database comparison | [compare] | ||||||||
beginId = 0 | Start ID | ||||||||
enId =40000000 | Stop ID | ||||||||
After configuring the parameters, run `python3 DataTanslate.py` from any directory in the prepared VM environment. If the migration fails, abort the script, run deleteDataFromTables.py first, and then run DataTanslate.py again.