[Experiment] MySQL: how much data does a table need before an index is worthwhile?
Data preparation
Use Python to randomly generate a large number of rows to serve as test data, as follows:
# db.py
import pymysql
# By default, rows are returned as a two-dimensional sequence:
# (
# (one row)
# (one row)
# )
# With cursor=pymysql.cursors.DictCursor, rows are returned in this format:
# [{'psw': 'e10adc3949ba59abbe56e057f20f883e'}]
class db:
    """Small PyMySQL helper with one read method and one write method.

    Connection settings are class attributes; override them on the class or
    on an instance before connecting. Creating an instance opens a
    connection immediately (see ``__init__``).
    """

    # Connection settings handed to pymysql.connect().
    host = ""
    port = 3306
    user = "root"
    psw = ""
    db_name = "testdb"
    # Persistent connection and DictCursor used by operate(); set in __init__.
    connect = None
    cursor = None

    def __init__(self):
        """Open the persistent connection and DictCursor used by ``operate``."""
        self.connect = self._open_connection()
        self.cursor = self.connect.cursor(cursor=pymysql.cursors.DictCursor)

    def _open_connection(self):
        """Return a new connection built from the configured settings."""
        return pymysql.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            passwd=self.psw,
            db=self.db_name,
        )

    def query(self, sql, args=None):
        """Run ``sql`` on a fresh, short-lived connection and return all rows.

        Args:
            sql: The statement to execute.
            args: Optional parameters forwarded to ``cursor.execute`` so that
                values can be bound by the driver instead of being formatted
                into ``sql`` by hand. ``None`` executes ``sql`` as-is.

        Returns:
            Whatever ``fetchall()`` returns for a DictCursor (one dict per row).

        Raises:
            Any exception raised while connecting or executing; the temporary
            connection is closed before the exception propagates.
        """
        connect = self._open_connection()
        try:
            cursor = connect.cursor(cursor=pymysql.cursors.DictCursor)
            cursor.execute(sql, args)
            return cursor.fetchall()
        finally:
            # Close even when execute()/fetchall() raises, so a failing query
            # does not leak the connection.
            connect.close()

    def operate(self, sql, args=None):
        """Execute a write statement on the persistent connection and commit.

        Args:
            sql: The statement to execute.
            args: Optional parameters forwarded to ``cursor.execute``.
                ``None`` executes ``sql`` as-is.

        Returns:
            The cursor's ``rowcount`` on success (also stored on
            ``self.count``), or ``False`` if executing or committing raised.
            Note that a successful statement affecting zero rows returns
            ``0``, which is falsy; compare with ``is False`` to detect errors.
        """
        try:
            self.cursor.execute(sql, args)
            self.connect.commit()
            self.count = self.cursor.rowcount
            # The original returned the undefined name ``count``; the
            # NameError was swallowed below, so every successful write was
            # reported as a failure.
            return self.count
        except Exception as e:
            # Best-effort by design: report the error and the offending
            # statement, then signal failure to the caller.
            print(e)
            print(sql)
            return False
import db as db
import random
# Module-level database handle used by the insert loop below. Constructing it
# opens a MySQL connection right away, because db.__init__ calls pymysql.connect.
my_db = db.db()
def get_vcode():
    """Return a random six-character code.

    The character classes are fixed by position: uppercase letter, lowercase
    letter, uppercase letter, digit, uppercase letter, digit. Each character
    is drawn with ``random.randint`` over an inclusive ASCII code range.
    """
    upper = (65, 90)    # 'A'..'Z'
    lower = (97, 122)   # 'a'..'z'
    digit = (48, 57)    # '0'..'9'
    layout = (upper, lower, upper, digit, upper, digit)
    return "".join(chr(random.randint(low, high)) for low, high in layout)
if __name__ == '__main__':
    # Fill test_data_100w with exactly one million rows ("100w" = 1,000,000).
    # The original loop, range(1, 1000000), ran only 999,999 times and
    # inserted data_1 values 2..1,000,000 while printing 1..999,999.
    total_rows = 1000000
    for row_no in range(1, total_rows + 1):
        # data_1 is the sequential row number; data_2 and data_3 are random
        # six-character codes from get_vcode().
        sql = "INSERT INTO `testdb`.`test_data_100w`(`data_1`, `data_2`, `data_3`) VALUES (%d, '%s', '%s')" % (row_no, get_vcode(), get_vcode())
        my_db.operate(sql)
        # Progress output: the number of the row just inserted.
        print("当前编号:"+str(row_no))
Data Format
start testing
Ten thousand rows
Before indexing
After indexing
Improvement: 0.001 seconds — negligible.
One hundred thousand rows
Before indexing
After indexing
Faster by 0.014 seconds — a small absolute gain, but an 8x improvement!
One million rows
Before indexing
After indexing
The query is 0.14 seconds faster — a 71x improvement.
30 million rows
Before indexing
After indexing
Building the index took a long time: 1673.908 s, almost 28 minutes!
Faster by about 4.2 seconds — a 1,411x improvement.
Conclusion
Because MySQL itself is already very well optimized,
with only tens of thousands of rows the advantage of an index is not obvious.
Once the data reaches hundreds of thousands of rows, an index significantly improves query speed, and the larger the data set, the more important indexing becomes. At tens of millions of rows, having or not having an index can mean a performance difference of more than a thousand times!