How to optimize MySQL with millions of rows of data

1. Prepare data

        1. Create the table structure

 CREATE TABLE users (
  id INT PRIMARY KEY AUTO_INCREMENT,
  username VARCHAR(20) NOT NULL,
  password VARCHAR(32) NOT NULL,
  email VARCHAR(50) NOT NULL,
  phone VARCHAR(20) NOT NULL,
  qq_id VARCHAR(20),
  wechat_id VARCHAR(20),
  nick_name VARCHAR(20),
  signature VARCHAR(50),
  create_time DATETIME NOT NULL,
  last_login DATETIME,
  update_time DATETIME NOT NULL
);

        2. Prepare the test data (100,000 rows at first)

INSERT INTO users (username, password, email, phone, qq_id, wechat_id, nick_name, signature, create_time, last_login, update_time)
SELECT 
  CONCAT('user', LPAD(id, 5, '0')) AS username, -- username: user00001 ~ user100000
  MD5('[email protected]') AS password, -- every password is the MD5 of [email protected]
  CONCAT(FLOOR(RAND() * 9000000000) + 1000000000) AS email, -- random numeric string used as the email field
  CONCAT('138', LPAD(FLOOR(RAND() * 1000000000), 8, '0')) AS phone, -- random 11-digit phone number
  LPAD((FLOOR(UNIX_TIMESTAMP() * 1000) << 12) + (FLOOR(RAND() * 4096)), 16, '0') AS qq_id, -- random snowflake-style ID (QQ)
  LPAD((FLOOR(UNIX_TIMESTAMP() * 1000) << 12) + (FLOOR(RAND() * 4096)), 16, '0') AS wechat_id, -- random snowflake-style ID (WeChat)
  CONCAT('nick', id) AS nick_name, -- nickname: nick1 ~ nick100000
  CONCAT('signature', id) AS signature, -- signature: signature1 ~ signature100000
  NOW() - INTERVAL FLOOR(RAND() * 10000) DAY AS create_time, -- random creation time
  NOW() - INTERVAL FLOOR(RAND() * 365) DAY AS last_login, -- random last-login time
  NOW() - INTERVAL FLOOR(RAND() * 10000) DAY AS update_time -- random update time
FROM
  (SELECT @rownum := @rownum + 1 AS id
   FROM (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4) t1,
        (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4) t2,
        (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4) t3,
        (SELECT @rownum := 0) t4) nums
WHERE 
  id <= 100000; -- intended to generate 100,000 rows of fake data
 

        The statement executes successfully but only 64 rows are inserted: the three cross-joined 4-row derived tables can generate at most 4 × 4 × 4 = 64 row numbers. At the time I also ran into the packet limit, since the maximum size of a single packet was 4 KB, so I raised max_allowed_packet to 268435456 bytes (256 MB). SET GLOBAL reports success, but querying the variable in the same session still shows the old value: the global setting only applies to new connections, and it is lost after a restart unless the configuration file is also modified.
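        Incidentally, to make this SELECT-based generator actually reach 100,000 rows, the cross-joined derived tables have to produce at least that many combinations. A minimal sketch (not from the original post): five 10-row digit tables give 10^5 = 100,000 row numbers, so the FROM clause above would become something like the following. (On MySQL 8.0 a recursive CTE can do the same job, provided cte_max_recursion_depth is raised.)

FROM
  (SELECT @rownum := @rownum + 1 AS id
   FROM (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d1,
        (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d2,
        (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d3,
        (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d4,
        (SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d5,
        (SELECT @rownum := 0) init) nums  -- 10 x 10 x 10 x 10 x 10 = 100,000 generated ids
WHERE
  id <= 100000;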

 SHOW VARIABLES LIKE 'max_allowed_packet';
 SET GLOBAL max_allowed_packet=268435456;
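        SET GLOBAL does not survive a server restart, so the larger packet size also has to go into the MySQL configuration file (my.cnf on Linux, my.ini on Windows), for example:

[mysqld]
# 256 MB; the client has its own max_allowed_packet limit if it also sends large packets
max_allowed_packet = 256M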

    

 

         In the end I fell back to the most primitive and crude method: a stored procedure that inserts the rows one at a time in a loop (far too slow, not recommended).

DELIMITER //

CREATE PROCEDURE insert_users(IN count INT)
BEGIN
  DECLARE i INT DEFAULT 1;
  DECLARE cur_time TIMESTAMP DEFAULT NOW();
  WHILE i <= count DO
    INSERT INTO users (username, password, email, phone, qq_id, wechat_id, nick_name, signature, create_time, last_login, update_time)
    VALUES (
      CONCAT('user', LPAD(i, 7, '0')),
      MD5('[email protected]'),
      CONCAT(FLOOR(RAND() * 9000000000) + 1000000000),
      CONCAT('138', LPAD(FLOOR(RAND() * 1000000000), 8, '0')),
      LPAD((FLOOR(UNIX_TIMESTAMP() * 1000) << 12) + (FLOOR(RAND() * 4096)), 16, '0'),
      LPAD((FLOOR(UNIX_TIMESTAMP() * 1000) << 12) + (FLOOR(RAND() * 4096)), 16, '0'),
      CONCAT('nick', i),
      CONCAT('signature', i),
      cur_time - INTERVAL FLOOR(RAND() * 10000) DAY,
      cur_time - INTERVAL FLOOR(RAND() * 365) DAY,
      cur_time - INTERVAL FLOOR(RAND() * 10000) DAY
    );
    SET i = i + 1;
  END WHILE;
END//
DELIMITER ;

        Execute it:

CALL insert_users(1000000);

       Then came a very long wait (my patience was exhausted): it took 13950.374 s in total, nearly 4 hours, to finish.
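        A large part of that time is spent committing every single-row INSERT in its own transaction (autocommit). The original post does not try this, but as a rough sketch, grouping the inserts into explicit transactions inside the same kind of procedure usually cuts the runtime dramatically:

DELIMITER //

-- Sketch only: same loop as above, but committed in chunks of 10,000 rows.
CREATE PROCEDURE insert_users_batched(IN count INT)
BEGIN
  DECLARE i INT DEFAULT 1;
  START TRANSACTION;
  WHILE i <= count DO
    INSERT INTO users (username, password, email, phone, qq_id, wechat_id, nick_name, signature, create_time, last_login, update_time)
    VALUES (
      CONCAT('user', LPAD(i, 7, '0')),
      MD5('[email protected]'),
      CONCAT(FLOOR(RAND() * 9000000000) + 1000000000),
      CONCAT('138', LPAD(FLOOR(RAND() * 1000000000), 8, '0')),
      LPAD((FLOOR(UNIX_TIMESTAMP() * 1000) << 12) + (FLOOR(RAND() * 4096)), 16, '0'),
      LPAD((FLOOR(UNIX_TIMESTAMP() * 1000) << 12) + (FLOOR(RAND() * 4096)), 16, '0'),
      CONCAT('nick', i),
      CONCAT('signature', i),
      NOW() - INTERVAL FLOOR(RAND() * 10000) DAY,
      NOW() - INTERVAL FLOOR(RAND() * 365) DAY,
      NOW() - INTERVAL FLOOR(RAND() * 10000) DAY
    );
    IF i % 10000 = 0 THEN
      COMMIT;              -- flush every 10,000 rows instead of every row
      START TRANSACTION;
    END IF;
    SET i = i + 1;
  END WHILE;
  COMMIT;
END//
DELIMITER ;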

        Optimization: Multi-threaded batch insertion

                        With a pool of 200 threads, each task inserting 1,000 rows as a single batch (and an optimistic-lock style CAS counter tracking progress), the whole million rows take just under 2 minutes.


/**
 * @author LJL
 * @version 1.0
 * @title TestUser1
 * @date 2023/6/11 0:46
 * @description TODO
 */

@SpringBootTest
@RunWith(SpringJUnit4ClassRunner.class)
public class TestUser1 {

    @Autowired
    Users1Service users1Service;

    static volatile AtomicInteger total = new AtomicInteger(0);

    @Test
    public void save() throws InterruptedException {

        long start = System.currentTimeMillis();
        /**
         * Create a pool of 200 threads that run the insert tasks concurrently,
         * 1,000 rows per task (1,000 tasks in total).
         * Elapsed time: 111527 ms
         */
        ExecutorService executorService = Executors.newFixedThreadPool(200);
        for (int i = 0; i < 1000; i++) {
            executorService.submit(new Task());
        }

        while (total.get() < 1000000){
            Thread.sleep(10000);
        }
        System.out.println("total: " +total.get());
        //shut down the thread pool
        executorService.shutdown();
        System.out.println("Finished, elapsed time: " + (System.currentTimeMillis() - start) );

    }


    public class Task implements Runnable{

        @Override
        public void run() {
            System.out.println("Thread " + Thread.currentThread().getId() + " started its task");
            List<Users1> users1List = createUsers1();
            int row = users1Service.batchInsert(users1List);
            System.out.println("Thread " + Thread.currentThread().getId() + " inserted " + row + " records");
        }


        private  List<Users1> createUsers1(){
            List<Users1> users1List = new ArrayList<>();
            for (int i = 1; i <= 1000; i++) {
                // optimistic-lock style counter: retry the CAS until it succeeds
                int current;
                do {
                    current = total.get();
                } while (!total.compareAndSet(current, current + 1));
                Users1 users1 = builder();
                users1List.add(users1);
            }
            return users1List;
        }


    }



    private Users1 builder(){

        Users1 users1 = new Users1();

        String password = "[email protected]";
        String pwdMD5 = md5(password);
        String userName = "user" + generateRandomNumber(11);
        String email = generateRandomNumber(11);
        String phone = generateRandomNumber(11);
        String qqId =  generateRandomNumber(11);
        String wxId =  generateRandomNumber(11);
        String nickName = "nick" + generateRandomNumber(11);
        LocalDateTime create_time =  LocalDateTime.now();
        LocalDateTime last_login =  LocalDateTime.now();
        LocalDateTime update_time = LocalDateTime.now();


        users1.setEmail(email);
        users1.setPassword(pwdMD5);
        users1.setNickName(nickName);
        users1.setPhone(phone);
        users1.setUsername(userName);
        users1.setWechatId(wxId);
        users1.setQqId(qqId);
        users1.setCreateTime(create_time);
        users1.setUpdateTime(update_time);
        users1.setLastLogin(last_login);

        return users1;
    }




    /**
     * Returns the hex MD5 digest of the given string.
     * @param s the plain-text input
     * @return a 32-character hex digest, or null if the MD5 algorithm is unavailable
     */
    public static String md5(String s) {
        //obtain an MD5 digest instance
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            //feed the input bytes into the digest
            md.update(s.getBytes());
            //BigInteger(signum, magnitude): signum 1 marks the value as positive;
            //format to 32 hex characters so leading zeros are not dropped
            return String.format("%032x", new BigInteger(1, md.digest()));
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        }
        return null;
    }


    /**
     * @param  length    length of the random number
     * @return           a random numeric string with exactly that many digits
     */
     */
    public static String generateRandomNumber(int length) {
        Random random = new Random();
        StringBuilder sb = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            int num = random.nextInt(10); // random digit between 0 and 9
            sb.append(num);
        }
        return sb.toString();
    }

}
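        The batchInsert method of Users1Service is not shown in the original post. As a rough sketch of what a multi-row batch insert can look like (plain JDBC here, with hypothetical getter names matching the setters above, and assuming rewriteBatchedStatements=true in the JDBC URL so the driver coalesces the batch into multi-value INSERT statements):

// imports: java.sql.Connection, java.sql.PreparedStatement, java.sql.SQLException, java.util.List
public int batchInsert(Connection conn, List<Users1> users) throws SQLException {
    String sql = "INSERT INTO users (username, password, email, phone, qq_id, wechat_id, "
               + "nick_name, signature, create_time, last_login, update_time) "
               + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
    conn.setAutoCommit(false);                   // commit the whole batch at once
    try (PreparedStatement ps = conn.prepareStatement(sql)) {
        for (Users1 u : users) {
            ps.setString(1, u.getUsername());
            ps.setString(2, u.getPassword());
            ps.setString(3, u.getEmail());
            ps.setString(4, u.getPhone());
            ps.setString(5, u.getQqId());
            ps.setString(6, u.getWechatId());
            ps.setString(7, u.getNickName());
            ps.setString(8, u.getSignature());
            ps.setObject(9, u.getCreateTime());  // JDBC 4.2+ maps LocalDateTime to DATETIME
            ps.setObject(10, u.getLastLogin());
            ps.setObject(11, u.getUpdateTime());
            ps.addBatch();
        }
        int[] counts = ps.executeBatch();
        conn.commit();
        return counts.length;                    // number of rows queued in the batch
    }
}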

        Data volume: one million rows, data file size: 188416, query time: 4.1198 s.

 

 

        Query by primary key: EXPLAIN shows type = const, extremely fast.

        Query a single row by qq_id (no index yet): type = ALL, a full table scan, very slow.

        Batch query by qq_id (IN list): still type = ALL, since the rows have to be fetched from the table and there is no index to use.
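        For reference, these observations correspond to EXPLAIN output along these lines (the lookup values are placeholders):

EXPLAIN SELECT * FROM users WHERE id = 12345;                 -- primary-key lookup: type = const
EXPLAIN SELECT * FROM users WHERE qq_id = '0001686412345678'; -- un-indexed column: type = ALL
EXPLAIN SELECT * FROM users WHERE qq_id IN ('0001686412345678', '0001686412345679'); -- still type = ALL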

         Add a normal (non-unique) index on qq_id.

                    Single-value query: now uses the index.

         Batch query with the index in place.
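         The index itself (the index name is my own choice):

ALTER TABLE users ADD INDEX idx_qq_id (qq_id);   -- normal secondary index on qq_id

-- With the index in place the same lookups report type = ref / range instead of ALL
EXPLAIN SELECT * FROM users WHERE qq_id = '0001686412345678';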

        Replace it with a unique index: because the data was generated without any index or uniqueness check, duplicate qq_id values exist, so the unique index cannot be created; a composite (joint) index is used instead.

              Single-row query with the composite index.

                 Batch query with the composite index.
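                 A sketch of that step (the column combination for the composite index is an assumption; the original post does not show it):

ALTER TABLE users ADD UNIQUE INDEX uk_qq_id (qq_id);          -- fails with ERROR 1062 (duplicate entry) when duplicates exist
ALTER TABLE users ADD INDEX idx_qq_wechat (qq_id, wechat_id); -- assumed fallback: composite index over columns queried together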

               At this point I did not feel like testing any further, so let's go straight to the summary.

    Summary:

                1. Use an index whenever a query can use one, and avoid the full scans caused by queries that have to go back to the table for every row.

                2. Create a composite (joint) index for columns that are frequently queried together, and put the most frequently filtered column on the left (leftmost-prefix rule); see the sketch after this list.

                3. Batch operations wherever possible: one batched statement is far more efficient than many single-row statements.

                4. An auto-increment primary key keeps the B+ tree efficient; it beats UUID or snowflake-style IDs as the clustered key.

                5. A moderate number of indexes is enough; every extra index has to be maintained on every write.
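                A small sketch of point 2 (the column choice here is just an example on the users table):

-- Composite index with the most frequently filtered column first
ALTER TABLE users ADD INDEX idx_phone_create (phone, create_time);

-- Can seek on the index: the leftmost column (phone) is constrained
EXPLAIN SELECT id, phone, create_time FROM users WHERE phone = '13800000000' AND create_time >= '2023-01-01';

-- Cannot seek on the index: the leftmost column is missing, so at best it scans the whole index
EXPLAIN SELECT id, phone, create_time FROM users WHERE create_time >= '2023-01-01';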

                

        

       

Origin blog.csdn.net/2201_75630288/article/details/131144672