Create the test tables and data

-- Create the source table.
-- No primary key or unique constraint is declared, so fully identical rows are accepted.
create table t_source (
    item_id       int,
    created_time  datetime,
    modified_time datetime,
    item_name     varchar(20),
    other         varchar(20)
);

-- Create the target table as an empty table with the same definition as t_source.
create table t_target like t_source;

-- Generate 1 million test rows, of which 500,000 repeat the created_time and
-- item_name of another row.
--
-- sp_generate_data() takes no parameters and writes only to t_source.
-- It works through session user variables (@i, @created_time, @modified_time,
-- @item_name, @last_insert_id), which remain set after the call returns.
delimiter //
create procedure sp_generate_data()
begin
    -- Phase 1: insert 500,000 distinct rows. Row i gets item_id = i,
    -- created_time = modified_time = '2017-01-01' + i seconds, item_name = 'a<i>'.
    set @i := 1;

    while @i <= 500000 do
        set @created_time := date_add('2017-01-01', interval @i second);
        set @modified_time := @created_time;
        set @item_name := concat('a', @i);
        insert into t_source (item_id, created_time, modified_time, item_name, other)
        values (@i, @created_time, @modified_time, @item_name, 'OTHER');
        set @i := @i + 1;
    end while;
    commit;

    -- Phase 2: copy every existing row once. item_id and modified_time are
    -- shifted by 500,000 so the copies differ from their originals, while
    -- created_time and item_name are kept, producing the intended duplicates.
    set @last_insert_id := 500000;
    insert into t_source (item_id, created_time, modified_time, item_name, other)
    select item_id + @last_insert_id,
           created_time,
           -- No space between date_add and "(": under the default SQL mode
           -- (IGNORE_SPACE off) a space there is a syntax error for built-ins.
           date_add(modified_time, interval @last_insert_id second),
           item_name,
           'OTHER'
      from t_source;
    commit;
end
//
delimiter ;
    
-- Run the generator to populate t_source.
call sp_generate_data();

-- The source table has no primary key or unique constraint, so two completely identical rows may exist; insert one more record to simulate this situation.
-- Re-insert every row whose item_id is 1, creating fully identical records.
insert into t_source (item_id, created_time, modified_time, item_name, other)
select item_id,
       created_time,
       modified_time,
       item_name,
       other
  from t_source
 where item_id = 1;
        The source table now holds 1,000,001 records; after de-duplication, the target table should hold 500,000 records.

mysql> select count(*),count(distinct created_time,item_name) from t_source;
+----------+----------------------------------------+
| count(*) | count(distinct created_time,item_name) |
+----------+----------------------------------------+
|  1000001 |                                 500000 |
+----------+----------------------------------------+
1 row in set (1.92 sec)
---------------------

Guess you like

Origin www.cnblogs.com/hyhy904/p/11311204.html
Recommended