概述
本实验目的是尝试选择一种最优方式向MySQL数据库插入5000W条随机测试数据。以下内容包括JDBC、Mybatis和Spring JDBCTemplate各种插入方式的效率分析、参数调整以及超大数据量插入的解决办法。
JDBC各种插入方式比较
我们知道,jdbc将数据插入数据库有多种方式,包括常用的单条记录插入、多Values拼接SQL字符串和批量插入的方式,在此基础上还可以增加事务提交和预编译的策略来提高插入效率,下面以10W数据为例,分别测试单条记录、多Values拼接SQL字符串、批量插入、多Values拼接SQL字符串+事务提交、批量插入+事务提交形式的执行效率。
单条记录插入
Connection conn = null;
PreparedStatement pstm =null;
ResultSet rt = null;
try {
conn = DBConnection.getConnection();
String sql = "INSERT INTO student2(c_id,stu_name,stu_number,stu_phone_number,c_name,stu_create_time,status) VALUES(?,?,?,?,?,?,?)";
pstm = conn.prepareStatement(sql);
Long startTime = System.currentTimeMillis();
int count = 0;
System.out.println("开始插入...");
Student student = null;
for (int i = 0; i < 100000; i++){
pstm.setLong(1, i);
pstm.setString(2,String.valueOf(i));
pstm.setString(3, String.valueOf(i));
pstm.setString(4, String.valueOf(i));
pstm.setString(5, String.valueOf(i));
pstm.setString(6, new java.util.Date().toLocaleString());
pstm.setBoolean(7, true);
pstm.execute();
}
Long endTime = System.currentTimeMillis();
System.out.println("插入完成,总用时:" + (endTime - startTime)+"ms");
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException(e);
}finally{
if(pstm!=null){
try {
pstm.close();
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
if(conn!=null){
try {
conn.close();
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
}
插入条数:100000条
用时:155742ms、157012ms、156323ms
多Values形式
// Multi-row VALUES measurement: concatenate up to chunkSize rows into one INSERT
// statement and execute it once per chunk.
final String insertPrefix = "INSERT INTO student(c_id,stu_name,stu_number,stu_phone_number,c_name,stu_create_time,status) VALUES";
final int chunkSize = 25000;
try (Connection conn = DBConnection.getConnection()) {
    StringBuilder sql = new StringBuilder(insertPrefix);
    long startTime = System.currentTimeMillis();
    System.out.println("开始插入");
    int count = 0;
    for (int i = 0; i < 100000; i++) {
        if (count != 0) {
            sql.append(",");
        }
        count++;
        // Values are generated by this loop (not external input), so they are inlined as literals.
        sql.append("("+i+",'"+i+"','"+i+"','"+i+
                "','"+i+"','"+new java.util.Date().toLocaleString()+"',"+true+")");
        if (count == chunkSize) {
            // Each chunk gets its own statement; close it right away instead of leaking it.
            try (PreparedStatement pstm = conn.prepareStatement(sql.toString())) {
                pstm.execute();
            }
            count = 0;
            sql = new StringBuilder(insertPrefix);
        }
    }
    if (count > 0) {
        // Flush the trailing partial chunk when the total is not a multiple of chunkSize.
        try (PreparedStatement pstm = conn.prepareStatement(sql.toString())) {
            pstm.execute();
        }
    }
    long endTime = System.currentTimeMillis();
    System.out.println("插入数据库总用时:" + (endTime - startTime)+"ms");
} catch (Exception e) {
    throw new RuntimeException(e);
}
插入条数:100000
插入耗时:2092ms、1922ms、2012ms
批量插入
Connection conn = null;
PreparedStatement pstm =null;
try {
conn = DBConnection.getConnection();
String sql = "INSERT INTO student2(c_id,stu_name,stu_number,stu_phone_number,c_name,stu_create_time,status) VALUES(?,?,?,?,?,?,?)";
pstm = conn.prepareStatement(sql);
Long startTime = System.currentTimeMillis();
int count =0;
System.out.println("开始插入...");
for(int i = 0;i<100000;i++){
pstm.setLong(1,i);
pstm.setString(2,String.valueOf(i));
pstm.setString(3,String.valueOf(i));
pstm.setString(4,String.valueOf(i));
pstm.setString(5,String.valueOf(i));
pstm.setString(6,new java.util.Date().toLocaleString());
pstm.setBoolean(7,true);
pstm.addBatch();
count++;
if(count>=GenerateRandomData.BATCH_SIZE) {
//每GenerateRandomData.BATCH_SIZE条数据进行一次批量插入操作
int[] ids = pstm.executeBatch();
pstm.clearBatch();
count = 0;
}
}
Long endTime = System.currentTimeMillis();
System.out.println("插入完成,总用时:" + (endTime - startTime)+"ms");
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException(e);
}finally{
if(pstm!=null){
try {
pstm.close();
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
if(conn!=null){
try {
conn.close();
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
}
插入条数:100000
插入耗时:135883ms、136233ms、135235ms
多Values拼接SQL字符串+事务
// Multi-row VALUES + manual transaction: one concatenated INSERT per chunk,
// committed explicitly after each chunk.
final String insertPrefix = "INSERT INTO student(c_id,stu_name,stu_number,stu_phone_number,c_name,stu_create_time,status) VALUES";
final int chunkSize = 25000;
try (Connection conn = DBConnection.getConnection()) {
    // Manual transaction control.
    conn.setAutoCommit(false);
    try {
        StringBuilder sql = new StringBuilder(insertPrefix);
        long startTime = System.currentTimeMillis();
        System.out.println("开始插入");
        int count = 0;
        for (int i = 0; i < 100000; i++) {
            if (count != 0) {
                sql.append(",");
            }
            count++;
            // Values are generated by this loop (not external input), so they are inlined as literals.
            sql.append("("+i+",'"+i+"','"+i+"','"+i+ "','"+i+"','"+new java.util.Date().toLocaleString()+"',"+true+")");
            if (count == chunkSize) {
                // Close each chunk's statement instead of leaking all but the last one.
                try (PreparedStatement pstm = conn.prepareStatement(sql.toString())) {
                    pstm.execute();
                }
                conn.commit();
                count = 0;
                sql = new StringBuilder(insertPrefix);
            }
        }
        if (count > 0) {
            // Flush and commit the trailing partial chunk.
            try (PreparedStatement pstm = conn.prepareStatement(sql.toString())) {
                pstm.execute();
            }
            conn.commit();
        }
        long endTime = System.currentTimeMillis();
        System.out.println("插入数据库总用时:" + (endTime - startTime));
    } catch (Exception e) {
        // Discard the uncommitted chunk; keep the original failure as the primary exception.
        try {
            conn.rollback();
        } catch (SQLException rollbackFailure) {
            e.addSuppressed(rollbackFailure);
        }
        throw e;
    }
} catch (Exception e) {
    throw new RuntimeException(e);
}
插入条数:100000
插入耗时:2021ms、2025ms、1921ms
批量插入+事务提交
Connection conn = null;
PreparedStatement pstm =null;
try {
conn = DBConnection.getConnection();
String sql = "INSERT INTO student2(c_id,stu_name,stu_number,stu_phone_number,c_name,stu_create_time,status) VALUES(?,?,?,?,?,?,?)";
pstm = conn.prepareStatement(sql);
//手动事务
conn.setAutoCommit(false);
Long startTime = System.currentTimeMillis();
int count =0;
System.out.println("开始插入...");
for(int i = 0;i<100000;i++){
pstm.setLong(1,i);
pstm.setString(2,String.valueOf(i));
pstm.setString(3,String.valueOf(i));
pstm.setString(4,String.valueOf(i));
pstm.setString(5,String.valueOf(i));
pstm.setString(6,new java.util.Date().toLocaleString());
pstm.setBoolean(7,true);
pstm.addBatch();
count++;
if(count>=25000) {
//每25000条数据进行一次批量插入操作
int[] ids = pstm.executeBatch();
pstm.clearBatch();
conn.commit();
count = 0;
}
}
Long endTime = System.currentTimeMillis();
System.out.println("插入完成,总用时:" + (endTime - startTime)+"ms");
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException(e);
}finally{
if(pstm!=null){
try {
pstm.close();
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
if(conn!=null){
try {
conn.close();
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
}
插入条数:100000
插入耗时:8970ms、9051ms、8825ms
预编译+批量插入+事务提交
在批量插入+事务提交基础上修改JDBCUrl,添加useServerPrepStmts=true
插入条数:100000
插入耗时:8022ms、8058ms、7983ms
Mybatis插入方式比较
mybatis主要有三种插入方式:for循环insert、mybatis的insertBatch、批量foreach插入
for循环insert
// Per-row MyBatis baseline: build one random Student per pass and insert it
// through the mapper immediately; the whole loop is timed.
final long begin = System.currentTimeMillis();
int done = 0;
while (done < 100000) {
    Student row = new Student();
    // Random name: surname followed by given name
    String surname = lastNames[randomInt(14)];
    String givenName = firstNames[randomInt(31)];
    row.setStuName(surname + givenName);
    // Random student number
    row.setStuNumber(stuNumberStarts[randomInt(5)] + getRandom(9));
    // College id and college name are taken from the same random index
    final int collegeIdx = randomInt(6);
    row.setcId(collegeIDs[collegeIdx]);
    row.setcName(collegeNames[collegeIdx]);
    // Status is false only when the draw is 0, i.e. with probability 0.1%
    Boolean active = randomInt(1000) > 0;
    row.setStatus(active);
    // Random phone number
    row.setStuPhoneNumber(phoneNumberStarts[randomInt(7)] + getRandom(8));
    row.setStuCreateTime(new Date());
    studentDao.insert(row);
    done++;
}
final long finish = System.currentTimeMillis();
System.out.println("用时:"+(finish-begin));
插入条数:100000条
插入耗时:181488ms、181001ms、192105ms
mybatis的insertBatch
SqlSessionTemplate sqlSessionTemplate = new SqlSessionTemplate(sqlSessionFactory);
SqlSession sqlSession = sqlSessionTemplate.getSqlSessionFactory().openSession(ExecutorType.BATCH,false);
Long start = System.currentTimeMillis();
for(int i = 0;i<100000;i++){
//随机生成姓名
Student student = new Student();
student.setStuName(lastNames[randomInt(14)]+firstNames[randomInt(31)]);
//随机生成学号
student.setStuNumber(stuNumberStarts[randomInt(5)]+getRandom(9));
//随机生成学院ID与名称
int randomInt = randomInt(6);
student.setcId(collegeIDs[randomInt]);
student.setcName(collegeNames[randomInt]);
//随机生成状态,false概率为0.1%
Boolean flag = randomInt(1000)>0?true:false;
student.setStatus(flag);
//随机生成手机号
student.setStuPhoneNumber(phoneNumberStarts[randomInt(7)]+getRandom(8));
student.setStuCreateTime(new Date());
studentDao.insert(student);
}
sqlSession.commit();
Long end = System.currentTimeMillis();
System.out.println("用时:"+(end-start));
插入条数:100000条
插入耗时:162081ms、160213ms、158989ms(注意:该耗时是在通过注入的studentDao执行插入、而非通过sqlSession.getMapper获取的Mapper执行插入的情况下测得的,因此并未真正使用BATCH执行器)
批量foreach插入
// MyBatis foreach measurement: collect 100000 random students into the list,
// then hand the whole list to a single insertBatch call.
final long begin = System.currentTimeMillis();
for (int n = 0; n != 100000; n++) {
    // Draw the random name parts and the student number up front
    String fullName = lastNames[randomInt(14)] + firstNames[randomInt(31)];
    String number = stuNumberStarts[randomInt(5)] + getRandom(9);
    // College id and college name are taken from the same random index
    final int collegeIdx = randomInt(6);
    // Status is false only when the draw is 0, i.e. with probability 0.1%
    Boolean active = randomInt(1000) > 0;
    // Random phone number
    String phone = phoneNumberStarts[randomInt(7)] + getRandom(8);

    Student row = new Student();
    row.setStuName(fullName);
    row.setStuNumber(number);
    row.setcId(collegeIDs[collegeIdx]);
    row.setcName(collegeNames[collegeIdx]);
    row.setStatus(active);
    row.setStuPhoneNumber(phone);
    row.setStuCreateTime(new Date());
    students.add(row);
}
studentDao.insertBatch(students);
final long finish = System.currentTimeMillis();
System.out.println("用时:"+(finish-begin));
插入条数:100000条
插入耗时:2898ms、2697ms、2813ms
Spring JDBCTemplate
// Spring JdbcTemplate measurement: build 100000 parameter arrays and pass them
// to one batchUpdate call.
List<Object[]> rows = new ArrayList<>();
final long begin = System.currentTimeMillis();
final String insertSql = " insert into student(c_id, stu_name, stu_number,c_name, stu_phone_number, stu_create_time,status) values(?,?,?,?,?,?,?)";
int built = 0;
while (built < 100000) {
    // College id and college name are taken from the same random index
    final int collegeIdx = randomInt(6);
    // Element order matches the column order of the INSERT statement
    Object[] args = new Object[] {
            collegeIDs[collegeIdx],
            lastNames[randomInt(14)] + firstNames[randomInt(31)],
            stuNumberStarts[randomInt(5)] + getRandom(9),
            collegeNames[collegeIdx],
            phoneNumberStarts[randomInt(7)] + getRandom(8),
            new Date(),
            randomInt(1000) > 0
    };
    rows.add(args);
    built++;
}
jdbcTemplate.batchUpdate(insertSql, rows);
final long finish = System.currentTimeMillis();
System.out.println("用时:"+(finish-begin));
插入条数:100000条
插入耗时:142744ms、145688ms、140214ms
多种方式比较
插入方式 | 第一次结果 | 第二次结果 | 第三次结果 | 平均值 |
---|---|---|---|---|
JDBC for循环insert | 155742ms | 157012ms | 156323ms | 156359ms |
JDBC多Values | 2092ms | 1922ms | 2012ms | 2009ms |
JDBC批量插入 | 135883ms | 136233ms | 135235ms | 135784ms |
JDBC多Values+事务提交 | 2021ms | 2025ms | 1921ms | 1989ms |
JDBC批量插入+事务提交 | 8970ms | 9051ms | 8825ms | 8949ms |
JDBC预编译+批量插入+事务提交 | 8022ms | 8058ms | 7983ms | 8021ms |
Mybatis for循环insert | 181488ms | 181001ms | 192105ms | 184865ms |
Mybatis的insertBatch | 162081ms | 160213ms | 158989ms | 160428ms |
批量foreach插入 | 2898ms | 2697ms | 2813ms | 2803ms |
Spring JDBCTemplate | 142744ms | 145688ms | 140214ms | 142882ms |
综上结果分析得出:
- 大数据量插入时,批量插入方式要优于单条插入
- 手动事务提交可以大幅度提高数据写入速度
- 预编译也可以提升数据库插入速度
- Mybatis 批量foreach插入与多values的方式在10W数据量时测试结果明显优于其他方式,最佳为多Values形式。
虽然本次实验结果显示多Values形式的写入方式优势明显,但是在本机测试时,当数据量达到800W左右时将会出现OOM异常。
生成5000W随机测试数据
通过上面的比较,最终选择的方案是使用Mybatis 批量foreach插入的方式来实现此功能。
生成数据操作
直接上代码
properties配置文件
# College names
collection.collegeNames = 信息学院,美术学院,机械学院,建筑学院,物流学院,外国语学院,马克思学院,历史学院,统计学院,化工学院,环境学院,地理学院,物理学院,教育学院
# Teaching buildings
collection.address = 弘毅楼,致远楼,明义楼,锐思楼
# Student surnames
student.lastNames = 张,王,李,赵,刘,胡,陈,苗,戴,习,毛,朱,韩,陆,孙
# Student given names
student.firstNames = 兰,兰兰,明,明明,红,楠,中,庆,滔,人,同,娟,娟娟,丽,美丽,利,陆,空间,建国,国庆,狗蛋,狗剩,营,丽雅,长城,尚,飞,果,娜娜,亚楠,岁月
# First three digits of phone numbers
phoneNumberStarts = 133,150,153,178,177,180,181,189,187,130,131,132
# First four digits of the student number: enrollment year
student.stuNumberStarts = 2017,2018,2016,2019,2020
# Number of students
student.size = 50000000
学院数据生成
xml
<!-- Insert one college row; generated keys are enabled and mapped to the cId property -->
<insert id="insert" keyProperty="cId" useGeneratedKeys="true">
insert into college(c_name, c_address, c_phone_number, c_create_time)
values (#{cName}, #{cAddress}, #{cPhoneNumber}, #{cCreateTime})
</insert>
dao接口
/**
* Adds a college.
*
* @param college the college to insert
* @return the result of the insert (presumably the affected row count; confirm against the mapper)
*/
int insert(College college);
serviceImpl
package com.zsl.codeClub.service.impl;
import com.github.pagehelper.PageHelper;
import com.github.pagehelper.PageInfo;
import com.zsl.codeClub.dao.StudentDao;
import com.zsl.codeClub.entity.College;
import com.zsl.codeClub.dao.CollegeDao;
import com.zsl.codeClub.service.CollegeService;
import com.zsl.codeClub.utils.JsonResult;
import com.zsl.codeClub.utils.PageUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;
/**
 * College service: generates the college test data from the configured name lists.
 *
 * @author sl.zhang
 * @since 2020-07-24 09:55:49
 */
@Service("collegeService")
public class CollegeServiceImpl implements CollegeService {

    /** Shared generator, so a new Random is not allocated for every random value. */
    private static final Random RANDOM = new Random();

    /** Number of distinct decimal digits; the upper bound for {@link #getRandom(int)}. */
    private static final int DIGIT_COUNT = 10;

    @Resource
    private CollegeDao collegeDao;

    // College names
    @Value("${collection.collegeNames}")
    private String[] collegeNames;

    // Addresses
    @Value("${collection.address}")
    private String[] address;

    // First three digits of phone numbers
    @Value("${phoneNumberStarts}")
    private String[] phoneNumberStarts;

    /**
     * Inserts one college per configured college name, one row at a time.
     *
     * @return a result carrying the elapsed milliseconds under "time" and the
     *         summed return values of the insert calls under "size"
     */
    @Override
    public JsonResult insertBatch() {
        int count = 0;
        long start = System.currentTimeMillis();
        for (String collegeName : collegeNames) {
            College college = new College();
            college.setcName(collegeName);
            // Status is false only when the draw is 0, i.e. with probability 0.1%
            Boolean flag = randomInt(1000) > 0;
            college.setStatus(flag);
            // Random phone number: configured prefix plus 8 random digits
            college.setcPhoneNumber(phoneNumberStarts[randomInt(phoneNumberStarts.length)] + getRandom(8));
            college.setcAddress(address[randomInt(address.length)]);
            college.setcCreateTime(new Date());
            count += collegeDao.insert(college);
        }
        long end = System.currentTimeMillis();
        return JsonResult.ok("time", end - start).put("size", count);
    }

    /**
     * Returns a random integer in the range [0, n-1].
     *
     * @param n exclusive upper bound; must be positive
     * @return a random value between 0 (inclusive) and n (exclusive)
     */
    public static int randomInt(int n) {
        return RANDOM.nextInt(n);
    }

    /**
     * Generates a random string of {@code count} distinct decimal digits.
     *
     * <p>The digits 0-9 are shuffled and the first {@code count} of them are taken,
     * so no digit repeats and a leading zero is possible. The digits are appended
     * directly to a string, which keeps a leading zero and cannot overflow an
     * {@code int} when {@code count} is 10.
     *
     * @param count number of digits, at most 10; for values below 1 the string "0"
     *              is returned, as before
     * @return the digit string
     * @throws IllegalArgumentException if {@code count} is greater than 10
     */
    public String getRandom(int count) {
        if (count > DIGIT_COUNT) {
            throw new IllegalArgumentException("count must be at most " + DIGIT_COUNT + " but was " + count);
        }
        if (count < 1) {
            // Preserved behavior: with no digits requested the result is "0".
            return "0";
        }
        int[] digits = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
        // Shuffle: swap a random element of the remaining prefix into its last slot
        for (int i = DIGIT_COUNT; i > 1; i--) {
            int index = RANDOM.nextInt(i);
            int tmp = digits[index];
            digits[index] = digits[i - 1];
            digits[i - 1] = tmp;
        }
        StringBuilder result = new StringBuilder(count);
        for (int i = 0; i < count; i++) {
            result.append(digits[i]);
        }
        return result.toString();
    }
}
学生数据生成
xml
<!-- Batch insert: a single INSERT with one VALUES tuple per element of the "list" collection, separated by commas -->
<!-- NOTE(review): an empty list would presumably leave nothing after "values"; confirm that callers never pass one -->
<insert id="insertBatch">
insert into student(c_id, stu_name, stu_number,c_name, stu_phone_number, stu_create_time,status)
values
<foreach collection="list" item="item" separator=",">
(#{item.cId},#{item.stuName},#{item.stuNumber},#{item.cName},#{item.stuPhoneNumber},#{item.stuCreateTime},#{item.status})
</foreach>
</insert>
<!-- Query the maximum student number, returned as a string -->
<select id="selectMaxStuNumber" resultType="string">
select max(stu_number) from student;
</select>
dao接口
/**
* Batch insert.
*
* @param students the students to insert
* @return the result of the insert (presumably the affected row count; confirm against the mapper)
*/
int insertBatch(List<Student> students);
/**
* Gets the maximum student number.
*
* @return the maximum student number
*/
String selectMaxStuNumber();
serviceImpl
package com.zsl.codeClub.service.impl;
import com.github.pagehelper.PageHelper;
import com.github.pagehelper.PageInfo;
import com.zsl.codeClub.dao.CollegeDao;
import com.zsl.codeClub.entity.College;
import com.zsl.codeClub.entity.Student;
import com.zsl.codeClub.dao.StudentDao;
import com.zsl.codeClub.service.StudentService;
import com.zsl.codeClub.utils.JsonResult;
import com.zsl.codeClub.utils.PageUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;
/**
 * Student service: generates random student test data and inserts it in batches.
 *
 * @author sl.zhang
 * @since 2020-07-24 10:22:31
 */
@Service("studentService")
public class StudentServiceImpl implements StudentService {

    /** Shared generator, so a new Random is not allocated for every random value. */
    private static final Random RANDOM = new Random();

    /** Number of students sent to the database per insertBatch call. */
    private static final int BATCH_SIZE = 25000;

    /** Number of distinct decimal digits; the upper bound for {@link #getRandom(int)}. */
    private static final int DIGIT_COUNT = 10;

    /** Width the sequence part of a student number is zero-padded to. */
    private static final int STU_NUMBER_WIDTH = 10;

    @Resource
    private StudentDao studentDao;

    @Resource
    private CollegeDao collegeDao;

    // Names
    @Value("${student.lastNames}")
    private String[] lastNames;

    @Value("${student.firstNames}")
    private String[] firstNames;

    // First three digits of phone numbers
    @Value("${phoneNumberStarts}")
    private String[] phoneNumberStarts;

    // First four digits of the student number: enrollment year
    @Value("${student.stuNumberStarts}")
    private String[] stuNumberStarts;

    // Number of students
    @Value("${student.size}")
    private Integer size;

    /**
     * Generates {@code size} random students and inserts them in batches of
     * {@link #BATCH_SIZE}.
     *
     * @return an error result (code 400) when no college exists; otherwise a result
     *         carrying the elapsed milliseconds under "time" and the summed return
     *         values of the insertBatch calls under "size"
     */
    @Override
    public JsonResult generate() {
        long start = System.currentTimeMillis();
        // Load the colleges that students are assigned to
        List<College> colleges = collegeDao.queryAll(null);
        if (colleges == null || colleges.isEmpty()) {
            return JsonResult.error(400,"未添加学院信息");
        }
        // Continue numbering after the largest existing student number.
        // NOTE(review): the stored student number is year prefix + sequence and the whole
        // string is parsed here, so the next sequence appears to include the prefix; confirm intent.
        String stuNumber = studentDao.selectMaxStuNumber();
        long startNumber = 0L;
        if (stuNumber != null) {
            startNumber = Long.parseLong(stuNumber);
        }
        // Running total of inserted rows
        long count = 0L;
        List<Student> students = new ArrayList<>(BATCH_SIZE);
        for (int i = 0; i < size; i++) {
            Student student = new Student();
            // Random name; bounds come from the configured arrays so every entry can be chosen
            student.setStuName(lastNames[randomInt(lastNames.length)] + firstNames[randomInt(firstNames.length)]);
            // Student number: random enrollment year plus the zero-padded next sequence value
            student.setStuNumber(stuNumberStarts[randomInt(stuNumberStarts.length)] + generateStuNumber(++startNumber));
            // College id and college name come from the same randomly chosen college
            College college = colleges.get(randomInt(colleges.size()));
            student.setcId(college.getcId());
            student.setcName(college.getcName());
            // Status is false only when the draw is 0, i.e. with probability 0.1%
            Boolean flag = randomInt(1000) > 0;
            student.setStatus(flag);
            // Random phone number: configured prefix plus 8 random digits
            student.setStuPhoneNumber(phoneNumberStarts[randomInt(phoneNumberStarts.length)] + getRandom(8));
            student.setStuCreateTime(new Date());
            students.add(student);
            if (students.size() >= BATCH_SIZE) {
                // Submit once every BATCH_SIZE students
                count += studentDao.insertBatch(students);
                students.clear();
            }
        }
        if (!students.isEmpty()) {
            // Submit the remainder; skipped when empty so insertBatch never receives an empty list
            count += studentDao.insertBatch(students);
        }
        long end = System.currentTimeMillis();
        System.out.println("用时:"+(end-start));
        return JsonResult.ok("time", end - start).put("size", count);
    }

    /**
     * Returns a random integer in the range [0, n-1].
     *
     * @param n exclusive upper bound; must be positive
     * @return a random value between 0 (inclusive) and n (exclusive)
     */
    public static int randomInt(int n) {
        return RANDOM.nextInt(n);
    }

    /**
     * Generates a random string of {@code count} distinct decimal digits.
     *
     * <p>The digits 0-9 are shuffled and the first {@code count} of them are taken,
     * so no digit repeats and a leading zero is possible. The digits are appended
     * directly to a string, which keeps a leading zero and cannot overflow an
     * {@code int} when {@code count} is 10.
     *
     * @param count number of digits, at most 10; for values below 1 the string "0"
     *              is returned, as before
     * @return the digit string
     * @throws IllegalArgumentException if {@code count} is greater than 10
     */
    public String getRandom(int count) {
        if (count > DIGIT_COUNT) {
            throw new IllegalArgumentException("count must be at most " + DIGIT_COUNT + " but was " + count);
        }
        if (count < 1) {
            // Preserved behavior: with no digits requested the result is "0".
            return "0";
        }
        int[] digits = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
        // Shuffle: swap a random element of the remaining prefix into its last slot
        for (int i = DIGIT_COUNT; i > 1; i--) {
            int index = RANDOM.nextInt(i);
            int tmp = digits[index];
            digits[index] = digits[i - 1];
            digits[i - 1] = tmp;
        }
        StringBuilder result = new StringBuilder(count);
        for (int i = 0; i < count; i++) {
            result.append(digits[i]);
        }
        return result.toString();
    }

    /**
     * Formats a sequence number as a string left-padded with zeros to
     * {@link #STU_NUMBER_WIDTH} digits; longer numbers are returned unpadded.
     *
     * <p>The number of missing zeros is computed once from the original length.
     * Re-reading the length of the growing string in the loop condition would stop
     * the padding early.
     *
     * @param number the sequence number
     * @return the zero-padded decimal string
     */
    String generateStuNumber(Long number) {
        String digits = Long.toString(number);
        StringBuilder padded = new StringBuilder(STU_NUMBER_WIDTH);
        for (int i = digits.length(); i < STU_NUMBER_WIDTH; i++) {
            padded.append('0');
        }
        return padded.append(digits).toString();
    }
}
测试生成50000000数据用时约1380s!