在公司工作的这一年中,解码入库一直不可缺少:不要频繁地连接数据库,要记得关闭各种流和各种连接,否则 StringBuffer 拼接过大时还会报内存溢出。
首先我要说一下:数据量小的情况,可以全部拼接了再去入库,在处理速度上还是很快的;如果文本中有几万条数据,那么就要分批次地入库了。
package com.hxkj.data2db;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashSet;
import com.hxkj.c3p0.C3P0Inner;
import com.mysql.jdbc.Statement;
/***
* 空气质量
* @author
*
*/
public class AirQualityUtils {
public static int num=0;
public static void main(String[] args) {
digui("C:\\Users\\gaoyongqin\\Desktop\\电科院\\弘象样例数据\\Z_NAFP_C_BABJ_20180619052953_P_NWPC-CUACE-ENVIRONMENT-2018061900\\NWP_HAZE2MICAPS2018061900");
}
public static void digui(String path){
// String path = "C:\\Users\\gaoyongqin\\Desktop\\电科院\\弘象样例数据\\Z_NAFP_C_BABJ_20180619052953_P_NWPC-CUACE-ENVIRONMENT-2018061900\\NWP_HAZE2MICAPS2018061900"; // 路径
File f = new File(path);
// File[] listFiles = f.listFiles();
// for (File file : listFiles) {
// intoDB(file.getAbsolutePath());
// }
if (!f.exists()) {
System.out.println(path + " not exists");
return;
}
File fa[] = f.listFiles();
for (int i = 0; i < fa.length; i++) {
File fs = fa[i];
if (fs.isDirectory()) {
System.out.println(fs.getName() + " [目录]");
digui(path+"\\"+fs.getName());
} else {
System.out.println(fs.getName());
// String path ="d:\\Z_SURF_C_BABJ_20140626000124_O_AWS_FTM_PQC.txt";
// String path1 ="C:\\Users\\gaoyongqin\\Desktop\\电科院\\弘象样例数据\\Z_NAFP_C_BABJ_20180619052953_P_NWPC-CUACE-ENVIRONMENT-2018061900\\NWP_HAZE2MICAPS2018061900\\"+fs.getName();
// path="d:\\aaa.txt";
if(path.contains("_3")){
AirQualityUtils. intoDB(path+"\\"+fs.getName());
}
}
}
}
public static void intoDB(String path){
num++;
// File inFile = new File("C:\\Users\\gaoyongqin\\Desktop\\电科院\\弘象样例数据\\气象预报数据/Z_SEVP_C_BABJ_20171119211010_P_RFFC-SNWFD-201711200600-07212.TXT");
// File outFile = new File("C:\\Users\\zl\\Desktop\\电科院\\弘象样例数据\\气象预报数据/test2.txt");
int i = 0;
// String head = "站号 经度 纬度 海拔高度 预报时效 温度 相对湿度 风向 风速 气压 降水量 总云量 "
// + "低云量 天气现象 能见度 最高气温 最低气温 最大相对湿度 最小相对湿度 24小时累计降水量 12小时累计降水量 12小时总云量 12小时低云量 12小时天气现象 12小时风向 12小时风速";
// String documentStr = "";
StringBuffer sb = new StringBuffer();
StringBuffer sb2 = new StringBuffer();
BufferedReader br = null;
// BufferedWriter bw = null;
try {
if(path.contains(".024")){//24小时数据入库
br = new BufferedReader(new InputStreamReader(new FileInputStream(new File(path))));
// bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFile)));
String str = "";
String substring = "";
String timeNum = "";
String stationId ="",lon="",lat="",hight="",content="",element="",time ="",foreignKeySql="";
String substring2 = path.substring(0, path.lastIndexOf("\\"));
String substring3 = substring2.substring(substring2.lastIndexOf("\\")+1, substring2.length());
element=substring3.substring(0, substring3.lastIndexOf("_"));
// bw.write(head);
// bw.flush();
// String sql = "";
Connection con = C3P0Inner.getConnection();
PreparedStatement prep = null;
if(num==1){
foreignKeySql = "insert into data_air_quality (id,stationid,lon,lat,hight,"+element+",time) value";
String[] split2 = null;
while((str = br.readLine())!=null){
i++;
// if (str.contains("NNNN")) {
// break;
// }
// if (i<=5) {
// if(i==3){
// substring = str.substring(0, 10);
// }
// continue;
// }
// if (i==6) {
// String[] split = str.split(" ");
//
// timeNum = split[4];
// }
// if ((i-5)%(Integer.valueOf(timeNum)+1)==1) {
//
// String[] split = str.split(" ");
// stationId = split[0];
// lon = split[1];
// lat = split[2];
// element = split[3];
// continue;
// }
if(i<=2){
if(i==1){
time = str.substring(0, str.indexOf("_")).split(" ")[2];
}
continue;
}
str = str.replace(" ", "','").replace(" ", "','").replace(" ", "','").replace(" ", "','");
//split2 = str.split(" ");
sb.append(",(null,'")
// .append(stationId).append("','").append(lon).append("','").
// append(lat).append("','").append(element).append("','")
.append(str)
// .append("','").append(split2[0]).append("','").append(split2[1])
// .append("','").append(split2[2]).append("','").append(split2[3])
// .append("','").append(split2[4]).append("','").append(split2[5])
// .append("','").append(split2[6]).append("','").append(split2[7])
// .append("','").append(split2[8]).append("','").append(split2[9])
// .append("','").append(split2[10]).append("','").append(split2[11])
// .append("','").append(split2[12]).append("','").append(split2[13])
// .append("','").append(split2[14]).append("','").append(split2[15])
// .append("','").append(split2[16]).append("','").append(split2[17])
// .append("','").append(split2[18]).append("','").append(split2[19])
// .append("','").append(split2[20])
.append("','").append(time).append("')");
// bw.write(sb.toString());
// System.out.println(sb.toString());
//sb.replace(start, end, substring)
//String replace = ",('"+sb.toString().replace(" ", "','").replace(" ", "','").replace(" ", "','")+"','"+substring+"')";
//sb2.append(replace);
// System.out.println(replace.toString());
//System.out.println(sb.toString());
//
if (i%2000==0) {
sb.deleteCharAt(0);
//入库
// long c3 = System.currentTimeMillis();
// long c4 = System.currentTimeMillis();
// System.out.println(c4-c3);
try {
// System.out.println(foreignKeySql+sb.toString()+"ON DUPLICATE KEY UPDATE stationId=VALUES(stationId),forecast_aging=VALUES(forecast_aging);");
prep = con.prepareStatement(foreignKeySql+sb.toString());
prep.executeUpdate();
//ResultSet res = prep.getGeneratedKeys();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
try {
if (prep!=null) {
prep.close();
}
} catch (SQLException e) {
e.printStackTrace();
}
}
sb = new StringBuffer();
//sb2 = new StringBuffer();
}
}
sb.deleteCharAt(0);
}else{//这一步是说如果已经在第一遍入过同样的数据,那么直接在后面追加新的要素(此方法以后在优化)
foreignKeySql = "UPDATE data_air_quality SET ";
while((str = br.readLine())!=null){
i++;
if(i<=2){
continue;
}
str = str.replace(" ", ",").replace(" ", ",").replace(" ", ",").replace(" ", ",");
String[] split = str.split(",");
//split2 = str.split(" ");
sb.append(element).append("=")
.append(split[4])
.append(" where stationid=").append(split[0]);
try {
prep = con.prepareStatement(foreignKeySql+sb.toString());
prep.executeUpdate();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
try {
if (prep!=null) {
prep.close();
}
} catch (SQLException e) {
e.printStackTrace();
}
}
sb = new StringBuffer();
}
}
//入库
// long c4 = System.currentTimeMillis();
// System.out.println(c4-c3);
try {
prep = con.prepareStatement(foreignKeySql+sb.toString());
prep.executeUpdate();
//ResultSet res = prep.getGeneratedKeys();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally{
try {
if (prep!=null) {
prep.close();
con.close();
}
} catch (SQLException e) {
e.printStackTrace();
}
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}finally{
try {
if (br!=null) {
br.close();
}
// if (bw!=null) {
// bw.close();
// }
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
再有,数据入库时都要避免重复数据,那么在入库操作的时候要添加 ON DUPLICATE KEY UPDATE 子句,例如:
prep = con.prepareStatement(foreignKeySql+foreignKeyValue.toString()+"ON DUPLICATE KEY UPDATE V01301=VALUES(V01301),D_DATETIME=VALUES(D_DATETIME);",Statement.RETURN_GENERATED_KEYS);
不懂的话可以查一下 MySQL 的 ON DUPLICATE KEY UPDATE 语法,这个也很好用。
还有就是 foreignKeyValue.deleteCharAt(0);
它的作用是去掉拼接出来的 VALUES 串开头多出的第一个逗号,你懂的。