There are several ways to import data into HBase:
1. Use the ImportTsv tool that ships with HBase to load CSV files into HBase (rejected)
2. Use the completebulkload tool that ships with HBase to load the data into HBase
3. Use the Import tool that ships with HBase to load data into HBase (rejected)

In a real production environment the pure command-line tools are hard to use: the source data can almost never be written to HBase as-is, and a migration usually involves data cleaning, reorganization, merging and plenty of other extra work, so the command-line route is simply not flexible enough.

This post therefore uses MapReduce + completebulkload.

Note: do not add your own reducer. Everything that travels from the mappers to the reducers has to cross the network, and that shuffle is bounded by the cluster's bandwidth.
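The mapper below consumes DBImportBean records handed to it by DBInputFormat. The post never shows that class, but DBInputFormat requires its value class to implement Hadoop's DBWritable so it can be populated from the JDBC ResultSet. A minimal hypothetical sketch (the column names and the way the composite key is built are my assumptions, not the author's code) could look like this:

    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.mapreduce.lib.db.DBWritable;

    public class DBImportBean implements DBWritable {

        // Column names of the source table; the mapper uses columnName.get(2) ("job") as the qualifier.
        public static final List<String> columnName =
                Arrays.asList("testunit_id", "account_id", "job");   // hypothetical names

        private String testunit_id_accountid;   // composite row key, e.g. testunit_id + "_" + account_id
        private String job;

        @Override
        public void readFields(ResultSet rs) throws SQLException {
            // DBInputFormat calls this once per row of the query result
            testunit_id_accountid = rs.getString("testunit_id") + "_" + rs.getString("account_id");
            job = rs.getString("job");
        }

        @Override
        public void write(PreparedStatement ps) throws SQLException {
            // Only needed when writing back to the database; unused in this job.
        }

        public String getTestunit_id_accountid() { return testunit_id_accountid; }
        public String getJob() { return job; }
    }

The JDBC driver class, URL and credentials that DBInputFormat uses to open the connection come from the DBConfiguration.configureDB(...) call in the driver. With a bean like that in place, the mapper simply turns each record into a rowkey/Put pair: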
public static class DBMapper extends Mapper<LongWritable, DBImportBean, ImmutableBytesWritable, Put> {

    static String[] family = {"studentinfo"};
    Put put = null;
    ImmutableBytesWritable rowkey = null;

    @Override
    protected void map(LongWritable key, DBImportBean value, Context context)
            throws IOException, InterruptedException {
        // Row key: the composite testunit_id + account_id value from the source record
        rowkey = new ImmutableBytesWritable(value.getTestunit_id_accountid().getBytes());
        put = new Put(Bytes.toBytes(value.getTestunit_id_accountid()));
        //put.setWriteToWAL(false);
        put.addColumn(Bytes.toBytes(family[0]), Bytes.toBytes(DBImportBean.columnName.get(2)), Bytes.toBytes(value.getJob()));
        // ... add the remaining columns in the same way ...
        context.write(rowkey, put);
    }
}
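The driver's main() method below gets its Configuration from a project-specific HBaseUtils.getConfiguration() helper that is not shown in the post. As a rough sketch of what such a helper typically does (the ZooKeeper hosts below are placeholders, not the author's values):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class HBaseUtils {
        public static Configuration getConfiguration() {
            // Picks up hbase-site.xml / hbase-default.xml from the classpath
            Configuration conf = HBaseConfiguration.create();
            // If the client does not carry hbase-site.xml, point it at the cluster explicitly
            conf.set("hbase.zookeeper.quorum", "zk1,zk2,zk3");            // placeholder hosts
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            return conf;
        }
    }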
public static void main(String[] args) {
    Configuration conf = HBaseUtils.getConfiguration();
    try {
        TableName tableName = TableName.valueOf("test_unit_stutranscript");
        HTable xTable = new HTable(conf, tableName);
        xTable.setWriteBufferSize(12 * 1024 * 1024);
        xTable.setAutoFlushTo(false);

        // MySQL JDBC connection used by DBInputFormat
        // DBConnector dbConnector = DBConnectorUtil.getDBConnector();
        String userName = "test";
        String password = "test";
        DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://ip:3306/test?useUnicode=true&characterEncoding=utf8", userName, password);

        Job job = Job.getInstance(conf);
        // the class that carries this job
        job.setJarByClass(DBImportHbase2.class);
        job.setMapperClass(DBMapper.class);
        // map output key type
        //job.setMapOutputKeyClass(LongWritable.class);
        //job.setMapOutputValueClass(DBImportBean.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        // map output value type
        job.setMapOutputValueClass(Put.class);
        // job.setReducerClass(KeyValueSortReducer.class);
        // job.setReducerClass(PutSortReducer.class);
        job.setSpeculativeExecution(false);
        job.setReduceSpeculativeExecution(false);
        job.setInputFormatClass(DBInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat2.class);
        // number of reduce tasks
        job.setNumReduceTasks(0);

        // String[] fields = { "id", "name" };
        // DBInputFormat.setInput(job, DBRecord.class, "test1", null, null, fields);
        // feed the data in through a SQL query; sqlcount is the matching row-count query
        DBInputFormat.setInput(job, DBImportBean.class, sql, sqlcount);
        // System.exit(job.waitForCompletion(true) ? 0 : 1);

        // Write intermediate HFiles first, then move them into the HBase data directory with bulk load:
        // 1. no write pressure on the HBase cluster
        // 2. the job runs faster and finishes sooner
        String outputPath = "/dataset/hbase";
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path(outputPath);
        if (fs.exists(output)) {
            fs.delete(output, true); // delete the output path if it already exists
        }
        FileOutputFormat.setOutputPath(job, output); // output path
        HFileOutputFormat2.configureIncrementalLoad(job, xTable.getTableDescriptor(), xTable.getRegionLocator());

        job.waitForCompletion(true);
        if (job.isSuccessful()) {
            // load the generated HFiles into the HBase table
            FsShell shell = new FsShell(conf);
            shell.run(new String[]{"-chmod", "-R", "777", outputPath});
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
            loader.doBulkLoad(new Path(outputPath), xTable);
            // HBaseUtils.doBulkLoad(outputPath, tableName.getName().toString());
            logger.info("bulk load succeeded -----------------");
        } else {
            logger.error("bulk load failed -----------------");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
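The sql and sqlcount variables passed to DBInputFormat.setInput are never defined in the post. Purely as an illustration (the table and column names are assumptions), they would be a data query plus a matching row-count query:

    // hypothetical queries for DBInputFormat.setInput(job, DBImportBean.class, sql, sqlcount)
    String sql      = "SELECT testunit_id, account_id, job FROM student_transcript";
    String sqlcount = "SELECT COUNT(*) FROM student_transcript";

DBInputFormat uses the count query to work out the input splits and then pages through the data query with LIMIT/OFFSET clauses, so each map task reads only its own slice of the table.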