Bulk insert data into HBase

Insert.java

 

package cn.ls.insert;

import cn.ls.util.HbaseConn;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
* Created by Administrator on 2016/12/30.
*/
public class Insert {

    public static void main(String[] args) throws IOException {

        /*
        Connection hbaseConn = new HbaseConn().getHbaseConn();
        HTableInterface table = (HTableInterface) hbaseConn.getTable(TableName.valueOf(args[0]));
        Put put = new Put(Bytes.toBytes(args[1])); // row key
        put.add(Bytes.toBytes(args[2]), Bytes.toBytes(args[3]), Bytes.toBytes(args[4])); // column family, qualifier, value
        table.put(put);
        table.close(); // release resources
        hbaseConn.close();
        */

        // In fact HBase has no fixed notion of columns: column qualifiers are themselves data.
        // insertOne();
        insertBatch();
    }

    private static void insertBatch() throws IOException {
        // Batch insert: collect the Puts in a list and send them in a single call.
        Connection hbaseConn = new HbaseConn().getHbaseConn();
        HTableInterface table = (HTableInterface) hbaseConn.getTable(TableName.valueOf("testCreate1228"));
        List<Put> list = new ArrayList<Put>();
        for (int i = 0; i < 10; i++) {
            Put put = new Put(Bytes.toBytes("20161130abc" + i));
            put.add(Bytes.toBytes("f1"), Bytes.toBytes("lie1" + i), Bytes.toBytes("20161130abc" + i));
            list.add(put);
        }
        table.put(list);
        table.close();
        hbaseConn.close();
    }

    private static void insertOne() throws IOException {
        // Insert a single row.
        Connection hbaseConn = new HbaseConn().getHbaseConn();
        HTableInterface table = (HTableInterface) hbaseConn.getTable(TableName.valueOf("testCreate1228"));
        Put put = new Put(Bytes.toBytes("20161130abc3"));
        put.add(Bytes.toBytes("f1"), Bytes.toBytes("lie1"), Bytes.toBytes("20161130abc"));
        table.put(put);
        table.close();
        hbaseConn.close();
    }

}
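
The Insert class above depends on a small connection helper, cn.ls.util.HbaseConn, that is not shown in the post. A minimal sketch of what it might look like, assuming a plain client configuration (the ZooKeeper quorum and port below are placeholders, not values from the original post):

HbaseConn.java

package cn.ls.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;

public class HbaseConn {

    // Build a client Connection from hbase-site.xml plus the settings below.
    // Replace the quorum and port with the values of your own cluster.
    public Connection getHbaseConn() throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "localhost");          // placeholder
        conf.set("hbase.zookeeper.property.clientPort", "2181");  // placeholder
        return ConnectionFactory.createConnection(conf);
    }
}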


MyBulkload.java


package cn.ls.insert;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MyBulkload {

    public static class MyBulkMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // The input is organized as \t-separated fields; the parsing can be
            // customized here, e.g. for complex json/xml text lines.
            String line = value.toString();
            String[] terms = line.split("\t");
            if (terms.length == 4) {
                byte[] rowkey = terms[0].getBytes();
                ImmutableBytesWritable imrowkey = new ImmutableBytesWritable(rowkey);
                // Emit rowkey => KeyValue for the columns info:name, info:age and info:phone.
                context.write(imrowkey, new KeyValue(rowkey, Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(terms[1])));
                context.write(imrowkey, new KeyValue(rowkey, Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(terms[2])));
                context.write(imrowkey, new KeyValue(rowkey, Bytes.toBytes("info"), Bytes.toBytes("phone"), Bytes.toBytes(terms[3])));
            }
        }
    }

    public static void main(String[] args) throws Exception {

        if (args.length != 3) {
            System.err.println("Usage: MyBulkload <table_name> <data_input_path> <hfile_output_path>");
            System.exit(2);
        }
        String tableName = args[0];
        String inputPath = args[1];
        String outputPath = args[2];

        // Create an HTable instance for the target table; it supplies the region
        // meta information, including the row key range partitioning.
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, tableName);

        Job job = Job.getInstance(conf, "MyBulkload");

        job.setMapperClass(MyBulkMapper.class);

        job.setJarByClass(MyBulkload.class);
        job.setInputFormatClass(TextInputFormat.class);

        // The most important configuration step: it sets the output format, the
        // sort reducer and a total-order partitioner based on the table's region boundaries.
        HFileOutputFormat.configureIncrementalLoad(job, table);

        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
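
The job above only writes HFiles under <hfile_output_path>; they still have to be handed over to the region servers before the data becomes visible in the table. A minimal sketch of that final step, assuming the same HBase 0.98/1.x client API as the driver above (the class name MyBulkloadComplete is not from the original post):

MyBulkloadComplete.java

package cn.ls.insert;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class MyBulkloadComplete {

    // Usage: MyBulkloadComplete <table_name> <hfile_output_path>
    // Moves the HFiles produced by MyBulkload into the table's regions.
    public static void main(String[] args) throws Exception {
        String tableName = args[0];
        String hfileOutputPath = args[1];

        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, tableName);

        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        loader.doBulkLoad(new Path(hfileOutputPath), table);

        table.close();
    }
}

The same step can also be run with the tool that ships with HBase: hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles <hfile_output_path> <table_name>. Note that the mapper expects tab-separated input lines with four fields: rowkey, name, age, phone.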

 

Origin www.cnblogs.com/95lyb/p/11670570.html