Add the dependencies:
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.4.9</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.9.2</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.4.9</version>
</dependency>
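Note that hbase-server is on the list because in HBase 1.x the MapReduce integration classes used below (TableMapper, TableMapReduceUtil, TableOutputFormat) ship in that artifact, not in hbase-client.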
Contents of student.txt:
1,Sam,18
2,Tom,16
3,Jetty,25
4,LiLei,56
Create the HBase table:
create 'student','cf1'
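Before running the job, you can confirm the table and its column family from the HBase shell:
describe 'student'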
1. A MapReduce job that reads data from a file and writes it into an HBase table
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HadoopConnectTest extends Configured implements Tool {
    public static class Mapper1 extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line looks like "1,Sam,18": id, name, age.
            String[] values = value.toString().split(",");
            String id = values[0];
            String name = values[1];
            String age = values[2];
            // Build a Put keyed on the id and add one cell per column.
            Put put = new Put(Bytes.toBytes(id));
            put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("name"), Bytes.toBytes(name));
            put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("age"), Bytes.toBytes(age));
            if (!put.isEmpty()) {
                // The output key carries the name of the target table.
                ImmutableBytesWritable ib = new ImmutableBytesWritable(Bytes.toBytes("student"));
                context.write(ib, put);
            }
        }
    }

    // HDFS URI
    private static final String HDFS = "hdfs://192.168.30.141:9000";
    // Input file path
    private static final String INPATH = HDFS + "/student.txt";

    @Override
    public int run(String[] args) throws Exception {
        // The Configuration object carries the job's settings, including how to reach HBase.
        Configuration conf = HBaseConfiguration.create(getConf());
        conf.set("hbase.zookeeper.quorum", "hadoop1:2181,hadoop2:2181,hadoop3:2181");
        conf.set("hbase.rootdir", "hdfs://hadoop1:9000/hbase");
        Job job = Job.getInstance(conf, "student.txt to HBase import");
        job.setJarByClass(HadoopConnectTest.class);
        job.setMapperClass(Mapper1.class);
        // TableMapReduceUtil is an HBase helper class that wires up the configuration a
        // MapReduce job needs to write to HBase; here it sets "student" as the output table.
        // The reducer class is null because the map output goes to HBase directly.
        TableMapReduceUtil.initTableReducerJob("student", null, job);
        // Map-only job: no reduce phase, so set the number of reduce tasks to 0.
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, new Path(INPATH));
        return job.waitForCompletion(true) ? 0 : -1;
    }

    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic Hadoop options and invokes run().
        int status = ToolRunner.run(new HadoopConnectTest(), args);
        System.exit(status);
    }
}
Run the jar:
./hadoop jar /usr/local/hbase.jar
Verify:
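Assuming the job completed successfully, a scan from the HBase shell should show the four rows from student.txt:
scan 'student'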
2. A MapReduce job that reads data from an HBase table and writes it to a file
import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

public class HBaseMapper extends TableMapper<Text, Text> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // The scan in the driver is limited to cf1:name, so every cell here holds a name.
        for (Cell cell : value.rawCells()) {
            String row = Bytes.toString(CellUtil.cloneRow(cell));
            String name = Bytes.toString(CellUtil.cloneValue(cell));
            context.write(new Text(row), new Text(name));
        }
    }
}
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class HBaseJob {
    public static final String tableName = "student";
    public static final String outputFilePath = "hdfs://hadoop1:9000/output";
    public static Configuration conf = HBaseConfiguration.create();
    static {
        conf.set("hbase.zookeeper.quorum", "hadoop1:2181,hadoop2:2181,hadoop3:2181");
        conf.set("hbase.rootdir", "hdfs://hadoop1:9000/hbase");
        conf.set("hbase.master", "hadoop1:60000");
    }

    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        // Scan only the cf1:name column.
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("name"));
        Job job = Job.getInstance(conf, "hbase to file export");
        job.setJarByClass(HBaseJob.class);
        TableMapReduceUtil.initTableMapperJob(
                tableName,         // source table
                scan,              // controls what the mapper sees
                HBaseMapper.class,
                Text.class,        // mapper output key
                Text.class,        // mapper output value
                job);
        // Map-only job: the mapper's Text/Text pairs go straight to the output files.
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
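Package and run this job the same way as the first one, then check the output directory on HDFS (as a map-only job it writes part-m-* files):
hdfs dfs -cat /output/part-*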