Importing and exporting HBase data from/to HDFS with MapReduce

A simple demo, mainly showing how TableMapper and TableReducer are used.

Reading data from HDFS into HBase; the test data file is provided as an attachment.

package hbase;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class HDFSToHBase {

    private static final String INPUT_PATH = "hdfs://hadoop:9000/in/kpi";

    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        // ZooKeeper quorum used by the HBase client
        conf.set("hbase.zookeeper.quorum", "hadoop");
        // target HBase table name
        conf.set(TableOutputFormat.OUTPUT_TABLE, "wlan_log");
        // raise the timeout so HBase does not abort long-running tasks
        conf.set("dfs.socket.timeout", "180000");

        Job job = new Job(conf);

        // 1. read the input file from HDFS
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);

        // 2. configure the custom mapper
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // 3. the reducer writes Puts through TableOutputFormat
        job.setReducerClass(MyReducer.class);
        job.setOutputFormatClass(TableOutputFormat.class);

        job.waitForCompletion(true);
    }

    static class MyMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

        @Override
        protected void map(LongWritable k1, Text v1, Context context)
                throws IOException, InterruptedException {

            String[] splited = v1.toString().split("\t");
            try {
                // field 0 is a millisecond timestamp; build the rowkey as "<field1>:<yyyyMMddHHmmss>"
                SimpleDateFormat df = new SimpleDateFormat("yyyyMMddHHmmss");
                String datestr = df.format(new Date(Long.parseLong(splited[0].trim())));
                String rowkey = splited[1] + ":" + datestr;
                context.write(k1, new Text(rowkey + "\t" + v1));
            } catch (NumberFormatException e) {
                // count and log malformed records instead of failing the task
                final Counter counter = context.getCounter("BatchImport", "ErrorFormat");
                counter.increment(1L);
                System.out.println("bad record: " + splited[0] + " " + e.getMessage());
            }
        }
    }

    static class MyReducer extends TableReducer<LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(LongWritable k2, Iterable<Text> v2s, Context context)
                throws IOException, InterruptedException {

            for (Text v2 : v2s) {
                String[] splited = v2.toString().split("\t");

                Put put = new Put(Bytes.toBytes(splited[0]));

                // write each column into family "f1"
                put.add(Bytes.toBytes("f1"), Bytes.toBytes("time"), Bytes.toBytes(splited[1]));
                put.add(Bytes.toBytes("f1"), Bytes.toBytes("mobile"), Bytes.toBytes(splited[2]));
                context.write(k2, put);
            }
        }
    }
}
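
The import job assumes the target table wlan_log with column family f1 already exists. A minimal sketch for creating it with the same-era HBaseAdmin client API; the class name CreateWlanLogTable is just an illustration, not part of the original post:

package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

// illustrative helper, assuming the same ZooKeeper quorum as the jobs above
public class CreateWlanLogTable {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop");

        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists("wlan_log")) {
            // table with the single column family "f1" used by the import job
            HTableDescriptor desc = new HTableDescriptor("wlan_log");
            desc.addFamily(new HColumnDescriptor("f1"));
            admin.createTable(desc);
        }
        admin.close();
    }
}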




------------------------------------------------------------------

Reading data from HBase into HDFS.


package hbase;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class HBaseToHDFS {

    private static final String OUT_PATH = "hdfs://hadoop:9000/out/kpi";

    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        // ZooKeeper quorum used by the HBase client
        conf.set("hbase.zookeeper.quorum", "hadoop");
        // HBase table to read from
        conf.set(TableInputFormat.INPUT_TABLE, "wlan_log");
        // raise the timeout so HBase does not abort long-running tasks
        conf.set("dfs.socket.timeout", "180000");

        Job job = new Job(conf);

        job.setInputFormatClass(TableInputFormat.class);

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // the reducer is left unset, so the default identity reducer passes the map output to TextOutputFormat
        //job.setReducerClass(MyReducer.class);
        //job.setOutputFormatClass(TableOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));

        // delete the output directory if it already exists
        FileSystem fs = FileSystem.get(new URI(OUT_PATH), conf);
        if (fs.exists(new Path(OUT_PATH))) {
            fs.delete(new Path(OUT_PATH), true);
        }

        job.waitForCompletion(true);
    }

    static class MyMapper extends TableMapper<Text, Text> {

        @Override
        protected void map(ImmutableBytesWritable key, Result result, Context context)
                throws IOException, InterruptedException {

            // read column f1:mobile from the current row
            String mobile = new String(result.getValue(Bytes.toBytes("f1"), Bytes.toBytes("mobile")));
            System.out.println(key + "----------" + mobile);
            context.write(new Text(mobile), new Text(mobile));
        }
    }

    // placeholder reducer; it is never set on the job, so it does not run
    static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
        }
    }
}
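
The driver above configures TableInputFormat by hand through TableInputFormat.INPUT_TABLE. HBase also provides TableMapReduceUtil.initTableMapperJob, which serializes a Scan into the job configuration and sets the input format and mapper in one call. A minimal sketch of an equivalent driver that reuses MyMapper from the class above; the class name HBaseToHDFSWithUtil and the scan-caching values are illustrative assumptions, not from the original post:

package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// illustrative alternative driver, assuming the same table, quorum and output path as above
public class HBaseToHDFSWithUtil {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop");

        Job job = new Job(conf);

        // restrict the scan to the single column family the mapper reads
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("f1"));
        scan.setCaching(500);        // fetch more rows per RPC for a batch scan
        scan.setCacheBlocks(false);  // a full scan should not fill the block cache

        // registers TableInputFormat, the serialized Scan and the mapper in one call
        TableMapReduceUtil.initTableMapperJob("wlan_log", scan,
                HBaseToHDFS.MyMapper.class, Text.class, Text.class, job);

        FileOutputFormat.setOutputPath(job, new Path("hdfs://hadoop:9000/out/kpi"));
        job.waitForCompletion(true);
    }
}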




Reposted from jsh0401.iteye.com/blog/2096103