The complete job: the mapper parses each CSV line, treating the first line of the file as a header that names the row-key column and the family:qualifier of every other column, and a TableReducer turns each data line into an HBase Put.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import java.io.IOException;

public class HdfsToHBase {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // These settings could of course be passed in as parameters; this is just an
        // experiment, so they are hard-coded here. In real production you would
        // definitely pass them in as parameters.
        conf.set("hbase.zookeeper.quorum", "master:2181,slave1:2181,slave2:2181");
        conf.set("hbase.rootdir", "hdfs://master:9000/hbase");
        // Target HBase table for TableOutputFormat.
        conf.set(TableOutputFormat.OUTPUT_TABLE, "member");

        Job job = Job.getInstance(conf, HdfsToHBase.class.getSimpleName());
        job.setJarByClass(HdfsToHBase.class);
        job.setMapperClass(HdfsToHBaseMapper.class);
        job.setReducerClass(HdfsToHBaseReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        FileInputFormat.setInputPaths(job, "hdfs://master:9000/hbase/im.csv");

        System.out.println("ok................");
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    // Shared state filled in from the CSV header row (the first line of the file).
    // Note: static fields are only visible inside one JVM, so sharing them between
    // mapper and reducer works in local mode but not in a distributed job.
    static String first = "";
    static int count;
    static int at;
    static String[] fam;
    static String[] qua;

    public static class HdfsToHBaseMapper extends Mapper<LongWritable, Text, Text, Text> {

        private Text outKey = new Text();
        private Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String key1 = "";
            String value1 = "";
            // Turn the plain-text input line into a String.
            String line = value.toString();
            String[] splited = line.split(",");
            count = splited.length;
            String[] first1 = line.split(",");
            String[] family = new String[count];
            String[] qulifier = new String[count];

            for (int i = 0; i < splited.length; i++) {
                if (i == 0) {
                    // The first column is the row key.
                    key1 = splited[i] + "\t";
                } else {
                    value1 += splited[i] + "\t";
                    if (at == 0) {
                        // Header row: every remaining column is "family:qualifier" or a bare family name.
                        if (first1[i].indexOf(":") > 0) {
                            family[i] = first1[i].substring(0, first1[i].indexOf(":"));
                            qulifier[i] = first1[i].substring(first1[i].indexOf(":") + 1);
                        } else {
                            family[i] = first1[i];
                            qulifier[i] = "";
                        }
                    }
                }
            }

            if (at == 0) {
                fam = family;
                qua = qulifier;
            }

            outKey.set(key1);
            outValue.set(value1);
            if (at == 0) {
                // The first (header) row is not passed on to the reducer.
                System.out.println("header row, not written to reduce.....");
            } else {
                context.write(outKey, outValue);
            }
            at++;
        }
    }

    public static class HdfsToHBaseReducer extends TableReducer<Text, Text, NullWritable> {

        @Override
        protected void reduce(Text k2, Iterable<Text> v2s, Context context)
                throws IOException, InterruptedException {
            // The row key is the part of the map output key before the tab.
            Put put = new Put(Bytes.toBytes(k2.toString().substring(0, k2.toString().indexOf("\t"))));
            for (Text v2 : v2s) {
                String[] splited = v2.toString().split("\t");
                for (int j = 1; j <= splited.length; j++) {
                    // CSV column j (family/qualifier taken from the header) gets value splited[j-1].
                    put.addColumn(Bytes.toBytes(fam[j]), Bytes.toBytes(qua[j]), Bytes.toBytes(splited[j - 1]));
                    System.out.println("put..." + fam[j] + "...." + qua[j]);
                }
            }
            System.out.println("reduce over...............");
            context.write(NullWritable.get(), put);
        }
    }
}
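The mapper expects im.csv to start with a header line: the first field names the row-key column, and each remaining field is either family:qualifier or a bare column family; every following line carries a row key and the matching values. The original post does not show the file, so the layout below is only a hypothetical example:

    id,info:name,info:age,address:city
    1001,zhangsan,23,beijing
    1002,lisi,30,shanghai

TableOutputFormat writes into an existing table and does not create "member" for you, so the table must be created beforehand with column families matching the CSV header. A minimal sketch, assuming the hypothetical "info" and "address" families above and the HBase 1.x-style client API used elsewhere in the post:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Admin;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;

    public class CreateMemberTable {
        public static void main(String[] args) throws Exception {
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "master:2181,slave1:2181,slave2:2181");
            try (Connection connection = ConnectionFactory.createConnection(conf);
                 Admin admin = connection.getAdmin()) {
                HTableDescriptor table = new HTableDescriptor(TableName.valueOf("member"));
                // Families must match those named in the CSV header;
                // "info" and "address" are only the hypothetical example above.
                table.addFamily(new HColumnDescriptor("info"));
                table.addFamily(new HColumnDescriptor("address"));
                if (!admin.tableExists(TableName.valueOf("member"))) {
                    admin.createTable(table);
                }
            }
        }
    }

Once the table exists, the job can be packaged into a jar and launched with hadoop jar, with the HBase client jars on the classpath (for example via export HADOOP_CLASSPATH=$(hbase classpath)).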
Reposted from "MapReduce: import a CSV file from local disk or HDFS into an HBase table", blog.csdn.net/true1cc/article/details/79141023