MapReduce: importing a CSV file from the local filesystem or HDFS into an HBase table
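
The job below assumes that the first row of the CSV file is a header: the first column supplies the row key, and every other column is named family:qualifier (a header name without a colon is treated as a bare column family with an empty qualifier). A hypothetical input file with purely illustrative column names could look like this:

id,info:name,info:age
1001,Tom,23
1002,Jerry,25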



import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import java.io.IOException;


public class HdfsToHBase {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","master:2181,slave1:2181,slave2:2181");//当然这些都可以作为参数传入,这里只是实验,所以写死在了代码里,实际成产过程中肯定要用参数的方式
        conf.set("hbase.rootdir","hdfs://master:9000/hbase");
        conf.set(TableOutputFormat.OUTPUT_TABLE,"member");
        Job job = Job.getInstance(conf,HdfsToHBase.class.getSimpleName());
        job.setJarByClass(HdfsToHBase.class);
        job.setMapperClass(HdfsToHBaseMapper.class);
        job.setReducerClass(HdfsToHBaseReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        FileInputFormat.setInputPaths(job, "hdfs://master:9000/hbase/im.csv");
        System.out.println("ok................");
        job.waitForCompletion(true);
    }
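    // Note: the fields below pass the parsed CSV header (column families and qualifiers)
    // from the mapper to the reducer through static state. This only works when both run
    // in the same JVM (e.g. local mode); in a fully distributed job each task gets its own copy.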
    static int count;
    static int at;
    static String[] fam;
    static String[] qua;
    public static class HdfsToHBaseMapper extends Mapper<LongWritable, Text, Text, Text> {

        private Text outKey = new Text();
        private Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String key1 = "";
            String value1 = "";
            // Convert the plain-text input line to a String
            String line = value.toString();
            String[] splited = line.split(",");
            count = splited.length;
            String[] family = new String[count];
            String[] qulifier = new String[count];
            for (int i = 0; i < splited.length; i++) {
                if (i == 0) {
                    // The first column becomes the row key
                    key1 = splited[i] + "\t";
                } else {
                    value1 += splited[i] + "\t";
                    if (at == 0) {
                        // On the header line, split each column name into family:qualifier
                        if (splited[i].indexOf(":") > 0) {
                            family[i] = splited[i].substring(0, splited[i].indexOf(":"));
                            qulifier[i] = splited[i].substring(splited[i].indexOf(":") + 1);
                        } else {
                            family[i] = splited[i];
                            qulifier[i] = "";
                        }
                    }
                }
            }
            if (at == 0) {
                fam = family;
                qua = qulifier;
            }
            outKey.set(key1);
            outValue.set(value1);
            if (at == 0) {
                System.out.println("First line is the header, not passed to reduce.....");
            } else {
                context.write(outKey, outValue);
            }
            at++;
        }
    }
    public static class HdfsToHBaseReducer extends TableReducer<Text, Text, NullWritable> {
        @Override
        protected void reduce(Text k2, Iterable<Text> v2s, Context context) throws IOException, InterruptedException {
            // The row key is everything before the trailing tab that the mapper appended
            Put put = new Put(Bytes.toBytes(k2.toString().substring(0, k2.toString().indexOf("\t"))));
            for (Text v2 : v2s) {
                String[] splited = v2.toString().split("\t");
                // splited holds only the data columns; index 0 of fam/qua belongs to the row-key column
                for (int j = 1; j <= splited.length; j++) {
                    put.addColumn(Bytes.toBytes(fam[j]), Bytes.toBytes(qua[j]), Bytes.toBytes(splited[j - 1]));
                    System.out.println("put..." + fam[j] + "...." + qua[j]);
                }
            }
            System.out.println("reduce over...............");
            context.write(NullWritable.get(), put);
        }
    }
}
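
The job writes into an already existing HBase table called member, so a table with column families matching the family names in the CSV header has to be created before the job runs. Below is a minimal sketch of creating it from Java with the HBase client API; the column family name info is only an assumption and should be replaced by whatever families the header actually uses (the same can be done from the HBase shell with create 'member', 'info').

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateMemberTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master:2181,slave1:2181,slave2:2181");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("member");
            if (!admin.tableExists(tableName)) {
                HTableDescriptor desc = new HTableDescriptor(tableName);
                // "info" is an assumed column family; it must match the family part
                // of the CSV header columns (e.g. "info:name").
                desc.addFamily(new HColumnDescriptor("info"));
                admin.createTable(desc);
            }
        }
    }
}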





Reposted from blog.csdn.net/true1cc/article/details/79141023