5.7 Integrating HBase and MapReduce

This example reads data from HDFS and writes the word counts into an HBase table.
Note that both the Hadoop and the HBase jars must be on the job's classpath.
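If the project is built with Maven, a minimal dependency sketch could look like the following; the artifact choice (hbase-server carries TableReducer/TableMapReduceUtil in HBase 0.9x/1.x) and the version properties are assumptions to adapt to your cluster:

<!-- versions are placeholders; match them to your cluster -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>${hbase.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>${hbase.version}</version>
</dependency>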
Create the target table first.
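For example, in the HBase shell (table name wc and column family cf match what the reducer and driver below expect):

create 'wc', 'cf'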
WordCountMapper:
package com.laoxiao.mr.hbase;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * @author root
 * Defines the input and output data types of the map task.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>{

	/**
	 * map() is called once per line of input: the key is the byte offset of the line, the value is the line's content.
	 */
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// Split the line on spaces and emit (word, 1) for every token.
		String[] words = value.toString().split(" ");
		for (int i = 0; i < words.length; i++) {
			String w = words[i];
			Text outkey = new Text(w);
			IntWritable outvalue = new IntWritable(1);
			context.write(outkey, outvalue);
		}
	}
}
WordCountReducer:
package com.laoxiao.mr.hbase;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class WordCountReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable>{

	/*
	 * reduce() is called once per key group.
	 */
	protected void reduce(Text key, Iterable<IntWritable> values,
			Context context)
			throws IOException, InterruptedException {
		int sum = 0;
		for (IntWritable i : values) {
			sum = sum + i.get();
		}
		// The word becomes the row key; Put.add is the pre-1.0 HBase API (addColumn in newer versions).
		Put put = new Put(key.toString().getBytes());
		put.add("cf".getBytes(), "count".getBytes(), (sum + "").getBytes());
		// TableOutputFormat ignores the output key, so null is fine here.
		context.write(null, put);
	}
}
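The driver below reads /usr/input/wc.txt from HDFS, so the input file has to be uploaded first. A hypothetical way to stage a small space-separated test file (the file content is just for illustration):

echo "hello hbase hello hadoop" > wc.txt
hdfs dfs -mkdir -p /usr/input
hdfs dfs -put wc.txt /usr/input/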
RunJob:
package com.laoxiao.mr.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class RunJob {

	public static void main(String[] args) throws Exception {
		Configuration config = new Configuration();
		config.set("fs.defaultFS", "hdfs://node1:9000");
		config.set("yarn.resourcemanager.hostname", "node1");
//		config.set("mapred.jar", "C:\\Users\\Administrator\\Desktop\\wc.jar");
		// HBase ZooKeeper quorum
		config.set("hbase.zookeeper.quorum", "node1");

		Job job = Job.getInstance(config);
		job.setJobName("word count");
		job.setJarByClass(RunJob.class); // entry class of the job

		job.setMapperClass(WordCountMapper.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		String tableName = "wc";
		// When running locally, addDependencyJars must be set to false; for cluster mode keep it true:
//		TableMapReduceUtil.initTableReducerJob(tableName, WordCountReducer.class, job); // cluster mode
		TableMapReduceUtil.initTableReducerJob(tableName, WordCountReducer.class, job,
				null, null, null, null, false);

		Path inputPath = new Path("/usr/input/wc.txt");
		FileInputFormat.addInputPath(job, inputPath);

		boolean f = job.waitForCompletion(true);
		if (f) {
			System.out.println("job completed successfully!");
		}
	}
}
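After the job reports success, the result can be checked from the HBase shell; each word should appear as a row key with its count stored in cf:count:

scan 'wc'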

Reposted from blog.csdn.net/u011418530/article/details/80630218