Hadoop 2.5.2: export data to HBase

Keeping this here as a backup; all the examples in this category have been tested by me. The listing below is a WordCount job that reads text files from HDFS and writes each word's count into the HBase table "wordcount" (column family "content", qualifier "count").

package jyw.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
 
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.KeyValue;

import org.apache.hadoop.hbase.client.HBaseAdmin;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 /*
  * A simple MapReduce word count that writes its results to HBase
  * */
public class WordCountHBase {
 
  /* Map implementation.
   * Input key: byte offset of the line in the file (LongWritable)
   * Input value: one line of text (Text)
   * Output key: a word (Text)
   * Output value: the count 1 (IntWritable)
  */
  public static class Map extends
      Mapper<LongWritable, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();
 
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }
 
  /* Reduce implementation.
   * Input key/value types match the Map output (Text / IntWritable).
   * For a TableReducer the output key is NullWritable here,
   * and the output value is a Put that gets written to HBase.
  */
  public static class Reduce extends
      TableReducer<Text, IntWritable, NullWritable> {
	    
    public static Configuration configuration = null;
    static {
      configuration = HBaseConfiguration.create();
      // configuration.set("hbase.master", "192.168.0.201:60000");
      // configuration.set("hbase.zookeeper.quorum", "idc01-hd-nd-03,idc01-hd-nd-04,idc01-hd-nd-05");
      // configuration.set("hbase.zookeeper.property.clientPort", "2181");
    }

    // Look up one row by key and return the value of its last cell as a String.
    public String selectRowKey(String tablename, String rowKey) throws IOException {
      HTable table = new HTable(configuration, tablename);
      try {
        Get g = new Get(rowKey.getBytes());
        Result rs = table.get(g);
        String retstr = "";
        for (KeyValue kv : rs.raw()) {
          retstr = new String(kv.getValue());
        }
        return retstr;
      } finally {
        table.close();
      }
    }
	  
    public void reduce(Text key, Iterable<IntWritable> values,
        Context context) throws IOException, InterruptedException {
 
      int sum = 0;
 
      Iterator<IntWritable> iterator = values.iterator();
      while (iterator.hasNext()) {
        sum += iterator.next().get();
      }
      // Test query: read back the row "product" from the wordcount table
      String result = selectRowKey("wordcount", "product");

      // One Put per word, i.e. one HBase row per word
      Put put = new Put(Bytes.toBytes(result + "_" + key.toString()));
      // Column family "content", column qualifier "count", value is the word count
      put.add(Bytes.toBytes("content"), Bytes.toBytes("count"),
          Bytes.toBytes(String.valueOf(sum)));
      
      context.write(NullWritable.get(), put);
    }
  }
 
  // Create the HBase table with a single column family "content"
  public static void createHBaseTable(String tableName) throws IOException {
    // create table description
    HTableDescriptor htd = new HTableDescriptor(tableName);
    // Create column family description
    HColumnDescriptor col = new HColumnDescriptor("content");
    htd.addFamily(col);
 
    // configure HBase
    Configuration conf = HBaseConfiguration.create();
 
    //conf.set("hbase.zookeeper.quorum","127.0.0.1");
    //conf.set("hbase.zookeeper.property.clientPort", "2181");
    HBaseAdmin hAdmin = new HBaseAdmin(conf);
 
    if (hAdmin.tableExists(tableName)) {
      System.out.println("The table already exists.");
      // hAdmin.disableTable(tableName);
      // hAdmin.deleteTable(tableName);
    } else {
      System.out.println("Creating table: " + tableName);
      hAdmin.createTable(htd);
    }
    hAdmin.close();
  }
 
  public static void main(String[] args) throws Exception {
    String tableName = "wordcount";
    // Step 1: Create a database table
    WordCountHBase.createHBaseTable(tableName);
 
    // Step 2: Perform MapReduce processing
    // Configure MapReduce
    Configuration conf = new Configuration();
    // These settings matter when running against a remote cluster
   // conf.set("mapred.job.tracker", "master:9001");
    //conf.set("hbase.zookeeper.quorum","master");
    //conf.set("hbase.zookeeper.property.clientPort", "2181");
    conf.set(TableOutputFormat.OUTPUT_TABLE, tableName);
 
    Job job = Job.getInstance(conf, "New Word Count");
    job.setJarByClass(WordCountHBase.class);
 
    // Set up the Map and Reduce processing classes
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
 
    // set output type
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
 
    // set input and output format
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TableOutputFormat.class);
 
    // set input directory
    FileInputFormat.addInputPath(job, new Path("hdfs://192.168.0.42:9000/user/jiayongwei/input/"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
 
  }
}
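
After the job finishes, the output can be checked without opening the HBase shell. The sketch below is not part of the original program: the class name WordCountScan is my own, and the commented-out ZooKeeper settings are assumptions that only apply if hbase-site.xml is not on the classpath. It scans the "wordcount" table with the same generation of the HBase client API and prints each row key together with its content:count value.

package jyw.test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

/*
 * Hypothetical verification tool: prints every row of the "wordcount" table
 * as "<rowkey> => <count>".
 */
public class WordCountScan {

  public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    // conf.set("hbase.zookeeper.quorum", "master");            // assumption: only needed without hbase-site.xml
    // conf.set("hbase.zookeeper.property.clientPort", "2181");

    HTable table = new HTable(conf, "wordcount");
    Scan scan = new Scan();
    // Restrict the scan to the single column written by the reducer
    scan.addColumn(Bytes.toBytes("content"), Bytes.toBytes("count"));

    ResultScanner scanner = table.getScanner(scan);
    try {
      for (Result row : scanner) {
        for (KeyValue kv : row.raw()) {
          System.out.println(Bytes.toString(row.getRow()) + " => "
              + Bytes.toString(kv.getValue()));
        }
      }
    } finally {
      scanner.close();
      table.close();
    }
  }
}

Run it with the same classpath as the job (the Hadoop and HBase client jars); it should print one line for each word row written by the reducer.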

 
