Hadoop 2.5.2: exporting data from HDFS to multiple HBase tables with MapReduce

Hadoop and HBase are configured and running normally; jps shows the following processes:

60559 HRegionServer
7329 Main
20653 Jps
29355 HQuorumPeer
16221 ResourceManager
29417 HMaster
16538 NodeManager
15750 NameNode
15880 DataNode
16046 SecondaryNameNode

 

Many examples on the Internet are based on Hadoop 0.9x, and the API has changed in newer versions of Hadoop.

This example reads files from HDFS and, after map/reduce processing, writes the results to multiple HBase tables.

The example below was therefore re-tested against the following versions:

Hadoop 2.5.2

HBase 1.1.4

A typical scenario: log files need to be analyzed, and the aggregated statistics are stored in both an HBase result table and an index table.
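
The core of the approach is MultiTableOutputFormat: the key the reducer emits is an ImmutableBytesWritable holding the name of the target table, so a single reducer can route each Put to whichever table it belongs in. A minimal sketch of the idea (the table names, rowKey, family, qualifier, and value here are illustrative placeholders, not part of the example below):

// each key names a destination table
ImmutableBytesWritable resultTable = new ImmutableBytesWritable(Bytes.toBytes("result"));
ImmutableBytesWritable indexTable = new ImmutableBytesWritable(Bytes.toBytes("index"));

Put put = new Put(rowKey);         // rowKey: byte[] placeholder
put.add(family, qualifier, value); // family/qualifier/value: byte[] placeholders

context.write(resultTable, put);   // this row goes to table "result"
context.write(indexTable, put);    // the same row also goes to table "index"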

This example does not use the newer HBase client API. If you want to use the new API, refer to the changes described at:

http://bobboy007.iteye.com/admin/blogs/2289537
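
For reference, a rough sketch of the table-creation step using the newer HBase 1.x client API (Connection/Admin instead of the deprecated HBaseAdmin); this sketch is untested here, since the example below deliberately sticks to the old API:

// assumes imports: org.apache.hadoop.hbase.TableName,
// org.apache.hadoop.hbase.client.Connection, ConnectionFactory, Admin
Configuration conf = HBaseConfiguration.create();
try (Connection connection = ConnectionFactory.createConnection(conf);
     Admin admin = connection.getAdmin()) {
	TableName name = TableName.valueOf("wordcount");
	if (!admin.tableExists(name)) {
		HTableDescriptor htd = new HTableDescriptor(name);
		htd.addFamily(new HColumnDescriptor("content"));
		admin.createTable(htd);
	}
}

The full, re-tested example: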

package jyw.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
//import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.io.Writable;
/*
 * Test reduce writing to multiple tables
 * */
public class HBaseMultiTableOutputReduce {

	// Implement the Map class
	public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
		private final static IntWritable one = new IntWritable(1);
		private Text word = new Text();

		public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			StringTokenizer itr = new StringTokenizer(value.toString());
			while (itr.hasMoreTokens()) {
				word.set(itr.nextToken());
				context.write(word, one);
			}
		}
	}

	/* Implement the Reduce class.
	 * Generic parameters, in order:
	 * map output key type,
	 * map output value type,
	 * reduce output key type,
	 * reduce output value type.
	 * TODO: check whether setup/cleanup methods are needed; also test writing to MySQL.
	 */
	public static class Reduce extends Reducer<Text, IntWritable, ImmutableBytesWritable, Put> {

		public void reduce(Text key, Iterable<IntWritable> values, Context context) {
			ImmutableBytesWritable putTable1 = new ImmutableBytesWritable(Bytes.toBytes("wordcount"));
			ImmutableBytesWritable putTable2 = new ImmutableBytesWritable(Bytes.toBytes("wordcount1"));
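			// with MultiTableOutputFormat, the ImmutableBytesWritable key passed to
			// context.write() selects the destination table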
			int sum = 0;

			Iterator<IntWritable> iterator = values.iterator();
			while (iterator.hasNext()) {
				sum += iterator.next().get();
			}

			// Put instantiation, one line per word
			Put put = new Put(Bytes.toBytes(key.toString()));
			// The column family is content, the column modifier is count, and the column value is the number
			put.add(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(sum)));

			try {
				context.write(putTable1, put);
				context.write(putTable2, put);
			} catch (Exception e) {
				e.printStackTrace();
			}
			// context.write(NullWritable.get(), put);
		}
	}

	// Create HBase data table
	public static void createHBaseTable(String tableName) throws IOException {
		// create table description
		HTableDescriptor htd = new HTableDescriptor(tableName);
		// Create column family description
		HColumnDescriptor col = new HColumnDescriptor("content");
		htd.addFamily(col);

		// configure HBase
		Configuration conf = HBaseConfiguration.create();

		// conf.set("hbase.zookeeper.quorum","127.0.0.1");
		// conf.set("hbase.zookeeper.property.clientPort", "2181");
		HBaseAdmin hAdmin = new HBaseAdmin(conf);

		if (hAdmin.tableExists(tableName)) {
			System.out.println("The data table already exists and is being recreated.");
			// hAdmin.disableTable(tableName);
			// hAdmin.deleteTable(tableName);
		} else {

			System.out.println("Create table: " + tableName);
			hAdmin.createTable(htd);
		}
	}

	public static void main(String[] args) throws Exception {
		String tableName1 = "wordcount";
		String tableName2 = "wordcount1";
		// Step 1: Create a database table
		HBaseMultiTableOutputReduce.createHBaseTable(tableName1);
		HBaseMultiTableOutputReduce.createHBaseTable(tableName2);
		// Step 2: Perform MapReduce processing
		// Configure MapReduce
		Configuration conf = new Configuration();
		// These settings are important when running against a remote cluster (uncomment and adjust as needed)
		// conf.set("mapred.job.tracker", "master:9001");
		// conf.set("hbase.zookeeper.quorum","master");
		// conf.set("hbase.zookeeper.property.clientPort", "2181");
		// conf.set(TableOutputFormat.OUTPUT_TABLE, tableName);

		Job job = Job.getInstance(conf, "multi output Count");
		job.setJarByClass(HBaseMultiTableOutputReduce.class);

		// Set up the Map and Reduce processing classes
		job.setMapperClass(Map.class);
		job.setReducerClass(Reduce.class);

		// set output type
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		// set input and output format
		job.setInputFormatClass(TextInputFormat.class);
		// job.setOutputFormatClass(TableOutputFormat.class);
		job.setOutputFormatClass(MultiTableOutputFormat.class);

		// set input directory
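		// (the NameNode address and input path below are specific to the test cluster;
		// adjust them for your environment)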
		FileInputFormat.addInputPath(job, new Path("hdfs://192.168.0.42:9000/user/jiayongwei/input/"));
		System.exit(job.waitForCompletion(true) ? 0 : 1);

	}
}
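
After the job finishes, the result can be spot-checked with a simple scan. A minimal verification sketch, again using the old-style 1.x client API to match the example (HTable is deprecated in 1.x but still available in 1.1.4):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanWordCount {
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		// deprecated constructor, kept for consistency with the old-API example above
		HTable table = new HTable(conf, "wordcount");
		Scan scan = new Scan();
		scan.addColumn(Bytes.toBytes("content"), Bytes.toBytes("count"));
		ResultScanner scanner = table.getScanner(scan);
		for (Result r : scanner) {
			System.out.println(Bytes.toString(r.getRow()) + " = "
					+ Bytes.toString(r.getValue(Bytes.toBytes("content"), Bytes.toBytes("count"))));
		}
		scanner.close();
		table.close();
	}
}

Running the same scan against "wordcount1" should show identical rows, since the reducer writes every Put to both tables.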

 
