HBase Study Notes: Integrating HBase with MapReduce

1. Setting up local-mode MapReduce job submission

1.1 Unpack the Hadoop distribution locally.
1.2 Configure the Hadoop environment variables:
add %HADOOP_HOME%;
append %HADOOP_HOME%\bin and %HADOOP_HOME%\sbin to %PATH%.
1.3 Put the winutils.exe tool into the bin directory of the unpacked Hadoop (if Hadoop still cannot find it at run time, see the sketch at the end of this section).
2. The Java project
2.1 Use your own locally installed JDK; do not use the JRE bundled with Eclipse.
2.2 Do not put any MapReduce configuration files (hdfs-site.xml, yarn-site.xml, core-site.xml, mapred-site.xml) under the project root (the src directory).
2.3 Specify the settings in code via conf.set, e.g. conf.set("fs.defaultFS", "hdfs://<active NameNode>:8020");
2.4 Import the org package into the project.
2.5 Modify the Hadoop source code.
Cause of the reported error: the JRE bundled with Eclipse was being used; switch to the JRE of the locally installed JDK.
In the copied Hadoop source, comment out the call MRApps.addLog4jSystemProperties(logLevel, logSize, numBackups, vargs); otherwise an error will be thrown.
3. Right-click the driver class and choose Run to execute it.
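If Hadoop still cannot locate winutils.exe when the job is started from the IDE, the unpacked directory can also be pointed to from code. A minimal sketch, assuming the distribution was unpacked to D:/hadoop (the path and the class name LocalHadoopHome are placeholders, not part of the original setup):

public class LocalHadoopHome {
	public static void main(String[] args) {
		// Must be set before any Hadoop class is used; the path is a placeholder
		// for the locally unpacked Hadoop directory that contains bin\winutils.exe
		System.setProperty("hadoop.home.dir", "D:/hadoop");
		// ... then build the Configuration and Job as in the examples below
	}
}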

2. The workflow for writing a MapReduce job

1. Get the configuration object
2. Set the relevant properties
3. Get the Job object and set its basic values
4. Set the input source and output source
5. Set the mapper class
6. Set the mapper output key/value types
7. Set the partitioner and comparator (optional)
8. Set the reducer class
9. Set the reducer output key/value types
10. Submit the job (a driver skeleton following these steps is sketched below)
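A minimal driver skeleton that follows these ten steps; the class names (JobSkeleton, MyMapper, MyReducer), the NameNode address and the /input and /output paths are placeholders for illustration only:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobSkeleton {

	// Minimal mapper: emits (word, 1) for every comma-separated token of a line
	public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			for (String word : value.toString().split(",")) {
				context.write(new Text(word), new IntWritable(1));
			}
		}
	}

	// Minimal reducer: sums the counts for each key
	public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values, Context context)
				throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable v : values) {
				sum += v.get();
			}
			context.write(key, new IntWritable(sum));
		}
	}

	public static void main(String[] args) throws Exception {
		// 1-2. Get the configuration and set the relevant properties
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://node01:8020"); // placeholder NameNode address

		// 3. Get the Job object and set its basic values
		Job job = Job.getInstance(conf, "job-skeleton");
		job.setJarByClass(JobSkeleton.class);

		// 4. Input and output sources (placeholder paths)
		FileInputFormat.addInputPath(job, new Path("/input"));
		FileOutputFormat.setOutputPath(job, new Path("/output"));

		// 5-6. Mapper class and its output key/value types
		job.setMapperClass(MyMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		// 7. (Optional) partitioner and comparator would be set here
		// job.setPartitionerClass(...); job.setSortComparatorClass(...);

		// 8-9. Reducer class and its output key/value types
		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		// 10. Submit the job and wait for completion
		job.waitForCompletion(true);
	}
}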

3. Example: read data from HDFS, run a word count, and write the result into HBase

Code:

Driver program:
package com.xjq.HdfsToHbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class WCRunner {
	public static void main(String[] args) throws Exception {
		// Configuration object
		Configuration conf = new Configuration();
		// ZooKeeper quorum used to locate the HBase cluster
		conf.set("hbase.zookeeper.quorum", "node02,node03,node04");
		// HDFS entry point
		conf.set("fs.defaultFS", "hdfs://node01:8020");
		// Get the Job object
		Job job = Job.getInstance(conf);
		job.setJarByClass(WCRunner.class);
		// Set the mapper class
		job.setMapperClass(WCMapper.class);
		// Mapper output key/value types
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		// This call is required when the output is written into HBase.
		// Parameters: table name (String), reducer class (extends TableReducer), Job,
		// then partitioner class / quorum address / server class / server impl
		// (all null here, matching the simpler overload), and addDependencyJars (false).
		TableMapReduceUtil.initTableReducerJob("wc", WCReducer.class, job, null, null, null, null, false);
		// Input path: the job reads its input from HDFS
		FileInputFormat.addInputPath(job, new Path("/usr/wc"));
		// Reducer output types
		job.setOutputKeyClass(NullWritable.class);
		job.setOutputValueClass(Put.class);

		// Input/output formats could also be set explicitly:
		// job.setInputFormatClass(...);
		// job.setOutputFormatClass(...);

		// Submit the job and wait for completion
		job.waitForCompletion(true);
	}
}

Mapper class:

package com.xjq.HdfsToHbase;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// Each input line contains comma-separated words; emit (word, 1) for every token
		String[] strs = value.toString().split(",");
		for (String string : strs) {
			context.write(new Text(string), new IntWritable(1));
		}
	}
}
 

Reducer class:
Because the result is written into HBase, the reducer class must extend TableReducer.

package com.xjq.HdfsToHbase;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class WCReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
	@Override
	protected void reduce(Text key, Iterable<IntWritable> iter, Context context)
			throws IOException, InterruptedException {
		// Sum up the counts for this word
		int sum = 0;
		for (IntWritable intWritable : iter) {
			sum = sum + intWritable.get();
		}
		String row = key.toString();
		// Writing into HBase is done through a Put: the word is the rowkey,
		// and the count goes into column family "cf", qualifier "ct"
		Put put = new Put(row.getBytes());
		put.add("cf".getBytes(), "ct".getBytes(), String.valueOf(sum).getBytes());
		// TableOutputFormat ignores the key, so null is acceptable here
		context.write(null, put);
	}
}
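The job writes into an HBase table named wc with column family cf, which has to exist before the job runs. A minimal sketch for creating it, assuming the HBase 1.x client API (the class name CreateWcTable is only illustrative, not part of the original post):

package com.xjq.HdfsToHbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateWcTable {
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum", "node02,node03,node04");
		try (Connection conn = ConnectionFactory.createConnection(conf);
		     Admin admin = conn.getAdmin()) {
			TableName name = TableName.valueOf("wc");
			if (!admin.tableExists(name)) {
				// Table "wc" with a single column family "cf", as used by WCReducer
				HTableDescriptor desc = new HTableDescriptor(name);
				desc.addFamily(new HColumnDescriptor("cf"));
				admin.createTable(desc);
			}
		}
	}
}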

4. Example: export the data written into HBase in example 3 back into HDFS

Driver program:

package com.xjq.HbaseToHdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class HbToHdRunner {

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		conf.set("hbase.zookeeper.quorum", "node02,node03,node04");
		conf.set("fs.defaultFS", "hdfs://node01:8020");

		Job job = Job.getInstance(conf);
		job.setJarByClass(HbToHdRunner.class);
		// Reading from HBase is driven by a Scan
		Scan scan = new Scan();
		scan.setCaching(500);       // scanner caching: the default (1) is far too small for MR jobs
		scan.setCacheBlocks(false); // don't set to true for MR jobs
		// This call is required when the input is read from HBase.
		// Parameters: table name (byte[]), Scan, mapper class,
		// then map output key/value classes (null here, set explicitly below), and the Job.
		TableMapReduceUtil.initTableMapperJob("wc".getBytes(), scan, HbToHdMapper.class, null, null, job);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setReducerClass(HbToHdReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		// Output path; note that it must not already exist,
		// so delete any leftover from a previous run first
		Path outpath = new Path("/Hbase-wc");
		if (outpath.getFileSystem(conf).exists(outpath)) {
			outpath.getFileSystem(conf).delete(outpath, true);
		}
		FileOutputFormat.setOutputPath(job, outpath);

		job.waitForCompletion(true);
	}
}

Mapper class:
When reading data from HBase, the mapper must extend TableMapper.

package com.xjq.HbaseToHdfs;

import java.io.IOException;

import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

public class HbToHdMapper extends TableMapper<Text, Text> {
	@Override
	protected void map(ImmutableBytesWritable key, Result value, Context context)
			throws IOException, InterruptedException {
		// key is the rowkey (the word); value holds every column family and value of that row
		String word = Bytes.toString(key.get());
		// Read the latest cell of column family "cf", qualifier "ct" (the count)
		String count = Bytes.toString(CellUtil.cloneValue(value.getColumnLatestCell("cf".getBytes(), "ct".getBytes())));

		context.write(new Text(word), new Text(count));
	}
}

Reducer class:

package com.xjq.HbaseToHdfs;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class HbToHdReducer extends Reducer<Text, Text, Text, Text> {
	@Override
	protected void reduce(Text key, Iterable<Text> iter, Context context)
			throws IOException, InterruptedException {
		// Each rowkey (word) is unique, so there is exactly one value per key;
		// the loop simply picks it up
		Text tx = new Text();
		for (Text text : iter) {
			tx = text;
		}
		context.write(key, tx);
	}
}
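Once the job finishes, the word counts sit as tab-separated text under /Hbase-wc. A minimal sketch for reading them back from HDFS as a quick check, assuming the default single-reducer output file name part-r-00000 (the class name CheckOutput is only illustrative):

package com.xjq.HbaseToHdfs;

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CheckOutput {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://node01:8020");
		FileSystem fs = FileSystem.get(conf);
		// Default output file of the single reducer; adjust if more reducers are used
		Path part = new Path("/Hbase-wc/part-r-00000");
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(part)))) {
			String line;
			while ((line = reader.readLine()) != null) {
				System.out.println(line); // each line: word<TAB>count
			}
		}
	}
}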
