The code I wrote yesterday didn't really follow the MapReduce way of thinking, so today I rewrote it.
Data (each line is one person, a colon, then that person's friends separated by spaces):
A:B D E
B:A C F
C:A D E
D:A B C D
F:C D E G
E:A D
The whole job is split into two MapReduce passes.
Pass 1: for each person X, produce the set of all people whose friend lists contain X (i.e., invert the friend lists). Every pair of people inside such a set has X as a common friend, which is what pass 2 exploits.
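To see what the first pass is supposed to produce, here is a small standalone Java sketch (my own illustration, not part of the job) that performs the same inversion in memory on the sample data:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class InvertFriendsSketch {
    public static void main(String[] args) {
        String[] lines = {"A:B D E", "B:A C F", "C:A D E", "D:A B C D", "F:C D E G", "E:A D"};
        // friend -> everyone whose friend list contains that friend
        Map<String, List<String>> whoHasMe = new TreeMap<>();
        for (String line : lines) {
            String[] split = line.split(":");
            for (String friend : split[1].split("\\s+")) {
                whoHasMe.computeIfAbsent(friend, k -> new ArrayList<>()).add(split[0]);
            }
        }
        // Prints e.g. "A: [B, C, D, E]" -- B, C, D and E all list A as a friend.
        whoHasMe.forEach((friend, people) -> System.out.println(friend + ": " + people));
    }
}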
package nuc.edu.ls.friends.Final;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FindFriend {

    public static class MapTask extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // An input line looks like "A:B D E" -- person A, a colon, then A's friends.
            String[] split = value.toString().split(":");
            String[] split2 = split[1].split("\\s+");
            for (String string : split2) {
                // Emit (friend, person): split[0] lists "string" as a friend,
                // so under key "string" the reducer collects everyone who lists it.
                context.write(new Text(string), new Text(split[0]));
            }
        }
    }

    public static class ReduceTask extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text arg0, Iterable<Text> arg1, Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            StringBuilder whoFriendHasMe = new StringBuilder();
            for (Text text : arg1) {
                String who = text.toString();
                whoFriendHasMe.append(who + "\t");
            }
            // Write "A:" followed by everyone whose friend list contains A.
            context.write(new Text(arg0.toString() + ":"), new Text(whoFriendHasMe.toString()));
        }
    }

    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "eclipseToCluster");
        job.setMapperClass(MapTask.class);
        job.setReducerClass(ReduceTask.class);
        job.setJarByClass(FindFriend.class);
        //job.setJar("C:\\Users\\LENOVO\\Desktop\\WordCount.jar");
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path("d:/friend.txt"));
        FileOutputFormat.setOutputPath(job, new Path("d:/friendResult1/"));
        boolean completion = job.waitForCompletion(true);
        System.out.println(completion ? 0 : 1);
    }
}
Output of the first pass:
Pass 2: from each of those sets, generate every pair of people and emit the set's owner as one of their common friends; the reducer then collects all common friends per pair.
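The key trick in the second map step is to sort the names before pairing them, so that the pair (B, D) always produces the same key no matter which order the names arrived in. A minimal standalone sketch of that pairing logic (class and variable names are mine):

import java.util.Arrays;

public class PairSketch {
    public static void main(String[] args) {
        // One line of the first pass's output: the people whose friend lists contain C
        String[] people = {"D", "B", "F"};
        Arrays.sort(people); // B, D, F -- guarantees a stable pair order
        for (int i = 0; i < people.length; i++) {
            for (int j = i + 1; j < people.length; j++) {
                // Each pair shares C as a common friend: B-D, B-F, D-F
                System.out.println(people[i] + "-" + people[j] + " -> C");
            }
        }
    }
}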
package nuc.edu.ls.friends.Final;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FindFriendFinal {

    public static class MapTask extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // An input line from the first pass looks like "A:<TAB>B<TAB>C<TAB>D<TAB>E<TAB>".
            String[] split = value.toString().split(":");
            String[] split2 = split[1].split("\t");
            // Sort so that each pair is always emitted in the same order
            // (always "B与C", never "C与B"), otherwise the pairs would not group.
            Arrays.sort(split2);
            List<String> peopleList = Arrays.asList(split2);
            for (int i = 0; i < peopleList.size(); i++) {
                for (int j = i + 1; j < peopleList.size(); j++) {
                    // Skip the empty token produced by the tab right after the colon.
                    if (peopleList.get(i).length() > 0 && peopleList.get(j).length() > 0) {
                        // Key reads "X与Y共同好友" ("common friends of X and Y");
                        // the value is one person both X and Y list as a friend.
                        context.write(new Text(peopleList.get(i) + "与" + peopleList.get(j) + "共同好友"),
                                new Text(split[0]));
                    }
                }
            }
        }
    }

    public static class ReduceTask extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text arg0, Iterable<Text> arg1, Reducer<Text, Text, Text, Text>.Context arg2)
                throws IOException, InterruptedException {
            // Concatenate every common friend of this pair of people.
            StringBuilder sb = new StringBuilder();
            for (Text text : arg1) {
                sb.append(text.toString() + " ");
            }
            arg2.write(arg0, new Text(sb.toString()));
        }
    }

    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "eclipseToCluster");
        job.setMapperClass(MapTask.class);
        job.setReducerClass(ReduceTask.class);
        job.setJarByClass(FindFriendFinal.class);
        //job.setJar("C:\\Users\\LENOVO\\Desktop\\WordCount.jar");
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path("d:/friendResult1/part-r-00000"));
        FileOutputFormat.setOutputPath(job, new Path("d:/friendResultFinal/"));
        boolean completion = job.waitForCompletion(true);
        System.out.println(completion ? 0 : 1);
    }
}
Final output:
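If you would rather submit both passes from a single driver instead of running the two classes by hand, something along these lines should work (a sketch only: the driver class and job names are mine, it reuses the mapper/reducer classes above, and it assumes the intermediate directory does not exist yet):

package nuc.edu.ls.friends.Final;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FindFriendDriver {
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        Path input = new Path("d:/friend.txt");
        Path temp = new Path("d:/friendResult1/");      // intermediate output of pass 1
        Path output = new Path("d:/friendResultFinal/");

        // Pass 1: invert the friend lists.
        Job job1 = Job.getInstance(conf, "who-has-me");
        job1.setJarByClass(FindFriend.class);
        job1.setMapperClass(FindFriend.MapTask.class);
        job1.setReducerClass(FindFriend.ReduceTask.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job1, input);
        FileOutputFormat.setOutputPath(job1, temp);
        if (!job1.waitForCompletion(true)) {
            System.exit(1);
        }

        // Pass 2: pair up people and collect their common friends.
        Job job2 = Job.getInstance(conf, "common-friends");
        job2.setJarByClass(FindFriendFinal.class);
        job2.setMapperClass(FindFriendFinal.MapTask.class);
        job2.setReducerClass(FindFriendFinal.ReduceTask.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job2, temp);       // reads all part files from pass 1
        FileOutputFormat.setOutputPath(job2, output);
        System.exit(job2.waitForCompletion(true) ? 0 : 1);
    }
}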