0. 问题
- 通过mapreduce找出用户A,B,C…中每两个人所共同拥有的好友都有谁
- 输入文件
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
- 输出文件格式为: 用户-用户 共同好友
A-H E C D O
A-I O
A-J O B
1. 主方法
/**
 * Job driver: configures a single MapReduce job with {@code FriendMaper} and
 * {@code FriendReduceer}, runs it, and exits the JVM with status 0 when
 * {@code waitForCompletion} returns true and 1 otherwise.
 *
 * The input and output locations are fixed local paths.
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    // Class that contains this main method.
    job.setJarByClass(friend.class);

    // Map phase: mapper class and its intermediate key/value types.
    job.setMapperClass(FriendMaper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Reduce phase: reducer class and the job's final key/value types.
    job.setReducerClass(FriendReduceer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Input and output locations.
    FileInputFormat.addInputPath(job, new Path("d:\\mr\\input\\friend"));
    FileOutputFormat.setOutputPath(job, new Path("d:\\mr\\outfriend"));

    // Block until the job finishes, then report the outcome through the exit status.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
2. map
/**
 * First-pass mapper: inverts each input line of the form
 * {@code user:friend1,friend2,...} into one {@code (friend, user)} pair per
 * listed friend, so that the reducer receives, for each friend, every user
 * that lists them.
 *
 * Lines that do not contain both a user and a friend list (for example a blank
 * line) are skipped instead of failing the task.
 */
static class FriendMaper extends Mapper<LongWritable,Text,Text,Text>{
    private Text mkey=new Text();
    private Text mvalue=new Text();

    /**
     * Emits {@code (friend, user)} for every friend on the line.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one input line, e.g. {@code A:B,C,D,F,E,O}
     * @param context sink for the emitted pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        System.out.println("map");
        // lines1: [user, "friend1,friend2,..."], e.g. ["A", "B,C,D,F,E,O"]
        String[] lines1 = value.toString().split(":");
        if (lines1.length < 2) {
            // Blank or malformed line: there is no friend list to invert.
            return;
        }
        // The output value is always the user who owns this friend list,
        // so it only needs to be set once per line.
        mvalue.set(lines1[0]);
        // lines2: the individual friends, e.g. B C D F E O
        String[] lines2 = lines1[1].split(",");
        for (String word : lines2) {
            if (word.isEmpty()) {
                // Stray comma (e.g. "A:,B"): do not emit an empty friend key.
                continue;
            }
            // Each friend becomes the map output key in turn.
            mkey.set(word);
            context.write(mkey, mvalue);
        }
    }
}
3. reduce
/**
 * First-pass reducer: for one friend (the key) and all users that list that
 * friend (the values), emits one {@code (userX-userY, friend)} record for
 * every unordered pair of those users.
 */
static class FriendReduceer extends Reducer<Text,Text,Text,Text>{
    private Text rkey=new Text();
    private Text rvalue=new Text();

    /**
     * Emits every two-user combination of {@code values}, each tagged with {@code key}.
     *
     * @param key     the shared friend
     * @param values  the users that have {@code key} in their friend list
     * @param context sink for the emitted pairs
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        System.out.println("reduce");
        // Copy the values into a list so they can be sorted and indexed.
        List<String> slist = new ArrayList<>();
        for (Text v : values) {
            slist.add(v.toString());
        }
        // Sort so that each pair is always written smaller-name-first (A-B, never B-A);
        // this keeps the pair key identical across different friends.
        Collections.sort(slist);

        // The friend is the same for every pair produced by this call.
        rvalue.set(key.toString());

        // Enumerate each unordered pair exactly once: j starts just after i and
        // runs through the LAST element, so the final user is paired as well.
        for (int i = 0; i < slist.size() - 1; i++) {
            for (int j = i + 1; j < slist.size(); j++) {
                // Output format: user-user <friend>
                rkey.set(slist.get(i) + "-" + slist.get(j));
                context.write(rkey, rvalue);
            }
        }
    }
}
4. 第二次mapreduce, 实现合并两个用户的共同好友
主方法基本不变(只需把输入路径改为第一次mapreduce的输出目录, 并指定一个新的、尚不存在的输出目录), 这里只列出map和reduce类
/**
 * Second-pass mapper: reads one line of the first job's output
 * ({@code userX-userY <whitespace> friend}) and re-emits it as
 * {@code (userX-userY, friend)}.
 */
static class FriendMaper extends Mapper<LongWritable,Text,Text,Text>{
    private Text pairKey = new Text();
    private Text friendValue = new Text();

    /**
     * Splits the line on a single whitespace character and forwards the first
     * field as the key and the second field as the value.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one line such as {@code B-C A}
     * @param context sink for the emitted pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        System.out.println("map");
        // fields: [ "B-C", "A" ]
        String[] fields = value.toString().split("\\s");
        String commonFriend = fields[1];
        String userPair = fields[0];
        friendValue.set(commonFriend);
        pairKey.set(userPair);
        // Key is the user pair, value is one friend they share.
        context.write(pairKey, friendValue);
    }
}
/**
 * Second-pass reducer: concatenates all values received for one user pair
 * into a single space-separated string and writes it under that pair.
 */
static class FriendReduceer extends Reducer<Text,Text,Text,Text> {
    private Text joinedFriends = new Text();

    /**
     * Writes {@code (key, "v1 v2 ... ")}; each value is followed by one space,
     * including the last one.
     *
     * @param key     the user pair, e.g. {@code A-H}
     * @param values  the values emitted by the mapper for this pair
     * @param context sink for the merged record
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        System.out.println("reduce");
        // Accumulate the values in iteration order.
        StringBuilder merged = new StringBuilder();
        for (Text friend : values) {
            merged.append(friend.toString()).append(" ");
        }
        joinedFriends.set(merged.toString());
        context.write(key, joinedFriends);
    }
}