MapReduce Exercise: Common Friends

This feels like a failed piece of code: it does implement the feature, but it is only nominally MapReduce. Written without MapReduce it would actually be shorter.
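The mapper and reducer below assume an input file (d:/friend.txt in the driver) with one person per line, the name separated from a space-delimited friend list by a colon. A made-up sample, since the original post does not show the data file:

A:B C D
B:A C E
C:A B D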

1.map

package nuc.edu.ls.friends;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Forwards every input line unchanged under the single constant key
 * "friend", so the entire data set arrives in one reduce call.
 */
public class MapTask extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(new Text("friend"), value);
    }
}
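Because everything is keyed by the constant "friend", the job gains no parallelism from MapReduce, which matches the remark at the top. For comparison, the textbook common-friends formulation inverts the relation in the map phase instead. A minimal sketch of that stage-1 mapper, assuming the same name:friends input format (my addition, not part of the original post; a second job would still be needed to group the emitted pairs):

package nuc.edu.ls.friends;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical stage-1 mapper: for a line "A:B C D" it emits
// (B, A), (C, A), (D, A). The reducer for key F then sees every person
// who lists F as a friend; each pair among those people has F in common.
public class InvertMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] parts = value.toString().split(":");
        String person = parts[0];
        for (String friend : parts[1].split("\\s+")) {
            context.write(new Text(friend), new Text(person));
        }
    }
}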

2.reduce

package nuc.edu.ls.friends;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class ReduceTask extends Reducer<Text, Text, Text, Text> {
    // Intersection helper. List.retainAll would be much simpler, but the
    // original author hit a confusing bug: the list receiving the intersection
    // could not be modified again. The likely cause is that Arrays.asList
    // returns a fixed-size view of the array, so mutating calls such as
    // retainAll throw UnsupportedOperationException (see the standalone demo
    // after this class).
    public static List<String> intersect(List<String> arr1, List<String> arr2) {
        List<String> result = new ArrayList<String>();
        for (String arr : arr2) { // walk arr2
            if (arr1.contains(arr)) { // element appears in both lists
                result.add(arr); // collect it: result is the intersection
            }
        }
        return result;
    }

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Parse each line "name:friend1 friend2 ..." into person -> friend list.
        Map<String, List<String>> allPeople = new HashMap<String, List<String>>();
        for (Text one : values) {
            String[] name = one.toString().split(":");
            String[] friends = name[1].split("\\s+");
            // Copy into a real ArrayList; Arrays.asList alone is fixed-size.
            allPeople.put(name[0], new ArrayList<String>(Arrays.asList(friends)));
        }
        Set<String> peopleSet = allPeople.keySet();
        // Pairs already written, so that (A,B) and (B,A) are not both emitted.
        List<String> hasInsert = new ArrayList<String>();
        for (String me : peopleSet) {
            List<String> myFriend = allPeople.get(me);
            for (String other : peopleSet) {
                // equals, not "!=": != compares references, not string content
                if (!me.equals(other)) {
                    List<String> otherFriend = allPeople.get(other);
                    List<String> result = intersect(myFriend, otherFriend);
                    if (result.size() > 0
                            && !(hasInsert.contains(me + "+" + other) || hasInsert.contains(other + "+" + me))) {
                        context.write(new Text(me + " and " + other + " common friends:"),
                                new Text(result.toString()));
                        hasInsert.add(me + "+" + other);
                    }
                }
            }
        }
    }
}
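The "confusing bug" mentioned above is almost certainly the fixed-size list returned by Arrays.asList: calling a mutating method such as retainAll on it throws UnsupportedOperationException. A standalone reconstruction (my addition, not from the original post):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class RetainAllDemo {
    public static void main(String[] args) {
        List<String> a = Arrays.asList("B", "C", "D"); // fixed-size view of an array
        List<String> b = Arrays.asList("A", "C", "E");

        // a.retainAll(b); // would throw UnsupportedOperationException

        // Copying into a real ArrayList makes retainAll work:
        List<String> intersection = new ArrayList<String>(a);
        intersection.retainAll(b);
        System.out.println(intersection); // prints [C]
    }
}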

3.driver

package nuc.edu.ls.friends;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Driver {
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "eclipseToCluster");

        job.setMapperClass(MapTask.class);
        job.setReducerClass(ReduceTask.class);
        job.setJarByClass(Driver.class);
        // job.setJar("C:\\Users\\LENOVO\\Desktop\\WordCount.jar");

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input and output on the local file system (Windows drive d:).
        FileInputFormat.addInputPath(job, new Path("d:/friend.txt"));
        FileOutputFormat.setOutputPath(job, new Path("d:/friendResult/"));

        boolean completion = job.waitForCompletion(true);
        System.out.println(completion ? 0 : 1);
    }
}

4.results:
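The original post does not include the result listing here. For illustration only: with the made-up sample input from the top of this post, the reducer above would emit output along these lines (pair order depends on HashMap iteration and may vary):

A and B common friends:	[C]
A and C common friends:	[B, D]
B and C common friends:	[A]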

Reposted from blog.csdn.net/qq_39184715/article/details/81951060