数据分析案例 4 电影数据分析统计

数据样例:
movie:电影id
rate:用户评分
timeStamp:评分时间
uid:用户id

{"movie":"2804","rate":"5","timeStamp":"978300719","uid":"1"}
{"movie":"594","rate":"4","timeStamp":"978302268","uid":"1"}
{"movie":"919","rate":"4","timeStamp":"978301368","uid":"1"}
需求:
1.每个用户评分最高的3部电影评分信息
2.每个用户的uid和评分的平均值
3.最大方(评分平均值高)的3个用户的uid和评分平均值
    (按评分平均值降序排序后取前3名)
4.最热门的3部电影id和评价次数
5.评价最高的3部电影id和评分均值    
并将统计结果输出到结果文件中(或数据库中)

步骤:
1.读文件,赋值到bean中
2.按评分排序
3.取前3部电影
4.最热门的3部电影id和评价次数  
热门定义 :评价次数最多的

5.评价最高的3部电影id和评分均值 评价最高(评分最高)

并将统计结果输出到结果文件中(或数据库中)


由于数据文件太大,只上传一部分

rate.txt(注意:下文 Test.java 中读取的文件名为 rating.txt,运行前请保证文件名和路径与代码一致)

{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}
{"movie":"661","rate":"3","timeStamp":"978302109","uid":"1"}
{"movie":"914","rate":"3","timeStamp":"978301968","uid":"1"}
{"movie":"3408","rate":"4","timeStamp":"978300275","uid":"1"}
{"movie":"2355","rate":"5","timeStamp":"978824291","uid":"1"}
{"movie":"1197","rate":"3","timeStamp":"978302268","uid":"1"}
{"movie":"1287","rate":"5","timeStamp":"978302039","uid":"1"}
{"movie":"2804","rate":"5","timeStamp":"978300719","uid":"1"}
{"movie":"594","rate":"4","timeStamp":"978302268","uid":"1"}
{"movie":"919","rate":"4","timeStamp":"978301368","uid":"1"}
{"movie":"595","rate":"5","timeStamp":"978824268","uid":"1"}
{"movie":"938","rate":"4","timeStamp":"978301752","uid":"1"}
{"movie":"2398","rate":"4","timeStamp":"978302281","uid":"1"}
{"movie":"2918","rate":"4","timeStamp":"978302124","uid":"1"}
{"movie":"1035","rate":"5","timeStamp":"978301753","uid":"1"}
{"movie":"2791","rate":"4","timeStamp":"978302188","uid":"1"}
{"movie":"2687","rate":"3","timeStamp":"978824268","uid":"1"}
{"movie":"2018","rate":"4","timeStamp":"978301777","uid":"1"}
{"movie":"3105","rate":"5","timeStamp":"978301713","uid":"1"}
{"movie":"2797","rate":"4","timeStamp":"978302039","uid":"1"}
{"movie":"2321","rate":"3","timeStamp":"978302205","uid":"1"}
{"movie":"720","rate":"3","timeStamp":"978300760","uid":"1"}
{"movie":"1270","rate":"5","timeStamp":"978300055","uid":"1"}
{"movie":"527","rate":"5","timeStamp":"978824195","uid":"1"}
{"movie":"2340","rate":"3","timeStamp":"978300103","uid":"1"}
{"movie":"48","rate":"5","timeStamp":"978824351","uid":"1"}
{"movie":"1097","rate":"4","timeStamp":"978301953","uid":"1"}
{"movie":"1721","rate":"4","timeStamp":"978300055","uid":"1"}
{"movie":"2321","rate":"3","timeStamp":"978302205","uid":"1"}
{"movie":"1545","rate":"4","timeStamp":"978824139","uid":"1"}
{"movie":"745","rate":"3","timeStamp":"978824268","uid":"1"}
{"movie":"2294","rate":"4","timeStamp":"978824291","uid":"1"}
{"movie":"3186","rate":"4","timeStamp":"978300019","uid":"1"}
{"movie":"1566","rate":"4","timeStamp":"978824330","uid":"1"}
{"movie":"588","rate":"4","timeStamp":"978824268","uid":"1"}
{"movie":"1907","rate":"4","timeStamp":"978824330","uid":"1"}
{"movie":"783","rate":"4","timeStamp":"978824291","uid":"1"}
{"movie":"1836","rate":"5","timeStamp":"978300172","uid":"1"}
{"movie":"1022","rate":"5","timeStamp":"978300055","uid":"1"}

{"movie":"2762","rate":"4","timeStamp":"978302091","uid":"1"}

以上是数据文件的部分内容(完整数据文件约100万条)

HotMovie.java

package ch04;

/**
 * Pairs a movie id with the number of ratings that movie received.
 *
 * <p>The natural ordering is by rating count, highest first, so sorting a
 * list of {@code HotMovie} puts the most-rated movies at the front.
 */
public class HotMovie implements Comparable<HotMovie> {
	/** Movie id. */
	private int movie;
	/** Number of ratings recorded for the movie. */
	private int count;

	/** Creates an instance with both fields left at 0. */
	public HotMovie() {
	}

	/**
	 * @param movie movie id
	 * @param count number of ratings recorded for the movie
	 */
	public HotMovie(int movie, int count) {
		this.movie = movie;
		this.count = count;
	}

	public int getMovie() {
		return movie;
	}

	public void setMovie(int movie) {
		this.movie = movie;
	}

	public int getCount() {
		return count;
	}

	public void setCount(int count) {
		this.count = count;
	}

	@Override
	public String toString() {
		return "HotMovie [movie=" + movie + ", count=" + count + "]";
	}

	/**
	 * Orders by count in descending order.
	 *
	 * @param o the other entry
	 * @return a negative value when this entry has the larger count, a positive
	 *         value when it has the smaller count, 0 when the counts are equal
	 */
	@Override
	public int compareTo(HotMovie o) {
		// Integer.compare instead of subtraction: a difference of two ints can
		// overflow and report the wrong sign.
		return Integer.compare(o.getCount(), this.getCount());
	}
}

Movie.java

/**
 * One rating record: which user gave which movie what score, and when.
 *
 * <p>The no-arg constructor and the setters are kept so the class stays a plain
 * bean; the main class in this file builds instances with
 * {@code JSON.parseObject(line, Movie.class)}.
 *
 * <p>The natural ordering is by rating, highest first.
 */
public class Movie implements Comparable<Movie> {
	/** Movie id. */
	private int movie;
	/** Score given by the user. */
	private int rate;
	/** Time of the rating, as found in the {@code timeStamp} field of the record. */
	private long timeStamp;
	/** User id. */
	private int uid;

	/** Creates an instance with all fields left at 0. */
	public Movie() {
	}

	/**
	 * @param movie     movie id
	 * @param rate      score given by the user
	 * @param timeStamp time of the rating
	 * @param uid       user id
	 */
	public Movie(int movie, int rate, long timeStamp, int uid) {
		this.movie = movie;
		this.rate = rate;
		this.timeStamp = timeStamp;
		this.uid = uid;
	}

	public int getMovie() {
		return movie;
	}

	public void setMovie(int movie) {
		this.movie = movie;
	}

	public int getRate() {
		return rate;
	}

	public void setRate(int rate) {
		this.rate = rate;
	}

	public long getTimeStamp() {
		return timeStamp;
	}

	public void setTimeStamp(long timeStamp) {
		this.timeStamp = timeStamp;
	}

	public int getUid() {
		return uid;
	}

	public void setUid(int uid) {
		this.uid = uid;
	}

	@Override
	public String toString() {
		return "[movie=" + movie + ", rate=" + rate + ", timeStamp=" + timeStamp + ", uid=" + uid + "]";
	}

	/**
	 * Orders by rating in descending order.
	 *
	 * @param o the other rating record
	 * @return a negative value when this record has the higher rating, a positive
	 *         value when it has the lower rating, 0 when the ratings are equal
	 */
	@Override
	public int compareTo(Movie o) {
		// Integer.compare instead of subtraction: a difference of two ints can
		// overflow and report the wrong sign.
		return Integer.compare(o.getRate(), this.getRate());
	}

}
Rate.java
/**
 * Pairs a user id with that user's average rating.
 *
 * <p>The natural ordering is by average rating, highest first.
 */
public class Rate implements Comparable<Rate> {
	/** User id. */
	private int uid;
	/** Average rating given by the user. */
	private float rate;

	/** Creates an instance with both fields left at 0. */
	public Rate() {
	}

	/**
	 * @param uid  user id
	 * @param rate average rating given by the user
	 */
	public Rate(int uid, float rate) {
		this.uid = uid;
		this.rate = rate;
	}

	public int getUid() {
		return uid;
	}

	public void setUid(int uid) {
		this.uid = uid;
	}

	public float getRate() {
		return rate;
	}

	public void setRate(float rate) {
		this.rate = rate;
	}

	@Override
	public String toString() {
		return "[uid=" + uid + ", rate=" + rate + "]";
	}

	/**
	 * Orders by average rating in descending order.
	 *
	 * <p>Uses {@link Float#compare(float, float)} so the ordering is a valid
	 * total order even for NaN: comparing with {@code ==} and {@code >} answers
	 * -1 in both directions when either value is NaN, which breaks the
	 * {@link Comparable} contract. With {@code Float.compare} NaN counts as the
	 * largest value and therefore sorts first here.
	 *
	 * @param o the other entry
	 * @return a negative value when this entry has the higher average, a positive
	 *         value when it has the lower average, 0 when they are equal
	 */
	@Override
	public int compareTo(Rate o) {
		return Float.compare(o.getRate(), this.getRate());
	}

}

RateMovie.java

/**
 * Pairs a movie id with that movie's average rating.
 *
 * <p>The natural ordering is by average rating, highest first.
 */
public class RateMovie implements Comparable<RateMovie> {
	/** Movie id. */
	private int movie;
	/** Average rating of the movie. */
	private float rate;

	/** Creates an instance with both fields left at 0. */
	public RateMovie() {
	}

	/**
	 * @param movie movie id
	 * @param rate  average rating of the movie
	 */
	public RateMovie(int movie, float rate) {
		this.movie = movie;
		this.rate = rate;
	}

	public int getMovie() {
		return movie;
	}

	public void setMovie(int movie) {
		this.movie = movie;
	}

	public float getRate() {
		return rate;
	}

	public void setRate(float rate) {
		this.rate = rate;
	}

	@Override
	public String toString() {
		return "[movie=" + movie + ", rate=" + rate + "]";
	}

	/**
	 * Orders by average rating in descending order.
	 *
	 * <p>Uses {@link Float#compare(float, float)} so the ordering is a valid
	 * total order even for NaN: comparing with {@code ==} and {@code >} answers
	 * -1 in both directions when either value is NaN, which breaks the
	 * {@link Comparable} contract. With {@code Float.compare} NaN counts as the
	 * largest value and therefore sorts first here.
	 *
	 * @param o the other entry
	 * @return a negative value when this entry has the higher average, a positive
	 *         value when it has the lower average, 0 when they are equal
	 */
	@Override
	public int compareTo(RateMovie o) {
		return Float.compare(o.getRate(), this.getRate());
	}
}
Test.java(主类)
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map.Entry;

import com.alibaba.fastjson.JSON;

/**
 * Movie-rating statistics over a file of JSON rating records.
 *
 * <p>Record fields:
 * <ul>
 * <li>movie: movie id</li>
 * <li>rate: score given by the user</li>
 * <li>timeStamp: time of the rating</li>
 * <li>uid: user id</li>
 * </ul>
 *
 * <p>Sample records, one JSON object per line:
 * <pre>
 * {"movie":"2804","rate":"5","timeStamp":"978300719","uid":"1"}
 * {"movie":"594","rate":"4","timeStamp":"978302268","uid":"1"}
 * {"movie":"919","rate":"4","timeStamp":"978301368","uid":"1"}
 * </pre>
 *
 * <p>Reports produced, each written to its own text file:
 * <ol>
 * <li>for every user, the rating records of that user's 3 highest-rated movies</li>
 * <li>every user's uid and average rating</li>
 * <li>the 3 most generous users (highest average rating) with uid and average</li>
 * <li>the 3 most popular movies (most ratings) with movie id and rating count</li>
 * <li>the 3 best-rated movies (highest average rating) with movie id and average</li>
 * </ol>
 */
public class Test {
	/** Number of entries written by every "top N" report. */
	private static final int TOP_N = 3;

	/**
	 * Reads the rating file, groups the records by user and by movie, and writes
	 * the five reports. A failure while reading or while writing one report is
	 * printed and does not stop the remaining reports.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Rating records grouped by uid.
		HashMap<Integer, ArrayList<Movie>> map = new HashMap<Integer, ArrayList<Movie>>();

		// Rating records grouped by movie id.
		HashMap<Integer, ArrayList<Movie>> map2 = new HashMap<Integer, ArrayList<Movie>>();

		// Read rating.txt and turn every line into a Movie object.
		try (BufferedReader br = new BufferedReader(new FileReader("../案例1/src/qq/rating.txt"));) {
			String line;
			while ((line = br.readLine()) != null) {
				// Blank lines carry no record; skipping them keeps one stray
				// empty line from aborting the whole read.
				if (line.trim().isEmpty()) {
					continue;
				}
				Movie movie = JSON.parseObject(line, Movie.class);
				if (movie == null) {
					continue;
				}
				addRating(map, movie.getUid(), movie);
				addRating(map2, movie.getMovie(), movie);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

		// 1. For every user, the rating records of the 3 highest-rated movies.
		// Movie's natural order is rating descending, so the best come first.
		for (ArrayList<Movie> ratings : map.values()) {
			Collections.sort(ratings);
		}
		try (BufferedWriter bw = new BufferedWriter(new FileWriter("D:/1.每个用户评分最高的3部电影评分信息.txt"));) {
			for (Entry<Integer, ArrayList<Movie>> entry : map.entrySet()) {
				bw.write(entry.getKey() + ":");
				bw.newLine();
				ArrayList<Movie> list = entry.getValue();
				// A user may have rated fewer than TOP_N movies.
				int limit = Math.min(TOP_N, list.size());
				for (int i = 0; i < limit; i++) {
					bw.write(list.get(i).toString());
					bw.newLine();
				}
			}
			System.out.println(" 1.每个用户评分最高的3部电影评分信息保存完毕");
		} catch (Exception e) {
			e.printStackTrace();
			System.out.println("1.每个用户评分最高的3部电影评分信息保存失败");
		}

		// 2. Every user's uid and average rating (written before the list is sorted).
		ArrayList<Rate> avgRateList = getAvgRateList(map);
		writeReport("D:/2.每个用户的uid和评分的平均值.txt", avgRateList, avgRateList.size(),
				"2.每个用户的uid和评分的平均值保存完毕", "2.每个用户的uid和评分的平均值失败");

		// 3. The 3 users with the highest average rating.
		Collections.sort(avgRateList);
		writeReport("D:/3.最大方(评分平均值高)的3个用户的uid和评分平均值.txt", avgRateList, TOP_N,
				"3.最大方(评分平均值高)的3个用户的uid和评分平均值保存完毕", "3.最大方(评分平均值高)的3个用户的uid和评分平均值保存失败");

		// 4. The 3 movies with the most ratings.
		ArrayList<HotMovie> hotMovieList = getHotMovieList(map2);
		writeReport("D:/4.最热门的3部电影id和评价次数.txt", hotMovieList, TOP_N,
				"4.最热门的3部电影id和评价次数保存完毕", "4.最热门的3部电影id和评价次数保存失败");

		// 5. The 3 movies with the highest average rating.
		ArrayList<RateMovie> rateMovieList = getRateMovieList(map2);
		writeReport("D:/5.评价最高的3部电影id和评分均值.txt", rateMovieList, TOP_N,
				"5.评价最高的3部电影id和评分均值保存完毕", "5.评价最高的3部电影id和评分均值保存失败");
	}

	/**
	 * Appends a rating record to the list stored under {@code key}, creating the
	 * list on first use.
	 */
	private static void addRating(HashMap<Integer, ArrayList<Movie>> groups, int key, Movie rating) {
		ArrayList<Movie> list = groups.get(key);
		if (list == null) {
			list = new ArrayList<Movie>();
			groups.put(key, list);
		}
		list.add(rating);
	}

	/**
	 * Writes the {@code toString()} of the first {@code limit} items to
	 * {@code path}, one per line, then prints {@code successMessage}. On any
	 * failure the stack trace and {@code failureMessage} are printed instead.
	 * Fewer than {@code limit} lines are written when the list is shorter.
	 */
	private static void writeReport(String path, ArrayList<?> items, int limit, String successMessage,
			String failureMessage) {
		try (BufferedWriter bw = new BufferedWriter(new FileWriter(path));) {
			int count = Math.min(limit, items.size());
			for (int i = 0; i < count; i++) {
				bw.write(items.get(i).toString());
				bw.newLine();
			}
			System.out.println(successMessage);
		} catch (Exception e) {
			e.printStackTrace();
			System.out.println(failureMessage);
		}
	}

	/**
	 * Builds one entry per movie holding its average rating.
	 *
	 * @param map2 rating records grouped by movie id
	 * @return the entries sorted by average rating, highest first
	 */
	private static ArrayList<RateMovie> getRateMovieList(HashMap<Integer, ArrayList<Movie>> map2) {
		ArrayList<RateMovie> list = new ArrayList<RateMovie>();

		for (Entry<Integer, ArrayList<Movie>> entry : map2.entrySet()) {
			Integer movie = entry.getKey();
			ArrayList<Movie> list2 = entry.getValue();
			float rate = 0;
			for (Movie m : list2) {
				rate += m.getRate();
			}
			rate /= list2.size();
			list.add(new RateMovie(movie, rate));
		}

		Collections.sort(list);
		return list;
	}

	/**
	 * Builds one entry per movie holding the number of ratings it received.
	 *
	 * @param map2 rating records grouped by movie id
	 * @return the entries sorted by rating count, highest first
	 */
	private static ArrayList<HotMovie> getHotMovieList(HashMap<Integer, ArrayList<Movie>> map2) {
		ArrayList<HotMovie> list = new ArrayList<HotMovie>();

		for (Entry<Integer, ArrayList<Movie>> entry : map2.entrySet()) {
			Integer movie = entry.getKey();
			ArrayList<Movie> list2 = entry.getValue();
			list.add(new HotMovie(movie, list2.size()));
		}

		Collections.sort(list);
		return list;
	}

	/**
	 * Builds one entry per user holding that user's average rating.
	 *
	 * @param map rating records grouped by uid
	 * @return the entries in the iteration order of {@code map}; not sorted
	 */
	private static ArrayList<Rate> getAvgRateList(HashMap<Integer, ArrayList<Movie>> map) {
		ArrayList<Rate> list = new ArrayList<Rate>();
		for (Entry<Integer, ArrayList<Movie>> entry : map.entrySet()) {
			Integer uid = entry.getKey();
			float rate = 0;
			// Sum the user's ratings, then divide by how many there are.
			ArrayList<Movie> list2 = entry.getValue();
			for (Movie m : list2) {
				rate += m.getRate();
			}
			rate /= list2.size();
			list.add(new Rate(uid, rate));
		}
		return list;
	}
}


猜你喜欢

转载自blog.csdn.net/a331685690/article/details/80230057
今日推荐