腾讯面试题之词频统计

有一千万条短信,有重复,以文本文件的形式保存,一行一条,找出重复出现最多的前10条。

package com.week.ms;

import java.io.*;
import java.util.*;

public class Message {

    public static void main(String[] args) {
        String kFile = "data.txt";
        ArrayList array=solution(kFile);
        for(int i=0;i<array.size();i++){
            Map map = (Map) array.get(i);
            System.out.println(map);
            if(i>=9){
                break;
            }
        }
    }

    public static ArrayList solution(String kFile) {

        Comparator c = new Comparator<Map>() {
            @Override
            public int compare(Map m1, Map m2) {
                // TODO Auto-generated method stub
                int cnt1 = (int) m1.get("cnt");
                int cnt2 = (int) m2.get("cnt");
                if(cnt1 >= cnt2){
                    return -1;
                }else{
                    return 1;
                }
            }
        };

        HashMap count=new HashMap();

        ArrayList array =new ArrayList();

        FileInputStream inputStream = null;
        Scanner sc = null;
        try {
            inputStream=new FileInputStream(kFile);
            sc = new Scanner(inputStream,"UTF-8");
            //我们将使用Java.util.Scanner类扫描文件的内容,一行一行连续地读取,允许对每一行进行处理,而不保持对它的引用。总之没有把它们存放在内存中:
            while(sc.hasNextLine()){
                String line = sc.nextLine();
                if(line!=null){
                    if(count.containsKey(line)){
                        int cnt = (int) count.get(line);
                        cnt++;
                        count.put(line,cnt);
                        for(int i=0;i<array.size();i++){//更新Array
                            Map map = (Map) array.get(i);
                            if(map.get("msg").equals(line)){
                                map.put("cnt",cnt);
                            }
                        }
                    }else{
                        count.put(line,1);
                        Map map=new HashMap();
                        map.put("msg",line);
                        map.put("cnt",1);
                        array.add(map);

                    }
                    Collections.sort(array,c);
                }
            }

        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }finally {
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (sc != null) {
                sc.close();
            }
        }
        return array;
    }
}

猜你喜欢

转载自blog.csdn.net/jxq0816/article/details/80794432