Output the N most frequently occurring English words in a single file

Ideas:

Read the file from beginning to end, extracting every word.

Put each word into a hash map and count the number of times it appears.

Define a global constant N that controls how many words are output.

Loop over the word counts to find the word with the largest number of occurrences, output it, and delete it from the map. Repeat this N times.

 

Source:

package demo1;

// Read the file from beginning to end, extracting every word.
// Put each word into a hash map and count the number of times it appears.
//import java.io.BufferedInputStream; 
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

//import java.io.Reader; 
// read from text documents
/**
 * File helpers for the word-frequency demo: reads a whole text file into a
 * string and writes the finished report to disk.
 */
public class writeFromFile {
	/** Charset used to decode the input text. */
	private static final String INPUT_ENCODING = "GBK";

	/**
	 * Destination of the report written by {@link #daochu(String)}. It lives in
	 * the same directory as the input file used by the demo's main class.
	 */
	private static final String EXPORT_PATH = "E:/TXT文件夹/daochu.txt";

	/**
	 * Reads the whole file at {@code filePath}, decoding it as GBK.
	 *
	 * <p>Every line is followed by a single {@code '\n'} in the result, so the
	 * last word of one line never fuses with the first word of the next.
	 *
	 * @param filePath path of the text file to read; may be {@code null}
	 * @return the file content, or {@code null} if the path is {@code null},
	 *         does not name a regular file, or the file cannot be read (a
	 *         message is printed to standard output in each of those cases)
	 */
	public static String readTxtFile(String filePath) {
		File file = (filePath == null) ? null : new File(filePath);
		// isFile() is already false for a path that does not exist.
		if (file == null || !file.isFile()) {
			System.out.println("can not find the file specified");
			return null;
		}
		// Each resource is declared separately so that every one of them is
		// closed even when a later constructor or a read fails.
		try (FileInputStream in = new FileInputStream(file);
				InputStreamReader decoder = new InputStreamReader(in, INPUT_ENCODING);
				BufferedReader reader = new BufferedReader(decoder)) {
			StringBuilder text = new StringBuilder();
			String line;
			while ((line = reader.readLine()) != null) {
				// readLine() strips the line terminator; put a separator back.
				text.append(line).append('\n');
			}
			return text.toString();
		} catch (IOException e) {
			System.out.println("Error reading the contents of the file");
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Writes {@code a} to the export file as UTF-8, replacing any previous
	 * content of that file.
	 *
	 * @param a text to write
	 * @throws IOException if the export file cannot be opened or written
	 */
	public static void daochu(String a) throws IOException {
		try (FileOutputStream out = new FileOutputStream(EXPORT_PATH);
				OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
			writer.append(a);
		}
	}
}

  

package demo1;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
 6 
 7 public class Tongji {
 8     final static int N = 10;
 9 
10     public static String StatList(String str) {
11         StringBuffer sb = new StringBuffer();
12         HashMap<String, Integer> has = new HashMap<String, Integer>(); // 打开一个哈希表
13         String [] = str.split slist ( "[the Z-zA-A ^ \ '] +" );
 14          for ( int I = 0; I <slist.length; I ++ ) {
 15              IF ! (Has.containsKey (slist [I])) { // If this is no word
 16  // the Pattern of Pattern.compile PA = ( "[^ A-zA-the Z] +"); // regular expression matching string
 . 17  // Matcher match = pa.matcher (slist [I]);
 18 is  // IF (match.matches ()!) 
. 19                  has.put (slist [I], 1 );
 20 is              } the else { // If there is, the number of times plus 1 
21 is                  has.put (slist [I], has.get (slist [I]) +. 1);
22             }
23         }
24 
25 //遍历map
26         Iterator<String> iterator = has.keySet().iterator();
27         String a[] = new String[10];
28         int s[] = new int[10];
29 
30         for (int i = 0; i < N; i++) {
31             iterator = has.keySet().iterator();
32             while (iterator.hasNext()) {
33                 String word = (String) iterator.next();
34                 if (s[i] < has.get(word)) {
35                     s[i] = has.get(word);
36                     a[i] = word;
37                 }
38             }
39             sb.append("单词:").append(a[i]).append(" 次数").append(has.get(a[i])).append("\r\n");
40             has.remove(a[i]);
41         }
42         return sb.toString();
43     }
44 
45     public static void main(String[] args) {
46 // TODO Auto-generated method stub
47         String filePath = "E:/TXT文件夹/Harry.txt";
48         String sz = writeFromFile.readTxtFile(filePath);
49         String ltxt = null;
50         System.out.println(ltxt = StatList(sz));
51         try {
52             writeFromFile.daochu(ltxt);
53         } catch (IOException e) {
54 // TODO Auto-generated catch block
55             e.printStackTrace();
56         }
57     }
58 }

 

Guess you like

Origin www.cnblogs.com/022414ls/p/11795111.html