Continued word count (up)

Phrase statistics

To extract phrases from the text we only need to split it on a few special delimiters: ",", ".", "?", "!" and the line break serve as the English separators. Some useless words (stop words) also act as break points, for example

"a", "it", "the", "and", "this", etc.

package analyse_word;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Scanner;
import java.util.Set;
/**
 * Splits an English text file into punctuation-delimited phrases and prints each
 * distinct phrase, consulting a stop-word ("useless" word) file for every phrase.
 *
 * <p>Both input files are read from fixed paths. A missing file is reported on
 * standard output instead of raising an exception.
 */
public class recognize_sentence {

    /** Path of the stop-word list; every line of the file is one entry. */
    private static final String STOP_WORD_PATH = "D:\\useless.txt";

    /** Path of the English text whose phrases are extracted. */
    private static final String TEXT_PATH = "D:\\Englis_letters.txt";

    /** Delimiter applied to each stop-word line (a line never contains it, so the line stays whole). */
    private static final String LINE_DELIMITERS = "[\n]";

    /**
     * Character class that ends a phrase: tab, '+', ';', '.', ',', curly quotes,
     * '?', '!' and newline. A plain space is not a delimiter, so phrases keep
     * their leading and inner spaces.
     */
    private static final String PHRASE_DELIMITERS = "[\\t+;.,“”‘’?!\n+]";

    /**
     * Checks a phrase against the stop-word file.
     *
     * <p>NOTE(review): once the stop-word file exists this method returns
     * {@code true} on every path, exactly as the original implementation did, so
     * the prefix test below cannot change the result. The final {@code return}
     * was probably meant to be {@code false}; confirm the intended semantics
     * before changing it, because {@link #recognize()} prints every phrase for
     * which this method returns {@code true}.
     *
     * @param str the phrase to check; must not be {@code null} when the stop-word
     *            file contains at least one checked entry
     * @return {@code false} if the stop-word file does not exist, {@code true} otherwise
     * @throws FileNotFoundException if the stop-word file exists but cannot be opened
     */
    public static boolean useless(String str) throws FileNotFoundException {
        File file = new File(STOP_WORD_PATH);
        if (!file.exists()) {
            // Report the missing file (the message reads "file does not exist").
            System.out.println("文件不存在");
            return false;
        }
        HashMap<String, Integer> stopWords = countPieces(file, LINE_DELIMITERS);
        for (String word : stopWords.keySet()) {
            if (isExempt(word)) {
                continue;
            }
            // True only when the phrase begins with " <word> " (space, word, space).
            if (str.indexOf(" " + word + " ") == 0) {
                return true;
            }
        }
        return true;
    }

    /**
     * Reads the text file, splits every line into phrases and prints each
     * distinct phrase for which {@link #useless(String)} returns {@code true}.
     * Phrases are printed in the iteration order of the underlying hash map.
     *
     * @throws FileNotFoundException if an input file exists but cannot be opened
     */
    public static void recognize() throws FileNotFoundException {
        File file = new File(TEXT_PATH);
        if (!file.exists()) {
            // Report the missing file (the message reads "file does not exist").
            System.out.println("文件不存在");
            return;
        }
        HashMap<String, Integer> phrases = countPieces(file, PHRASE_DELIMITERS);
        for (String phrase : phrases.keySet()) {
            if (useless(phrase)) {
                System.out.println(phrase);
            }
        }
    }

    /**
     * Program entry point; runs {@link #recognize()}.
     *
     * @param args ignored
     * @throws FileNotFoundException if an input file exists but cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        recognize();
    }

    /** Reads {@code file} line by line, splits each line on {@code delimiterRegex} and counts every piece. */
    private static HashMap<String, Integer> countPieces(File file, String delimiterRegex)
            throws FileNotFoundException {
        HashMap<String, Integer> counts = new HashMap<String, Integer>();
        // try-with-resources closes the scanner (and the file handle) even on failure.
        try (Scanner in = new Scanner(file)) {
            while (in.hasNextLine()) {
                for (String piece : in.nextLine().split(delimiterRegex)) {
                    Integer seen = counts.get(piece);
                    counts.put(piece, seen == null ? 1 : seen + 1);
                }
            }
        }
        return counts;
    }

    /** Tells whether a stop-word entry is excluded from the prefix test (empty, "a", "the" or two spaces). */
    private static boolean isExempt(String word) {
        return "".equals(word) || "a".equals(word) || "the".equals(word) || "  ".equals(word);
    }
}

Guess you like

Origin www.cnblogs.com/goubb/p/11031048.html