用 Java 统计《哈利·波特》文本中字母和单词的出现频率

一、输出某个英文文本文件中 26 字母出现的频率,由高到低排列,并显示字母出现的百分比,精确到小数点后面两位。

1.字母频率 = 这个字母出现的次数 / (所有A-Z,a-z字母出现的总数)

2.如果两个字母出现的频率一样,那么就按照字典序排列。

代码:

package Tjwords;

import java.io.*;
import java.text.DecimalFormat;

/**
 * Prints how often each of the 52 tracked letters (a-z, then A-Z) occurs in a
 * text file, most frequent first, as a percentage of all tracked letters.
 *
 * <p>Upper- and lower-case letters are counted separately. Letters with equal
 * counts are listed in table order (a..z followed by A..Z).
 */
public class FileReaderTest
{
    /** Formats a percentage with exactly two digits after the decimal point. */
    static DecimalFormat df=new DecimalFormat("######0.00");

    /** File analysed when no path is passed on the command line. */
    private static final String DEFAULT_PATH = "E:\\Harry Potter and the Sorcerer's Stone.txt";

    /** The tracked letters; a letter's position here is its slot in the count array. */
    private static final char[] LETTERS =
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray();

    /**
     * Counts the letters of a file and prints one "letter percentage%" line per
     * tracked letter to standard output.
     *
     * @param args optional; {@code args[0]} is the file to analyse. Without it
     *             the built-in default path is used.
     * @throws IOException declared for compatibility; read failures are caught
     *                     and reported with a stack trace instead
     */
    public static void main(String[] args) throws IOException
    {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        long[] counts;
        // try-with-resources closes the reader on every path, including errors.
        try (FileReader fr = new FileReader(path))
        {
            counts = countLetters(fr);
        }
        catch (IOException ioe)
        {
            ioe.printStackTrace();
            return;
        }
        printReport(counts);
    }

    /** Reads {@code in} to the end and returns the per-letter counts, indexed like {@link #LETTERS}. */
    private static long[] countLetters(Reader in) throws IOException
    {
        long[] counts = new long[LETTERS.length];
        char[] cbuf = new char[4096];
        int hasRead;
        while ((hasRead = in.read(cbuf)) != -1)
        {
            // Only the first hasRead slots hold fresh data; the rest of the
            // buffer still contains characters from an earlier read.
            for (int i = 0; i < hasRead; i++)
            {
                int slot = slotOf(cbuf[i]);
                if (slot >= 0)
                {
                    counts[slot]++;
                }
            }
        }
        return counts;
    }

    /** Returns the index of {@code c} in {@link #LETTERS}, or -1 if it is not a tracked letter. */
    private static int slotOf(char c)
    {
        if (c >= 'a' && c <= 'z')
        {
            return c - 'a';
        }
        if (c >= 'A' && c <= 'Z')
        {
            return 26 + (c - 'A');
        }
        return -1;
    }

    /** Prints every tracked letter exactly once, highest count first, ties in table order. */
    private static void printReport(long[] counts)
    {
        long total = 0;
        for (long count : counts)
        {
            total += count;
        }

        // A separate "printed" flag keeps already-listed letters apart from
        // letters that genuinely never occurred (both would otherwise be 0).
        boolean[] printed = new boolean[counts.length];
        for (int rank = 0; rank < counts.length; rank++)
        {
            int best = -1;
            for (int i = 0; i < counts.length; i++)
            {
                // Strict '>' keeps the earliest letter among equal counts.
                if (!printed[i] && (best < 0 || counts[i] > counts[best]))
                {
                    best = i;
                }
            }
            printed[best] = true;

            // No letters at all: report 0.00% instead of dividing by zero.
            double percent = (total == 0) ? 0.0 : (double) counts[best] / total * 100;
            System.out.println(LETTERS[best]+" "+df.format(percent)+"%");
        }
    }
}

二、输出单个文件中的前 N 个最常出现的英语单词。

package Tjwords;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
 * Lists the most frequent words of an English text.
 *
 * <p>A word is a maximal run of ASCII letters and apostrophes; matching is
 * case-sensitive, so "The" and "the" are different words.
 */
public class haXiBiao {
    /** Maximum number of words included in the report. */
    final static int N=1000;

    /**
     * Builds a report of the {@link #N} most frequent words in {@code str}.
     *
     * <p>Words are ordered by descending count; words with the same count are
     * ordered alphabetically so the output is deterministic. Each word gets one
     * line of the form {@code 单词:<word> 次数<count>} terminated by CRLF. If the
     * text has fewer than N distinct words, only those words are listed.
     *
     * @param str the text to analyse; {@code null} is treated as empty text
     * @return the report, or an empty string when the text contains no words
     */
    public static String StatList(String str) {
        StringBuilder sb = new StringBuilder();
        if (str == null) {
            return sb.toString();
        }

        Map<String, Integer> has = new HashMap<String, Integer>();
        for (String token : str.split("[^a-zA-Z\']+")) {
            // split() yields a leading "" when the text starts with a
            // delimiter (and [""] for empty text); that is not a word.
            if (token.isEmpty()) {
                continue;
            }
            Integer seen = has.get(token);
            has.put(token, seen == null ? 1 : seen + 1);
        }

        // Sort once instead of rescanning the whole map for every rank.
        List<Map.Entry<String, Integer>> ranked =
                new ArrayList<Map.Entry<String, Integer>>(has.entrySet());
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });

        // Never emit more lines than there are distinct words.
        int limit = Math.min(N, ranked.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Integer> entry = ranked.get(i);
            sb.append("单词:").append(entry.getKey())
              .append(" 次数").append(entry.getValue()).append("\r\n");
        }
        return sb.toString();
    }

    /**
     * Reads the hard-coded text file via {@code writeFromFile.readTxtFile} and
     * prints its word report to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String filePath = "E:\\Harry Potter and the Sorcerer's Stone.txt";
        String sz=writeFromFile.readTxtFile(filePath);
        System.out.println(StatList(sz));
    }
}

package Tjwords;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
//import java.io.Reader;
//从文本文档中读入
/** File helpers: read a text file into a string, and write a string out to a fixed file. */
public class writeFromFile {
    /**
     * Reads a whole text file, decoded as GBK, into a single string.
     *
     * <p>Every line of the file is followed by {@code '\n'} in the result, so the
     * last word of one line is never glued to the first word of the next.
     *
     * @param filePath path of the file to read
     * @return the file content, or {@code null} if the path is {@code null},
     *         is not an existing regular file, or reading fails (a message is
     *         printed to standard output in each of those cases)
     */
    public static String readTxtFile(String filePath){
        if (filePath == null) {
            System.out.println("找不到指定的文件");
            return null;
        }
        File file=new File(filePath);
        // isFile() is already false for a path that does not exist.
        if (!file.isFile()) {
            System.out.println("找不到指定的文件");
            return null;
        }

        String encoding="GBK";
        // Both resources are closed on every path, including read errors.
        try (FileInputStream in = new FileInputStream(file);
             BufferedReader bufferedReader = new BufferedReader(
                     new InputStreamReader(in, encoding))) {
            StringBuilder lineText = new StringBuilder();
            String lineTxt;
            while ((lineTxt = bufferedReader.readLine()) != null) {
                // readLine() strips the line terminator; put one back.
                lineText.append(lineTxt).append('\n');
            }
            return lineText.toString();
        } catch (IOException e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Writes {@code a}, encoded as UTF-8, to the hard-coded output file,
     * replacing whatever the file contained.
     *
     * <p>NOTE(review): the target is the same path that haXiBiao.main reads its
     * input from, so calling this overwrites that text - confirm this is intended.
     *
     * @param a the text to write
     * @throws IOException if the file cannot be opened or written
     */
    public static void daochu(String a) throws IOException
    {
        File file=new File("E:\\Harry Potter and the Sorcerer's Stone.txt");
        // Closed in reverse order (writer, then stream) even if append fails.
        try (FileOutputStream fos = new FileOutputStream(file);
             OutputStreamWriter osw = new OutputStreamWriter(fos, "UTF-8")) {
            osw.append(a);
        }
    }
}

猜你喜欢

转载自www.cnblogs.com/hang-hang/p/11828106.html