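/********************** Program description ***********************
 *
 * A word consists only of letters, digits and underscores, and its first
 * character must be a letter or an underscore.
 * Note: a run made up solely of underscores also counts as a word,
 * e.g. "_", "___", "___________".
 * If a token starts with a digit, the characters that follow the digit are
 * not treated as a word. For example, in "1abc bcd def" only the two words
 * bcd and def are recognised; the "abc" after the 1 is ignored.
 *******************************************************************/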
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
using namespace std;
// Size in bytes of every fixed character buffer that holds a word.
#define WORD_LENGTH 50
// All characters that may appear inside a word, grouped by class:
// digits, lower-case letters, upper-case letters and the underscore
// (63 characters followed by the terminating '\0').
char c_Table[64] =
    "0123456789"
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "_";
// The decimal digits; used to reject a digit as the first character of a word.
char n_Table[11] =
    "0123456789";
// One node of the singly linked list that holds the distinct words found.
struct word
{
char w[WORD_LENGTH]; // text of the word, stored as a '\0'-terminated string
word * next; // following node in the list
};
// Global state of the word list.
word * head = NULL; // first node; stays NULL until the first word is stored
word * p;           // node that main() is currently filling in
word * end;         // empty trailing node; list traversals stop when they reach it
// Tells whether ch may be part of a word.
// Returns true when ch equals one of the 63 entries of c_Table (a digit,
// a letter or the underscore) and false otherwise.
bool check_c(char ch)
{
    const char * cursor = c_Table;
    const char * const stop = c_Table + 63; // one past the last table entry
    while ( cursor != stop )
    {
        if ( *cursor == ch )
        {
            return true;
        }
        ++cursor;
    }
    return false;
}
// Tells whether ch is a decimal digit, i.e. one of the 10 entries of n_Table.
// main() uses this to refuse a digit as the first character of a word.
bool check_n(char ch)
{
    int index = 0;
    // Advance until a match is found or all ten digits have been tried.
    while ( index < 10 && n_Table[index] != ch )
    {
        ++index;
    }
    return index < 10;
}
// Resets a word buffer by writing '\0' into each of its WORD_LENGTH bytes.
// The 50 in the parameter declaration is not enforced by the compiler; the
// caller must pass a buffer of at least WORD_LENGTH bytes.
void clear_word(char w[50])
{
    char * const stop = w + WORD_LENGTH;
    for ( char * cursor = w; cursor != stop; ++cursor )
    {
        *cursor = '\0';
    }
}
bool check_in_list(char w[50]) //检查是否与单词表重复
{
word * p = head;
while ( p != end )
{
if ( strcmp(p->w,w) == 0 )
{
return false; //与单词表中单词重复
}
p = p->next;
}
return true; //没找到相同单词(可以加入单词表)
}
int main()
{
ifstream fin;
char filename[200];
char filename_cut[200];
char read_word[WORD_LENGTH];
char ch;
bool flag = false, flag1 = false;
int ctr = 0;
int words_num = 0, different_words_num = 0;
printf("程序从文件中读取英文语句,判断其中的单词数(文件拖拽)\n");
printf("请输入文件名:");
gets(filename);
if ( filename[0] == '\"' )
{
filename[strlen(filename)-1] = '\0';
strcpy(filename,filename+1);
}
for ( int j = strlen(filename); j >= 0; j-- )
{
if ( filename[j] == '\\' )
{
strcpy(filename_cut,filename+j+1);
break;
}
}
fin.open(filename,0);
if ( fin != NULL )
{
clear_word(read_word);
while ( !fin.eof() )
{
ch = '\0';
fin.get(ch);
if ( check_c(ch) )
{
if ( !flag && !check_n(ch) && !flag1 )
{
flag = true;
read_word[ctr] = ch;
ctr++;
}
else
if ( !flag && check_n(ch) )
{
flag1 = true;
}
else
if ( flag )
{
read_word[ctr] = ch;
ctr++;
}
}
else
{
if ( flag1 )
{
flag1 = false;
}
else
if ( flag && check_in_list(read_word) )
{
if ( head == NULL )
{
head = new word;
end = new word;
clear_word(head->w);
clear_word(end->w);
head->next = end;
p = head;
}
else
{
p = new word;
clear_word(p->w);
word * tmp = p;
p = end;
end = tmp;
p->next = end;
tmp = NULL;
}
for ( int i = 0; i < ctr; i++ )
{
p->w[i] = read_word[i];
}
ctr = 0;
flag = false;
clear_word(read_word);
different_words_num++;
words_num++;
}
else
if ( flag && !check_in_list(read_word) )
{
ctr = 0;
flag = false;
clear_word(read_word);
words_num++;
}
}
}
printf("\n%s文件中,",filename_cut);
printf("共%d个单词,",words_num);
printf("其中有%d个不同的单词:\n",different_words_num);
word * h = head;
while ( h != end )
{
printf("%s ",h->w);
h = h->next;
}
printf("\n\n");
word * r, *s;
r = head;
while ( r != end )
{
s = r;
r = r->next;
delete s;
}
delete r;
fin.close();
}
else
{
printf("打开文件失败!\n");
}
return 1;
}