word_count头文件
#ifndef word_count_h
#define word_count_h
#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Representation of a word count object: one node per word.
Includes a next field so nodes can be chained into a singly linked list. */
struct word_count {
/* The word this node counts, as a C string.
   NOTE(review): ownership (who allocates/frees) is not visible here - confirm. */
char* word;
/* Occurrence count for `word`; add_word is documented to insert with
   count=1 and to increment on later occurrences. */
int count;
/* Link to the following node in the list. */
struct word_count* next;
};
/* Introduce a type name for the struct */
typedef struct word_count WordCount;
/* Initialize a word count list, updating the reference to the list.
   wclist: address of the caller's list-head pointer. */
void init_words(WordCount** wclist);
/* Length of a word count list.
   wchead: first node of the list. */
size_t len_words(WordCount* wchead);
/* Find a word in a word_count list.
   Returns the node for `word`.
   NOTE(review): presumably NULL when the word is absent - confirm against
   the implementation. */
WordCount* find_word(WordCount* wchead, char* word);
/* Insert word with count=1, if not already present; increment count if present.
   wclist: address of the list-head pointer, so the head itself can change. */
void add_word(WordCount** wclist, char* word);
//static int wordcntcmp(const WordCount *wc1, WordCount *wc2);
/* Print word counts to a file.
   wchead: first node of the list; ofile: stream to write to. */
void fprint_words(WordCount* wchead, FILE* ofile);
/* Inserts a word into the list in order. Assumes the existing list is already sorted.
   elem: node to insert; less: comparator taking two nodes, used to decide
   the position of elem. */
void wordcount_insert_ordered(WordCount** wclist, WordCount* elem,
bool less(const WordCount*, const WordCount*));
/* Sort a word count list in place.
   less: comparator taking two nodes that defines the resulting order. */
void wordcount_sort(WordCount** wclist, bool less(const WordCount*, const WordCount*));
#endif /* word_count_h */
就是实现一个链表
下面先过一遍主函数
int main(int argc, char* argv[]) {
// Count Mode (default): outputs the total amount of words counted
bool count_mode = true;
int total_words = 0;
// Freq Mode: outputs the frequency of each word
bool freq_mode = false;
FILE* infile = NULL;
// Variables for command line argument parsing
int i;
static struct option long_options[] = {
{"count", no_argument, 0, 'c'},
{"frequency", no_argument, 0, 'f'},
{"help", no_argument, 0, 'h'},
{0, 0, 0, 0}};
// Sets flags
while ((i = getopt_long(argc, argv, "cfh", long_options, NULL)) != -1) {
switch (i) {
case 'c':
count_mode = true;
freq_mode = false;
break;
case 'f':
count_mode = false;
freq_mode = true;
break;
case 'h':
return display_help();
}
}
if (!count_mode && !freq_mode) {
printf("Please specify a mode.\n");
return display_help();
}
/* Create the empty data structure */
init_words(&word_counts);
if ((argc - optind) < 1) {
// No input file specified, instead, read from STDIN instead.
infile = stdin;
} else {
// At least one file specified. Useful functions: fopen(), fclose().
// The first file can be found at argv[optind]. The last file can be
// found at argv[argc-1].
}
if (count_mode) {
printf("The total number of words is: %i\n", total_words);
} else {
wordcount_sort(&word_counts, wordcount_less);
printf("The frequencies of each word are: \n");
fprint_words(word_counts, stdout);
}
return 0;
}
前面几行是解析命令行参数,Linux下有固定的实现(getopt_long),这里先不深究细节。程序有两个模式:count模式统计单词总数,freq模式输出每个单词出现的频率,
分别对应下面两个函数的实现
/*
 * 3.1.1 Total Word Count
 *
 * Intended to return the total amount of words found in infile.
 * Useful functions: fgetc(), isalpha().
 *
 * Skeleton only: infile is not read yet and the result is always 0.
 */
int num_words(FILE* infile) {
  (void)infile; /* not used by the placeholder */
  return 0;
}
/*
 * 3.1.2 Word Frequency Count
 *
 * Intended to extract each word from infile and add it to `wclist`.
 * Useful functions: fgetc(), isalpha(), tolower(), add_word().
 *
 * Skeleton only: the body is empty, so neither argument is touched.
 */
void count_words(WordCount** wclist, FILE* infile) {
  (void)wclist; /* not used by the placeholder */
  (void)infile; /* not used by the placeholder */
}
这道题目的难点是用纯C的API去解析字符串,字符串的解析有一些技巧;至于链表的实现反而比较简单。这里先实现一个基本可用的版本。
/*
 * 3.1.1 Total Word Count
 *
 * Returns the total number of words found in infile.
 * Useful functions: fgetc(), isalpha().
 *
 * A word is a run of alphabetic characters (isalpha) that is ended by a
 * whitespace character (isspace) or by end of file. Characters that are
 * neither alphabetic nor whitespace (digits, punctuation) are skipped: they
 * neither extend nor end the current word.
 *
 * If a word grows beyond MAX_WORD_LEN letters, scanning stops early:
 * too_long is set, a diagnostic is printed to stdout, and the number of
 * words completed so far is returned.
 */
int num_words(FILE* infile) {
  int total = 0;
  int len = 0; /* letters in the current word; 0 means "not inside a word" */
  int c;

  do {
    c = fgetc(infile);
    if (isalpha(c)) {
      len++;
      if (len > MAX_WORD_LEN) {
        too_long = true;
        printf("%s\n", "WORD LEN > MAX_WORD_LEN !");
        break;
      }
    } else if (c == EOF || isspace(c)) {
      /* Count a word only when one is actually open, then close it so that
       * runs of delimiters and the final EOF are not counted again. */
      if (len > 0) {
        total++;
        len = 0;
      }
    }
  } while (c != EOF);

  return total;
}
/*
 * 3.1.2 Word Frequency Count
 *
 * Given infile, extracts each word in the FILE and adds it to `wclist`.
 * Useful functions: fgetc(), isalpha(), tolower(), add_word().
 *
 * A word is a run of alphabetic characters (isalpha) that is ended by a
 * whitespace character (isspace) or by end of file. Characters that are
 * neither alphabetic nor whitespace are skipped: they neither extend nor end
 * the current word. Letters are lower-cased before the word is added, so
 * "The" and "the" are counted as the same word.
 *
 * If a word grows beyond MAX_WORD_LEN letters, scanning stops early:
 * too_long is set, a diagnostic is printed to stdout, and the over-long word
 * is not added.
 */
void count_words(WordCount** wclist, FILE* infile) {
  char buffer[MAX_WORD_LEN + 1]; /* +1 leaves room for the terminating NUL */
  int len = 0; /* letters stored in buffer; 0 means "not inside a word" */
  int c;

  do {
    c = fgetc(infile);
    if (isalpha(c)) {
      /* Check before storing so the write can never go past the buffer. */
      if (len >= MAX_WORD_LEN) {
        too_long = true;
        printf("%s\n", "WORD LEN > MAX_WORD_LEN !");
        break;
      }
      buffer[len++] = (char)tolower(c);
    } else if (c == EOF || isspace(c)) {
      if (len > 0) {
        buffer[len] = '\0';
        /* NOTE(review): new_string is defined elsewhere; presumably it returns
         * a heap copy of buffer - confirm who frees that copy when add_word
         * only increments an existing entry. */
        add_word(wclist, new_string(buffer));
        len = 0;
      }
    }
  } while (c != EOF);
}
/*
 * Comparator used to sort the list by frequency.
 *
 * Returns true when wc1 should come before wc2: the entry with the smaller
 * count comes first, and entries with equal counts are ordered by strcmp()
 * on their words.
 */
static bool wordcount_less(const WordCount* wc1, const WordCount* wc2) {
  if (wc1->count != wc2->count) {
    return wc1->count < wc2->count;
  }
  return strcmp(wc1->word, wc2->word) < 0;
}