Berkeley-CS 162: Operating Systems and System Programming 作业一纯C链表WordCount

word_count头文件

#ifndef word_count_h
#define word_count_h

#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Representation of a word count object: one node of a singly linked list
   that pairs a word with the number of times it has been seen. */
struct word_count {
  char* word;               /* the word, used as a C string (compared with strcmp) */
  int count;                /* occurrences recorded for this word */
  struct word_count* next;  /* link to the following node in the list */
};

/* Introduce a type name for the struct, so nodes can be written as WordCount. */
typedef struct word_count WordCount;

/* Initialize a word count list. The list is passed by reference (*wclist)
   so that the caller's head pointer can be updated. */
void init_words(WordCount** wclist);

/* Return the length of the word count list that starts at wchead. */
size_t len_words(WordCount* wchead);

/* Find a word in a word_count list and return its node.
   NOTE(review): the result when the word is absent is not visible here
   (presumably NULL) -- confirm against the implementation. */
WordCount* find_word(WordCount* wchead, char* word);

/* Insert word with count=1, if not already present; increment count if present.
   The list is passed by reference so the head can change.
   NOTE(review): whether `word` is copied or stored as-is is decided by the
   implementation -- confirm before freeing the argument. */
void add_word(WordCount** wclist, char* word);

/* Print word counts to the stream ofile. */
void fprint_words(WordCount* wchead, FILE* ofile);

/* Inserts elem into the list in order, where `less` is the ordering predicate.
   Assumes the existing list is already sorted with respect to `less`. */
void wordcount_insert_ordered(WordCount** wclist, WordCount* elem,
                              bool less(const WordCount*, const WordCount*));

/* Sort a word count list in place, using `less` as the ordering predicate. */
void wordcount_sort(WordCount** wclist, bool less(const WordCount*, const WordCount*));

#endif /* word_count_h */

就是实现一个链表

下面先过一遍主函数

/*
 * Entry point (assignment skeleton).
 *
 * Parses -c/--count, -f/--frequency and -h/--help, initializes the global
 * word list, selects the input source, and prints either the total word
 * count or the sorted per-word frequencies. The code that actually reads
 * the input is still to be filled in, so total_words stays 0 and the list
 * stays as init_words() left it.
 */
int main(int argc, char* argv[]) {
  // Count mode (the default) reports the total number of words;
  // frequency mode reports how often each word occurs.
  bool count_mode = true;
  bool freq_mode = false;
  int total_words = 0;

  FILE* infile = NULL;

  // Long-option table for getopt_long; each entry maps to its short flag.
  static struct option long_options[] = {
      {"count", no_argument, 0, 'c'},
      {"frequency", no_argument, 0, 'f'},
      {"help", no_argument, 0, 'h'},
      {0, 0, 0, 0}};

  // The last mode flag on the command line wins; any other value returned
  // by getopt_long is ignored.
  int opt;
  while ((opt = getopt_long(argc, argv, "cfh", long_options, NULL)) != -1) {
    if (opt == 'h') {
      return display_help();
    }
    if (opt == 'c') {
      count_mode = true;
      freq_mode = false;
    } else if (opt == 'f') {
      count_mode = false;
      freq_mode = true;
    }
  }

  // Defensive check: with the flag handling above exactly one mode is
  // always set, so this branch is not reachable as the code stands.
  if (!(count_mode || freq_mode)) {
    printf("Please specify a mode.\n");
    return display_help();
  }

  /* Create the empty data structure */
  init_words(&word_counts);

  if ((argc - optind) >= 1) {
    // At least one file specified. Useful functions: fopen(), fclose().
    // The first file can be found at argv[optind]. The last file can be
    // found at argv[argc-1].
  } else {
    // No input file specified: read from STDIN instead.
    infile = stdin;
  }

  if (count_mode) {
    printf("The total number of words is: %i\n", total_words);
    return 0;
  }

  // Frequency mode: order the list with wordcount_less, then print it.
  wordcount_sort(&word_counts, wordcount_less);

  printf("The frequencies of each word are: \n");
  fprint_words(word_counts, stdout);
  return 0;
}

前面几行是解析命令行参数（使用 getopt_long），Linux下有固定的写法，这里先不深究细节。程序有两个模式：count模式（默认）输出单词总数，freq模式输出每个单词出现的频率

分别对应函数实现

/*
 * 3.1.1 Total Word Count
 *
 * Skeleton version: intended to return the total amount of words found in
 * infile, but nothing is read yet, so the result is always 0.
 * Useful functions: fgetc(), isalpha().
 */
int num_words(FILE* infile) {
  const int total = 0;  /* placeholder until the counting loop is written */

  return total;
}

/*
 * 3.1.2 Word Frequency Count
 *
 * Skeleton version: intended to extract each word from infile and add it to
 * `wclist`; the body is still empty, so neither argument is touched.
 * Useful functions: fgetc(), isalpha(), tolower(), add_word().
 */
void count_words(WordCount** wclist, FILE* infile) {
  /* Placeholder: nothing is read or inserted yet. */
  return;
}

这道题目的难点是用纯C的API去解析字符串，这里字符串的解析有一些技巧。至于链表的实现反而比较简单，这里实现一个基本可用的版本。

/*
 * 3.1.1 Total Word Count
 *
 * Returns the total amount of words found in infile.
 *
 * A word is a run of alphabetic characters that is closed by a space, '\n',
 * '\r' or end of file. If a word grows beyond MAX_WORD_LEN letters, the
 * global too_long flag is set, a message is printed, and counting stops;
 * the value returned is the number of words completed up to that point.
 *
 * NOTE(review): characters that are neither alphabetic nor one of the
 * separators above (digits, punctuation, tabs) are skipped without ending
 * the current word, so "foo,bar" counts as one word -- confirm that this is
 * the intended definition.
 *
 * Useful functions: fgetc(), isalpha().
 */
int num_words(FILE* infile) {
  int num_words = 0;
  bool is_word = false;  /* true while inside a word that is not yet counted */
  int len = 0;           /* letters seen in the current word */
  int c;

  do {
    c = fgetc(infile);
    if (isalpha(c)) {
      is_word = true;
      len++;
      if (len > MAX_WORD_LEN) {
        too_long = true;
        printf("%s\n", "WORD LEN > MAX_WORD_LEN !");
        break;
      }
    } else if (c == ' ' || c == '\n' || c == '\r' || c == EOF) {
      if (is_word) {
        num_words++;
        len = 0;
        /* Leave the word: without this, every following separator
           (repeated spaces, blank lines, EOF) would be counted again. */
        is_word = false;
      }
    }
  } while (c != EOF);

  return num_words;
}

/*
 * 3.1.2 Word Frequency Count
 *
 * Given infile, extracts and adds each word in the FILE to `wclist`.
 *
 * A word is a run of alphabetic characters that is closed by a space, '\n',
 * '\r' or end of file. Each completed word is passed through new_string()
 * and handed to add_word(). If a word grows beyond MAX_WORD_LEN letters, the
 * global too_long flag is set, a message is printed, and processing stops;
 * the over-long word is not added.
 *
 * NOTE(review): letters are stored with their original case; the assignment
 * hint lists tolower(), so confirm whether counting should be
 * case-insensitive (or whether add_word()/new_string() already handle it).
 * NOTE(review): new_string() is defined elsewhere; presumably it returns a
 * fresh copy -- confirm who owns and frees that copy.
 *
 * Useful functions: fgetc(), isalpha(), tolower(), add_word().
 */
void count_words(WordCount** wclist, FILE* infile) {
  /* One byte more than the longest accepted word, so that a word of exactly
     MAX_WORD_LEN letters still has room for its terminating '\0'. */
  char buffer[MAX_WORD_LEN + 1];
  bool is_word = false;  /* true while buffer holds an unfinished word */
  int len = 0;           /* letters currently stored in buffer */
  int c;

  do {
    c = fgetc(infile);
    if (isalpha(c)) {
      /* Check before storing: the letter that would make the word longer
         than MAX_WORD_LEN must never be written into the buffer. */
      if (len >= MAX_WORD_LEN) {
        too_long = true;
        printf("%s\n", "WORD LEN > MAX_WORD_LEN !");
        break;
      }
      is_word = true;
      buffer[len++] = (char)c;
    } else if (c == ' ' || c == '\n' || c == '\r' || c == EOF) {
      if (is_word) {
        buffer[len] = '\0';
        add_word(wclist, new_string(buffer));
        len = 0;
        is_word = false;
      }
    }
  } while (c != EOF);
}

/*
 * Comparator to sort list by frequency.
 *
 * Returns true when wc1 should come before wc2: the entry with the smaller
 * count goes first, and entries with equal counts are ordered by strcmp()
 * on their words.
 */
static bool wordcount_less(const WordCount* wc1, const WordCount* wc2) {
  if (wc1->count != wc2->count) {
    return wc1->count < wc2->count;
  }
  /* Same frequency: fall back to the byte-wise order of the words. */
  return strcmp(wc1->word, wc2->word) < 0;
}

Berkeley-CS 162: Operating Systems and System Programming 作业一纯C链表WordCount

猜你喜欢