统计单词个数的简单程序

背景

闲来无事，写了一个简单的统计单词个数的程序，特此记录。

代码

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

/* size of the buffer main() hands to fgets() for each read */
#define MAX_LINE_SIZE   256
/* size of the word buffer inside struct word_node */
#define MAX_WORD_SIZE   32
/* main() rejects files whose reported size exceeds this many bytes (1 GiB) */
#define MAX_FILE_SIZE   (1024 * 1024 * 1024)


/*
 * One entry of the word list: the text of the word, how many times it has
 * been counted, and the link to the entry that follows it.
 */
struct word_node {
    char word[MAX_WORD_SIZE];
    int counter;
    struct word_node *next;
};

typedef struct word_node word_list_t;

/* first entry of the list; NULL while the list is empty */
word_list_t *word_list = NULL;



/*
 * Return the size in bytes of the file named file_name.
 *
 * Returns -1 when file_name is NULL or stat() fails (for example because
 * the file does not exist). A size that does not fit in an int is reported
 * as INT_MAX, so the result never wraps around to a small or negative value.
 */
int get_file_size(char *file_name)
{
    struct stat s;

    /* s is only filled in when stat() succeeds */
    if (file_name == NULL || stat(file_name, &s) != 0){
        return -1;
    }

    if (s.st_size > (off_t)INT_MAX){
        return INT_MAX;
    }

    return (int)s.st_size;
}


/*
 * Search the global word_list for an entry whose text equals word.
 *
 * Returns 1 when such an entry exists; its counter has then been
 * incremented as a side effect. Returns 0 when no entry matches.
 */
int is_existed(char *word)
{
    word_list_t *node = word_list;

    /* advance to the first entry holding the same text, if any */
    while (node != NULL && strcmp(word, node->word) != 0){
        node = node->next;
    }

    if (node == NULL){
        return 0;
    }

    node->counter++;
    return 1;
}


/*
 * Append a new entry for word, with its counter set to 1, to the end of
 * the global word_list.
 *
 * Returns 0 on success. Returns -1 and leaves the list untouched when word
 * is NULL, when word plus its terminating '\0' does not fit in the entry's
 * buffer, or when malloc() fails.
 */
int create_new_word(char *word)
{
    word_list_t *new_node = NULL;
    word_list_t *last = NULL;
    size_t len;

    if (word == NULL){
        return -1;
    }

    /* refuse anything that would overflow the fixed-size buffer */
    len = strlen(word);
    if (len >= sizeof(new_node->word)){
        return -1;
    }

    new_node = malloc(sizeof(*new_node));
    if (new_node == NULL){
        printf("malloc error");
        return -1;
    }

    memcpy(new_node->word, word, len + 1);
    new_node->counter = 1;
    /* malloc() does not clear the memory, and every list walk stops at a NULL next */
    new_node->next = NULL;

    if (word_list == NULL){
        word_list = new_node;
        return 0;
    }

    /* find the current last entry and hang the new one behind it */
    for (last = word_list; last->next != NULL; last = last->next){
        ;
    }
    last->next = new_node;

    return 0;
}


/*
 * Sort, in ascending order of counter, the nodes that lie strictly between
 * head and tail. Entries with equal counters keep their relative order.
 *
 * head acts as a sentinel: it is never compared or moved, only its next
 * pointer is rewritten, so the caller has to pass the node that precedes
 * the first element to be sorted. tail is the node that follows the last
 * element to be sorted (NULL for "up to the end of the list") and has to be
 * reachable from head.
 *
 * A NULL head is treated as an empty range. Always returns 0.
 */
int quick_sort(word_list_t *head, word_list_t *tail)
{
    if (head == NULL)
        return 0;

    /*
     * Each round partitions the range around its first node, recurses into
     * the smaller part and loops on the larger one. That keeps the
     * recursion depth logarithmic even when most counters are equal.
     */
    while ((head->next != tail) && (head->next->next != tail)){
        word_list_t *pivot = head->next;
        word_list_t *less_end = head;   /* last node of the "smaller" run */
        word_list_t *rest_end = pivot;  /* last node of the "not smaller" run */
        word_list_t *cur = pivot->next;
        size_t less_count = 0;
        size_t rest_count = 0;

        while (cur != tail){
            word_list_t *following = cur->next;

            if (cur->counter < pivot->counter){
                less_end->next = cur;
                less_end = cur;
                less_count++;
            }
            else{
                rest_end->next = cur;
                rest_end = cur;
                rest_count++;
            }
            cur = following;
        }
        /* head -> smaller run -> pivot -> remaining run -> tail */
        less_end->next = pivot;
        rest_end->next = tail;

        if (less_count < rest_count){
            quick_sort(head, pivot);
            head = pivot;
        }
        else{
            quick_sort(pivot, tail);
            tail = pivot;
        }
    }

    return 0;
}


/*
 * Print a three-line header followed by one line per entry of the global
 * word_list: the word left-justified in 32 columns, then its counter.
 *
 * Always returns 0.
 */
int show_words()
{
    word_list_t *node = word_list;

    printf("the words list:\n");
    printf("word                counters\n");
    printf("----------------------------------------\n");

    while (node != NULL){
        printf("%-32s %4d\n", node->word, node->counter);
        node = node->next;
    }

    return 0;
}


int main(char argc, char **argv)
{
    int ret = -1;
    FILE *fp = NULL;
    char *file_name = NULL;
    char line[MAX_LINE_SIZE];
    char *delim = ".,! ";
    char *token = NULL;
    char *saveptr = NULL;
    char *str = NULL;

    /* check args */
    if (argc != 2){
        printf("Usage: %s filename\n", argv[0]);
        return -1;
    }   

    /* check size of file */
    file_name = argv[1];
    if (get_file_size(file_name) > MAX_FILE_SIZE){
        printf("the size of file too large.\n");
        return -1;
    }

    /* open file */
    fp = fopen(file_name, "r");
    if (fp == NULL){
        printf("open file error.\n");
        return -1;
    }

    /* read words line by line */
    while (fgets(line, MAX_LINE_SIZE, fp) != NULL){
        /* delete the '\n' of line */
        line[strlen(line)-1] = '\0';
        str = line;

        /* parse words in this line */
        while (1){
            token = strtok_r(str, delim, &saveptr);
            if (token == NULL){
                break;
            }
            str = NULL;

            /* check the length of word */
            if (strlen(token) > MAX_WORD_SIZE){
                printf("the word[%s] is too long.\n", token);
                return -1;
            }
            /* if existed, update counters of the word */
            if (is_existed(token)){
                ;               
            }
            /* not, create new node to store the word */
            else{
                create_new_word(token);
            }
        }

    }

    /* sort by ascending order */
    quick_sort(word_list, NULL);

    /* show words infor */
    show_words();

    return 0;
}

猜你喜欢

转载自blog.csdn.net/donglicaiju76152/article/details/77750898