一个用于C语言的#define处理器的简单版本（即无参数的情况）

这应当属于一个表查找问题，我们采用散列表（哈希表）查找的方法，并用分离链接法来解决冲突。

有一点要注意的是：在处理程序文本的过程中，要能够删除注释，而且替换文本末尾不能包含多余的空白符

我们简单描述一下该算法：我们将输入的名字转换为一个小的非负整数，该整数将作为一个指针数组的下标。数组的每个元素指向一个链表的开头，链表中的各个块用于描述具有该散列值的名字。

程序的主要流程是：我们从输入中获取下一个单词或字符，检查它是否是以#开头，如果是的话，就另行处理；否则，直接输出

本程序可用来处理两种预处理指令：#define name defn 以及 #undef name
对于其他的指令，我们予以忽视，当作正常的程序文本处理。当然，我们也可以在此基础上添加处理其他指令的功能

下面我们给出具体实现，代码都注释得很详细，就不多说了

#define MAXWORD  100       /*  maximum length of one word; also the size of the word, name and definition buffers  */
#define HASHSIZE 101       /*  number of slots in the hash table  */

/*  One (name, replacement text) pair, stored as a node of a singly linked chain.  */
struct nlist {             /*  chain entry  */
    char *name;            /*  the defined name  */
    char *defn;            /*  its replacement text  */
    struct nlist *next;    /*  next entry in the same chain, or NULL at the end  */
};

/*  The hash table itself; its size is supplied by the definition.  */
extern struct nlist *hashtab[];

/*  Tokenizer and directive handling  */
int getword(char *word, int lim);
int comment(void);      /*  full prototype, matching the definition  */
void getdef(void);

/*  General-purpose error reporting  */
void error_skip(char *fmt, ...);
void error_exit(char *fmt, ...);

/*  Character input with push-back  */
int getch(void);
void ungetch(int c);

/*  Hash-table operations  */
unsigned hash(char *s);
struct nlist *lookup(char *name);
struct nlist *install(char *name, char *defn);
void undef(char *name);

struct nlist *hashtab[HASHSIZE];     /*  pointer table: each element points to the head of one chain (file-scope object, so every slot starts out NULL)  */

/*
 * main: copy standard input to standard output, replacing every defined
 * name by its replacement text.
 *
 * Tokens come from getword().  A "#" token is handed to getdef(); a token
 * that does not start like an identifier is printed unchanged; an
 * identifier is printed unchanged unless lookup() finds a definition.
 */
int main(void)
{
    char word[MAXWORD];
    struct nlist *p;

    while (getword(word, MAXWORD) != EOF) {
        if (strcmp(word, "#") == 0) {
            getdef();
        } else if (!isalpha((unsigned char)word[0]) && word[0] != '_') {
            /* <ctype.h> functions need an unsigned char value (or EOF);
               a plain char may be negative for bytes >= 0x80 */
            fputs(word, stdout);
        } else if ((p = lookup(word)) == NULL) {
            fputs(word, stdout);        /* identifier without a definition */
        } else {
            fputs(p->defn, stdout);     /* defined name: emit its replacement */
        }
    }
    return 0;
}

/*
 * getdef: handle the rest of a line whose first token was '#'.
 *
 *   #define name text   records text as the replacement for name
 *   #undef name         removes name from the table
 *   anything else       is printed back as "#directive"
 *
 * The replacement text runs from the first non-blank character after the
 * name to the end of the line.  Block comments inside it are dropped and
 * trailing white space is removed.  A missing or over-long replacement text
 * is reported through error_skip() and nothing is installed.
 */
void getdef(void)
{
    char dir[MAXWORD];      /*  directive name following '#'  */
    char name[MAXWORD];     /*  the name being defined or undefined  */
    char def[MAXWORD];      /*  replacement text  */
    int len = 0;            /*  number of characters stored in def  */
    int toolong = 0;        /*  set when the text does not fit in def  */
    int c, d;

    /* Judge each token by the text that was stored, not by getword's
       return value. */
    getword(dir, MAXWORD);
    if (!isalpha((unsigned char)dir[0])) {
        error_skip("expect a word after #");
        return;
    }

    if (strcmp(dir, "undef") == 0) {    /*  remove a name and its replacement text  */
        getword(name, MAXWORD);
        if (isalpha((unsigned char)name[0]) || name[0] == '_')
            undef(name);
        else
            error_skip("expect a name after undef");
        return;
    }

    if (strcmp(dir, "define") != 0) {
        printf("#%s", dir);   /*  pass other directives through, e.g. #include  */
        return;
    }

    getword(name, MAXWORD);
    if (!isalpha((unsigned char)name[0]) && name[0] != '_') {
        error_skip("expect a name after define");
        return;
    }

    /* Skip blanks between the name and the text, but never cross the end
       of the line: an empty definition must not swallow the next line. */
    do {
        c = getch();
    } while (c != '\n' && c != EOF && isspace((unsigned char)c));

    /* Collect the text up to the end of the line.  The whole line is always
       consumed; characters that do not fit are only counted as overflow. */
    while (c != '\n' && c != EOF) {
        if (c == '/') {
            d = getch();
            if (d == '*') {
                comment();          /*  drop the comment entirely  */
                c = getch();
                continue;
            }
            ungetch(d);             /*  plain '/': keep it, re-read d normally  */
        }
        if (len < MAXWORD - 1)
            def[len++] = (char)c;
        else
            toolong = 1;
        c = getch();
    }

    while (len > 0 && isspace((unsigned char)def[len - 1]))   /*  trim trailing white space  */
        len--;
    def[len] = '\0';

    if (!toolong && len > 0) {
        install(name, def);
        if (c == EOF)
            ungetch(EOF);   /*  let the caller see the end of input again  */
        return;
    }

    /* error_skip() discards input through the next newline, but this line's
       end has already been consumed.  Push a line end back (EOF first, so it
       is read after the newline) so that only this line is skipped. */
    if (c == EOF)
        ungetch(EOF);
    ungetch('\n');
    if (toolong)
        error_skip("replacement text too long");
    else
        error_skip("expect a def after name");
}

getword函数可以正确处理下划线、#、注释以及引号内的字符。但对于注释，它只处理 /* ... */ 形式的块注释（不处理 // 形式的行注释）。由于程序需要，函数会原样输出单词前的空白符，而不是直接丢弃；然后解析出下一个单词或字符，并返回一个代表它的字符（通常是其首字符），交给主调函数处理

/*
 * getword: fetch the next word or character from the input (C source text).
 *
 * Leading white space is echoed to standard output rather than discarded.
 * The token stored in word is one of:
 *   - an identifier (letters, digits, underscores),
 *   - a complete character or string literal, escapes included,
 *   - the empty string, when a block comment was skipped,
 *   - any other single character.
 *
 * word  receives the token, always NUL-terminated when lim > 0
 * lim   size of word in bytes; at most lim - 1 characters are stored
 *
 * Returns the first character of the token, the value returned by
 * comment() when a comment was skipped, or EOF at end of input.
 */
int getword(char *word, int lim)
{
    char *w = word;
    int room = lim - 1;     /*  characters that still fit before the '\0'  */
    int first, c, ret;

    while ((first = getch()) != EOF && isspace(first))
        putchar(first);
    ret = first;
    if (first != EOF && room > 0) {
        *w++ = (char)first;
        room--;
    }

    /* Characters are read into an int so EOF stays distinguishable and the
       <ctype.h> functions never see a negative char value. */
    if (isalpha(first) || first == '_') {   /*  collect an identifier  */
        while (room > 0) {
            c = getch();
            if (c == EOF || (!isalnum(c) && c != '_')) {
                ungetch(c);
                break;
            }
            *w++ = (char)c;
            room--;
        }
    } else if (first == '\'' || first == '"') {   /*  collect a quoted literal  */
        while (room > 0) {
            if ((c = getch()) == EOF)
                break;
            *w++ = (char)c;
            room--;
            if (c == first)         /*  closing quote  */
                break;
            if (c == '\\' && room > 0) {
                /* keep the escaped character with its backslash so an
                   escaped quote does not end the literal */
                if ((c = getch()) == EOF)
                    break;
                *w++ = (char)c;
                room--;
            }
        }
    } else if (first == '/') {   /*  possibly the start of a comment  */
        c = getch();
        if (c == '*') {
            ret = comment();
            w = word;            /*  a comment yields an empty word  */
        } else {
            ungetch(c);          /*  lone '/': ret stays '/', not the next char  */
        }
    }

    if (lim > 0)
        *w = '\0';
    return ret;
}

/*
 * comment: skip the remainder of a block comment whose opening slash-star
 * has already been read.
 *
 * Comments are assumed not to nest.  An unterminated comment ends the scan
 * at end of input instead of looping forever.
 *
 * Returns the last character read: '/' when the closing star-slash was
 * found, EOF when the input ended first.
 */
int comment(void)
{
    int prev, cur;

    prev = getch();
    cur = getch();
    while (cur != EOF && (prev != '*' || cur != '/')) {
        prev = cur;
        cur = getch();
    }
    return cur;
}

hash.c

/*
 * hash: compute the table slot for string s.
 *
 * Each character is folded into the running value, which is multiplied by
 * 31 at every step; the result is reduced modulo HASHSIZE.
 */
unsigned hash(char *s)
{
    unsigned acc = 0;

    while (*s != '\0') {
        acc = 31 * acc + *s;
        s++;
    }
    return acc % HASHSIZE;
}

/*
 * lookup: search hashtab for name.
 *
 * Walks the chain selected by hash(name) and returns the entry whose name
 * compares equal, or NULL when no entry matches.
 */
struct nlist *lookup(char *name)
{
    struct nlist *entry = hashtab[hash(name)];

    while (entry != NULL && strcmp(name, entry->name) != 0)
        entry = entry->next;
    return entry;
}

/*
 * install: put (name, defn) into hashtab.
 *
 * A new entry is linked at the head of its chain.  If name is already
 * present, only its replacement text is replaced.  Both strings are copied.
 * Allocation failure is reported through error_exit().
 *
 * Returns the entry that now holds the definition.
 */
struct nlist *install(char *name, char *defn)
{
    struct nlist *np;
    char *copy;
    unsigned hashval;

    /* Duplicate the new text before releasing the old one: defn may point
       at the text currently stored for this name. */
    if ((copy = strdup(defn)) == NULL)
        error_exit("install: alloc fail");

    if ((np = lookup(name)) == NULL) {    /*  not found: create an entry  */
        np = malloc(sizeof *np);
        if (np == NULL || (np->name = strdup(name)) == NULL)
            error_exit("install: alloc fail");
        hashval = hash(name);
        np->next = hashtab[hashval];
        hashtab[hashval] = np;
    } else {                              /*  already present: drop the old text  */
        free(np->defn);
    }
    np->defn = copy;
    return np;
}

/*  undef函数：删除name及其定义  */
void undef(char *name)
{
    struct nlist *np, *pre;
    unsigned hashval;

    pre = NULL;
    hashval = hash(name);
    for (np = hashtab[hashval]; np != NULL; np = np->next) {
        if (strcmp(name, np->name) == 0)
            break;
        pre = np;
    }
    if (np != NULL) {   /*  name在表中  */
        if (pre == NULL)
            hashtab[hashval] = np->next;   /*  要删除的name在链表的开头  */
        else
            pre->next = np->next;
        free(np->name);
        free(np->defn);
        free(np);
        np->name = NULL;
        np->defn = NULL;
        np = NULL;
    }
}

buf.c

#define BUFSIZE 100    /*  capacity of the push-back buffer  */

static int buf[BUFSIZE];    /*  push-back stack shared by getch and ungetch  */
static int bufp = -1;       /*  index of the top element; -1 when the stack is empty  */

/*
 * getch: get one character.
 *
 * Returns the most recently pushed-back character when there is one,
 * otherwise reads from standard input.
 */
int getch(void)
{
    return (bufp > -1) ? buf[bufp--] : getchar();
}

/*
 * ungetch: push a character back onto the input.
 *
 * Similar to the standard ungetc, except that EOF can be pushed back too:
 * buf is an array of int, so every element can hold EOF.
 *
 * When all BUFSIZE slots are occupied the character is dropped and a
 * diagnostic is written to stderr, keeping it out of the processed output.
 */
void ungetch(int c)
{
    if (bufp < BUFSIZE - 1)     /*  highest valid index is BUFSIZE - 1  */
        buf[++bufp] = c;
    else
        fprintf(stderr, "error: too many characters\n");
}

error.c

/*
 * error_skip: print an error message and skip the rest of the current line.
 *
 * fmt and the following arguments are formatted as by printf.  The message
 * goes to stderr, prefixed with "error: " and followed by a newline.
 * Input is then discarded up to and including the next newline.  The scan
 * also stops at end of input, so a last line without a newline cannot make
 * it loop forever.
 */
void error_skip(char *fmt, ...)
{
    va_list args;
    int c;

    va_start(args, fmt);
    fprintf(stderr, "error: ");
    vfprintf(stderr, fmt, args);
    fprintf(stderr, "\n");
    va_end(args);

    while ((c = getch()) != '\n' && c != EOF)
        ;
}

/*
 * error_exit: print an error message and terminate the program.
 *
 * fmt and the following arguments are formatted as by printf.  The message
 * goes to stderr, prefixed with "error: " and followed by a newline; the
 * process then exits with status 1.
 */
void error_exit(char *fmt, ...)
{
    va_list ap;

    fputs("error: ", stderr);
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fputc('\n', stderr);
    exit(1);
}

一个用于C语言的#define处理器的简单版本（即无参数的情况）

下面我们给出具体实现，代码都注释得很详细，就不多说了

猜你喜欢