c0706 交叉引用生成器

问题描述

【问题描述】
“交叉引用生成器”是指具有如下功能的程序:
对输入的一篇文档,统计出现的所有单词及其所在的行号。将其结果以规定的格式输出。
【输入形式】
程序从crossin.txt读入一篇文档。该文档由若干行组成,每行中包含一系列的单词。行号由1开始计数。该文档的单词量不超过200,每个单词最大长度不超过20个字符。
【输出形式】
将输入文档中出现的单词及其所在行号输出到文件crossout.txt中。
输出有若干行,每一行都是文档中出现的一个单词。按如下规格输出(无空格):
word:line1,line2,…lineN
输出时遵循以下规定:
1.只输出所有由英文字母(包括连字符)构成的单词,数字或包含其他特殊字符的单词不用输出。先输出大写A–Z开头的单词,再输出小写a–z开头的单词
2.各单词后面的行号从小到大排列。行号不重复打印,即一个单词如果在一行中出现多次,只打印一次该行号
3.统计的单词不包括如下:a, an, the, and
【样例输入】
Alcatel provides end-to-end solutions.
It enables enterprises to deliver content to any type of user.
lcatel operates in 130 countries.
Alcatel focus on optimizing their service offerings and revenue streams.
【样例输出】
Alcatel:1,4
It:2
any:2
content:2
countries:3
deliver:2
enables:2
end:1
enterprises:2
focus:4
in:3
lcatel:3
of:2
offerings:4
on:4
operates:3
optimizing:4
provides:1
revenue:4
service:4
solutions:1
streams:4
their:4
to:1,2
type:2
user:2
和之前的内容相比,题目有一定的变化,要求更多了(之前只是将单词分割出来,并建立一个二叉树统计)参见二叉树统计单词

自做代码:C语言编写

//第一次解决
/*
2020/4/6
*/
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<ctype.h>

#define MAXWORD 100   /* size of a word buffer, terminating '\0' included */
#define MAXLINES 200  /* capacity of the per-word line-number list */

/*
 * One node of the binary search tree holding the cross-reference.
 *
 * word     the word this node stands for (NUL-terminated)
 * nlines   number of valid entries in line[]
 * line     line numbers on which the word was seen, in the order recorded
 * lchild   subtree of words that compare smaller than `word` (strcmp order)
 * rchild   subtree of words that compare greater than `word`
 *
 * The problem statement allows up to 200 words in the document, so a single
 * word can appear on up to 200 different lines; MAXLINES is sized for that.
 *
 * The typedef makes the bare name `Node` usable in C as well as in C++.
 */
typedef struct Node Node;
struct Node
{
	char word[MAXWORD];
	int nlines;
	int line[MAXLINES];
	Node* lchild;
	Node* rchild;
};

/*
 * Read the next token from `in` into `word`.
 *
 * Leading whitespace is skipped, except that a newline is reported on its
 * own so the caller can count lines.  A token is either
 *   - a word: a letter followed by letters/digits.  Any other character,
 *     including '-', ends the word, so "end-to" yields "end", "-", "to"; or
 *   - a single character that is not a letter.
 *
 * word  destination buffer of at least `lim` bytes
 * lim   size of `word`, must be at least 2.  At most lim-1 characters are
 *       stored; the remainder of an over-long word stays in the stream and
 *       is returned as the next token.  If lim < 2 nothing is read and EOF
 *       is returned.
 * in    stream to read from
 *
 * Returns '\n' (word is "") at a line break, EOF (word is "") at end of
 * input, otherwise the first character of the token stored in `word`.
 */
int getword(char *word,int lim,FILE* in)
{
	int c;            /* int so that EOF stays distinct from every byte value */
	char* w = word;
	char* end;

	if(lim < 2)
	{
		if(lim == 1)
			*word = '\0';
		return EOF;
	}

	while(isspace(c = fgetc(in)))
	{
		if(c == '\n')
		{
			*w = '\0';   /* a line break carries no word */
			return c;
		}
	}

	if(c == EOF)
	{
		*w = '\0';
		return EOF;
	}

	*w++ = (char)c;
	if(!isalpha(c))
	{
		*w = '\0';
		return c;
	}

	/* word[0] is in place; the last byte is reserved for the terminator. */
	for(end = word + lim - 1; w < end; w++)
	{
		c = fgetc(in);
		if(!isalnum(c))
		{
			if(c != EOF)
				ungetc(c,in);   /* the delimiter belongs to the next token */
			break;
		}
		*w = (char)c;
	}
	*w = '\0';
	return (unsigned char)word[0];
}

/*
 * Record that `word` occurs on line `curline` in the tree rooted at `root`.
 *
 * The tree is ordered by strcmp().  A new node is created when the word is
 * not present yet; otherwise `curline` is appended to the node's line list
 * unless it equals the most recently recorded line (so a word repeated on
 * the same line is listed once; this relies on lines being fed in order).
 *
 * root     root of the (sub)tree, NULL for an empty tree
 * word     NUL-terminated word; copied into the node, truncated if it does
 *          not fit the node's buffer
 * curline  line number to record
 *
 * Returns the root of the updated (sub)tree.  Terminates the program with an
 * error message if memory for a new node cannot be obtained.  Line numbers
 * beyond the capacity of the node's list are dropped.
 */
Node* insert(Node* root,char *word,int curline)
{
	int condition;
	int capacity;

	if(root == NULL)
	{
		/* The cast keeps the code acceptable to a C++ compiler as well. */
		root = (Node*)malloc(sizeof *root);
		if(root == NULL)
		{
			perror("insert: malloc");
			exit(EXIT_FAILURE);
		}
		snprintf(root->word,sizeof root->word,"%s",word);
		root->nlines = 1;
		root->line[0] = curline;
		root->lchild = root->rchild = NULL;
		return root;
	}

	condition = strcmp(root->word,word);
	if(condition > 0)
	{
		root->lchild = insert(root->lchild,word,curline);
	}
	else if(condition < 0)
	{
		root->rchild = insert(root->rchild,word,curline);
	}
	else
	{
		capacity = (int)(sizeof root->line / sizeof root->line[0]);
		if(root->line[root->nlines-1] != curline && root->nlines < capacity)
		{
			root->line[root->nlines] = curline;
			root->nlines++;
		}
	}
	return root;
}

/*
 * Write the cross-reference stored in the tree rooted at `root` to `out`.
 *
 * The tree is walked in order (left subtree, node, right subtree), so words
 * come out in ascending strcmp() order.  Each word is written as
 * "word:l1,l2,...,lN" followed by a newline.  The words "a", "an", "the"
 * and "and" are skipped.
 */
void printToFile(Node* root,FILE *out)
{
	static const char *const skipped[] = { "a", "an", "the", "and" };
	const size_t nskipped = sizeof skipped / sizeof skipped[0];
	size_t k;
	int idx;
	int last;

	if(root == NULL)
		return;

	printToFile(root->lchild,out);

	/* k reaches nskipped only when the word matches none of the entries. */
	for(k = 0; k < nskipped; k++)
	{
		if(strcmp(root->word,skipped[k]) == 0)
			break;
	}

	if(k == nskipped)
	{
		fprintf(out,"%s:",root->word);
		last = root->nlines - 1;
		for(idx = 0; idx < last; idx++)
			fprintf(out,"%d,",root->line[idx]);
		if(last >= 0)
			fprintf(out,"%d\n",root->line[last]);   /* final entry ends the row */
	}

	printToFile(root->rchild,out);
}


/*
 * Read crossin.txt, build the word -> line-number tree and write the
 * cross-reference to crossout.txt.
 *
 * Returns 0 on success, EXIT_FAILURE if either file cannot be opened or the
 * output file cannot be closed cleanly.
 */
int main()
{
	int c;   /* int, not char: the value is compared against EOF */
	int curline = 1;
	char word[MAXWORD];
	FILE *in,*out;
	Node* root = NULL;

	in = fopen("crossin.txt","r");
	if(in == NULL)
	{
		perror("crossin.txt");
		return EXIT_FAILURE;
	}
	out = fopen("crossout.txt","w");
	if(out == NULL)
	{
		perror("crossout.txt");
		fclose(in);
		return EXIT_FAILURE;
	}

	while((c = getword(word,MAXWORD,in)) != EOF)
	{
		if(c == '\n')
		{
			curline++;
		}
		/* Only tokens that start with a letter are words worth recording. */
		if(isalpha((unsigned char)word[0]))
		{
			root = insert(root,word,curline);
		}
	}
	printToFile(root,out);

	fclose(in);
	if(fclose(out) != 0)   /* fclose flushes; a failure means lost output */
	{
		perror("crossout.txt");
		return EXIT_FAILURE;
	}
	return 0;
}

自做代码:C++编写

//第二次解决
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<ctype.h>

// Size of a word buffer, terminating '\0' included.
const int MAXWORD = 100;

// One node of the binary search tree holding the cross-reference.
struct Node
{
	char word[MAXWORD];      // the word this node stands for
	int nline;               // number of valid entries in lines[]
	int lines[200];          // line numbers on which the word was seen
	Node *lchild, *rchild;   // subtrees of smaller / greater words (strcmp order)
};

// Read the next token from `in` into `word`.
//
// Leading whitespace is skipped, except that a newline is reported on its
// own so the caller can count lines.  A token is either
//   - a word: a letter followed by letters and/or '-' characters
//     (so "end-to-end" is one word), or
//   - a single character that is not a letter.
//
// word  destination buffer of at least `lim` bytes
// lim   size of `word`, must be at least 2.  At most lim-1 characters are
//       stored; the remainder of an over-long word stays in the stream and
//       is returned as the next token.  If lim < 2 nothing is read and EOF
//       is returned.
// in    stream to read from
//
// Returns '\n' (word is "") at a line break, EOF (word is "") at end of
// input, otherwise the first character of the token stored in `word`.
int getword(char word[],int lim,FILE* in)
{
	int c;   // int so that EOF stays distinct from every byte value
	char *w = word;

	if(lim < 2)
	{
		if(lim == 1)
			*word = '\0';
		return EOF;
	}

	while(isspace(c = fgetc(in)))
	{
		if(c == '\n')
		{
			*w = '\0';
			return c;
		}
	}

	if(c == EOF)
	{
		*w = '\0';
		return EOF;
	}

	*w++ = (char)c;
	if(!isalpha(c))
	{
		*w = '\0';
		return c;
	}

	// word[0] is in place; the last byte is reserved for the terminator.
	for(char *end = word + lim - 1; w < end; w++)
	{
		c = fgetc(in);
		if(!isalpha(c) && c != '-')
		{
			if(c != EOF)
				ungetc(c,in);   // the delimiter belongs to the next token
			break;
		}
		*w = (char)c;
	}
	*w = '\0';
	return (unsigned char)word[0];
}


// Record that `word` occurs on line `curLine` in the tree rooted at `root`.
//
// The tree is ordered by strcmp().  A new node is created (and `root` is
// updated through the reference) when the word is not present yet; otherwise
// `curLine` is appended to the node's line list unless it equals the most
// recently recorded line, so a word repeated on one line is listed once.
//
// root     root of the (sub)tree, NULL for an empty tree; updated in place
// word     NUL-terminated word; copied into the node, truncated if it does
//          not fit the node's buffer
// curLine  line number to record
//
// Terminates the program with an error message if memory for a new node
// cannot be obtained.  Line numbers beyond the capacity of the node's list
// are dropped.
void insert(Node* &root,char word[],int curLine)
{
	if(root == NULL)
	{
		root = (Node*)malloc(sizeof(Node));
		if(root == NULL)
		{
			perror("insert: malloc");
			exit(EXIT_FAILURE);
		}
		snprintf(root->word,sizeof root->word,"%s",word);
		root->nline = 1;
		root->lines[0] = curLine;
		root->lchild = root->rchild = NULL;
		return ;
	}

	int cmp = strcmp(root->word,word);
	if(cmp>0)
	{
		insert(root->lchild,word,curLine);
	}
	else if(cmp == 0)
	{
		const int capacity = (int)(sizeof root->lines / sizeof root->lines[0]);
		if(root->lines[root->nline-1] != curLine && root->nline < capacity)
		{
			root->lines[root->nline] = curLine;
			root->nline++;
		}
	}
	else
	{
		insert(root->rchild,word,curLine);
	}
}

// Write the cross-reference stored in the tree rooted at `root` to `out`.
//
// The tree is walked in order (left subtree, node, right subtree), so words
// come out in ascending strcmp() order.  Each word is written as
// "word:l1,l2,...,lN" followed by a newline.  The words "a", "an", "the"
// and "and" are not written.
void midOrder(Node* root,FILE* out)
{
	if(root == NULL)
		return;

	midOrder(root->lchild,out);

	bool skipped = strcmp(root->word,"a") == 0
	            || strcmp(root->word,"an") == 0
	            || strcmp(root->word,"the") == 0
	            || strcmp(root->word,"and") == 0;
	if(!skipped)
	{
		fprintf(out,"%s:",root->word);
		for(int i = 0;i<root->nline;i++)
		{
			if(i == root->nline - 1)
				fprintf(out,"%d\n",root->lines[i]);
			else
				fprintf(out,"%d,",root->lines[i]);
		}
	}

	// The right subtree must be visited whether or not this node was
	// skipped; otherwise every word stored to the right of a skipped word
	// would be missing from the output.
	midOrder(root->rchild,out);
}
		
// Read crossin.txt, build the word -> line-number tree and write the
// cross-reference to crossout.txt.
//
// Returns 0 on success, EXIT_FAILURE if either file cannot be opened or the
// output file cannot be closed cleanly.
int main()
{
	char word[MAXWORD];
	int curLine = 1;
	int c;   // int, not char: the value is compared against EOF
	FILE *in,*out;

	in = fopen("crossin.txt","r");
	if(in == NULL)
	{
		perror("crossin.txt");
		return EXIT_FAILURE;
	}
	out = fopen("crossout.txt","w");
	if(out == NULL)
	{
		perror("crossout.txt");
		fclose(in);
		return EXIT_FAILURE;
	}

	Node* root = NULL;

	while( (c = getword(word,MAXWORD,in) ) != EOF)
	{
		if(c == '\n')
			curLine++;
		else if(isalpha((unsigned char)word[0]))
		{
			// Only tokens that start with a letter are words worth recording.
			insert(root,word,curLine);
		}
	}
	midOrder(root,out);

	fclose(in);
	if(fclose(out) != 0)   // fclose flushes; a failure means lost output
	{
		perror("crossout.txt");
		return EXIT_FAILURE;
	}
	return 0;
}
发布了117 篇原创文章 · 获赞 71 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/qq_34686440/article/details/105330872