[Compiler principles] Implementing lexical analysis in C++ (runnable, with comments)

Purpose:

Write a word-reading program (a lexical analyzer). From the input source program, identify the words that have independent meaning, namely the five categories: reserved words, identifiers, constants, operators, and separators. Then output, in order, the internal code of each word together with the value of the word symbol.

The difference between lexical analysis, grammatical analysis, and semantic analysis:

1. Lexical analysis :

  Lexical analysis is the first stage of the compilation process. Its task is to read the source program character by character from left to right, that is, to scan the character stream that makes up the source program, and to recognize each "word" (also called a word symbol or symbol) according to the word-formation rules;

  Through the lexical analysis program, the task of reading the source program and recognizing symbols is realized. The lexical analysis process is based on the lexical rules of the language;

  Output: The "words" output by the lexical analysis program are often output in the form of two-tuples, that is, the type of the word and the value of the word itself;

  Recognition: lexical rules (morphology) define the words that make up a language; a word is the smallest meaningful unit in the language;

2. Syntax analysis:

  Syntax analysis is a logical stage of the compilation process. Its task is to take the word sequence produced by lexical analysis and combine it into grammatical phrases such as "program", "statement", and "expression";

  Grammar is the structure and format of user data and control information

  Recognition: grammar, i.e. the rules for organizing words into meaningful phrases and sentences, just like the grammar used when translating English;

3. Semantic analysis:

  Semantic analysis is a logical stage of the compilation process. Semantics explains the meaning of each part of the control information: it specifies what kind of control information needs to be sent, what actions are to be completed, and what kind of responses are to be made. The task of this stage is to perform context-sensitive checks, including type checking, on a source program whose structure is already correct;

  Collect type information for later use in the code generation phase;

  Semantic analysis checks types and reports errors, for example: an array variable used directly in an expression, or a mismatch between the types on the right-hand and left-hand sides of an assignment statement;

  Recognition: Semantics-combined with the context, we can deduce the true meaning of the sentence, that is, the English meaning translated after we input Chinese, or the translated Chinese after inputting English;

Experiment code:

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <string>
#include <cstring>
using namespace std;


int seekresult;   // status of the most recent stream-repositioning call made by Scanner
string word;      // lexeme of the current token; starts empty so the first token carries no stray leading space
char ch;          // most recently read input character
int num = 0;      // not referenced by the code visible in this file
int rows = 1;     // current line number, starting at 1
int cols = 1;     // current column number, starting at 1
bool flag;        // scanning-in-progress marker: main loops while it is 1, Scanner clears it at end of file
int type;         // token code returned by the latest Scanner call

// Reserved words.
string Keyword[8]={ "main","float","int","scanf","cos","sqrt","if","printf" };
// Delimiters, one per row. Each entry is written as a string literal so that it
// occupies a row of its own; a flat list of char literals would be packed four
// to a row by brace elision, and only column 0 of each row is ever compared.
// Rows past the last delimiter are zero-filled.
char Jiefu[36][4] = { ";","(",")","^",",","#","%","[","]","{","}","." };
// Operator characters.
char Mark[8] = { '-','*','/','>','<','=','+','!' };
char DigitBTable[1000][40] = {};      // constant table
int Inum = 0;                         // number of rows used in IDentifierTable
int Dnum = 0;                         // running count of constants
char IDentifierTable[1000][40] = {};  // identifier table


// Reports whether `word` is one of the reserved words stored in Keyword.
// Every entry is examined, starting at index 0; the bound is derived from the
// array itself so the loop can neither skip the first entry nor read past the
// last one.
bool IsKeyword(string word) {
	const int keywordCount = sizeof(Keyword) / sizeof(Keyword[0]);
	for (int i = 0; i < keywordCount; i++) {
		if (Keyword[i] == word)
			return true;
	}
	return false;
}

// Reports whether `mark` is one of the operator characters stored in Mark.
// Every entry is examined, starting at index 0; the bound is derived from the
// array itself so the loop can neither skip the first entry nor read past the
// last one.
bool IsMark(char mark) {
	const int markCount = sizeof(Mark) / sizeof(Mark[0]);
	for (int i = 0; i < markCount; i++) {
		if (Mark[i] == mark)
			return true;
	}
	return false;
}

// Reports whether ch is one of the relational-operator characters
// '=', '<' or '>'.
bool IsGuanxiyunsuanfu(char ch) {
	switch (ch) {
	case '=':
	case '<':
	case '>':
		return true;
	default:
		return false;
	}
}

// Reports whether ch lies in 'A'..'Z' or 'a'..'z'.
bool IsLetter(char ch) {
	const bool isUpper = ('A' <= ch && ch <= 'Z');
	const bool isLower = ('a' <= ch && ch <= 'z');
	return isUpper || isLower;
}

// Reports whether ch is one of the arithmetic-operator characters
// '+', '-' or '*'. The division sign '/' is not part of this set.
bool IsSuanshuyunsuanfu(char ch) {
	return ch == '+' || ch == '-' || ch == '*';
}

// Reports whether ch is a delimiter, i.e. equals the first character of one
// of the populated rows of Jiefu.
// Only column 0 of each row is compared. The scan stops at the first row whose
// first byte is '\0' (an unused, zero-filled row), so no hand-maintained entry
// count is needed and a NUL byte is never reported as a delimiter.
bool IsJiefu(char ch) {
	const int rowCount = sizeof(Jiefu) / sizeof(Jiefu[0]);
	for (int i = 0; i < rowCount && Jiefu[i][0] != '\0'; i++) {
		if (ch == Jiefu[i][0])
			return true;
	}
	return false;
}

// Reports whether ch is a decimal digit, '0' through '9'.
bool IsDigit(char ch) {
	return '0' <= ch && ch <= '9';
}


//从文件中读取一个词
int Scanner(FILE *fp) { // FILE *fp 定义了一个文件指针通过操作该指针可以进行文件读写
	//先读写一个字符,赋值给ch
	ch = fgetc(fp);
	
	if (feof(fp)) {   //feof(fp)函数用于测试fp文件指针是否已经到达文件结尾,已达到则返回1,反之返回0.
		flag = 0;
		       return 0;
	}

	else if (ch == ' ') {
		cols++;
		return 0;
	}

	else if (ch == '\n') {
		rows++;
		cols = 1;
		return 0;
	}

	//如果是字母开头或者_开头,判断是关键字还是标识符
	else if (IsLetter(ch) || ch == '_') {
		word += ch;
		cols++;
		while ((ch = fgetc(fp)) && (ch == IsLetter(ch) || IsDigit(ch) || ch == '_')) {
			word += ch;
			cols++;
		}
		//文件读完,返回true
		if (feof(fp)) {
			flag = 0;
			return 1;
		}
		//检验是否是关键字
		for (int i = 1; i <= 8; i++) {
			if (word == Keyword[i]) {
				//seek_cur当前位置,fseek函数作用:文件位置指针从当前位置移一个位置
				seekresult = fseek(fp, -1, SEEK_CUR);
					//5+i-1:关键字
					return  5 + i - 1;
			}
		}
		for (int Ii = 0; Ii < Inum; Ii++) {
			if (Inum != 0 && strcmp(IDentifierTable[Ii], word.c_str())== 0){
				seekresult = fseek(fp, -1, SEEK_CUR);
					//1:标识符
					//return 1
					return 1000 + Ii + 1;
			}

		}
		strcpy(IDentifierTable[Inum], word.c_str());
			Inum = Inum + 1;
			//写追加
			ofstream Arithmetic_operator;
			Arithmetic_operator.open("IDentifierTable.txt",ios::app);
			Arithmetic_operator << word << " " << endl;
			Arithmetic_operator.close();

			seekresult = fseek(fp, -1, SEEK_CUR);
			//1:标识符
			//return 1
			return 1000 + Inum;

	}
	else if (IsSuanshuyunsuanfu) {
		word += ch;
		cols++;
		 //4是运算符
		return 4;

	}
	else if (IsDigit(ch)) {
		word += ch;
		cols++;
		while ((ch = fgetc(fp)) && IsDigit(ch)) {
			word += ch;
			cols++;
		}
		int Di = 0;
		for (Di = 0; Di < Inum; Di++) {
			if (Dnum != 0 && strcmp(DigitBTable[Di], word.c_str()) == 0) {
				seekresult = fseek(fp, -1, SEEK_CUR);
				//常数为2
				//return 2
				return 2000 + Di + 1;
			}
		}
		strcpy(IDentifierTable[Inum], word.c_str());
		Dnum = Dnum + 1;
		//写追加
		ofstream Arithmetic_operator;
		Arithmetic_operator.open("DigitBTable.txt", ios::app);
		Arithmetic_operator << word << " " << endl;
		Arithmetic_operator.close();

		/*if(feof(fp)){
		flag = 0;
		return 2;
	} */

		seekresult = fseek(fp, -1, SEEK_CUR);
			//2:数字(常量)
			return 2000 + Dnum;
	}

	//检验界符5
	else if (IsJiefu(ch)) {
	int Ji;
	for (Ji = 0; Ji < 12; Ji++) {
		if (ch == Jiefu[Ji][0]) {
			break;
		}
	}
	word += ch;
	cols++;
	return (6 - 1 + 32 + Ji);//界符6-1+32+i
}

//检验关系运算符4 :<=、>=、<>、==、 < 、>
	else if (IsGuanxiyunsuanfu(ch))
	{
	cols++;
	word += ch;
	//检验  <> <=
	if (ch == '<')
	{
		ch = fgetc(fp);
		if (ch == '>' || ch == '=')
		{
			word += ch;
			cols++;
			return 4;
		}
	}
	//检验  >= ==
	else {
		ch = fgetc(fp);
		if (ch == '=')
		{
			word += ch;
			cols++;
			return 4;
		}
	}
	if (feof(fp)) {
		flag = 0;
	}
	seekresult = fseek(fp, -1, SEEK_CUR);
	//3:算数运算符 
	return 3;
	}

	//首字符是 / 有可能是除号 也有可能是注释
	else if (ch == '/')
	{
	cols++; word += ch;
	ch = fgetc(fp);
	//这种情况是除号
	if (ch != '*' && ch != '/')
	{
		seekresult = fseek(fp, -1, SEEK_CUR);
		//3:算数运算符 
		return 3;
	}
	//注释符//:这一行剩下的全被注释了
	if (ch == '/')
	{
		word.clear();
		while ((ch = fgetc(fp)) && ch != '\n' && !feof(fp))
		{
		}
		if (feof(fp)) {
			flag = 0;
			return 0;
		}
		else {
			seekresult = fseek(fp, -1, SEEK_CUR);
		}
		rows++; cols = 1;
		return 0;
	}
	if (ch == '*')
	{
		bool flag5 = 1;
		while (flag5)
		{
			word.clear();
			ch = fgetc(fp);
			cols++;
			if (ch == '\n')
			{
				rows++;
				cols = 1;
			}
			if (ch != '*')
				continue;
			else
			{
				ch = fgetc(fp);
				cols++; if (ch == '\n') { rows++; cols = 1; }
				if (ch == '/') {
					flag5 = 0;
				}
				else continue;
			}
			if (feof(fp))
			{
				flag = 0;
				return 0;
			}
		}
	}
	}
	else {
	word += ch;
	cols++;
	return -1;
	}
}

// Opens test.txt, calls Scanner until it clears `flag`, and prints one
// "(lexeme,code)" line per recognized token; illegal characters are reported
// with their line and column. Waits for the user to type Y before exiting.
int main()
{
	FILE *fp;

	cout << "open " << "test.txt" << endl;
	system("pause");

	flag = 1;
	// Open the source file; if that fails the scanning loop is skipped.
	if ((fp = fopen("test.txt", "r")) == NULL)
	{
		cout << "Sorry,can't open this file." << endl;
		flag = 0;
	}

	while (flag == 1)
	{
		// Extract the next token.
		type = Scanner(fp);

		// 1001..2000: identifier (Scanner returns 1000 + table position, and
		// the table has 1000 rows, so 2000 is still an identifier)
		if (type > 1000 && type <= 2000)
		{
			cout << "(" << word << "," << type - 1000 << ")" << endl;
			if (word.length() > 20)
				cout << "ERROR Identifier length cannot exceed 20 characters" << endl;
			word.clear();
		}
		// above 2000: numeric constant
		else if (type > 2000)
		{
			cout << "(" << word << "," << (type - 2000) << ")" << endl;
			// A lone "0" is a valid constant; only a multi-digit number may
			// not start with 0.
			if (word.length() > 1 && word[0] == '0')
				cout << "ERROR: The first digit cannot be 0!" << endl;
			word.clear();
		}
		// 3: arithmetic operator + - * /
		else if (type == 3)
		{
			cout << "(" << word << "," << "3" << ")" << endl;
			word.clear();
		}
		// 4: relational operator < <= > >= = <>
		else if (type == 4)
		{
			cout << "(" << word << "," << "4" << ")" << endl;
			word.clear();
		}
		// 37 and above (below 1000): delimiter
		else if (type >= 37)
		{
			cout << "(" << word << "," << "_" << ")" << endl;
			word.clear();
		}
		// 5..36: reserved word
		else if (type >= 5 && type <= 36)
		{
			cout << "(" << word << "," << "_" << ")" << endl;
			word.clear();
		}
		// -1: illegal character
		else if (type == -1)
		{
			cout << "Illegal character   " << "line " << rows << " cols " << cols - 1 << "  " << word << endl;
			word.clear();
		}
	}

	// fclose must not be handed a null pointer when the file never opened.
	if (fp != NULL)
		fclose(fp);

	cout << "Do you want to close?(Y or N)" << endl;
	char end;
	while (cin >> end && end != 'Y') {
		cout << "Do you want to close?(Y or N)" << endl;
	}
	return 0;
}

operation result:

Experimental text:

Guess you like

Origin blog.csdn.net/qq_44624536/article/details/113757788
Recommended