C言語の字句解析プログラム
これは、コンパイラ原理の授業で教師から出された実験課題です。授業のあと、コードを書き上げるのに1週間近くかかりました(主な理由は、C++を長い間使っておらず、なじみのない関数が多く、多くの資料を参照する必要があったためです)。解析には単純な構文エラー判定機能しかありませんが、機能を増やしたい場合は、関連する関数のコードに追加できます。
トークンの種別コードは大まかな分類にすぎず、新しい種別を追加するのはとても簡単です。
C言語の字句解析の流れを図に示します。
コードは次のとおりです。今回のコードは、クリーンコード、特に変数の命名と関数の書き方に関する初歩的な練習でもあります。うまく書けていると思われた場合は、クリーンコードに関する関連記事も確認してみてください。
今回のコードには基本的にコメントがありませんが、フローチャートと合わせて読めば誰でも理解できると思います。長すぎると感じる場合は、Visual Studioにコードをコピーして、関数のコードブロックを折りたたんでください。ロジックが明確に見えるはずです。
#include <ctype.h>
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
using namespace std;
// Forward declarations for the functions defined further down in this file.
string readFile(string fileName);          // loads a whole file into a string
string fileFilter();                       // runs the three filters below on codeSource
string singleLineCommentsFilter();         // handles "//" comments in codeSource
string multilineCommmentsFileter();        // handles "/* ... */" comments in codeSource
string specialCharacterFilter();           // handles '\n', '\t', '\v' and '\r' in codeSource
void separateAndJudge();                   // scans codeSource and fills the token table
bool isReservedWord(string vocabulary);    // true when vocabulary is one of the 32 listed keywords
void separateAndJudge();                   // repeated declaration of the prototype above
void showTokenData();                      // prints every entry of the token table
// The *Started functions take the index in codeSource where a token begins,
// record the token, and return the index at which scanning resumes.
int digitStarted(int cnt);
bool isBoundSymbol(char ch);               // one of ( ) , ; { } "
bool isOperator(char ch);                  // one of + - * / = % > <
int judgeStartingCharactorType(char ch);   // classifies ch into a code 1..6 used by separateAndJudge
bool isDigit(char ch);                     // '0'..'9'
bool isAlpha(char ch);                     // 'a'..'z' or 'A'..'Z'
int alphaStarted(int cnt);
int underlineStarted(int cnt);
string transCharToString(char ch);         // one-character string holding ch
int operatorStarted(int cnt);
// Source text shared by the whole program: assigned in main, modified in place
// by the filter functions, then read by the scanners.
string codeSource;
// Token table keyed by lexeme; the mapped value is the token type code:
// 1 identifier, 2 reserved word, 3 digit, 4 bound symbol, 5 operator.
map<string, int> tokens;
// Entry point: loads the test file, echoes it, runs the filters, scans the
// filtered text into tokens and prints the resulting token table.
// Returns 0 on normal completion.
int main()
{
    // Stage 1: load and echo the raw source.
    codeSource = readFile("d:\\testCode.txt");
    cout << "This is source code" << endl;
    cout << "---------------------------------" << endl;
    cout << endl;
    cout << codeSource << endl;

    // Stage 2: the banner is written before filtering starts, so it is
    // already queued if one of the filters terminates the program.
    cout << "This is code filtered" << endl;
    cout << "---------------------------------" << endl;
    codeSource = fileFilter();
    cout << codeSource << endl;
    cout << "-----------------------" << endl;

    // Stage 3: tokenize and dump the token table.
    separateAndJudge();
    cout << "this is tokens " << endl;
    cout << "-----------------------" << endl;
    showTokenData();

    return 0;
}
// Reads the entire file named by fileName (opened in text mode "r") and
// returns its contents as a string.
//
// If the file cannot be opened, prints "cant open file" to stdout and
// terminates the process with exit(0).
string readFile(string fileName)
{
    FILE* fp = fopen(fileName.c_str(), "r");
    if (fp == NULL)
    {
        cout << "cant open file";
        exit(0);
    }

    string contents;
    // fgetc returns an int so that EOF is distinguishable from every valid
    // byte value. Narrowing to char before the comparison would make a 0xFF
    // byte look like EOF (signed char) or make EOF undetectable (unsigned char).
    int ch;
    while ((ch = fgetc(fp)) != EOF)
    {
        contents += static_cast<char>(ch);
    }

    // Release the stream handle once the whole file has been consumed.
    fclose(fp);
    return contents;
}
// Runs the three filters over the global codeSource in a fixed order:
// single-line comments, then multi-line comments, then special characters.
// Each filter edits codeSource in place; the value returned here is the
// result of the last one.
string fileFilter()
{
    singleLineCommentsFilter();
    multilineCommmentsFileter();
    return specialCharacterFilter();
}
// Removes every "//" comment from the global codeSource, in place.
//
// Each comment is erased from its "//" up to, but not including, the newline
// that ends the line. A comment on the last line, which has no terminating
// newline, is erased through the end of the text.
//
// Returns a copy of the updated codeSource.
//
// Note: "//" is matched purely textually, so an occurrence inside a string
// literal or a block comment is treated as a comment start as well.
string singleLineCommentsFilter()
{
    string::size_type start = 0;
    while ((start = codeSource.find("//", start)) != string::npos)
    {
        const string::size_type lineEnd = codeSource.find('\n', start);
        if (lineEnd == string::npos)
        {
            // No newline after the comment: it runs to the end of the text.
            // Scanning for '\n' one character at a time would never finish here.
            codeSource.erase(start);
            break;
        }
        // Erase the whole comment in one call; `start` now indexes the kept '\n'.
        codeSource.erase(start, lineEnd - start);
    }
    return codeSource;
}
// Removes every "/* ... */" comment from the global codeSource, in place.
//
// Each comment, from its opening "/*" through its closing "*/", is replaced
// by a single space so that the tokens on either side stay separated (this
// mirrors how a C compiler treats comments). Only the comment itself is
// touched; the character following "*/" is preserved. A '*' or '/' inside
// the comment body has no special meaning.
//
// If an opening "/*" has no matching "*/", prints
// "multilineCommments unmatch" and terminates the process with exit(0).
//
// Returns a copy of the updated codeSource.
string multilineCommmentsFileter()
{
    string::size_type commentStart = 0;
    while ((commentStart = codeSource.find("/*", commentStart)) != string::npos)
    {
        // Search after the two opening characters so that "/*/" is not
        // mistaken for a complete comment.
        const string::size_type commentEnd = codeSource.find("*/", commentStart + 2);
        if (commentEnd == string::npos)
        {
            cout << "multilineCommments unmatch" << endl;
            exit(0);
        }
        codeSource.replace(commentStart, commentEnd + 2 - commentStart, " ");
        // Resume right after the inserted space.
        ++commentStart;
    }
    return codeSource;
}
// Neutralizes line breaks and tabs in the global codeSource, in place.
//
// Every '\n', '\t', '\v' and '\r' is replaced by a single space. Replacing,
// rather than deleting, keeps tokens that were separated only by such a
// character apart (for example "else\nreturn" must not become "elsereturn");
// the scanner treats a space as a token separator.
//
// Returns a copy of the updated codeSource.
string specialCharacterFilter()
{
    for (string::size_type i = 0; i < codeSource.length(); ++i)
    {
        const char ch = codeSource[i];
        if (ch == '\n' or ch == '\t' or ch == '\v' or ch == '\r')
        {
            codeSource[i] = ' ';
        }
    }
    return codeSource;
}
// Scans the global codeSource from start to end and records every token in
// the global token table.
//
// Spaces are skipped. For any other character the token kind is decided from
// that first character (see judgeStartingCharactorType) and the matching
// scanner consumes the token and returns the index where scanning resumes.
// A character that fits no category prints an error message and terminates
// the process with exit(0).
void separateAndJudge()
{
    int cnt = 0;
    // The bounds check comes first so the index is validated before it is used.
    while (cnt < static_cast<int>(codeSource.length()))
    {
        const char ch = codeSource[cnt];
        if (ch == ' ')
        {
            ++cnt;
            continue;
        }

        switch (judgeStartingCharactorType(ch))
        {
        case 1: // starts with a digit
            cnt = digitStarted(cnt);
            break;
        case 2: // starts with a letter
            cnt = alphaStarted(cnt);
            break;
        case 3: // starts with an underscore
            cnt = underlineStarted(cnt);
            break;
        case 4: // bound symbol: always a one-character token
            tokens[transCharToString(ch)] = 4;
            cnt++;
            break;
        case 5: // starts with an operator character
            cnt = operatorStarted(cnt);
            break;
        case 6: // not part of any recognized category
            cout << "unrecognizable charactor!!!" << endl;
            cout << "please check grammer again." << endl;
            exit(0);
        default:
            cnt++;
            break;
        }
    }
}
// Classifies a character by the kind of token it can start.
//
// Returns:
//   1 - digit
//   2 - letter
//   3 - underscore
//   4 - bound symbol
//   5 - operator character
//   6 - none of the above
// The checks are applied in exactly this order and the first match wins.
int judgeStartingCharactorType(char ch)
{
    if (isDigit(ch))
    {
        return 1;
    }
    if (isAlpha(ch))
    {
        return 2;
    }
    if (ch == '_')
    {
        return 3;
    }
    if (isBoundSymbol(ch))
    {
        return 4;
    }
    if (isOperator(ch))
    {
        return 5;
    }
    return 6;
}
// Tells whether ch is one of the bound (delimiter) symbols:
// ( ) , ; { } and the double quote.
bool isBoundSymbol(char ch)
{
    switch (ch)
    {
    case '(':
    case ')':
    case ',':
    case ';':
    case '{':
    case '}':
    case '"':
        return true;
    default:
        return false;
    }
}
// Tells whether ch is one of the operator characters: + - * / = % > <
bool isOperator(char ch)
{
    // string::find only searches the stored characters, so '\0' never matches.
    static const string operatorChars = "+-*/=%><";
    return operatorChars.find(ch) != string::npos;
}
// Tells whether ch is an ASCII decimal digit, '0' through '9'.
bool isDigit(char ch)
{
    return '0' <= ch && ch <= '9';
}
// Tells whether ch is an ASCII letter, 'a'..'z' or 'A'..'Z'.
// The underscore is not a letter here.
bool isAlpha(char ch)
{
    const bool lowerCase = ('a' <= ch && ch <= 'z');
    const bool upperCase = ('A' <= ch && ch <= 'Z');
    return lowerCase || upperCase;
}
// Scans a numeric token that begins at index cnt of the global codeSource.
//
// The token is the starting digit followed by further digits and at most
// one decimal point, e.g. "42", "3.14" or "7.". A second '.' is not consumed,
// so malformed text such as "1.2.3" is no longer recorded as one number; the
// leftover '.' is handed back to the caller.
//
// The token is stored in the global token table with type code 3.
// Returns the index of the first character after the token.
int digitStarted(int cnt)
{
    const int length = static_cast<int>(codeSource.length());
    string digit;
    digit += codeSource[cnt];
    cnt++;

    bool seenDecimalPoint = false;
    while (cnt < length)
    {
        const char ch = codeSource[cnt];
        if (ch == '.' and not seenDecimalPoint)
        {
            seenDecimalPoint = true;
        }
        else if (not isDigit(ch))
        {
            break;
        }
        digit += ch;
        ++cnt;
    }

    tokens[digit] = 3;
    return cnt;
}
// Scans a word that begins with a letter at index cnt of the global codeSource.
//
// The word extends over every following letter, digit or underscore. It is
// stored in the global token table with type code 2 when it is a reserved
// word and type code 1 (identifier) otherwise.
// Returns the index of the first character after the word.
int alphaStarted(int cnt)
{
    int end = cnt + 1;
    while (true)
    {
        const char next = codeSource[end];
        const bool partOfWord = isAlpha(next) || isDigit(next) || next == '_';
        if (!partOfWord)
        {
            break;
        }
        ++end;
    }

    const string word = codeSource.substr(cnt, end - cnt);
    tokens[word] = isReservedWord(word) ? 2 : 1;
    return end;
}
// Scans an identifier that begins with an underscore at index cnt of the
// global codeSource.
//
// The identifier extends over every following letter, digit or underscore,
// the same continuation set that alphaStarted uses, so names such as
// "_my_var" or "__func" are kept as one token instead of being split at
// each inner underscore.
//
// The identifier is stored in the global token table with type code 1.
// Returns the index of the first character after the identifier.
int underlineStarted(int cnt)
{
    const int length = static_cast<int>(codeSource.length());
    string word;
    word += codeSource[cnt];
    cnt++;
    while (cnt < length and
           (isAlpha(codeSource[cnt]) or isDigit(codeSource[cnt]) or codeSource[cnt] == '_'))
    {
        word += codeSource[cnt];
        ++cnt;
    }
    tokens[word] = 1;
    return cnt;
}
// Scans an operator token that begins at index cnt of the global codeSource.
//
// "==" is recognized as a single two-character token; every other operator
// character forms a one-character token (other multi-character operators
// such as "<=" or "++" are not combined). The token is stored in the global
// token table with type code 5.
//
// Returns the index of the first character after the token. For "==" that is
// cnt + 2: both characters are consumed, so the second '=' is not scanned
// again and recorded as a separate "=" token.
int operatorStarted(int cnt)
{
    const int length = static_cast<int>(codeSource.length());
    string lexeme;
    if (codeSource[cnt] == '=' and cnt + 1 < length and codeSource[cnt + 1] == '=')
    {
        lexeme = "==";
        cnt += 2;
    }
    else
    {
        lexeme = transCharToString(codeSource[cnt]);
        cnt++;
    }
    tokens[lexeme] = 5;
    return cnt;
}
// Builds a string of length one whose only character is ch.
string transCharToString(char ch)
{
    return string(1, ch);
}
// Tells whether vocabulary exactly matches (case-sensitively) one of the
// 32 keywords listed below.
bool isReservedWord(string vocabulary)
{
    static const char* const keywords[] = {
        "auto", "break", "case", "char",
        "const", "continue", "default", "do",
        "double", "else", "enum", "extern",
        "float", "for", "goto", "if",
        "int", "long", "register", "return",
        "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union",
        "unsigned", "void", "volatile", "while"
    };
    const char* const* const keywordsEnd = keywords + sizeof(keywords) / sizeof(keywords[0]);
    return std::find(keywords, keywordsEnd, vocabulary) != keywordsEnd;
}
// Prints every entry of the global token table to stdout, one per line, in
// the form <lexeme,typeCode>, following the map's key order.
void showTokenData()
{
    map<string, int>::iterator entry = tokens.begin();
    while (entry != tokens.end())
    {
        const string& lexeme = entry->first;
        const int typeCode = entry->second;
        cout << "<" << lexeme << ',' << typeCode << ">" << endl;
        ++entry;
    }
}