Design and Implementation of a Lexical Analysis Program
Requirements for the lexical analysis program (lexical analyzer):
- Scan the character stream that makes up the source program from left to right
- Identify the words that carry lexical meaning (lexemes)
- Return a record for each word (word category, the word itself)
- Filter out spaces
- Skip comments
- Detect lexical errors
Program structure:
Input: character stream (how is it read in, and in what data structure is it stored?)
Processing:
- Traversal (what kind of traversal?)
- Lexical rules
Output: word stream (in what output form?)
- Tuples of (category code, word)
Word categories:
1. Identifier (10)
2. Unsigned integer (11)
3. Reserved words (one code per word)
4. Operators (one code per word)
5. Delimiters (one code per word)
Word symbol |
Category code |
Word symbol |
Category code |
begin |
1 |
: |
17 |
if |
2 |
:= |
18 |
then |
3 |
< |
20 |
while |
4 |
<= |
21 |
do |
5 |
<> |
22 |
end |
6 |
> |
23 |
l(l|d)* |
10 |
>= |
24 |
dd* |
11 |
= |
25 |
+ |
13 |
; |
26 |
- |
14 |
( |
27 |
* |
15 |
) |
28 |
/ |
16 |
# |
0 |
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 100 /* capacity of the source buffer prog, in characters */

/*
 * Scanner state shared by main() and scaner().
 *
 * p        index into prog of the next character to read
 * syn      category code of the token most recently produced by scaner();
 *          0 is the '#' end marker and -1 reports a lexical error
 * n, i     general-purpose counters (i is not referenced by the code
 *          visible in this file section)
 * prog     the source text being analysed
 * ch       the character most recently read from prog
 * token    text of the current token, NUL-terminated (at most 7 characters)
 */
int p = 0, syn, n, i;
char prog[SIZE], ch, token[8];

/*
 * Reserved words. scaner() assigns category code (index + 1), so the order
 * must follow the category table: begin=1, if=2, then=3, while=4, do=5, end=6.
 */
char *keyword[6] = { "begin", "if", "then", "while", "do", "end" };

/* Reads the next token from prog into token and stores its category in syn. */
void scaner(void);
main ()
{
P = 0;
the printf ( "Please enter the source string (with '#' end) : \ n-");
do
{
CH = getchar ();
PROG [P ++] = CH;
} the while (CH = '#'); // entry keyboard input string array PROG!
P = 0;
do
{
Scaner ();
Switch (SYN)
{
Case -1: the printf ( "lexical analysis error \ n-"); BREAK;
default: the printf ( "<%d,%s>\n",syn,token);break;
}
} while (syn = 0!) ; // Input # number, other kinds of syn code is 0, the end of the cycle
printf ( "lexical analysis success \ n-");
getchar ();
}
void Scaner ()
{
for (n-0 = ; n <8; n ++) // initialize token array
{token [n-] = '\ 0';}
n-= 0;
CH = PROG [P ++];
the while (CH == '')
{CH = PROG [P ++] ;}
iF ((CH> = 'a' && CH <= 'Z') || (CH> = 'a' && CH <= 'the Z')) // determines whether the input key
{
do {
token [n-++ ] = CH;
CH = PROG [P ++];
} the while ((CH> = 'A' && CH <= 'Z') || (CH> = 'A' && CH <= 'the Z') || (CH> = '0' && CH <= '. 9'));
SYN = 10;
for (n-= 0; n-<. 6; n-++) // contrast six keywords
{
IF (strcmp (token,keyword [n-]) == 0)
SYN = n-+. 1;
}
the P--;
}
the else IF (CH> = '0' && CH <= '. 9') // determines whether the input is an integer constant
{
the P-- ;
do
{
token[n++]=prog[p++];
ch=prog[p];
}while(ch>='0'&&ch<='9');
syn=11;
return;
}
else
{
switch(ch)
{
case '+':syn=13;token[0]=ch;break;
case '-':syn=14;token[0]=ch;break;
case '*':syn=15;token[0]=ch;break;
case '/':syn=16;token[0]=ch;break;
case ':':syn=17;token[0]=ch;
ch=prog[p++];
if(ch=='='){token[1]=ch;syn++;}
else p--;
break;
case '<':syn=20;token[0]=ch;
ch=prog[p++];
if(ch=='>'){token[1]=ch;syn++;}
else if(ch=='='){token[1]=ch;syn=syn+2;}
else p--;
break;
Case '>': SYN = 23 is; token [0] = CH;
CH PROG = [P ++];
IF (CH == '=') {token [. 1] = CH; SYN ++;}
the else the P--;
BREAK;
Case '=': SYN = 25; token [0] = CH; BREAK;
Case ';': SYN = 26 is; token [0] = CH; BREAK;
Case '(': SYN = 27; token [0] = CH; BREAK;
Case ')': SYN = 28; token [0] = CH; BREAK;
Case '#': SYN = 0; token [0] = CH; BREAK;
default: the printf ( "lexical analysis error, please! check whether the input illegal characters \ n-"); SYN = -1; BREAK;
}
}
}
Run result
Requirements for the lexical analysis program (lexical analyzer):
- Scan the character stream that makes up the source program from left to right
- Identify the words that carry lexical meaning (lexemes)
- Return a record for each word (word category, the word itself)
- Filter out spaces
- Skip comments
- Detect lexical errors
Program structure:
Input: character stream (how is it read in, and in what data structure is it stored?)
Processing:
- Traversal (what kind of traversal?)
- Lexical rules
Output: word stream (in what output form?)
- Tuples of (category code, word)
Word categories:
1. Identifier (10)
2. Unsigned integer (11)
3. Reserved words (one code per word)
4. Operators (one code per word)
5. Delimiters (one code per word)
Word symbol |
Category code |
Word symbol |
Category code |
begin |
1 |
: |
17 |
if |
2 |
:= |
18 |
then |
3 |
< |
20 |
while |
4 |
<= |
21 |
do |
5 |
<> |
22 |
end |
6 |
> |
23 |
l(l|d)* |
10 |
>= |
24 |
dd* |
11 |
= |
25 |
+ |
13 |
; |
26 |
- |
14 |
( |
27 |
* |
15 |
) |
28 |
/ |
16 |
# |
0 |
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 100 /* capacity of the source buffer prog, in characters */

/*
 * Scanner state shared by main() and scaner().
 *
 * p        index into prog of the next character to read
 * syn      category code of the token most recently produced by scaner();
 *          0 is the '#' end marker and -1 reports a lexical error
 * n, i     general-purpose counters (i is not referenced by the code
 *          visible in this file section)
 * prog     the source text being analysed
 * ch       the character most recently read from prog
 * token    text of the current token, NUL-terminated (at most 7 characters)
 */
int p = 0, syn, n, i;
char prog[SIZE], ch, token[8];

/*
 * Reserved words. scaner() assigns category code (index + 1), so the order
 * must follow the category table: begin=1, if=2, then=3, while=4, do=5, end=6.
 */
char *keyword[6] = { "begin", "if", "then", "while", "do", "end" };

/* Reads the next token from prog into token and stores its category in syn. */
void scaner(void);
main ()
{
P = 0;
the printf ( "Please enter the source string (with '#' end) : \ n-");
do
{
CH = getchar ();
PROG [P ++] = CH;
} the while (CH = '#'); // entry keyboard input string array PROG!
P = 0;
do
{
Scaner ();
Switch (SYN)
{
Case -1: the printf ( "lexical analysis error \ n-"); BREAK;
default: the printf ( "<%d,%s>\n",syn,token);break;
}
} while (syn = 0!) ; // Input # number, other kinds of syn code is 0, the end of the cycle
printf ( "lexical analysis success \ n-");
getchar ();
}
void Scaner ()
{
for (n-0 = ; n <8; n ++) // initialize token array
{token [n-] = '\ 0';}
n-= 0;
CH = PROG [P ++];
the while (CH == '')
{CH = PROG [P ++] ;}
iF ((CH> = 'a' && CH <= 'Z') || (CH> = 'a' && CH <= 'the Z')) // determines whether the input key
{
do {
token [n-++ ] = CH;
CH = PROG [P ++];
} the while ((CH> = 'A' && CH <= 'Z') || (CH> = 'A' && CH <= 'the Z') || (CH> = '0' && CH <= '. 9'));
SYN = 10;
for (n-= 0; n-<. 6; n-++) // contrast six keywords
{
IF (strcmp (token,keyword [n-]) == 0)
SYN = n-+. 1;
}
the P--;
}
the else IF (CH> = '0' && CH <= '. 9') // determines whether the input is an integer constant
{
the P-- ;
do
{
token[n++]=prog[p++];
ch=prog[p];
}while(ch>='0'&&ch<='9');
syn=11;
return;
}
else
{
switch(ch)
{
case '+':syn=13;token[0]=ch;break;
case '-':syn=14;token[0]=ch;break;
case '*':syn=15;token[0]=ch;break;
case '/':syn=16;token[0]=ch;break;
case ':':syn=17;token[0]=ch;
ch=prog[p++];
if(ch=='='){token[1]=ch;syn++;}
else p--;
break;
case '<':syn=20;token[0]=ch;
ch=prog[p++];
if(ch=='>'){token[1]=ch;syn++;}
else if(ch=='='){token[1]=ch;syn=syn+2;}
else p--;
break;
Case '>': SYN = 23 is; token [0] = CH;
CH PROG = [P ++];
IF (CH == '=') {token [. 1] = CH; SYN ++;}
the else the P--;
BREAK;
Case '=': SYN = 25; token [0] = CH; BREAK;
Case ';': SYN = 26 is; token [0] = CH; BREAK;
Case '(': SYN = 27; token [0] = CH; BREAK;
Case ')': SYN = 28; token [0] = CH; BREAK;
Case '#': SYN = 0; token [0] = CH; BREAK;
default: the printf ( "lexical analysis error, please! check whether the input illegal characters \ n-"); SYN = -1; BREAK;
}
}
}
Run result