Design and implementation of a lexical analysis program

Design and Implementation of lexical analysis program

Requirements for the lexical analysis program (lexical analyzer):

- scan the character stream that makes up the source program from left to right

- identify the lexically meaningful words (lexemes)

- return a word record (word class, the word itself)

- filter out spaces

- skip comments

- detect lexical errors

 

Program Structure:

Input: character stream (how it is entered, and what data structure stores it)

Processing:

- traversal (what kind of traversal)

- lexical rules

Output: word stream (in what form it is output)

- tuple

 

Word classes:

1. Identifier (10)

2. Unsigned integer (11)

3. Reserved words (one code per word)

4. Operators (one code per word)

5. Delimiters (one code per word)

Word symbol    Category code    Word symbol    Category code
begin          1                :              17
if             2                :=             18
then           3                <              20
while          4                <=             21
do             5                <>             22
end            6                >              23
l(l|d)*        10               >=             24
dd*            11               =              25
+              13               ;              26
-              14               (              27
*              15               )              28
/              16               #              0

Code:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SIZE 100          /* capacity of the source buffer, terminating NUL included */
#define KEYWORD_COUNT 6   /* number of reserved words */

int p = 0;                /* index of the next unread character in prog */
int syn;                  /* category code of the token found by the last scaner() call */

/*
 * prog  - the source text typed by the user, ending with '#' and a NUL.
 * ch    - the character currently being examined by scaner().
 * token - text of the most recent token. It is as large as prog, so a
 *         lexeme taken from prog always fits together with its NUL.
 */
char prog[SIZE], ch, token[SIZE];

/*
 * Reserved words. A word's category code is its index + 1, so the order
 * must follow the category-code table: begin=1, if=2, then=3, while=4,
 * do=5, end=6.
 */
const char *keyword[KEYWORD_COUNT] = {"begin", "if", "then", "while", "do", "end"};

void scaner(void);

/*
 * Reads source text from standard input up to and including the first '#',
 * then prints one "<category code,token>" line per token until the '#'
 * token (category code 0) has been printed.
 */
int main(void)
{
    int c;

    p = 0;
    printf("Please enter the source string (with '#' end) : \n");

    /* Stop two short of the capacity: one slot for a forced '#', one for the NUL. */
    while (p < SIZE - 2 && (c = getchar()) != EOF) {
        prog[p++] = (char)c;
        if (c == '#') {
            break;
        }
    }
    /*
     * The scanning loop below only stops on the '#' token, so make sure the
     * buffer ends with one even when input hit EOF or filled the buffer.
     */
    if (p == 0 || prog[p - 1] != '#') {
        prog[p++] = '#';
    }
    prog[p] = '\0';

    p = 0;
    do {
        scaner();
        switch (syn) {
        case -1:
            printf("lexical analysis error \n");
            break;
        default:
            printf("<%d,%s>\n", syn, token);
            break;
        }
    } while (syn != 0);

    printf("lexical analysis success \n");
    getchar();   /* read one more character before exiting */
    return 0;
}

/*
 * Extracts the next token from prog, starting at index p.
 *
 * On return:
 *   token - NUL-terminated text of the token,
 *   syn   - its category code: 1..6 reserved word, 10 identifier,
 *           11 unsigned integer, 13..28 operator or delimiter,
 *           0 for the '#' end marker, -1 for an illegal character,
 *   p     - index of the first character after the token.
 */
void scaner(void)
{
    int n = 0;

    memset(token, 0, sizeof token);

    /* Skip blanks, tabs and newlines in front of the token. */
    do {
        ch = prog[p++];
    } while (isspace((unsigned char)ch));

    /* Identifier or reserved word: letter (letter | digit)* */
    if (isalpha((unsigned char)ch)) {
        do {
            if (n < SIZE - 1) {
                token[n++] = ch;
            }
            ch = prog[p++];
        } while (isalnum((unsigned char)ch));
        p--;   /* give back the character that ended the lexeme */

        syn = 10;
        for (int k = 0; k < KEYWORD_COUNT; k++) {
            if (strcmp(token, keyword[k]) == 0) {
                syn = k + 1;
                break;
            }
        }
        return;
    }

    /* Unsigned integer: digit digit* */
    if (isdigit((unsigned char)ch)) {
        do {
            if (n < SIZE - 1) {
                token[n++] = ch;
            }
            ch = prog[p++];
        } while (isdigit((unsigned char)ch));
        p--;   /* give back the character that ended the lexeme */
        syn = 11;
        return;
    }

    /* Operators and delimiters; prog[p] is the one-character lookahead. */
    token[0] = ch;
    switch (ch) {
    case '+': syn = 13; break;
    case '-': syn = 14; break;
    case '*': syn = 15; break;
    case '/': syn = 16; break;
    case ':':
        syn = 17;
        if (prog[p] == '=') {          /* ":=" */
            token[1] = prog[p++];
            syn = 18;
        }
        break;
    case '<':
        syn = 20;
        if (prog[p] == '=') {          /* "<=" */
            token[1] = prog[p++];
            syn = 21;
        } else if (prog[p] == '>') {   /* "<>" */
            token[1] = prog[p++];
            syn = 22;
        }
        break;
    case '>':
        syn = 23;
        if (prog[p] == '=') {          /* ">=" */
            token[1] = prog[p++];
            syn = 24;
        }
        break;
    case '=': syn = 25; break;
    case ';': syn = 26; break;
    case '(': syn = 27; break;
    case ')': syn = 28; break;
    case '#': syn = 0; break;
    case '\0':
        /* Ran off the end of the text: stay on the NUL and report the end marker. */
        p--;
        token[0] = '#';
        syn = 0;
        break;
    default:
        printf("lexical analysis error, please! check whether the input illegal characters \n");
        syn = -1;
        break;
    }
}

Run result

 

 

 

Requirements for the lexical analysis program (lexical analyzer):

- scan the character stream that makes up the source program from left to right

- identify the lexically meaningful words (lexemes)

- return a word record (word class, the word itself)

- filter out spaces

- skip comments

- detect lexical errors

 

Program Structure:

Input: character stream (how it is entered, and what data structure stores it)

Processing:

- traversal (what kind of traversal)

- lexical rules

Output: word stream (in what form it is output)

- tuple

 

Word classes:

1. Identifier (10)

2. Unsigned integer (11)

3. Reserved words (one code per word)

4. Operators (one code per word)

5. Delimiters (one code per word)

Word symbol    Category code    Word symbol    Category code
begin          1                :              17
if             2                :=             18
then           3                <              20
while          4                <=             21
do             5                <>             22
end            6                >              23
l(l|d)*        10               >=             24
dd*            11               =              25
+              13               ;              26
-              14               (              27
*              15               )              28
/              16               #              0

Code:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SIZE 100          /* capacity of the source buffer, terminating NUL included */
#define KEYWORD_COUNT 6   /* number of reserved words */

int p = 0;                /* index of the next unread character in prog */
int syn;                  /* category code of the token found by the last scaner() call */

/*
 * prog  - the source text typed by the user, ending with '#' and a NUL.
 * ch    - the character currently being examined by scaner().
 * token - text of the most recent token. It is as large as prog, so a
 *         lexeme taken from prog always fits together with its NUL.
 */
char prog[SIZE], ch, token[SIZE];

/*
 * Reserved words. A word's category code is its index + 1, so the order
 * must follow the category-code table: begin=1, if=2, then=3, while=4,
 * do=5, end=6.
 */
const char *keyword[KEYWORD_COUNT] = {"begin", "if", "then", "while", "do", "end"};

void scaner(void);

/*
 * Reads source text from standard input up to and including the first '#',
 * then prints one "<category code,token>" line per token until the '#'
 * token (category code 0) has been printed.
 */
int main(void)
{
    int c;

    p = 0;
    printf("Please enter the source string (with '#' end) : \n");

    /* Stop two short of the capacity: one slot for a forced '#', one for the NUL. */
    while (p < SIZE - 2 && (c = getchar()) != EOF) {
        prog[p++] = (char)c;
        if (c == '#') {
            break;
        }
    }
    /*
     * The scanning loop below only stops on the '#' token, so make sure the
     * buffer ends with one even when input hit EOF or filled the buffer.
     */
    if (p == 0 || prog[p - 1] != '#') {
        prog[p++] = '#';
    }
    prog[p] = '\0';

    p = 0;
    do {
        scaner();
        switch (syn) {
        case -1:
            printf("lexical analysis error \n");
            break;
        default:
            printf("<%d,%s>\n", syn, token);
            break;
        }
    } while (syn != 0);

    printf("lexical analysis success \n");
    getchar();   /* read one more character before exiting */
    return 0;
}

/*
 * Extracts the next token from prog, starting at index p.
 *
 * On return:
 *   token - NUL-terminated text of the token,
 *   syn   - its category code: 1..6 reserved word, 10 identifier,
 *           11 unsigned integer, 13..28 operator or delimiter,
 *           0 for the '#' end marker, -1 for an illegal character,
 *   p     - index of the first character after the token.
 */
void scaner(void)
{
    int n = 0;

    memset(token, 0, sizeof token);

    /* Skip blanks, tabs and newlines in front of the token. */
    do {
        ch = prog[p++];
    } while (isspace((unsigned char)ch));

    /* Identifier or reserved word: letter (letter | digit)* */
    if (isalpha((unsigned char)ch)) {
        do {
            if (n < SIZE - 1) {
                token[n++] = ch;
            }
            ch = prog[p++];
        } while (isalnum((unsigned char)ch));
        p--;   /* give back the character that ended the lexeme */

        syn = 10;
        for (int k = 0; k < KEYWORD_COUNT; k++) {
            if (strcmp(token, keyword[k]) == 0) {
                syn = k + 1;
                break;
            }
        }
        return;
    }

    /* Unsigned integer: digit digit* */
    if (isdigit((unsigned char)ch)) {
        do {
            if (n < SIZE - 1) {
                token[n++] = ch;
            }
            ch = prog[p++];
        } while (isdigit((unsigned char)ch));
        p--;   /* give back the character that ended the lexeme */
        syn = 11;
        return;
    }

    /* Operators and delimiters; prog[p] is the one-character lookahead. */
    token[0] = ch;
    switch (ch) {
    case '+': syn = 13; break;
    case '-': syn = 14; break;
    case '*': syn = 15; break;
    case '/': syn = 16; break;
    case ':':
        syn = 17;
        if (prog[p] == '=') {          /* ":=" */
            token[1] = prog[p++];
            syn = 18;
        }
        break;
    case '<':
        syn = 20;
        if (prog[p] == '=') {          /* "<=" */
            token[1] = prog[p++];
            syn = 21;
        } else if (prog[p] == '>') {   /* "<>" */
            token[1] = prog[p++];
            syn = 22;
        }
        break;
    case '>':
        syn = 23;
        if (prog[p] == '=') {          /* ">=" */
            token[1] = prog[p++];
            syn = 24;
        }
        break;
    case '=': syn = 25; break;
    case ';': syn = 26; break;
    case '(': syn = 27; break;
    case ')': syn = 28; break;
    case '#': syn = 0; break;
    case '\0':
        /* Ran off the end of the text: stay on the NUL and report the end marker. */
        p--;
        token[0] = '#';
        syn = 0;
        break;
    default:
        printf("lexical analysis error, please! check whether the input illegal characters \n");
        syn = -1;
        break;
    }
}

Run result

 

 

 

Guess you like

Origin www.cnblogs.com/lwwwjl123/p/11653589.html