版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wuzh1230/article/details/52241937
Compiler composite with a Scanner(Lexical parser) and Parser(Syntax parser).
Following source codes are from Compiler.Construction.Using.Flex.And.Bison
Build Directives
proto:
bison -dv proto.y
gcc -c proto.tab.c
flex proto.l
gcc -c lex.yy.c
gcc -o proto proto.tab.o lex.yy.o -lm
Scanner
/***************************************************************************
Scanner for the Simple language
***************************************************************************/
%{
/*=========================================================================
C-libraries and Token definitions
=========================================================================*/
#include <string.h> /* for strdup */
/*#include <stdlib.h> */ /* for atoi */
#include "proto.tab.h" /* for token definitions and yylval */
%}
/*=========================================================================
TOKEN Definitions
=========================================================================*/
DIGIT [0-9]
ID [a-z][a-z0-9]*
/*=========================================================================
REGULAR EXPRESSIONS defining the tokens for the Simple language
=========================================================================*/
%%
":=" { return(ASSGNOP); }
{DIGIT}+ { yylval.intval = atoi( yytext );
return(NUMBER); }
do { return(DO); }
else { return(ELSE); }
end { return(END); }
fi { return(FI); }
if { return(IF); }
in { return(IN); }
integer { return(INTEGER); }
let { return(LET); }
read { return(READ); }
skip { return(SKIP); }
then { return(THEN); }
while { return(WHILE); }
write { return(WRITE); }
{ID} { yylval.id = (char *) strdup(yytext);
return(IDENTIFIER); }
[ \t\n]+ /* eat up whitespace */
. { return(yytext[0]);}
%%
int yywrap(void){}
/************************** End Scanner File *****************************/
Parser
%{/*************************************************************************
Compiler for the Simple language
***************************************************************************/
/*=========================================================================
C Libraries, Symbol Table, Code Generator & other C code
=========================================================================*/
#include <stdio.h> /* For I/O */
#include <stdlib.h> /* For malloc here and in symbol table */
#include <string.h> /* For strcmp in symbol table */
#include "sym.h" /* Symbol Table */
#include "nvm.h" /* Stack Machine */
#include "codegen.h" /* Code Generator */
#define YYDEBUG 1 /* For Debugging */
int errors; /* Error Count */
/*-------------------------------------------------------------------------
The following support backpatching
-------------------------------------------------------------------------*/
struct lbs /* Labels for data, if and while */
{
int for_goto;
int for_jmp_false;
};
struct lbs * newlblrec() /* Allocate space for the labels */
{
return (struct lbs *) malloc(sizeof(struct lbs));
}
/*-------------------------------------------------------------------------
Install identifier & check if previously defined.
-------------------------------------------------------------------------*/
install ( char *sym_name )
{
symrec *s;
s = getsym (sym_name);
if (s == 0)
s = putsym (sym_name);
else { errors++;
printf( "%s is already defined\n", sym_name );
}
}
/*-------------------------------------------------------------------------
If identifier is defined, generate code
-------------------------------------------------------------------------*/
context_check( enum code_ops operation, char *sym_name )
{
symrec *identifier;
identifier = getsym( sym_name );
if ( identifier == 0 )
{
errors++;
printf( "%s", sym_name );
printf( "%s\n", " is an undeclared identifier" );
}
else gen_code( operation, identifier->offset );
}
/*=========================================================================
SEMANTIC RECORDS
=========================================================================*/
%}
%union semrec /* The Semantic Records */
{
int intval; /* Integer values */
char *id; /* Identifiers */
struct lbs *lbls; /* For backpatching */
}
/*=========================================================================
TOKENS
=========================================================================*/
%start program
%token <intval> NUMBER /* Simple integer */
%token <id> IDENTIFIER /* Simple identifier */
%token <lbls> IF WHILE /* For backpatching labels */
%token SKIP THEN ELSE FI DO END
%token INTEGER READ WRITE LET IN
%token ASSGNOP
/*=========================================================================
OPERATOR PRECEDENCE
=========================================================================*/
%left '-' '+'
%left '*' '/'
%right '^'
/*=========================================================================
GRAMMAR RULES for the Simple language
=========================================================================*/
%%
program : LET
declarations
IN { gen_code( DATA, data_location() - 1 ); }
commands
END { gen_code( HALT, 0 ); YYACCEPT; }
;
declarations : /* empty */
| INTEGER id_seq IDENTIFIER '.' { install( $3 ); }
;
id_seq : /* empty */
| id_seq IDENTIFIER ',' { install( $2 ); }
;
commands : /* empty */
| commands command ';'
;
command : SKIP
| READ IDENTIFIER { context_check( READ_INT, $2 ); }
| WRITE exp { gen_code( WRITE_INT, 0 ); }
| IDENTIFIER ASSGNOP exp { context_check( STORE, $1 ); }
| IF exp { $1 = (struct lbs *) newlblrec();
$1->for_jmp_false = reserve_loc(); }
THEN commands { $1->for_goto = reserve_loc(); }
ELSE { back_patch( $1->for_jmp_false,
JMP_FALSE,
gen_label() ); }
commands
FI { back_patch( $1->for_goto, GOTO, gen_label() ); }
| WHILE { $1 = (struct lbs *) newlblrec();
$1->for_goto = gen_label(); }
exp { $1->for_jmp_false = reserve_loc(); }
DO
commands
END { gen_code( GOTO, $1->for_goto );
back_patch( $1->for_jmp_false,
JMP_FALSE,
gen_label() ); }
;
exp : NUMBER { gen_code( LD_INT, $1 ); }
| IDENTIFIER { context_check( LD_VAR, $1 ); }
| exp '<' exp { gen_code( LT, 0 ); }
| exp '=' exp { gen_code( EQ, 0 ); }
| exp '>' exp { gen_code( GT, 0 ); }
| exp '+' exp { gen_code( ADD, 0 ); }
| exp '-' exp { gen_code( SUB, 0 ); }
| exp '*' exp { gen_code( MULT, 0 ); }
| exp '/' exp { gen_code( DIV, 0 ); }
| exp '^' exp { gen_code( PWR, 0 ); }
| '(' exp ')'
;
%%
/*=========================================================================
MAIN
=========================================================================*/
main( int argc, char *argv[] )
{
extern FILE *yyin;
++argv; --argc;
yyin = fopen( argv[0], "r" );
/*yydebug = 1;*/
errors = 0;
yyparse ();
printf ( "Parse Completed\n" );
if ( errors == 0 )
{
print_code ();
fetch_execute_cycle();
}
}
/*=========================================================================
YYERROR
=========================================================================*/
yyerror ( char *s ) /* Called by yyparse on error */
{
errors++;
printf ("%s\n", s);
}
/**************************** End Grammar File ***************************/
Utility
Symbol table
/***************************************************************************
Symbol Table Module
***************************************************************************/
/*=========================================================================
DECLARATIONS
=========================================================================*/
/*-------------------------------------------------------------------------
SYMBOL TABLE RECORD
-------------------------------------------------------------------------*/
struct symrec
{
char *name; /* name of symbol */
int offset; /* data offset */
struct symrec *next; /* link field */
};
typedef struct symrec symrec;
/*-------------------------------------------------------------------------
SYMBOL TABLE ENTRY
-------------------------------------------------------------------------*/
symrec *identifier;
/*-------------------------------------------------------------------------
SYMBOL TABLE
Implementation: a chain of records.
------------------------------------------------------------------------*/
symrec *sym_table = (symrec *)0; /* The pointer to the Symbol Table */
/*========================================================================
Operations: Putsym, Getsym
========================================================================*/
symrec * putsym (char *sym_name)
{
symrec *ptr;
ptr = (symrec *) malloc (sizeof(symrec));
ptr->name = (char *) malloc (strlen(sym_name)+1);
strcpy (ptr->name,sym_name);
ptr->offset = data_location();
ptr->next = (struct symrec *)sym_table;
sym_table = ptr;
return ptr;
}
symrec * getsym (char *sym_name)
{
symrec *ptr;
for ( ptr = sym_table;
ptr != (symrec *) 0;
ptr = (symrec *)ptr->next )
if (strcmp (ptr->name,sym_name) == 0)
return ptr;
return 0;
}
/************************** End Symbol Table **************************/
Code Generator
/***************************************************************************
Code Generator
***************************************************************************/
/*-------------------------------------------------------------------------
Data Segment
-------------------------------------------------------------------------*/
int data_offset = 0; /* Initial offset */
int data_location() /* Reserves a data location */
{
return data_offset++;
}
/*-------------------------------------------------------------------------
Code Segment
-------------------------------------------------------------------------*/
int code_offset = 0; /* Initial offset */
int gen_label() /* Returns current offset */
{
return code_offset;
}
int reserve_loc() /* Reserves a code location */
{
return code_offset++;
}
/* Generates code at current location */
void gen_code( enum code_ops operation, int arg )
{ code[code_offset].op = operation;
code[code_offset++].arg = arg;
}
/* Generates code at a reserved location */
void back_patch( int addr, enum code_ops operation, int arg )
{
code[addr].op = operation;
code[addr].arg = arg;
}
/*-------------------------------------------------------------------------
Print Code to stdio
-------------------------------------------------------------------------*/
void print_code()
{
int i = 0;
while (i < code_offset) {
printf("%3ld: %-10s%4ld\n",i,op_name[(int) code[i].op], code[i].arg );
i++;
}
}
/************************** End Code Generator **************************/
VM
/***************************************************************************
Stack Machine
***************************************************************************/
/*=========================================================================
DECLARATIONS
=========================================================================*/
/* OPERATIONS: Internal Representation */
enum code_ops { HALT, STORE, JMP_FALSE, GOTO,
DATA, LD_INT, LD_VAR,
READ_INT, WRITE_INT,
LT, EQ, GT, ADD, SUB, MULT, DIV, PWR };
/* OPERATIONS: External Representation */
char *op_name[] = {"halt", "store", "jmp_false", "goto", "data", "ld_int", "ld_var",
"in_int", "out_int",
"lt", "eq", "gt", "add", "sub", "mult", "div", "pwr" };
struct instruction
{
enum code_ops op;
int arg;
};
/* CODE Array */
struct instruction code[999];
/* RUN-TIME Stack */
int stack[999];
/*-------------------------------------------------------------------------
Registers
-------------------------------------------------------------------------*/
int pc = 0;
struct instruction ir;
int ar = 0;
int top = 0;
char ch;
/*=========================================================================
Fetch Execute Cycle
=========================================================================*/
void fetch_execute_cycle()
{ do { /*printf( "PC = %3d IR.arg = %8d AR = %3d Top = %3d,%8d\n",
pc, ir.arg, ar, top, stack[top]); */
/* Fetch */
ir = code[pc++];
/* Execute */
switch (ir.op) {
case HALT : printf( "halt\n" ); break;
case READ_INT : printf( "Input: " );
scanf( "%ld", &stack[ar+ir.arg] ); break;
case WRITE_INT : printf( "Output: %d\n", stack[top--] ); break;
case STORE : stack[ir.arg] = stack[top--]; break;
case JMP_FALSE : if ( stack[top--] == 0 )
pc = ir.arg;
break;
case GOTO : pc = ir.arg; break;
case DATA : top = top + ir.arg; break;
case LD_INT : stack[++top] = ir.arg; break;
case LD_VAR : stack[++top] = stack[ar+ir.arg]; break;
case LT : if ( stack[top-1] < stack[top] )
stack[--top] = 1;
else stack[--top] = 0;
break;
case EQ : if ( stack[top-1] == stack[top] )
stack[--top] = 1;
else stack[--top] = 0;
break;
case GT : if ( stack[top-1] > stack[top] )
stack[--top] = 1;
else stack[--top] = 0;
break;
case ADD : stack[top-1] = stack[top-1] + stack[top];
top--;
break;
case SUB : stack[top-1] = stack[top-1] - stack[top];
top--;
break;
case MULT : stack[top-1] = stack[top-1] * stack[top];
top--;
break;
case DIV : stack[top-1] = stack[top-1] / stack[top];
top--;
break;
case PWR : stack[top-1] = stack[top-1] * stack[top];
top--;
break;
default : printf( "%sInternal Error: Memory Dump\n" );
break;
}
}
while (ir.op != HALT);
}
/*************************** End Stack Machine **************************/
Test Script
let
integer n,x.
in
read n;
if n < 10 then
x := 1;
else
skip;
fi;
while n < 10 do
if n > 2 then
n := 1;
write n;
else
skip;
fi;
x := 5*x;
n := n+1;
end;
skip;
write n;
write x;
end