【脚本】Compiler for Script

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wuzh1230/article/details/52241937

A compiler is composed of a scanner (lexical analyzer) and a parser (syntax analyzer).

The following source code is from Compiler.Construction.Using.Flex.And.Bison.

Build Directives

# Build the "proto" compiler: generate the parser from proto.y with bison and
# the scanner from proto.l with flex, compile both generated C files, then
# link the two objects (with the math library) into the executable "proto".
# bison runs first because the scanner includes the generated proto.tab.h.
# NOTE(review): make requires every recipe line to start with a TAB; the
# leading spaces below look like a rendering artifact -- confirm before use.
proto:
    bison -dv proto.y
    gcc -c proto.tab.c
    flex proto.l
    gcc -c lex.yy.c
    gcc -o proto proto.tab.o lex.yy.o -lm

Scanner

/***************************************************************************
Scanner for the Simple language
***************************************************************************/
%{
/*=========================================================================
C-libraries and Token  definitions
=========================================================================*/
#include <string.h>     /* for strdup                       */
#include <stdlib.h>     /* for atoi (used by the NUMBER rule; must be
                           declared, implicit declarations are not
                           valid in C99 and later)           */
#include "proto.tab.h" /* for token definitions and yylval */
%}
/*=========================================================================
TOKEN Definitions
=========================================================================*/
DIGIT    [0-9]
ID       [a-z][a-z0-9]*
/*=========================================================================
REGULAR EXPRESSIONS defining the tokens for the Simple language
=========================================================================*/
%%
    /* Rules of the scanner, in priority order:
     *   ":="        -> ASSGNOP
     *   {DIGIT}+    -> NUMBER, with the converted value in yylval.intval
     *   keywords    -> one token each (DO, ELSE, END, FI, IF, IN, INTEGER,
     *                  LET, READ, SKIP, THEN, WHILE, WRITE); they are listed
     *                  before {ID} so an exact keyword match is reported as
     *                  the keyword and not as an identifier
     *   {ID}        -> IDENTIFIER, with a strdup'ed copy of the text in
     *                  yylval.id
     *   blanks, tabs and newlines are discarded
     *   any other single character is returned as its own character code
     */
":="     { return(ASSGNOP);  }
{DIGIT}+ { yylval.intval = atoi( yytext );
return(NUMBER);   }
do       { return(DO);       }
else     { return(ELSE);     }
end      { return(END);      }
fi       { return(FI);       }
if       { return(IF);       }
in       { return(IN);       }
integer  { return(INTEGER);  }
let      { return(LET);      }
read     { return(READ);     }
skip     { return(SKIP);     }
then     { return(THEN);     }
while    { return(WHILE);    }
write    { return(WRITE);    }
{ID}     { yylval.id = (char *) strdup(yytext);
return(IDENTIFIER);    }
[ \t\n]+ /* eat up whitespace */
.        { return(yytext[0]);}
%%

/* Called by the generated scanner when it reaches end of input.
   Returning 1 tells it that no further input file follows, so yylex()
   reports end-of-file.  (The original body returned no value at all,
   which is undefined behaviour for a function declared to return int.) */
int yywrap(void)
{
    return 1;
}
/************************** End Scanner File *****************************/

Parser

%{/*************************************************************************
Compiler for the Simple language
***************************************************************************/
/*=========================================================================
C Libraries, Symbol Table, Code Generator & other C code
=========================================================================*/
#include <stdio.h>        /* For I/O                                     */
#include <stdlib.h>       /* For malloc here and in symbol table         */
#include <string.h>       /* For strcmp in symbol table                  */
#include "sym.h"           /* Symbol Table                                */
#include "nvm.h"           /* Stack Machine                               */
#include "codegen.h"           /* Code Generator                              */
#define  YYDEBUG 1        /* For Debugging                               */
int  errors;              /* Error Count                                 */
/*-------------------------------------------------------------------------
The following support backpatching
-------------------------------------------------------------------------*/
struct  lbs              /* Labels for data, if and while               */
{
    int for_goto;          /* if: reserved slot later patched with GOTO;
                              while: code address the closing GOTO jumps to */
    int for_jmp_false;     /* reserved slot later patched with JMP_FALSE     */
};
/* Allocate a zero-initialised label record for backpatching.
   Never returns NULL: the program is terminated with an error message
   when memory is exhausted, so callers may dereference the result. */
struct lbs * newlblrec(void)
{
    struct lbs *rec = (struct lbs *) malloc(sizeof(struct lbs));
    if (rec == NULL)
    {
        fprintf(stderr, "out of memory while allocating a label record\n");
        exit(EXIT_FAILURE);
    }
    rec->for_goto      = 0;
    rec->for_jmp_false = 0;
    return rec;
}
/*-------------------------------------------------------------------------
Install identifier & check if previously defined.
-------------------------------------------------------------------------*/
/* Enter sym_name into the symbol table unless it is already there.
   A redefinition (or a failed insertion) is counted in `errors` and
   reported on standard output.
   Returns 0 when the symbol was installed, 1 when an error was recorded. */
int install ( char *sym_name )
{
    if (getsym (sym_name) != 0)
    {
        errors++;
        printf( "%s is already defined\n", sym_name );
        return 1;
    }
    if (putsym (sym_name) == 0)
    {
        /* putsym could not create the record */
        errors++;
        printf( "%s could not be installed: out of memory\n", sym_name );
        return 1;
    }
    return 0;
}
/*-------------------------------------------------------------------------
If identifier is defined, generate code
-------------------------------------------------------------------------*/
/* Emit `operation` with the data offset of sym_name as its argument.
   When sym_name is not in the symbol table no code is generated; the
   error is counted in `errors` and reported on standard output.
   Returns 0 when code was generated, 1 when the identifier is undeclared. */
int context_check( enum code_ops operation, char *sym_name )
{
    symrec *identifier = getsym( sym_name );
    if ( identifier == 0 )
    {
        errors++;
        printf( "%s is an undeclared identifier\n", sym_name );
        return 1;
    }
    gen_code( operation, identifier->offset );
    return 0;
}
/*=========================================================================
SEMANTIC RECORDS
=========================================================================*/
%}
/* Semantic value carried by tokens: which member is used is selected by the
   <tag> given in the %token declarations below. */
%union semrec             /* The Semantic Records                        */
{
    int     intval;          /* Value of a NUMBER token                     */
    char    *id;             /* Text of an IDENTIFIER token                 */
    struct lbs *lbls;        /* Label record attached to IF / WHILE for
                                backpatching                                */
}
/*=========================================================================
TOKENS
=========================================================================*/
%start program
%token <intval>  NUMBER          /* Integer literal, value in intval     */
%token <id>      IDENTIFIER      /* Identifier, text in id               */
%token <lbls>    IF WHILE        /* Carry the backpatching label record  */
%token SKIP THEN ELSE FI DO END
%token INTEGER READ WRITE LET IN
%token ASSGNOP
/*=========================================================================
OPERATOR PRECEDENCE
=========================================================================*/
/* Listed from lowest to highest precedence.
   The comparison operators must be declared as well: a token or rule
   without a precedence makes bison resolve the conflict by shifting, so
   without the first line "a + b < c" is parsed as "a + (b < c)". */
%left '<' '=' '>'
%left '-' '+'
%left '*' '/'
%right '^'
/*=========================================================================
GRAMMAR RULES for the Simple language
=========================================================================*/
%%
/* A program is: LET declarations IN commands END.
   After IN a DATA instruction is emitted whose argument is
   data_location() - 1; at END a HALT instruction is emitted and the parse
   is accepted. */
program : LET
declarations
IN           { gen_code( DATA, data_location() - 1 );          }
commands
END          { gen_code( HALT, 0 ); YYACCEPT;                  }
;
/* Optional declaration: INTEGER followed by comma-terminated identifiers
   (id_seq) and a final identifier closed by '.'.  Each identifier is
   entered into the symbol table with install(). */
declarations : /* empty */
| INTEGER id_seq IDENTIFIER '.' { install( $3 );                      }
;
id_seq : /* empty */
| id_seq IDENTIFIER ','  { install( $2 );                             }
;
/* Zero or more commands, each terminated by ';'. */
commands : /* empty */
| commands command ';'
;
/* A single command.  SKIP generates no code. */
command : SKIP
/* read / write / assignment: the operand code (if any) has already been
   emitted by exp; context_check emits the instruction only when the
   identifier is declared. */
| READ IDENTIFIER   { context_check( READ_INT, $2 );                  }
| WRITE exp         { gen_code( WRITE_INT, 0 );                       }
| IDENTIFIER ASSGNOP exp { context_check( STORE, $1 );                }
/* if exp then commands else commands fi
   - after exp: allocate the label record (stored in $1, the IF token's
     value) and reserve a code slot for the conditional jump;
   - after the then-branch: reserve a slot for the jump over the else-branch;
   - at ELSE: patch the first slot with JMP_FALSE to the current address
     (start of the else-branch);
   - at FI: patch the second slot with GOTO to the current address. */
| IF exp            { $1 = (struct lbs *) newlblrec();
$1->for_jmp_false = reserve_loc();              }
THEN commands     { $1->for_goto = reserve_loc();                   }
ELSE              { back_patch( $1->for_jmp_false,
JMP_FALSE,
gen_label() );                       }
commands
FI                { back_patch( $1->for_goto, GOTO, gen_label() );  }
/* while exp do commands end
   - before exp: remember the current code address as the loop start;
   - after exp: reserve a slot for the exit jump;
   - at END: emit GOTO back to the loop start, then patch the reserved slot
     with JMP_FALSE to the address following the loop. */
| WHILE             { $1 = (struct lbs *) newlblrec();
$1->for_goto = gen_label();                     }
exp            { $1->for_jmp_false = reserve_loc();              }
DO
commands
END               { gen_code( GOTO, $1->for_goto );
back_patch( $1->for_jmp_false,
JMP_FALSE,
gen_label() );                       }
;
/* Expressions.  Operands are emitted first (LD_INT for literals, LD_VAR via
   context_check for declared identifiers); each binary operator then emits
   its own instruction with argument 0 after both operands have been
   reduced.  Parentheses generate no code. */
exp : NUMBER           { gen_code( LD_INT, $1 );                         }
| IDENTIFIER        { context_check( LD_VAR,  $1 );                   }
| exp '<' exp       { gen_code( LT,   0 );                            }
| exp '=' exp       { gen_code( EQ,   0 );                            }
| exp '>' exp       { gen_code( GT,   0 );                            }
| exp '+' exp       { gen_code( ADD,  0 );                            }
| exp '-' exp       { gen_code( SUB,  0 );                            }
| exp '*' exp       { gen_code( MULT, 0 );                            }
| exp '/' exp       { gen_code( DIV,  0 );                            }
| exp '^' exp       { gen_code( PWR,  0 );                            }
| '(' exp ')'
;
%%

/*=========================================================================
MAIN
=========================================================================*/
/* Entry point: compile the source file named by the first command-line
   argument and, when no errors were counted, print the generated code and
   execute it on the stack machine.
   Returns EXIT_SUCCESS when the program compiled without errors,
   EXIT_FAILURE on a usage error, an unreadable file or compile errors. */
int main( int argc, char *argv[] )
{
    extern FILE *yyin;
    FILE *source;
    const char *prog = ( argc > 0 && argv[0] != NULL ) ? argv[0] : "proto";

    ++argv; --argc;
    if ( argc < 1 )
    {
        /* the original passed a NULL file name to fopen in this case */
        fprintf( stderr, "usage: %s source-file\n", prog );
        return EXIT_FAILURE;
    }
    source = fopen( argv[0], "r" );
    if ( source == NULL )
    {
        perror( argv[0] );
        return EXIT_FAILURE;
    }
    yyin = source;
    /*yydebug = 1;*/
    errors = 0;
    yyparse ();
    fclose( source );          /* scanning is finished, release the file */
    printf ( "Parse Completed\n" );
    if ( errors == 0 )
    {
        print_code ();
        fetch_execute_cycle();
    }
    return errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
/*=========================================================================
YYERROR
=========================================================================*/
/* Called by yyparse on error: counts the error and prints the message s
   followed by a newline on standard output.  Always returns 0.
   The return type is spelled out (it was implicit int) and a value is now
   actually returned; the signature is otherwise unchanged so it still
   matches the way the generated parser calls it. */
int yyerror ( char *s )
{
    errors++;
    printf ("%s\n", s);
    return 0;
}
/**************************** End Grammar File ***************************/

Utility

Symbol table

/***************************************************************************
Symbol Table Module
***************************************************************************/
/*=========================================================================
DECLARATIONS
=========================================================================*/

/*-------------------------------------------------------------------------
SYMBOL TABLE RECORD
-------------------------------------------------------------------------*/
/* One entry of the symbol table; entries are chained through `next`. */
typedef struct symrec
{
    char          *name;    /* identifier text                       */
    int            offset;  /* data offset assigned to the identifier */
    struct symrec *next;    /* following entry in the chain           */
} symrec;

/*-------------------------------------------------------------------------
SYMBOL TABLE ENTRY
-------------------------------------------------------------------------*/
/* File-scope symbol pointer.
   NOTE(review): nothing in the visible code reads or writes this variable;
   context_check declares its own local of the same name -- confirm whether
   it can be removed. */
symrec *identifier;

/*-------------------------------------------------------------------------
SYMBOL TABLE
Implementation: a singly linked chain of records; putsym inserts at the
head and getsym searches from the head.
------------------------------------------------------------------------*/
symrec *sym_table = (symrec *)0; /* Head of the chain; null while the table is empty */

/*========================================================================
Operations: Putsym, Getsym
========================================================================*/
/* Create a record for sym_name, give it the next free data location and
   insert it at the head of the symbol table.
   The name is copied, so the caller keeps ownership of sym_name.
   Returns the new record, or a null pointer when memory is exhausted; in
   that case neither the table nor the data segment is modified. */
symrec * putsym (char *sym_name)
{
    symrec *ptr;
    ptr = (symrec *) malloc (sizeof(symrec));
    if (ptr == 0)
        return 0;
    ptr->name = (char *) malloc (strlen(sym_name)+1);
    if (ptr->name == 0)
    {
        free (ptr);            /* do not leak the half-built record */
        return 0;
    }
    strcpy (ptr->name,sym_name);
    /* reserve the data location only once the record is certain to exist */
    ptr->offset = data_location();
    ptr->next = (struct symrec *)sym_table;
    sym_table = ptr;
    return ptr;
}

/* Look sym_name up in the symbol table.
   Returns the first record whose name compares equal, or a null pointer
   when the chain ends without a match. */
symrec * getsym (char *sym_name)
{
    symrec *cur = sym_table;
    while (cur != (symrec *) 0)
    {
        if (strcmp (cur->name, sym_name) == 0)
            break;                      /* found: cur is the answer */
        cur = (symrec *) cur->next;
    }
    return cur;                         /* null when nothing matched */
}
/************************** End Symbol Table **************************/

Code Generator

/***************************************************************************
Code Generator
***************************************************************************/
/*-------------------------------------------------------------------------
Data Segment
-------------------------------------------------------------------------*/
/* Next free location of the data segment. */
int data_offset = 0;
/* Hand out the current data location and advance to the next one. */
int data_location()
{
    int reserved = data_offset;
    data_offset = reserved + 1;
    return reserved;
}
/*-------------------------------------------------------------------------
Code Segment
-------------------------------------------------------------------------*/
/* Address of the next instruction to be generated. */
int code_offset = 0;
/* Report the current code address without consuming it. */
int gen_label()
{
    int here = code_offset;
    return here;
}
/* Consume one code location and return its address, so that an
   instruction can be written there later. */
int reserve_loc()
{
    int slot = code_offset;
    code_offset = slot + 1;
    return slot;
}
/* Generates code at the current location and advances code_offset.
   The code array has a fixed size; when it is full the program stops with
   an error message instead of writing past the end of the array. */
void gen_code( enum code_ops operation, int arg )
{
    const int capacity = (int) (sizeof code / sizeof code[0]);
    if ( code_offset >= capacity )
    {
        fprintf( stderr, "code segment overflow: more than %d instructions\n",
                 capacity );
        exit( EXIT_FAILURE );
    }
    code[code_offset].op  = operation;
    code[code_offset].arg = arg;
    code_offset++;
}
/* Generates code at a reserved location   */
void back_patch( int addr,  enum code_ops operation, int arg  )
{
code[addr].op  = operation;
code[addr].arg = arg;
}
/*-------------------------------------------------------------------------
Print Code to stdio
-------------------------------------------------------------------------*/
/* Prints every generated instruction as "address: mnemonic argument" on
   standard output.  The address and the argument are plain ints, so they
   are printed with %d (the original %ld did not match the argument type). */
void print_code()
{
    int i;
    for (i = 0; i < code_offset; i++)
        printf("%3d: %-10s%4d\n", i, op_name[(int) code[i].op], code[i].arg );
}
/************************** End Code Generator **************************/

VM

/***************************************************************************
Stack Machine
***************************************************************************/
/*=========================================================================
DECLARATIONS
=========================================================================*/
/* OPERATIONS: Internal Representation */
enum code_ops
{
    HALT,
    STORE,
    JMP_FALSE,
    GOTO,
    DATA,
    LD_INT,
    LD_VAR,
    READ_INT,
    WRITE_INT,
    LT,
    EQ,
    GT,
    ADD,
    SUB,
    MULT,
    DIV,
    PWR
};

/* OPERATIONS: External Representation.
   Indexed by enum code_ops, so the order must match the enum above. */
char *op_name[] =
{
    "halt",
    "store",
    "jmp_false",
    "goto",
    "data",
    "ld_int",
    "ld_var",
    "in_int",
    "out_int",
    "lt",
    "eq",
    "gt",
    "add",
    "sub",
    "mult",
    "div",
    "pwr"
};

/* One stack-machine instruction: an operation and its integer argument. */
struct instruction
{
    enum code_ops op;   /* what to do                                      */
    int arg;            /* operand: a literal, a data offset or a code
                           address, depending on op                        */
};
/* CODE Array: the generated program, at most 999 instructions */
struct instruction code[999];
/* RUN-TIME Stack: holds variables and intermediate values, 999 cells */
int stack[999];
/*-------------------------------------------------------------------------
Registers
-------------------------------------------------------------------------*/
int                 pc   = 0;   /* index in code of the next instruction to fetch */
struct instruction  ir;         /* the instruction currently being executed       */
int                 ar   = 0;   /* base added to ir.arg when addressing variables;
                                   never modified by the visible code            */
int                 top  = 0;   /* index in stack of the current top element      */
char         ch;                /* NOTE(review): not referenced by the visible code */
/*=========================================================================
Fetch Execute Cycle
=========================================================================*/
/* Integer exponentiation for the PWR instruction.
   A negative exponent yields the truncated integer result: 1 or -1 for a
   base of 1 or -1, and 0 for every other base. */
static int int_pow( int base, int exponent )
{
    int result = 1;
    if ( exponent < 0 )
    {
        if ( base == 1 )
            return 1;
        if ( base == -1 )
            return ( exponent % 2 == 0 ) ? 1 : -1;
        return 0;
    }
    while ( exponent-- > 0 )
        result *= base;
    return result;
}

/* Runs the program stored in code[] starting at pc until a HALT
   instruction is executed.  Binary operators pop their right operand from
   the top of the stack and replace the left operand with the result.
   A failed integer read or a division by zero is reported on standard
   output and stops execution. */
void fetch_execute_cycle()
{
    do {
        /*printf( "PC = %3d IR.arg = %8d AR = %3d Top = %3d,%8d\n",
        pc, ir.arg, ar, top, stack[top]); */
        /* Fetch         */
        ir = code[pc++];
        /* Execute       */
        switch (ir.op) {
        case HALT      : printf( "halt\n" );
                         break;
        case READ_INT  : printf( "Input: " );
                         /* stack cells are int, so the conversion is %d */
                         if ( scanf( "%d", &stack[ar+ir.arg] ) != 1 )
                         {
                             printf( "Runtime Error: expected an integer\n" );
                             ir.op = HALT;      /* ends the do-while loop */
                         }
                         break;
        case WRITE_INT : printf( "Output: %d\n", stack[top--] );
                         break;
        case STORE     : stack[ar+ir.arg] = stack[top--];   /* addressed like LD_VAR */
                         break;
        case JMP_FALSE : if ( stack[top--] == 0 )
                             pc = ir.arg;
                         break;
        case GOTO      : pc = ir.arg;
                         break;
        case DATA      : top = top + ir.arg;
                         break;
        case LD_INT    : stack[++top] = ir.arg;
                         break;
        case LD_VAR    : stack[++top] = stack[ar+ir.arg];
                         break;
        case LT        : stack[top-1] = ( stack[top-1] <  stack[top] ) ? 1 : 0;
                         top--;
                         break;
        case EQ        : stack[top-1] = ( stack[top-1] == stack[top] ) ? 1 : 0;
                         top--;
                         break;
        case GT        : stack[top-1] = ( stack[top-1] >  stack[top] ) ? 1 : 0;
                         top--;
                         break;
        case ADD       : stack[top-1] = stack[top-1] + stack[top];
                         top--;
                         break;
        case SUB       : stack[top-1] = stack[top-1] - stack[top];
                         top--;
                         break;
        case MULT      : stack[top-1] = stack[top-1] * stack[top];
                         top--;
                         break;
        case DIV       : if ( stack[top] == 0 )
                         {
                             printf( "Runtime Error: division by zero\n" );
                             ir.op = HALT;      /* ends the do-while loop */
                         }
                         else
                             stack[top-1] = stack[top-1] / stack[top];
                         top--;
                         break;
        case PWR       : /* the original multiplied the operands here */
                         stack[top-1] = int_pow( stack[top-1], stack[top] );
                         top--;
                         break;
        default        : /* the stray %s conversion had no matching argument */
                         printf( "Internal Error: Memory Dump\n" );
                         break;
        }
    }
    while (ir.op != HALT);
}
/*************************** End Stack Machine **************************/

Test Script

let
    integer n,x.
in
    read n;
    if n < 10 then 
        x := 1; 
    else 
        skip; 
    fi;

    while n < 10 do 
        if n > 2 then
            n := 1;
            write n;
        else 
            skip;
        fi;
        x := 5*x; 
        n := n+1; 
    end;

    skip;

    write n;

    write x;
end
  1. Yacc&Lex sample

  2. CompilersPrinciplesTechniquesTools

  3. HowToWriteParser

  4. LL(*).Grammar.Generator

  5. Expression.diff.Statement

  6. Let’sBuildAInterpreter
  7. Calculator
  8. Compiler.Construction.Using.Flex.And.Bison
  9. gcc.llvm-gcc.clang

猜你喜欢

转载自blog.csdn.net/wuzh1230/article/details/52241937