LEX/YACC using

The texts given below demonstrate how to use LEX/YACC programs or their free analogs FLEX/BISON for creating the lexical and syntax analyzers. Input language are the same as in an example from the first part. Syntax tree and code generator are the same as in an example from the second part. The realization language are C, since LEX/YACC creates programs in this language. An example is shorter than two previous, but for its understanding it is necessary to be familiar with LEX/YACC. Or only with YACC, lexical analyzer can by written manually. YACC reads the description and creates the syntax analyzer, which in the process of work reads lexemes, put them into the stack (in contrast to previous examples scanner must return not only word, but also corresponding identifier) and check the top of stack for the possibility of applying one of the syntax rules given in the description. If is found the rule, whose right side coincides with the top of stack, the corresponding symbols from stack are replaced with symbol from the left side of the rule and the code connected with the rule are executed. Here by symbols we sense not the symbols of alphabet, but lexemes (terminal symbols) and left sides of the rules (nonterminal symbols). Process finishes when in the stack remains one starting symbol (in this case program symbol) or if error are fixed. In real world syntax analyzer are more complex and it must know how to resolve conflicts. For example, appearance at the top of stack IDENTIFIER symbol yet does not mean that it is possible to convert it into the nonterminal symbol (ambiguity, for the selection of the suitable rule it is necessary to use information about the previous symbols). The conversion of identifier symbol also ambiguous, but this conflict can be resolved by the proper adjustment of rules (first identifiers identifier, then identifier but it is more correct also use information about the previous symbols). In some situations also necessary to consider the subsequent symbol.

The automatically generated analyzer is good fact that with its creation the formal description of the syntax of language clearly is used. With the manual writing of analyzer it is only keep in mind. Nevertheless answer to a question about the comparison of the complexity of these procedures is not obvious. An example can contain inaccuracies and errors.

/* File l - Scaner definition */ /* Command line: flex.exe l */ /* Result: lexyy.c */ %{ #include <string.h> /* strdup */ #include <stdlib.h> /* atoi */ %} DIGIT [0-9] ID [A-Za-z][A-Za-z0-9]* %% "<=" { return(LE); } "!=" { return(NE); } ">=" { return(GE); } word { return(WORD); } begin { return(BLOCK); } /* BEGIN word are used by FLEX */ if { return(IF); } then { return(THEN); } while { return(WHILE); } do { return(DO); } write { return(WRITE); } blank { return(BLANK); } end { return(END); } {ID} { yylval.ID = (char *) strdup(yytext); return(IDENTIFIER); } {DIGIT}+ { yylval.Value = atoi(yytext); return(NUMBER); } [ \t\n]+ /* Whitespaces and new lines */ . { return(yytext[0]); } %% int yywrap() { return 1; /* End Of File - end */ }

/* File s - Parser definition and code generator */ /* Command line: bison.exe s */ /* Result: s_tab.c */ %{ #include <stdlib.h> /* malloc */ #include <string.h> /* strcmp */ #include <stdio.h> /* I/O */ #include <io.h> #define YYDEBUG 0 /* Debugging are turned off, turn on - 1 */ #define idEMPTY 0 #define idNUMBER 1 #define idVAR 2 #define idOR 3 #define idAND 4 #define idLT 5 #define idLE 6 #define idEQ 7 #define idNE 8 #define idGE 9 #define idGT 10 #define idBIT_OR 11 #define idBIT_AND 12 #define idADD 13 #define idSUB 14 #define idMUL 15 #define idDIV 16 #define idIF 17 #define idWHILE 18 #define idWRITE 19 #define idBLANK 20 #define idASSIGN 21 int errors; /* Errors counter */ typedef unsigned int word; struct DICT { char *Name; /* Variable name */ word Ofs; /* Variable offset */ word Size; /* Array lenght, 0 - simlpe variable */ struct DICT *Next; /* Pointer to next variable */ }; typedef struct DICT DICT; DICT *Dict = 0; /* Dictionary */ word Ofs=0; DICT *Find(char *Name); DICT *Add (char *Name, word Size); DICT *Test(DICT *Dict, word Array); struct NODE { word ID; word Value; DICT *Var; struct NODE *Left; struct NODE *Right; }; typedef struct NODE NODE; NODE *Tree=0; NODE *Node(word ID, word Value, DICT *Var, NODE *Left, NODE *Right); void yyerror(char *s) /* Called by yyparse when error occurs */ { errors++; printf("%s\n", s); } %} %no_lines /* Do not include #line directives */ %union /* Stack element */ { word Value; /* Number */ char *ID; /* Identifier */ NODE *Node; /* Pointer to Node */ } %start program /* Start symbol */ %token <Value> NUMBER /* Number */ %token <ID> IDENTIFIER /* Identifier */ %token WORD BLOCK IF THEN WHILE DO WRITE BLANK END %left '|' '&' %left '<' LE '=' NE GE '>' %left '-' '+' %left '*' '/' %type <Node> operators operator list expr cond %% /* Syntax rules */ program : declarations BLOCK operators END { /*$3->Left = NULL; Tree = $3;*/ Tree = $3->Right; } ; declarations : declaration | declarations declaration ; declaration : WORD identifiers ';' ; identifiers : identifier | identifiers ',' identifier ; identifier : IDENTIFIER { Add($1, 0); } | IDENTIFIER '[' NUMBER ']' { Add($1, $3); } ; operators : operator { /*Additional node are used for list building */ $$ = Node(idEMPTY, 0, NULL, NULL, $1); $$->Left = $$->Right; } | operators operator { $1->Left->Right = $2; $1->Left = $1->Left->Right; } ; operator : IF cond THEN operators END { $$ = Node(idIF, 0, NULL, Node(idEMPTY, 0, NULL, $2, $4->Right), NULL); } | WHILE cond DO operators END { $$ = Node(idWHILE, 0, NULL, Node(idEMPTY, 0, NULL, Node(idEMPTY, 0, NULL, NULL, $2), $4->Right), NULL); } | WRITE list ';' { $$ = $2; $2->ID = idWRITE; $2->Right = NULL; } | BLANK { $$ = Node(idBLANK, 0, NULL, NULL, NULL); } | IDENTIFIER '=' expr ';' { $$ = Node(idASSIGN, 0, NULL, Node(idEMPTY, 0, Test(Find($1), 0), NULL, $3), NULL); } | IDENTIFIER '[' expr ']' '=' expr ';' { $$ = Node(idASSIGN, 0, NULL, Node(idEMPTY, 0, Test(Find($1), 1), $3, $6), NULL); } ; list : expr { $$ = Node(idEMPTY, 0, NULL, Node(idEMPTY, 0, NULL, $1, NULL), NULL); $$->Right = $$->Left; } | list ',' expr { $1->Right->Right = Node(idEMPTY, 0, NULL, $3, NULL); $1->Right = $1->Right->Right; } ; expr : NUMBER { $$ = Node(idNUMBER, $1, NULL, NULL, NULL); } | IDENTIFIER { $$ = Node(idVAR, 0, Test(Find($1), 0), NULL, NULL); } | IDENTIFIER '[' expr ']' { $$ = Node(idVAR, 0, Test(Find($1), 1), $3, NULL); } | expr '|' expr { $$ = Node(idBIT_OR, 0, NULL, $1, $3); } | expr '&' expr { $$ = Node(idBIT_AND, 0, NULL, $1, $3); } | expr '+' expr { $$ = Node(idADD, 0, NULL, $1, $3); } | expr '-' expr { $$ = Node(idSUB, 0, NULL, $1, $3); } | expr '*' expr { $$ = Node(idMUL, 0, NULL, $1, $3); } | expr '/' expr { $$ = Node(idDIV, 0, NULL, $1, $3); } | '(' expr ')' { $$ = $2; } ; cond : expr '<' expr { $$ = Node(idLT, 0, NULL, $1, $3); } | expr LE expr { $$ = Node(idLE, 0, NULL, $1, $3); } | expr '=' expr { $$ = Node(idEQ, 0, NULL, $1, $3); } | expr NE expr { $$ = Node(idNE, 0, NULL, $1, $3); } | expr GE expr { $$ = Node(idGE, 0, NULL, $1, $3); } | expr '>' expr { $$ = Node(idGT, 0, NULL, $1, $3); } | cond '|' cond { $$ = Node(idOR, 0, NULL, $1, $3); } | cond '&' cond { $$ = Node(idAND, 0, NULL, $1, $3); } | '(' cond ')' { $$ = $2; } ; %% /* Code */ char *str(word N, word S) { static char *Digit="0123456789"; static char Buff [11]; word I; word J; word K; if (S==0) { S=1; } I=0; while (N>0 || S>0) { Buff[I++]=Digit[N%10]; if (S>0) { S--; } N/=10; } Buff[I]=0; J=0; K=I-1; while (J<K) { char C =Buff[J]; Buff[J++]=Buff[K]; Buff[K--]=C; } return Buff; } DICT *Find(char *Name) { DICT *Ptr; for (Ptr=Dict; Ptr!=NULL; Ptr=Ptr->Next) { if (strcmp(Ptr->Name, Name)==0) { break; } } return Ptr; } DICT *Add (char *Name, word Size) { DICT *Ptr=Find(Name); if (Ptr==NULL) { Ptr =(DICT *) malloc(sizeof(DICT)); Ptr->Name = strdup(Name); Ptr->Ofs = Ofs; Ofs+=(Size>0)?2*Size:2; Ptr->Size = Size; Ptr->Next = Dict; Dict = Ptr; } else { errors++; printf("Variable %s already defined\n", Name); } return Ptr; } DICT *Test(DICT *Dict, word Array) { if (Dict!=NULL) { if (Dict->Size==0 && Array!=0) { errors++; printf("Variable %s indexing not allowed\n", Dict->Name); } else if (Dict->Size!=0 && Array==0) { errors++; printf("Array %s index expected\n", Dict->Name); } } else { errors++; printf("Undefined variable\n"); } return Dict; } NODE *Node(word ID, word Value, DICT *Var, NODE *Left, NODE *Right) { NODE *Ptr =(NODE *) malloc(sizeof(NODE)); Ptr->ID = ID; Ptr->Value= Value; Ptr->Var = Var; Ptr->Left = Left; Ptr->Right= Right; return Ptr; } char Text[4096]; word pText; FILE *out; void Save(char Ch) { if (pText>=sizeof(Text)) { fwrite(Text, 1, pText, out); pText=0; } Text[pText++]=Ch; } void Decl(char *Inst) { word I=0; while (Inst[I]!=0) { Save(Inst[I++]); } Save('\n'); } void Emit(word L, char *Inst) { if (L!=0) { char *P=str(L, 5); word I=0; Save ('@'); while (P[I]!=0) { Save(P[I++]); } Save(':'); Save(' '); } else { word I=0; while (I<8) { Save(' '); I++; } } Decl(Inst); } word Lbl (word ID) { switch (ID) { case idNUMBER: case idVAR: case idADD: case idSUB: case idMUL: case idDIV: case idASSIGN: { return 0; } } return 1; } void Enum(NODE *P, word *L) { if (P==NULL) { return; } Enum(P->Left, L); if (Lbl(P->ID)!=0) { P->Value=(*L)++; } Enum(P->Right, L); } void Code(NODE *P, word F, word T, word M) { char Buff [128]; if (P->ID==idNUMBER) { Emit(0, strcat(strcpy(Buff, "mov AX,"), str(P->Value, 0))); } else if (P->ID==idVAR) { if (P->Left) { Code(P->Left, 0, 0, 0); Emit(0, "shl AX,1"); Emit(0, "mov DI,AX"); Emit(0, strcat(strcat(strcpy(Buff, "mov AX,DS:[DI+"), str(P->Var->Ofs, 0)), "]")); } else { Emit(0, strcat(strcat(strcpy(Buff, "mov AX,DS:["), str(P->Var->Ofs, 0)), "]")); } } else if (P->ID==idOR) { if (T==0) { Code(P->Left, 0, M, P->Value); Emit(P->Value,"nop"); Code(P->Right, F, 0, M); } else { Code(P->Left, 0, T, P->Value); Emit(P->Value,"nop"); Code(P->Right, 0, T, M); } } else if (P->ID==idAND) { if (T==0) { Code(P->Left, F, 0, P->Value); Emit(P->Value,"nop"); Code(P->Right, F, 0, M); } else { Code(P->Left, M, 0, P->Value); Emit(P->Value,"nop"); Code(P->Right, 0, T, M); } } else if (idLT<=P->ID && P->ID<=idDIV) { Code(P->Right, 0, 0, 0); Emit(0, "push AX"); Code(P->Left, 0, 0, 0); Emit(0, "pop BX"); if (idLT<=P->ID && P->ID<=idGT) { word ID=P->ID; Emit(0,"cmp AX, BX"); if (T==0) { if (ID==idLT) { ID=idGE; } else if (ID==idLE) { ID=idGT; } else if (ID==idEQ) { ID=idNE; } else if (ID==idNE) { ID=idEQ; } else if (ID==idGE) { ID=idLT; } else if (ID==idGT) { ID=idLE; } T=F; F=0; } if (ID==idLT) { Emit(0, strcat(strcpy(Buff, "jae @"), str(P->Value, 5))); } else if (ID==idLE) { Emit(0, strcat(strcpy(Buff, "ja @"), str(P->Value, 5))); } else if (ID==idEQ) { Emit(0, strcat(strcpy(Buff, "jne @"), str(P->Value, 5))); } else if (ID==idNE) { Emit(0, strcat(strcpy(Buff, "je @"), str(P->Value, 5))); } else if (ID==idGE) { Emit(0, strcat(strcpy(Buff, "jb @"), str(P->Value, 5))); } else if (ID==idGT) { Emit(0, strcat(strcpy(Buff, "jbe @"), str(P->Value, 5))); } Emit(0, strcat(strcpy(Buff, "jmp @"), str(T, 5))); Emit(P->Value, "nop"); } else if (P->ID==idBIT_OR) { Emit(0,"or AX,BX"); } else if (P->ID==idBIT_AND) { Emit(0,"and AX,BX"); } else if (P->ID==idADD) { Emit(0,"add AX,BX"); } else if (P->ID==idSUB) { Emit(0,"sub AX,BX"); } else if (P->ID==idMUL) { Emit(0,"mul BX"); } else if (P->ID==idDIV) { Emit(0,"xor DX,DX"); Emit(0,"div BX"); } } else if (P->ID==idIF) { F=P->Value; P=P->Left; Code(P->Left, F, 0, P->Value); Emit(P->Value,"nop"); P=P->Right; while (P!=NULL) { Code(P, 0, 0, 0); P=P->Right; } Emit(F, "nop"); } else if (P->ID==idWHILE) { word L; F=P->Value; P=P->Left; T=P->Value; L=P->Left->Value; Emit(L, "nop"); Code(P ->Left->Right, F, 0, T); Emit(T, "nop"); P=P->Right; while (P!=NULL) { Code(P, 0, 0, 0); P=P->Right; } Emit(0, strcat(strcpy(Buff, "jmp @"), str(L, 5))); Emit(F,"nop"); } else if (P->ID==idWRITE) { P=P->Left; while (P!=NULL) { Code(P->Left, 0, 0, 0); Emit(0, "mov CX,8"); Emit(0, "call @WRITE"); P=P->Right; } Emit(0,"mov AH, 2"); Emit(0,"mov DL,13"); Emit(0,"int 21H"); Emit(0,"mov AH, 2"); Emit(0,"mov DL,10"); Emit(0,"int 21H"); } else if (P->ID==idBLANK) { Emit(0,"mov AH, 2"); Emit(0,"mov DL,13"); Emit(0,"int 21H"); Emit(0,"mov AH, 2"); Emit(0,"mov DL,10"); Emit(0,"int 21H"); } else if (P->ID==idASSIGN) { P=P->Left; Code(P->Right, 0, 0, 0); if (P->Left) { Emit(0, "push AX"); Code(P->Left, 0, 0, 0); Emit(0, "shl AX,1"); Emit(0, "mov DI,AX"); Emit(0, "pop AX"); Emit(0, strcat(strcat(strcpy(Buff, "mov DS:[DI+"), str(P->Var->Ofs, 0)), "],AX")); } else { Emit(0, strcat(strcat(strcpy(Buff, "mov DS:["), str(P->Var->Ofs, 0)), "],AX")); } } } void main(int argc, char *argv[]) { extern FILE *yyin; yyin=fopen(argv[1], "r"); /*yydebug=1;*/ errors =0; yyparse(); if (errors==0) { NODE *P; word L; L=1; Enum(Tree, &L); out=fopen(argv[2], "w"); Emit(0,"mov AX,DS"); Emit(0,"add AX,4096"); Emit(0,"mov DS,AX"); P=Tree; while (P!=NULL) { Code(P, 0, 0, 0); P=P->Right; } Emit(0,"mov AX,4C00H"); Emit(0,"int 21H"); Decl("@WRITE: dec CX"); Decl(" xor DX,DX"); Decl(" mov BX,10"); Decl(" div BX"); Decl(" push DX"); Decl(" or AX,AX"); Decl(" jz @SPACE"); Decl(" call @WRITE"); Decl(" jmp @DIGIT"); Decl("@SPACE: mov AH, 2"); Decl(" mov DL,32"); Decl(" int 21H"); Decl(" dec CX"); Decl(" jnz @SPACE"); Decl("@DIGIT: mov AH, 2"); Decl(" pop DX"); Decl(" add DL,48"); Decl(" int 21H"); Decl(" retn"); if (pText>0) { fwrite(Text, 1, pText, out); } fclose(out); } } #include "lexyy.c" /* Scaner */



Π‘Π°ΠΉΡ‚ создан Π² систСмС uCoz