Context compiler design. Part 2

In first part simple one-pass compiler are explained. This part explains multi-pass compiler with intermediate presentation of program. Syntax analyzer of one-pass compiler produce assembler listing. Syntax analyzer of multi-pass compiler produce syntax tree. Separated code generator scans this tree and produce assembler listing. It requires additional work and more memory, but may produce more quality code. This techinque are used in Context 2.0 for Windows. Simplified DOS version are listed below. Expr and Ctrl functions are similar to functions of one-pass compiler, recursive code generator are implemented in Code function. Note that it implements short evaluation of conditions.

See two syntax tree examples. Expression 1+2*(3+4) are presented as

+
/ \
1 *
/ \
2 +
/ \
3 4

while loop presented as

while
/ \
body ...
/ \
header op1
\ \
cond op2
\
...

Additional nodes header and body are used to get third link from while node (to condition, to included operators and to next operator) and to store necessary labels.

NODE structure represents the syntax tree node. To point to other nodes indexes pLeft and pRight are used. It decrease structure size, but possibly, using references are more correctly. The changed text is given here.

define @emNOMEMORY "Not enough memory" define @emSIZE "Identifier too long" define @emEOF "EOF" define @emNUMBER "Error in number" define @emNAME "Name expected" define @emEMPTY "Empty array" define @emBRACKET "Bracket expected" define @emCOMMA "Comma expected" define @emSEMICOLON "Semicolon expected" define @emTHENEXP "then keyword expected" define @emDOEXP "do keyword expected" define @emDUP "Duplicate definition" define @emLVALUE "Variable expected" define @emTYPE "Types not match" define @emUNDEFINED "Undefined variable" define @emUNDEFOPR "Undefined operation" define @emASSIGN "= expected" define ntSIZE 4096 define tbSIZE 4096 define dtSIZE 128 define idSIZE 16 define ttWORD 0 define ttBOOL 1 define ptZERO 0 define ptBOOL 1 define ptCOMP 2 define ptADD 3 define ptMUL 4 define ptLVALUE 5 define idEMPTY 0 define idNUMBER 1 define idVARIABLE 2 define idOR 3 define idAND 4 define idLT 5 define idLE 6 define idEQ 7 define idNE 8 define idGE 9 define idGT 10 define idADD 11 define idSUB 12 define idMUL 13 define idDIV 14 define idIF 15 define idWHILE 16 define idWRITE 17 define idBLANK 18 define idASSIGN 19 struct NODE word ID; word Value; word pLeft; word pRight; end struct DATA char Name [idSIZE]; word Ofs; word Index; end NODE Node [ntSIZE]; word nNode; DATA Data [dtSIZE]; word nData; char Text [tbSIZE]; word hText; word nText; word pText; char Name [128]; word Line; void @Ptr(word Seg, Ofs) void @P1=@Ofs; void @@P2=@P1; return @P2; end word isalpha(char Ch) if ('A'<=Ch & Ch<='Z') | ('a'<=Ch & Ch<='z') | (Ch='_') then return 0; end return 1; end word isdigit(char Ch) if ('0'<=Ch & Ch<='9') then return 0; end return 1; end word strlen(char @Buff) word P=0; while Buff[P]!=#0 do inc P; end return P; end word strcmp(char @St1, @St2) word P=0; while St1[P]=St2[P] do if St1[P]=#0 then return 0; end inc P; end return 1; end char @strcpy(char @Dst, @Src) word P=0; while Src[P]!=#0 do Dst[P]=Src[P]; inc P; end Dst[P]=#0; return @Dst; end char @strcat(char @Dst, @Src) word P=strlen(@Dst); word Q=0; while Src[Q]!=#0 do Dst[P]=Src[Q]; inc P; inc Q; end Dst[P]=#0; return @Dst; end word GetPSP() asm mov AH,62H asm int 21H asm mov AX,BX end word open(char @Name) asm push DS asm mov AH,3DH asm mov AL,00H asm mov DX,SS:[BP+6] asm mov DS,DX asm mov DX,SS:[BP+4] asm int 21H asm pop DS end word create(char @Name) asm push DS asm mov AH,3CH asm mov CX,00H asm mov DX,SS:[BP+6] asm mov DS,DX asm mov DX,SS:[BP+4] asm int 21H asm pop DS end word read(word F; void @Buff; word N) asm push DS asm mov AH,3FH asm mov BX,SS:[BP+10] asm mov CX,SS:[BP+4] asm mov DX,SS:[BP+8] asm mov DS,DX asm mov DX,SS:[BP+6] asm int 21H asm pop DS end word write(word F; void @Buff; word N) asm push DS asm mov AH,40H asm mov BX,SS:[BP+10] asm mov CX,SS:[BP+4] asm mov DX,SS:[BP+8] asm mov DS,DX asm mov DX,SS:[BP+6] asm int 21H asm pop DS end void close(word F) asm mov AH,3EH asm mov BX,SS:[BP+4] asm int 21H end void putc(char Ch) asm mov AH,2 asm mov DL,SS:[BP+4] asm int 21H end void puts(char @St) word P=0; while St[P]!=#0 do putc(St[P]); inc P; end end word str(word N; word S; char @Buff) if S>0 then dec S; end word P=0; if N>=10 | S>0 then P=str(N/10,S,@Buff); end char @D = "0123456789"; Buff [P]=D[N%10]; return P+1; end char @Str(word N; word S) char @P="00000"; P[str(N,S,@P)]=#0; return @P; end void Stop(char @EM) putc(#13); puts(@Name); putc('('); puts(@Str(Line,0)); putc(')'); if (strlen(@EM)>0) then puts(": "); puts(@EM); end close (hText); asm mov AX,4C00H asm int 21H end word Val(char @Buff) char @D = "0123456789"; word P = 0; word L = 0; word H = 0; while Buff[P]!=#0 do word S=0; while D[S]!=Buff[P] do inc S; if S>=10 then Stop(@emNUMBER); end end S=10*L+S; L=S%256; S=10*H+S/256; H=S%256; S=S/256; if S>0 then Stop(@emNUMBER); end inc P; end return 256*H+L; end char Read() if pText>=nText then nText=read(hText,@Text,tbSIZE); if nText<1 then Stop(@emEOF); end pText=0; end return Text[pText]; end void Next() inc pText; end char @Scan(char @Buff) while Read()=#09 | Read()=#10 | Read()=#13 | Read()=#32 do if Read()=#10 then inc Line; end Next(); end word P=0; while isalpha(Read())=0 | isdigit(Read())=0 do Buff[P]=Read(); inc P; if P>=idSIZE then Stop(@emSIZE); end Next(); end if P=0 then Buff[P]=Read(); inc P; Next(); select case Buff[0]='<': if Read()='=' then Next(); return @strcpy(@Buff,"<="); end case Buff[0]='!': if Read()='=' then Next(); return @strcpy(@Buff,"!="); end case Buff[0]='>': if Read()='=' then Next(); return @strcpy(@Buff,">="); end end end Buff[P]=#0; return @Buff; end word Find(char @Name) word P=0; while P<nData do if strcmp(@Data[P].Name,@Name)=0 then exit end inc P; end return P; end word Peek() word N =nNode; if N>=ntSIZE then Stop(@emNOMEMORY); end Node [nNode].pLeft =ntSIZE; Node [nNode].pRight=ntSIZE; inc nNode; return N; end word Expr(word Prty; word @pType; char @Buff) word P1; select case strcmp(@Buff,"(")=0: if Prty>=ptLVALUE then Stop(@emLVALUE); end P1=Expr(0,@pType,@Scan(@Buff)); if strcmp(@Buff,")")!=0 then Stop(@emBRACKET); end case isdigit(Buff)=0: if Prty>=ptLVALUE then Stop(@emLVALUE); end P1=Peek(); Node[P1]. ID =idNUMBER; Node[P1]. Value=Val(@Buff); pType=ttWORD; default: word N=Find(@Buff); if N>=nData then Stop(@emNAME); end P1=Peek(); Node[P1].ID =idVARIABLE; Node[P1].Value =N; if Data[N].Index>0 then if strcmp(@Scan(@Buff),"[")!=0 then Stop(@emBRACKET); end word pType2; Node[P1].pLeft=Expr(0,@pType2,@Scan(@Buff)); if strcmp(@Buff,"]")!=0 then Stop(@emBRACKET); end if pType2!=ttWORD then Stop(@emTYPE); end end pType=ttWORD; end Scan(@Buff); while TRUE do word ID; word P; select case strcmp(@Buff,"|")=0: ID=idOR; P =ptBOOL; case strcmp(@Buff,"&")=0: ID=idAND; P =ptBOOL; case strcmp(@Buff,"<")=0: ID=idLT; P =ptCOMP; case strcmp(@Buff,"<=")=0: ID=idLE; P =ptCOMP; case strcmp(@Buff,"=")=0: ID=idEQ; P =ptCOMP; case strcmp(@Buff,"!=")=0: ID=idNE; P =ptCOMP; case strcmp(@Buff,">=")=0: ID=idGE; P =ptCOMP; case strcmp(@Buff,">")=0: ID=idGT; P =ptCOMP; case strcmp(@Buff,"+")=0: ID=idADD; P =ptADD; case strcmp(@Buff,"-")=0: ID=idSUB; P =ptADD; case strcmp(@Buff,"*")=0: ID=idMUL; P =ptMUL; case strcmp(@Buff,"/")=0: ID=idDIV; P =ptMUL; default: P =ptZERO; end if P<=Prty then exit end word pType2; word P2=Peek(); Node[P2]. ID =ID; Node[P2].pLeft =P1; Node[P2].pRight=Expr(P,@pType2,@Scan(@Buff)); if pType2!=pType then Stop(@emTYPE); end select case ID=idOR | ID=idAND: if pType!=ttBOOL then Stop(@emUNDEFOPR); end case idLT<=ID & ID<=idGT: if pType!=ttWORD then Stop(@emUNDEFOPR); end pType=ttBOOL; default: if pType!=ttWORD then Stop(@emUNDEFOPR); end end P1=P2; end return P1; end word Ctrl(char @Buff) word P1=Peek(); select case strcmp(@Buff,"if")=0: word P2=Peek(); Node [P1]. ID =idIF; Node [P1].pLeft = P2; word pType; Node [P2]. ID =idEMPTY; Node [P2].pLeft = Expr(ptZERO,@pType,@Scan(@Buff)); if strcmp(@Buff,"then")!=0 then Stop(@emTHENEXP); end if pType!=ttBOOL then Stop(@emTYPE); end word @P3=@Node[P2].pRight; while strcmp(@Scan(@Buff),"end")!=0 do P3 = Ctrl(@Buff); @P3 =@Node[P3].pRight; end case strcmp(@Buff,"while")=0: word P2=Peek(); word P3=Peek(); Node[P1]. ID =idWHILE; Node[P1].pLeft = P2; Node[P2]. ID =idEMPTY; Node[P2].pLeft = P3; Node[P3]. ID =idEMPTY; word pType; Node[P3].pRight=Expr(ptZERO,@pType,@Scan(@Buff)); if strcmp(@Buff,"do")!=0 then Stop(@emDOEXP); end if pType!=ttBOOL then Stop(@emTYPE); end word @P4=@Node[P2].pRight; while strcmp(@Scan(@Buff),"end")!=0 do P4 = Ctrl(@Buff); @P4 =@Node[P4].pRight; end case strcmp(@Buff,"write")=0: Node[P1].ID=idWRITE; word @P2=@Node[P1].pLeft; while TRUE do word pType; word P3=Peek(); Node [P3]. ID =idEMPTY; Node [P3].pLeft= Expr(ptZERO,@pType,@Scan(@Buff)); if pType!=ttWORD then Stop(@emTYPE); end P2 = P3; @P2=@Node[P3].pRight; if strcmp(@Buff,";")=0 then exit end if strcmp(@Buff,",")!=0 then Stop(@emCOMMA); end end case strcmp(@Buff,"blank")=0: Node[P1].ID=idBLANK; default: word P2=Peek(); Node[P1]. ID =idASSIGN; Node[P1].pLeft = P2; Node[P2]. ID =idEMPTY; word pType1, pType2; Node[P2].pLeft = Expr(ptLVALUE,@pType1,@Buff); if pType1!=ttWORD then Stop(@emTYPE); end if strcmp(@Buff,"=")!=0 then Stop(@emASSIGN); end Node[P2].pRight= Expr(ptZERO, @pType2,@Scan(@Buff)); if strcmp(@Buff,";")!=0 then Stop(@emSEMICOLON); end if pType2!=pType1 then Stop(@emTYPE); end end return P1; end void Save(char Ch) if pText>=tbSIZE then write(hText,@Text,pText); pText=0; end Text[pText]=Ch; inc pText; end void Decl(char @Inst) word I=0; while Inst[I]!=#0 do Save(Inst[I]); inc I; end Save(#13); Save(#10); end void Emit(word L; char @Inst) if L!=0 then Save('@'); char @P=@Str(L,5); word I=0; while P[I]!=#0 do Save(P[I]); inc I; end Save(':'); Save(' '); else word I=0; while I<8 do Save(' '); inc I; end end Decl(@Inst); end word Lbl (word ID) select case ID=idNUMBER: return 0; case ID=idVARIABLE: return 0; case ID=idADD: return 0; case ID=idSUB: return 0; case ID=idMUL: return 0; case ID=idDIV: return 0; case ID=idASSIGN: return 0; end return 1; end void Enum(word P; word @L) if P>=ntSIZE then return end Enum(Node[P].pLeft, @L); if Lbl(Node[P].ID)!=0 then Node[P].Value=L; inc L; end Enum(Node[P].pRight,@L); end void Code(word P; word F, T, M) char Buff[128]; select case Node[P].ID=idNUMBER: Emit(0,@strcat(@strcpy(@Buff,"mov AX,"),@Str(Node[P].Value,0))); case Node[P].ID=idVARIABLE: if Node[P].pLeft<ntSIZE then Code(Node[P].pLeft,0,0,0); Emit(0,"shl AX,1"); Emit(0,"mov DI,AX"); Emit(0,@strcat(@strcat(@strcpy(@Buff,"mov AX,DS:[DI+"),@Str(Data[Node[P].Value].Ofs,0)),"]")); else Emit(0,@strcat(@strcat(@strcpy(@Buff,"mov AX,DS:["),@Str(Data[Node[P].Value].Ofs,0)),"]")); end case Node[P].ID=idOR: if T=0 then Code(Node[P].pLeft, 0,M,Node[P].Value); Emit(Node[P].Value,"nop"); Code(Node[P].pRight,F,0,M); else Code(Node[P].pLeft, 0,T,Node[P].Value); Emit(Node[P].Value,"nop"); Code(Node[P].pRight,0,T,M); end case Node[P].ID=idAND: if T=0 then Code(Node[P].pLeft, F,0,Node[P].Value); Emit(Node[P].Value,"nop"); Code(Node[P].pRight,F,0,M); else Code(Node[P].pLeft, M,0,Node[P].Value); Emit(Node[P].Value,"nop"); Code(Node[P].pRight,0,T,M); end case idLT<=Node[P].ID & Node[P].ID<=idDIV: Code(Node[P].pRight,0,0,0); Emit(0,"push AX"); Code(Node[P].pLeft, 0,0,0); Emit(0,"pop BX"); select case idLT<=Node[P].ID & Node[P].ID<=idGT: Emit(0,"cmp AX, BX"); word ID=Node[P].ID; if T=0 then select case ID=idLT: ID=idGE; case ID=idLE: ID=idGT; case ID=idEQ: ID=idNE; case ID=idNE: ID=idEQ; case ID=idGE: ID=idLT; case ID=idGT: ID=idLE; end T=F; F=0; end select case ID=idLT: Emit(0,@strcat(@strcpy(@Buff,"jae @"),@Str(Node[P].Value,5))); case ID=idLE: Emit(0,@strcat(@strcpy(@Buff,"ja @"),@Str(Node[P].Value,5))); case ID=idEQ: Emit(0,@strcat(@strcpy(@Buff,"jne @"),@Str(Node[P].Value,5))); case ID=idNE: Emit(0,@strcat(@strcpy(@Buff,"je @"),@Str(Node[P].Value,5))); case ID=idGE: Emit(0,@strcat(@strcpy(@Buff,"jb @"),@Str(Node[P].Value,5))); case ID=idGT: Emit(0,@strcat(@strcpy(@Buff,"jbe @"),@Str(Node[P].Value,5))); end Emit(0,@strcat(@strcpy(@Buff,"jmp @"),@Str(T,5))); Emit(Node[P].Value,"nop"); case Node[P].ID=idADD: Emit(0,"add AX,BX"); case Node[P].ID=idSUB: Emit(0,"sub AX,BX"); case Node[P].ID=idMUL: Emit(0,"mul BX"); case Node[P].ID=idDIV: Emit(0,"xor DX,DX"); Emit(0,"div BX"); end case Node[P].ID=idIF: F=Node[P].Value; P=Node[P].pLeft; Code(Node[P].pLeft, F,0,Node[P].Value); Emit(Node[P]. Value,"nop"); P=Node[P].pRight; while P<ntSIZE do Code(P,0,0,0); P=Node[P].pRight; end Emit(F,"nop"); case Node[P].ID=idWHILE: F=Node[P].Value; P=Node[P].pLeft; T=Node[P].Value; word L=Node[Node[P].pLeft].Value; Emit(L,"nop"); Code(Node[Node[P].pLeft].pRight,F,0,T); Emit(T,"nop"); word P=Node[P].pRight; while P<ntSIZE do Code(P,0,0,0); P=Node[P].pRight; end Emit(0,@strcat(@strcpy(@Buff,"jmp @"),@Str(L,5))); Emit(F,"nop"); case Node[P].ID=idWRITE: P=Node[P].pLeft; while P<ntSIZE do Code(Node[P].pLeft,0,0,0); Emit(0,"mov CX,8"); Emit(0,"call @WRITE"); P=Node[P].pRight; end Emit(0,"mov AH, 2"); Emit(0,"mov DL,13"); Emit(0,"int 21H"); Emit(0,"mov AH, 2"); Emit(0,"mov DL,10"); Emit(0,"int 21H"); case Node[P].ID=idBLANK: Emit(0,"mov AH, 2"); Emit(0,"mov DL,13"); Emit(0,"int 21H"); Emit(0,"mov AH, 2"); Emit(0,"mov DL,10"); Emit(0,"int 21H"); case Node[P].ID=idASSIGN: P=Node[P].pLeft; Code(Node[P].pRight,0,0,0); if Node[Node[P].pLeft].pLeft<ntSIZE then Emit(0,"push AX"); Code(Node[Node[P].pLeft].pLeft,0,0,0); Emit(0,"shl AX,1"); Emit(0,"mov DI,AX"); Emit(0,"pop AX"); Emit(0,@strcat(@strcat(@strcpy(@Buff,"mov DS:[DI+"),@Str(Data[Node[Node[P].pLeft].Value].Ofs,0)),"],AX")); else Emit(0,@strcat(@strcat(@strcpy(@Buff,"mov DS:["),@Str(Data[Node[Node[P].pLeft].Value].Ofs,0)),"],AX")); end end end begin byte @Size=@Ptr(GetPSP(),128); char @Parm=@Ptr(GetPSP(),129); char name [128]; word I=0; while I<Size & Parm[I] =' ' do inc I; end word Flag=0; word K =0; word E; while I<Size & Parm[I]!=' ' do name[K]=Parm[I]; if name[K]='.' then E =K; Flag=1; end if name[K]='\' then Flag=0; end inc K; inc I; end name[K]=#00; if K=0 then return end if Flag=0 then strcat(@name,".ctx"); E=K; end I=0; K=0; while name[I]!=#0 do if name[I]=#92 then K=I+1; end inc I; end strcpy(@Name,@name[K]); hText=open(@name); pText=0; nText=0; Line =1; nNode=0; nData=0; char Buff [128]; word pNode; while TRUE do Scan(@Buff); select case strcmp(@Buff,"word")=0: while TRUE do if isalpha(Scan(@Buff))!=0 then Stop(@emNAME); end if Find(@Buff)<nData then Stop(@emDUP); end if nData>=dtSIZE then Stop(@emNOMEMORY); end Data[nData].Index=0; strcpy(@Data[nData].Name,@Buff); word N=1; if strcmp(@Scan(@Buff),"[")=0 then N=Val(@Scan(@Buff)); if N<1 then Stop(@emEMPTY); end Data[nData].Index=N; if strcmp(@Scan(@Buff),"]")!=0 then Stop(@emBRACKET); end Scan(@Buff); end inc nData; if strcmp(@Buff,";")=0 then exit end if strcmp(@Buff,",")!=0 then Stop(@emCOMMA); end end case strcmp(@Buff,"begin")=0: word @P=@pNode; while strcmp(@Scan(@Buff),"end")!=0 do P = Ctrl(@Buff); @P=@Node[P].pRight; end exit default: Stop(@emUNDEFINED); end end close (hText); word Ofs=0; I=0; while I<nData do Data[I].Ofs=Ofs; if Data[I].Index>0 then Ofs=Ofs+2*Data[I].Index; else Ofs=Ofs+2; end inc I; end word L=1; Enum (pNode,@L); strcpy(@name[E],".asm"); hText=create(@name); pText=0; Emit(0,"mov AX,DS"); Emit(0,"add AX,4096"); Emit(0,"mov DS,AX"); while pNode<ntSIZE do Code(pNode, 0,0,0); pNode=Node[pNode].pRight; end Emit(0,"mov AX,4C00H"); Emit(0,"int 21H"); Decl("@WRITE: dec CX"); Decl(" xor DX,DX"); Decl(" mov BX,10"); Decl(" div BX"); Decl(" push DX"); Decl(" or AX,AX"); Decl(" jz @SPACE"); Decl(" call @WRITE"); Decl(" jmp @DIGIT"); Decl("@SPACE: mov AH, 2"); Decl(" mov DL,32"); Decl(" int 21H"); Decl(" dec CX"); Decl(" jnz @SPACE"); Decl("@DIGIT: mov AH, 2"); Decl(" pop DX"); Decl(" add DL,48"); Decl(" int 21H"); Decl(" retn"); if pText>0 then write(hText,@Text,pText); end Stop (""); end

Full version of compiler more complex. It includes types, functions and others. But it uses sumilar set of functions and data structures.



Π‘Π°ΠΉΡ‚ создан Π² систСмС uCoz