In first part simple one-pass compiler are explained.
This part explains multi-pass compiler with intermediate presentation of program.
Syntax analyzer of one-pass compiler produce assembler listing. Syntax analyzer
of multi-pass compiler produce syntax tree. Separated code generator scans this
tree and produce assembler listing. It requires additional work and more memory,
but may produce more quality code. This techinque are used in Context 2.0
for Windows. Simplified DOS version are listed below. Expr and Ctrl functions
are similar to functions of one-pass compiler, recursive code generator
are implemented in Code function. Note that it implements short evaluation
of conditions.
See two syntax tree examples. Expression 1+2*(3+4) are presented as
+
/
\
1
*
/
\
2
+
/
\
3
4
while loop presented as
while
/
\
body
...
/
\
header
op1
\
\
cond
op2
\
...
Additional nodes header and body are used to get third link from while node (to condition,
to included operators and to next operator) and to store necessary labels.
NODE structure represents the syntax tree node. To point to other nodes indexes pLeft and pRight are used. It decrease structure size, but possibly, using references are more correctly. The changed text is given here.
define @emNOMEMORY "Not enough memory"
define @emSIZE "Identifier too long"
define @emEOF "EOF"
define @emNUMBER "Error in number"
define @emNAME "Name expected"
define @emEMPTY "Empty array"
define @emBRACKET "Bracket expected"
define @emCOMMA "Comma expected"
define @emSEMICOLON "Semicolon expected"
define @emTHENEXP "then keyword expected"
define @emDOEXP "do keyword expected"
define @emDUP "Duplicate definition"
define @emLVALUE "Variable expected"
define @emTYPE "Types not match"
define @emUNDEFINED "Undefined variable"
define @emUNDEFOPR "Undefined operation"
define @emASSIGN "= expected"
define ntSIZE 4096
define tbSIZE 4096
define dtSIZE 128
define idSIZE 16
define ttWORD 0
define ttBOOL 1
define ptZERO 0
define ptBOOL 1
define ptCOMP 2
define ptADD 3
define ptMUL 4
define ptLVALUE 5
define idEMPTY 0
define idNUMBER 1
define idVARIABLE 2
define idOR 3
define idAND 4
define idLT 5
define idLE 6
define idEQ 7
define idNE 8
define idGE 9
define idGT 10
define idADD 11
define idSUB 12
define idMUL 13
define idDIV 14
define idIF 15
define idWHILE 16
define idWRITE 17
define idBLANK 18
define idASSIGN 19
struct NODE
word ID;
word Value;
word pLeft;
word pRight;
end
struct DATA
char Name [idSIZE];
word Ofs;
word Index;
end
NODE Node [ntSIZE];
word nNode;
DATA Data [dtSIZE];
word nData;
char Text [tbSIZE];
word hText;
word nText;
word pText;
char Name [128];
word Line;
void @Ptr(word Seg, Ofs)
void @P1=@Ofs;
void @@P2=@P1;
return @P2;
end
word isalpha(char Ch)
if ('A'<=Ch & Ch<='Z') | ('a'<=Ch & Ch<='z') | (Ch='_') then
return 0;
end
return 1;
end
word isdigit(char Ch)
if ('0'<=Ch & Ch<='9') then
return 0;
end
return 1;
end
word strlen(char @Buff)
word P=0;
while Buff[P]!=#0 do
inc P;
end
return P;
end
word strcmp(char @St1, @St2)
word P=0;
while St1[P]=St2[P] do
if St1[P]=#0 then
return 0;
end
inc P;
end
return 1;
end
char @strcpy(char @Dst, @Src)
word P=0;
while Src[P]!=#0 do
Dst[P]=Src[P];
inc P;
end
Dst[P]=#0;
return @Dst;
end
char @strcat(char @Dst, @Src)
word P=strlen(@Dst);
word Q=0;
while Src[Q]!=#0 do
Dst[P]=Src[Q];
inc P;
inc Q;
end
Dst[P]=#0;
return @Dst;
end
word GetPSP()
asm mov AH,62H
asm int 21H
asm mov AX,BX
end
word open(char @Name)
asm push DS
asm mov AH,3DH
asm mov AL,00H
asm mov DX,SS:[BP+6]
asm mov DS,DX
asm mov DX,SS:[BP+4]
asm int 21H
asm pop DS
end
word create(char @Name)
asm push DS
asm mov AH,3CH
asm mov CX,00H
asm mov DX,SS:[BP+6]
asm mov DS,DX
asm mov DX,SS:[BP+4]
asm int 21H
asm pop DS
end
word read(word F; void @Buff; word N)
asm push DS
asm mov AH,3FH
asm mov BX,SS:[BP+10]
asm mov CX,SS:[BP+4]
asm mov DX,SS:[BP+8]
asm mov DS,DX
asm mov DX,SS:[BP+6]
asm int 21H
asm pop DS
end
word write(word F; void @Buff; word N)
asm push DS
asm mov AH,40H
asm mov BX,SS:[BP+10]
asm mov CX,SS:[BP+4]
asm mov DX,SS:[BP+8]
asm mov DS,DX
asm mov DX,SS:[BP+6]
asm int 21H
asm pop DS
end
void close(word F)
asm mov AH,3EH
asm mov BX,SS:[BP+4]
asm int 21H
end
void putc(char Ch)
asm mov AH,2
asm mov DL,SS:[BP+4]
asm int 21H
end
void puts(char @St)
word P=0;
while St[P]!=#0 do
putc(St[P]);
inc P;
end
end
word str(word N; word S; char @Buff)
if S>0 then
dec S;
end
word P=0;
if N>=10 | S>0 then
P=str(N/10,S,@Buff);
end
char @D = "0123456789";
Buff [P]=D[N%10];
return P+1;
end
char @Str(word N; word S)
char @P="00000";
P[str(N,S,@P)]=#0;
return @P;
end
void Stop(char @EM)
putc(#13);
puts(@Name);
putc('(');
puts(@Str(Line,0));
putc(')');
if (strlen(@EM)>0) then
puts(": ");
puts(@EM);
end
close (hText);
asm mov AX,4C00H
asm int 21H
end
word Val(char @Buff)
char @D = "0123456789";
word P = 0;
word L = 0;
word H = 0;
while Buff[P]!=#0 do
word S=0;
while D[S]!=Buff[P] do
inc S;
if S>=10 then
Stop(@emNUMBER);
end
end
S=10*L+S;
L=S%256;
S=10*H+S/256;
H=S%256;
S=S/256;
if S>0 then
Stop(@emNUMBER);
end
inc P;
end
return 256*H+L;
end
char Read()
if pText>=nText then
nText=read(hText,@Text,tbSIZE);
if nText<1 then
Stop(@emEOF);
end
pText=0;
end
return Text[pText];
end
void Next()
inc pText;
end
char @Scan(char @Buff)
while Read()=#09 | Read()=#10 | Read()=#13 | Read()=#32 do
if Read()=#10 then
inc Line;
end
Next();
end
word P=0;
while isalpha(Read())=0 | isdigit(Read())=0 do
Buff[P]=Read();
inc P;
if P>=idSIZE then
Stop(@emSIZE);
end
Next();
end
if P=0 then
Buff[P]=Read();
inc P;
Next();
select
case Buff[0]='<':
if Read()='=' then
Next();
return @strcpy(@Buff,"<=");
end
case Buff[0]='!':
if Read()='=' then
Next();
return @strcpy(@Buff,"!=");
end
case Buff[0]='>':
if Read()='=' then
Next();
return @strcpy(@Buff,">=");
end
end
end
Buff[P]=#0;
return @Buff;
end
word Find(char @Name)
word P=0;
while P=ntSIZE then
Stop(@emNOMEMORY);
end
Node [nNode].pLeft =ntSIZE;
Node [nNode].pRight=ntSIZE;
inc nNode;
return N;
end
word Expr(word Prty; word @pType; char @Buff)
word P1;
select
case strcmp(@Buff,"(")=0:
if Prty>=ptLVALUE then
Stop(@emLVALUE);
end
P1=Expr(0,@pType,@Scan(@Buff));
if strcmp(@Buff,")")!=0 then
Stop(@emBRACKET);
end
case isdigit(Buff)=0:
if Prty>=ptLVALUE then
Stop(@emLVALUE);
end
P1=Peek();
Node[P1]. ID =idNUMBER;
Node[P1]. Value=Val(@Buff);
pType=ttWORD;
default:
word N=Find(@Buff);
if N>=nData then
Stop(@emNAME);
end
P1=Peek();
Node[P1].ID =idVARIABLE;
Node[P1].Value =N;
if Data[N].Index>0 then
if strcmp(@Scan(@Buff),"[")!=0 then
Stop(@emBRACKET);
end
word pType2;
Node[P1].pLeft=Expr(0,@pType2,@Scan(@Buff));
if strcmp(@Buff,"]")!=0 then
Stop(@emBRACKET);
end
if pType2!=ttWORD then
Stop(@emTYPE);
end
end
pType=ttWORD;
end
Scan(@Buff);
while TRUE do
word ID;
word P;
select
case strcmp(@Buff,"|")=0:
ID=idOR;
P =ptBOOL;
case strcmp(@Buff,"&")=0:
ID=idAND;
P =ptBOOL;
case strcmp(@Buff,"<")=0:
ID=idLT;
P =ptCOMP;
case strcmp(@Buff,"<=")=0:
ID=idLE;
P =ptCOMP;
case strcmp(@Buff,"=")=0:
ID=idEQ;
P =ptCOMP;
case strcmp(@Buff,"!=")=0:
ID=idNE;
P =ptCOMP;
case strcmp(@Buff,">=")=0:
ID=idGE;
P =ptCOMP;
case strcmp(@Buff,">")=0:
ID=idGT;
P =ptCOMP;
case strcmp(@Buff,"+")=0:
ID=idADD;
P =ptADD;
case strcmp(@Buff,"-")=0:
ID=idSUB;
P =ptADD;
case strcmp(@Buff,"*")=0:
ID=idMUL;
P =ptMUL;
case strcmp(@Buff,"/")=0:
ID=idDIV;
P =ptMUL;
default:
P =ptZERO;
end
if P<=Prty then
exit
end
word pType2;
word P2=Peek();
Node[P2]. ID =ID;
Node[P2].pLeft =P1;
Node[P2].pRight=Expr(P,@pType2,@Scan(@Buff));
if pType2!=pType then
Stop(@emTYPE);
end
select
case ID=idOR | ID=idAND:
if pType!=ttBOOL then
Stop(@emUNDEFOPR);
end
case idLT<=ID & ID<=idGT:
if pType!=ttWORD then
Stop(@emUNDEFOPR);
end
pType=ttBOOL;
default:
if pType!=ttWORD then
Stop(@emUNDEFOPR);
end
end
P1=P2;
end
return P1;
end
word Ctrl(char @Buff)
word P1=Peek();
select
case strcmp(@Buff,"if")=0:
word P2=Peek();
Node [P1]. ID =idIF;
Node [P1].pLeft = P2;
word pType;
Node [P2]. ID =idEMPTY;
Node [P2].pLeft = Expr(ptZERO,@pType,@Scan(@Buff));
if strcmp(@Buff,"then")!=0 then
Stop(@emTHENEXP);
end
if pType!=ttBOOL then
Stop(@emTYPE);
end
word @P3=@Node[P2].pRight;
while strcmp(@Scan(@Buff),"end")!=0 do
P3 = Ctrl(@Buff);
@P3 =@Node[P3].pRight;
end
case strcmp(@Buff,"while")=0:
word P2=Peek();
word P3=Peek();
Node[P1]. ID =idWHILE;
Node[P1].pLeft = P2;
Node[P2]. ID =idEMPTY;
Node[P2].pLeft = P3;
Node[P3]. ID =idEMPTY;
word pType;
Node[P3].pRight=Expr(ptZERO,@pType,@Scan(@Buff));
if strcmp(@Buff,"do")!=0 then
Stop(@emDOEXP);
end
if pType!=ttBOOL then
Stop(@emTYPE);
end
word @P4=@Node[P2].pRight;
while strcmp(@Scan(@Buff),"end")!=0 do
P4 = Ctrl(@Buff);
@P4 =@Node[P4].pRight;
end
case strcmp(@Buff,"write")=0:
Node[P1].ID=idWRITE;
word @P2=@Node[P1].pLeft;
while TRUE do
word pType;
word P3=Peek();
Node [P3]. ID =idEMPTY;
Node [P3].pLeft= Expr(ptZERO,@pType,@Scan(@Buff));
if pType!=ttWORD then
Stop(@emTYPE);
end
P2 = P3;
@P2=@Node[P3].pRight;
if strcmp(@Buff,";")=0 then
exit
end
if strcmp(@Buff,",")!=0 then
Stop(@emCOMMA);
end
end
case strcmp(@Buff,"blank")=0:
Node[P1].ID=idBLANK;
default:
word P2=Peek();
Node[P1]. ID =idASSIGN;
Node[P1].pLeft = P2;
Node[P2]. ID =idEMPTY;
word pType1, pType2;
Node[P2].pLeft = Expr(ptLVALUE,@pType1,@Buff);
if pType1!=ttWORD then
Stop(@emTYPE);
end
if strcmp(@Buff,"=")!=0 then
Stop(@emASSIGN);
end
Node[P2].pRight= Expr(ptZERO, @pType2,@Scan(@Buff));
if strcmp(@Buff,";")!=0 then
Stop(@emSEMICOLON);
end
if pType2!=pType1 then
Stop(@emTYPE);
end
end
return P1;
end
void Save(char Ch)
if pText>=tbSIZE then
write(hText,@Text,pText);
pText=0;
end
Text[pText]=Ch;
inc pText;
end
void Decl(char @Inst)
word I=0;
while Inst[I]!=#0 do
Save(Inst[I]);
inc I;
end
Save(#13);
Save(#10);
end
void Emit(word L; char @Inst)
if L!=0 then
Save('@');
char @P=@Str(L,5);
word I=0;
while P[I]!=#0 do
Save(P[I]);
inc I;
end
Save(':');
Save(' ');
else
word I=0;
while I<8 do
Save(' ');
inc I;
end
end
Decl(@Inst);
end
word Lbl (word ID)
select
case ID=idNUMBER:
return 0;
case ID=idVARIABLE:
return 0;
case ID=idADD:
return 0;
case ID=idSUB:
return 0;
case ID=idMUL:
return 0;
case ID=idDIV:
return 0;
case ID=idASSIGN:
return 0;
end
return 1;
end
void Enum(word P; word @L)
if P>=ntSIZE then
return
end
Enum(Node[P].pLeft, @L);
if Lbl(Node[P].ID)!=0 then
Node[P].Value=L;
inc L;
end
Enum(Node[P].pRight,@L);
end
void Code(word P; word F, T, M)
char Buff[128];
select
case Node[P].ID=idNUMBER:
Emit(0,@strcat(@strcpy(@Buff,"mov AX,"),@Str(Node[P].Value,0)));
case Node[P].ID=idVARIABLE:
if Node[P].pLeft=dtSIZE then
Stop(@emNOMEMORY);
end
Data[nData].Index=0; strcpy(@Data[nData].Name,@Buff);
word N=1;
if strcmp(@Scan(@Buff),"[")=0 then
N=Val(@Scan(@Buff));
if N<1 then
Stop(@emEMPTY);
end
Data[nData].Index=N;
if strcmp(@Scan(@Buff),"]")!=0 then
Stop(@emBRACKET);
end
Scan(@Buff);
end
inc nData;
if strcmp(@Buff,";")=0 then
exit
end
if strcmp(@Buff,",")!=0 then
Stop(@emCOMMA);
end
end
case strcmp(@Buff,"begin")=0:
word @P=@pNode;
while strcmp(@Scan(@Buff),"end")!=0 do
P = Ctrl(@Buff);
@P=@Node[P].pRight;
end
exit
default:
Stop(@emUNDEFINED);
end
end
close (hText);
word Ofs=0;
I=0;
while I0 then
Ofs=Ofs+2*Data[I].Index;
else
Ofs=Ofs+2;
end
inc I;
end
word L=1;
Enum (pNode,@L);
strcpy(@name[E],".asm");
hText=create(@name);
pText=0;
Emit(0,"mov AX,DS");
Emit(0,"add AX,4096");
Emit(0,"mov DS,AX");
while pNode0 then
write(hText,@Text,pText);
end
Stop ("");
end
Full version of compiler more complex. It includes types, functions and others.
But it uses sumilar set of functions and data structures.