The following compiler uses an LR(1)-parser. This program runs
very slowly - when using DOSBox emulator on AMD64/2GHz self compilation requires more than
five minutes. This does not mean that an LR(1)-parser works slowly - instead of repeating many
times the same actions, set of states and transitions table must be build once. This program
requires significantly larger Point array and checking it's overflow. To save memory some
array types changed from word to byte. The length of the Point array may be increased by
decreasing of the Text array, but this is limited. The correct way is to use 32- or 64-bit
operating system.
This program has different Closure(), Shift() and CheckTop() functions. Other minor
differences are results of refactoring.
c.def file:
symb ;
numb ;
name ;
lsb "[" 0 0;
rsb "]" 0 0;
lcb "(" 0 0;
rcb ")" 0 0;
assign ":=" 0 0;
plus "+" 0 0;
minus "-" 0 1;
star "*" 0 2;
slash "/" 0 3;
pct "%" 0 4;
lt "<" 0 0;
le "<=" 0 1;
eq "=" 0 2;
ne "!=" 0 3;
ge ">=" 0 4;
gt ">" 0 5;
comma "," 0 0;
semi ";" 0 0;
is 0 0;
begin 0 0;
if 0 0;
then 0 0;
while 0 0;
do 0 0;
inline 0 0;
return 0 0;
end 0 0;
char 1 1;
byte 1 1;
word 1 2.
Program : 0 Declarations Block
| 0 Block
;
Block : 10 Main Stmts end
;
Declarations : 0 Declarations Declaration
| 0 Declaration
;
Declaration : 20 TypeName semi
| 30 TypeName lsb numb rsb semi
| 10 Header Stmts end
;
Header : 0 TypeName lcb rcb is
| 0 TypeName lcb Args rcb is
;
Args : 0 Args comma Arg
| 0 Arg
;
Arg : 40 TypeName
;
TypeName : 50 Type name
;
Main : 60 begin
;
Stmts : 70 Stmts Stmt
| 70 Stmt
;
Stmt : 80 IfBlk
| 80 WhileBlk
| 80 Inlines
| 90 Ret
| 80 Local
| 80 Assign
| 80 PCall
;
IfBlk : 100 IfHdr Stmts end
;
IfHdr : 110 if Expr RelOp Expr then
;
WhileBlk : 120 WhileHdr Stmts end
;
WhileHdr : 110 Loop Expr RelOp Expr do
;
Loop : 130 while
;
Inlines : 0 inline OpCodes semi
;
OpCodes : 0 OpCodes comma OpCode
| 0 OpCode
;
OpCode : 140 numb
;
Ret : 150 return Expr semi
;
Local : 160 LocalVar assign Expr semi
| 0 LocalVar semi
;
LocalVar : 170 TypeName
;
Assign : 180 Ref assign Expr semi
;
PCall : 0 Call semi
;
Expr : 190 Expr Op1 Term
| 0 Term
;
Term : 190 Term Op2 Value
| 0 Value
;
Value : 200 symb
| 200 numb
| 210 Ref
| 0 Call
| 0 Cast
| 0 lcb Expr rcb
;
Ref : 0 Var
| 220 Array lsb Expr rsb
;
Call : 230 Fn1 lcb Params rcb
| 230 Fn1 lcb rcb
;
Params : 250 Params comma Expr
| 250 Expr
;
Fn1 : 260 Fn
;
Cast : 0 Type lcb Expr rcb
;
Type : 0 char
| 0 byte
| 0 word
;
Var : 270 name
;
Array : 280 name
;
Fn : 290 name
;
RelOp : 0 lt
| 0 le
| 0 eq
| 0 ne
| 0 ge
| 0 gt
;
Op1 : 0 plus
| 0 minus
;
Op2 : 0 star
| 0 slash
| 0 pct
.
c.prg file:
char Text [16384];
word pText;
word nText;
word nLine;
byte Code [16384];
word nCode;
word hFile;
char Heap [ 2048];
word nHeap;
word Jx [ 6];
word nData;
word Name [ 256];
word Cls [ 256];
word Sub [ 256];
word Type [ 256];
word Size [ 256];
word Ofs [ 256];
word nName1;
word nName;
word Sym [ 96];
word nSym;
byte Left [ 128];
word Id [ 128];
word pRight [ 128];
word nLeft;
byte Right [ 384];
word pLeft [ 384];
word nRight;
word Point [ 2560];
byte Term [ 2560];
word nPoint;
word Stk [ 32];
word Val [ 32];
word Ptr [ 32];
word Frame [ 32];
word pPoint [ 32];
word nStk;
char Buff [ 32];
word open() is
inline 0xB4, 0x3D; // mov AH, 3DH
inline 0xB0, 0x00; // mov AL, 0
inline 0xBA, 0x4A, 0xC1; // mov DX, @@DATA+Ofs(Heap[64])
inline 0xCD, 0x21; // int 21H
end
word create() is
inline 0xB4, 0x3C; // mov AH, 3CH
inline 0xB9, 0x00, 0x00; // mov CX, 0
inline 0xBA, 0x4A, 0xC1; // mov DX, @@DATA+Ofs(Heap[64])
inline 0xCD, 0x21; // int 21H
end
word read() is
inline 0xB4, 0x3F; // mov AH, 3FH
inline 0x8B, 0x1E, 0x08, 0xC1; // mov BX, word [@@DATA+Ofs(hFile)]
inline 0xB9, 0x00, 0x40; // mov CX, 16384
inline 0xBA, 0x00, 0x41; // mov DX, @@DATA+Ofs(Text)
inline 0xCD, 0x21; // int 21H
end
word write() is
inline 0xB4, 0x40; // mov AH, 40H
inline 0x8B, 0x1E, 0x08, 0xC1; // mov BX, word [@@DATA+Ofs(hFile)]
inline 0x8B, 0x0E, 0x06, 0xC1; // mov CX, word [@@DATA+Ofs(nCode)]
inline 0xBA, 0x06, 0x81; // mov DX, @@DATA+Ofs(Code)
inline 0xCD, 0x21; // int 21H
end
word close() is
inline 0xB4, 0x3E; // mov AH, 3EH
inline 0x8B, 0x1E, 0x08, 0xC1; // mov BX, word [@@DATA+Ofs(hFile)]
inline 0xCD, 0x21; // int 21H
end
char putc(char C) is
char C1:=C;
inline 0x92; // xchg DX, AX
inline 0xB4, 0x02; // mov AH, 2
inline 0xCD, 0x21; // int 21H
end
word halt() is
inline 0xB8, 0x00, 0x4C; // mov AX, 4C00H
inline 0xCD, 0x21; // int 21H
end
word Init() is
Heap [ 0]:='0';
Heap [ 1]:='1';
Heap [ 2]:='2';
Heap [ 3]:='3';
Heap [ 4]:='4';
Heap [ 5]:='5';
Heap [ 6]:='6';
Heap [ 7]:='7';
Heap [ 8]:='8';
Heap [ 9]:='9';
Heap [ 10]:='A';
Heap [ 11]:='B';
Heap [ 12]:='C';
Heap [ 13]:='D';
Heap [ 14]:='E';
Heap [ 15]:='F';
Heap [ 16]:='#';
Heap [ 17]:= char(0);
Heap [ 64]:='c';
Heap [ 65]:='.';
Heap [ 66]:='d'; // p
Heap [ 67]:='e'; // r
Heap [ 68]:='f'; // g
Heap [ 69]:= char(0);
nHeap := 70;
Sym [ 1]:= 16; // # (EOF)
Sym [ 2]:= 16; // #' (Unknown)
Sym [ 3]:= 16; // #'' (Extra)
nSym := 4;
Jx [ 0]:= 0x72;
Jx [ 1]:= 0x76;
Jx [ 2]:= 0x74;
Jx [ 3]:= 0x75;
Jx [ 4]:= 0x73;
Jx [ 5]:= 0x77;
end
word OpenPrg() is
Heap [ 66]:='p';
Heap [ 67]:='r';
Heap [ 68]:='g';
hFile := open();
pText := 0;
nText := 0;
nLine := 1;
end
word ClosePrg() is
close();
end
word Stop() is
nStk:=0;
while nLine!=0 do
Stk[nStk]:=nLine%10;
nStk :=nStk+1;
nLine :=nLine/10;
end
while nStk !=0 do
nStk :=nStk-1;
putc(char(Stk[nStk]+48));
end
close();
halt ();
end
word val () is
word E:=10;
word J:= 0;
if Buff[0]='0' then
if Buff[1]='x' then
E:=16;
J:= 2;
end
end
word N:=0;
while Buff[J]!=char(0) do
word K:=0;
while Heap[K]!=Buff[J] do
if K=E then
Stop();
end
K:=K+1;
end
N:=E*N;
N:=N+K;
J:=J+1;
end
return N;
end
word Copy() is
word pHeap:=nHeap;
word pBuff:=0;
while Buff[pBuff]!=char(0) do
Heap[nHeap]:= Buff[pBuff];
nHeap :=nHeap+1;
pBuff :=pBuff+1;
end
Heap[nHeap]:=char(0);
nHeap :=nHeap+1;
return pHeap;
end
word Comp(word pHeap) is
word pBuff:=0;
while Buff[pBuff]=Heap[pHeap] do
if Buff[pBuff]=char(0) then
return 0;
end
pHeap:=pHeap+1;
pBuff:=pBuff+1;
end
return 1;
end
char Look() is
if pText>=nText then
pText:=0;
nText:=read();
if pText>=nText then
return char(0);
end
end
return Text[pText];
end
char Read() is
char C:=Look();
pText :=pText+1;
return C;
end
word isalpha (char C) is
if 'A'<=C then
if C<='Z' then
return 0;
end
end
if 'a'<=C then
if C<='z' then
return 0;
end
end
return 1;
end
word isnumber(char C) is
if '0'<=C then
if C<='9' then
return 0;
end
end
return 1;
end
word isalnum (char C) is
if isalpha (C)=0 then
return 0;
end
if isnumber(C)=0 then
return 0;
end
return 1;
end
word isspace(char C) is
if C=char( 9) then
return 0;
end
if C=char(10) then
return 0;
end
if C=char(13) then
return 0;
end
if C=char(32) then
return 0;
end
return 1;
end
char SkipSpaces() is
while isspace(Look())=0 do
if Read()=char(10) then
nLine:=nLine+1;
end
end
end
char ReadOne() is
Buff[0]:=Read();
Buff[1]:=char(0);
end
char Digraph(char C1, char C2) is
if Buff[0]=C1 then
if Look()=C2 then
Buff[1]:=Read();
Buff[2]:=char(0);
end
end
end
char ReadChar() is
if Buff[0]=''' then
Buff[1]:=Read(); // char
Buff[2]:=Read(); // '
Buff[3]:=char(0);
end
end
word ReadWord() is
if isalnum(Buff[0])=0 then
word pBuff := 1;
while isalnum (Look())=0 do
Buff[pBuff]:= Read();
pBuff :=pBuff+1;
end
Buff[pBuff] := char(0);
end
end
char ReadSym() is
ReadOne ();
Digraph ('<', '=');
Digraph ('!', '=');
Digraph ('>', '=');
Digraph (':', '=');
ReadChar();
ReadWord();
end
word isComment() is
if Buff[0]='/' then
if Look() ='/' then
return 0;
end
end
return 1;
end
word SkipComment() is
if isComment()=0 then
while Look()!=char(10) do
if Read()=char( 0) then
Stop();
end
end
return 0;
end
return 1;
end
char Scan() is
word Flag:=0;
while Flag =0 do
SkipSpaces();
ReadSym();
Flag:=SkipComment();
end
end
word pSymb;
word pProg;
word L;
word V;
word Alter() is
L:=2;
word pName:=0;
while pName =nName then
Stop();
end
if Sub[Ptr[I]]!=1 then
Stop();
end
Save(Ptr[I]);
Ptr[I]:=Ptr[I]+1;
lFlag:=0;
end
word rFlag;
word Action(word A) is
if A= 10 then
if rFlag=0 then
Emi1(0xC3); // retn
end
Hide();
end
if A= 20 then
Patch(2, 0, 1);
end
if A= 30 then
Patch(3, 0, Val[nStk+2]);
end
if A= 40 then
Patch(2, 1, 1);
end
if A= 50 then
Name [nName]:= Val[nStk+1];
Cls [nName]:= 4;
Sub [nName]:= 0;
Type [nName]:= Val[nStk];
Ofs [nName]:=nCode;
Val [nStk] :=nName;
nName :=nName+1;
Frame[nStk] :=nName;
end
if A= 60 then
word N := nCode-3;
Code[1]:= N%256;
Code[2]:= N/256;
Frame[nStk] :=nName;
end
if A= 70 then
lFlag:=0;
end
if A= 80 then
lFlag:=0;
rFlag:=0;
end
if A= 90 then
lFlag:=0;
rFlag:=1;
end
if A=100 then
word N:=(nCode-Val[nStk])-3;
Code[Val[nStk]+1]:=N%256;
Code[Val[nStk]+2]:=N/256;
Hide();
end
if A=110 then
Pop();
Emi1(0x3B); // cmp BX, AX
Emi1(0xD8);
Emi1(Jx[Val[nStk+2]]); // jxx $+3
Emi1(0x03);
Ptr [nStk]:= Val[nStk];
Val [nStk]:=nCode;
Frame[nStk]:=nName;
Emi1(0xE9); // jmp ?
Emi2(0x00);
lFlag:=0;
end
if A=120 then
word W:=(0xFFFF-((nCode+3)-Ptr[nStk]))+1;
Emi1(0xE9); // jmp Ofs
Emi2(W);
word N:=(nCode-Val[nStk])-3;
Code[Val[nStk]+1]:=N%256;
Code[Val[nStk]+2]:=N/256;
Hide();
end
if A=130 then
Val[nStk]:=nCode;
end
if A=140 then
Emi1(Val[nStk]);
end
if A=150 then
Emi1(0xC3); // retn
end
if A=160 then
Save(Val[nStk]);
end
if A=170 then
Patch(2, 2, 1);
end
if A=180 then
word I:=Val[nStk];
if Cls[I]=3 then
Pop();
end
Save(I);
end
if A=190 then
word I:=Val[nStk+1];
Pop();
if I =0 then
Emi1(0x03); // add AX, BX
Emi1(0xC3);
end
if I =1 then
Emi1(0x93); // xchg BX, AX
Emi1(0x2B); // sub AX, BX
Emi1(0xC3);
end
if I =2 then
Emi1(0xF7); // mul BX
Emi1(0xE3);
end
if I>=3 then
Emi1(0x93); // xchg BX, AX
Emi1(0x33); // xor DX, DX
Emi1(0xD2);
Emi1(0xF7); // div BX
Emi1(0xF3);
end
if I =4 then
Emi1(0x92); // xchg DX, AX
end
end
if A=200 then
Push();
Emi1(0xB8); // mov AX, Val
Emi2(Val[nStk]);
end
if A=210 then
word I:=Val[nStk];
if Cls[I]!=3 then
Push();
end
if Cls[I] =3 then
Emi1(0x93); // xchg BX, AX
end
byte C:=0x8B; // mov AX, [(BX+)Ofs]
if Size[Type[I]]=1 then
Emi1(0x32); // xor AH, AH
Emi1(0xE4);
C:=0x8A; // mov AL, [(BX+)Ofs]
end
Move(I, C);
lFlag:=1;
end
if A=220 then
if Size[Type[Val[nStk]]]!=1 then
Emi1(0xD1); // shl AX, 1
Emi1(0xE0);
end
end
if A=230 then
if Ptr[nStk]=2560 then
nLine:=nLine+10000;
Stop();
end
Point [nPoint1]:= Point1;
Term [nPoint1]:= Term1;
return nPoint1+1;
end
word First(word Point1, word Term1) is
if Right[Point1]=0 then
return AddPoint(nPoint, Point1, Term1);
end
word nPoint2:= AddPoint(nPoint, Point1, Right[Point1]);
word P := nPoint;
while P < nPoint2 do
word R:=Right[Point[P]];
word I:=0;
while I< nLeft do
if Left[I]=R then
if FindPoint(nPoint, nPoint2, pRight[I], 0)=nPoint2 then
nPoint2:=AddPoint(nPoint2, pRight[I], Right[pRight[I]]);
end
Term[P] :=0;
end
I:=I+1;
end
P:=P+1;
end
return nPoint2;
end
word Closure(word pPoint1) is
word P:=pPoint1;
while P< nPoint do
word R:=Right[Point[P]];
word I:=0;
while I< nLeft do
if Left[I]=R then
word nPoint2:= First(Point[P]+1, Term[P]);
word pPoint2:=nPoint;
while pPoint2< nPoint2 do
if Term[pPoint2]!=0 then
if FindPoint(pPoint1, nPoint, pRight[I], Term[pPoint2])=nPoint then
Point[nPoint]:=pRight[I];
Term [nPoint]:=Term[pPoint2];
nPoint :=nPoint+1;
end
end
pPoint2:=pPoint2+1;
end
end
I:=I+1;
end
P:=P+1;
end
end
word Shift() is
word S:= Stk [nStk];
word N:=nPoint;
word I:=pPoint[nStk];
while I< N do
if Right[Point[I]]=S then
nPoint:=AddPoint(nPoint, Point[I]+1, Term[I]);
end
I:=I+1;
end
if nPoint=N then
Stop();
end
nStk :=nStk+1;
pPoint[nStk]:=N;
Closure(pPoint[nStk]);
end
word Reduce(word R) is
word pL :=pLeft[R];
nStk :=nStk-(R-pRight[pL]);
nPoint :=pPoint[nStk+1];
Stk[nStk]:= Left[pL];
Action(Id[pL]);
end
word CheckTop(word L1) is
word S:=0;
word R:=nRight;
word I:=pPoint[nStk];
while I< nPoint do
if Right[Point[I]]=L1 then
if R< nRight then
Stop();
end
S:=1;
end
if Right[Point[I]]=0 then
if Term[I]=L1 then
if S!=0 then
Stop();
end
if R< nRight then
Stop();
end
R:= Point[I];
end
end
I:=I+1;
end
if S=0 then
if R=nRight then
Stop();
end
end
return R;
end
word AddExtraRule() is
word nRight0:=nRight;
Left [nLeft] := 3; // #'' (Extra)
pRight[nLeft] := nRight;
AddSym(pProg) ; // Program
AddSym(1); // # (EOF)
AddSym(0);
nLeft := nLeft+1;
return nRight0;
end
word WriteBin() is
Heap [ 66]:='c';
Heap [ 67]:='o';
Heap [ 68]:='m';
hFile:=create();
write();
close();
end
word Compile() is
OpenPrg();
lFlag := 0;
rFlag := 0;
nCode := 0;
nData :=16640;
Emi1(0xE9); // jmp ?
Emi2(0x00);
Point [0] := AddExtraRule();
Term [0] := 1;
nPoint := 1;
Stk [0] := 1; // #
pPoint[0] := 0;
nStk := 0;
Closure(pPoint[nStk]);
while Stk[0]!=pProg do
Scan ();
Alter();
word R:=CheckTop(L);
while R< nRight do
Reduce(R);
Shift ();
R:=CheckTop(L);
end
Stk [nStk]:=L;
Val [nStk]:=V;
Shift();
end
ClosePrg();
WriteBin();
end
begin
Init();
LoadGrammar();
Compile();
end