LR(1)-parser

The following compiler uses an LR(1)-parser. This program runs very slowly - when using DOSBox emulator on AMD64/2GHz self compilation requires more than five minutes. This does not mean that an LR(1)-parser works slowly - instead of repeating many times the same actions, set of states and transitions table must be build once. This program requires significantly larger Point array and checking it's overflow. To save memory some array types changed from word to byte. The length of the Point array may be increased by decreasing of the Text array, but this is limited. The correct way is to use 32- or 64-bit operating system.

This program has different Closure(), Shift() and CheckTop() functions. Other minor differences are results of refactoring.

c.def file:

symb ; numb ; name ; lsb "[" 0 0; rsb "]" 0 0; lcb "(" 0 0; rcb ")" 0 0; assign ":=" 0 0; plus "+" 0 0; minus "-" 0 1; star "*" 0 2; slash "/" 0 3; pct "%" 0 4; lt "<" 0 0; le "<=" 0 1; eq "=" 0 2; ne "!=" 0 3; ge ">=" 0 4; gt ">" 0 5; comma "," 0 0; semi ";" 0 0; is 0 0; begin 0 0; if 0 0; then 0 0; while 0 0; do 0 0; inline 0 0; return 0 0; end 0 0; char 1 1; byte 1 1; word 1 2. Program : 0 Declarations Block | 0 Block ; Block : 10 Main Stmts end ; Declarations : 0 Declarations Declaration | 0 Declaration ; Declaration : 20 TypeName semi | 30 TypeName lsb numb rsb semi | 10 Header Stmts end ; Header : 0 TypeName lcb rcb is | 0 TypeName lcb Args rcb is ; Args : 0 Args comma Arg | 0 Arg ; Arg : 40 TypeName ; TypeName : 50 Type name ; Main : 60 begin ; Stmts : 70 Stmts Stmt | 70 Stmt ; Stmt : 80 IfBlk | 80 WhileBlk | 80 Inlines | 90 Ret | 80 Local | 80 Assign | 80 PCall ; IfBlk : 100 IfHdr Stmts end ; IfHdr : 110 if Expr RelOp Expr then ; WhileBlk : 120 WhileHdr Stmts end ; WhileHdr : 110 Loop Expr RelOp Expr do ; Loop : 130 while ; Inlines : 0 inline OpCodes semi ; OpCodes : 0 OpCodes comma OpCode | 0 OpCode ; OpCode : 140 numb ; Ret : 150 return Expr semi ; Local : 160 LocalVar assign Expr semi | 0 LocalVar semi ; LocalVar : 170 TypeName ; Assign : 180 Ref assign Expr semi ; PCall : 0 Call semi ; Expr : 190 Expr Op1 Term | 0 Term ; Term : 190 Term Op2 Value | 0 Value ; Value : 200 symb | 200 numb | 210 Ref | 0 Call | 0 Cast | 0 lcb Expr rcb ; Ref : 0 Var | 220 Array lsb Expr rsb ; Call : 230 Fn1 lcb Params rcb | 230 Fn1 lcb rcb ; Params : 250 Params comma Expr | 250 Expr ; Fn1 : 260 Fn ; Cast : 0 Type lcb Expr rcb ; Type : 0 char | 0 byte | 0 word ; Var : 270 name ; Array : 280 name ; Fn : 290 name ; RelOp : 0 lt | 0 le | 0 eq | 0 ne | 0 ge | 0 gt ; Op1 : 0 plus | 0 minus ; Op2 : 0 star | 0 slash | 0 pct .

c.prg file:

char Text [16384]; word pText; word nText; word nLine; byte Code [16384]; word nCode; word hFile; char Heap [ 2048]; word nHeap; word Jx [ 6]; word nData; word Name [ 256]; word Cls [ 256]; word Sub [ 256]; word Type [ 256]; word Size [ 256]; word Ofs [ 256]; word nName1; word nName; word Sym [ 96]; word nSym; byte Left [ 128]; word Id [ 128]; word pRight [ 128]; word nLeft; byte Right [ 384]; word pLeft [ 384]; word nRight; word Point [ 2560]; byte Term [ 2560]; word nPoint; word Stk [ 32]; word Val [ 32]; word Ptr [ 32]; word Frame [ 32]; word pPoint [ 32]; word nStk; char Buff [ 32]; word open() is inline 0xB4, 0x3D; // mov AH, 3DH inline 0xB0, 0x00; // mov AL, 0 inline 0xBA, 0x4A, 0xC1; // mov DX, @@DATA+Ofs(Heap[64]) inline 0xCD, 0x21; // int 21H end word create() is inline 0xB4, 0x3C; // mov AH, 3CH inline 0xB9, 0x00, 0x00; // mov CX, 0 inline 0xBA, 0x4A, 0xC1; // mov DX, @@DATA+Ofs(Heap[64]) inline 0xCD, 0x21; // int 21H end word read() is inline 0xB4, 0x3F; // mov AH, 3FH inline 0x8B, 0x1E, 0x08, 0xC1; // mov BX, word [@@DATA+Ofs(hFile)] inline 0xB9, 0x00, 0x40; // mov CX, 16384 inline 0xBA, 0x00, 0x41; // mov DX, @@DATA+Ofs(Text) inline 0xCD, 0x21; // int 21H end word write() is inline 0xB4, 0x40; // mov AH, 40H inline 0x8B, 0x1E, 0x08, 0xC1; // mov BX, word [@@DATA+Ofs(hFile)] inline 0x8B, 0x0E, 0x06, 0xC1; // mov CX, word [@@DATA+Ofs(nCode)] inline 0xBA, 0x06, 0x81; // mov DX, @@DATA+Ofs(Code) inline 0xCD, 0x21; // int 21H end word close() is inline 0xB4, 0x3E; // mov AH, 3EH inline 0x8B, 0x1E, 0x08, 0xC1; // mov BX, word [@@DATA+Ofs(hFile)] inline 0xCD, 0x21; // int 21H end char putc(char C) is char C1:=C; inline 0x92; // xchg DX, AX inline 0xB4, 0x02; // mov AH, 2 inline 0xCD, 0x21; // int 21H end word halt() is inline 0xB8, 0x00, 0x4C; // mov AX, 4C00H inline 0xCD, 0x21; // int 21H end word Init() is Heap [ 0]:='0'; Heap [ 1]:='1'; Heap [ 2]:='2'; Heap [ 3]:='3'; Heap [ 4]:='4'; Heap [ 5]:='5'; Heap [ 6]:='6'; Heap [ 7]:='7'; Heap [ 8]:='8'; Heap [ 9]:='9'; Heap [ 10]:='A'; Heap [ 11]:='B'; Heap [ 12]:='C'; Heap [ 13]:='D'; Heap [ 14]:='E'; Heap [ 15]:='F'; Heap [ 16]:='#'; Heap [ 17]:= char(0); Heap [ 64]:='c'; Heap [ 65]:='.'; Heap [ 66]:='d'; // p Heap [ 67]:='e'; // r Heap [ 68]:='f'; // g Heap [ 69]:= char(0); nHeap := 70; Sym [ 1]:= 16; // # (EOF) Sym [ 2]:= 16; // #' (Unknown) Sym [ 3]:= 16; // #'' (Extra) nSym := 4; Jx [ 0]:= 0x72; Jx [ 1]:= 0x76; Jx [ 2]:= 0x74; Jx [ 3]:= 0x75; Jx [ 4]:= 0x73; Jx [ 5]:= 0x77; end word OpenPrg() is Heap [ 66]:='p'; Heap [ 67]:='r'; Heap [ 68]:='g'; hFile := open(); pText := 0; nText := 0; nLine := 1; end word ClosePrg() is close(); end word Stop() is nStk:=0; while nLine!=0 do Stk[nStk]:=nLine%10; nStk :=nStk+1; nLine :=nLine/10; end while nStk !=0 do nStk :=nStk-1; putc(char(Stk[nStk]+48)); end close(); halt (); end word val () is word E:=10; word J:= 0; if Buff[0]='0' then if Buff[1]='x' then E:=16; J:= 2; end end word N:=0; while Buff[J]!=char(0) do word K:=0; while Heap[K]!=Buff[J] do if K=E then Stop(); end K:=K+1; end N:=E*N; N:=N+K; J:=J+1; end return N; end word Copy() is word pHeap:=nHeap; word pBuff:=0; while Buff[pBuff]!=char(0) do Heap[nHeap]:= Buff[pBuff]; nHeap :=nHeap+1; pBuff :=pBuff+1; end Heap[nHeap]:=char(0); nHeap :=nHeap+1; return pHeap; end word Comp(word pHeap) is word pBuff:=0; while Buff[pBuff]=Heap[pHeap] do if Buff[pBuff]=char(0) then return 0; end pHeap:=pHeap+1; pBuff:=pBuff+1; end return 1; end char Look() is if pText>=nText then pText:=0; nText:=read(); if pText>=nText then return char(0); end end return Text[pText]; end char Read() is char C:=Look(); pText :=pText+1; return C; end word isalpha (char C) is if 'A'<=C then if C<='Z' then return 0; end end if 'a'<=C then if C<='z' then return 0; end end return 1; end word isnumber(char C) is if '0'<=C then if C<='9' then return 0; end end return 1; end word isalnum (char C) is if isalpha (C)=0 then return 0; end if isnumber(C)=0 then return 0; end return 1; end word isspace(char C) is if C=char( 9) then return 0; end if C=char(10) then return 0; end if C=char(13) then return 0; end if C=char(32) then return 0; end return 1; end char SkipSpaces() is while isspace(Look())=0 do if Read()=char(10) then nLine:=nLine+1; end end end char ReadOne() is Buff[0]:=Read(); Buff[1]:=char(0); end char Digraph(char C1, char C2) is if Buff[0]=C1 then if Look()=C2 then Buff[1]:=Read(); Buff[2]:=char(0); end end end char ReadChar() is if Buff[0]=''' then Buff[1]:=Read(); // char Buff[2]:=Read(); // ' Buff[3]:=char(0); end end word ReadWord() is if isalnum(Buff[0])=0 then word pBuff := 1; while isalnum (Look())=0 do Buff[pBuff]:= Read(); pBuff :=pBuff+1; end Buff[pBuff] := char(0); end end char ReadSym() is ReadOne (); Digraph ('<', '='); Digraph ('!', '='); Digraph ('>', '='); Digraph (':', '='); ReadChar(); ReadWord(); end word isComment() is if Buff[0]='/' then if Look() ='/' then return 0; end end return 1; end word SkipComment() is if isComment()=0 then while Look()!=char(10) do if Read()=char( 0) then Stop(); end end return 0; end return 1; end char Scan() is word Flag:=0; while Flag =0 do SkipSpaces(); ReadSym(); Flag:=SkipComment(); end end word pSymb; word pProg; word L; word V; word Alter() is L:=2; word pName:=0; while pName <nName1 do word pHeap:=Name[pName]; if Heap[pHeap]!=char(0) then if Comp(pHeap)=0 then L:=Ofs [pName]; V:=Size[pName]; if Cls [pName]=1 then V:=pName; end return L; end end pName:=pName+1; end if Buff[0]=char(0) then L:=1; end if Buff[0]=''' then L:=pSymb; V:= word(Buff[1]); end if isnumber(Buff[0])=0 then L:=pSymb+1; V:= val (); end if isalpha (Buff[0])=0 then L:=pSymb+2; V:= Copy(); end return L; end word Find() is word I:=1; while I<nSym do if Comp(Sym[I])=0 then return I; end I:=I+1; end Sym [nSym]:= Copy(); nSym :=nSym +1; return I; end word IsEnd() is Scan(); if Buff[0]!='|' then if Buff[0]!=';' then if Buff[0]!='.' then return 1; end end end return 0; end word AddSym(word pSym) is Right[nRight]:=pSym; pLeft[nRight]:=nLeft; nRight :=nRight+1; end word OpenGrammar() is hFile := open(); pText := 0; nText := 0; nLine := 1; end word CloseGrammar() is close(); end word LoadGrammar() is OpenGrammar(); pSymb :=nSym; nName := 0; Buff[0] := ';'; while Buff[0]= ';' do Scan(); word pSym:=Find(); Scan(); if Buff[0]!=';' then Name[nName]:= Sym [pSym]; if Buff[0]='"' then Scan(); Name[nName]:=Copy(); Scan(); // " Scan(); end Cls [nName]:= val (); Scan(); Size[nName]:= val (); Ofs [nName]:=pSym; nName :=nName+1; Scan(); // ; end end nName1 :=nName; pProg :=nSym; nLeft := 0; nRight := 0; Buff[0] := ';'; while Buff[0]= ';' do Scan(); word pSym:=Find(); Scan(); Buff[0]:='|'; while Buff[0]='|' do Scan(); Left [nLeft]:=pSym; Id [nLeft]:= val(); pRight[nLeft]:=nRight; while IsEnd()!=0 do AddSym(Find()); end AddSym(0); nLeft :=nLeft+1; end end CloseGrammar(); end word Comp2(word pHeap1, word pHeap2) is while Heap[pHeap1]=Heap[pHeap2] do if Heap[pHeap1]=char(0) then return 0; end pHeap1:=pHeap1+1; pHeap2:=pHeap2+1; end return 1; end word CheckName(word pHeap, word Cls2) is word I:=1; while I<nName do if Comp2(Name[I], pHeap)=0 then if Cls[I]!=Cls2 then Stop(); end nHeap:=pHeap; return I; end I:=I+1; end Stop(); end byte Emi1(byte C) is Code[nCode]:=C; nCode:=nCode+1; end byte Emi2(word W) is Emi1(W%256); Emi1(W/256); end word lFlag; word Push() is if lFlag!=0 then Emi1(0x50); // push AX end lFlag:=1; end word Pop() is Emi1(0x5B); // pop BX end word Move(word I, byte C) is Emi1(C); C:=0x06; // [Ofs] if Cls[I]=3 then C:=0x87; // [BX+Ofs] end Emi1(C); Emi2(Ofs[I]); end word Save(word I) is byte C:=0x89; // mov [(BX+)Ofs], AX if Size[Type[I]]=1 then C:=0x88; // mov [(BX+)Ofs], AL end Move(I, C); end word Hide() is word H:=Frame[nStk]; while H<nName do Heap[Name[H]]:=char(0); H:=H+1; end end word Patch(word C, word S, word N) is Cls [Val[nStk]]:= C; Sub [Val[nStk]]:= S; Size[Val[nStk]]:= N; Ofs [Val[nStk]]:=nData; word D := Size[Type[Val[nStk]]]*Size[Val[nStk]]; nData :=nData+D; if nData%2!=0 then nData:=nData+1; end end word Param(word I) is if Ptr[I]>=nName then Stop(); end if Sub[Ptr[I]]!=1 then Stop(); end Save(Ptr[I]); Ptr[I]:=Ptr[I]+1; lFlag:=0; end word rFlag; word Action(word A) is if A= 10 then if rFlag=0 then Emi1(0xC3); // retn end Hide(); end if A= 20 then Patch(2, 0, 1); end if A= 30 then Patch(3, 0, Val[nStk+2]); end if A= 40 then Patch(2, 1, 1); end if A= 50 then Name [nName]:= Val[nStk+1]; Cls [nName]:= 4; Sub [nName]:= 0; Type [nName]:= Val[nStk]; Ofs [nName]:=nCode; Val [nStk] :=nName; nName :=nName+1; Frame[nStk] :=nName; end if A= 60 then word N := nCode-3; Code[1]:= N%256; Code[2]:= N/256; Frame[nStk] :=nName; end if A= 70 then lFlag:=0; end if A= 80 then lFlag:=0; rFlag:=0; end if A= 90 then lFlag:=0; rFlag:=1; end if A=100 then word N:=(nCode-Val[nStk])-3; Code[Val[nStk]+1]:=N%256; Code[Val[nStk]+2]:=N/256; Hide(); end if A=110 then Pop(); Emi1(0x3B); // cmp BX, AX Emi1(0xD8); Emi1(Jx[Val[nStk+2]]); // jxx $+3 Emi1(0x03); Ptr [nStk]:= Val[nStk]; Val [nStk]:=nCode; Frame[nStk]:=nName; Emi1(0xE9); // jmp ? Emi2(0x00); lFlag:=0; end if A=120 then word W:=(0xFFFF-((nCode+3)-Ptr[nStk]))+1; Emi1(0xE9); // jmp Ofs Emi2(W); word N:=(nCode-Val[nStk])-3; Code[Val[nStk]+1]:=N%256; Code[Val[nStk]+2]:=N/256; Hide(); end if A=130 then Val[nStk]:=nCode; end if A=140 then Emi1(Val[nStk]); end if A=150 then Emi1(0xC3); // retn end if A=160 then Save(Val[nStk]); end if A=170 then Patch(2, 2, 1); end if A=180 then word I:=Val[nStk]; if Cls[I]=3 then Pop(); end Save(I); end if A=190 then word I:=Val[nStk+1]; Pop(); if I =0 then Emi1(0x03); // add AX, BX Emi1(0xC3); end if I =1 then Emi1(0x93); // xchg BX, AX Emi1(0x2B); // sub AX, BX Emi1(0xC3); end if I =2 then Emi1(0xF7); // mul BX Emi1(0xE3); end if I>=3 then Emi1(0x93); // xchg BX, AX Emi1(0x33); // xor DX, DX Emi1(0xD2); Emi1(0xF7); // div BX Emi1(0xF3); end if I =4 then Emi1(0x92); // xchg DX, AX end end if A=200 then Push(); Emi1(0xB8); // mov AX, Val Emi2(Val[nStk]); end if A=210 then word I:=Val[nStk]; if Cls[I]!=3 then Push(); end if Cls[I] =3 then Emi1(0x93); // xchg BX, AX end byte C:=0x8B; // mov AX, [(BX+)Ofs] if Size[Type[I]]=1 then Emi1(0x32); // xor AH, AH Emi1(0xE4); C:=0x8A; // mov AL, [(BX+)Ofs] end Move(I, C); lFlag:=1; end if A=220 then if Size[Type[Val[nStk]]]!=1 then Emi1(0xD1); // shl AX, 1 Emi1(0xE0); end end if A=230 then if Ptr[nStk]<nName then if Sub[Ptr[nStk]]=1 then Stop(); end end Push(); word W:=(0xFFFF-((nCode+3)-Ofs[Val[nStk]]))+1; Emi1(0xE8); // call Ofs Emi2(W); end if A=240 then Param(nStk); end if A=250 then Param(nStk-2); end if A=260 then Ptr[nStk]:=Val[nStk]+1; end if A=270 then Val[nStk]:=CheckName(Val[nStk], 2); end if A=280 then Val[nStk]:=CheckName(Val[nStk], 3); end if A=290 then Val[nStk]:=CheckName(Val[nStk], 4); end end word FindPoint(word I, word N, word P, word T) is while I<N do if Point[I]=P then if T=0 then return I; end if Term[I]=T then return I; end end I:=I+1; end return I; end word AddPoint(word nPoint1, word Point1, word Term1) is if nPoint1>=2560 then nLine:=nLine+10000; Stop(); end Point [nPoint1]:= Point1; Term [nPoint1]:= Term1; return nPoint1+1; end word First(word Point1, word Term1) is if Right[Point1]=0 then return AddPoint(nPoint, Point1, Term1); end word nPoint2:= AddPoint(nPoint, Point1, Right[Point1]); word P := nPoint; while P < nPoint2 do word R:=Right[Point[P]]; word I:=0; while I< nLeft do if Left[I]=R then if FindPoint(nPoint, nPoint2, pRight[I], 0)=nPoint2 then nPoint2:=AddPoint(nPoint2, pRight[I], Right[pRight[I]]); end Term[P] :=0; end I:=I+1; end P:=P+1; end return nPoint2; end word Closure(word pPoint1) is word P:=pPoint1; while P< nPoint do word R:=Right[Point[P]]; word I:=0; while I< nLeft do if Left[I]=R then word nPoint2:= First(Point[P]+1, Term[P]); word pPoint2:=nPoint; while pPoint2< nPoint2 do if Term[pPoint2]!=0 then if FindPoint(pPoint1, nPoint, pRight[I], Term[pPoint2])=nPoint then Point[nPoint]:=pRight[I]; Term [nPoint]:=Term[pPoint2]; nPoint :=nPoint+1; end end pPoint2:=pPoint2+1; end end I:=I+1; end P:=P+1; end end word Shift() is word S:= Stk [nStk]; word N:=nPoint; word I:=pPoint[nStk]; while I< N do if Right[Point[I]]=S then nPoint:=AddPoint(nPoint, Point[I]+1, Term[I]); end I:=I+1; end if nPoint=N then Stop(); end nStk :=nStk+1; pPoint[nStk]:=N; Closure(pPoint[nStk]); end word Reduce(word R) is word pL :=pLeft[R]; nStk :=nStk-(R-pRight[pL]); nPoint :=pPoint[nStk+1]; Stk[nStk]:= Left[pL]; Action(Id[pL]); end word CheckTop(word L1) is word S:=0; word R:=nRight; word I:=pPoint[nStk]; while I< nPoint do if Right[Point[I]]=L1 then if R< nRight then Stop(); end S:=1; end if Right[Point[I]]=0 then if Term[I]=L1 then if S!=0 then Stop(); end if R< nRight then Stop(); end R:= Point[I]; end end I:=I+1; end if S=0 then if R=nRight then Stop(); end end return R; end word AddExtraRule() is word nRight0:=nRight; Left [nLeft] := 3; // #'' (Extra) pRight[nLeft] := nRight; AddSym(pProg) ; // Program AddSym(1); // # (EOF) AddSym(0); nLeft := nLeft+1; return nRight0; end word WriteBin() is Heap [ 66]:='c'; Heap [ 67]:='o'; Heap [ 68]:='m'; hFile:=create(); write(); close(); end word Compile() is OpenPrg(); lFlag := 0; rFlag := 0; nCode := 0; nData :=16640; Emi1(0xE9); // jmp ? Emi2(0x00); Point [0] := AddExtraRule(); Term [0] := 1; nPoint := 1; Stk [0] := 1; // # pPoint[0] := 0; nStk := 0; Closure(pPoint[nStk]); while Stk[0]!=pProg do Scan (); Alter(); word R:=CheckTop(L); while R< nRight do Reduce(R); Shift (); R:=CheckTop(L); end Stk [nStk]:=L; Val [nStk]:=V; Shift(); end ClosePrg(); WriteBin(); end begin Init(); LoadGrammar(); Compile(); end

Сайт создан в системе uCoz