{****************************************************************************
Copyright (c) 1993,96 by Florian Klämpfl
****************************************************************************}
unit scanner;
interface
uses
cobjects,globals,symtable;
const
id_len = 14;
type
ident = string[id_len];
const
{$ifdef L_C}
anz_keywords = 32;
keyword : array[1..anz_keywords] of ident = (
'auto','break','case','char','const','continue','default','do',
'double','else','enum','extern','float','for','goto','if',
'int','long','register','return','short','signed','sizeof','static',
'struct','switch','typedef','union','unsigned','void','volatile',
'while');
{$else}
anz_keywords = 77;
keyword : array[1..anz_keywords] of ident = (
'ABSOLUTE','AND','ARRAY','AS','ASM','ASSEMBLER','BEGIN',
'BREAK','CASE','CLASS',
'CONST','CONSTRUCTOR','CONTINUE',
'DESTRUCTOR','DISPOSE','DIV','DO','DOWNTO','ELSE','END',
'EXCEPT',
'EXIT','EXPORT','EXTERNAL','FAIL','FALSE','FAR','FOR',
'FORWARD','FUNCTION','GOTO','IF','IMPLEMENTATION','IN',
'INHERITED','INLINE','INTERFACE','INTERRUPT','IS',
'LABEL','MOD','NEAR','NEW','NIL','NOT','OBJECT',
'OF','ON','OPERATOR','OR','OTHERWISE','PACKED','PRIVATE',
'PROCEDURE','PROGRAM','PROTECTED','PUBLIC',
'RECORD','REPEAT','SELF',
'SET','SHL','SHR','STRING','THEN','TO',
'TRUE','TRY','TYPE','UNIT','UNTIL',
'USES','VAR','VIRTUAL','WHILE','WITH','XOR');
keyword_token : array[1..anz_keywords] of ttoken = (
_ABSOLUTE,_AND,_ARRAY,_AS,_ASM,_ASSEMBLER,_BEGIN,
_BREAK,_CASE,_CLASS,
_CONST,_CONSTRUCTOR,_CONTINUE,
_DESTRUCTOR,_DISPOSE,_DIV,_DO,_DOWNTO,_ELSE,_END,
_EXCEPT,
_EXIT,_EXPORT,_EXTERNAL,_FAIL,_FALSE,_FAR,_FOR,
_FORWARD,_FUNCTION,_GOTO,_IF,_IMPLEMENTATION,_IN,
_INHERITED,_INLINE,_INTERFACE,_INTERRUPT,_IS,
_LABEL,_MOD,_NEAR,_NEW,_NIL,_NOT,_OBJECT,
_OF,_ON,_OPERATOR,_OR,_OTHERWISE,_PACKED,_PRIVATE,
_PROCEDURE,_PROGRAM,_PROTECTED,_PUBLIC,
_RECORD,_REPEAT,_SELF,
_SET,_SHL,_SHR,_STRING,_THEN,_TO,
_TRUE,_TRY,_TYPE,_UNIT,_UNTIL,
_USES,_VAR,_VIRTUAL,_WHILE,_WITH,_XOR);
{$endif}
function yylex : ttoken;
procedure initscanner(const source : string);
procedure donescanner;
{ the asm parser use this function getting the input }
function asmgetchar : char;
{ this procedure is called at the end of each line }
{ and the function does the statistics }
procedure write_line;
var
pattern,orgpattern : string;
yyin : file;
{ true, if type declarations are parsed }
parse_types : boolean;
implementation
const
newline = #10;
var
inputbuffer : pinputbuffer;
inputpointer : word;
s_point : boolean;
c : char;
kommentarebene : word;
procedure reload;
var
readsize : word;
begin
if inputstack=nil then
internalerror(14);
if inputstack^.filenotatend then
begin
{ noch ein Teil laden }
blockread(inputstack^.f,inputbuffer^,inputstack^.buffersize-1,readsize);
inputbuffer^[readsize]:=#0;
c:=inputbuffer^[0];
inputpointer:=1;
if eof(inputstack^.f) then
begin
inputstack^.filenotatend:=false;
close(inputstack^.f);
{ wenn auesserste Datei, dann EOF-Zeichen }
if inputstack^.next=nil then
inputbuffer^[readsize]:=#26;
end;
end
else
begin
{ Buffer loeschen }
freemem(inputbuffer,inputstack^.buffersize);
{ inputstack *** nicht *** löschen da Treenodes }
{ Pointer darauf besitzen können }
inputstack:=inputstack^.next;
inputbuffer:=inputstack^.inputbuffer;
inputpointer:=inputstack^.inputpointer;
if assigned(inputstack) then c:=inputbuffer^[inputpointer];
inc(inputpointer);
end;
end;
const
lastmem : longint = 0;
procedure write_line;
var
s : string;
begin
if not(quiet) then
begin
if (abslines=1) then
case language of
'E' : writeln(memavail div 1024,' kB free');
'D' : writeln(memavail div 1024,' kB frei');
end;
if (abslines mod 100=0) then
begin
case language of
'E' : write(abslines,' lines',' ',memavail div 1024,' kB free');
'D' : write(abslines,' Zeilen',' ',memavail div 1024,' kB frei');
end;
{$ifdef tp}
if (use_big) then
case language of
'E' : write(', ',symbolstream.size div 1024,' kB EMS used');
'D' : write(', ',symbolstream.size div 1024,' kB EMS benutzt');
end;
{$endif}
writeln
end;
end;
{
if lastmem<>0 then
writeln('Benutzt ',lastmem-memavail,' Bytes');
lastmem:=memavail;
writeln(abslines,' ',inputstack^.filename^,'(',inputstack^.line_no,')');
}
inc(inputstack^.line_no);
inc(abslines);
end;
procedure kommentar;forward;
procedure skipspace;
begin
while (c=' ') or (c=#9) or (c=#13) or (c=#12) or (c=#10) do
begin
if c=#10 then write_line;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;if c='{' then kommentar;
end;
end;
const
macrobuffer : pchar = nil;
{$ifdef tp}
max_macrosize = 1024;
{$else}
max_macrosize = 65536;
{$endif}
var
aktpreprozebene : word;
procedure kommentar;
function read_string : string;
var
hs : string;
begin
hs:='';
while ((ord(c)>=ord('A')) and (ord(c)<=ord('Z')))
or ((ord(c)>=ord('a')) and (ord(c)<=ord('z'))) do
begin
hs:=hs+upcase(c);
c:=inputbuffer^[inputpointer];if c=#0 then reload;inc(inputpointer);
end;
read_string:=hs;
end;
function read_number : longint;
var
hs : string;
l : longint;
w : word;
begin
read_number:=0;
hs:='';
while ((ord(c)>=ord('0')) and (ord(c)<=ord('9'))) do
begin
hs:=hs+c;
c:=inputbuffer^[inputpointer];if c=#0 then reload;inc(inputpointer);
end;
val(hs,l,w);
read_number:=l;
end;
procedure skip_until_pragma;
begin
repeat
while (c<>'{') and (kommentarebene>0) do
begin
if c=#26 then fatalerror(endoffile);
if c=#10 then write_line;
if c='{' then inc(kommentarebene);
if c='}' then dec(kommentarebene);
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c='$' then
break;
if c=#26 then fatalerror(endoffile);
if c=#10 then write_line;
if c='{' then inc(kommentarebene);
if c='}' then dec(kommentarebene);
until false;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
var
hs : string;
hp : pinputstack;
mac : pmacrosym;
startebene : word;
i : longint;
label
nochmal;
begin
inc(kommentarebene);
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if (kommentarebene=1) and (c='$') then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
hs:=read_string;
if hs='I' then
begin
skipspace;
hs:=c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
while (c<>' ') and (c<>'}') do
begin
hs:=hs+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c=#26 then fatalerror(endoffile);
end;
{ bis Kommentarende lesen }
while c<>'}' do
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c=#26 then fatalerror(endoffile);
if c=#10 then write_line;
end;
dec(kommentarebene);
{ Initialisieren: }
inputstack^.inputpointer:=inputpointer;
new(hp);
assign(hp^.f,inputdir+hs);
{$I-}
reset(hp^.f,1);
if ioresult<>0 then
begin
if (hs='-') then
aktswitches:=aktswitches-[cs_iocheck]
else if (hs='+') then
aktswitches:=aktswitches+[cs_iocheck]
else
error(cannot_open_incfile);
dispose(hp);
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end
else
begin
hp^.next:=inputstack;
inputstack:=hp;
sourcesize:=filesize(inputstack^.f)+4;
if sourcesize>maxinputlen then
sourcesize:=maxinputlen-1;
getmem(inputstack^.inputbuffer,sourcesize+1);
inputbuffer:=inputstack^.inputbuffer;
inputstack^.filenotatend:=true;
inputstack^.buffersize:=sourcesize;
inputstack^.filename:=stringdup(upper(hs));
inputstack^.line_no:=1;
reload;
end;
{ das Includefile kann gleich mit einem Kommentar }
{ anfangen, diese ueberlesen }
if c='{' then kommentar;
exit;
end
else if hs='E' then
begin
if c='-' then
aktswitches:=aktswitches-[cs_genexceptcode]
else aktswitches:=aktswitches+[cs_genexceptcode];
end
else if hs='IFDEF' then
begin
inc(aktpreprozebene);
skipspace;
hs:=read_string;
mac:=pmacrosym(macros^.search(hs));
if (not assigned(mac) or not mac^.defined) then
begin
repeat
skip_until_pragma;
hs:=read_string;
if (hs='IFDEF') or (hs='IFNDEF') then
inc(aktpreprozebene);
if (hs='ENDIF') then
dec(aktpreprozebene);
until ((hs='ELSE') and (aktpreprozebene=1))
or ((hs='ENDIF') and (aktpreprozebene=0));
end;
end
else if hs='ENDIF' then
begin
dec(aktpreprozebene);
if aktpreprozebene<0 then
warning(too_much_endifs);
end
else if hs='ELSE' then
begin
startebene:=aktpreprozebene-1;
if aktpreprozebene<1 then
warning(too_much_endifs);
repeat
skip_until_pragma;
hs:=read_string;
if (hs='IFDEF') or (hs='IFNDEF') then
inc(aktpreprozebene);
if (hs='ENDIF') then
dec(aktpreprozebene);
until (hs='ENDIF') and (aktpreprozebene=startebene);
end
else if hs='L' then
begin
skipspace;
hs:='';
while (c<>' ') and (c<>'}') do
begin
hs:=hs+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c=#26 then fatalerror(endoffile);
end;
hs:=lowercasestring(hs);
linkofiles.insert(hs);
end
else if hs='R' then
begin
if c='-' then
aktswitches:=aktswitches-[cs_rangechecking]
else aktswitches:=aktswitches+[cs_rangechecking];
end
else if hs='DEFINE' then
begin
skipspace;
hs:=read_string;
mac:=pmacrosym(macros^.search(hs));
if not assigned(mac) then
begin
mac:=new(pmacrosym,init(hs));
mac^.defined:=true;
macros^.insert(mac);
end
else
mac^.defined:=true;
if support_macros then
begin
skipspace;
{ may be a macro? }
if c='=' then
begin
i:=0;
while (c<>'}') do
begin
hs:=hs+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c=#26 then fatalerror(endoffile);
end;
end;
end;
end
else if hs='UNDEF' then
begin
skipspace;
hs:=read_string;
mac:=pmacrosym(macros^.search(hs));
if not assigned(mac) then
begin
mac:=new(pmacrosym,init(hs));
mac^.defined:=false;
macros^.insert(mac);
end
else
mac^.defined:=false;
end
else if hs='PACKRECORDS' then
begin
skipspace;
if upcase(c)='N' then
begin
hs:=read_string;
if hs='NORMAL' then
aktpackrecords:=2
else warning(only_pack_records_);
end
else
case read_number of
1 : aktpackrecords:=1;
2 : aktpackrecords:=2;
else warning(only_pack_records_);
end;
end
else warning(ill_switch);
end;
nochmal:
while c<>'}' do
begin
if c='{' then
kommentar
else
begin
if c=#26 then fatalerror(endoffile);
if c=#10 then write_line;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
end;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
{ checks }{ }
if c='{' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
goto nochmal;
end;
dec(kommentarebene);
end;
function is_keyword(var token : ttoken) : boolean;
var
m,n,k : integer;
begin
{ lohnt sich meist, da viele Bezeichner nur ein Zeichen lang sind, }
{ aber alle Schlüsselwörter länger als ein Zeichen sind }
if length(pattern)<=1 then
begin
is_keyword:=false;
exit;
end;
m:=1;
n:=anz_keywords;
while m<=n do
begin
k:=m+(n-m) div 2;
if pattern=keyword[k] then
begin
token:=keyword_token[k];
is_keyword:=true;
exit;
end
else if pattern>keyword[k] then m:=k+1 else n:=k-1;
end;
is_keyword:=false;
end;
function yylex : ttoken;
var
y : ttoken;
code : word;
l : longint;
hs : string;
begin
if s_point then
begin
s_point:=false;
if c='.' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=POINTPOINT;
exit;
end;
yylex:=POINT;
exit;
end;
if c='{' then kommentar;
while (c=' ') or (c=#9) or (c=#13) or (c=#12) or (c=#10) do
begin
if c=#10 then write_line;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;if c='{' then kommentar;
end;
case c of
'A'..'Z','a'..'z','_' : begin
orgpattern:=c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
while ((ord(c)>=ord('A')) and (ord(c)<=ord('Z')))
or ((ord(c)>=ord('a')) and (ord(c)<=ord('z')))
or ((ord(c)>=ord('0')) and (ord(c)<=ord('9')))
or (c='_') do
begin
orgpattern:=orgpattern+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
pattern:=orgpattern;
uppervar(pattern);
if is_keyword(y) then yylex:=y
else yylex:=ID;
exit;
end;
'$' : begin
pattern:=c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
while ((ord(c)>=ord('0')) and (ord(c)<=ord('9'))) or
(ord(upcase(c))>=ord('A')) and (ord(upcase(c))<=ord('F')) do
begin
pattern:=pattern+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
yylex:=INTCONST;
exit;
end;
'0'..'9' : begin
pattern:=c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
while ((ord(c)>=ord('0')) and (ord(c)<=ord('9'))) do
begin
pattern:=pattern+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
if (c='.') or (upcase(c)='E') then
begin
if c='.' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if not((ord(c)>=ord('0')) and (ord(c)<=ord('9'))) then
begin
s_point:=true;
yylex:=INTCONST;
exit;
end;
pattern:=pattern+'.';
while ((ord(c)>=ord('0')) and (ord(c)<=ord('9'))) do
begin
pattern:=pattern+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
end;
if upcase(c)='E' then
begin
pattern:=pattern+'E';
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if (c='-') or (c='+') then
begin
pattern:=pattern+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
if not((ord(c)>=ord('0')) and (ord(c)<=ord('9')))
then fatalerror(ill_character);
while ((ord(c)>=ord('0')) and (ord(c)<=ord('9'))) do
begin
pattern:=pattern+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
end;
yylex:=REALNUMBER;
exit;
end;
yylex:=INTCONST;
exit;
end;
';' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=SEMICOLON;
exit;
end;
'[' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=LECKKLAMMER;
exit;
end;
']' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=RECKKLAMMER;
exit;
end;
'(' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=LKLAMMER;
exit;
end;
')' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=RKLAMMER;
exit;
end;
'+' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=PLUS;
exit;
end;
'-' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=MINUS;
exit;
end;
':' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c='=' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=ASSIGNMENT;
exit;
end
else
begin
yylex:=COLON;
exit;
end;
end;
'*' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=STAR;
exit;
end;
'/' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=SLASH;
exit;
end;
'=' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=EQUAL;
exit;
end;
'.' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c='.' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=POINTPOINT;
exit;
end
else
yylex:=POINT;
exit;
end;
'@' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=KLAMMERAFFE;
exit;
end;
',' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=COMMA;
exit;
end;
'''','#','^' :
begin
if c='^' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
c:=upcase(c);
if not(parse_types) and (c>='A') and (c<='Z') then
begin
pattern:=chr(ord(c)-64);
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end
else
begin
yylex:=CARET;
exit;
end;
end
else pattern:='';
while true do
case c of
'#' :
begin
hs:='';
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
while (ord(c)>=ord('0')) and (ord(c)<=ord('9')) do
begin
hs:=hs+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
val(hs,l,code);
if (code<>0) or (l<0) or (l>255) then
fatalerror(ill_char_const);
pattern:=pattern+chr(l);
end;
'''' :
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c=#13 then
begin
error(string_exceed_line);
break;
end;
repeat
if c=''''then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c='''' then
begin
pattern:=pattern+'''';
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c=#13 then
begin
error(string_exceed_line);
break;
end;
end
else break;
end
else
begin
pattern:=pattern+c;
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c=#13 then
begin
error(string_exceed_line);
break
end;
end;
until false;
end;
'^' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
c:=upcase(c);
if (c>='A') or (c<='Z') then
pattern:=pattern+chr(ord(c)-64)
else fatalerror(ill_character);
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
end;
else break;
end;
{ aus einem Zeichen bestehende }
{ Strings werden als Char-Kons- }
{ tanten behandelt }
if length(pattern)=1 then
yylex:=CCHAR
else yylex:=CSTRING;
exit;
end;
'>' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c='=' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=GTE;
exit;
end
else if c='>' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=_SHR;
exit;
end
else
begin
yylex:=GT;
exit;
end;
end;
'<' : begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
if c='>' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=UNEQUAL;
exit;
end
else if c='=' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=LTE;
exit;
end
else if c='<' then
begin
c:=inputbuffer^[inputpointer];inc(inputpointer);if c=#0 then reload;
yylex:=_SHL;
exit;
end
else
begin
yylex:=LT;
exit;
end;
end;
#26 : begin
yylex:=_EOF;
exit;
end;
else fatalerror(ill_character);
end;
end;
function asmgetchar : char;
begin
c:=inputbuffer^[inputpointer];
inc(inputpointer);if c=#0 then reload;
if c='{' then kommentar;
asmgetchar:=c;
end;
procedure initscanner(const source : string);
begin
aktpreprozebene:=0;
new(inputstack);
inputstack^.line_no:=1;
inputstack^.filename:=stringdup('');
assign(inputstack^.f,source);
{$I-}
reset(inputstack^.f,1);
if ioresult<>0 then
fatalerror(cannot_open_input);
sourcesize:=filesize(inputstack^.f);
inputstack^.next:=nil;
if sourcesize>maxinputlen then
sourcesize:=maxinputlen-1;
getmem(inputstack^.inputbuffer,sourcesize+1);
inputbuffer:=inputstack^.inputbuffer;
inputstack^.filenotatend:=true;
inputstack^.buffersize:=sourcesize+1;
inputstack^.filename:=stringdup(source);
reload;
kommentarebene:=0;
s_point:=false;
if c='{' then kommentar;
end;
{$I+}
procedure donescanner;
begin
freemem(inputbuffer,sourcesize+1);
if aktpreprozebene<>0 then
warning(endif_expect);
end;
end.