[fpc-pascal] fast text processing
Jeff Pohlmeyer
yetanothergeek at gmail.com
Wed Oct 31 11:03:43 CET 2007
Heck, I'm not even a programmer, and this kludge is
about 25% faster than your Perl script on my machine...
program koleski;
{ Counts words in the text file 'Koleksi.dat' and prints the total number of
  words and the number of distinct words.

  Per input line:
    - empty lines are skipped;
    - the line is lower-cased in place;
    - a line starting with '<' is skipped unless it starts with '<title>',
      in which case that 7-character prefix is removed;
    - the remaining text is cut at the first '<', and every other character
      outside a..z / 0..9 becomes a line feed, so the line splits into
      candidate words;
    - candidates that are empty or contain a digit are discarded. }
{$MODE OBJFPC} {$H+}
uses classes, strings;
var
  f: text;
  s: ansistring;            // current input line, rewritten in place
  w: ansistring;            // current candidate word
  wc: longint = 0;          // total number of accepted words
  wl, ul: TStringList;      // wl: words of the current line; ul: distinct words
  i, k, n: LongInt;
  hasDigit: boolean;
begin
  assign(f, 'Koleksi.dat');
  reset(f);
  wl := nil;
  ul := nil;
  try
    wl := TStringList.Create();
    ul := TStringList.Create();
    // Sorted + dupIgnore: Add() drops words that are already present, so
    // ul.Count ends up being the number of distinct words.
    ul.Sorted := true;
    ul.Duplicates := dupIgnore;

    while not eof(f) do begin
      readln(f, s);
      if length(s) = 0 then continue;

      StrLower(@s[1]);
      if s[1] = '<' then begin
        // Only '<title>' lines are of interest; skip every other tag line.
        if StrLComp(@s[1], '<title>', 7) <> 0 then continue;
        delete(s, 1, 7);
      end;

      // Take the length AFTER the optional prefix removal. Using the length
      // of the unmodified line here would index past the end of the
      // shortened string (or into an empty string for a bare '<title>').
      n := length(s);
      for i := 1 to n do
        if not (s[i] in ['a'..'z', '0'..'9']) then begin
          if s[i] = '<' then begin
            // Everything from the first '<' onwards is dropped.
            SetLength(s, i - 1);
            break;
          end;
          // Any other separator becomes a line break for the split below.
          s[i] := #10;
        end;

      // Assigning Text splits the buffer into one list entry per line.
      wl.Text := s;
      for i := 0 to wl.Count - 1 do begin
        w := wl[i];
        if w = '' then continue;

        hasDigit := false;
        for k := 1 to length(w) do
          if w[k] in ['0'..'9'] then begin
            hasDigit := true;
            break;
          end;

        if not hasDigit then begin
          inc(wc);
          ul.Add(w);
        end;
      end;
    end;

    WriteLn('Word count:',wc, #10'Unique word count:', ul.Count);
  finally
    // Release the lists and the file handle even if processing failed.
    ul.Free;
    wl.Free;
    close(f);
  end;
end.
- Jeff
More information about the fpc-pascal
mailing list