[fpc-pascal] fast text processing

Jeff Pohlmeyer yetanothergeek at gmail.com
Wed Oct 31 11:03:43 CET 2007


Heck, I'm not even a programmer, and this kludge is
about 25% faster than your Perl script on my machine...

program koleski;
{$MODE OBJFPC} {$H+}

// Counts the words and the distinct words found in 'Koleksi.dat'.
//
// Each input line is processed as follows:
//   * it is lower-cased in place;
//   * a line starting with '<' is skipped, unless it starts with '<title>',
//     in which case that 7-character tag is stripped and the rest is kept;
//   * every character outside a..z / 0..9 becomes a line feed (a word
//     separator), except that the first '<' found truncates the line there;
//   * the resulting tokens are split via TStringList.Text, and any token
//     containing a digit is discarded;
//   * each remaining token bumps the total count and is added to a sorted,
//     duplicate-ignoring list whose final Count is the unique word count.

uses classes, strings;

var
  f: text;
  s:ansistring;
  wc:longint=0;
  wl, ul:TStringList;
  i,j,n:LongInt;

begin
  assign(f, 'Koleksi.dat');
  reset(f);
  wl:=nil;
  ul:=nil;
  try
    wl:=TStringList.Create();
    ul:=TStringList.Create();
    // Sorted + dupIgnore makes ul behave as a set of distinct words.
    ul.Sorted:=true;
    ul.Duplicates:=dupIgnore;
    while not eof(f) do begin
      readln(f,s);
      if (length(s)=0) then continue;
      StrLower(@s[1]);
      if (s[1]='<') then begin
        // Only '<title>' lines are kept among lines that open with a tag.
        if StrLComp(@s[1], '<title>',7) <> 0 then continue;
        delete(s,1,7);
      end;
      // Measure the length only AFTER the optional delete above: using the
      // pre-delete length would index past the end of the string (and hit a
      // nil string when the line was exactly '<title>').
      n:=length(s);
      for i:=1 to n do if not (s[i] in ['a'..'z','0'..'9']) then begin
        if ( s[i] <> '<' ) then begin
          s[i]:=#10
        end else begin
          // Drop everything from the first '<' onward.
          SetLength(s,i-1);
          break;
        end;
      end;
      // Assigning Text splits s on the line feeds inserted above.
      wl.Text:=s;
      for i:=0 to wl.Count-1 do begin
        s:=wl[i];
        // Tokens containing any digit are not counted as words.
        for j:=1 to length(s) do if (s[j] in ['0'..'9']) then begin
          s:='';
          break;
        end;
        if (s<>'') then begin
          inc(wc);
          ul.Add(s);
        end;
      end;
    end;
    WriteLn('Word count:',wc, #10'Unique word count:', ul.Count);
  finally
    // Release the lists and the file handle even if processing raised.
    ul.Free;
    wl.Free;
    close(f);
  end;
end.


 - Jeff



More information about the fpc-pascal mailing list