[fpc-pascal] Example: regular expressions and "hash-tables"

S. Fisher expandafter at yahoo.com
Fri Mar 22 09:31:42 CET 2013


--- On Wed, 3/20/13, S. Fisher <expandafter at yahoo.com> wrote:

> The program reads a text file and counts the number of
> unique words,
> and also displays the number of times the most common word
> was found.
> 

Added an iterator of sorts for regular expressions.  This allows

    if re.exec( line ) then  begin
      si_table_inc( table, re.match[0] );
      while re.execNext do
        si_table_inc( table, re.match[0] );
    end;

to be replaced by

    re_each_match( re, line, @count_the_word );

with no loss of speed.


{$mode objfpc}

uses
  AvgLvlTree,
  regexpr,
  classes,
  dateutils,
  Sysutils;

const
  file_to_process = 'Bartlett--Quotations.txt';

type
  // string-integer table
  t_si_table = TStringToPointerTree;
  t_si_item = PStringToPointerItem;

procedure si_table_put( table: t_si_table; k: string; v: longint );
begin
  table[ k ] := pointer( v )
end;
function si_table_get( table : t_si_table; key : string ): longint;
begin
  result := longint( table[ key ] )
end;
procedure si_table_inc( table: t_si_table; k: string );
begin
  table[ k ] := pointer( si_table_get( table, k ) + 1 ) ;
end;
function si_table_key( item : t_si_item ): string;
begin
  result := item^.name
end;
function si_table_val( item : t_si_item ): longint;
begin
  result := longint( item^.value )
end;
function si_table_new( sensitive : boolean ): t_si_table;
begin
  result := t_si_table.create( sensitive )
end;


type
  t_regex_proc = procedure( const text : string );

procedure re_each_match( re : TRegExpr;
                         const text: string;
                         proc: t_regex_proc ); overload;
begin
  if re.exec( text ) then  begin
    proc( re.match[0] );
    while re.execNext do
      proc( re.match[0] );
  end;
end;
procedure re_each_match( const re_str, text: string;
                         proc: t_regex_proc ); overload;
var
  re : TRegExpr;
begin
  re := TRegExpr.Create;
  re.Expression := re_str;
  re_each_match( re, text, proc );
  re.free
end;


var
  table : t_si_table ;
  table_item : t_si_item ;
  line, word : string;
  re : TRegExpr;
  max_count, count : int32;
  lines : TStringList;
  time_1 : TDateTime;

procedure count_the_word( const s : string );
begin
  si_table_inc( table, s )
end;

begin
  time_1 := time;

  re := TRegExpr.Create;
  re.Expression := '(?i)[a-z]+' ;  // case-insensitive

  table := si_table_new( true ) ; // true to make it case-sensitive.

  lines := TStringList.Create;
  lines.LoadFromFile( file_to_process );
  for line in lines do
    re_each_match( re, line, @count_the_word );
  lines.free;

  writeln( 'Unique word count: ', table.count );
  max_count := 0;
  for table_item in table do  begin
    count := si_table_val( table_item );
    if count > max_count then  begin
      word := si_table_key( table_item );
      max_count := count
    end;
  end;
  writeln( 'Commonest word: ', word, ' (', max_count, ')' );
  writeln( 'Elapsed time: ',
           milliSecondsBetween( time_1, time ),
           ' milliseconds.' );

  table.free;
  re.free
end.




More information about the fpc-pascal mailing list