[fpc-pascal] regex-dna using PCRE

Florian Klaempfl florian at freepascal.org
Wed Nov 7 20:18:12 CET 2007


S. Fisher schrieb:
> Unfortunately, it's slightly slower 

Why? This is great if our library is fast :)

> than my fastest program that
> uses the library that comes with FPC.  I wonder if the dll was
> compiled with all of the C compiler's optimiaztions turned on.
> 
> {$mode objfpc}
> 
> uses pcre; // from www.renatomancuso.com/software/dpcre/dpcre.htm
> 
> const
>   patterns : array[1..9] of pchar =
>     (
>       'agggtaaa|tttaccct',
>       '[cgt]gggtaaa|tttaccc[acg]',
>       'a[act]ggtaaa|tttacc[agt]t',
>       'ag[act]gtaaa|tttac[agt]ct',
>       'agg[act]taaa|ttta[agt]cct',
>       'aggg[acg]aaa|ttt[cgt]ccct',
>       'agggt[cgt]aa|tt[acg]accct',
>       'agggta[cgt]a|t[acg]taccct',
>       'agggtaa[cgt]|[acg]ttaccct'
>     );
>   replacements : array[0..10,0..1] of string[15] =
>   (
>     ('B', '(c|g|t)'), ('D', '(a|g|t)'), ('H', '(a|c|t)'), ('K', '(g|t)'),
>     ('M', '(a|c)'), ('N', '(a|c|g|t)'), ('R', '(a|g)'), ('S', '(c|t)'),
>     ('V', '(a|c|g)'), ('W', '(a|t)'), ('Y', '(c|t)')
>   );
> 
> 
> // Append 2 strings to an ansistring rapidly.  Note: the ansistring's
> // length will be increased by a more than sufficient amount.
> function append2( var dest: ansistring; len0: longint;
>                   s1: pchar; len1: longint;
>                   s2: pchar; len2: longint): longint; inline;
> const  quantum = 599000;
> var  newlength: longint;
> begin
>   newlength := len0 + len1 + len2;
>   // Since setlength() is somewhat costly, we'll do it less
>   // often than you would think.
>   if length( dest ) < newlength then
>     setlength( dest, newlength + quantum );
>   move( s1^, dest[len0 + 1], len1 );
>   move( s2^, dest[len0 + 1 + len1], len2 );
>   exit( newlength );
> end;
> 
> 
> 
> procedure replace_matches( const str: ansistring;  var dest: ansistring );
> var
>   rx : iregex;
>   capture: imatch;
>   start, index, truelength, i : longint;
>   target, repl, found: string[255];
> begin
>   target := '[';
>   for i := 0 to high(replacements) do
>     target += replacements[i,0];
>   target += ']';
>   rx := regexCreate( target ); 
> 
>   dest := '';   truelength := 0;   start := 0;
>   repeat
>     capture := rx.match( str, start );
>     if not capture.success then  break;
>     index := capture.groups[0].index;
>     found :=  str[ index + 1 ];
>     repl := replacements[ pos(found,target)-2, 1 ];
>     truelength := append2(
>       dest, truelength, @str[start+1], index-start, @repl[1], length(repl) );
>     start := index + 1;
>   until false;
>   setlength( dest, truelength );
>   dest := dest + Copy( str, start+1, length(str) - start + 1);
> end;
> 
> 
> function count_matches( const needle, haystack: ansistring ):longint;
> var
>   rx : iregex;
>   capture: imatch;
>   where: longint;
> begin
>   rx := regexCreate( needle ); 
>   result := 0;
>   where := 0;
>   repeat
>     capture := rx.match( haystack, where );
>     if not capture.success then  break;
>     inc( result );
>     where := capture.groups[0].index + capture.groups[0].length;
>   until false;
> end;
> 
> 
> 
> 
> 
> var
>   pattern : pchar;
>   sequence, new_seq : ansiString;
>   line: string[255];
>   i, count, init_length, clean_length: longint;
>   inbuf : array[1..64*1024] of char;
> begin
>   settextbuf(input,inbuf);
>   sequence := '';
>   init_length := 0;
>   while not eof do
>   begin
>     readln( line );
>     init_length += length( line ) + 1;
>     if line[1] <> '>' then
>       sequence := sequence + line;
>   end;
>   clean_length := length(sequence);
> 
>   for i := low(patterns) to high(patterns) do
>   begin
>     pattern := patterns[i];
>     count := count_matches( pattern, sequence );
>     writeln( pattern, ' ', count);
>   end;
> 
>   replace_matches(sequence, new_seq);
> 
>   writeln;
>   writeln( init_length );
>   writeln( clean_length );
>   writeln( length(new_seq) );
> end.
> 
> 
> __________________________________________________
> Do You Yahoo!?
> Tired of spam?  Yahoo! Mail has the best spam protection around 
> http://mail.yahoo.com 
> _______________________________________________
> fpc-pascal maillist  -  fpc-pascal at lists.freepascal.org
> http://lists.freepascal.org/mailman/listinfo/fpc-pascal
> 




More information about the fpc-pascal mailing list