[fpc-devel] Patch to speed up Uppercase/Lowercase functions

Joost van der Sluis joost at cnoc.nl
Sat Jun 11 15:20:56 CEST 2005


If we're gonna hold a discussion like this for every optimilisation, it
isn't worth the effort imho. But now we're busy with it:

> Well. Discussion is nice, but what does the real world show ?

> To compare, I made 6 versions of Lowercase:
> 1 - Sysutils
> 2 - Sysutils with Joost's improvement.
> 3 - Sysutils with Joost's improvement, but forward loop.
> 4 - Using PChar.
> 5 - Using PChar with lookup table and if check
> 5 - Using Pchar with lookup table and no check.

You shoudn't use the sysutils's version. It's better to copy the source
from sysutils to the testprogram. 

I've added that (lowercase1) and i've added Daniel's asm-procedure...

> Result on an AMD 64 3000:
> Lowercase time to execute: 00:00:01.563
> Lowercase2 Time to execute: 00:00:01.363
> Lowercase3 Time to execute: 00:00:01.394
> Lowercase4 Time to execute: 00:00:00.999
> Lowercase5 Time to execute: 00:00:01.021
> Lowercase6 Time to execute: 00:00:00.948

> So, judge for yourself. I think this is worth the 256 byte lookup table.

Further I've used the more precise timer-utils from Tom. Most remarkably
is that now my lowercase is actually _slower_ then the one in the RTL.
Yesterday and in Michael's test it's the other way around?

Results:  (compiled with -OG1)

lowercase   execution time: 90619
lowercase 1 execution time: 94293
lowercase 2 execution time: 96657
lowercase 3 execution time: 95163
lowercase 4 execution time: 68910
lowercase 5 execution time: 68030
lowercase 6 execution time: 69403
lowercase 7 execution time: 51839

(notice the difference between the lowercase in the rtl, and the same
code directly in the unit, caused by different compiler-directives?)

I aggree with Daniel that the use of a table isn't usefull. For that,
also look at GenericAnsiUpperCase in sysstr.inc

Joost.
-------------- next part --------------
program test;

{$mode objfpc}
{$H+}

// Renamed cpu to cpu-timer, because the existence of a cpu-unit in the RTL
uses cpu_timer, sysutils;

function LowerCase1(const S: string): string;
var i: integer;
begin
result := S;
i := Length(result);
while i <> 0 do begin
   if (result[i] in ['A'..'Z']) then result[i] := char(byte(result[i]) + 32);
   dec(i);
   end;
end;

Function Lowercase2(Const S : String) : String;

Var
  i : Integer;

begin
  result := S;
  for i := Length(S) downto 1 do
    if (result[i] in ['A'..'Z']) then result[i] := char(byte(result[i]) + 32);
end;

Function Lowercase3(Const S : String) : String;

Var
  i : Integer;

begin
  result := S;
  for i := 1 to Length(S) do
    if (result[i] in ['A'..'Z']) then result[i] := char(byte(result[i]) + 32);
end;

Function Lowercase4(Const S : String) : String;

Var
  i : Integer;
  P : PChar;

begin
  result := S;
  UniqueString(Result);
  P:=Pchar(Result);
  for i := 1 to Length(Result) do
    begin
    if (P^ in ['A'..'Z']) then P^ := char(byte(p^) + 32);
    Inc(P);
    end;
end;

Var
  ConvertTable : Array[0..255] of char;

Function Lowercase5(Const S : String) : String;

Var
  i : Integer;
  P : PChar;

begin
  result := S;
  UniqueString(Result);
  P:=Pchar(Result);
  for i := 1 to Length(Result) do
    begin
    if (P^ in ['A'..'Z']) then P^ := ConvertTable[byte(p^)];
    Inc(P);
    end;
end;

Function Lowercase6(Const S : String) : String;

Var
  i : Integer;
  P : PChar;

begin
  result := S;
  UniqueString(Result); // Needed or S will be modified.
  P:=Pchar(Result);
  for i := 1 to Length(Result) do
    begin
    P^ := ConvertTable[byte(p^)];
    Inc(P);
    end;
end;

{$ASMMODE INTEL}
{$H-}

function lowercase7(const s:string):string;assembler;pascal;
asm
    mov esi,s
    mov edi, at result
    lodsb
    stosb
    movzx ecx,al
    jecxz @a2
@a1:
    lodsb
    cmp al,'A'
    sbb bl,bl
    cmp al,'Z'+1
    sbb bh,bh
    not bh
    or bl,bh
    not bl
    and bl,32
    add al,bl
    stosb
    dec cl
    jnz @a1
@a2:
end;
{$H+}

var start, stop : int64;
    s,t         : string;
    i           : integer;

const count = 50000;

begin
  For I:=0 to 256 do
    ConvertTable[i]:=LowerCase(Char(i));


  s := 'QWERTYuiOP[]asdfghjkl;''\/.,MnBvcxz!@#$55tGGbvXCXg5eujFNBCx4w#^$&hfgbccxvfSFDCSjfDF145tySDFVmbCDl;k d;DCrbrdb6334fdb,\bx FG';


  start := Clock;
  for i := 0 to count do
    t := lowercase(s);
  stop := Clock;
  writeln('lowercase   execution time: ',stop-start);

  start := Clock;
  for i := 0 to count do
    t := lowercase1(s);
  stop := Clock;
  writeln('lowercase 1 execution time: ',stop-start);

  start := Clock;
  for i := 0 to count do
    t := lowercase2(s);
  stop := Clock;
  writeln('lowercase 2 execution time: ',stop-start);

  start := Clock;
  for i := 0 to count do
    t := lowercase3(s);
  stop := Clock;
  writeln('lowercase 3 execution time: ',stop-start);

  start := Clock;
  for i := 0 to count do
    t := lowercase4(s);
  stop := Clock;
  writeln('lowercase 4 execution time: ',stop-start);

  start := Clock;
  for i := 0 to count do
    t := lowercase5(s);
  stop := Clock;
  writeln('lowercase 5 execution time: ',stop-start);

  start := Clock;
  for i := 0 to count do
    t := lowercase6(s);
  stop := Clock;
  writeln('lowercase 6 execution time: ',stop-start);

  start := Clock;
  for i := 0 to count do
    t := lowercase7(s);
  stop := Clock;
  writeln('lowercase 7 execution time: ',stop-start);
end.
  


More information about the fpc-devel mailing list