[fpc-devel] LEA instruction speed

J. Gareth Moreton gareth at moreton-family.com
Mon Oct 9 20:51:38 CEST 2023


I updated the "blea" test in the merge request so that it now displays the 
processor brand name on x86_64.  The name is not fetched under i386, however, 
because CPUID was not introduced until later 486 processors and so cannot 
be assumed to exist on every i386 target.  I've attached the updated test to 
this e-mail in case anyone wants to take a look and check that I haven't 
broken something.

Kit

On 09/10/2023 18:01, J. Gareth Moreton via fpc-devel wrote:
> Thank you very much!  That processor is built on the Excavator 
> architecture and lines up with the flag I put in the merge request 
> (i.e. it has the "fast LEA" hint).
>
> I honestly didn't expect this much testing feedback, so thank you all!
>
> Gareth aka. Kit
>
> P.S. I'm tempted to extend the test slightly to actually name the CPU 
> automatically.
>
> On 09/10/2023 15:40, Jean SUZINEAU via fpc-devel wrote:
>> My results:
>> jean at First-Boss:~/temp$ cat /proc/cpuinfo | grep "model name"
>> model name    : AMD A6-7480 Radeon R5, 8 Compute Cores 2C+6G
>> jean at First-Boss:~/temp$ /usr/bin/fpc blea.pp
>> Free Pascal Compiler version 3.2.2 [2021/07/09] for x86_64
>> Copyright (c) 1993-2021 by Florian Klaempfl and others
>> Target OS: Linux for x86-64
>> Compiling blea.pp
>> Linking blea
>> 95 lines compiled, 0.2 sec
>> jean at First-Boss:~/temp$ ./blea
>>    Pascal control case: 5.1 ns/call
>>  Using LEA instruction: 0.5 ns/call
>> Using ADD instructions: 0.8 ns/call
>> jean at First-Boss:~/temp$
>>
>> _______________________________________________
>> fpc-devel maillist  -  fpc-devel at lists.freepascal.org
>> https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel
>>
> _______________________________________________
> fpc-devel maillist  -  fpc-devel at lists.freepascal.org
> https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel
>
-------------- next part --------------
{ %CPU=i386,x86_64 }
program blea;

{$IF not defined(CPUX86) and not defined(CPUX86_64)}
  {$FATAL This test program requires an Intel x86 or x64 processor }
{$ENDIF}

{$MODE OBJFPC}
{$ASMMODE Intel}

uses
  SysUtils;
  
type
  { Common signature of the checksum routines, so that Benchmark can be
    handed any of them through a single procedural type. }
  TBenchmarkProc = function(const Input, X, Y: LongWord): LongWord;

var
  { Destination buffer for FillBrandName: the x86_64 implementation stores
    48 bytes of CPUID output at offsets 0..47 and a zero byte at offset 48. }
  CPUName: array[0..48] of Char;

{$ifdef CPUX86_64}
{ Copies the processor brand string reported by CPUID into the global
  CPUName buffer as a zero-terminated string.

  Returns True when the string was written.  Returns False when the value
  CPUID hands back in EAX for leaf $80000000 is below $80000004; in that case
  nothing is stored to CPUName. }
function FillBrandName: Boolean; assembler; nostackframe;
asm
  { CPUID writes to EBX, so RBX is preserved around the whole routine }
  PUSH RBX
  { Leaf $80000000: EAX comes back with the highest extended leaf available }
  MOV  EAX, $80000000
  CPUID
  CMP  EAX, $80000004
  { Unsigned comparison: give up unless leaves up to $80000004 exist }
  JB   @Unavailable
  { R8 = address of CPUName; used as the store base for all three leaves }
  LEA  R8,  [RIP + CPUName]
  { Leaf $80000002 -> bytes 0..15, stored in EAX, EBX, ECX, EDX order }
  MOV  EAX, $80000002
  CPUID
  MOV  [R8], EAX
  MOV  [R8 + 4], EBX
  MOV  [R8 + 8], ECX
  MOV  [R8 + 12], EDX
  { Leaf $80000003 -> bytes 16..31 }
  MOV  EAX, $80000003
  CPUID
  MOV  [R8 + 16], EAX
  MOV  [R8 + 20], EBX
  MOV  [R8 + 24], ECX
  MOV  [R8 + 28], EDX
  { Leaf $80000004 -> bytes 32..47 }
  MOV  EAX, $80000004
  CPUID
  MOV  [R8 + 32], EAX
  MOV  [R8 + 36], EBX
  MOV  [R8 + 40], ECX
  MOV  [R8 + 44], EDX
  { Force a terminator in the last element of CPUName (index 48) }
  MOV  BYTE PTR [R8 + 48], 0
  { Result := True }
  MOV  AL,  1
  JMP  @ExitBrand
@Unavailable:
  { Result := False }
  XOR  AL,  AL
@ExitBrand:
  POP  RBX
end;
{$else CPUX86_64}
{ Fallback for every target other than x86_64: no CPUID query is attempted,
  CPUName is not touched and the function always reports failure. }
function FillBrandName: Boolean; inline;
begin
  Result := False;
end;
{$endif CPUX86_64}

{ Pure Pascal reference implementation of the checksum; the assembly
  variants are compared against its result.

  Input: starting checksum value.
  X:     value added on every pass, together with the constant $87654321.
  Y:     number of passes; the pass counter runs from Y down to 1 and is
         XORed into the checksum on each pass.
  Returns the final checksum, or Input unchanged when Y is 0. }
function Checksum_PAS(const Input, X, Y: LongWord): LongWord;
var
  Remaining: LongWord;
begin
  Result := Input;
  { Counts Y, Y-1, ..., 1; the body is skipped entirely when Y = 0 }
  for Remaining := Y downto 1 do
    Result := (Result + X + $87654321) xor Remaining;
end;

{ Assembly version of the checksum loop that performs the addition with two
  separate ADD instructions per iteration.

  Input: starting checksum value.
  X:     value added on every iteration, together with the constant $87654321.
  Y:     iteration count; it is XORed into the checksum while counting down.
  Returns the final checksum.

  The parameters are overwritten in place as working storage - presumably
  they arrive in registers under the target calling convention; TODO confirm.

  NOTE(review): the loop body runs before Y is tested, so Y = 0 does not
  return Input unchanged the way Checksum_PAS does (DEC would wrap around
  instead).  Benchmark always passes 1000. }
function Checksum_ADD(const Input, X, Y: LongWord): LongWord; assembler; nostackframe;
asm
@Loop1:
  { Input := Input + $87654321 + X, as two dependent additions }
  ADD Input, $87654321
  ADD Input, X
  { Mix the current counter value into the checksum }
  XOR Input, Y
  { Count down; JNZ repeats the loop until Y reaches zero }
  DEC Y
  JNZ @Loop1
  MOV Result, Input
end;

{ Assembly version of the checksum loop that performs the addition with a
  single three-component LEA (base + index + displacement) per iteration.

  Input: starting checksum value.
  X:     value added on every iteration, together with the constant $87654321.
  Y:     iteration count; it is XORed into the checksum while counting down.
  Returns the final checksum.

  The parameters are overwritten in place as working storage - presumably
  they arrive in registers under the target calling convention; TODO confirm.

  NOTE(review): the loop body runs before Y is tested, so Y = 0 does not
  return Input unchanged the way Checksum_PAS does (DEC would wrap around
  instead).  Benchmark always passes 1000. }
function Checksum_LEA(const Input, X, Y: LongWord): LongWord; assembler; nostackframe;
asm
@Loop2:
  { Input := Input + X + $87654321 in one instruction }
  LEA Input, [Input + X + $87654321]
  { Mix the current counter value into the checksum }
  XOR Input, Y
  { Count down; JNZ repeats the loop until Y reaches zero }
  DEC Y
  JNZ @Loop2
  MOV Result, Input
end;

{ Times a checksum routine and prints the average cost of one inner
  iteration.

  name: label printed in front of the measurement.
  proc: routine under test; it is called 10000 times, each call being asked
        for internal_reps (1000) iterations, and each call receives the
        previous call's result as its Input.
  Z:    Input value for the first call.
  X:    passed through unchanged as the X argument of every call.
  Returns the value produced by the last call, so that the caller can compare
  different implementations against each other.

  The elapsed wall-clock time (taken from Now) is divided by the total number
  of inner iterations and printed in nanoseconds, with one decimal place when
  the figure is below 10 and none otherwise. }
function Benchmark(const name: string; proc: TBenchmarkProc; Z, X: LongWord): LongWord;
const
  internal_reps = 1000;
  outer_reps = 10000;
var
  started: TDateTime;
  ns_per_call: double;
  calls_made: cardinal;
  decimals: Integer;
begin
  Result := Z;
  calls_made := 0;
  started := Now;
  while calls_made < outer_reps do
    begin
      Result := proc(Result, X, internal_reps);
      inc(calls_made);
    end;
  ns_per_call := ((Now - started) * SecsPerDay) / calls_made / internal_reps * 1e9;
  { Small figures get one decimal place, larger ones are shown as integers }
  if ns_per_call < 10 then
    decimals := 1
  else
    decimals := 0;
  writeln(name, ': ', ns_per_call:0:decimals, ' ns/call');
end;

const
  { Bits of the process exit code, one per failing implementation }
  FAIL_LEA = 1;
  FAIL_ADD = 2;

var
  Results: array[0..2] of LongWord;
  FailureCode, Idx: Integer;
begin
  { Print the CPU brand string, when available, followed by an underline of
    matching length }
  if FillBrandName then
    begin
      WriteLn('CPU = ', CPUName);
      { Overwrite every character before the terminating zero with a dash }
      for Idx := Low(CPUName) to High(CPUName) do
        begin
          if CPUName[Idx] = #0 then
            Break;
          CPUName[Idx] := '-';
        end;
      WriteLn('------', CPUName);
    end;

  { Run the three implementations with identical arguments; index 0 is the
    Pascal reference that the other two are checked against }
  Results[0] := Benchmark('   Pascal control case', @Checksum_PAS, 5000000, 1000);
  Results[1] := Benchmark(' Using LEA instruction', @Checksum_LEA, 5000000, 1000);
  Results[2] := Benchmark('Using ADD instructions', @Checksum_ADD, 5000000, 1000);

  FailureCode := 0;

  if Results[1] <> Results[0] then
    begin
      WriteLn('ERROR: Checksum_LEA doesn''t match control case');
      FailureCode := FailureCode or FAIL_LEA;
    end;

  if Results[2] <> Results[0] then
    begin
      WriteLn('ERROR: Checksum_ADD doesn''t match control case');
      FailureCode := FailureCode or FAIL_ADD;
    end;

  { A non-zero exit code identifies which implementation(s) disagreed }
  if FailureCode <> 0 then
    Halt(FailureCode);
end.


More information about the fpc-devel mailing list