[fpc-devel] LEA instruction speed

J. Gareth Moreton gareth at moreton-family.com
Fri Oct 13 11:25:06 CEST 2023


Oops - that was a silly mistake of mine with R8.  As for the other 
error, that sounds like it's in the third party benchmark suite.  I'll 
do some investigating on my virtual machine.

In the meantime, here's the fixed test with the stray R8 reference properly 
compiled out on i386 (it's replaced with "CPUName" on 32-bit).  I wasn't 
sure whether global variables were zero-initialised or not, hence me playing safe.

Kit

On 13/10/2023 08:34, Tomas Hajny via fpc-devel wrote:
> On 2023-10-13 09:26, Tomas Hajny wrote:
>> On 2023-10-12 20:02, J. Gareth Moreton via fpc-devel wrote:
>>> So an update.
>>  .
>>  .
>>
>> The latest version of blea.pp doesn't compile with a 32-bit compiler -
>> line 76 contains an unconditional reference to the R8 register, which
>> obviously doesn't exist in 32-bit mode.
>
> BTW, the line shouldn't be necessary at all, because global variables 
> should be initialized to 0 on program start anyway as far as I know.
>
> After fixing the problem above, compiling for 32-bit mode and running 
> the test, it fails with an error in GetLogicalProcessorInformation (it 
> states "8" in place of the error information; I wonder whether it is 
> being misinterpreted, because 8 is the number of logical CPUs on the 
> machine used for running the test).
>
> Tomas
> _______________________________________________
> fpc-devel maillist  -  fpc-devel at lists.freepascal.org
> https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel
>
-------------- next part --------------
{ %CPU=i386,x86_64 }
program blea;

{$IF not defined(CPUX86) and not defined(CPUX86_64)}
  {$FATAL This test program requires an Intel x86 or x64 processor }
{$ENDIF}

{$MODE OBJFPC}
{$ASMMODE Intel}

{$DEFINE DETECTCPU}

uses
  SysUtils, Spring.Benchmark in 'spring/Spring.Benchmark.pp';
  
type
  { Function signature matching the three Checksum_* routines declared later
    in this file.  NOTE(review): the type is not referenced anywhere in the
    visible source. }
  TBenchmarkProc = function(const Input, X, Y: LongWord): LongWord;

var
  { Buffer filled by FillBrandName: 48 bytes of CPUID output at indices 0..47
    followed by a #0 terminator at index 48.  The main program prints it and
    then overwrites it with dashes. }
  CPUName: array[0..48] of Char;

{ FillBrandName

  With DETECTCPU defined: executes CPUID leaf $80000000 and, if the value
  returned in EAX is at least $80000004, executes leaves $80000002,
  $80000003 and $80000004 (the processor brand string leaves), storing
  EAX, EBX, ECX and EDX from each into consecutive 16-byte slices of the
  global CPUName buffer, followed by a zero byte at offset 48.

  Returns True (AL = 1) when the buffer was filled, False (AL = 0) when the
  extended leaves are not reported; in the latter case CPUName is not
  written.

  Without DETECTCPU: a stub that always returns False.

  NOTE(review): the stores assume SizeOf(Char) = 1 so that the 49 bytes
  written fit in CPUName - confirm for the compiler mode in use. }
{$ifdef DETECTCPU}
function FillBrandName: Boolean; assembler; nostackframe;
asm
  { CPUID overwrites EBX/RBX, so the caller's value is saved here and
    restored before returning. }
{$ifdef CPUX86_64}
  PUSH RBX
{$else CPUX86_64}
  PUSH EBX
{$endif CPUX86_64}
  { Leaf $80000000: EAX receives the highest supported extended leaf. }
  MOV  EAX, $80000000
  CPUID
  CMP  EAX, $80000004
  JB   @Unavailable
{$ifdef CPUX86_64}
  { 64-bit only: keep the address of CPUName in R8 for the stores below.
    The 32-bit build addresses CPUName directly instead. }
  LEA  R8,  [RIP + CPUName]
{$endif CPUX86_64}
  { Leaf $80000002 -> CPUName bytes 0..15 }
  MOV  EAX, $80000002
  CPUID
{$ifdef CPUX86_64}
  MOV  [R8], EAX
  MOV  [R8 + 4], EBX
  MOV  [R8 + 8], ECX
  MOV  [R8 + 12], EDX
{$else CPUX86_64}
  MOV  [CPUName], EAX
  MOV  [CPUName + 4], EBX
  MOV  [CPUName + 8], ECX
  MOV  [CPUName + 12], EDX
{$endif CPUX86_64}
  { Leaf $80000003 -> CPUName bytes 16..31 }
  MOV  EAX, $80000003
  CPUID
{$ifdef CPUX86_64}
  MOV  [R8 + 16], EAX
  MOV  [R8 + 20], EBX
  MOV  [R8 + 24], ECX
  MOV  [R8 + 28], EDX
{$else CPUX86_64}
  MOV  [CPUName + 16], EAX
  MOV  [CPUName + 20], EBX
  MOV  [CPUName + 24], ECX
  MOV  [CPUName + 28], EDX
{$endif CPUX86_64}
  { Leaf $80000004 -> CPUName bytes 32..47, then the terminating zero byte
    at offset 48 }
  MOV  EAX, $80000004
  CPUID
{$ifdef CPUX86_64}
  MOV  [R8 + 32], EAX
  MOV  [R8 + 36], EBX
  MOV  [R8 + 40], ECX
  MOV  [R8 + 44], EDX
  MOV  BYTE PTR [R8 + 48], 0
{$else CPUX86_64}
  MOV  [CPUName + 32], EAX
  MOV  [CPUName + 36], EBX
  MOV  [CPUName + 40], ECX
  MOV  [CPUName + 44], EDX
  MOV  BYTE PTR [CPUName + 48], 0
{$endif CPUX86_64}
  { Success: Boolean result True in AL }
  MOV  AL,  1
  JMP  @ExitBrand
@Unavailable:
  { Extended leaves not reported: Boolean result False in AL }
  XOR  AL,  AL
@ExitBrand:
{$ifdef CPUX86_64}
  POP  RBX
{$else CPUX86_64}
  POP  EBX
{$endif CPUX86_64}
end;
{$else DETECTCPU}
{ Fallback used when DETECTCPU is not defined: reports that no brand name is
  available, so the main program skips printing the CPU line. }
function FillBrandName: Boolean; inline;
begin
  Result := False;	
end;
{ NOTE(review): the label on the directive below is spelled DETECTPU; it
  closes the DETECTCPU conditional opened above. }
{$endif DETECTPU}

{ Pure Pascal control implementation of the checksum.

  Starting from Input, performs Y rounds; each round adds X and the constant
  $87654321 to the running value and then XORs it with the number of rounds
  still remaining (Y, Y-1, ..., 1).  Returns Input unchanged when Y = 0.

  The additions rely on 32-bit wrap-around; presumably range and overflow
  checking are disabled for this program - confirm if compiler switches are
  changed. }
function Checksum_PAS(const Input, X, Y: LongWord): LongWord;
var
  Remaining: LongWord;
begin
  Remaining := Y;
  Result := Input;
  while Remaining <> 0 do
  begin
    Result := Result + X + $87654321;
    Result := Result xor Remaining;
    Dec(Remaining);
  end;
end;

{ Assembler variant of the checksum that uses two ADD instructions per round.

  Input, X and Y are used directly as register operands; each round adds
  $87654321 and X to Input, XORs Input with the current value of Y and then
  decrements Y, looping until Y reaches zero.  The final Input is moved into
  Result.

  The loop is bottom-tested, so without the guard at entry a call with Y = 0
  would wrap Y to $FFFFFFFF and run 2^32 rounds, whereas Checksum_PAS returns
  Input unchanged for Y = 0.  The guard makes the zero-round case agree with
  the control implementation; it sits outside the loop and does not change
  the instructions being timed. }
function Checksum_ADD(const Input, X, Y: LongWord): LongWord; assembler; nostackframe;
asm
  { Zero rounds requested: skip the loop and return Input as-is }
  TEST Y, Y
  JZ   @Done1
@Loop1:
  ADD Input, $87654321
  ADD Input, X
  XOR Input, Y
  DEC Y
  JNZ @Loop1
@Done1:
  MOV Result, Input
end;

{ Assembler variant of the checksum that folds both additions of a round
  into a single LEA instruction.

  Input, X and Y are used directly as register operands; each round computes
  Input + X + $87654321 with one LEA, XORs Input with the current value of Y
  and then decrements Y, looping until Y reaches zero.  The final Input is
  moved into Result.

  The loop is bottom-tested, so without the guard at entry a call with Y = 0
  would wrap Y to $FFFFFFFF and run 2^32 rounds, whereas Checksum_PAS returns
  Input unchanged for Y = 0.  The guard makes the zero-round case agree with
  the control implementation; it sits outside the loop and does not change
  the instructions being timed. }
function Checksum_LEA(const Input, X, Y: LongWord): LongWord; assembler; nostackframe;
asm
  { Zero rounds requested: skip the loop and return Input as-is }
  TEST Y, Y
  JZ   @Done2
@Loop2:
  LEA Input, [Input + X -2023406815] { -2023406815 = $87654321 }
  XOR Input, Y
  DEC Y
  JNZ @Loop2
@Done2:
  MOV Result, Input
end;

const
  { Round count passed as the Y argument to every Checksum_* call made by the
    BM_Checksum_* benchmark bodies and by the correctness check in the main
    program. }
  internal_reps = 1000;

{ Benchmark body for the Pascal control case: calls Checksum_PAS with fixed
  arguments (5000000, 1000, internal_reps) once for every value produced by
  iterating over State.  The checksum result is discarded. }
procedure BM_Checksum_PAS(const State: TState);
var
  Iteration: TState.TValue;
  Seed: LongWord;
  Addend: LongWord;
begin
  Seed := 5000000;
  Addend := 1000;
  for Iteration in State do
    Checksum_PAS(Seed, Addend, internal_reps);
end;

{ Benchmark body for the LEA variant: calls Checksum_LEA with fixed
  arguments (5000000, 1000, internal_reps) once for every value produced by
  iterating over State.  The checksum result is discarded. }
procedure BM_Checksum_LEA(const State: TState);
var
  Iteration: TState.TValue;
  Seed: LongWord;
  Addend: LongWord;
begin
  Seed := 5000000;
  Addend := 1000;
  for Iteration in State do
    Checksum_LEA(Seed, Addend, internal_reps);
end;

{ Benchmark body for the ADD variant: calls Checksum_ADD with fixed
  arguments (5000000, 1000, internal_reps) once for every value produced by
  iterating over State.  The checksum result is discarded. }
procedure BM_Checksum_ADD(const State: TState);
var
  Iteration: TState.TValue;
  Seed: LongWord;
  Addend: LongWord;
begin
  Seed := 5000000;
  Addend := 1000;
  for Iteration in State do
    Checksum_ADD(Seed, Addend, internal_reps);
end;

var
  Reference, FromLEA, FromADD: LongWord;
  ExitStatus, CharIdx: Integer;

{ Main program:
    1. prints the target bitness and, when FillBrandName succeeds, the CPU
       name with a dashed underline (CPUName is overwritten with '-' for
       this purpose);
    2. checks that Checksum_LEA and Checksum_ADD return the same value as
       Checksum_PAS, halting with exit code bit 0 set for an LEA mismatch
       and bit 1 set for an ADD mismatch;
    3. registers the three benchmark bodies and calls Benchmark_Main. }
begin
{$IFDEF CPUX86}
  WriteLn('32 bits:');
{$ENDIF CPUX86}
{$IFDEF CPUX86_64}
  WriteLn('64 bits:');
{$ENDIF CPUX86_64}

  if FillBrandName then
  begin
    WriteLn('CPU = ', CpuName);

    { Turn every character before the terminator into '-' so the buffer can
      be printed as an underline of matching length }
    CharIdx := 0;
    while CpuName[CharIdx] <> #0 do
    begin
      CpuName[CharIdx] := '-';
      Inc(CharIdx);
    end;

    WriteLn('------', CpuName);
  end;

  WriteLn('Verifying function correctness...');

  Reference := Checksum_PAS(5000000, 1000, internal_reps);
  FromLEA := Checksum_LEA(5000000, 1000, internal_reps);
  FromADD := Checksum_ADD(5000000, 1000, internal_reps);

  ExitStatus := 0;

  if FromLEA <> Reference then
  begin
    WriteLn('ERROR: Checksum_LEA doesn''t match control case');
    ExitStatus := ExitStatus or 1;
  end;

  if FromADD <> Reference then
  begin
    WriteLn('ERROR: Checksum_ADD doesn''t match control case');
    ExitStatus := ExitStatus or 2;
  end;

  if ExitStatus <> 0 then
    Halt(ExitStatus);

  WriteLn;

  Benchmark(@BM_Checksum_PAS, 'Pascal control case');
  Benchmark(@BM_Checksum_LEA, 'Using LEA instruction');
  Benchmark(@BM_Checksum_ADD, 'Using ADD instructions');

  Benchmark_Main;
end.


More information about the fpc-devel mailing list