[fpc-devel] LEA instruction speed
J. Gareth Moreton
gareth at moreton-family.com
Mon Oct 9 20:51:38 CEST 2023
I updated the "blea" test in the merge request so it now displays the
processor brand name on x86_64; however, it is not fetched under i386
because CPUID was not introduced until later 486 processors. I've
attached it to this e-mail if anyone wants to take a look to ensure I
haven't broken something.
Kit
On 09/10/2023 18:01, J. Gareth Moreton via fpc-devel wrote:
> Thank you very much! That processor is built on the Excavator
> architecture and lines up with the flag I put in the merge request
> (i.e. it has the "fast LEA" hint).
>
> I honestly didn't expect this much testing feedback, so thank you all!
>
> Gareth aka. Kit
>
> P.S. I'm tempted to extend the test slightly to actually name the CPU
> automatically.
>
> On 09/10/2023 15:40, Jean SUZINEAU via fpc-devel wrote:
>> My results:
>> jean at First-Boss:~/temp$ cat /proc/cpuinfo | grep "model name"
>> model name : AMD A6-7480 Radeon R5, 8 Compute Cores 2C+6G
>> jean at First-Boss:~/temp$ /usr/bin/fpc blea.pp
>> Free Pascal Compiler version 3.2.2 [2021/07/09] for x86_64
>> Copyright (c) 1993-2021 by Florian Klaempfl and others
>> Target OS: Linux for x86-64
>> Compiling blea.pp
>> Linking blea
>> 95 lines compiled, 0.2 sec
>> jean at First-Boss:~/temp$ ./blea
>> Pascal control case: 5.1 ns/call
>> Using LEA instruction: 0.5 ns/call
>> Using ADD instructions: 0.8 ns/call
>> jean at First-Boss:~/temp$
>>
>> _______________________________________________
>> fpc-devel maillist - fpc-devel at lists.freepascal.org
>> https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel
>>
> _______________________________________________
> fpc-devel maillist - fpc-devel at lists.freepascal.org
> https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel
>
-------------- next part --------------
{ %CPU=i386,x86_64 }
program blea;
{$IF not defined(CPUX86) and not defined(CPUX86_64)}
{$FATAL This test program requires an Intel x86 or x64 processor }
{$ENDIF}
{$MODE OBJFPC}
{$ASMMODE Intel}
uses
SysUtils;
type
{ Common signature shared by Checksum_PAS, Checksum_ADD and Checksum_LEA so
  that Benchmark can be handed any of them as a procedural value. }
TBenchmarkProc = function(const Input, X, Y: LongWord): LongWord;
var
{ Destination buffer for the processor brand string. The x86_64 version of
  FillBrandName stores 48 bytes at offsets 0..47 and a zero terminator at
  offset 48.
  NOTE(review): those byte offsets assume Char is one byte wide - confirm if
  the string-related mode switches are ever changed. }
CPUName: array[0..48] of Char;
{$ifdef CPUX86_64}
{ Copies the processor brand string returned by CPUID leaves $80000002,
  $80000003 and $80000004 into the global CPUName buffer and terminates it
  with a zero byte at offset 48.

  Returns True when the string was written. Returns False, leaving CPUName
  untouched, when CPUID leaf $80000000 reports a value below $80000004 in
  EAX (i.e. the brand-string leaves are not available). }
function FillBrandName: Boolean; assembler; nostackframe;
asm
{ CPUID overwrites EBX, so RBX is saved here and restored before returning }
PUSH RBX
{ Leaf $80000000: EAX receives the highest supported extended leaf }
MOV EAX, $80000000
CPUID
CMP EAX, $80000004
JB @Unavailable
{ R8 holds the buffer address; it is not modified by the CPUID calls below }
LEA R8, [RIP + CPUName]
{ First 16 bytes of the brand string, stored in EAX, EBX, ECX, EDX order }
MOV EAX, $80000002
CPUID
MOV [R8], EAX
MOV [R8 + 4], EBX
MOV [R8 + 8], ECX
MOV [R8 + 12], EDX
{ Middle 16 bytes }
MOV EAX, $80000003
CPUID
MOV [R8 + 16], EAX
MOV [R8 + 20], EBX
MOV [R8 + 24], ECX
MOV [R8 + 28], EDX
{ Final 16 bytes }
MOV EAX, $80000004
CPUID
MOV [R8 + 32], EAX
MOV [R8 + 36], EBX
MOV [R8 + 40], ECX
MOV [R8 + 44], EDX
{ Explicit terminator in the 49th element so the buffer always ends in #0 }
MOV BYTE PTR [R8 + 48], 0
{ Boolean result is returned in AL: 1 = True }
MOV AL, 1
JMP @ExitBrand
@Unavailable:
{ Brand-string leaves not supported: return False }
XOR AL, AL
@ExitBrand:
POP RBX
end;
{$else CPUX86_64}
{ Non-x86_64 fallback: the brand string is never fetched, so this always
  returns False and CPUName is left untouched. }
function FillBrandName: Boolean; inline;
begin
Result := False;
end;
{$endif CPUX86_64}
{ Pure Pascal reference ("control case") for the checksum.

  Starting from Input, the routine performs Y rounds; in each round it adds
  X and the constant $87654321 to the running value and then XORs it with
  the current round counter, which counts down from Y to 1. With Y = 0 no
  round is executed and Input is returned unchanged. }
function Checksum_PAS(const Input, X, Y: LongWord): LongWord;
var
  Round: LongWord;
begin
  Result := Input;
  { "downto 1" executes zero times when Y = 0, like the original guard }
  for Round := Y downto 1 do
    Result := (Result + X + $87654321) xor Round;
end;
{ Assembly version of the checksum in which the per-round addition is issued
  as two separate ADD instructions (the constant $87654321 first, then X).

  Input  - starting value of the checksum
  X      - value added in every round
  Y      - number of rounds; it is also the counter XORed into the checksum
           and counts down to zero
  Result - checksum after Y rounds; Input unchanged when Y = 0

  The instructions operate directly on the parameter locations, so Input
  and Y are overwritten while the routine runs. }
function Checksum_ADD(const Input, X, Y: LongWord): LongWord; assembler; nostackframe;
asm
  { Without this guard a zero Y would wrap around on DEC and run the loop
    2^32 times. The check sits outside the loop, so the timed per-round
    instruction sequence is unchanged. }
  TEST Y, Y
  JZ   @Done1
@Loop1:
  ADD  Input, $87654321
  ADD  Input, X
  XOR  Input, Y
  { DEC sets the zero flag when the counter reaches zero, ending the loop }
  DEC  Y
  JNZ  @Loop1
@Done1:
  MOV  Result, Input
end;
{ Assembly version of the checksum in which the per-round addition of X and
  the constant $87654321 is folded into a single three-component LEA.

  Input  - starting value of the checksum
  X      - value added in every round
  Y      - number of rounds; it is also the counter XORed into the checksum
           and counts down to zero
  Result - checksum after Y rounds; Input unchanged when Y = 0

  The instructions operate directly on the parameter locations, so Input
  and Y are overwritten while the routine runs. }
function Checksum_LEA(const Input, X, Y: LongWord): LongWord; assembler; nostackframe;
asm
  { Without this guard a zero Y would wrap around on DEC and run the loop
    2^32 times. The check sits outside the loop, so the timed per-round
    instruction sequence is unchanged. }
  TEST Y, Y
  JZ   @Done2
@Loop2:
  LEA  Input, [Input + X + $87654321]
  XOR  Input, Y
  { DEC sets the zero flag when the counter reaches zero, ending the loop }
  DEC  Y
  JNZ  @Loop2
@Done2:
  MOV  Result, Input
end;
{ Times one checksum routine and prints the average cost per round.

  name   - label printed in front of the measurement
  proc   - routine under test
  Z      - seed passed to the first call
  X      - addend forwarded unchanged to every call
  Result - checksum produced by the final call

  proc is invoked 10000 times, each call performing internal_reps rounds and
  receiving the previous call's result as its input. The pass count is fixed,
  so the returned checksum depends only on proc, Z and X. The elapsed
  wall-clock time taken from Now is converted to nanoseconds per round and
  written with one decimal place when below 10 ns, otherwise with none. }
function Benchmark(const name: string; proc: TBenchmarkProc; Z, X: LongWord): LongWord;
const
  internal_reps = 1000;
  outer_reps = 10000;
var
  StartStamp: TDateTime;
  NsPerCall: Double;
  Pass: Cardinal;
begin
  Result := Z;
  StartStamp := Now;
  for Pass := 1 to outer_reps do
    Result := proc(Result, X, internal_reps);
  { TDateTime differences are in days: convert to seconds, then to ns/round }
  NsPerCall := ((Now - StartStamp) * SecsPerDay) / outer_reps / internal_reps * 1e9;
  WriteLn(name, ': ', NsPerCall:0:Ord(NsPerCall < 10), ' ns/call');
end;
{ Program entry point.

  1. When FillBrandName succeeds, prints the CPU brand string followed by a
     dashed underline of the same width.
  2. Runs the three checksum routines through Benchmark with identical seed
     and addend.
  3. Compares both assembly results against the Pascal control case and
     halts with a non-zero exit code on a mismatch:
       bit 0 (value 1) - Checksum_LEA differs
       bit 1 (value 2) - Checksum_ADD differs }
var
  Results: array[0..2] of LongWord;
  FailureCode, X: Integer;
begin
  if FillBrandName then
    begin
      WriteLn('CPU = ', CPUName);
      { Turn the brand string into its own underline by overwriting every
        character up to the terminator; the loop is bounded by the array
        limits so it cannot run past the buffer. }
      for X := Low(CPUName) to High(CPUName) do
        begin
          if CPUName[X] = #0 then
            Break;
          CPUName[X] := '-';
        end;
      WriteLn('------', CPUName);
    end;

  { All three labels are padded to 22 characters so the results line up }
  Results[0] := Benchmark('   Pascal control case', @Checksum_PAS, 5000000, 1000);
  Results[1] := Benchmark(' Using LEA instruction', @Checksum_LEA, 5000000, 1000);
  Results[2] := Benchmark('Using ADD instructions', @Checksum_ADD, 5000000, 1000);

  FailureCode := 0;
  if (Results[0] <> Results[1]) then
    begin
      WriteLn('ERROR: Checksum_LEA doesn''t match control case');
      FailureCode := FailureCode or 1;
    end;
  if (Results[0] <> Results[2]) then
    begin
      WriteLn('ERROR: Checksum_ADD doesn''t match control case');
      FailureCode := FailureCode or 2;
    end;

  if FailureCode <> 0 then
    Halt(FailureCode);
end.
More information about the fpc-devel
mailing list