[fpc-devel] SIMD support / SSE

Ivo Steinmann ivo_steinmann at gmx.net
Sun Mar 14 16:13:32 CET 2010


hello all

I just did some tests with SSE on an x86_64 Linux system (fpc -Cfsse3
-O3 -a some.pas).

With scalar values, everything looks fine so far: double and
single arguments are passed in xmm registers, and SSE instructions
are also used for the computation:

type
  TScalar = single;

function adds(a,b: TScalar): TScalar;
begin
  Result := a + b;
end;

P$SIMD_ADDS$SINGLE$SINGLE$$SINGLE:
        addss   %xmm1,%xmm0
        movss   %xmm0,%xmm0
        ret


var
  s, t: TScalar;
adds(s, t);

        movss   (%rsp),%xmm1
        movss   4(%rsp),%xmm0
        call    P$SIMD_ADDS$SINGLE$SINGLE$$SINGLE
        call    FPC_DO_EXIT

so this is nice.


But when I start using vector values, things are not so nice:

type
  TVector = packed record
    x, y, z, w: single;
  end;

function addv(a,b: TVector): TVector;
begin
  Result.x := a.x + b.x;
  Result.y := a.y + b.y;
  Result.z := a.z + b.z;
  Result.w := a.w + b.w;
end;


P$SIMD_ADDV$TVECTOR$TVECTOR$$TVECTOR:
.Lc1:
        subq    $72,%rsp
.Lc3:
        movq    %rdi,(%rsp)
        movq    %rsi,8(%rsp)
        movq    %rdx,16(%rsp)
        movq    %rcx,24(%rsp)
        movss   (%rsp),%xmm0
        addss   16(%rsp),%xmm0
        movss   %xmm0,32(%rsp)
        movss   4(%rsp),%xmm0
        addss   20(%rsp),%xmm0
        movss   %xmm0,36(%rsp)
        movss   8(%rsp),%xmm0
        addss   24(%rsp),%xmm0
        movss   %xmm0,40(%rsp)
        movss   12(%rsp),%xmm0
        addss   28(%rsp),%xmm0
        movss   %xmm0,44(%rsp)
        movq    32(%rsp),%rax
        movq    40(%rsp),%rdx
        addq    $72,%rsp
        ret


var
  x, y: TVector;
addv(x, y);

        movq    U_P$SIMD_Y,%rdx
        movq    U_P$SIMD_Y+8,%rcx
        movq    U_P$SIMD_X,%rdi
        movq    U_P$SIMD_X+8,%rsi
        call    P$SIMD_ADDV$TVECTOR$TVECTOR$$TVECTOR


1. parameters are passed in general-purpose registers (rdi, rsi, rdx, rcx)
   and spilled to the stack, instead of in the xmm0 and xmm1 registers
2. there are multiple operations to compute the sum, instead of just one



My question: what is required to get the same results for TVector as for
TScalar? Are some built-in vector types necessary, such as

QuadSingle
QuadInteger
DoubleDouble
DoubleInt64
etc...

TVector = QuadSingle;

function addv(a, b: TVector): TVector;
begin
  Result := a + b;
// this is done with one single instruction (multiple data)
end;


-Ivo Steinmann



More information about the fpc-devel mailing list