[fpc-devel] "Blank slate" next version of FPC

Ben Grasset operator97 at gmail.com
Sat Feb 23 01:58:20 CET 2019


There's not really any way to do what that does with a normal for loop
without copying the array though, which is the whole point, basically.

Here's the actual assembly I get from FPC for "main":

.section .text.n_main,"x"
.balign 16,0x90
.globl PASCALMAIN
PASCALMAIN:
.globl main
main:
.Lc12:
.seh_proc main
pushq %rbp
.seh_pushreg %rbp
.Lc14:
.Lc15:
movq %rsp,%rbp
.Lc16:
leaq -176(%rsp),%rsp
.seh_stackalloc 176
movq %rbx,-136(%rbp)
movq %rdi,-128(%rbp)
movq %rsi,-120(%rbp)
movq %r12,-112(%rbp)
.seh_savereg %rbx, 40
.seh_savereg %rdi, 48
.seh_savereg %rsi, 56
.seh_savereg %r12, 64
.seh_endprologue
call fpc_initializeunits
movq $100000000,-8(%rbp)
leaq RTTI_$P$TAKEWHILEEXAMPLE_$$_def00000014(%rip),%rdx
leaq -8(%rbp),%r9
leaq U_$P$TAKEWHILEEXAMPLE_$$_ARR(%rip),%rcx
movl $1,%r8d
call fpc_dynarray_setlength
movq $-1,%rdx
.p2align 4,,10
.p2align 3
.Lj35:
addq $1,%rdx
movq U_$P$TAKEWHILEEXAMPLE_$$_ARR(%rip),%rax
movq %rdx,(%rax,%rdx,8)
cmpq $99999999,%rdx
jnae .Lj35
xorl %esi,%esi
xorl %ebx,%ebx
leaq P$TAKEWHILEEXAMPLE_$$_TEST$QWORD$$BOOLEAN(%rip),%rax
movq %rax,-88(%rbp)
movq $0,-80(%rbp)
movq U_$P$TAKEWHILEEXAMPLE_$$_ARR(%rip),%rax
movq %rax,-56(%rbp)
movq U_$P$TAKEWHILEEXAMPLE_$$_ARR(%rip),%rax
movq %rax,-48(%rbp)
movq U_$P$TAKEWHILEEXAMPLE_$$_ARR(%rip),%rcx
call fpc_dynarray_high
movq %rax,%rdi
movq $-1,%r12
.p2align 4,,10
.p2align 3
.Lj39:
addq $1,%r12
movq U_$P$TAKEWHILEEXAMPLE_$$_ARR(%rip),%rax
movq (%rax,%r12,8),%rcx
call *-88(%rbp)
testb %al,%al
je .Lj38
addq $8,-48(%rbp)
.p2align 4,,10
.p2align 3
cmpq %r12,%rdi
jnbe .Lj39
.Lj38:
leaq -56(%rbp),%rax
vmovups (%rax),%xmm1
vmovsd 16(%rax),%xmm0
vmovups %xmm1,-32(%rbp)
vmovsd %xmm0,-16(%rbp)
movq -32(%rbp),%rax
subq $8,%rax
movq %rax,-16(%rbp)
jmp .Lj47
.p2align 4,,10
.p2align 3
.Lj46:
movq -16(%rbp),%rax
movq (%rax),%rsi
movq %rsi,%rbx
.Lj47:
addq $8,-16(%rbp)
movq -24(%rbp),%rax
cmpq -16(%rbp),%rax
jnae .Lj52
cmpq -32(%rbp),%rax
je .Lj52
movb $1,%al
jmp .Lj50
.Lj52:
xorb %al,%al
.Lj50:
testb %al,%al
jne .Lj46
call fpc_get_output
movq %rax,%rsi
movq %rbx,%r8
movq %rsi,%rdx
xorl %ecx,%ecx
call fpc_write_text_uint
movq %rsi,%rcx
call fpc_writeln_end
call fpc_do_exit
movq -136(%rbp),%rbx
movq -128(%rbp),%rdi
movq -120(%rbp),%rsi
movq -112(%rbp),%r12
leaq (%rbp),%rsp
popq %rbp
ret
.seh_endproc
.Lc13:

On Fri, Feb 22, 2019 at 7:17 PM Benito van der Zander <benito at benibela.de>
wrote:

> Hi,
>
> The trick with enumerators is to never make them classes, and use advanced
> records instead, I've found. This way you avoid the heap allocation and the
> implicit try/finally. Also make sure you inline the MoveNext and
> GetCurrent!
>
>
> that's what I do.
>
>
> But the generated assembly is still worse than an old for loop, because it
> keeps all the fields of the record in memory.
>
> for example
>
> >   for I in TSlice<SizeUInt>.TakeWhile(Arr, Test) do J := I;
>
> generates something like this
>
> 0000000000401290 488b45f0                 mov    -0x10(%rbp),%rax
> 0000000000401294 488b00                   mov    (%rax),%rax
> 0000000000401297 488905b22a0300           mov    %rax,0x32ab2(%rip)
> # 0x433d50 <U_$P$PROJECT1_$$_I>
> project1.lpr:75                           J := I;
> 000000000040129E 488905bb2a0300           mov    %rax,0x32abb(%rip)
> # 0x433d60 <U_$P$PROJECT1_$$_J>
> project1.lpr:74                           for I in
> TSlice<SizeUInt>.TakeWhile(Arr, Test) do
> 00000000004012A5 488345f008               addq   $0x8,-0x10(%rbp)
> project1.lpr:69                           begin
> 00000000004012AA 488b45e8                 mov    -0x18(%rbp),%rax
> project1.lpr:74                           for I in
> TSlice<SizeUInt>.TakeWhile(Arr, Test) do
> 00000000004012AE 483b45f0                 cmp    -0x10(%rbp),%rax
> 00000000004012B2 720a                     jb     0x4012be <main+334>
> 00000000004012B4 483b45e0                 cmp    -0x20(%rbp),%rax
> 00000000004012B8 7404                     je     0x4012be <main+334>
> 00000000004012BA b001                     mov    $0x1,%al
> 00000000004012BC eb02                     jmp    0x4012c0 <main+336>
> 00000000004012BE 30c0                     xor    %al,%al
> 00000000004012C0 84c0                     test   %al,%al
> 00000000004012C2 75cc                     jne    0x401290 <main+288>
>
> Nearly every line is accessing some memory, when it could keep everything
> in a few registers. amd64 has 16 registers, but fpc seems to only know
> three when records are involved.
>
>
>
> Cheers,
> Benito
>
> Am 22.02.19 um 16:51 schrieb Ben Grasset:
>
> On Fri, Feb 22, 2019 at 1:07 AM Paul van Helden <paul at planetgis.co.za>
> wrote:
>
>>  How do you make a (for in) enumerator with a record? I don't use them
>> for exactly this reason, and they did seem to be another useful language
>> feature that turned out to be poorly implemented by Embarcadero. (Haven't
>> checked with FPC).
>>
>
> Here's an example (for FPC) that demonstrates it by implementing the
> "take-while" pattern:
>
> program TakeWhileExample;
>
> {$mode Delphi}{$H+}{$J-}
> {$modeswitch NestedProcVars}
> {$ImplicitExceptions Off}
> {$PointerMath On}
>
> type
>   TSlice<T> = record
>   public type
>     PT = ^T;
>     ArrayType = array of T;
>   private
>     FFirst, FLast, FCurrent: PT;
>     function GetCurrent: T; inline;
>   public
>     function GetEnumerator: TSlice<T>; inline;
>     function MoveNext: Boolean; inline;
>     class function TakeWhile(const A: ArrayType; function F(const Val: T):
> Boolean): TSlice<T>; static; inline;
>     property Current: T read GetCurrent;
>   end;
>
>   TTestFunc<T> = function(const Val: T): Boolean;
>
>   function TSlice<T>.GetCurrent: T;
>   begin
>     Result := FCurrent^;
>   end;
>
>   function TSlice<T>.GetEnumerator: TSlice<T>;
>   begin
>     Result := Self;
>     with Result do FCurrent := FFirst - 1;
>   end;
>
>   function TSlice<T>.MoveNext: Boolean;
>   begin
>     Inc(FCurrent);
>     Exit((FCurrent <= FLast) and (FFirst <> FLast));
>   end;
>
>   function Test(const Val: SizeUInt): Boolean; inline;
>   begin
>     Exit((Val < 50000000));
>   end;
>
>   class function TSlice<T>.TakeWhile(const A: ArrayType; function F(const
> Val: T): Boolean): TSlice<T>;
>   var
>     I: SizeUInt;
>     X: TTestFunc<T> absolute F;
>     //FPC generates slightly better code for the "absolute" way, not sure
> why...
>   begin
>     with Result do begin
>       FFirst := @A[0];
>       FLast := @A[0];
>       for I := 0 to High(A) do
>         case X(A[I]) of
>           True: Inc(FLast);
>           False: Exit();
>         end;
>     end;
>   end;
>
> var
>   I, J: SizeUInt;
>   Arr: TSlice<SizeUInt>.ArrayType;
>
> begin
>   SetLength(Arr, 100000000);
>   for I := 0 to 99999999 do Arr[I] := I;
>   I := 0;
>   J := 0;
>   for I in TSlice<SizeUInt>.TakeWhile(Arr, Test) do J := I;
>   WriteLn(J);
> end.
>
> _______________________________________________
> fpc-devel maillist  -  fpc-devel at lists.freepascal.org
> http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel
>
> _______________________________________________
> fpc-devel maillist  -  fpc-devel at lists.freepascal.org
> http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freepascal.org/pipermail/fpc-devel/attachments/20190222/1180cdb9/attachment.html>


More information about the fpc-devel mailing list