[fpc-devel] AArch64 Register efficiency
Florian Klämpfl
florian at freepascal.org
Thu Aug 20 16:36:22 CEST 2020
Am 20.08.20 um 15:09 schrieb J. Gareth Moreton:
> Oh dear, that's a shame. Havoc sounds fun though! Part of me wants to try anyway because I'm masochistic like that, but I'll do that privately.
>
> As an example, here's something from the classes unit - see how many references there are to [sp, #16].
This is not a matter of registers being unavailable, but of the fact that
self in a constructor must always be stored in memory; otherwise it could
not be accessed properly in case of an exception. The proper approach to
overcome this is, in my opinion, to handle it at the node level:
- find variables which are read a lot but non-regable
- replace read access by a regable tempref which is loaded before the reads
> It might not be exactly the same as what you get as I'm working on improving some peephole optimisations,
> especially the large numbers of "ldr x0,[sp, #16]" instructions:
>
> .section .text.n_classes$_$tbits_$__$$_create$longint$$tbits,"ax"
> .balign 8
> .globl CLASSES$_$TBITS_$__$$_CREATE$LONGINT$$TBITS
> .type CLASSES$_$TBITS_$__$$_CREATE$LONGINT$$TBITS,@function
> CLASSES$_$TBITS_$__$$_CREATE$LONGINT$$TBITS:
> stp x29,x30,[sp, #-16]!
> mov x29,sp
> sub sp,sp,#432
> str x0,[sp, #16]
> str x1,[sp, #8]
> str w2,[sp]
> ldr x0,[sp, #8]
> cmp x0,#1
> b.ne .Lj90
> ldr x0,[sp, #16]
> ldr x1,[sp, #16]
> ldr x1,[x1, #104]
> blr x1
> str x0,[sp, #16]
> .Lj90:
> ldr x0,[sp, #16]
> cbz x0,.Lj87
> .Lj92:
> add x2,sp,#32
> add x1,sp,#56
> movz w0,#1
> bl fpc_pushexceptaddr
> bl fpc_setjmp
> ubfiz x0,x0,#0,#32
> sxtw x1,w0
> str x1,[sp, #224]
> cbnz w0,.Lj97
> movn x0,#0
> str x0,[sp, #24]
> ldr x0,[sp, #16]
> str wzr,[x0, #16]
> ldr x0,[sp, #16]
> str wzr,[x0, #20]
> ldr x0,[sp, #16]
> str xzr,[x0, #8]
> ldr x0,[sp, #16]
> movn w1,#0
> str w1,[x0, #24]
> ldr x0,[sp, #16]
> movz w1,#1
> strb w1,[x0, #28]
> ldr w0,[sp]
> cmp w0,#0
> b.le .Lj100
> ldr w1,[sp]
> ldr x0,[sp, #16]
> bl CLASSES$_$TBITS_$__$$_GROW$LONGINT
> .Lj100:
> movz x0,#1
> str x0,[sp, #24]
> ldr x0,[sp, #16]
> cmp x0,#0
> cset w0,ne
> ldr x1,[sp, #8]
> cmp x1,#0
> cset w1,ne
> and w0,w1,w0
> cbz w0,.Lj102
> ldr x0,[sp, #16]
> ldr x1,[sp, #16]
> ldr x1,[x1]
> ldr x1,[x1, #136]
> blr x1
> .Lj102:
> .Lj97:
> bl fpc_popaddrstack
> ldr x0,[sp, #224]
> cbz x0,.Lj95
> add x2,sp,#232
> add x1,sp,#256
> movz w0,#1
> bl fpc_pushexceptaddr
> bl fpc_setjmp
> ubfiz x0,x0,#0,#32
> sxtw x1,w0
> str x1,[sp, #424]
> cbnz w0,.Lj103
> ldr x0,[sp, #8]
> cbz x0,.Lj105
> ldr x1,[sp, #24]
> ldr x0,[sp, #16]
> ldr x2,[sp, #16]
> ldr x2,[x2]
> ldr x2,[x2, #96]
> blr x2
> .Lj105:
> bl fpc_popaddrstack
> bl fpc_reraise
> .Lj103:
> bl fpc_popaddrstack
> ldr x0,[sp, #424]
> cbz x0,.Lj106
> bl fpc_raise_nested
> .Lj106:
> bl fpc_doneexception
> .Lj95:
> .Lj87:
> ldr x0,[sp, #16]
> mov sp,x29
> ldp x29,x30,[sp], #16
> ret
> .Le9:
> .size CLASSES$_$TBITS_$__$$_CREATE$LONGINT$$TBITS, .Le9 - CLASSES$_$TBITS_$__$$_CREATE$LONGINT$$TBITS
>
>
More information about the fpc-devel
mailing list