[fpc-devel] AArch64 Register efficiency

Florian Klämpfl florian at freepascal.org
Thu Aug 20 16:36:22 CEST 2020


Am 20.08.20 um 15:09 schrieb J. Gareth Moreton:
> Oh dear, that's a shame.  Havoc sounds fun though!  Part of me wants to try anyway because I'm masochistic like that, but I'll do that privately.
> 
> As an example, here's something from the classes unit - see how many references there are to [sp, #16]. 

This is not a matter of registers being unavailable, but of the fact that 
self in a constructor must always be stored in memory; otherwise it could not 
be accessed properly in case of an exception. The proper approach to 
overcome this is, imo, to do it at the node level:
- find variables which are read a lot but are non-regable
- replace the read accesses with a regable tempref which is loaded before the reads

> It might not be exactly the same as what you get as I'm working on improving some peephole optimisations,
> especially the large numbers of "ldr x0,[sp, #16]" instructions:
> 
> .section .text.n_classes$_$tbits_$__$$_create$longint$$tbits,"ax"
> 	.balign 8
> .globl	CLASSES$_$TBITS_$__$$_CREATE$LONGINT$$TBITS
> 	.type	CLASSES$_$TBITS_$__$$_CREATE$LONGINT$$TBITS,@function
> CLASSES$_$TBITS_$__$$_CREATE$LONGINT$$TBITS:
> 	stp	x29,x30,[sp, #-16]!
> 	mov	x29,sp
> 	sub	sp,sp,#432
> 	str	x0,[sp, #16]
> 	str	x1,[sp, #8]
> 	str	w2,[sp]
> 	ldr	x0,[sp, #8]
> 	cmp	x0,#1
> 	b.ne	.Lj90
> 	ldr	x0,[sp, #16]
> 	ldr	x1,[sp, #16]
> 	ldr	x1,[x1, #104]
> 	blr	x1
> 	str	x0,[sp, #16]
> .Lj90:
> 	ldr	x0,[sp, #16]
> 	cbz	x0,.Lj87
> .Lj92:
> 	add	x2,sp,#32
> 	add	x1,sp,#56
> 	movz	w0,#1
> 	bl	fpc_pushexceptaddr
> 	bl	fpc_setjmp
> 	ubfiz	x0,x0,#0,#32
> 	sxtw	x1,w0
> 	str	x1,[sp, #224]
> 	cbnz	w0,.Lj97
> 	movn	x0,#0
> 	str	x0,[sp, #24]
> 	ldr	x0,[sp, #16]
> 	str	wzr,[x0, #16]
> 	ldr	x0,[sp, #16]
> 	str	wzr,[x0, #20]
> 	ldr	x0,[sp, #16]
> 	str	xzr,[x0, #8]
> 	ldr	x0,[sp, #16]
> 	movn	w1,#0
> 	str	w1,[x0, #24]
> 	ldr	x0,[sp, #16]
> 	movz	w1,#1
> 	strb	w1,[x0, #28]
> 	ldr	w0,[sp]
> 	cmp	w0,#0
> 	b.le	.Lj100
> 	ldr	w1,[sp]
> 	ldr	x0,[sp, #16]
> 	bl	CLASSES$_$TBITS_$__$$_GROW$LONGINT
> .Lj100:
> 	movz	x0,#1
> 	str	x0,[sp, #24]
> 	ldr	x0,[sp, #16]
> 	cmp	x0,#0
> 	cset	w0,ne
> 	ldr	x1,[sp, #8]
> 	cmp	x1,#0
> 	cset	w1,ne
> 	and	w0,w1,w0
> 	cbz	w0,.Lj102
> 	ldr	x0,[sp, #16]
> 	ldr	x1,[sp, #16]
> 	ldr	x1,[x1]
> 	ldr	x1,[x1, #136]
> 	blr	x1
> .Lj102:
> .Lj97:
> 	bl	fpc_popaddrstack
> 	ldr	x0,[sp, #224]
> 	cbz	x0,.Lj95
> 	add	x2,sp,#232
> 	add	x1,sp,#256
> 	movz	w0,#1
> 	bl	fpc_pushexceptaddr
> 	bl	fpc_setjmp
> 	ubfiz	x0,x0,#0,#32
> 	sxtw	x1,w0
> 	str	x1,[sp, #424]
> 	cbnz	w0,.Lj103
> 	ldr	x0,[sp, #8]
> 	cbz	x0,.Lj105
> 	ldr	x1,[sp, #24]
> 	ldr	x0,[sp, #16]
> 	ldr	x2,[sp, #16]
> 	ldr	x2,[x2]
> 	ldr	x2,[x2, #96]
> 	blr	x2
> .Lj105:
> 	bl	fpc_popaddrstack
> 	bl	fpc_reraise
> .Lj103:
> 	bl	fpc_popaddrstack
> 	ldr	x0,[sp, #424]
> 	cbz	x0,.Lj106
> 	bl	fpc_raise_nested
> .Lj106:
> 	bl	fpc_doneexception
> .Lj95:
> .Lj87:
> 	ldr	x0,[sp, #16]
> 	mov	sp,x29
> 	ldp	x29,x30,[sp], #16
> 	ret
> .Le9:
> 	.size	CLASSES$_$TBITS_$__$$_CREATE$LONGINT$$TBITS, .Le9 - CLASSES$_$TBITS_$__$$_CREATE$LONGINT$$TBITS
> 
> 



More information about the fpc-devel mailing list