mirror of
				https://github.com/smaeul/u-boot.git
				synced 2025-11-04 05:50:17 +00:00 
			
		
		
		
	The assembly for __gnu_thumb1_case_si was taken from upstream gcc and adapted:
the width suffix was removed from the add instruction [1].
Signed-off-by: Francis Laniel <francis.laniel@amarulasolutions.com>
Tested-by: Tony Dinh <mibodhi@gmail.com>
[1] 4f181f9c7e/libgcc/config/arm/lib1funcs.S (L2156)
Acked-by: Pali Rohár <pali@kernel.org>
Acked-by:  Tony Dinh <mibodhi@gmail.com>
		
	
			
		
			
				
	
	
		
			440 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			440 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
/* SPDX-License-Identifier: GPL-2.0+ */
 | 
						|
/*
 | 
						|
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 | 
						|
 *
 | 
						|
 * Author: Nicolas Pitre <nico@fluxnic.net>
 | 
						|
 *   - contributed to gcc-3.4 on Sep 30, 2003
 | 
						|
 *   - adapted for the Linux kernel on Oct 2, 2003
 | 
						|
 */
 | 
						|
/*
 | 
						|
 * Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/linkage.h>
 | 
						|
#include <asm/assembler.h>
 | 
						|
 | 
						|
/*
 | 
						|
 * U-Boot compatibility bit: define an empty UNWIND() macro, since we
 | 
						|
 * do not support stack unwinding to make all of the functions available
 | 
						|
 * without diverging from Linux code.
 | 
						|
 */
 | 
						|
#ifdef __UBOOT__
/* Every UNWIND(...) annotation in this file expands to nothing. */
#define UNWIND(x...)
#endif
 | 
						|
 | 
						|
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core. All arguments are registers.
 *
 *   dividend  in:  value to divide; clobbered
 *   divisor   in:  value to divide by; clobbered (shifted in place)
 *   result    out: quotient, built up one bit at a time
 *   curbit    scratch: quotient bit that matches the current divisor shift
 *
 * The condition flags are clobbered. The callers in this file only expand
 * this macro after ruling out divisor == 0, divisor == 1,
 * dividend <= divisor and power-of-two divisors.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Shift the divisor left by the difference of the two leading-zero
	@ counts and put curbit at the same bit position.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unrolling in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per pass, each one
	@ setting the matching quotient bit when the subtraction happens.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm
 | 
						|
 | 
						|
 | 
						|
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute the shift count that replaces a division by a power of two.
 *
 *   divisor  in:  register holding the divisor; the callers in this file
 *                 branch here only after testing that it is a power of
 *                 two. Left untouched on ARMv5+, clobbered otherwise.
 *   order    out: index of the set bit in divisor
 *
 * The condition flags are clobbered on the pre-ARMv5 path.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - leading zeros

#else

	@ Binary search for the set bit: 16, 8, then 4 positions at a time,
	@ moving the divisor down whenever the step is taken.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ What is left is 1, 2, 4 or 8: above 4 adds 3, otherwise
	@ divisor >> 1 (0, 1 or 2) is the remaining bit index.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
 | 
						|
 | 
						|
 | 
						|
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned shift-and-subtract remainder core. All arguments are registers.
 *
 *   dividend  in:  value to divide; out: the remainder
 *   divisor   in:  value to divide by; clobbered (shifted in place)
 *   order     scratch: number of bit positions the divisor was shifted up
 *   spare     scratch: only written on the ARMv5+ path
 *
 * The condition flags are clobbered. The callers in this file only expand
 * this macro after ruling out divisor == 0, divisor == 1,
 * dividend <= divisor and power-of-two divisors.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ Shift the divisor left by the difference of the two leading-zero
	@ counts and remember that shift in order.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unrolling in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4		@ skipped when the cmp above gave lt
	bge	1b

	@ Finished if no partial batch remains or nothing is left to reduce.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
	@ order < -2 enters at 4 (one step), order == -2 at 3 (two steps),
	@ anything else falls through (three steps).
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
 | 
						|
 | 
						|
 | 
						|
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient
 *
 * r1, r2, r3 and the condition flags may be clobbered.
 * A zero divisor branches to Ldiv0.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2				@ quotient was built in r2
	ret	lr

	@ Flags still come from cmp r0, r1 above.
11:	moveq	r0, #1				@ dividend == divisor
	movne	r0, #0				@ dividend <  divisor
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2			@ divide by shifting
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection
 | 
						|
 | 
						|
/*
 * __umodsi3 - unsigned 32-bit remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = dividend modulo divisor
 *
 * r1, r2, r3 and the condition flags may be clobbered.
 * A zero divisor branches to Ldiv0.
 *
 * The prologue is one chain of conditionally executed instructions; each
 * condition depends on the flags left by the instruction before it, so
 * the order must not be changed.
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0				@ divisor == 1 or equal operands
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ r2 = divisor - 1 acts as the mask
	retls	lr				@ all the easy cases are done here

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection
 | 
						|
 | 
						|
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient
 *
 * Works on absolute values and restores the sign at the end; bit 31 of
 * ip (dividend ^ divisor) is the sign of the result.
 * r1, r2, r3, ip and the condition flags may be clobbered.
 * A zero divisor branches to Ldiv0.
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ |dividend| <= |divisor|
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negative result: negate
	ret	lr

	@ |divisor| == 1. ip ^ r0 is the original divisor, so the N flag
	@ below is set exactly when the divisor was -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ Flags still come from cmp r3, r1 above.
11:	movlo	r0, #0				@ |dividend| < |divisor|
	moveq	r0, ip, asr #31			@ equal: 0 or -1 from the sign ...
	orreq	r0, r0, #1			@ ... turned into +1 or -1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2			@ shift the absolute dividend
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection
 | 
						|
 | 
						|
/*
 * __modsi3 - signed 32-bit remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = remainder, carrying the sign of the dividend
 *
 * Works on absolute values; ip keeps the original dividend so its sign
 * can be applied at the end.
 * r1, r2, r3, ip and the condition flags may be clobbered.
 * A zero divisor branches to Ldiv0.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0				@ divisor == 1 or equal operands
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ r2 = divisor - 1 acts as the mask
	bls	10f				@ easy cases: only the sign is left

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ dividend was negative: negate
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection
 | 
						|
 | 
						|
/*
 * __aeabi_uidivmod - unsigned 32-bit division with remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient, r1 = remainder
 *
 * Calls __aeabi_uidiv and derives the remainder as
 * dividend - quotient * divisor. r2 and r3 are clobbered.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands and lr
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection
 | 
						|
 | 
						|
/*
 * __aeabi_idivmod - signed 32-bit division with remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient, r1 = remainder
 *
 * Calls __aeabi_idiv and derives the remainder as
 * dividend - quotient * divisor. r2 and r3 are clobbered.
 *
 * The routine is placed in a section named after itself, like every
 * other routine in this file.
 */
.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands and lr
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection
 | 
						|
 | 
						|
/*
 * Ldiv0 - shared division-by-zero exit.
 *
 * The division routines in this file reach it with a plain conditional
 * branch, so lr still holds the return address of their caller. It calls
 * __div0 (not defined in this file) and then returns 0 in r0 to that
 * caller.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!		@ sp -= 8, lr saved at the new sp
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ pc = saved lr, sp += 8

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection
 | 
						|
 | 
						|
/* Thumb-1 specialities */
 | 
						|
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
 | 
						|
/*
 * __gnu_thumb1_case_sqi - Thumb-1 switch dispatch, signed byte table.
 *
 * In:  r0 = table index
 *      lr = return address; with bit 0 cleared it is the table address
 *
 * Loads the signed byte at table[r0], doubles it and adds it to lr, then
 * returns to the adjusted lr. r0 and r1 are preserved.
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		/* shift out and back in: */
	lsls	r1, r1, #1		/* r1 = lr with bit 0 cleared */
	ldrsb	r1, [r1, r0]		/* sign-extended table entry */
	lsls	r1, r1, #1		/* entry * 2 = byte offset */
	add	lr, lr, r1
	pop	{r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection
 | 
						|
 | 
						|
/*
 * __gnu_thumb1_case_uqi - Thumb-1 switch dispatch, unsigned byte table.
 *
 * In:  r0 = table index
 *      lr = return address; with bit 0 cleared it is the table address
 *
 * Loads the unsigned byte at table[r0], doubles it and adds it to lr,
 * then returns to the adjusted lr. r0 and r1 are preserved.
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		/* shift out and back in: */
	lsls	r1, r1, #1		/* r1 = lr with bit 0 cleared */
	ldrb	r1, [r1, r0]		/* zero-extended table entry */
	lsls	r1, r1, #1		/* entry * 2 = byte offset */
	add	lr, lr, r1
	pop	{r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection
 | 
						|
 | 
						|
/*
 * __gnu_thumb1_case_shi - Thumb-1 switch dispatch, signed halfword table.
 *
 * In:  r0 = table index
 *      lr = return address; with bit 0 cleared it is the table address
 *
 * Loads the signed halfword at table[r0], doubles it and adds it to lr,
 * then returns to the adjusted lr. r0 and r1 are saved and restored.
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1		/* index * 2: entries are halfwords */
	lsls	r1, r1, #1		/* r1 = lr with bit 0 cleared */
	ldrsh	r1, [r1, r0]		/* sign-extended table entry */
	lsls	r1, r1, #1		/* entry * 2 = byte offset */
	add	lr, lr, r1
	pop	{r0, r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection
 | 
						|
 | 
						|
/*
 * __gnu_thumb1_case_uhi - Thumb-1 switch dispatch, unsigned halfword table.
 *
 * In:  r0 = table index
 *      lr = return address; with bit 0 cleared it is the table address
 *
 * Loads the unsigned halfword at table[r0], doubles it and adds it to lr,
 * then returns to the adjusted lr. r0 and r1 are saved and restored.
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1		/* index * 2: entries are halfwords */
	lsls	r1, r1, #1		/* r1 = lr with bit 0 cleared */
	ldrh	r1, [r1, r0]		/* zero-extended table entry */
	lsls	r1, r1, #1		/* entry * 2 = byte offset */
	add	lr, lr, r1
	pop	{r0, r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
 | 
						|
 | 
						|
/* Taken and adapted from: https://github.com/gcc-mirror/gcc/blob/4f181f9c7ee3efc509d185fdfda33be9018f1611/libgcc/config/arm/lib1funcs.S#L2156 */
/*
 * __gnu_thumb1_case_si - Thumb-1 switch dispatch, word table.
 *
 * In:  r0 = table index
 *      lr = return address; the table starts at (lr + 2) with the low
 *           two bits cleared
 *
 * Loads the word at table[r0], adds the table address to it and jumps
 * to the sum. r0 and r1 are saved and restored; lr is overwritten with
 * the jump target.
 */
.pushsection .text.__gnu_thumb1_case_si, "ax"
ENTRY(__gnu_thumb1_case_si)
	push	{r0, r1}
	mov	r1, lr
	adds	r1, r1, #2	/* Align to word.  */
	lsrs	r1, r1, #2
	lsls	r0, r0, #2	/* index * 4: entries are words */
	lsls	r1, r1, #2	/* r1 = word-aligned table address */
	ldr	r0, [r1, r0]
	adds	r0, r0, r1	/* entry is relative to the table address */
	mov	lr, r0
	pop	{r0, r1}
	mov	pc, lr		/* We know we were called from thumb code.  */
ENDPROC(__gnu_thumb1_case_si)
.popsection
 | 
						|
#endif
 |