246 lines
4.5 KiB
ArmAsm
246 lines
4.5 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright 2010, Google Inc.
 *
 * Brought in from coreboot uldivmod.S
 */
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
|
|
/*
 * A, Q = r0 + (r1 << 32)
 * B, R = r2 + (r3 << 32)
 * A / B = Q ... R	(quotient Q, remainder R)
 */
|
|
|
|
/*
 * Symbolic register names used throughout this file.  The _0 suffix is
 * the low 32-bit word of a 64-bit value and _1 is the high word.
 */

/* A: dividend on entry, running remainder inside the division loop */
A_0	.req	r0
A_1	.req	r1
/* B: divisor */
B_0	.req	r2
B_1	.req	r3
/* C: B - 1 for the power-of-two test, later the current quotient bit */
C_0	.req	r4
C_1	.req	r5
/* D: shift-count scratch, later the quotient being accumulated */
D_0	.req	r6
D_1	.req	r7

/* Results share registers with the inputs: Q overlays A, R overlays B */
Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

/* Extra scratch register, only defined when the THUMB() macro emits it */
THUMB(
TMP	.req	r8
)
|
|
|
|
/*
 * __aeabi_uldivmod - unsigned 64-bit division with remainder
 *
 * In:	A (r1:r0, high:low) = dividend
 *	B (r3:r2, high:low) = divisor
 * Out:	Q (r1:r0) = A / B
 *	R (r3:r2) = A % B
 *
 * r4-r7 (plus TMP in Thumb builds) and lr are pushed on entry and popped
 * on every exit path.  ip and the condition flags are used as scratch.
 *
 * Dispatch:
 *   B == 0			-> L_div_by_0	(calls __div0, returns Q = R = 0)
 *   B is a power of two	-> L_pow2	(mask for R, shift for Q)
 *   A_1 == 0 and B_1 == 0	-> L_div_32_32	(defers to __aeabi_uidivmod)
 *   anything else		-> L_div_64_64	(shift-and-subtract loop)
 *
 * NOTE(review): in ARM builds the prologue pushes five words (20 bytes),
 * so sp is not a multiple of 8 relative to its entry value at the two bl
 * sites below.  Confirm __aeabi_uidivmod and __div0 do not depend on an
 * 8-byte aligned stack.
 */
.pushsection .text.__aeabi_uldivmod, "ax"
ENTRY(__aeabi_uldivmod)

	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	@ C = B - 1 stays live: L_pow2 reuses it as the remainder mask
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32

L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	@ D_1 = shift - 32: mi selects the shift < 32 form, pl the >= 32 form
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
THUMB(	lsrmi	TMP, B_0, ip		)
THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
THUMB(	lsrmi	TMP, C_0, ip		)
THUMB(	orrmi	C_1, C_1, TMP		)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
#else
	@ C: current bit; D: result
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0
	/*
	 * Without CLZ, line B up with A by shifting B and C left together:
	 * four bits at a time while the top nibble of B_1 is clear and
	 * B < A, then one bit at a time while the top bit of B_1 is clear
	 * and B < A.
	 */
L_lsl_4:
	cmp	B_1, #0x10000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4
L_lsl_1:
	cmp	B_1, #0x80000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif
	/*
	 * Shift-and-subtract loop.  Each pass subtracts B from A when it
	 * fits and records the matching quotient bit C in D, then moves B
	 * and C one bit to the right.  The loop ends when A reaches zero
	 * or C has been shifted out.
	 */
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	@ R must be written before Q: Q overlays A, which still holds R
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_div_32_32:
	@ Note:	A_0 &	r0 are aliases
	@	Q_1	r1
	@ r0 already holds A_0; pass B_0 as the second argument in r1
	mov	r1, B_0
	bl	__aeabi_uidivmod
	@ r0 is left in place as Q_0; r1 is taken as the remainder
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_pow2:
#ifdef HAVE_CLZ
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	@ B_0 == 0, so the set bit is in B_1: shift by 32 + log2(B_1)
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	@ Flags still come from the rsbs above: pl means B_0 held the set bit
	movpl	A_0, A_0, lsr D_0
ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
THUMB(	lslpl	TMP, A_1, D_1		)
THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Mov back C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ Count the trailing zeroes in B (= log2(B)) into D_0.
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If B_0 == 0 (B >= 1 << 32), divide A and B by 1 << 32.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Count the remaining trailing zeroes in B by binary search.
	movs	B_1, B_0, lsl #16
	addeq	D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, #1
	@ Shift A to the right by the appropriate amount.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
	/*
	 * The Thumb variant goes through TMP so that A_1 is left intact:
	 * Q_1 is computed from the unshifted A_1 immediately afterwards.
	 */
ARM(	orr	Q_0, Q_0, A_1, lsl D_1	)
THUMB(	lsl	TMP, A_1, D_1		)
THUMB(	orr	Q_0, Q_0, TMP		)
	mov	Q_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif

L_div_by_0:
	bl	__div0
	@ As wrong as it could be
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)
.popsection
|