#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

.global __muldi3
__muldi3:
.global __aeabi_lmul
__aeabi_lmul:
	@ 64 x 64 -> 64 multiply.  The cross products xh*yl and xl*yh
	@ contribute only to the high word.
	mul	xh, yl, xh
	mla	xh, xl, yh, xh
	@ Split xl and yl into 16-bit halves (ip and yh receive the high
	@ halves) and build the 32 x 32 -> 64 product xl * yl from four
	@ 16 x 16 partial products.
	mov	ip, xl, lsr #16
	mov	yh, yl, lsr #16
	bic	xl, xl, ip, lsl #16
	bic	yl, yl, yh, lsl #16
	mla	xh, yh, ip, xh
	mul	yh, xl, yh
	mul	xl, yl, xl
	mul	ip, yl, ip
	adds	xl, xl, yh, lsl #16
	adc	xh, xh, yh, lsr #16
	adds	xl, xl, ip, lsl #16
	adc	xh, xh, ip, lsr #16
	mov	pc, lr

dividend	.req	r0
divisor		.req	r1
result		.req	r2
curbit		.req	r3

.globl	__udivsi3
.type	__udivsi3, function
.globl	__aeabi_uidiv
.type	__aeabi_uidiv, function
.align	0

__udivsi3:
__aeabi_uidiv:
	cmp	divisor, #0
	beq	Ldiv0_uidiv
	mov	curbit, #1
	mov	result, #0
	cmp	dividend, divisor
	bcc	Lgot_result
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, #0x10000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #4
	movcc	curbit, curbit, lsl #4
	bcc	Loop1

Lbignum:
	@ For very big divisors, we must shift one bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, #0x80000000
	cmpcc	divisor, dividend
	movcc	divisor, divisor, lsl #1
	movcc	curbit, curbit, lsl #1
	bcc	Lbignum

Loop3:
	@ Test for possible subtractions, and note which bits
	@ are done in the result.  On the final pass, this may subtract
	@ too much from the dividend, but the result will be ok, since the
	@ "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	subcs	dividend, dividend, divisor
	orrcs	result, result, curbit

	cmp	dividend, divisor, lsr #1
	subcs	dividend, dividend, divisor, lsr #1
	orrcs	result, result, curbit, lsr #1

	cmp	dividend, divisor, lsr #2
	subcs	dividend, dividend, divisor, lsr #2
	orrcs	result, result, curbit, lsr #2

	cmp	dividend, divisor, lsr #3
	subcs	dividend, dividend, divisor, lsr #3
	orrcs	result, result, curbit, lsr #3

	cmp	dividend, #0			@ Early termination?
	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
	movne	divisor, divisor, lsr #4
	bne	Loop3

Lgot_result:
	mov	r0, result
	mov	pc, lr

Ldiv0_uidiv:
	str	lr, [sp, #-4]!
#	bl	__div0 (PLT)
	mov	r0, #0			@ about as wrong as it could be
	ldmia	sp!, {pc}

.size __udivsi3, . - __udivsi3

.globl	__aeabi_uidivmod
__aeabi_uidivmod:
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r0 holds the quotient
	sub	r1, r1, r3		@ r1 = remainder
	mov	pc, lr

.globl	__aeabi_idivmod
__aeabi_idivmod:
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r0 holds the quotient
	sub	r1, r1, r3		@ r1 = remainder
	mov	pc, lr
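
@ For reference, a minimal C sketch (not part of the original file) of the
@ restoring shift-and-subtract division implemented by __udivsi3 above and
@ unrolled four bits per iteration by the ARM_DIV_BODY macro below.  The
@ name udiv32 is illustrative only:
@
@	unsigned int udiv32(unsigned int dividend, unsigned int divisor)
@	{
@		unsigned int curbit = 1, result = 0;
@
@		if (divisor == 0)
@			return 0;	/* the assembly branches to Ldiv0_uidiv */
@		/* Align the divisor with the dividend, recording the shift
@		 * in curbit; stop before the top bit would be shifted out. */
@		while (divisor < dividend && !(divisor & 0x80000000)) {
@			divisor <<= 1;
@			curbit <<= 1;
@		}
@		/* Subtract out each bit position that still fits. */
@		while (curbit) {
@			if (dividend >= divisor) {
@				dividend -= divisor;
@				result |= curbit;
@			}
@			divisor >>= 1;
@			curbit >>= 1;
@		}
@		return result;	/* the remainder is left in dividend */
@	}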
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift one bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit

	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1

	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2

	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3

	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4
	bne	1b

.endm

.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the highest set bit of the (power-of-2)
	@ divisor: its bit position is the shift count for the quotient.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm

	.align	5
.globl __divsi3
.globl __aeabi_idiv
__divsi3:
__aeabi_idiv:
	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	mov	pc, lr

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	mov	pc, lr

11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	mov	pc, lr

Ldiv0:
	str	lr, [sp, #-4]!
#	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #4
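
@ A minimal C sketch (illustrative, not part of the original file) of the
@ sign handling done by __divsi3/__aeabi_idiv above: the magnitudes are
@ divided unsigned, and the quotient is negated when the operand signs
@ differ (the eor into ip captures the sign bit).  udiv32 stands for the
@ unsigned routine sketched earlier; a zero divisor is left to Ldiv0:
@
@	int idiv32(int num, int den)
@	{
@		unsigned int sign = (unsigned int)(num ^ den);
@		unsigned int q = udiv32(num < 0 ? -(unsigned int)num : (unsigned int)num,
@					den < 0 ? -(unsigned int)den : (unsigned int)den);
@
@		return (sign & 0x80000000) ? -(int)q : (int)q;
@	}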
.global __aeabi_uldivmod
.type __aeabi_uldivmod, function
.align 0

A_0	.req	r0
A_1	.req	r1
B_0	.req	r2
B_1	.req	r3
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

__aeabi_uldivmod:
	stmfd	sp!, {r4, r5, r6, r7, lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32

L_div_64_64:
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
	orrmi	B_1, B_1, B_0, lsr ip
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
	orrmi	C_1, C_1, C_0, lsr ip
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, pc}

L_div_32_32:
	@ Note: A_0/r0 and Q_1/r1 are aliases
	mov	r1, B_0
	bl	__aeabi_uidivmod
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, pc}

L_pow2:
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:	movpl	A_0, A_0, lsr D_0
	orrpl	A_0, A_0, A_1, lsl D_1
	mov	A_1, A_1, lsr D_0
	@ Move C back to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, pc}

L_div_by_0:
#	bl	__div0
	@ As wrong as it could be
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, pc}
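
@ For reference, the __aeabi_uldivmod contract sketched in C (illustrative
@ only; the real routine returns the quotient in {r1:r0} and the remainder
@ in {r3:r2}, which a struct return models here).  It is the same restoring
@ division as above, widened to 64 bits; the assembly additionally
@ early-exits once the remainder reaches zero and shortcuts power-of-2
@ divisors via L_pow2:
@
@	typedef struct { unsigned long long quot, rem; } uldivmod_t;
@
@	uldivmod_t uldivmod(unsigned long long a, unsigned long long b)
@	{
@		uldivmod_t r = { 0, a };
@		unsigned long long c = 1;
@
@		if (b == 0)		/* L_div_by_0 zeroes both results */
@			return (uldivmod_t){ 0, 0 };
@		while (b < r.rem && !(b >> 63)) {	/* align, cf. the clz pair */
@			b <<= 1;
@			c <<= 1;
@		}
@		while (c) {
@			if (r.rem >= b) {
@				r.rem -= b;
@				r.quot |= c;
@			}
@			b >>= 1;
@			c >>= 1;
@		}
@		return r;
@	}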