Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
0003  *
0004  * Author: Nicolas Pitre <nico@fluxnic.net>
0005  *   - contributed to gcc-3.4 on Sep 30, 2003
0006  *   - adapted for the Linux kernel on Oct 2, 2003
0007  */
0008 
0009 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
0010 
0011 This file is free software; you can redistribute it and/or modify it
0012 under the terms of the GNU General Public License as published by the
0013 Free Software Foundation; either version 2, or (at your option) any
0014 later version.
0015 
0016 In addition to the permissions in the GNU General Public License, the
0017 Free Software Foundation gives you unlimited permission to link the
0018 compiled version of this file into combinations with other programs,
0019 and to distribute those combinations without any restriction coming
0020 from the use of this file.  (The General Public License restrictions
0021 do apply in other respects; for example, they cover modification of
0022 the file, and distribution when not linked into a combine
0023 executable.)
0024 
0025 This file is distributed in the hope that it will be useful, but
0026 WITHOUT ANY WARRANTY; without even the implied warranty of
0027 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0028 General Public License for more details.
0029 
0030 You should have received a copy of the GNU General Public License
0031 along with this program; see the file COPYING.  If not, write to
0032 the Free Software Foundation, 59 Temple Place - Suite 330,
0033 Boston, MA 02111-1307, USA.  */
0034 
0035 
0036 #include <linux/linkage.h>
0037 #include <asm/assembler.h>
0038 #include <asm/unwind.h>
0039 
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Core of the unsigned 32-bit shift-and-subtract division.
 *
 *   \dividend  in:  value to divide.  Modified by the loop.  It is NOT a
 *                   reliable remainder on exit: once \curbit has shifted
 *                   out to zero, the remaining unrolled steps can still
 *                   subtract a fraction of \divisor from it.
 *   \divisor   in:  divisor.  Shifted left during setup and right during
 *                   the loop; clobbered.
 *   \result    out: quotient.
 *   \curbit    scratch: quotient bit that corresponds to the current
 *                   position of \divisor.
 *
 * The callers in this file branch away before expanding this macro when
 * the divisor is zero, when dividend <= divisor, and when the divisor is
 * a power of two.  Condition flags are clobbered.  Uses the numeric
 * local label 1.
 */
0040 .macro ARM_DIV_BODY dividend, divisor, result, curbit
0041 
0042 #if __LINUX_ARM_ARCH__ >= 5
0043 
0044     clz \curbit, \divisor           @ curbit = leading zero count of divisor
0045     clz \result, \dividend          @ result = leading zero count of dividend
0046     sub \result, \curbit, \result   @ result = shift that lines up the top set bits
0047     mov \curbit, #1
0048     mov \divisor, \divisor, lsl \result
0049     mov \curbit, \curbit, lsl \result   @ curbit = quotient bit for that shift
0050     mov \result, #0                 @ quotient is accumulated from zero
0051     
0052 #else
0053 
0054     @ Initially shift the divisor left 3 bits if possible,
0055     @ set curbit accordingly.  This allows for curbit to be located
0056     @ at the left end of each 4 bit nibbles in the division loop
0057     @ to save one loop in most cases.
0058     tst \divisor, #0xe0000000
0059     moveq   \divisor, \divisor, lsl #3
0060     moveq   \curbit, #8
0061     movne   \curbit, #1
0062 
0063     @ Unless the divisor is very big, shift it up in multiples of
0064     @ four bits, since this is the amount of unwinding in the main
0065     @ division loop.  Continue shifting until the divisor is 
0066     @ larger than the dividend.
0067 1:  cmp \divisor, #0x10000000       @ loop while divisor < 0x10000000 ...
0068     cmplo   \divisor, \dividend     @ ... and divisor < dividend
0069     movlo   \divisor, \divisor, lsl #4
0070     movlo   \curbit, \curbit, lsl #4
0071     blo 1b
0072 
0073     @ For very big divisors, we must shift it a bit at a time, or
0074     @ we will be in danger of overflowing.
0075 1:  cmp \divisor, #0x80000000       @ loop while divisor < 0x80000000 ...
0076     cmplo   \divisor, \dividend     @ ... and divisor < dividend
0077     movlo   \divisor, \divisor, lsl #1
0078     movlo   \curbit, \curbit, lsl #1
0079     blo 1b
0080 
0081     mov \result, #0
0082 
0083 #endif
0084 
0085     @ Division loop
     @ Four compare/subtract steps per pass: at the current divisor
     @ position and at 1, 2 and 3 bits below it.  Each successful
     @ subtraction ORs the matching (shifted) curbit into the result.
0086 1:  cmp \dividend, \divisor
0087     subhs   \dividend, \dividend, \divisor
0088     orrhs   \result,   \result,   \curbit
0089     cmp \dividend, \divisor,  lsr #1
0090     subhs   \dividend, \dividend, \divisor, lsr #1
0091     orrhs   \result,   \result,   \curbit,  lsr #1
0092     cmp \dividend, \divisor,  lsr #2
0093     subhs   \dividend, \dividend, \divisor, lsr #2
0094     orrhs   \result,   \result,   \curbit,  lsr #2
0095     cmp \dividend, \divisor,  lsr #3
0096     subhs   \dividend, \dividend, \divisor, lsr #3
0097     orrhs   \result,   \result,   \curbit,  lsr #3
0098     cmp \dividend, #0           @ Early termination?
0099     movsne  \curbit,   \curbit,  lsr #4 @ No, any more bits to do?
0100     movne   \divisor,  \divisor, lsr #4
0101     bne 1b                      @ loop while dividend != 0 and curbit != 0
0102 
0103 .endm
0104 
0105 
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute in \order the bit index of the highest set bit of \divisor.
 * The callers in this file only expand it after `tst divisor, divisor-1`
 * found the divisor to be a power of two, so \order is then log2(divisor)
 * and is used as a right-shift count.
 *
 *   \divisor  in:  non-zero value; clobbered on the pre-ARMv5 path only.
 *   \order    out: bit index (0..31).
 *
 * Condition flags are clobbered on the pre-ARMv5 path.
 */
0106 .macro ARM_DIV2_ORDER divisor, order
0107 
0108 #if __LINUX_ARM_ARCH__ >= 5
0109 
0110     clz \order, \divisor
0111     rsb \order, \order, #31         @ order = 31 - leading zero count
0112 
0113 #else
0114 
     @ Binary search: peel off 16, 8 and 4 bits in turn, adding the
     @ amount shifted out to \order each time.
0115     cmp \divisor, #(1 << 16)
0116     movhs   \divisor, \divisor, lsr #16
0117     movhs   \order, #16
0118     movlo   \order, #0
0119 
0120     cmp \divisor, #(1 << 8)
0121     movhs   \divisor, \divisor, lsr #8
0122     addhs   \order, \order, #8
0123 
0124     cmp \divisor, #(1 << 4)
0125     movhs   \divisor, \divisor, lsr #4
0126     addhs   \order, \order, #4
0127 
     @ \divisor is now below 16.  A value above 4 adds 3; otherwise
     @ divisor >> 1 is added (1 -> 0, 2 -> 1, 4 -> 2).
0128     cmp \divisor, #(1 << 2)
0129     addhi   \order, \order, #3
0130     addls   \order, \order, \divisor, lsr #1
0131 
0132 #endif
0133 
0134 .endm
0135 
0136 
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Core of the unsigned 32-bit modulo: repeatedly subtracts the shifted
 * divisor from \dividend so that only the remainder is left in it.
 *
 *   \dividend  in:  value to reduce; out: remainder.
 *   \divisor   in:  divisor; shifted left during setup and right during
 *                   the reduction; clobbered.
 *   \order     scratch: number of bit positions \divisor was shifted
 *                   left, then used as a countdown of remaining steps.
 *   \spare     scratch: only written on the ARMv5+ path.
 *
 * The callers in this file branch away before expanding this macro when
 * the divisor is zero, when dividend <= divisor, and when the divisor is
 * a power of two.  Condition flags are clobbered.  Uses the numeric
 * local labels 1 to 5.
 */
0137 .macro ARM_MOD_BODY dividend, divisor, order, spare
0138 
0139 #if __LINUX_ARM_ARCH__ >= 5
0140 
0141     clz \order, \divisor
0142     clz \spare, \dividend
0143     sub \order, \order, \spare      @ order = shift that lines up the top set bits
0144     mov \divisor, \divisor, lsl \order
0145 
0146 #else
0147 
0148     mov \order, #0
0149 
0150     @ Unless the divisor is very big, shift it up in multiples of
0151     @ four bits, since this is the amount of unwinding in the main
0152     @ division loop.  Continue shifting until the divisor is
0153     @ larger than the dividend.
0154 1:  cmp \divisor, #0x10000000       @ loop while divisor < 0x10000000 ...
0155     cmplo   \divisor, \dividend     @ ... and divisor < dividend
0156     movlo   \divisor, \divisor, lsl #4
0157     addlo   \order, \order, #4
0158     blo 1b
0159 
0160     @ For very big divisors, we must shift it a bit at a time, or
0161     @ we will be in danger of overflowing.
0162 1:  cmp \divisor, #0x80000000       @ loop while divisor < 0x80000000 ...
0163     cmplo   \divisor, \dividend     @ ... and divisor < dividend
0164     movlo   \divisor, \divisor, lsl #1
0165     addlo   \order, \order, #1
0166     blo 1b
0167 
0168 #endif
0169 
0170     @ Perform all needed subtractions to keep only the remainder.
0171     @ Do comparisons in batch of 4 first.
0172     subs    \order, \order, #3      @ yes, 3 is intended here
0173     blt 2f                      @ fewer than 4 steps needed: go to the tail
0174 
0175 1:  cmp \dividend, \divisor
0176     subhs   \dividend, \dividend, \divisor
0177     cmp \dividend, \divisor,  lsr #1
0178     subhs   \dividend, \dividend, \divisor, lsr #1
0179     cmp \dividend, \divisor,  lsr #2
0180     subhs   \dividend, \dividend, \divisor, lsr #2
0181     cmp \dividend, \divisor,  lsr #3
0182     subhs   \dividend, \dividend, \divisor, lsr #3
0183     cmp \dividend, #1
0184     mov \divisor, \divisor, lsr #4
0185     subsge  \order, \order, #4      @ only counts down when the cmp above gave "ge"
0186     bge 1b                      @ another batch of 4 while order stays >= 0
0187 
0188     tst \order, #3              @ low two bits clear: no partial batch left
0189     teqne   \dividend, #0           @ otherwise: is the dividend already zero?
0190     beq 5f                      @ either way, nothing more to subtract
0191 
0192     @ Either 1, 2 or 3 comparison/subtractions are left.
0193 2:  cmn \order, #2              @ flags from order + 2
0194     blt 4f                      @ order < -2: one step left
0195     beq 3f                      @ order == -2: two steps left
0196     cmp \dividend, \divisor     @ otherwise: three steps left
0197     subhs   \dividend, \dividend, \divisor
0198     mov \divisor,  \divisor,  lsr #1
0199 3:  cmp \dividend, \divisor
0200     subhs   \dividend, \dividend, \divisor
0201     mov \divisor,  \divisor,  lsr #1
0202 4:  cmp \dividend, \divisor
0203     subhs   \dividend, \dividend, \divisor
0204 5:
0205 .endm
0206 
0207 
0208 #ifdef CONFIG_ARM_PATCH_IDIV
0209     .align  3
0210 #endif
0211 
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Uses: r1, r2, r3 and the condition flags as scratch; no stack use
 *         on the paths visible here.
 *
 * Special cases are peeled off first: divisor == 1, divisor == 0
 * (branches to Ldiv0), dividend <= divisor, and power-of-two divisors.
 * Everything else goes through ARM_DIV_BODY.
 * `ret`/`reteq` are not defined in this file (presumably macros from
 * <asm/assembler.h>).
 */
0212 ENTRY(__udivsi3)
0213 ENTRY(__aeabi_uidiv)
0214 UNWIND(.fnstart)
0215 
0216     subs    r2, r1, #1          @ r2 = divisor - 1 (reused as power-of-2 mask test)
0217     reteq   lr                  @ divisor == 1: quotient is the dividend itself
0218     bcc Ldiv0                   @ borrow: divisor == 0
0219     cmp r0, r1
0220     bls 11f                     @ dividend <= divisor: quotient is 0 or 1
0221     tst r1, r2
0222     beq 12f                     @ divisor & (divisor - 1) == 0: power of 2
0223 
0224     ARM_DIV_BODY r0, r1, r2, r3
0225 
0226     mov r0, r2                  @ quotient was accumulated in r2
0227     ret lr
0228 
     @ Flags here still come from "cmp r0, r1" above.
0229 11: moveq   r0, #1              @ dividend == divisor
0230     movne   r0, #0              @ dividend <  divisor
0231     ret lr
0232 
0233 12: ARM_DIV2_ORDER r1, r2
0234 
0235     mov r0, r0, lsr r2          @ power-of-2 divisor: shift right by log2
0236     ret lr
0237 
0238 UNWIND(.fnend)
0239 ENDPROC(__udivsi3)
0240 ENDPROC(__aeabi_uidiv)
0241 
/*
 * __umodsi3: unsigned 32-bit modulo.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder
 *   Uses: r1, r2, r3 and the condition flags as scratch.
 *
 * The conditional sequence below resolves, without branching, the cases
 * divisor == 1, dividend == divisor, dividend < divisor and power-of-two
 * divisor.  Only the remaining case reaches ARM_MOD_BODY.  A zero
 * divisor branches to Ldiv0.
 */
0242 ENTRY(__umodsi3)
0243 UNWIND(.fnstart)
0244 
0245     subs    r2, r1, #1          @ compare divisor with 1
0246     bcc Ldiv0                   @ borrow: divisor == 0
0247     cmpne   r0, r1              @ compare dividend with divisor
0248     moveq   r0, #0              @ divisor == 1 or dividend == divisor: remainder 0
0249     tsthi   r1, r2              @ see if divisor is power of 2
0250     andeq   r0, r0, r2          @ power of 2: remainder = dividend & (divisor - 1)
0251     retls   lr                  @ done unless dividend > divisor and not a power of 2
0252 
0253     ARM_MOD_BODY r0, r1, r2, r3
0254 
0255     ret lr
0256 
0257 UNWIND(.fnend)
0258 ENDPROC(__umodsi3)
0259 
0260 #ifdef CONFIG_ARM_PATCH_IDIV
0261     .align 3
0262 #endif
0263 
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Uses: r1, r2, r3, ip and the condition flags as scratch.
 *
 * ip = r0 ^ r1 keeps the sign of the result in bit 31.  Both operands
 * are then made non-negative, the same special cases as the unsigned
 * routine are peeled off, and the result is negated at the end when
 * ip is negative.  A zero divisor branches to Ldiv0.
 */
0264 ENTRY(__divsi3)
0265 ENTRY(__aeabi_idiv)
0266 UNWIND(.fnstart)
0267 
0268     cmp r1, #0
0269     eor ip, r0, r1          @ save the sign of the result.
0270     beq Ldiv0               @ flags still from "cmp r1, #0": divisor == 0
0271     rsbmi   r1, r1, #0          @ loops below use unsigned.
0272     subs    r2, r1, #1          @ division by 1 or -1 ?
0273     beq 10f
0274     movs    r3, r0
0275     rsbmi   r3, r0, #0          @ positive dividend value
0276     cmp r3, r1
0277     bls 11f                 @ |dividend| <= |divisor|: quotient is 0 or +/-1
0278     tst r1, r2              @ divisor is power of 2 ?
0279     beq 12f
0280 
0281     ARM_DIV_BODY r3, r1, r0, r2
0282 
0283     cmp ip, #0
0284     rsbmi   r0, r0, #0          @ negate when the operand signs differed
0285     ret lr
0286 
     @ |divisor| == 1.  ip ^ r0 equals the original divisor (ip = r0 ^ r1),
     @ so the N flag is set exactly when the divisor was negative.
0287 10: teq ip, r0              @ same sign ?
0288     rsbmi   r0, r0, #0          @ divisor was -1: quotient = -dividend
0289     ret lr
0290 
     @ Flags here still come from "cmp r3, r1" above.
0291 11: movlo   r0, #0              @ |dividend| < |divisor|
0292     moveq   r0, ip, asr #31     @ equal magnitudes: 0 or -1 from the sign bit ...
0293     orreq   r0, r0, #1          @ ... turned into +1 or -1
0294     ret lr
0295 
0296 12: ARM_DIV2_ORDER r1, r2
0297 
0298     cmp ip, #0
0299     mov r0, r3, lsr r2          @ |dividend| >> log2(|divisor|)
0300     rsbmi   r0, r0, #0          @ flags from "cmp ip, #0": apply the result sign
0301     ret lr
0302 
0303 UNWIND(.fnend)
0304 ENDPROC(__divsi3)
0305 ENDPROC(__aeabi_idiv)
0306 
/*
 * __modsi3: signed 32-bit modulo.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder, negated when the dividend was negative
 *   Uses: r1, r2, r3, ip and the condition flags as scratch.
 *
 * Both operands are made non-negative, the unsigned remainder is
 * computed with the same conditional sequence as __umodsi3, and the
 * dividend's sign (kept in ip) is applied at label 10.  A zero divisor
 * branches to Ldiv0.
 */
0307 ENTRY(__modsi3)
0308 UNWIND(.fnstart)
0309 
0310     cmp r1, #0
0311     beq Ldiv0
0312     rsbmi   r1, r1, #0          @ loops below use unsigned.
0313     movs    ip, r0              @ preserve sign of dividend
0314     rsbmi   r0, r0, #0          @ if negative make positive
0315     subs    r2, r1, #1          @ compare divisor with 1
0316     cmpne   r0, r1              @ compare dividend with divisor
0317     moveq   r0, #0              @ divisor == 1 or equal magnitudes: remainder 0
0318     tsthi   r1, r2              @ see if divisor is power of 2
0319     andeq   r0, r0, r2          @ power of 2: remainder = dividend & (divisor - 1)
0320     bls 10f                 @ done unless dividend > divisor and not a power of 2
0321 
0322     ARM_MOD_BODY r0, r1, r2, r3
0323 
0324 10: cmp ip, #0
0325     rsbmi   r0, r0, #0          @ original dividend was negative: negate remainder
0326     ret lr
0327 
0328 UNWIND(.fnend)
0329 ENDPROC(__modsi3)
0330 
0331 #ifdef CONFIG_AEABI
0332 
/*
 * __aeabi_uidivmod: unsigned 32-bit division returning both results.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Uses: r2, r3 as scratch; 16 bytes of stack across the call.
 *
 * Calls __aeabi_uidiv for the quotient, then derives the remainder as
 * dividend - quotient * divisor.
 * NOTE(review): ip is saved and restored alongside lr; presumably this
 * pads the frame to four words -- confirm the reason.
 */
0333 ENTRY(__aeabi_uidivmod)
0334 UNWIND(.fnstart)
0335 UNWIND(.save {r0, r1, ip, lr}   )
0336 
0337     stmfd   sp!, {r0, r1, ip, lr}   @ save the original dividend and divisor
0338     bl  __aeabi_uidiv           @ r0 = quotient
0339     ldmfd   sp!, {r1, r2, ip, lr}   @ r1 = saved dividend, r2 = saved divisor
0340     mul r3, r0, r2              @ r3 = quotient * divisor
0341     sub r1, r1, r3              @ r1 = dividend - quotient * divisor
0342     ret lr
0343 
0344 UNWIND(.fnend)
0345 ENDPROC(__aeabi_uidivmod)
0346 
/*
 * __aeabi_idivmod: signed 32-bit division returning both results.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Uses: r2, r3 as scratch; 16 bytes of stack across the call.
 *
 * Calls __aeabi_idiv for the quotient, then derives the remainder as
 * dividend - quotient * divisor.
 * NOTE(review): ip is saved and restored alongside lr; presumably this
 * pads the frame to four words -- confirm the reason.
 */
0347 ENTRY(__aeabi_idivmod)
0348 UNWIND(.fnstart)
0349 UNWIND(.save {r0, r1, ip, lr}   )
0350     stmfd   sp!, {r0, r1, ip, lr}   @ save the original dividend and divisor
0351     bl  __aeabi_idiv            @ r0 = quotient
0352     ldmfd   sp!, {r1, r2, ip, lr}   @ r1 = saved dividend, r2 = saved divisor
0353     mul r3, r0, r2              @ r3 = quotient * divisor
0354     sub r1, r1, r3              @ r1 = dividend - quotient * divisor
0355     ret lr
0356 
0357 UNWIND(.fnend)
0358 ENDPROC(__aeabi_idivmod)
0359 
0360 #endif
0361 
/*
 * Ldiv0: common division-by-zero path, reached by branch from the
 * division and modulo routines above with lr still holding their
 * caller's return address.
 *
 * Saves lr, calls __div0 (defined outside this file), then returns 0 in
 * r0 directly to the original caller.
 */
0362 Ldiv0:
0363 UNWIND(.fnstart)
0364 UNWIND(.pad #4)
0365 UNWIND(.save {lr})
0366     str lr, [sp, #-8]!          @ sp -= 8, then store lr at the new sp
0367     bl  __div0
0368     mov r0, #0          @ About as wrong as it could be.
0369     ldr pc, [sp], #8            @ return via the saved lr, then sp += 8
0370 UNWIND(.fnend)
0371 ENDPROC(Ldiv0)