arm/lib/div64.S

0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  *  linux/arch/arm/lib/div64.S
0004  *
0005  *  Optimized computation of 64-bit dividend / 32-bit divisor
0006  *
0007  *  Author: Nicolas Pitre
0008  *  Created:    Oct 5, 2003
0009  *  Copyright:  Monta Vista Software, Inc.
0010  */
0011
0012 #include <linux/linkage.h>
0013 #include <asm/assembler.h>
0014 #include <asm/unwind.h>
0015
0016 #ifdef __ARMEB__ /* big-endian: high word lives in the lower-numbered register */
0017 #define xh r0 /* x = r0-r1 pair: high word */
0018 #define xl r1 /* x = r0-r1 pair: low word */
0019 #define yh r2 /* y = r2-r3 pair: high word */
0020 #define yl r3 /* y = r2-r3 pair: low word */
0021 #else /* little-endian: low word lives in the lower-numbered register */
0022 #define xl r0 /* x = r0-r1 pair: low word */
0023 #define xh r1 /* x = r0-r1 pair: high word */
0024 #define yl r2 /* y = r2-r3 pair: low word */
0025 #define yh r3 /* y = r2-r3 pair: high word */
0026 #endif /* __ARMEB__ */
0027
0028 /*
0029  * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
0030  *
0031  * Note: Calling convention is totally non standard for optimal code.
0032  *       This is meant to be used by do_div() from include/asm/div64.h only.
0033  *
0034  * Input parameters:
0035  *  xh-xl   = dividend (clobbered)
0036  *  r4  = divisor (preserved)
0037  *
0038  * Output values:
0039  *  yh-yl   = result (quotient)
0040  *  xh  = remainder
0041  *
0042  * Clobbered regs: xl, ip, condition flags
0043  */
0044
0045 ENTRY(__do_div64)
0046 UNWIND(.fnstart)
0047
0048     @ Test for easy paths first.
0049     subs    ip, r4, #1      @ ip = divisor - 1; flags reflect the subtraction
0050     bls 9f          @ divisor is 0 or 1
0051     tst ip, r4          @ divisor & (divisor - 1) is 0 only with one bit set
0052     beq 8f          @ divisor is power of 2
0053
0054     @ See if we need to handle upper 32-bit result.
0055     cmp xh, r4
0056     mov yh, #0          @ plain mov: flags from the cmp stay intact
0057     blo 3f          @ xh < divisor: upper result word stays 0
0058
0059     @ Align divisor with upper part of dividend.
0060     @ The aligned divisor is stored in yl preserving the original.
0061     @ The bit position is stored in ip.
0062
0063 #if __LINUX_ARM_ARCH__ >= 5
0064
0065     clz yl, r4          @ leading zeros of divisor
0066     clz ip, xh          @ leading zeros of upper dividend word
0067     sub yl, yl, ip      @ yl = shift that lines up the top set bits
0068     mov ip, #1
0069     mov ip, ip, lsl yl      @ ip = result bit matching that shift
0070     mov yl, r4, lsl yl      @ yl = divisor shifted into alignment
0071
0072 #else
0073
0074     mov yl, r4
0075     mov ip, #1
0076 1:  cmp yl, #0x80000000     @ cc while top bit of yl is still clear
0077     cmpcc   yl, xh      @ ...then cc only while yl < xh
0078     movcc   yl, yl, lsl #1  @ shift divisor copy and result bit together
0079     movcc   ip, ip, lsl #1
0080     bcc 1b
0081
0082 #endif
0083
0084     @ The division loop for needed upper bit positions.
0085     @ Break out early if dividend reaches 0.
0086 2:  cmp xh, yl
0087     orrcs   yh, yh, ip      @ xh >= yl: set this result bit
0088     subscs  xh, xh, yl      @ ...and subtract; Z set if xh reached 0
0089     movsne  ip, ip, lsr #1  @ skipped once xh is 0; else Z set when ip runs out
0090     mov yl, yl, lsr #1      @ plain mov: Z from above decides the branch
0091     bne 2b
0092
0093     @ See if we need to handle lower 32-bit result.
0094 3:  cmp xh, #0
0095     mov yl, #0
0096     cmpeq   xl, r4      @ only when xh is 0: compare low word with divisor
0097     movlo   xh, xl      @ what is left is below the divisor: it is the remainder
0098     retlo   lr
0099
0100     @ The division loop for lower bit positions.
0101     @ Here we shift remainder bits leftwards rather than moving the
0102     @ divisor for comparisons, considering the carry-out bit as well.
0103     mov ip, #0x80000000     @ ip = current result bit, starting at bit 31
0104 4:  movs    xl, xl, lsl #1  @ top bit of xl goes to carry
0105     adcs    xh, xh, xh      @ xh = 2 * xh + carry; C = bit shifted out of xh
0106     beq 6f          @ xh is now 0 (carry-out checked at 6)
0107     cmpcc   xh, r4      @ no carry-out: ordinary compare with divisor
0108 5:  orrcs   yl, yl, ip      @ carry-out or xh >= divisor: set result bit
0109     subcs   xh, xh, r4      @ ...and take the divisor off the remainder
0110     movs    ip, ip, lsr #1  @ next lower bit; Z set once bit 0 is done
0111     bne 4b
0112     ret lr
0113
0114     @ The top part of remainder became zero.  If carry is set
0115     @ (the 33rd bit) this is a false positive so resume the loop.
0116     @ Otherwise, if lower part is also null then we are done.
0117 6:  bcs 5b
0118     cmp xl, #0
0119     reteq   lr          @ xh and xl both 0: remainder in xh is 0
0120
0121     @ We still have remainder bits in the low part.  Bring them up.
0122
0123 #if __LINUX_ARM_ARCH__ >= 5
0124
0125     clz xh, xl          @ we know xh is zero here so...
0126     add xh, xh, #1      @ count that shifts the top set bit of xl just out
0127     mov xl, xl, lsl xh
0128     mov ip, ip, lsr xh      @ move the result bit down by the same count
0129
0130 #else
0131
0132 7:  movs    xl, xl, lsl #1  @ C = bit shifted out of xl
0133     mov ip, ip, lsr #1      @ keep the result bit in step
0134     bcc 7b          @ repeat until a set bit has been shifted out
0135
0136 #endif
0137
0138     @ Current remainder is now 1.  It is worthless to compare with
0139     @ divisor at this point since divisor can not be smaller than 3 here.
0140     @ If possible, branch for another shift in the division loop.
0141     @ If no bit position left then we are done.
0142     movs    ip, ip, lsr #1  @ Z set if no bit position is left
0143     mov xh, #1          @ the set bit shifted out of xl above
0144     bne 4b
0145     ret lr
0146
0147 8:  @ Division by a power of 2: determine what that divisor order is
0148     @ then simply shift values around
0149
0150 #if __LINUX_ARM_ARCH__ >= 5
0151
0152     clz ip, r4
0153     rsb ip, ip, #31     @ ip = 31 - clz = index of the single set bit
0154
0155 #else
0156
0157     mov yl, r4          @ yl = working copy, narrowed 16/8/4 bits at a time
0158     cmp r4, #(1 << 16)
0159     mov ip, #0          @ ip accumulates the order
0160     movhs   yl, yl, lsr #16
0161     movhs   ip, #16
0162
0163     cmp yl, #(1 << 8)
0164     movhs   yl, yl, lsr #8
0165     addhs   ip, ip, #8
0166
0167     cmp yl, #(1 << 4)
0168     movhs   yl, yl, lsr #4
0169     addhs   ip, ip, #4
0170
0171     cmp yl, #(1 << 2)
0172     addhi   ip, ip, #3      @ yl above 4, i.e. 8 for a power of 2: adds 3
0173     addls   ip, ip, yl, lsr #1  @ yl is 1, 2 or 4: yl >> 1 adds 0, 1 or 2
0174
0175 #endif
0176
0177     mov yh, xh, lsr ip      @ upper result word
0178     mov yl, xl, lsr ip      @ lower result word, still missing bits from xh
0179     rsb ip, ip, #32     @ ip = 32 - order; next: low bits of xh go atop yl
0180  ARM(   orr yl, yl, xh, lsl ip  )
0181  THUMB( lsl xh, xh, ip      )
0182  THUMB( orr yl, yl, xh      )
0183     mov xh, xl, lsl ip      @ drop all but the low order bits of xl...
0184     mov xh, xh, lsr ip      @ ...and bring them back down: the remainder
0185     ret lr
0186
0187     @ eq -> division by 1: obvious enough...
0188 9:  moveq   yl, xl
0189     moveq   yh, xh
0190     moveq   xh, #0
0191     reteq   lr          @ not eq: divisor is 0, fall through to Ldiv0_64
0192 UNWIND(.fnend)
0193
0194 UNWIND(.fnstart)
0195 UNWIND(.pad #4)
0196 UNWIND(.save {lr})
0197 Ldiv0_64:
0198     @ Division by 0:
0199     str lr, [sp, #-8]!      @ save lr, moving sp down by 8
0200     bl  __div0
0201
0202     @ as wrong as it could be...
0203     mov yl, #0
0204     mov yh, #0
0205     mov xh, #0
0206     ldr pc, [sp], #8        @ return through the saved lr, moving sp back up by 8
0207
0208 UNWIND(.fnend)
0209 ENDPROC(__do_div64)