/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * arch/mips/include/asm/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */
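In C terms, the memcpy contract above amounts to the following sketch (the
name sketch_memcpy is hypothetical, and the byte loop stands in for the
word-at-a-time optimizations below; __copy_user has the same shape but
reports the number of uncopied bytes back in a2/len rather than returning
dst, per its non-standard convention):

    #include <stddef.h>

    /* Copy len bytes (regions must not overlap) and return dst; the
     * assembly keeps this return value in v0. */
    void *sketch_memcpy(void *dst, const void *src, size_t len)
    {
            unsigned char *d = dst;
            const unsigned char *s = src;
            size_t i;

            for (i = 0; i < len; i++)
                    d[i] = s[i];
            return dst;
    }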

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */
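As a C-level sketch of what these invariants buy: when a load faults, the
fixup can recover the uncopied count purely from AT and the recorded bad
address (the function name here is illustrative, not the kernel's):

    #include <stddef.h>

    /*
     * src_end is the value kept in AT (one byte past the end of the
     * source); buaddr is the faulting address saved by the exception
     * path (THREAD_BUADDR). Their difference bounds the bytes that
     * were never copied -- cf. "SUB len, AT, t0" in l_exc below.
     */
    static size_t uncopied_bytes(unsigned long src_end, unsigned long buaddr)
    {
            return (size_t)(src_end - buaddr);
    }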

#define EXC(inst_reg,addr,handler)      \
9:  inst_reg, addr;             \
    .section __ex_table,"a";        \
    PTR_WD  9b, handler;            \
    .previous
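To make the fixup plumbing concrete: an invocation such as
EXC(LOAD t0, UNIT(0)(src), l_exc) expands mechanically (with LOAD = ld and
UNIT(0) = 0, as defined below) to

    9:  ld  t0, 0(src)
        .section __ex_table,"a"
        PTR_WD  9b, l_exc
        .previous

i.e. the potentially faulting instruction carries the local label 9, and an
__ex_table entry maps its address to the l_exc fixup handler.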

/*
 * Only on the 64-bit kernel can we make use of 64-bit registers.
 */

#define LOAD   ld
#define LOADL  ldl
#define LOADR  ldr
#define STOREL sdl
#define STORER sdr
#define STORE  sd
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SRA    dsra
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0  $8
#define t1  $9
#define t2  $10
#define t3  $11
#define t4  $12
#define t5  $13
#define t6  $14
#define t7  $15

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST  LOADL
#define STFIRST STORER
#define STREST  STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST  LOADR
#define STFIRST STOREL
#define STREST  STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)

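/*
 * With NBYTES == 8 this gives, e.g., FIRST(1) == 8 and REST(1) == 15:
 * the FIRST/REST pair addresses the first and last byte of the same
 * doubleword unit, which is what the LDFIRST/LDREST sequences below
 * rely on.
 */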
    .text
    .set    noreorder
    .set    noat

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
    .align  5
LEAF(memcpy)                    /* a0=dst a1=src a2=len */
EXPORT_SYMBOL(memcpy)
    move    v0, dst             /* return value */
__memcpy:
FEXPORT(__raw_copy_from_user)
EXPORT_SYMBOL(__raw_copy_from_user)
FEXPORT(__raw_copy_to_user)
EXPORT_SYMBOL(__raw_copy_to_user)
    /*
     * Note: dst & src may be unaligned, len may be 0
     * Temps
     */
    #
    # Octeon doesn't care if the destination is unaligned. The hardware
    # can fix it faster than we can special case the assembly.
    #
    pref    0, 0(src)
    sltu    t0, len, NBYTES     # Check if < 1 word
    bnez    t0, copy_bytes_checklen
     and    t0, src, ADDRMASK   # Check if src unaligned
    bnez    t0, src_unaligned
     sltu   t0, len, 4*NBYTES   # Check if < 4 words
    bnez    t0, less_than_4units
     sltu   t0, len, 8*NBYTES   # Check if < 8 words
    bnez    t0, less_than_8units
     sltu   t0, len, 16*NBYTES  # Check if < 16 words
    bnez    t0, cleanup_both_aligned
     sltu   t0, len, 128+1      # Check if len < 129
    bnez    t0, 1f          # Skip prefetch if len is too short
     sltu   t0, len, 256+1      # Check if len < 257
    bnez    t0, 1f          # Skip prefetch if len is too short
     pref   0, 128(src)     # We must not prefetch invalid addresses
    #
    # This is where we loop if there are more than 128 bytes left
2:  pref    0, 256(src)     # We must not prefetch invalid addresses
    #
    # This is where we loop if we can't prefetch anymore
1:
EXC(    LOAD    t0, UNIT(0)(src),   l_exc)
EXC(    LOAD    t1, UNIT(1)(src),   l_exc_copy)
EXC(    LOAD    t2, UNIT(2)(src),   l_exc_copy)
EXC(    LOAD    t3, UNIT(3)(src),   l_exc_copy)
    SUB len, len, 16*NBYTES
EXC(    STORE   t0, UNIT(0)(dst),   s_exc_p16u)
EXC(    STORE   t1, UNIT(1)(dst),   s_exc_p15u)
EXC(    STORE   t2, UNIT(2)(dst),   s_exc_p14u)
EXC(    STORE   t3, UNIT(3)(dst),   s_exc_p13u)
EXC(    LOAD    t0, UNIT(4)(src),   l_exc_copy)
EXC(    LOAD    t1, UNIT(5)(src),   l_exc_copy)
EXC(    LOAD    t2, UNIT(6)(src),   l_exc_copy)
EXC(    LOAD    t3, UNIT(7)(src),   l_exc_copy)
EXC(    STORE   t0, UNIT(4)(dst),   s_exc_p12u)
EXC(    STORE   t1, UNIT(5)(dst),   s_exc_p11u)
EXC(    STORE   t2, UNIT(6)(dst),   s_exc_p10u)
    ADD src, src, 16*NBYTES
EXC(    STORE   t3, UNIT(7)(dst),   s_exc_p9u)
    ADD dst, dst, 16*NBYTES
EXC(    LOAD    t0, UNIT(-8)(src),  l_exc_copy_rewind16)
EXC(    LOAD    t1, UNIT(-7)(src),  l_exc_copy_rewind16)
EXC(    LOAD    t2, UNIT(-6)(src),  l_exc_copy_rewind16)
EXC(    LOAD    t3, UNIT(-5)(src),  l_exc_copy_rewind16)
EXC(    STORE   t0, UNIT(-8)(dst),  s_exc_p8u)
EXC(    STORE   t1, UNIT(-7)(dst),  s_exc_p7u)
EXC(    STORE   t2, UNIT(-6)(dst),  s_exc_p6u)
EXC(    STORE   t3, UNIT(-5)(dst),  s_exc_p5u)
EXC(    LOAD    t0, UNIT(-4)(src),  l_exc_copy_rewind16)
EXC(    LOAD    t1, UNIT(-3)(src),  l_exc_copy_rewind16)
EXC(    LOAD    t2, UNIT(-2)(src),  l_exc_copy_rewind16)
EXC(    LOAD    t3, UNIT(-1)(src),  l_exc_copy_rewind16)
EXC(    STORE   t0, UNIT(-4)(dst),  s_exc_p4u)
EXC(    STORE   t1, UNIT(-3)(dst),  s_exc_p3u)
EXC(    STORE   t2, UNIT(-2)(dst),  s_exc_p2u)
EXC(    STORE   t3, UNIT(-1)(dst),  s_exc_p1u)
    sltu    t0, len, 256+1      # See if we can prefetch more
    beqz    t0, 2b
     sltu   t0, len, 128        # See if we can loop one more time
    beqz    t0, 1b
     nop
    #
    # Jump here if there are less than 16*NBYTES left.
    #
cleanup_both_aligned:
    beqz    len, done
     sltu   t0, len, 8*NBYTES
    bnez    t0, less_than_8units
     nop
EXC(    LOAD    t0, UNIT(0)(src),   l_exc)
EXC(    LOAD    t1, UNIT(1)(src),   l_exc_copy)
EXC(    LOAD    t2, UNIT(2)(src),   l_exc_copy)
EXC(    LOAD    t3, UNIT(3)(src),   l_exc_copy)
    SUB len, len, 8*NBYTES
EXC(    STORE   t0, UNIT(0)(dst),   s_exc_p8u)
EXC(    STORE   t1, UNIT(1)(dst),   s_exc_p7u)
EXC(    STORE   t2, UNIT(2)(dst),   s_exc_p6u)
EXC(    STORE   t3, UNIT(3)(dst),   s_exc_p5u)
EXC(    LOAD    t0, UNIT(4)(src),   l_exc_copy)
EXC(    LOAD    t1, UNIT(5)(src),   l_exc_copy)
EXC(    LOAD    t2, UNIT(6)(src),   l_exc_copy)
EXC(    LOAD    t3, UNIT(7)(src),   l_exc_copy)
EXC(    STORE   t0, UNIT(4)(dst),   s_exc_p4u)
EXC(    STORE   t1, UNIT(5)(dst),   s_exc_p3u)
EXC(    STORE   t2, UNIT(6)(dst),   s_exc_p2u)
EXC(    STORE   t3, UNIT(7)(dst),   s_exc_p1u)
    ADD src, src, 8*NBYTES
    beqz    len, done
     ADD    dst, dst, 8*NBYTES
    #
    # Jump here if there are less than 8*NBYTES left.
    #
less_than_8units:
    sltu    t0, len, 4*NBYTES
    bnez    t0, less_than_4units
     nop
EXC(    LOAD    t0, UNIT(0)(src),   l_exc)
EXC(    LOAD    t1, UNIT(1)(src),   l_exc_copy)
EXC(    LOAD    t2, UNIT(2)(src),   l_exc_copy)
EXC(    LOAD    t3, UNIT(3)(src),   l_exc_copy)
    SUB len, len, 4*NBYTES
EXC(    STORE   t0, UNIT(0)(dst),   s_exc_p4u)
EXC(    STORE   t1, UNIT(1)(dst),   s_exc_p3u)
EXC(    STORE   t2, UNIT(2)(dst),   s_exc_p2u)
EXC(    STORE   t3, UNIT(3)(dst),   s_exc_p1u)
    ADD src, src, 4*NBYTES
    beqz    len, done
     ADD    dst, dst, 4*NBYTES
    #
    # Jump here if there are less than 4*NBYTES left. This means
    # we may need to copy up to 3 NBYTES words.
    #
less_than_4units:
    sltu    t0, len, 1*NBYTES
    bnez    t0, copy_bytes_checklen
     nop
    #
    # 1) Copy NBYTES, then check length again
    #
EXC(    LOAD    t0, 0(src),     l_exc)
    SUB len, len, NBYTES
    sltu    t1, len, 8
EXC(    STORE   t0, 0(dst),     s_exc_p1u)
    ADD src, src, NBYTES
    bnez    t1, copy_bytes_checklen
     ADD    dst, dst, NBYTES
    #
    # 2) Copy NBYTES, then check length again
    #
EXC(    LOAD    t0, 0(src),     l_exc)
    SUB len, len, NBYTES
    sltu    t1, len, 8
EXC(    STORE   t0, 0(dst),     s_exc_p1u)
    ADD src, src, NBYTES
    bnez    t1, copy_bytes_checklen
     ADD    dst, dst, NBYTES
    #
    # 3) Copy NBYTES, then check length again
    #
EXC(    LOAD    t0, 0(src),     l_exc)
    SUB len, len, NBYTES
    ADD src, src, NBYTES
    ADD dst, dst, NBYTES
    b copy_bytes_checklen
EXC(     STORE  t0, -8(dst),        s_exc_p1u)

src_unaligned:
#define rem t8
    SRL t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
    beqz    t0, cleanup_src_unaligned
     and    rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
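/*
 * (For example, on big endian LDFIRST is ldl: it fills the high end of
 * the register with the bytes from the unaligned address up to the next
 * doubleword boundary, and LDREST/ldr supplies the remaining low bytes,
 * so each pair assembles one full unaligned doubleword.)
 */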
EXC(    LDFIRST t0, FIRST(0)(src),  l_exc)
EXC(    LDFIRST t1, FIRST(1)(src),  l_exc_copy)
    SUB len, len, 4*NBYTES
EXC(    LDREST  t0, REST(0)(src),   l_exc_copy)
EXC(    LDREST  t1, REST(1)(src),   l_exc_copy)
EXC(    LDFIRST t2, FIRST(2)(src),  l_exc_copy)
EXC(    LDFIRST t3, FIRST(3)(src),  l_exc_copy)
EXC(    LDREST  t2, REST(2)(src),   l_exc_copy)
EXC(    LDREST  t3, REST(3)(src),   l_exc_copy)
    ADD src, src, 4*NBYTES
EXC(    STORE   t0, UNIT(0)(dst),   s_exc_p4u)
EXC(    STORE   t1, UNIT(1)(dst),   s_exc_p3u)
EXC(    STORE   t2, UNIT(2)(dst),   s_exc_p2u)
EXC(    STORE   t3, UNIT(3)(dst),   s_exc_p1u)
    bne len, rem, 1b
     ADD    dst, dst, 4*NBYTES

cleanup_src_unaligned:
    beqz    len, done
     and    rem, len, NBYTES-1  # rem = len % NBYTES
    beq rem, len, copy_bytes
     nop
1:
EXC(    LDFIRST t0, FIRST(0)(src),  l_exc)
EXC(    LDREST  t0, REST(0)(src),   l_exc_copy)
    SUB len, len, NBYTES
EXC(    STORE   t0, 0(dst),     s_exc_p1u)
    ADD src, src, NBYTES
    bne len, rem, 1b
     ADD    dst, dst, NBYTES

copy_bytes_checklen:
    beqz    len, done
     nop
copy_bytes:
    /* 0 < len < NBYTES  */
#define COPY_BYTE(N)            \
EXC(    lb  t0, N(src), l_exc); \
    SUB len, len, 1;        \
    beqz    len, done;      \
EXC(     sb t0, N(dst), s_exc_p1)

    COPY_BYTE(0)
    COPY_BYTE(1)
    COPY_BYTE(2)
    COPY_BYTE(3)
    COPY_BYTE(4)
    COPY_BYTE(5)
EXC(    lb  t0, NBYTES-2(src), l_exc)
    SUB len, len, 1
    jr  ra
EXC(     sb t0, NBYTES-2(dst), s_exc_p1)
done:
    jr  ra
     nop
    END(memcpy)

l_exc_copy_rewind16:
    /* Rewind src and dst by 16*NBYTES for l_exc_copy */
    SUB src, src, 16*NBYTES
    SUB dst, dst, 16*NBYTES
l_exc_copy:
    /*
     * Copy bytes from src until faulting load address (or until a
     * lb faults)
     *
     * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
     * may be more than a byte beyond the last address.
     * Hence, the lb below may get an exception.
     *
     * Assumes src < THREAD_BUADDR($28)
     */
    LOAD    t0, TI_TASK($28)
    LOAD    t0, THREAD_BUADDR(t0)
1:
EXC(    lb  t1, 0(src), l_exc)
    ADD src, src, 1
    sb  t1, 0(dst)  # can't fault -- we're copy_from_user
    bne src, t0, 1b
     ADD    dst, dst, 1
l_exc:
    LOAD    t0, TI_TASK($28)
    LOAD    t0, THREAD_BUADDR(t0)   # t0 is just past last good address
    SUB len, AT, t0     # len = number of uncopied bytes
    jr  ra
     nop


#define SEXC(n)             \
s_exc_p ## n ## u:          \
    jr  ra;         \
     ADD    len, len, n*NBYTES

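/*
 * For example, SEXC(16) expands to
 *
 * s_exc_p16u:
 *     jr   ra
 *      ADD len, len, 16*NBYTES
 *
 * i.e. a store fault handler that adds back the 16 units which were
 * subtracted from len up front but never made it to the destination,
 * so len again counts the uncopied bytes.
 */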
SEXC(16)
SEXC(15)
SEXC(14)
SEXC(13)
SEXC(12)
SEXC(11)
SEXC(10)
SEXC(9)
SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

s_exc_p1:
    jr  ra
     ADD    len, len, 1
s_exc:
    jr  ra
     nop

    .align  5
LEAF(memmove)
EXPORT_SYMBOL(memmove)
    ADD t0, a0, a2
    ADD t1, a1, a2
    sltu    t0, a1, t0          # dst + len <= src -> memcpy
    sltu    t1, a0, t1          # dst >= src + len -> memcpy
    and t0, t1
    beqz    t0, __memcpy
     move   v0, a0              /* return value */
    beqz    a2, r_out
    END(memmove)

    /* fall through to __rmemcpy */
LEAF(__rmemcpy)                 /* a0=dst a1=src a2=len */
     sltu   t0, a1, a0
    beqz    t0, r_end_bytes_up      # src >= dst
     nop
    ADD a0, a2              # dst = dst + len
    ADD a1, a2              # src = src + len

r_end_bytes:
    lb  t0, -1(a1)
    SUB a2, a2, 0x1
    sb  t0, -1(a0)
    SUB a1, a1, 0x1
    bnez    a2, r_end_bytes
     SUB    a0, a0, 0x1

r_out:
    jr  ra
     move   a2, zero

r_end_bytes_up:
    lb  t0, (a1)
    SUB a2, a2, 0x1
    sb  t0, (a0)
    ADD a1, a1, 0x1
    bnez    a2, r_end_bytes_up
     ADD    a0, a0, 0x1

    jr  ra
     move   a2, zero
    END(__rmemcpy)
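
For reference, a C-level sketch of the dispatch that memmove and __rmemcpy
implement above (the name sketch_memmove is hypothetical; the real memmove
branches to __memcpy for non-overlapping buffers and otherwise falls
through to __rmemcpy, which picks the copy direction from the pointer
order):

    #include <stddef.h>
    #include <string.h>

    void *sketch_memmove(void *dst, const void *src, size_t len)
    {
            unsigned char *d = dst;
            const unsigned char *s = src;
            size_t i;

            /* Non-overlapping: dst + len <= src or src + len <= dst. */
            if (d + len <= s || s + len <= d)
                    return memcpy(dst, src, len);

            if (s < d) {
                    /* src below dst: copy backwards (cf. r_end_bytes). */
                    while (len--)
                            d[len] = s[len];
            } else {
                    /* src at or above dst: copy forwards (cf. r_end_bytes_up). */
                    for (i = 0; i < len; i++)
                            d[i] = s[i];
            }
            return dst;
    }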