/* SPDX-License-Identifier: GPL-2.0 */
/* memcpy.S: Sparc optimized memcpy and memmove code
 * Hand optimized from GNU libc's memcpy and memmove
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */

#include <asm/export.h>
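/* Emit a global function entry point: make the symbol visible, type it
 * as a function for the ELF symbol table, and align it to 4 bytes.
 */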
#define FUNC(x)         \
    .globl  x;      \
    .type   x,@function;    \
    .align  4;      \
x:

/* Both these macros have to start with exactly the same insn */
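/* MOVE_BIGCHUNK copies 32 bytes with four doubleword loads and eight
 * word stores, so the destination only needs word alignment.  The
 * shared first insn matters because "be 82f + 4" below executes the
 * first ldd of MOVE_BIGCHUNK in its delay slot and then resumes at
 * the second insn of MOVE_BIGALIGNCHUNK.
 */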
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
    ldd [%src + (offset) + 0x00], %t0; \
    ldd [%src + (offset) + 0x08], %t2; \
    ldd [%src + (offset) + 0x10], %t4; \
    ldd [%src + (offset) + 0x18], %t6; \
    st  %t0, [%dst + (offset) + 0x00]; \
    st  %t1, [%dst + (offset) + 0x04]; \
    st  %t2, [%dst + (offset) + 0x08]; \
    st  %t3, [%dst + (offset) + 0x0c]; \
    st  %t4, [%dst + (offset) + 0x10]; \
    st  %t5, [%dst + (offset) + 0x14]; \
    st  %t6, [%dst + (offset) + 0x18]; \
    st  %t7, [%dst + (offset) + 0x1c];

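/* MOVE_BIGALIGNCHUNK copies 32 bytes entirely with doubleword loads
 * and stores; both pointers must be 8-byte aligned here.
 */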
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
    ldd [%src + (offset) + 0x00], %t0; \
    ldd [%src + (offset) + 0x08], %t2; \
    ldd [%src + (offset) + 0x10], %t4; \
    ldd [%src + (offset) + 0x18], %t6; \
    std %t0, [%dst + (offset) + 0x00]; \
    std %t2, [%dst + (offset) + 0x08]; \
    std %t4, [%dst + (offset) + 0x10]; \
    std %t6, [%dst + (offset) + 0x18];

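/* MOVE_LASTCHUNK copies 16 bytes using negative offsets, walking back
 * from pointers that have already been advanced past the run.
 */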
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
    ldd [%src - (offset) - 0x10], %t0; \
    ldd [%src - (offset) - 0x08], %t2; \
    st  %t0, [%dst - (offset) - 0x10]; \
    st  %t1, [%dst - (offset) - 0x0c]; \
    st  %t2, [%dst - (offset) - 0x08]; \
    st  %t3, [%dst - (offset) - 0x04];

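/* MOVE_LASTALIGNCHUNK: as above, but with doubleword stores. */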
#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
    ldd [%src - (offset) - 0x10], %t0; \
    ldd [%src - (offset) - 0x08], %t2; \
    std %t0, [%dst - (offset) - 0x10]; \
    std %t2, [%dst - (offset) - 0x08];

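/* MOVE_SHORTCHUNK copies 2 bytes, one byte at a time. */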
#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
    ldub    [%src - (offset) - 0x02], %t0; \
    ldub    [%src - (offset) - 0x01], %t1; \
    stb %t0, [%dst - (offset) - 0x02]; \
    stb %t1, [%dst - (offset) - 0x01];

    .text
    .align  4

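/* memmove: %o0=dst %o1=src %o2=len.  If a forward copy is safe, fall
 * into the memcpy path; otherwise copy backwards, byte by byte.
 * Roughly, as a C sketch of the dispatch (not the generated code):
 *	if (dst <= src || src + len <= dst)
 *		forward copy via the memcpy path;
 *	else
 *		while (len--) dst[len] = src[len];
 */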
FUNC(memmove)
EXPORT_SYMBOL(memmove)
    cmp     %o0, %o1
    mov     %o0, %g7
    bleu    9f
     sub    %o0, %o1, %o4

    add     %o1, %o2, %o3
    cmp     %o3, %o0
    bleu    0f
     andcc  %o4, 3, %o5

    add     %o1, %o2, %o1
    add     %o0, %o2, %o0
    sub     %o1, 1, %o1
    sub     %o0, 1, %o0

1:  /* reverse_bytes */

    ldub    [%o1], %o4
    subcc   %o2, 1, %o2
    stb     %o4, [%o0]
    sub     %o1, 1, %o1
    bne     1b
     sub    %o0, 1, %o0

    retl
     mov    %g7, %o0

/* NOTE: This code is executed only for the cases where
 *       %src (= %o1) & 3 != 0.  We need to align it to 4.
 *       So, depending on (%src & 3):
 *       1: we need to do ldub, lduh
 *       2: lduh
 *       3: just ldub
 *       So even if it looks weird, the branches are correct
 *       here. -jj
 */
78: /* dword_align */

    andcc   %o1, 1, %g0
    be      4f
     andcc  %o1, 2, %g0

    ldub    [%o1], %g2
    add     %o1, 1, %o1
    stb     %g2, [%o0]
    sub     %o2, 1, %o2
    bne     3f
     add    %o0, 1, %o0
4:
    lduh    [%o1], %g2
    add     %o1, 2, %o1
    sth     %g2, [%o0]
    sub     %o2, 2, %o2
    b       3f
     add    %o0, 2, %o0

FUNC(memcpy)    /* %o0=dst %o1=src %o2=len */
EXPORT_SYMBOL(memcpy)

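/* Dispatch: if (dst - src) & 3, the two pointers can never both be
 * word aligned, so take the unaligned path at 86f.  Copies of 15
 * bytes or less go to 90f.  Otherwise align the source to 4 bytes
 * (78b), then to 8, and bulk-copy 128 bytes per loop iteration.
 */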
    sub     %o0, %o1, %o4
    mov     %o0, %g7
9:
    andcc   %o4, 3, %o5
0:
    bne     86f
     cmp    %o2, 15

    bleu    90f
     andcc  %o1, 3, %g0

    bne     78b
3:
     andcc  %o1, 4, %g0

    be      2f
     mov    %o2, %g1

    ld      [%o1], %o4
    sub     %g1, 4, %g1
    st      %o4, [%o0]
    add     %o1, 4, %o1
    add     %o0, 4, %o0
2:
    andcc   %g1, 0xffffff80, %g0
    be      3f
     andcc  %o0, 4, %g0

    be      82f + 4
5:
    MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
    MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
    MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
    MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
    sub     %g1, 128, %g1
    add     %o1, 128, %o1
    cmp     %g1, 128
    bge     5b
     add    %o0, 128, %o0
3:
    andcc   %g1, 0x70, %g4
    be      80f
     andcc  %g1, 8, %g0

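/* Jump backwards into the MOVE_LASTCHUNK table below: %g4 holds the
 * remaining count masked to 0x70.  Each MOVE_LASTCHUNK moves 16 bytes
 * in 6 insns (24 bytes of code), so the code offset back from 80f is
 * %g4 * 24/16 = %g4 + (%g4 >> 1).
 */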
    sethi   %hi(80f), %o5
    srl     %g4, 1, %o4
    add     %g4, %o4, %o4
    add     %o1, %g4, %o1
    sub     %o5, %o4, %o5
    jmpl    %o5 + %lo(80f), %g0
     add    %o0, %g4, %o0

79: /* memcpy_table */

    MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
    MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
    MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
    MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
    MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
    MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
    MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

80: /* memcpy_table_end */
    be      81f
     andcc  %g1, 4, %g0

    ldd     [%o1], %g2
    add     %o0, 8, %o0
    st      %g2, [%o0 - 0x08]
    add     %o1, 8, %o1
    st      %g3, [%o0 - 0x04]

81: /* memcpy_last7 */

    be      1f
     andcc  %g1, 2, %g0

    ld      [%o1], %g2
    add     %o1, 4, %o1
    st      %g2, [%o0]
    add     %o0, 4, %o0
1:
    be      1f
     andcc  %g1, 1, %g0

    lduh    [%o1], %g2
    add     %o1, 2, %o1
    sth     %g2, [%o0]
    add     %o0, 2, %o0
1:
    be      1f
     nop

    ldub    [%o1], %g2
    stb     %g2, [%o0]
1:
    retl
     mov    %g7, %o0

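/* Both pointers are 8-byte aligned here: move 128 bytes per iteration
 * using doubleword loads and stores.
 */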
82: /* ldd_std */
    MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
    MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
    MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
    MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
    subcc   %g1, 128, %g1
    add     %o1, 128, %o1
    cmp     %g1, 128
    bge     82b
     add    %o0, 128, %o0

    andcc   %g1, 0x70, %g4
    be      84f
     andcc  %g1, 8, %g0

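/* Same backwards-jump trick as at 79:, but each MOVE_LASTALIGNCHUNK
 * is 4 insns (16 bytes of code) per 16 data bytes, so the code offset
 * is simply %g4 itself.
 */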
    sethi   %hi(84f), %o5
    add     %o1, %g4, %o1
    sub     %o5, %g4, %o5
    jmpl    %o5 + %lo(84f), %g0
     add    %o0, %g4, %o0

83: /* amemcpy_table */

    MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
    MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
    MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
    MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
    MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
    MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
    MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

84: /* amemcpy_table_end */
    be      85f
     andcc  %g1, 4, %g0

    ldd     [%o1], %g2
    add     %o0, 8, %o0
    std     %g2, [%o0 - 0x08]
    add     %o1, 8, %o1
85: /* amemcpy_last7 */
    be      1f
     andcc  %g1, 2, %g0

    ld      [%o1], %g2
    add     %o1, 4, %o1
    st      %g2, [%o0]
    add     %o0, 4, %o0
1:
    be      1f
     andcc  %g1, 1, %g0

    lduh    [%o1], %g2
    add     %o1, 2, %o1
    sth     %g2, [%o0]
    add     %o0, 2, %o0
1:
    be      1f
     nop

    ldub    [%o1], %g2
    stb     %g2, [%o0]
1:
    retl
     mov    %g7, %o0

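/* (dst - src) & 3 != 0: no amount of alignment makes both pointers
 * word aligned, so words must be reassembled with shifts.  Copies of
 * 6 bytes or less go to 88f; otherwise grab a register window and
 * word-align the destination first.
 */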
86: /* non_aligned */
    cmp     %o2, 6
    bleu    88f
     nop

    save    %sp, -96, %sp
    andcc   %i0, 3, %g0
    be      61f
     andcc  %i0, 1, %g0
    be      60f
     andcc  %i0, 2, %g0

    ldub    [%i1], %g5
    add     %i1, 1, %i1
    stb     %g5, [%i0]
    sub     %i2, 1, %i2
    bne     61f
     add    %i0, 1, %i0
60:
    ldub    [%i1], %g3
    add     %i1, 2, %i1
    stb     %g3, [%i0]
    sub     %i2, 2, %i2
    ldub    [%i1 - 1], %g3
    add     %i0, 2, %i0
    stb     %g3, [%i0 - 1]
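/* Round the source down to a word boundary and rebuild each output
 * word from two adjacent input words:
 *	dst_word = (a << %g4) | (b >> %l0)
 * with %g4 = (src & 3) * 8 and %l0 = 32 - %g4.  The compares below
 * pick an entry point into the unrolled loop so the remaining word
 * count modulo 4 is handled first.
 */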
61:
    and     %i1, 3, %g2
    and     %i2, 0xc, %g3
    and     %i1, -4, %i1
    cmp     %g3, 4
    sll     %g2, 3, %g4
    mov     32, %g2
    be      4f
     sub    %g2, %g4, %l0

    blu     3f
     cmp    %g3, 0x8

    be      2f
     srl    %i2, 2, %g3

    ld      [%i1], %i3
    add     %i0, -8, %i0
    ld      [%i1 + 4], %i4
    b       8f
     add    %g3, 1, %g3
2:
    ld      [%i1], %i4
    add     %i0, -12, %i0
    ld      [%i1 + 4], %i5
    add     %g3, 2, %g3
    b       9f
     add    %i1, -4, %i1
3:
    ld      [%i1], %g1
    add     %i0, -4, %i0
    ld      [%i1 + 4], %i3
    srl     %i2, 2, %g3
    b       7f
     add    %i1, 4, %i1
4:
    ld      [%i1], %i5
    cmp     %i2, 7
    ld      [%i1 + 4], %g1
    srl     %i2, 2, %g3
    bleu    10f
     add    %i1, 8, %i1

    ld      [%i1], %i3
    add     %g3, -1, %g3
5:
    sll     %i5, %g4, %g2
    srl     %g1, %l0, %g5
    or      %g2, %g5, %g2
    st      %g2, [%i0]
7:
    ld      [%i1 + 4], %i4
    sll     %g1, %g4, %g2
    srl     %i3, %l0, %g5
    or      %g2, %g5, %g2
    st      %g2, [%i0 + 4]
8:
    ld      [%i1 + 8], %i5
    sll     %i3, %g4, %g2
    srl     %i4, %l0, %g5
    or      %g2, %g5, %g2
    st      %g2, [%i0 + 8]
9:
    ld      [%i1 + 12], %g1
    sll     %i4, %g4, %g2
    srl     %i5, %l0, %g5
    addcc   %g3, -4, %g3
    or      %g2, %g5, %g2
    add     %i1, 16, %i1
    st      %g2, [%i0 + 12]
    add     %i0, 16, %i0
    bne,a   5b
     ld     [%i1], %i3
10:
    sll     %i5, %g4, %g2
    srl     %g1, %l0, %g5
    srl     %l0, 3, %g3
    or      %g2, %g5, %g2
    sub     %i1, %g3, %i1
    andcc   %i2, 2, %g0
    st      %g2, [%i0]
    be      1f
     andcc  %i2, 1, %g0

    ldub    [%i1], %g2
    add     %i1, 2, %i1
    stb     %g2, [%i0 + 4]
    add     %i0, 2, %i0
    ldub    [%i1 - 1], %g2
    stb     %g2, [%i0 + 3]
1:
    be      1f
     nop
    ldub    [%i1], %g2
    stb     %g2, [%i0 + 4]
1:
    ret
     restore %g7, %g0, %o0

88: /* short_end */

    and     %o2, 0xe, %o3
20:
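/* Each MOVE_SHORTCHUNK moves 2 bytes in 4 insns (16 bytes of code),
 * so the backwards code offset from 89f is %o3 * 16/2 = %o3 << 3.
 */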
    sethi   %hi(89f), %o5
    sll     %o3, 3, %o4
    add     %o0, %o3, %o0
    sub     %o5, %o4, %o5
    add     %o1, %o3, %o1
    jmpl    %o5 + %lo(89f), %g0
     andcc  %o2, 1, %g0

    MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
    MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
    MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
    MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
    MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
    MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
    MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

89: /* short_table_end */

    be      1f
     nop

    ldub    [%o1], %g2
    stb     %g2, [%o0]
1:
    retl
     mov    %g7, %o0

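/* Short copy (len <= 15): if the source is not word aligned, fall back
 * to short_end above; otherwise move an 8-byte chunk if present and
 * finish the last 7 bytes in memcpy_last7 (81b).
 */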
90: /* short_aligned_end */
    bne     88b
     andcc  %o2, 8, %g0

    be      1f
     andcc  %o2, 4, %g0

    ld      [%o1 + 0x00], %g2
    ld      [%o1 + 0x04], %g3
    add     %o1, 8, %o1
    st      %g2, [%o0 + 0x00]
    st      %g3, [%o0 + 0x04]
    add     %o0, 8, %o0
1:
    b       81b
     mov    %o2, %g1