/* SPDX-License-Identifier: GPL-2.0 */
/* csum_copy.S: Checksum+copy code for sparc64
 *
 * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
 */

#include <asm/export.h>

#ifdef __KERNEL__
#define GLOBAL_SPARE    %g7
#else
#define GLOBAL_SPARE    %g5
#endif

#ifndef EX_LD
#define EX_LD(x)    x
#endif

#ifndef EX_ST
#define EX_ST(x)    x
#endif

#ifndef EX_RETVAL
#define EX_RETVAL(x)    x
#endif

#ifndef LOAD
#define LOAD(type,addr,dest)    type [addr], dest
#endif

#ifndef STORE
#define STORE(type,src,addr)    type src, [addr]
#endif

#ifndef FUNC_NAME
#define FUNC_NAME   csum_partial_copy_nocheck
#endif

    .register   %g2, #scratch
    .register   %g3, #scratch

    .text

90:
    /* We checked for zero length already, so there must be
     * at least one byte.
     */
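    /* We get here when the source is not 4-byte aligned.  If its low
     * bit was set, copy the single leading byte (it becomes the
     * initial sum in %o4), then fall through to pick up a stray
     * halfword if the source is still only 2-byte aligned.
     */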
    be,pt       %icc, 1f
     nop
    EX_LD(LOAD(ldub, %o0 + 0x00, %o4))
    add     %o0, 1, %o0
    sub     %o2, 1, %o2
    EX_ST(STORE(stb, %o4, %o1 + 0x00))
    add     %o1, 1, %o1
1:  andcc       %o0, 0x2, %g0
    be,pn       %icc, 80f
     cmp        %o2, 2
    blu,pn      %icc, 60f
     nop
    EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
    add     %o0, 2, %o0
    sub     %o2, 2, %o2
    EX_ST(STORE(sth, %o5, %o1 + 0x00))
    add     %o1, 2, %o1
    ba,pt       %xcc, 80f
     add        %o5, %o4, %o4

    .globl      FUNC_NAME
    .type       FUNC_NAME,#function
    EXPORT_SYMBOL(FUNC_NAME)
FUNC_NAME:      /* %o0=src, %o1=dst, %o2=len */
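    /* Register usage below: %o3 holds the all-ones seed, %o4 the
     * running sum on the aligned path, GLOBAL_SPARE remembers an odd
     * start on the aligned path (and holds the sum on the misaligned
     * path at 95:), %g1-%g3 and %o5 are scratch.
     */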
    LOAD(prefetch, %o0 + 0x000, #n_reads)
    xor     %o0, %o1, %g1
    mov     -1, %o3
    clr     %o4
    andcc       %g1, 0x3, %g0
    bne,pn      %icc, 95f
     LOAD(prefetch, %o0 + 0x040, #n_reads)

    brz,pn      %o2, 70f
     andcc      %o0, 0x3, %g0

    /* We "remember" in GLOBAL_SPARE whether the lowest bit of the
     * address was set, because if it was we have to swap the upper
     * and lower 8-bit halves of the sum we calculate.
     */
    bne,pn      %icc, 90b
     andcc      %o0, 0x1, GLOBAL_SPARE

80:
    LOAD(prefetch, %o0 + 0x080, #n_reads)
    andncc      %o2, 0x3f, %g3

    LOAD(prefetch, %o0 + 0x0c0, #n_reads)
    sub     %o2, %g3, %o2
    brz,pn      %g3, 2f
     LOAD(prefetch, %o0 + 0x100, #n_reads)

    /* So that we don't need to use the non-pairing
     * add-with-carry instructions we accumulate 32-bit
     * values into a 64-bit register.  At the end of the
     * loop we fold it down to 32 bits and then to 16.
     */
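    /* In rough C terms each trip through the loop below does
     * (illustrative sketch only; src32/dst32 are made-up names):
     *
     *   for (i = 0; i < 16; i++) {      // one 64-byte chunk
     *           u32 w = src32[i];
     *           dst32[i] = w;
     *           sum += w;               // sum is u64, carries pile up above bit 31
     *   }
     */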
    ba,pt       %xcc, 1f
    LOAD(prefetch, %o0 + 0x140, #n_reads)

    .align      32
1:  EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
    EX_LD(LOAD(lduw, %o0 + 0x04, %g1))
    EX_LD(LOAD(lduw, %o0 + 0x08, %g2))
    add     %o4, %o5, %o4
    EX_ST(STORE(stw, %o5, %o1 + 0x00))
    EX_LD(LOAD(lduw, %o0 + 0x0c, %o5))
    add     %o4, %g1, %o4
    EX_ST(STORE(stw, %g1, %o1 + 0x04))
    EX_LD(LOAD(lduw, %o0 + 0x10, %g1))
    add     %o4, %g2, %o4
    EX_ST(STORE(stw, %g2, %o1 + 0x08))
    EX_LD(LOAD(lduw, %o0 + 0x14, %g2))
    add     %o4, %o5, %o4
    EX_ST(STORE(stw, %o5, %o1 + 0x0c))
    EX_LD(LOAD(lduw, %o0 + 0x18, %o5))
    add     %o4, %g1, %o4
    EX_ST(STORE(stw, %g1, %o1 + 0x10))
    EX_LD(LOAD(lduw, %o0 + 0x1c, %g1))
    add     %o4, %g2, %o4
    EX_ST(STORE(stw, %g2, %o1 + 0x14))
    EX_LD(LOAD(lduw, %o0 + 0x20, %g2))
    add     %o4, %o5, %o4
    EX_ST(STORE(stw, %o5, %o1 + 0x18))
    EX_LD(LOAD(lduw, %o0 + 0x24, %o5))
    add     %o4, %g1, %o4
    EX_ST(STORE(stw, %g1, %o1 + 0x1c))
    EX_LD(LOAD(lduw, %o0 + 0x28, %g1))
    add     %o4, %g2, %o4
    EX_ST(STORE(stw, %g2, %o1 + 0x20))
    EX_LD(LOAD(lduw, %o0 + 0x2c, %g2))
    add     %o4, %o5, %o4
    EX_ST(STORE(stw, %o5, %o1 + 0x24))
    EX_LD(LOAD(lduw, %o0 + 0x30, %o5))
    add     %o4, %g1, %o4
    EX_ST(STORE(stw, %g1, %o1 + 0x28))
    EX_LD(LOAD(lduw, %o0 + 0x34, %g1))
    add     %o4, %g2, %o4
    EX_ST(STORE(stw, %g2, %o1 + 0x2c))
    EX_LD(LOAD(lduw, %o0 + 0x38, %g2))
    add     %o4, %o5, %o4
    EX_ST(STORE(stw, %o5, %o1 + 0x30))
    EX_LD(LOAD(lduw, %o0 + 0x3c, %o5))
    add     %o4, %g1, %o4
    EX_ST(STORE(stw, %g1, %o1 + 0x34))
    LOAD(prefetch, %o0 + 0x180, #n_reads)
    add     %o4, %g2, %o4
    EX_ST(STORE(stw, %g2, %o1 + 0x38))
    subcc       %g3, 0x40, %g3
    add     %o0, 0x40, %o0
    add     %o4, %o5, %o4
    EX_ST(STORE(stw, %o5, %o1 + 0x3c))
    bne,pt      %icc, 1b
     add        %o1, 0x40, %o1

2:  and     %o2, 0x3c, %g3
    brz,pn      %g3, 2f
     sub        %o2, %g3, %o2
1:  EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
    subcc       %g3, 0x4, %g3
    add     %o0, 0x4, %o0
    add     %o4, %o5, %o4
    EX_ST(STORE(stw, %o5, %o1 + 0x00))
    bne,pt      %icc, 1b
     add        %o1, 0x4, %o1

2:
    /* fold 64-->32 */
    srlx        %o4, 32, %o5
    srl     %o4, 0, %o4
    add     %o4, %o5, %o4
    srlx        %o4, 32, %o5
    srl     %o4, 0, %o4
    add     %o4, %o5, %o4

    /* fold 32-->16 */
    sethi       %hi(0xffff0000), %g1
    srl     %o4, 16, %o5
    andn        %o4, %g1, %g2
    add     %o5, %g2, %o4
    srl     %o4, 16, %o5
    andn        %o4, %g1, %g2
    add     %o5, %g2, %o4
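    /* The two folds above are, roughly, in C terms (sketch only):
     *
     *   sum = (u32)sum + (sum >> 32);        // twice: 64 --> 32
     *   sum = (sum & 0xffff) + (sum >> 16);  // twice: 32 --> 16
     */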

60:
    /* %o4 has the 16-bit sum we have calculated so far.  */
    cmp     %o2, 2
    blu,pt      %icc, 1f
     nop
    EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
    sub     %o2, 2, %o2
    add     %o0, 2, %o0
    add     %o4, %o5, %o4
    EX_ST(STORE(sth, %o5, %o1 + 0x00))
    add     %o1, 0x2, %o1
1:  brz,pt      %o2, 1f
     nop
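    /* A final odd byte is summed as the high half of a 16-bit word
     * (the sllx by 8 below), matching the big-endian 16-bit view of
     * the data.
     */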
    EX_LD(LOAD(ldub, %o0 + 0x00, %o5))
    sub     %o2, 1, %o2
    add     %o0, 1, %o0
    EX_ST(STORE(stb, %o5, %o1 + 0x00))
    sllx        %o5, 8, %o5
    add     %o1, 1, %o1
    add     %o4, %o5, %o4
1:
    /* fold 32-->16 */
    sethi       %hi(0xffff0000), %g1
    srl     %o4, 16, %o5
    andn        %o4, %g1, %g2
    add     %o5, %g2, %o4
    srl     %o4, 16, %o5
    andn        %o4, %g1, %g2
    add     %o5, %g2, %o4

1:  brz,pt      GLOBAL_SPARE, 1f
     nop

    /* We started with an odd byte, byte-swap the result.  */
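    /* (Roughly: sum = ((sum & 0xff) << 8) | (sum >> 8) in C terms.) */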
    srl     %o4, 8, %o5
    and     %o4, 0xff, %g1
    sll     %g1, 8, %g1
    or      %o5, %g1, %o4

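    /* One's-complement add of the folded sum into the all-ones seed
     * in %o3; the addc folds the carry back in (end-around carry).
     */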
1:  addcc       %o3, %o4, %o3
    addc        %g0, %o3, %o3

70:
    retl
     srl        %o3, 0, %o0

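    /* Slow path: src and dst do not share the same 4-byte alignment.
     * Copy with byte stores while summing the naturally aligned loads
     * from src into GLOBAL_SPARE; %o5 remembers whether we started on
     * an odd source byte so the sum can be byte-swapped at the end.
     */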
95: mov     0, GLOBAL_SPARE
    brlez,pn    %o2, 4f
     andcc      %o0, 1, %o5
    be,a,pt     %icc, 1f
     srl        %o2, 1, %g1
    sub     %o2, 1, %o2
    EX_LD(LOAD(ldub, %o0, GLOBAL_SPARE))
    add     %o0, 1, %o0
    EX_ST(STORE(stb, GLOBAL_SPARE, %o1))
    srl     %o2, 1, %g1
    add     %o1, 1, %o1
1:  brz,a,pn    %g1, 3f
     andcc      %o2, 1, %g0
    andcc       %o0, 2, %g0
    be,a,pt     %icc, 1f
     srl        %g1, 1, %g1
    EX_LD(LOAD(lduh, %o0, %o4))
    sub     %o2, 2, %o2
    srl     %o4, 8, %g2
    sub     %g1, 1, %g1
    EX_ST(STORE(stb, %g2, %o1))
    add     %o4, GLOBAL_SPARE, GLOBAL_SPARE
    EX_ST(STORE(stb, %o4, %o1 + 1))
    add     %o0, 2, %o0
    srl     %g1, 1, %g1
    add     %o1, 2, %o1
1:  brz,a,pn    %g1, 2f
     andcc      %o2, 2, %g0
    EX_LD(LOAD(lduw, %o0, %o4))
5:  srl     %o4, 24, %g2
    srl     %o4, 16, %g3
    EX_ST(STORE(stb, %g2, %o1))
    srl     %o4, 8, %g2
    EX_ST(STORE(stb, %g3, %o1 + 1))
    add     %o0, 4, %o0
    EX_ST(STORE(stb, %g2, %o1 + 2))
    addcc       %o4, GLOBAL_SPARE, GLOBAL_SPARE
    EX_ST(STORE(stb, %o4, %o1 + 3))
    addc        GLOBAL_SPARE, %g0, GLOBAL_SPARE
    add     %o1, 4, %o1
    subcc       %g1, 1, %g1
    bne,a,pt    %icc, 5b
     EX_LD(LOAD(lduw, %o0, %o4))
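    /* Word loop done: fold the 32-bit sum in GLOBAL_SPARE down to
     * 16 bits plus a possible carry before handling the tail.
     */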
    sll     GLOBAL_SPARE, 16, %g2
    srl     GLOBAL_SPARE, 16, GLOBAL_SPARE
    srl     %g2, 16, %g2
    andcc       %o2, 2, %g0
    add     %g2, GLOBAL_SPARE, GLOBAL_SPARE
2:  be,a,pt     %icc, 3f
     andcc      %o2, 1, %g0
    EX_LD(LOAD(lduh, %o0, %o4))
    andcc       %o2, 1, %g0
    srl     %o4, 8, %g2
    add     %o0, 2, %o0
    EX_ST(STORE(stb, %g2, %o1))
    add     GLOBAL_SPARE, %o4, GLOBAL_SPARE
    EX_ST(STORE(stb, %o4, %o1 + 1))
    add     %o1, 2, %o1
3:  be,a,pt     %icc, 1f
     sll        GLOBAL_SPARE, 16, %o4
    EX_LD(LOAD(ldub, %o0, %g2))
    sll     %g2, 8, %o4
    EX_ST(STORE(stb, %g2, %o1))
    add     GLOBAL_SPARE, %o4, GLOBAL_SPARE
    sll     GLOBAL_SPARE, 16, %o4
1:  addcc       %o4, GLOBAL_SPARE, GLOBAL_SPARE
    srl     GLOBAL_SPARE, 16, %o4
    addc        %g0, %o4, GLOBAL_SPARE
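    /* If we started on an odd source byte (%o5 set above), swap the
     * two bytes of the 16-bit sum.
     */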
    brz,pt      %o5, 4f
     srl        GLOBAL_SPARE, 8, %o4
    and     GLOBAL_SPARE, 0xff, %g2
    and     %o4, 0xff, %o4
    sll     %g2, 8, %g2
    or      %g2, %o4, GLOBAL_SPARE
4:  addcc       %o3, GLOBAL_SPARE, %o3
    addc        %g0, %o3, %o0
    retl
     srl        %o0, 0, %o0
    .size       FUNC_NAME, .-FUNC_NAME