/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

    .text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 */
_GLOBAL(__csum_partial)
    subi    r3,r3,4
    srawi.  r6,r4,2     /* Divide len by 4 and also clear carry */
    beq 3f      /* if we're doing < 4 bytes */
    andi.   r0,r3,2     /* Align buffer to longword boundary */
    beq+    1f
    lhz r0,4(r3)    /* do 2 bytes to get aligned */
    subi    r4,r4,2
    addi    r3,r3,2
    srwi.   r6,r4,2     /* # words to do */
    adde    r5,r5,r0
    beq 3f
1:  andi.   r6,r6,3     /* Prepare to handle words 4 by 4 */
    beq 21f
    mtctr   r6
2:  lwzu    r0,4(r3)
    adde    r5,r5,r0
    bdnz    2b
21: srwi.   r6,r4,4     /* # blocks of 4 words to do */
    beq 3f
    lwz r0,4(r3)
    mtctr   r6
    lwz r6,8(r3)
    adde    r5,r5,r0
    lwz r7,12(r3)
    adde    r5,r5,r6
    lwzu    r8,16(r3)
    adde    r5,r5,r7
    bdz 23f
22: lwz r0,4(r3)
    adde    r5,r5,r8
    lwz r6,8(r3)
    adde    r5,r5,r0
    lwz r7,12(r3)
    adde    r5,r5,r6
    lwzu    r8,16(r3)
    adde    r5,r5,r7
    bdnz    22b
23: adde    r5,r5,r8
3:  andi.   r0,r4,2
    beq+    4f
    lhz r0,4(r3)
    addi    r3,r3,2
    adde    r5,r5,r0
4:  andi.   r0,r4,1
    beq+    5f
    lbz r0,4(r3)
    slwi    r0,r0,8     /* Upper byte of word */
    adde    r5,r5,r0
5:  addze   r3,r5       /* add in final carry */
    blr
EXPORT_SYMBOL(__csum_partial)

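/*
 * A hypothetical C model of the routine above, for reference only:
 * csum_partial_ref() is an assumed name, not a kernel interface.
 * On a big-endian 32-bit machine it mirrors __csum_partial: whole
 * words are summed into a wide accumulator (the alignment pre-step
 * is folded into the straight word loop here) and the carries are
 * folded at the end, which matches the adde/addze carry chain above
 * up to 1's complement equivalence.
 */
#if 0   /* illustrative C sketch, not assembled */
#include <stdint.h>
#include <stddef.h>
#include <string.h>

static uint32_t csum_partial_ref(const uint8_t *buff, size_t len,
                                 uint32_t sum)
{
    uint64_t acc = sum;
    uint32_t w;
    uint16_t h;

    while (len >= 4) {          /* word loop: labels 1:/2:/21:/22: */
        memcpy(&w, buff, 4);    /* native (big-endian) word load */
        acc += w;
        buff += 4;
        len -= 4;
    }
    if (len >= 2) {             /* trailing halfword: label 3: */
        memcpy(&h, buff, 2);
        acc += h;
        buff += 2;
        len -= 2;
    }
    if (len)                    /* last byte: label 4: (slwi r0,r0,8) */
        acc += (uint32_t)*buff << 8;

    while (acc >> 32)           /* fold carries: label 5: (addze) */
        acc = (acc & 0xffffffffu) + (acc >> 32);
    return (uint32_t)acc;
}
#endif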
/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff, while copying the block to dst.
 * If an access exception occurs it returns zero.
 *
 * csum_partial_copy_generic(src, dst, len)
 */
#define CSUM_COPY_16_BYTES_WITHEX(n)    \
8 ## n ## 0:            \
    lwz r7,4(r4);   \
8 ## n ## 1:            \
    lwz r8,8(r4);   \
8 ## n ## 2:            \
    lwz r9,12(r4);  \
8 ## n ## 3:            \
    lwzu    r10,16(r4); \
8 ## n ## 4:            \
    stw r7,4(r6);   \
    adde    r12,r12,r7; \
8 ## n ## 5:            \
    stw r8,8(r6);   \
    adde    r12,r12,r8; \
8 ## n ## 6:            \
    stw r9,12(r6);  \
    adde    r12,r12,r9; \
8 ## n ## 7:            \
    stwu    r10,16(r6); \
    adde    r12,r12,r10

#define CSUM_COPY_16_BYTES_EXCODE(n)        \
    EX_TABLE(8 ## n ## 0b, fault);  \
    EX_TABLE(8 ## n ## 1b, fault);  \
    EX_TABLE(8 ## n ## 2b, fault);  \
    EX_TABLE(8 ## n ## 3b, fault);  \
    EX_TABLE(8 ## n ## 4b, fault);  \
    EX_TABLE(8 ## n ## 5b, fault);  \
    EX_TABLE(8 ## n ## 6b, fault);  \
    EX_TABLE(8 ## n ## 7b, fault);

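/*
 * The "8 ## n ## 0" pasting above builds purely numeric local
 * labels: for a given n, CSUM_COPY_16_BYTES_WITHEX emits labels
 * 8n0: through 8n7:, one before each load/store, and
 * CSUM_COPY_16_BYTES_EXCODE emits the matching EX_TABLE entries
 * that send a fault at any of them to "fault".  A hypothetical
 * stand-alone C demonstration of the same token pasting:
 */
#if 0   /* illustrative C sketch, not assembled */
#include <stdio.h>

#define LABEL(n, slot)  8 ## n ## slot  /* same pasting as above */

int main(void)
{
    /* prints "800 813 877": label numbers as generated for
     * (n, slot) = (0, 0), (1, 3) and (7, 7) */
    printf("%d %d %d\n", LABEL(0, 0), LABEL(1, 3), LABEL(7, 7));
    return 0;
}
#endif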
    .text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
    li  r12,-1
    addic   r0,r0,0         /* clear carry */
    addi    r6,r4,-4
    neg r0,r4
    addi    r4,r3,-4
    andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
    crset   4*cr7+eq
    beq 58f

    cmplw   0,r5,r0         /* is this more than total to do? */
    blt 63f         /* if not much to do */
    rlwinm  r7,r6,3,0x8
    rlwnm   r12,r12,r7,0,31 /* odd destination address: rotate one byte */
    cmplwi  cr7,r7,0    /* is destination address even? */
    andi.   r8,r0,3         /* get it word-aligned first */
    mtctr   r8
    beq+    61f
    li  r3,0
70: lbz r9,4(r4)        /* do some bytes */
    addi    r4,r4,1
    slwi    r3,r3,8
    rlwimi  r3,r9,0,24,31
71: stb r9,4(r6)
    addi    r6,r6,1
    bdnz    70b
    adde    r12,r12,r3
61: subf    r5,r0,r5
    srwi.   r0,r0,2
    mtctr   r0
    beq 58f
72: lwzu    r9,4(r4)        /* do some words */
    adde    r12,r12,r9
73: stwu    r9,4(r6)
    bdnz    72b

58: srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
    clrlwi  r5,r5,32-LG_CACHELINE_BYTES
    li  r11,4
    beq 63f

    /* Here we decide how far ahead to prefetch the source */
    li  r3,4
    cmpwi   r0,1
    li  r7,0
    ble 114f
    li  r7,1
#if MAX_COPY_PREFETCH > 1
    /* Heuristically, for large transfers we prefetch
       MAX_COPY_PREFETCH cachelines ahead.  For small transfers
       we prefetch 1 cacheline ahead. */
    cmpwi   r0,MAX_COPY_PREFETCH
    ble 112f
    li  r7,MAX_COPY_PREFETCH
112:    mtctr   r7
111:    dcbt    r3,r4
    addi    r3,r3,CACHELINE_BYTES
    bdnz    111b
#else
    dcbt    r3,r4
    addi    r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */
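/*
 * The same prefetch-distance heuristic, sketched in C with GCC's
 * __builtin_prefetch; the function and the two constants here are
 * assumptions of this sketch, not kernel code.  No lookahead for a
 * single cache line, one line ahead for short copies, and
 * MAX_COPY_PREFETCH lines ahead for long ones.
 */
#if 0   /* illustrative C sketch, not assembled */
#include <stddef.h>

#define CACHELINE    32     /* assumed L1 line size */
#define MAX_PREFETCH 4      /* assumed MAX_COPY_PREFETCH */

static void prefetch_ahead(const char *src, size_t nlines)
{
    size_t dist = (nlines <= 1) ? 0 :
                  (nlines <= MAX_PREFETCH) ? 1 : MAX_PREFETCH;
    size_t i;

    for (i = 1; i <= dist; i++)     /* like the dcbt loop at 111: */
        __builtin_prefetch(src + i * CACHELINE, 0, 3);
}
#endif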

114:    subf    r8,r7,r0
    mr  r0,r7
    mtctr   r8

53: dcbt    r3,r4
54: dcbz    r11,r6
/* the main body of the cacheline loop */
    CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
    CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
    CSUM_COPY_16_BYTES_WITHEX(2)
    CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
    CSUM_COPY_16_BYTES_WITHEX(4)
    CSUM_COPY_16_BYTES_WITHEX(5)
    CSUM_COPY_16_BYTES_WITHEX(6)
    CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
    bdnz    53b
    cmpwi   r0,0
    li  r3,4
    li  r7,0
    bne 114b

63: srwi.   r0,r5,2
    mtctr   r0
    beq 64f
30: lwzu    r0,4(r4)
    adde    r12,r12,r0
31: stwu    r0,4(r6)
    bdnz    30b

64: andi.   r0,r5,2
    beq+    65f
40: lhz r0,4(r4)
    addi    r4,r4,2
41: sth r0,4(r6)
    adde    r12,r12,r0
    addi    r6,r6,2
65: andi.   r0,r5,1
    beq+    66f
50: lbz r0,4(r4)
51: stb r0,4(r6)
    slwi    r0,r0,8
    adde    r12,r12,r0
66: addze   r3,r12
    beqlr+  cr7
    rlwinm  r3,r3,8,0,31    /* odd destination address: rotate one byte */
    blr

fault:
    li  r3,0
    blr

    EX_TABLE(70b, fault);
    EX_TABLE(71b, fault);
    EX_TABLE(72b, fault);
    EX_TABLE(73b, fault);
    EX_TABLE(54b, fault);

/*
 * this stuff handles faults in the cacheline loop: a fault on any
 * of its loads (the read part) or stores (the write part) branches
 * to fault
 */
    CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
    CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
    CSUM_COPY_16_BYTES_EXCODE(2)
    CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
    CSUM_COPY_16_BYTES_EXCODE(4)
    CSUM_COPY_16_BYTES_EXCODE(5)
    CSUM_COPY_16_BYTES_EXCODE(6)
    CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

    EX_TABLE(30b, fault);
    EX_TABLE(31b, fault);
    EX_TABLE(40b, fault);
    EX_TABLE(41b, fault);
    EX_TABLE(50b, fault);
    EX_TABLE(51b, fault);

EXPORT_SYMBOL(csum_partial_copy_generic)
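/*
 * A hypothetical C model of csum_partial_copy_generic; the name
 * csum_copy_ref and the simplifications are assumptions of this
 * sketch.  It copies while summing, with the sum seeded to
 * 0xffffffff (the "li r12,-1" above).  The real routine returns 0
 * when a load or store faults (via the EX_TABLE entries); a plain
 * C model cannot take faults, and the odd-destination byte rotation
 * and the cacheline prefetch/dcbz machinery are likewise omitted.
 */
#if 0   /* illustrative C sketch, not assembled */
#include <stdint.h>
#include <stddef.h>
#include <string.h>

static uint32_t csum_copy_ref(const uint8_t *src, uint8_t *dst,
                              size_t len)
{
    uint64_t acc = 0xffffffffu;
    uint32_t w;
    uint16_t h;

    while (len >= 4) {          /* word copy: labels 30:/31: */
        memcpy(&w, src, 4);
        memcpy(dst, &w, 4);
        acc += w;
        src += 4; dst += 4; len -= 4;
    }
    if (len >= 2) {             /* halfword tail: labels 40:/41: */
        memcpy(&h, src, 2);
        memcpy(dst, &h, 2);
        acc += h;
        src += 2; dst += 2; len -= 2;
    }
    if (len) {                  /* byte tail: labels 50:/51: */
        *dst = *src;
        acc += (uint32_t)*src << 8;
    }
    while (acc >> 32)           /* fold carries (addze at 66:) */
        acc = (acc & 0xffffffffu) + (acc >> 32);
    return (uint32_t)acc;
}
#endif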

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *             const struct in6_addr *daddr,
 *             __u32 len, __u8 proto, __wsum sum)
 */

_GLOBAL(csum_ipv6_magic)
    lwz r8, 0(r3)
    lwz r9, 4(r3)
    addc    r0, r7, r8
    lwz r10, 8(r3)
    adde    r0, r0, r9
    lwz r11, 12(r3)
    adde    r0, r0, r10
    lwz r8, 0(r4)
    adde    r0, r0, r11
    lwz r9, 4(r4)
    adde    r0, r0, r8
    lwz r10, 8(r4)
    adde    r0, r0, r9
    lwz r11, 12(r4)
    adde    r0, r0, r10
    add r5, r5, r6  /* assumption: len + proto doesn't carry */
    adde    r0, r0, r11
    adde    r0, r0, r5
    addze   r0, r0
    rotlwi  r3, r0, 16
    add r3, r0, r3
    not r3, r3
    rlwinm  r3, r3, 16, 16, 31
    blr
EXPORT_SYMBOL(csum_ipv6_magic)
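/*
 * A hypothetical C model of the routine above (the name
 * csum_ipv6_magic_ref is assumed; the address words are taken as
 * raw big-endian 32-bit loads, as the lwz instructions do): sum
 * the eight address words plus len, proto and sum with end-around
 * carry, fold 32 bits to 16 with the rotate-and-add trick
 * (rotlwi/add), then complement and keep the high half
 * (not/rlwinm).
 */
#if 0   /* illustrative C sketch, not assembled */
#include <stdint.h>

static uint16_t csum_ipv6_magic_ref(const uint32_t saddr[4],
                                    const uint32_t daddr[4],
                                    uint32_t len, uint8_t proto,
                                    uint32_t sum)
{
    uint64_t acc = sum;
    uint32_t r, folded;
    int i;

    for (i = 0; i < 4; i++)
        acc += saddr[i];
    for (i = 0; i < 4; i++)
        acc += daddr[i];
    acc += len + proto;         /* assumed not to carry, as above */

    while (acc >> 32)           /* end-around carry (adde/addze) */
        acc = (acc & 0xffffffffu) + (acc >> 32);

    r = (uint32_t)acc;
    folded = r + ((r << 16) | (r >> 16));   /* rotlwi + add */
    return (uint16_t)(~folded >> 16);       /* not + rlwinm */
}
#endif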