/* SPDX-License-Identifier: GPL-2.0 */
/* checksum.S: Sparc optimized checksum code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1995 Miguel de Icaza
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 assembly
 *	Linux/Alpha fixes
 *
 * Miguel de Icaza made the clever use of the cpte table.
 */
#include <asm/errno.h>
#include <asm/export.h>

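	/* Sum one 32-byte chunk: load four doublewords and fold all
	 * eight 32-bit words into sum, accumulating the carries with
	 * addxcc.
	 */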
#define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5)	\
	ldd	[buf + offset + 0x00], t0;			\
	ldd	[buf + offset + 0x08], t2;			\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;					\
	ldd	[buf + offset + 0x10], t4;			\
	addxcc	t2, sum, sum;					\
	addxcc	t3, sum, sum;					\
	ldd	[buf + offset + 0x18], t0;			\
	addxcc	t4, sum, sum;					\
	addxcc	t5, sum, sum;					\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;

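	/* Sum one trailing 16-byte chunk, addressed backwards from buf. */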
#define CSUM_LASTCHUNK(buf, offset, sum, t0, t1, t2, t3)	\
	ldd	[buf - offset - 0x08], t0;			\
	ldd	[buf - offset - 0x00], t2;			\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;					\
	addxcc	t2, sum, sum;					\
	addxcc	t3, sum, sum;

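	/* Do end cruft out of band to get better cache patterns. */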
csum_partial_end_cruft:
	be	1f				! caller asks %o1 & 0x8
	 andcc	%o1, 4, %g0			! nope, check for word remaining
	ldd	[%o0], %g2			! load two
	addcc	%g2, %o2, %o2			! add first word to sum
	addxcc	%g3, %o2, %o2			! add second word as well
	add	%o0, 8, %o0			! advance buf ptr
	addx	%g0, %o2, %o2			! add in final carry
	andcc	%o1, 4, %g0			! check again for word remaining
1:	be	1f				! nope, skip this code
	 andcc	%o1, 3, %o1			! check for trailing bytes
	ld	[%o0], %g2			! load it
	addcc	%g2, %o2, %o2			! add to sum
	add	%o0, 4, %o0			! advance buf ptr
	addx	%g0, %o2, %o2			! add in final carry
	andcc	%o1, 3, %g0			! check again for trailing bytes
1:	be	1f				! no trailing bytes, return
	 addcc	%o1, -1, %g0			! only one byte remains?
	bne	2f				! at least two bytes more
	 subcc	%o1, 2, %o1			! only two bytes more?
	b	4f				! only one byte remains
	 or	%g0, %g0, %o4			! clear fake hword value
2:	lduh	[%o0], %o4			! get hword
	be	6f				! jmp if only hword remains
	 add	%o0, 2, %o0			! advance buf ptr either way
	sll	%o4, 16, %o4			! create upper hword
4:	ldub	[%o0], %o5			! get final byte
	sll	%o5, 8, %o5			! put into place
	or	%o5, %o4, %o4			! coalesce with hword (if any)
6:	addcc	%o4, %o2, %o2			! add to sum
1:	retl					! get outta here
	 addx	%g0, %o2, %o0			! add final carry into retval

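	/* Also do alignment out of band to get better cache patterns. */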
csum_partial_fix_alignment:
	cmp	%o1, 6
	bl	cpte - 0x4
	 andcc	%o0, 0x2, %g0
	be	1f
	 andcc	%o0, 0x4, %g0
	lduh	[%o0 + 0x00], %g2
	sub	%o1, 2, %o1
	add	%o0, 2, %o0
	sll	%g2, 16, %g2
	addcc	%g2, %o2, %o2
	srl	%o2, 16, %g3
	addx	%g0, %g3, %g2
	sll	%o2, 16, %o2
	sll	%g2, 16, %g3
	srl	%o2, 16, %o2
	andcc	%o0, 0x4, %g0
	or	%g3, %o2, %o2
1:	be	cpa
	 andcc	%o1, 0xffffff80, %o3
	ld	[%o0 + 0x00], %g2
	sub	%o1, 4, %o1
	addcc	%g2, %o2, %o2
	add	%o0, 4, %o0
	addx	%g0, %o2, %o2
	b	cpa
	 andcc	%o1, 0xffffff80, %o3

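	/* The common case is to get called with a nicely aligned
	 * buffer of size 0x20.  Follow the code path for that case.
	 */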
	.globl	csum_partial
	EXPORT_SYMBOL(csum_partial)
csum_partial:			/* %o0=buf, %o1=len, %o2=sum */
	andcc	%o0, 0x7, %g0				! alignment problems?
	bne	csum_partial_fix_alignment		! yep, handle it
	 sethi	%hi(cpte - 8), %g7			! prepare table jmp ptr
	andcc	%o1, 0xffffff80, %o3			! num loop iterations
cpa:	be	3f					! none to do
	 andcc	%o1, 0x70, %g1				! clears carry flag too
5:	CSUM_BIGCHUNK(%o0, 0x00, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	addx	%g0, %o2, %o2				! sink in final carry
	subcc	%o3, 128, %o3				! detract from loop iters
	bne	5b					! more to do
	 add	%o0, 128, %o0				! advance buf ptr
	andcc	%o1, 0x70, %g1				! clears carry flag too
3:	be	cpte					! nope
	 andcc	%o1, 0xf, %g0				! anything left at all?
	srl	%g1, 1, %o4				! compute offset
	sub	%g7, %g1, %g7				! adjust jmp ptr
	sub	%g7, %o4, %g7				! final jmp ptr adjust
	jmp	%g7 + %lo(cpte - 8)			! enter the table
	 add	%o0, %g1, %o0				! advance buf ptr
cptbl:	CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x48, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x38, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5)
	addx	%g0, %o2, %o2				! fetch final carry
	andcc	%o1, 0xf, %g0				! anything left at all?
cpte:	bne	csum_partial_end_cruft			! yep, handle it
	 andcc	%o1, 8, %g0				! check how much
cpout:	retl						! get outta here
	 mov	%o2, %o0				! return computed csum

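/* Work around cpp -rob */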
#define ALLOC #alloc
#define EXECINSTR #execinstr

#define EX(x,y)				\
98:	x,y;				\
	.section __ex_table,ALLOC;	\
	.align	4;			\
	.word	98b, cc_fault;		\
	.text;				\
	.align	4

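	/* Copy-and-sum a 32-byte chunk when the destination is also
	 * doubleword aligned: ldd/std pairs with the carry adds
	 * interleaved between the memory operations to keep the
	 * pipeline full.
	 */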
#define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
	EX(ldd	[src + off + 0x00], t0);	\
	EX(ldd	[src + off + 0x08], t2);	\
	addxcc	t0, sum, sum;			\
	EX(ldd	[src + off + 0x10], t4);	\
	addxcc	t1, sum, sum;			\
	EX(ldd	[src + off + 0x18], t6);	\
	addxcc	t2, sum, sum;			\
	EX(std	t0, [dst + off + 0x00]);	\
	addxcc	t3, sum, sum;			\
	EX(std	t2, [dst + off + 0x08]);	\
	addxcc	t4, sum, sum;			\
	EX(std	t4, [dst + off + 0x10]);	\
	addxcc	t5, sum, sum;			\
	EX(std	t6, [dst + off + 0x18]);	\
	addxcc	t6, sum, sum;			\
	addxcc	t7, sum, sum;

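	/* As above, but the destination is only word aligned: the loads
	 * stay doubleword (ldd) while the stores drop to single words.
	 */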
#define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
	EX(ldd	[src + off + 0x00], t0);	\
	EX(ldd	[src + off + 0x08], t2);	\
	EX(ldd	[src + off + 0x10], t4);	\
	EX(ldd	[src + off + 0x18], t6);	\
	EX(st	t0, [dst + off + 0x00]);	\
	addxcc	t0, sum, sum;			\
	EX(st	t1, [dst + off + 0x04]);	\
	addxcc	t1, sum, sum;			\
	EX(st	t2, [dst + off + 0x08]);	\
	addxcc	t2, sum, sum;			\
	EX(st	t3, [dst + off + 0x0c]);	\
	addxcc	t3, sum, sum;			\
	EX(st	t4, [dst + off + 0x10]);	\
	addxcc	t4, sum, sum;			\
	EX(st	t5, [dst + off + 0x14]);	\
	addxcc	t5, sum, sum;			\
	EX(st	t6, [dst + off + 0x18]);	\
	addxcc	t6, sum, sum;			\
	EX(st	t7, [dst + off + 0x1c]);	\
	addxcc	t7, sum, sum;

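	/* Copy and sum one trailing 16-byte chunk, addressed backwards. */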
#define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3)	\
	EX(ldd	[src - off - 0x08], t0);	\
	EX(ldd	[src - off - 0x00], t2);	\
	addxcc	t0, sum, sum;			\
	EX(st	t0, [dst - off - 0x08]);	\
	addxcc	t1, sum, sum;			\
	EX(st	t1, [dst - off - 0x04]);	\
	addxcc	t2, sum, sum;			\
	EX(st	t2, [dst - off - 0x00]);	\
	addxcc	t3, sum, sum;			\
	EX(st	t3, [dst - off + 0x04]);

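	/* Handle the end cruft code out of band for better cache patterns. */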
cc_end_cruft:
	be	1f
	 andcc	%o3, 4, %g0
	EX(ldd	[%o0 + 0x00], %g2)
	add	%o1, 8, %o1
	addcc	%g2, %g7, %g7
	add	%o0, 8, %o0
	addxcc	%g3, %g7, %g7
	EX(st	%g2, [%o1 - 0x08])
	addx	%g0, %g7, %g7
	andcc	%o3, 4, %g0
	EX(st	%g3, [%o1 - 0x04])
1:	be	1f
	 andcc	%o3, 3, %o3
	EX(ld	[%o0 + 0x00], %g2)
	add	%o1, 4, %o1
	addcc	%g2, %g7, %g7
	EX(st	%g2, [%o1 - 0x04])
	addx	%g0, %g7, %g7
	andcc	%o3, 3, %g0
	add	%o0, 4, %o0
1:	be	1f
	 addcc	%o3, -1, %g0
	bne	2f
	 subcc	%o3, 2, %o3
	b	4f
	 or	%g0, %g0, %o4
2:	EX(lduh	[%o0 + 0x00], %o4)
	add	%o0, 2, %o0
	EX(sth	%o4, [%o1 + 0x00])
	be	6f
	 add	%o1, 2, %o1
	sll	%o4, 16, %o4
4:	EX(ldub	[%o0 + 0x00], %o5)
	EX(stb	%o5, [%o1 + 0x00])
	sll	%o5, 8, %o5
	or	%o5, %o4, %o4
6:	addcc	%o4, %g7, %g7
1:	retl
	 addx	%g0, %g7, %o0

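	/* Align the source for the copying path: odd addresses go to
	 * ccslow, otherwise copy and sum a leading halfword and/or word
	 * until %o0 is doubleword aligned.
	 */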
cc_dword_align:
	cmp	%g1, 16
	bge	1f
	 srl	%g1, 1, %o3
2:	cmp	%o3, 0
	be,a	ccte
	 andcc	%g1, 0xf, %o3
	andcc	%o3, %o0, %g0	! Check %o0 only (%o1 has the same last 2 bits)
	be,a	2b
	 srl	%o3, 1, %o3
1:	andcc	%o0, 0x1, %g0
	bne	ccslow
	 andcc	%o0, 0x2, %g0
	be	1f
	 andcc	%o0, 0x4, %g0
	EX(lduh	[%o0 + 0x00], %g4)
	sub	%g1, 2, %g1
	EX(sth	%g4, [%o1 + 0x00])
	add	%o0, 2, %o0
	sll	%g4, 16, %g4
	addcc	%g4, %g7, %g7
	add	%o1, 2, %o1
	srl	%g7, 16, %g3
	addx	%g0, %g3, %g4
	sll	%g7, 16, %g7
	sll	%g4, 16, %g3
	srl	%g7, 16, %g7
	andcc	%o0, 0x4, %g0
	or	%g3, %g7, %g7
1:	be	3f
	 andcc	%g1, 0xffffff80, %g0
	EX(ld	[%o0 + 0x00], %g4)
	sub	%g1, 4, %g1
	EX(st	%g4, [%o1 + 0x00])
	add	%o0, 4, %o0
	addcc	%g4, %g7, %g7
	add	%o1, 4, %o1
	addx	%g0, %g7, %g7
	b	3f
	 andcc	%g1, 0xffffff80, %g0

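	/* The main checksum-and-copy entry point. */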
	.align	8
	.globl	__csum_partial_copy_sparc_generic
	EXPORT_SYMBOL(__csum_partial_copy_sparc_generic)
__csum_partial_copy_sparc_generic:
					/* %o0=src, %o1=dest, %g1=len, %g7=sum */
	xor	%o0, %o1, %o4		! get changing bits
	andcc	%o4, 3, %g0		! check for mismatched alignment
	bne	ccslow			! better this than unaligned/fixups
	 andcc	%o0, 7, %g0		! need to align things?
	bne	cc_dword_align		! yes, we check for short lengths there
	 andcc	%g1, 0xffffff80, %g0	! can we use unrolled loop?
3:	be	3f			! nope, less than one loop remains
	 andcc	%o1, 4, %g0		! dest aligned on 4 or 8 byte boundary?
	be	ccdbl + 4		! 8 byte aligned, kick ass
5:	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	sub	%g1, 128, %g1		! detract from length
	addx	%g0, %g7, %g7		! add in last carry bit
	andcc	%g1, 0xffffff80, %g0	! more to csum?
	add	%o0, 128, %o0		! advance src ptr
	bne	5b			! we did not go negative, continue looping
	 add	%o1, 128, %o1		! advance dest ptr
3:	andcc	%g1, 0x70, %o2		! can use table?
ccmerge:be	ccte			! nope, go and check for end cruft
	 andcc	%g1, 0xf, %o3		! get low bits of length (clears carry btw)
	srl	%o2, 1, %o4		! begin negative offset computation
	sethi	%hi(12f), %o5		! set up table ptr end
	add	%o0, %o2, %o0		! advance src ptr
	sub	%o5, %o4, %o5		! continue table calculation
	sll	%o2, 1, %g2		! constant multiplies are fun...
	sub	%o5, %g2, %o5		! some more adjustments
	jmp	%o5 + %lo(12f)		! jump into it, duff style, wheee...
	 add	%o1, %o2, %o1		! advance dest ptr (carry is clear btw)
cctbl:	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x58,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x48,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x38,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
12:	addx	%g0, %g7, %g7
	andcc	%o3, 0xf, %g0		! check for low bits set
ccte:	bne	cc_end_cruft		! something left, handle it out of band
	 andcc	%o3, 8, %g0		! begin checks for that code
	retl				! return
	 mov	%g7, %o0		! give em the computed checksum
ccdbl:	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	sub	%g1, 128, %g1		! detract from length
	addx	%g0, %g7, %g7		! add in last carry bit
	andcc	%g1, 0xffffff80, %g0	! more to csum?
	add	%o0, 128, %o0		! advance src ptr
	bne	ccdbl			! we did not go negative, continue looping
	 add	%o1, 128, %o1		! advance dest ptr
	b	ccmerge			! finish it off, above
	 andcc	%g1, 0x70, %o2		! can use table? (clears carry btw)

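	/* Byte-at-a-time slow path, used when src and dst disagree in
	 * their low two address bits; %o5 remembers an odd source
	 * address so the folded sum can be byte-swapped at the end.
	 */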
ccslow:	cmp	%g1, 0
	mov	0, %g5
	bleu	4f
	 andcc	%o0, 1, %o5
	be,a	1f
	 srl	%g1, 1, %g4
	sub	%g1, 1, %g1
	EX(ldub	[%o0], %g5)
	add	%o0, 1, %o0
	EX(stb	%g5, [%o1])
	srl	%g1, 1, %g4
	add	%o1, 1, %o1
1:	cmp	%g4, 0
	be,a	3f
	 andcc	%g1, 1, %g0
	andcc	%o0, 2, %g0
	be,a	1f
	 srl	%g4, 1, %g4
	EX(lduh	[%o0], %o4)
	sub	%g1, 2, %g1
	srl	%o4, 8, %g2
	sub	%g4, 1, %g4
	EX(stb	%g2, [%o1])
	add	%o4, %g5, %g5
	EX(stb	%o4, [%o1 + 1])
	add	%o0, 2, %o0
	srl	%g4, 1, %g4
	add	%o1, 2, %o1
1:	cmp	%g4, 0
	be,a	2f
	 andcc	%g1, 2, %g0
	EX(ld	[%o0], %o4)
5:	srl	%o4, 24, %g2
	srl	%o4, 16, %g3
	EX(stb	%g2, [%o1])
	srl	%o4, 8, %g2
	EX(stb	%g3, [%o1 + 1])
	add	%o0, 4, %o0
	EX(stb	%g2, [%o1 + 2])
	addcc	%o4, %g5, %g5
	EX(stb	%o4, [%o1 + 3])
	addx	%g5, %g0, %g5	! I am now too lazy to optimize this (question if
	add	%o1, 4, %o1	! it is worthy). Maybe some day - with the sll/srl
	subcc	%g4, 1, %g4	! tricks
	bne,a	5b
	 EX(ld	[%o0], %o4)
	sll	%g5, 16, %g2
	srl	%g5, 16, %g5
	srl	%g2, 16, %g2
	andcc	%g1, 2, %g0
	add	%g2, %g5, %g5
2:	be,a	3f
	 andcc	%g1, 1, %g0
	EX(lduh	[%o0], %o4)
	andcc	%g1, 1, %g0
	srl	%o4, 8, %g2
	add	%o0, 2, %o0
	EX(stb	%g2, [%o1])
	add	%g5, %o4, %g5
	EX(stb	%o4, [%o1 + 1])
	add	%o1, 2, %o1
3:	be,a	1f
	 sll	%g5, 16, %o4
	EX(ldub	[%o0], %g2)
	sll	%g2, 8, %o4
	EX(stb	%g2, [%o1])
	add	%g5, %o4, %g5
	sll	%g5, 16, %o4
1:	addcc	%o4, %g5, %g5
	srl	%g5, 16, %o4
	addx	%g0, %o4, %g5
	orcc	%o5, %g0, %g0
	be	4f
	 srl	%g5, 8, %o4
	and	%g5, 0xff, %g2
	and	%o4, 0xff, %o4
	sll	%g2, 8, %g2
	or	%g2, %o4, %g5
4:	addcc	%g7, %g5, %g7
	retl
	 addx	%g0, %g7, %o0

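	/* All EX() exception table entries land here on a faulting
	 * access; return zero, which the callers treat as the fault
	 * indication.
	 */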
cc_fault:
	retl				! leaf routine, return via %o7
	 clr	%o0