0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056 #include <asm/export.h>
# csum_ipv6_magic: add two 16-byte memory blocks and three scalar
# arguments into one 64-bit accumulator, feed the carries back in,
# fold the total down to 16 bits and return its complement.
#
# Inputs, as used by the code below:
#   reg $16  address of the first 16-byte block (src), any alignment
#   reg $17  address of the second 16-byte block (dst), any alignment
#   reg $18  32-bit value; byte-swapped before it is summed
#   reg $19  16-bit value; byte-swapped and moved to bits 16..31
#            before it is summed
#   reg $20  incoming checksum; only the low 32 bits are used
# Output:
#   reg $0   16-bit result in the low two bytes, upper bytes zero
# Registers written: $0-$7, $18-$20, $22, $23.  No memory is written
# and the stack is not touched (.frame declares a zero-size frame with
# the return address in $26).
#
# NOTE(review): $18 and $19 are presumably the len and proto arguments
# of the usual csum_ipv6_magic prototype; confirm against the C
# declaration, which is not visible here.
#
# NOTE(review): the L / U / E letters in the trailing comments look like
# 21264 issue-slot annotations and the blank lines look like fetch-group
# boundaries; the instruction order is therefore deliberate, do not
# reorder without re-checking the schedule.
0057 .globl csum_ipv6_magic
0058 .align 4
0059 .ent csum_ipv6_magic
0060 .frame $30,0,$26,0
0061 csum_ipv6_magic:
0062 .prologue 0
0063
# Both blocks are fetched with ldq_u (which ignores the low three
# address bits) and reassembled with extql/extqh.  The third load of
# each block uses offset 15 rather than 16: for an aligned pointer it
# re-reads the second quadword instead of touching memory past the
# block.
#
# Interleaved with the loads: the byte swap of $18 (register pictures
# assume the input is 0xAABBCCDD) and of $19 (input 0xaabb).
0064 ldq_u $0,0($16) # L : Latency: 3
0065 inslh $18,7,$4 # U : 0000000000AABBCC
0066 ldq_u $1,8($16) # L : Latency: 3
0067 sll $19,8,$7 # U : U L U L : 0x00000000 00aabb00
0068
0069 and $16,7,$6 # E : src misalignment
0070 ldq_u $5,15($16) # L : Latency: 3
0071 zapnot $20,15,$20 # U : zero extend incoming csum
0072 ldq_u $2,0($17) # L : U L U L : Latency: 3
0073
0074 extql $0,$6,$0 # U : low part of 1st src word
0075 extqh $1,$6,$22 # U : high part of 1st src word
0076 ldq_u $3,8($17) # L : Latency: 3
0077 sll $19,24,$19 # U : U U L U : 0x000000aa bb000000
0078
# With zero misalignment extqh shifts by 0 and returns its whole input,
# so the high part must be discarded explicitly.
0079 cmoveq $6,$31,$22 # E : src aligned?
0080 ldq_u $23,15($17) # L : Latency: 3
0081 inswl $18,3,$18 # U : 000000CCDD000000
0082 addl $19,$7,$19 # E : U L U L : <sign bits>bbaabb00
0083
# The 2nd src word needs no cmoveq: when aligned, $5 holds the same
# quadword as $1, so the OR below changes nothing.
0084 or $0,$22,$0 # E : 1st src word complete
0085 extql $1,$6,$1 # U : low part of 2nd src word
0086 or $18,$4,$18 # E : 000000CCDDAABBCC
0087 extqh $5,$6,$5 # U : L U L U : high part of 2nd src word
0088
# $6 is reused from here on: it now holds the dst misalignment.
0089 and $17,7,$6 # E : dst misalignment
0090 extql $2,$6,$2 # U : low part of 1st dst word
0091 or $1,$5,$1 # E : 2nd src word complete
0092 extqh $3,$6,$22 # U : L U L U : high part of 1st dst word
0093
0094 cmoveq $6,$31,$22 # E : dst aligned?
0095 extql $3,$6,$3 # U : low part of 2nd dst word
0096 addq $20,$0,$20 # E : begin summing the words
0097 extqh $23,$6,$23 # U : L U L U : high part of 2nd dst word
0098
0099 srl $18,16,$4 # U : 0000000000CCDDAA
0100 or $2,$22,$2 # E : 1st dst word complete
0101 zap $19,0x3,$19 # U : <sign bits>bbaa0000
0102 or $3,$23,$3 # E : U L U L : 2nd dst word complete
0103
# Summation: after each addq into $20, cmpult (sum < addend) leaves the
# carry-out, 0 or 1, in the register that held the addend.
0104 cmpult $20,$0,$0 # E : carry from adding 1st src word
0105 addq $20,$1,$20 # E : add 2nd src word
0106 zapnot $18,0xa,$18 # U : 00000000DD00BB00
0107 zap $4,0xa,$4 # U : U U L L : 0000000000CC00AA
0108
0109 or $18,$4,$18 # E : 00000000DDCCBBAA
0110 nop # E :
0111 cmpult $20,$1,$1 # E : carry from adding 2nd src word
0112 addq $20,$2,$20 # E : U L U L : add 1st dst word
0113
0114 cmpult $20,$2,$2 # E : carry from adding 1st dst word
0115 addq $20,$3,$20 # E : add 2nd dst word
0116 cmpult $20,$3,$3 # E : (1 cycle stall on $20)
0117 addq $20,$18,$20 # E : U L U L (1 cycle stall on $20)
0118
0119 cmpult $20,$18,$18 # E : carry from adding swapped $18
0120 addq $20,$19,$20 # E : (1 cycle stall on $20)
0121 addq $0,$1,$0 # E : merge the carries back into the csum
0122 addq $2,$3,$2 # E : dst carries combined
0123
# NOTE(review): the additions that fold the collected carries back into
# the sum (the last addq of this group and the addq that follows it) are
# not themselves checked for carry-out.
0124 cmpult $20,$19,$19 # E : carry from adding swapped $19
0125 addq $18,$19,$18 # E : (1 cycle stall on $19)
0126 addq $0,$2,$0 # E : src + dst carries
0127 addq $20,$18,$20 # E : U L U L : sum += $18/$19 carries
0128
0129
# Fold: 64 bits -> sum of the two 32-bit halves -> sum of the three low
# 16-bit fields -> low field plus the field above it.
0130 addq $0,$20,$0 # E : sum += src/dst carries
0131 zapnot $0,15,$1 # U : Start folding output (1 cycle stall on $0)
0132 nop # E :
0133 srl $0,32,$0 # U : U L U L : (1 cycle stall on $0)
0134
0135 addq $1,$0,$1 # E : Finished generating ulong
0136 extwl $1,2,$2 # U : ushort[1] (1 cycle stall on $1)
0137 zapnot $1,3,$0 # U : ushort[0] (1 cycle stall on $1)
0138 extwl $1,4,$1 # U : ushort[2] (1 cycle stall on $1)
0139
0140 addq $0,$2,$0 # E : ushort[0] + ushort[1]
0141 addq $0,$1,$3 # E : Finished generating uint
0142
0143 extwl $3,2,$1 # U : ushort[1] (1 cycle stall on $3)
0144 nop # E : L U L U
0145
# $3 still contains its own bits 16 and up here; only the low 16 bits of
# the next sum are meaningful, and the final zapnot keeps just those.
# NOTE(review): a carry out of bit 15 in this last addition is discarded
# by that zapnot rather than wrapped around.
0146 addq $1,$3,$0 # E : Final carry
0147 not $0,$4 # E : complement (1 cycle stall on $0)
0148 zapnot $4,3,$0 # U : clear upper garbage bits
0149
0150 ret # L0 : L U L U
0151
0152 .end csum_ipv6_magic
0153 EXPORT_SYMBOL(csum_ipv6_magic)