0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032 #include <asm/export.h>
0033 # EX(insn): assemble insn at local label 99 and add an __ex_table entry for it: the offset from the entry to the insn (.long 99b - .), then an lda whose displacement is $exception - 99b.
0034 #define EX(x,y...) \
0035 99: x,##y; \
0036 .section __ex_table,"a"; \
0037 .long 99b - .; \
0038 lda $31, $exception-99b($31); \
0039 .previous
0040 # __clear_user: store zero bytes to the $17 bytes of memory that start at address $16.
0041 .set noat
0042 .set noreorder
0043 .align 4
0044 # In: $16 = destination address, $17 = byte count. Out: $0 = bytes left to clear (0 when the routine runs to completion).
0045 .globl __clear_user
0046 .ent __clear_user
0047 .frame $30, 0, $26
0048 .prologue 0
0049 # Writes $0-$5 and $16. Every load and store is wrapped in EX(), so its __ex_table entry refers to $exception. NOTE(review): a faulting access presumably resumes at $exception with $0 still holding the bytes-left count - confirm against the fixup handler.
0050 # Pipeline info : Slotting & Comments
0051 __clear_user:
0052 and $17, $17, $0 # $0 = $17 : running count of bytes left to clear
0053 and $16, 7, $4 # .. E .. .. : find dest head misalignment
0054 beq $0, $zerolength # U .. .. .. : U L U L
0055
0056 addq $0, $4, $1 # .. .. .. E : bias counter
0057 and $1, 7, $2 # .. .. E .. : number of misaligned bytes in tail
0058 # Note - we never actually use $2, so this is a moot computation
0059 # and we can rewrite this later...
0060 srl $1, 3, $1 # .. E .. .. : number of quadwords to clear
0061 beq $4, $headalign # U .. .. .. : U L U L
0062
0063 # Head is not aligned ($4 != 0): $16 points into the middle of a quadword.
0064 # Load that quadword, zero its bytes from offset $4 upward (8 - $4 bytes)
0065 # and store it back. If the whole request fits inside this one quadword
0066 # ($1 == 0), skip this and clear it with the byte loop at $onebyte instead.
0067 EX( ldq_u $5, 0($16) ) # .. .. .. L : load dst word to mask back in
0068 beq $1, $onebyte # .. .. U .. : sub-word store?
0069 mskql $5, $16, $5 # .. U .. .. : take care of misaligned head
0070 addq $16, 8, $16 # E .. .. .. : L U U L
0071
0072 EX( stq_u $5, -8($16) ) # .. .. .. L :
0073 subq $1, 1, $1 # .. .. E .. :
0074 addq $0, $4, $0 # .. E .. .. : bytes left -= 8 - misalignment
0075 subq $0, 8, $0 # E .. .. .. : U L U L
0076 bic $16, 7, $16 # E : drop the misalignment bits: $16 = next quadword boundary (stb in the tail uses the full address)
0077 .align 4
0078 # Values on entry to $headalign:
0079 # $0 is the number of bytes left to clear
0080 # $1 is the number of quadwords to clear (zero is a valid value)
0081 # $2 is the number of trailing bytes (0..7) and is never used
0082 # $16 is aligned 0mod8: either it arrived aligned ($4 == 0) or the head
0083 # path above rounded it up to the next quadword boundary. stq_u ignores
0084 # the low three address bits, but the byte loop at $onebyte does not.
0085 $headalign:
0086 subq $1, 16, $4 # .. .. .. E : If < 16, we can not use the huge loop
0087 and $16, 0x3f, $2 # .. .. E .. : Forward work for huge loop
0088 subq $2, 0x40, $3 # .. E .. .. : bias counter (huge loop)
0089 blt $4, $trailquad # U .. .. .. : U L U L
0090
0091 # At least 16 quadwords remain, so the 64-byte block loop below runs at
0092 # least once. First store single quadwords until $16 is 0mod64; $3 counts
0093 # up from ($16 & 0x3f) - 0x40 to zero in steps of 8 while that happens.
0094 # $3 is always negative at this point, so the already-aligned test has to
0095 # look at $2 (the offset within the 64-byte block), not at $3.
0096
0097
0098 nop # .. .. .. E
0099 nop # .. .. E ..
0100 nop # .. E .. ..
0101 beq $2, $bigalign # U .. .. .. : U L U L : Aligned 0mod64
0102
0103 $alignmod64:
0104 EX( stq_u $31, 0($16) ) # .. .. .. L
0105 addq $3, 8, $3 # .. .. E ..
0106 subq $0, 8, $0 # .. E .. ..
0107 nop # E .. .. .. : U L U L
0108
0109 nop # .. .. .. E
0110 subq $1, 1, $1 # .. .. E ..
0111 addq $16, 8, $16 # .. E .. ..
0112 blt $3, $alignmod64 # U .. .. .. : U L U L
0113
0114 $bigalign:
0115 # $0 is the number of bytes left
0116 # $1 is the number of quads left (at least 8 here)
0117 # $16 is aligned 0mod64
0118 # Each trip through $do_wh64 stores 8 quads (64 bytes) and at least one
0119 # trip is taken. $4 = $1 - 16 is computed at the top of a trip, so the
0120 # loop repeats while at least 8 quads remain after the current trip.
0121 # $0 is not updated after every single store. It is decremented in
0122 # staggered steps (8, 16, 16, 16, 8) and each step only accounts for
0123 # stores that precede it, so at any store $0 never understates the
0124 # number of bytes still to be cleared.
0125 # $3 is the address handed to wh64 at the top of the next trip: $16 + 128
0126 # as computed in this trip, or this trip's $16 when $5 is negative.
0127
0128
0129
0130
0131
0132 nop # E :
0133 nop # E :
0134 nop # E :
0135 bis $16,$16,$3 # E : U L U L : Initial wh64 address is dest
0136 # The first hint therefore targets the block this trip is about to store.
0137
0138 $do_wh64:
0139 wh64 ($3) # .. .. .. L1 : memory subsystem hint
0140 subq $1, 16, $4 # .. .. E .. : Forward calculation - repeat the loop?
0141 EX( stq_u $31, 0($16) ) # .. L .. ..
0142 subq $0, 8, $0 # E .. .. .. : U L U L
0143
0144 addq $16, 128, $3 # E : Target address of wh64
0145 EX( stq_u $31, 8($16) ) # L :
0146 EX( stq_u $31, 16($16) ) # L :
0147 subq $0, 16, $0 # E : U L L U
0148
0149 nop # E :
0150 EX( stq_u $31, 24($16) ) # L :
0151 EX( stq_u $31, 32($16) ) # L :
0152 subq $0, 168, $5 # E : U L L U : two trips through the loop left?
0153 # 168 = 192 - 24: $0 has already been reduced by 24 in this trip, so
0154 # $5 < 0 exactly when fewer than 192 bytes were left at the top of it.
0155 subq $0, 16, $0 # E :
0156 EX( stq_u $31, 40($16) ) # L :
0157 EX( stq_u $31, 48($16) ) # L :
0158 cmovlt $5, $16, $3 # E : U L L U : Latency 2, extra mapping cycle
0159
0160 subq $1, 8, $1 # E :
0161 subq $0, 16, $0 # E :
0162 EX( stq_u $31, 56($16) ) # L :
0163 nop # E : U L U L
0164
0165 nop # E :
0166 subq $0, 8, $0 # E :
0167 addq $16, 64, $16 # E :
0168 bge $4, $do_wh64 # U : U L U L
0169
0170 $trailquad:
0171 # zero to 16 quadwords left to store, plus any trailing bytes
0172 # $1 is the number of quadwords left to go.
0173 #
0174 nop # .. .. .. E
0175 nop # .. .. E ..
0176 nop # .. E .. ..
0177 beq $1, $trailbytes # U .. .. .. : U L U L : Only 0..7 bytes to go
0178
0179 $onequad:
0180 EX( stq_u $31, 0($16) ) # .. .. .. L
0181 subq $1, 1, $1 # .. .. E ..
0182 subq $0, 8, $0 # .. E .. ..
0183 nop # E .. .. .. : U L U L
0184
0185 nop # .. .. .. E
0186 nop # .. .. E ..
0187 addq $16, 8, $16 # .. E .. ..
0188 bgt $1, $onequad # U .. .. .. : U L U L
0189
0190 # We have an unknown number of bytes left to go.
0191 $trailbytes:
0192 nop # .. .. .. E
0193 nop # .. .. E ..
0194 nop # .. E .. ..
0195 beq $0, $zerolength # U .. .. .. : U L U L
0196
0197 # $0 contains the number of bytes left to copy (0..31)
0198 # so we will use $0 as the loop counter
0199 # We know for a fact that $0 > 0 due to previous context
0200 $onebyte:
0201 EX( stb $31, 0($16) ) # .. .. .. L
0202 subq $0, 1, $0 # .. .. E .. :
0203 addq $16, 1, $16 # .. E .. .. :
0204 bgt $0, $onebyte # U .. .. .. : U L U L
0205
0206 $zerolength:
0207 $exception: # Destination for exception recovery(?)
0208 nop # .. .. .. E :
0209 nop # .. .. E .. :
0210 nop # .. E .. .. :
0211 ret $31, ($26), 1 # L0 .. .. .. : L U L U
0212 .end __clear_user
0213 EXPORT_SYMBOL(__clear_user)