0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084 #include <asm/asi.h>
0085 #include <asm/page.h>
0086
0087 #define ASI_STBI_P ASI_BLK_INIT_QUAD_LDD_P /* ASI named explicitly on the stxa that ends each line in the fill loops and on every stxa in the zero loops */
0088 #define ASI_STBIMRU_P ASI_ST_BLKINIT_MRU_P /* ASI written to the %asi register before the large non-zero fill loops */
0089
0090 /* Tuning constants used by M7memset */
0091 #define ST_CHUNK 24 /* 64-byte lines handled per pass of the large fill loop; .wr_loop_start counts it down by 4, so keep it a multiple of 4 */
0092 #define MIN_LOOP 16320 /* non-zero fill: whole-block byte counts below this skip the %asi loop and use plain stx */
0093 #define MIN_ZERO 512 /* zero fill: whole-block byte counts below this skip the ASI_STBI_P loop and use plain stx */
0094
0095 .section ".text"
0096 .align 32
0097
0098 /* M7clear_page / M7clear_user_page: two global labels for one entry point. */
0099 /* In: %o0 = destination address, left untouched here (presumably one page; TODO confirm with callers). */
0100 /* The stub only loads PAGE_SIZE into %o1 and then runs into M7bzero, which takes %o1 as the byte count, */
0101 /* so the overall effect is M7memset(%o0, 0, PAGE_SIZE). */
0102 .globl M7clear_page
0103 .globl M7clear_user_page
0104 M7clear_page:
0105 M7clear_user_page:
0106 set PAGE_SIZE, %o1 /* %o1 = byte count in the position M7bzero expects */
0107 /* no return instruction: execution falls through into M7bzero */
0108
0109 .size M7clear_page,.-M7clear_page /* symbol size covers only the set above */
0110 .size M7clear_user_page,.-M7clear_user_page
0111
0112 /* M7bzero: zero a buffer by entering M7memset with a fill value of 0. */
0113 /* In: %o0 = destination address, %o1 = byte count. */
0114 /* Rearranges the arguments into the M7memset layout (%o1 = fill value, %o2 = count) */
0115 /* and runs straight into M7memset; this stub has no return instruction of its own. */
0116 .globl M7bzero
0117 M7bzero:
0118 mov %o1, %o2 /* count goes to %o2; must happen before %o1 is overwritten */
0119 mov 0, %o1 /* fill value = 0 */
0120 /* falls through into M7memset */
0121
0122 .size M7bzero,.-M7bzero
0123 /* M7memset: In %o0 = sp1 (destination), %o1 = c, %o2 = n. Stores (char)c into n bytes at sp1; %o0 is never written, so it still holds sp1 at return. */
0124 .global M7memset
0125 .type M7memset, #function
0126 .register %g3, #scratch /* %g3 is written below: saved %asi in the non-zero loop, offset scratch in the zero loop */
0127 M7memset: /* working registers: %o5 = store cursor, %o2 = bytes left, %o3 and %o4 = per-phase counts, %g1 = threshold and loop counter */
0128 mov %o0, %o5 ! copy sp1 before using it
0129 cmp %o2, 7 ! if small counts, just write bytes
0130 bleu,pn %xcc, .wrchar
0131 and %o1, 0xff, %o1 ! o1 is (char)c
0132 /* n > 7: replicate the byte across the register. The instruction following each branch is its delay slot. */
0133 sll %o1, 8, %o3
0134 or %o1, %o3, %o1 ! now o1 has 2 bytes of c
0135 sll %o1, 16, %o3
0136 cmp %o2, 32
0137 blu,pn %xcc, .wdalign /* n < 32: word and byte stores only, using the 4-byte pattern built in the delay slot */
0138 or %o1, %o3, %o1 ! now o1 has 4 bytes of c
0139
0140 sllx %o1, 32, %o3
0141 or %o1, %o3, %o1 ! now o1 has 8 bytes of c
0142
0143 .dbalign:
0144 andcc %o5, 7, %o3 ! is sp1 aligned on a 8 byte bound?
0145 bz,pt %xcc, .blkalign ! already long word aligned
0146 sub %o3, 8, %o3 ! -(bytes till long word aligned)
0147
0148 add %o2, %o3, %o2 ! update o2 with new count
0149 ! Set -(%o3) bytes till sp1 long word aligned
0150 1: stb %o1, [%o5] ! there is at least 1 byte to set
0151 inccc %o3 ! byte clearing loop
0152 bl,pt %xcc, 1b /* repeat while the negative count in %o3 has not reached zero */
0153 inc %o5 /* delay slot: advance the cursor */
0154
0155 ! Now sp1 is long word aligned (sp1 is found in %o5)
0156 .blkalign:
0157 cmp %o2, 64 ! check if there are 64 bytes to set
0158 blu,pn %xcc, .wrshort
0159 mov %o2, %o3 /* delay slot: .wrshort takes its byte count in %o3 */
0160
0161 andcc %o5, 63, %o3 ! is sp1 block aligned?
0162 bz,pt %xcc, .blkwr ! now block aligned
0163 sub %o3, 64, %o3 ! o3 is -(bytes till block aligned)
0164 add %o2, %o3, %o2 ! o2 is the remainder
0165
0166 ! Store -(%o3) bytes till dst is block (64 byte) aligned.
0167 ! Use long word stores.
0168 ! Recall that dst is already long word aligned
0169 1:
0170 addcc %o3, 8, %o3 /* count up toward zero, 8 bytes per store */
0171 stx %o1, [%o5]
0172 bl,pt %xcc, 1b
0173 add %o5, 8, %o5 /* delay slot: advance the cursor */
0174
0175 ! Now sp1 is block aligned
0176 .blkwr:
0177 andn %o2, 63, %o4 ! calculate size of blocks in bytes
0178 brz,pn %o1, .wrzero ! special case if c == 0
0179 and %o2, 63, %o3 ! %o3 = bytes left after blk stores.
0180
0181 set MIN_LOOP, %g1 /* %g1 = threshold for entering the %asi store loop */
0182 cmp %o4, %g1 ! check there are enough bytes to set
0183 blu,pn %xcc, .short_set ! to justify cost of membar
0184 ! must be > pre-cleared lines
0185 nop
0186
0187 ! initial cache-clearing stores
0188 ! get store pipeline moving
0189 rd %asi, %g3 ! save %asi to be restored later
0190 wr %g0, ASI_STBIMRU_P, %asi /* every stxa written with the %asi operand below uses ASI_STBIMRU_P */
0191
0192 ! Primary memset loop for large memsets
0193 .wr_loop:
0194 sub %o5, 8, %o5 ! adjust %o5 for ASI store alignment
0195 mov ST_CHUNK, %g1 /* %g1 = lines to start in this pass */
0196 .wr_loop_start: /* store only the first long word of ST_CHUNK consecutive 64-byte lines, four lines per iteration */
0197 stxa %o1, [%o5+8]%asi
0198 subcc %g1, 4, %g1 /* four lines are started per iteration */
0199 stxa %o1, [%o5+8+64]%asi
0200 add %o5, 256, %o5
0201 stxa %o1, [%o5+8-128]%asi
0202 bgu %xcc, .wr_loop_start
0203 stxa %o1, [%o5+8-64]%asi /* delay slot: fourth line of this iteration */
0204
0205 sub %o5, ST_CHUNK*64, %o5 ! reset %o5
0206 mov ST_CHUNK, %g1
0207 /* second sweep over the same lines: store long words 1 to 7 of each; %o4 drops by 64 per line */
0208 .wr_loop_rest:
0209 stxa %o1, [%o5+8+8]%asi
0210 sub %o4, 64, %o4
0211 stxa %o1, [%o5+16+8]%asi
0212 subcc %g1, 1, %g1 /* one line finished per iteration; sets the flags tested by bgu below */
0213 stxa %o1, [%o5+24+8]%asi
0214 stxa %o1, [%o5+32+8]%asi
0215 stxa %o1, [%o5+40+8]%asi
0216 add %o5, 64, %o5
0217 stxa %o1, [%o5-8]%asi
0218 bgu %xcc, .wr_loop_rest
0219 stxa %o1, [%o5]ASI_STBI_P /* delay slot: last long word of the line, stored with ASI_STBI_P instead of %asi */
0220
0221 ! If more than ST_CHUNK*64 bytes remain to set, continue
0222 ! setting the first long word of each cache line in advance
0223 ! to keep the store pipeline moving.
0224
0225 cmp %o4, ST_CHUNK*64
0226 bge,pt %xcc, .wr_loop_start
0227 mov ST_CHUNK, %g1 /* delay slot: reload the line counter for the next pass */
0228
0229 brz,a,pn %o4, .asi_done /* no whole blocks left; the annulled delay slot runs only when the branch is taken */
0230 add %o5, 8, %o5 ! restore %o5 offset
0231 /* fewer than ST_CHUNK lines remain: write them one complete 64-byte line per iteration */
0232 .wr_loop_small:
0233 stxa %o1, [%o5+8]%asi
0234 stxa %o1, [%o5+8+8]%asi
0235 stxa %o1, [%o5+16+8]%asi
0236 stxa %o1, [%o5+24+8]%asi
0237 stxa %o1, [%o5+32+8]%asi
0238 subcc %o4, 64, %o4
0239 stxa %o1, [%o5+40+8]%asi
0240 add %o5, 64, %o5
0241 stxa %o1, [%o5-8]%asi
0242 bgu,pt %xcc, .wr_loop_small
0243 stxa %o1, [%o5]ASI_STBI_P /* delay slot: last long word of the line */
0244
0245 ba .asi_done
0246 add %o5, 8, %o5 ! restore %o5 offset
0247
0248 ! Special case loop for zero fill memsets
0249 ! For each 64 byte cache line, single STBI to first element
0250 ! clears line
0251 .wrzero:
0252 cmp %o4, MIN_ZERO ! check if enough bytes to set
0253 ! to pay %asi + membar cost
0254 blu %xcc, .short_set
0255 nop
0256 sub %o4, 256, %o4 /* bias the count by -256 so the loop can branch on the sign of its own subcc result */
0257
0258 .wrzero_loop: /* four lines (256 bytes) per iteration, one ASI_STBI_P store to the start of each line */
0259 mov 64, %g3 /* %g3 = offset of the second line; this path never saved %asi in %g3 */
0260 stxa %o1, [%o5]ASI_STBI_P
0261 subcc %o4, 256, %o4
0262 stxa %o1, [%o5+%g3]ASI_STBI_P
0263 add %o5, 256, %o5
0264 sub %g3, 192, %g3 /* %g3 = -128: third line, relative to the advanced cursor */
0265 stxa %o1, [%o5+%g3]ASI_STBI_P
0266 add %g3, 64, %g3 /* %g3 = -64: fourth line */
0267 bge,pt %xcc, .wrzero_loop
0268 stxa %o1, [%o5+%g3]ASI_STBI_P /* delay slot: fourth line of this iteration */
0269 add %o4, 256, %o4 /* undo the bias: %o4 = whole-block bytes still to clear (0, 64, 128 or 192) */
0270
0271 brz,pn %o4, .bsi_done
0272 nop
0273
0274 .wrzero_small:
0275 stxa %o1, [%o5]ASI_STBI_P
0276 subcc %o4, 64, %o4
0277 bgu,pt %xcc, .wrzero_small
0278 add %o5, 64, %o5 /* delay slot: next line */
0279 ba,a .bsi_done /* annulled branch-always: the following instruction is not executed as a delay slot */
0280
0281 .asi_done:
0282 wr %g3, 0x0, %asi ! restored saved %asi
0283 .bsi_done:
0284 membar #StoreStore ! required by use of Block Store Init
0285
0286 .short_set: /* %o4 = whole-block bytes still to write with plain stx; it is 0 when arriving from the ASI loops above */
0287 cmp %o4, 64 ! check if 64 bytes to set
0288 blu %xcc, 5f
0289 nop
0290 4: ! set final blocks of 64 bytes
0291 stx %o1, [%o5]
0292 stx %o1, [%o5+8]
0293 stx %o1, [%o5+16]
0294 stx %o1, [%o5+24]
0295 subcc %o4, 64, %o4
0296 stx %o1, [%o5+32]
0297 stx %o1, [%o5+40]
0298 add %o5, 64, %o5
0299 stx %o1, [%o5-16]
0300 bgu,pt %xcc, 4b
0301 stx %o1, [%o5-8] /* delay slot: eighth long word of the block */
0302
0303 5:
0304 ! Set the remaining long words
0305 .wrshort:
0306 subcc %o3, 8, %o3 ! Can we store any long words?
0307 blu,pn %xcc, .wrchars
0308 and %o2, 7, %o2 ! calc bytes left after long words
0309 6:
0310 subcc %o3, 8, %o3 /* borrow here means this store is the last long word */
0311 stx %o1, [%o5] ! store the long words
0312 bgeu,pt %xcc, 6b
0313 add %o5, 8, %o5 /* delay slot: advance the cursor */
0314
0315 .wrchars: ! check for extra chars
0316 brnz %o2, .wrfin /* 1 to 7 trailing bytes are finished by the byte loop */
0317 nop
0318 retl /* nothing left to store; %o0 still holds sp1 */
0319 nop
0320
0321 .wdalign:
0322 andcc %o5, 3, %o3 ! is sp1 aligned on a word boundary
0323 bz,pn %xcc, .wrword
0324 andn %o2, 3, %o3 ! create word sized count in %o3
0325
0326 dec %o2 ! decrement count
0327 stb %o1, [%o5] ! clear a byte
0328 b .wdalign /* re-test the alignment after each byte */
0329 inc %o5 ! next byte
0330
0331 .wrword:
0332 subcc %o3, 4, %o3 /* %o3 = word bytes left after this store */
0333 st %o1, [%o5] ! 4-byte writing loop
0334 bnz,pt %xcc, .wrword
0335 add %o5, 4, %o5 /* delay slot: advance the cursor */
0336
0337 and %o2, 3, %o2 ! leftover count, if any
0338
0339 .wrchar:
0340 ! Set the remaining bytes, if any
0341 brz %o2, .exit
0342 nop
0343 .wrfin:
0344 deccc %o2 /* %o2 is at least 1 on entry; loop until it reaches 0 */
0345 stb %o1, [%o5]
0346 bgu,pt %xcc, .wrfin
0347 inc %o5 /* delay slot: advance the cursor */
0348 .exit:
0349 retl ! %o0 was preserved
0350 nop
0351
0352 .size M7memset,.-M7memset