0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034 #include <asm/export.h>
0035 .set noreorder # keep the hand-scheduled instruction order below
0036 .set noat # the assembler may not use its temporary register implicitly
0037
# memchr -- find the first occurrence of a byte value in a memory range.
#
# Registers, as used by the code below:
#   $16  start address of the range (need not be aligned)
#   $17  byte value to look for (only the low 8 bits are used);
#        overwritten with that byte replicated eight times
#   $18  number of bytes to search; overwritten on the long path
#   $0   result: address of the first matching byte, or zero when
#        there is no match or the length is zero
#   $26  return address register named by .frame
# Scratch registers written: $1 - $7. The frame size is zero.
#
# Method: the search byte is replicated into all eight bytes of $17,
# each quadword is XORed with it, and cmpbge against $31 turns every
# zero byte of the XOR result (a match) into one bit of a byte mask.
#
# NOTE(review): the e0/e1 tags in the trailing comments look like
# issue-slot scheduling notes - confirm before reordering anything.
0038 .globl memchr
0039 .ent memchr
0040 memchr:
0041 .frame $30,0,$26,0
0042 .prologue 0
0043
0044 # Hack -- if someone passes in (size_t)-1, hoping to just
0045 # search until the end of the address space, we will overflow
0046 # below when we find the address of the last byte. Given
0047 # that we will never have a 56-bit address space, cropping
0048 # the length is the easiest way to avoid trouble.
# NOTE(review): the addq of $16 and $5 below can still wrap when $16
# lies within 2^56 bytes of the top of the address space - confirm
# that callers never pass such a pointer with a huge length.
0049 zap $18, 0x80, $5 #-e0 : $5 = length with its top byte cleared
0050
0051 beq $18, $not_found # .. e1 : zero length, nothing to search
0052 ldq_u $1, 0($16) # e1 : load first quadword
0053 insbl $17, 1, $2 # .. e0 : $2 = 000000000000ch00
0054 and $17, 0xff, $17 #-e0 : $17 = 00000000000000ch
0055 cmpult $18, 9, $4 # .. e1 : $4 = 1 when length <= 8
0056 or $2, $17, $17 # e0 : $17 = 000000000000chch
0057 lda $3, -1($31) # .. e1 : $3 = all ones
0058 sll $17, 16, $2 #-e0 : $2 = 00000000chch0000
0059 addq $16, $5, $5 # .. e1 : $5 = address one past the last byte
0060 or $2, $17, $17 # e1 : $17 = 00000000chchchch
0061 unop # :
0062 sll $17, 32, $2 #-e0 : $2 = chchchch00000000
0063 or $2, $17, $17 # e1 : $17 = chchchchchchchch
0064 extql $1, $16, $7 # e0 : $7 = low part of the quadword starting at $16
0065 beq $4, $first_quad # .. e1 : more than 8 bytes, take the long path
0066
# Short path: the whole range fits in one (possibly unaligned) quadword.
0067 ldq_u $6, -1($5) #-e1 : eight or fewer bytes to search
0068 extqh $6, $16, $6 # .. e0 : $6 = high part, from the quadword of the last byte
0069 mov $16, $0 # e0 : $0 = address of byte 0 of $1
0070 or $7, $6, $1 # .. e1 : $1 = quadword starting at $16
0071
0072 # Deal with the case where at most 8 bytes remain to be searched
0073 # in $1. E.g.:
0074 # $18 = 6
0075 # $1 = ????c6c5c4c3c2c1
# On entry $0 is the address of byte 0 of $1 and $3 is still all ones.
0076 $last_quad:
0077 negq $18, $6 #-e0 : $6 = -$18, used as the shift count
0078 xor $17, $1, $1 # .. e1 : matching bytes become zero
0079 srl $3, $6, $6 # e0 : $6 = mask of $18 bits set
0080 cmpbge $31, $1, $2 # .. e1 : $2 bit i set when byte i of $1 is zero
0081 and $2, $6, $2 #-e0 : ignore bytes beyond the first $18
0082 beq $2, $not_found # .. e1 : no match inside the range
0083
0084 $found_it:
0085 # Now, determine which byte matched:
# $2 holds one bit per matching byte of the quadword at address $0.
# Keep only the lowest set bit, then add 4, 2 and 1 to $0 as needed
# to turn that bit position into a byte offset.
0086 negq $2, $3 # e0 :
0087 and $2, $3, $2 # e1 : $2 = lowest set bit only
0088
0089 and $2, 0x0f, $1 #-e0 :
0090 addq $0, 4, $3 # .. e1 :
0091 cmoveq $1, $3, $0 # e0 : bit not in positions 0-3: offset += 4
0092
0093 addq $0, 2, $3 # .. e1 :
0094 and $2, 0x33, $1 #-e0 :
0095 cmoveq $1, $3, $0 # .. e1 : bit not in positions 0,1,4,5: offset += 2
0096
0097 and $2, 0x55, $1 # e0 :
0098 addq $0, 1, $3 # .. e1 :
0099 cmoveq $1, $3, $0 #-e0 : bit in an odd position: offset += 1
0100
0101 $done: ret # .. e1 : return with the match address in $0
0102
0103 # Deal with the case where $18 > 8 bytes remain to be
0104 # searched. $16 may not be aligned.
# The bytes of the first quadword that lie in front of $16 are forced
# to ff after the XOR so that cmpbge cannot report them as matches.
0105 .align 4
0106 $first_quad:
0107 andnot $16, 0x7, $0 #-e1 : $0 = $16 rounded down to a quadword boundary
0108 insqh $3, $16, $2 # .. e0 : $2 = 0000ffffffffffff ($16<0:2> ff)
0109 xor $1, $17, $1 # e0 : matching bytes become zero
0110 or $1, $2, $1 # e1 : $1 = ====ffffffffffff
0111 cmpbge $31, $1, $2 #-e0 : $2 bit i set when byte i of $1 is zero
0112 bne $2, $found_it # .. e1 : match in the first quadword
0113
0114 # At least one byte left to process.
0115
0116 ldq $1, 8($0) # e0 : fetch the next quadword
0117 subq $5, 1, $18 # .. e1 : $18 = address of the last byte
0118 addq $0, 8, $0 #-e0 : $0 = address of the quadword now in $1
0119
0120 # Make $18 point to last quad to be accessed (the
0121 # last quad may or may not be partial).
0122
0123 andnot $18, 0x7, $18 # .. e1 :
0124 cmpult $0, $18, $2 # e0 : $2 = 1 when $0 is before the last quad
0125 beq $2, $final # .. e1 : $1 already holds the last quad
0126
0127 # At least two quads remain to be accessed.
0128
0129 subq $18, $0, $4 #-e0 : $4 <- nr quads to be processed
0130 and $4, 8, $4 # e1 : odd number of quads?
0131 bne $4, $odd_quad_count # e1 : yes, enter the loop at its second half
0132
0133 # At least three quads remain to be accessed
0134
0135 mov $1, $4 # e0 : move prefetched value to correct reg
0136
# Main loop, two quadwords per iteration. $4 and $1 take turns as the
# quadword being tested and the one being prefetched; $0 is the
# address of the quadword being tested. The last quad (address $18)
# is loaded here but tested at $last_quad.
0137 .align 4
0138 $unrolled_loop:
0139 ldq $1, 8($0) #-e0 : prefetch $1
0140 xor $17, $4, $2 # .. e1 :
0141 cmpbge $31, $2, $2 # e0 :
0142 bne $2, $found_it # .. e1 : match in the quad at $0
0143
0144 addq $0, 8, $0 #-e0 :
0145 $odd_quad_count:
0146 xor $17, $1, $2 # .. e1 :
0147 ldq $4, 8($0) # e0 : prefetch $4
0148 cmpbge $31, $2, $2 # .. e1 :
0149 addq $0, 8, $6 #-e0 : $6 = address of the quad now in $4
0150 bne $2, $found_it # .. e1 : match in the quad at $0
0151
0152 cmpult $6, $18, $6 # e0 : still before the last quad?
0153 addq $0, 8, $0 # .. e1 :
0154 bne $6, $unrolled_loop #-e1 :
0155
0156 mov $4, $1 # e0 : move prefetched value into $1
0157 $final: subq $5, $0, $18 # .. e1 : $18 <- number of bytes left to do
0158 bne $18, $last_quad # e1 : test the last, possibly partial, quad
0159
0160 $not_found:
0161 mov $31, $0 #-e0 : return zero, no match
0162 ret # .. e1 :
0163
0164 .end memchr
0165 EXPORT_SYMBOL(memchr)