0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022 #include <asm/export.h>
0023 #include <asm/regdef.h>
0024
0025 .set noreorder
0026 .set noat
0027 # strrchr: a0 = string, a1 = character; returns in v0 the address of the last match, or 0 if none
0028 .align 4
0029 .ent strrchr
0030 .globl strrchr
0031 strrchr:
0032 .frame sp, 0, ra
0033 .prologue 0
0034 # Replicate the search byte into all eight bytes of a1 while the first quadword is loading
0035 and a1, 0xff, t2 # E : 00000000000000ch
0036 insbl a1, 1, t4 # U : 000000000000ch00
0037 insbl a1, 2, t5 # U : 0000000000ch0000
0038 ldq_u t0, 0(a0) # L : load first quadword (a0 may be unaligned) Latency=3
0039
0040 mov zero, t6 # E : t6 is last match aligned addr (0 = no match yet)
0041 or t2, t4, a1 # E : 000000000000chch
0042 sll t5, 8, t3 # U : 00000000ch000000
0043 mov zero, t8 # E : t8 is last match byte compare mask (0 = no match yet)
0044
0045 andnot a0, 7, v0 # E : align source addr; v0 walks the string one quadword at a time
0046 or t5, t3, t3 # E : 00000000chch0000
0047 sll a1, 32, t2 # U : 0000chch00000000
0048 sll a1, 48, t4 # U : chch000000000000
0049
0050 or t4, a1, a1 # E : chch00000000chch
0051 or t2, t3, t2 # E : 0000chchchch0000
0052 or a1, t2, a1 # E : chchchchchchchch
0053 lda t5, -1 # E : build garbage mask (start from all ones)
0054
0055 cmpbge zero, t0, t1 # E : bits set iff byte == zero
0056 mskqh t5, a0, t4 # E : Complete garbage mask: zero the bytes that precede a0 in this quadword
0057 xor t0, a1, t2 # E : make bytes == c zero
0058 cmpbge zero, t4, t4 # E : bits set iff byte is garbage
0059
0060 cmpbge zero, t2, t3 # E : bits set iff byte == c
0061 andnot t1, t4, t1 # E : clear garbage from null test
0062 andnot t3, t4, t3 # E : clear garbage from char test
0063 bne t1, $eos # U : did we already hit the terminator?
0064
0065 # Main loop: scan one aligned quadword per pass, remembering the most recent one that held a match
0066 $loop:
0067 ldq t0, 8(v0) # L : load next quadword
0068 cmovne t3, v0, t6 # E : save previous comparisons match (address of that quadword)
0069 nop # : Latency=2, extra map slot (keep nop with cmov)
0070 nop
0071
0072 cmovne t3, t3, t8 # E : save previous match mask; Latency=2, extra map slot
0073 nop # : keep with cmovne
0074 addq v0, 8, v0 # E : advance v0 to the quadword just loaded
0075 xor t0, a1, t2 # E : make bytes == c zero
0076
0077 cmpbge zero, t0, t1 # E : bits set iff byte == zero
0078 cmpbge zero, t2, t3 # E : bits set iff byte == c
0079 beq t1, $loop # U : if we have not seen a null, loop
0080 nop
0081
0082 # Terminator found in the quadword at v0: drop any character matches that lie beyond it
0083 $eos:
0084 negq t1, t4 # E : isolate first null byte match
0085 and t1, t4, t4 # E : t4 = lowest set bit of t1
0086 subq t4, 1, t5 # E : build a mask of the bytes up to...
0087 or t4, t5, t4 # E : ... and including the null
0088
0089 and t3, t4, t3 # E : mask out char matches after null
0090 cmovne t3, t3, t8 # E : save it, if match found Latency=2, extra map slot
0091 nop # : Keep with cmovne
0092 nop
0093
0094 cmovne t3, v0, t6 # E : final quadword had a match, so it becomes the last match address
0095 nop # : Keep with cmovne
0096 # Turn the saved match mask into a byte offset and add it to the saved quadword address
0097 ctlz t8, t2 # U0 : Latency=3 (0x40 for t8=0)
0098 nop
0099
0100 cmoveq t8, 0x3f, t2 # E : Compensate for case when no match is seen (offset becomes 0)
0101 nop # E : hide the cmov latency (2) behind ctlz latency
0102 lda t5, 0x3f($31) # E : t5 = 63
0103 subq t5, t2, t5 # E : Normalize leading zero count: 63 - ctlz = index of highest match bit
0104
0105 addq t6, t5, v0 # E : and add to quadword address (t6 is still 0 when nothing matched)
0106 ret # L0 : Latency=3
0107 nop
0108 nop
0109
0110 .end strrchr
0111 EXPORT_SYMBOL(strrchr)