Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
0004  */
0005 
0006 /* This is optimized primarily for the ARC700.
0007    It would be possible to speed up the loops by one cycle / word
0008    respective one cycle / byte by forcing double source 1 alignment, unrolling
0009    by a factor of two, and speculatively loading the second word / byte of
0010    source 1; however, that would increase the overhead for loop setup / finish,
0011    and strcmp might often terminate early.  */
0012 
0013 #include <linux/linkage.h>
0014 
0015 ENTRY_CFI(strcmp)
     ; int strcmp(const char *s1 /* r0 */, const char *s2 /* r1 */)
     ; Returns r0 == 0 if equal, > 0 if s1 > s2, < 0 (bit 31 set) if s1 < s2.
     ; Fast path compares a word (4 bytes) at a time, but only when BOTH
     ; pointers are 4-byte aligned; otherwise falls back to .Lcharloop.
0016     or  r2,r0,r1        ; merge low bits of both pointers
0017     bmsk_s  r2,r2,1     ; keep bits [1:0]; nonzero => misaligned pointer
0018     brne    r2,0,.Lcharloop ; not word-aligned -> byte-at-a-time loop
0019     mov_s   r12,0x01010101  ; per-byte 0x01 constant for NUL detection
0020     ror r5,r12          ; r5 = 0x80808080 (per-byte top-bit mask)
0021 .Lwordloop:
0022     ld.ab   r2,[r0,4]   ; r2 = next word of s1, r0 += 4
0023     ld.ab   r3,[r1,4]   ; r3 = next word of s2, r1 += 4
0024     nop_s               ; scheduling filler (presumably covers load latency)
     ; Zero-byte detect: (r2 - 0x01010101) & ~r2 & 0x80808080.
     ; A set top bit in r4 flags a zero byte in r2 (it can also mis-flag an
     ; 0x01 byte via borrow from a lower zero byte; see big-endian note below).
0025     sub r4,r2,r12
0026     bic r4,r4,r2
0027     and r4,r4,r5
0028     brne    r4,0,.Lfound0   ; s1 word (apparently) contains a NUL byte
0029     breq    r2,r3,.Lwordloop ; words equal, no NUL -> next word
     ; Words differ and s1 word has no NUL: order by first differing byte.
0030 #ifdef  __LITTLE_ENDIAN__
0031     xor r0,r2,r3    ; mask for difference
0032     sub_s   r1,r0,1
0033     bic_s   r0,r0,r1    ; mask for least significant difference bit
0034     sub r1,r5,r0
0035     xor r0,r5,r1    ; mask for least significant difference byte
0036     and_s   r2,r2,r0    ; reduce both words to that single byte ...
0037     and_s   r3,r3,r0    ; ... so an unsigned word compare orders them
0038 #endif /* LITTLE ENDIAN */
     ; Big-endian needs no byte isolation: the most significant differing
     ; byte already dominates an unsigned word compare.
0039     cmp_s   r2,r3
0040     mov_s   r0,1        ; assume s1 > s2 -> +1
0041     j_s.d   [blink]
0042     bset.lo r0,r0,31    ; delay slot: r2 < r3 -> set bit 31, result negative
0043 
0044     .balign 4
0045 #ifdef __LITTLE_ENDIAN__
0046 .Lfound0:
     ; s1 word flagged as containing a NUL.  OR the zero indicator into the
     ; difference mask so the NUL byte itself terminates the comparison,
     ; then isolate the least significant difference byte as above.
0047     xor r0,r2,r3    ; mask for difference
0048     or  r0,r0,r4    ; or in zero indicator
0049     sub_s   r1,r0,1
0050     bic_s   r0,r0,r1    ; mask for least significant difference bit
0051     sub r1,r5,r0
0052     xor r0,r5,r1    ; mask for least significant difference byte
0053     and_s   r2,r2,r0
0054     and_s   r3,r3,r0
0055     sub.f   r0,r2,r3    ; 0 => strings equal up to and including the NUL
0056     mov.hi  r0,1        ; unsigned higher -> +1
0057     j_s.d   [blink]
0058     bset.lo r0,r0,31    ; delay slot: unsigned lower -> negative result
0059 #else /* BIG ENDIAN */
0060     /* The zero-detection above can mis-detect 0x01 bytes as zeroes
0061        because of carry-propagation from a lower significant zero byte.
0062        We can compensate for this by checking that bit0 is zero.
0063        This compensation is not necessary in the step where we
0064        get a low estimate for r2, because in any affected bytes
0065        we already have 0x00 or 0x01, which will remain unchanged
0066        when bit 7 is cleared.  */
0067     .balign 4
0068 .Lfound0:
0069     lsr r0,r4,8     ; shift zero flags down one byte position
0070     lsr_s   r1,r2   ; r1 = r2 >> 1 (each byte's bit 0 moves below bit 7)
0071     bic_s   r2,r2,r0    ; get low estimate for r2 and get ...
0072     bic_s   r0,r0,r1    ; <this is the adjusted mask for zeros>
0073     or_s    r3,r3,r0    ; ... high estimate r3 so that r2 > r3 will ...
0074     cmp_s   r3,r2       ; ... be independent of trailing garbage
0075     or_s    r2,r2,r0    ; likewise for r3 > r2
0076     bic_s   r3,r3,r0
0077     rlc r0,0        ; r0 := r2 > r3 ? 1 : 0
0078     cmp_s   r2,r3
0079     j_s.d   [blink]
0080     bset.lo r0,r0,31    ; delay slot: r2 < r3 -> negative result
0081 #endif /* ENDIAN */
0082 
0083     .balign 4
0084 .Lcharloop:
     ; Slow path for unaligned pointers: compare one byte at a time.
0085     ldb.ab  r2,[r0,1]   ; r2 = *s1++ (zero-extended byte load)
0086     ldb.ab  r3,[r1,1]   ; r3 = *s2++
0087     nop_s               ; scheduling filler (presumably covers load latency)
0088     breq    r2,0,.Lcmpend   ; end of s1 -> compute final result
0089     breq    r2,r3,.Lcharloop ; bytes equal (and s1 byte nonzero) -> continue
0090 .Lcmpend:
0091     j_s.d   [blink]
0092     sub r0,r2,r3    ; delay slot: result = *s1 - *s2
0093 END_CFI(strcmp)