/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2022, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#define L(label) .L ## label

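/* Masks of a repeated byte value, used by the word-at-a-time NUL check
   below.  */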
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

#define src1            x0
#define src2            x1
#define result          x0

#define data1           x2
#define data1w          w2
#define data2           x3
#define data2w          w3
#define has_nul         x4
#define diff            x5
#define off1            x5
#define syndrome        x6
#define tmp             x6
#define data3           x7
#define zeroones        x8
#define shift           x9
#define off2            x10
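/* diff/off1 share x5 and syndrome/tmp share x6; the aliased values are
   never live at the same time, so each pair can reuse one register.  */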

/* On big-endian targets the early bytes of a word are at the MSB end;
   on little-endian, at the LSB end.  LS_FW means shifting towards the
   early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */
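/* For a single byte X, for example:
     X = 0x00: (X - 1) & ~(X | 0x7f) = 0xff & 0x80 = 0x80  (non-zero)
     X = 0x01: (X - 1) & ~(X | 0x7f) = 0x00 & 0x80 = 0x00
     X = 0x80: (X - 1) & ~(X | 0x7f) = 0x7f & 0x00 = 0x00
   Applied to a whole word, each 0x80 in the result marks a NUL byte,
   subject to the borrow caveat above.  */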

SYM_FUNC_START(__pi_strcmp)
        sub     off2, src2, src1
        mov     zeroones, REP8_01
        and     tmp, src1, 7
        tst     off2, 7
        b.ne    L(misaligned8)
        cbnz    tmp, L(mutual_align)
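        /* Fall through: off2 is a multiple of 8 and src1 is 8-byte
           aligned, so src2 is too; compare a word at a time.  */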

        .p2align 4

L(loop_aligned):
        ldr     data2, [src1, off2]
        ldr     data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
        rev     tmp, data1
        sub     has_nul, tmp, zeroones
        orr     tmp, tmp, REP8_7f
#else
        sub     has_nul, data1, zeroones
        orr     tmp, data1, REP8_7f
#endif
        bics    has_nul, has_nul, tmp   /* Non-zero if NUL terminator.  */
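        /* Compare data1 with data2 only if no NUL was found (EQ);
           otherwise force NE (NZCV = 0) so the loop exits on either a
           NUL terminator or a difference between the strings.  */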
        ccmp    data1, data2, 0, eq
        b.eq    L(loop_aligned)
#ifdef __AARCH64EB__
        rev     has_nul, has_nul
#endif
        eor     diff, data1, data2
        orr     syndrome, diff, has_nul
L(end):
#ifndef __AARCH64EB__
        rev     syndrome, syndrome
        rev     data1, data1
        rev     data2, data2
#endif
        clz     shift, syndrome
        /* The most-significant-non-zero bit of the syndrome marks either the
           first bit that is different, or the top bit of the first zero byte.
           Shifting left now will bring the critical information into the
           top bits.  */
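        /* For example (little-endian): if bytes 0 and 1 match and are
           non-NUL and byte 2 is the first difference, then after the
           byte reverses above the syndrome's highest set bit falls in
           bits 47:40, shift is between 16 and 23, and the lsl below
           aligns that bit at bit 63 ready for the lsr by 56.  */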
        lsl     data1, data1, shift
        lsl     data2, data2, shift
        /* But we need to zero-extend (char is unsigned) the value and then
           perform a signed 32-bit subtraction.  */
        lsr     data1, data1, 56
        sub     result, data1, data2, lsr 56
        ret

        .p2align 4

L(mutual_align):
        /* Sources are mutually aligned, but are not currently at an
           alignment boundary.  Round down the addresses and then mask off
           the bytes that precede the start point.  */
        bic     src1, src1, 7
        ldr     data2, [src1, off2]
        ldr     data1, [src1], 8
        neg     shift, src2, lsl 3      /* Bits to alignment -64.  */
        mov     tmp, -1
        LS_FW   tmp, tmp, shift
        orr     data1, data1, tmp
        orr     data2, data2, tmp
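        /* The bytes preceding the start point are now 0xff in both
           words, so they compare equal and cannot look like a NUL
           terminator.  */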
        b       L(start_realigned)

L(misaligned8):
        /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
           checking to make sure that we don't access beyond the end of SRC2.  */
        cbz     tmp, L(src1_aligned)
L(do_misaligned):
        ldrb    data1w, [src1], 1
        ldrb    data2w, [src2], 1
        cmp     data1w, 0
        ccmp    data1w, data2w, 0, ne   /* NZCV = 0b0000.  */
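        /* Taken if data1 is a NUL terminator (the forced NZCV = 0 reads
           as NE) or if the two bytes differ.  */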
        b.ne    L(done)
        tst     src1, 7
        b.ne    L(do_misaligned)

L(src1_aligned):
        neg     shift, src2, lsl 3
        bic     src2, src2, 7
        ldr     data3, [src2], 8
#ifdef __AARCH64EB__
        rev     data3, data3
#endif
        lsr     tmp, zeroones, shift
        orr     data3, data3, tmp
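        /* Any bytes read from before the real start of src2 have been
           ORed with 0x01, so the NUL check below cannot fire on them.  */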
        sub     has_nul, data3, zeroones
        orr     tmp, data3, REP8_7f
        bics    has_nul, has_nul, tmp
        b.ne    L(tail)

        sub     off1, src2, src1
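        /* src2 was rounded down to an 8-byte boundary and advanced one
           word, so in the loop [src1, off1] addresses successive aligned
           src2 words (used only for NUL detection) while [src1, off2]
           addresses the unaligned src2 bytes that line up with data1.  */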

        .p2align 4

L(loop_unaligned):
        ldr     data3, [src1, off1]
        ldr     data2, [src1, off2]
#ifdef __AARCH64EB__
        rev     data3, data3
#endif
        sub     has_nul, data3, zeroones
        orr     tmp, data3, REP8_7f
        ldr     data1, [src1], 8
        bics    has_nul, has_nul, tmp
        ccmp    data1, data2, 0, eq
        b.eq    L(loop_unaligned)

        lsl     tmp, has_nul, shift
#ifdef __AARCH64EB__
        rev     tmp, tmp
#endif
        eor     diff, data1, data2
        orr     syndrome, diff, tmp
        cbnz    syndrome, L(end)
L(tail):
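        /* The aligned src2 word (data3) holds a NUL that the word
           comparison has not reached yet.  Shift data3 and its NUL mask
           so they line up with the word at src1 for one final check.  */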
        ldr     data1, [src1]
        neg     shift, shift
        lsr     data2, data3, shift
        lsr     has_nul, has_nul, shift
#ifdef __AARCH64EB__
        rev     data2, data2
        rev     has_nul, has_nul
#endif
        eor     diff, data1, data2
        orr     syndrome, diff, has_nul
        b       L(end)

L(done):
        sub     result, data1, data2
        ret
SYM_FUNC_END(__pi_strcmp)
SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
EXPORT_SYMBOL_NOKASAN(strcmp)