/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex-strings work originally authored by
 * Linaro. The original sources can be found at:
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

/*
 * Fill in the buffer with character c (alignment handled by the hardware)
 *
 * Parameters:
 *  x0 - buf
 *  x1 - c
 *  x2 - n
 * Returns:
 *  x0 - buf
 */
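
/*
 * For reference, the C semantics this routine implements are simply the
 * following (an illustrative sketch only, not the code that gets built):
 *
 *	void *memset(void *buf, int c, size_t n)
 *	{
 *		unsigned char *p = buf;
 *
 *		while (n--)
 *			*p++ = (unsigned char)c;
 *		return buf;
 *	}
 */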

dstin       .req    x0
val         .req    w1
count       .req    x2
tmp1        .req    x3
tmp1w       .req    w3
tmp2        .req    x4
tmp2w       .req    w4
zva_len_x   .req    x5
zva_len     .req    w5
zva_bits_x  .req    x6

A_l         .req    x7
A_lw        .req    w7
dst         .req    x8
tmp3w       .req    w9
tmp3        .req    x9

SYM_FUNC_START(__pi_memset)
    mov dst, dstin  /* Preserve return value.  */
    and A_lw, val, #255
    orr A_lw, A_lw, A_lw, lsl #8
    orr A_lw, A_lw, A_lw, lsl #16
    orr A_l, A_l, A_l, lsl #32
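    /*
     * A_l now holds the fill byte replicated into all eight byte lanes,
     * i.e. A_l == (c & 0xff) * 0x0101010101010101.
     */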

    cmp count, #15
    b.hi    .Lover16_proc
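    /*
     * Short path, count <= 15: test each bit of count and store 8, 4, 2
     * and 1 bytes accordingly; each tbz skips its store when that bit of
     * count is clear.
     */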
    /* All stores below may be unaligned. */
    tbz count, #3, 1f
    str A_l, [dst], #8
1:
    tbz count, #2, 2f
    str A_lw, [dst], #4
2:
    tbz count, #1, 3f
    strh    A_lw, [dst], #2
3:
    tbz count, #0, 4f
    strb    A_lw, [dst]
4:
    ret

.Lover16_proc:
    /* Check whether the start address is 16-byte aligned. */
    neg tmp2, dst
    ands    tmp2, tmp2, #15
    b.eq    .Laligned
/*
 * count is at least 16, so we can use an stp to store the first 16 bytes
 * and then advance dst to the next 16-byte boundary, leaving all later
 * stores aligned.
 */
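/*
 * Example: if dst ends in ...0x9, tmp2 = 7; the stp below writes 16 bytes
 * starting at the unaligned dst, then dst += 7 lands on a 16-byte boundary
 * and count is reduced by the 7 head bytes already written.
 */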
    stp A_l, A_l, [dst] /* unaligned store */
    /* make dst 16-byte aligned */
    sub count, count, tmp2
    add dst, dst, tmp2

.Laligned:
    cbz A_l, .Lzero_mem

.Ltail_maybe_long:
    cmp count, #64
    b.ge    .Lnot_short
.Ltail63:
    ands    tmp1, count, #0x30
    b.eq    3f
    cmp tmp1w, #0x20
    b.eq    1f
    b.lt    2f
    stp A_l, A_l, [dst], #16
1:
    stp A_l, A_l, [dst], #16
2:
    stp A_l, A_l, [dst], #16
/*
 * Fewer than 16 bytes remain: write the last 16 bytes of the buffer with
 * an stp ending at dst + count. Some bytes are written twice and the
 * access may be unaligned, but that is harmless for memset.
 */
3:
    ands    count, count, #15
    cbz count, 4f
    add dst, dst, count
    stp A_l, A_l, [dst, #-16]   /* Repeat some/all of last store. */
4:
    ret

    /*
    * Critical loop. Start at a new cache line boundary. Assuming
    * 64 bytes per line, this ensures the entire loop is in one line.
    */
    .p2align    L1_CACHE_SHIFT
.Lnot_short:
    sub dst, dst, #16 /* Pre-bias.  */
    sub count, count, #64
1:
    stp A_l, A_l, [dst, #16]
    stp A_l, A_l, [dst, #32]
    stp A_l, A_l, [dst, #48]
    stp A_l, A_l, [dst, #64]!
    subs    count, count, #64
    b.ge    1b
    tst count, #0x3f
    add dst, dst, #16
    b.ne    .Ltail63
.Lexitfunc:
    ret

    /*
    * For zeroing memory, check to see if we can use the ZVA feature to
    * zero entire 'cache' lines.
    */
.Lzero_mem:
    cmp count, #63
    b.le    .Ltail63
    /*
    * For zeroing small amounts of memory, it's not worth setting up
    * the line-clear code.
    */
    cmp count, #128
    b.lt    .Lnot_short /* if we fall through, count is at least 128 bytes */

    mrs tmp1, dczid_el0
    tbnz    tmp1, #4, .Lnot_short
    mov tmp3w, #4
    and zva_len, tmp1w, #15 /* Safety: other bits reserved.  */
    lsl zva_len, tmp3w, zva_len
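    /*
     * zva_len now holds the DC ZVA block size in bytes: DCZID_EL0[3:0]
     * (BS) is log2 of the block size in words, so bytes = 4 << BS. Bit 4
     * (DZP), tested above, indicates that DC ZVA must not be used.
     */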

    ands    tmp3w, zva_len, #63
    /*
    * Ensure zva_len is at least 64: using DC ZVA is not worthwhile when
    * the block size is smaller than 64 bytes.
    */
    b.ne    .Lnot_short
.Lzero_by_line:
    /*
    * Compute how far we need to go to become suitably aligned. We're
    * already at quad-word alignment.
    */
    cmp count, zva_len_x
    b.lt    .Lnot_short     /* Not enough to reach alignment.  */
    sub zva_bits_x, zva_len_x, #1
    neg tmp2, dst
    ands    tmp2, tmp2, zva_bits_x
    b.eq    2f          /* Already aligned.  */
    /* Not aligned; check that there's enough to zero after alignment. */
    sub tmp1, count, tmp2
    /*
    * Guarantee that the length remaining for ZVA is at least 64 bytes
    * (and at least one ZVA block), so the code at 2f cannot run past
    * the end of the buffer.
    */
    cmp tmp1, #64
    ccmp    tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
    b.lt    .Lnot_short
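    /*
     * The ccmp above compares tmp1 with zva_len only when tmp1 >= 64;
     * otherwise it forces NZCV to 0b1000 (N set), so the b.lt takes the
     * .Lnot_short fallback whenever tmp1 < 64 or tmp1 < zva_len.
     */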
    /*
    * We know that there's at least 64 bytes to zero and that it's safe
    * to overrun by 64 bytes.
    */
    mov count, tmp1
1:
    stp A_l, A_l, [dst]
    stp A_l, A_l, [dst, #16]
    stp A_l, A_l, [dst, #32]
    subs    tmp2, tmp2, #64
    stp A_l, A_l, [dst, #48]
    add dst, dst, #64
    b.ge    1b
    /* We've overrun a bit, so adjust dst downwards. */
    add dst, dst, tmp2
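    /*
     * dst is now aligned to the ZVA block size; each dc zva below zeroes
     * one whole block of zva_len bytes.
     */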
2:
    sub count, count, zva_len_x
3:
    dc  zva, dst
    add dst, dst, zva_len_x
    subs    count, count, zva_len_x
    b.ge    3b
    ands    count, count, zva_bits_x
    b.ne    .Ltail_maybe_long
    ret
SYM_FUNC_END(__pi_memset)

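/*
 * __pi_memset is the primary implementation (the __pi_ prefix marks it as
 * safe to call from position-independent early boot code). __memset is a
 * plain alias, and memset is a weak alias so that an instrumented
 * implementation (for example KASAN's) can override it.
 */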
SYM_FUNC_ALIAS(__memset, __pi_memset)
EXPORT_SYMBOL(__memset)

SYM_FUNC_ALIAS_WEAK(memset, __pi_memset)
EXPORT_SYMBOL(memset)