Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
0004  */
0005 
0006 #include <linux/linkage.h>
0007 #include <asm/cache.h>
0008 
0009 /*
 * The memset implementation below is optimized to use the prefetchw and
 * prealloc instructions in the case of a CPU with a 64B L1 data cache line
 * (L1_CACHE_SHIFT == 6). If you want to implement an optimized memset for
 * other possible L1 data cache line lengths (32B and 128B), you should
 * rewrite the code, carefully checking that no prefetchw/prealloc
 * instruction is issued for L1 cache lines which don't belong to the
 * memset area.
0016  */
0017 
#if L1_CACHE_SHIFT == 6

/*
 * 64B L1 data cache line: emit the real cache-hint instructions.
 * prealloc allocates the next line in L1 without fetching its contents
 * from memory (the whole line is about to be overwritten by the stores
 * in the main loop); prefetchw fetches a line with intent to write.
 */
.macro PREALLOC_INSTR   reg, off
    prealloc    [\reg, \off]
.endm

.macro PREFETCHW_INSTR  reg, off
    prefetchw   [\reg, \off]
.endm

#else

/*
 * Any other L1 line length: the offsets used below are tuned for 64B
 * lines and could hint at cache lines outside the memset area (see the
 * header comment), so expand both macros to nothing.  Plain stores in
 * the loops remain correct, just without the cache hints.
 */
.macro PREALLOC_INSTR   reg, off
.endm

.macro PREFETCHW_INSTR  reg, off
.endm

#endif
0037 
/*
 * void *memset(void *s, int c, size_t n)
 *
 * In:    r0 = s, r1 = c (fill byte in bits 7:0), r2 = n
 * Out:   r0 = s (r3 walks the buffer so the return value survives)
 * Uses ARC zero-overhead loops (lp_count / lpnz) and branch delay
 * slots: the instruction after a ".d" branch/jump executes whether or
 * not the branch is taken.  ".f" suffixes set the flags; later ".hi"
 * / ".ls" / "jz" predicates consume them.
 */
ENTRY_CFI(memset)
    PREFETCHW_INSTR r0, 0   ; Prefetch the first write location
    mov.f   0, r2
;;; if size is zero
    jz.d    [blink]
    mov r3, r0      ; delay slot: copy dest so we don't clobber ret val

;;; if length < 8, skip the bulk path: byte-store via .Lsmallchunk
    brls.d.nt   r2, 8, .Lsmallchunk
    mov.f   lp_count,r2     ; delay slot: loop count for the small path

;;; Align the destination to a 4-byte boundary, one byte at a time
    and.f   r4, r0, 0x03    ; r4 = misalignment (0..3); Z skips the loop
    rsub    lp_count, r4, 4 ; bytes needed to align = 4 - r4
    lpnz    @.Laligndestination
    ;; LOOP BEGIN
    stb.ab  r1, [r3,1]
    sub r2, r2, 1           ; keep the remaining byte count in r2
.Laligndestination:

;;; Destination is aligned: replicate the fill byte into every byte of
;;; r4 and r5 (the even/odd pair feeding the 64-bit std.ab stores)
    and r1, r1, 0xFF
    asl r4, r1, 8
    or  r4, r4, r1          ; r4 = 00:00:cc:cc
    asl r5, r4, 16
    or  r5, r5, r4          ; r5 = cc:cc:cc:cc
    mov r4, r5              ; r4 = cc:cc:cc:cc

;;; Split the count so that PREALLOC in the 64-byte loop never hints a
;;; cache line past the end of the buffer (see the header comment): the
;;; final 64-byte chunk is pushed onto the 32-byte loop instead.
;;; (sub3/add3 = subtract/add with second operand shifted left by 3,
;;; i.e. -64 / +64 here.)
    sub3    lp_count, r2, 8
    cmp     r2, 64
    bmsk.hi r2, r2, 5       ; n > 64:  r2 = (n & 63) ...
    mov.ls  lp_count, 0     ; n <= 64: no 64-byte iterations at all
    add3.hi r2, r2, 8       ; ... + 64 (tail incl. the final 64B chunk)

;;; Convert len to Dwords, unfold x8
    lsr.f   lp_count, lp_count, 6   ; (n - 64) / 64 full iterations

    lpnz    @.Lset64bytes
    ;; LOOP START
    PREALLOC_INSTR  r3, 64  ; alloc next line w/o fetching

#ifdef CONFIG_ARC_HAS_LL64
    std.ab  r4, [r3, 8]
    std.ab  r4, [r3, 8]
    std.ab  r4, [r3, 8]
    std.ab  r4, [r3, 8]
    std.ab  r4, [r3, 8]
    std.ab  r4, [r3, 8]
    std.ab  r4, [r3, 8]
    std.ab  r4, [r3, 8]
#else
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
#endif
.Lset64bytes:

    lsr.f   lp_count, r2, 5 ; tail (r2 <= 127 bytes): full 32B chunks
    lpnz    .Lset32bytes
    ;; LOOP START
#ifdef CONFIG_ARC_HAS_LL64
    std.ab  r4, [r3, 8]
    std.ab  r4, [r3, 8]
    std.ab  r4, [r3, 8]
    std.ab  r4, [r3, 8]
#else
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
    st.ab   r4, [r3, 4]
#endif
.Lset32bytes:

    and.f   lp_count, r2, 0x1F ; last <= 31 bytes, one at a time
.Lsmallchunk:                  ; n < 8 enters here, lp_count = n
    lpnz    .Lcopy3bytes
    ;; LOOP START
    stb.ab  r1, [r3, 1]
.Lcopy3bytes:

    j   [blink]

END_CFI(memset)
0137 
/*
 * void memzero(void *s, size_t n) -- bzero-style entry point.
 * Shuffles the arguments into memset(s, 0, n) form and tail-calls
 * memset; blink is left untouched so memset returns straight to our
 * caller.
 */
ENTRY_CFI(memzero)
    ; adjust bzero args to memset args
    mov r2, r1
    b.d  memset    ; tail call: blink untouched, memset returns for us
    mov r1, 0      ; delay slot: fill byte = 0
END_CFI(memzero)