Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
0004  */
0005 
0006 #include <linux/linkage.h>
0007 
/*
 * Endian-dependent helpers used by the unaligned-source paths of memcpy
 * below. Each one expands to a single shift or mask instruction, except
 * MERGE_2 which expands to nothing on little endian.
 *
 *   SHIFT_1     - shifts a freshly loaded word so that its leading bytes
 *                 land in the part of the output word left free by the
 *                 bytes carried over from the previous load; the result is
 *                 then OR-ed with that carry.
 *   SHIFT_2     - shifts the same loaded word the other way, keeping the
 *                 bytes that did not fit; this becomes the new carry.
 *   MERGE_1/2   - position the first halfword and the first byte so they
 *                 can be OR-ed into one carry register (off-by-1 path).
 *   EXTRACT_1/2 - pull the halfword, then the byte, back out of the carry
 *                 register for the final sth/stb of the off-by-1 path.
 *
 * Note that IMM is ignored by EXTRACT_1 on little endian (fixed 0xFFFF
 * mask) and by EXTRACT_2 on big endian (fixed shift by 8).
 * The trailing "; <<" and "; >>" are part of the macro text and end up as
 * assembler comments after expansion.
 */
0008 #ifdef __LITTLE_ENDIAN__
0009 # define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
0010 # define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
0011 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
0012 # define MERGE_2(RX,RY,IMM)
0013 # define EXTRACT_1(RX,RY,IMM)   and RX, RY, 0xFFFF
0014 # define EXTRACT_2(RX,RY,IMM)   lsr RX, RY, IMM
0015 #else
0016 # define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
0017 # define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
0018 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
0019 # define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
0020 # define EXTRACT_1(RX,RY,IMM)   lsr RX, RY, IMM
0021 # define EXTRACT_2(RX,RY,IMM)   lsr RX, RY, 0x08
0022 #endif
0023 
/*
 * Building blocks of the fully aligned copy loop in memcpy below.
 *
 *   LOADX/STOREX - one load/store with address write-back (.ab), stepping
 *                  the address register by 8 bytes (ldd/std) when
 *                  CONFIG_ARC_HAS_LL64 is set, otherwise by 4 bytes (ld/st).
 *
 * That loop issues four LOADX and four STOREX per iteration, i.e. it moves
 * 32 bytes per iteration with LL64 and 16 bytes without, hence:
 *
 *   ZOLSHFT - right shift turning a byte count into a loop iteration count
 *             (log2 of the bytes moved per iteration)
 *   ZOLAND  - mask giving the bytes left over after the loop
 *             (bytes moved per iteration minus one)
 */
0024 #ifdef CONFIG_ARC_HAS_LL64
0025 # define LOADX(DST,RX)      ldd.ab  DST, [RX, 8]
0026 # define STOREX(SRC,RX)     std.ab  SRC, [RX, 8]
0027 # define ZOLSHFT        5
0028 # define ZOLAND         0x1F
0029 #else
0030 # define LOADX(DST,RX)      ld.ab   DST, [RX, 4]
0031 # define STOREX(SRC,RX)     st.ab   SRC, [RX, 4]
0032 # define ZOLSHFT        4
0033 # define ZOLAND         0xF
0034 #endif
0035 
/*
 * memcpy: copy r2 bytes from the address in r1 to the address in r0.
 *
 * Register use, as visible in the code below:
 *   r0       - destination; never written here (per the existing comment it
 *              is the return value)
 *   r1       - source pointer, advanced by every load (.ab write-back)
 *   r2       - number of bytes still to be copied
 *   r3       - working copy of r0, advanced by every store
 *   r4-r10   - scratch. NOTE(review): with the 8-byte LOADX/STOREX each
 *              named register presumably stands for an even/odd register
 *              pair, so r5, r7, r9 and r11 would be used too - confirm
 *              against the ISA manual.
 *   lp_count - iteration count for the loops set up with lpnz
 *
 * Outline:
 *   1. size == 0: return at once.
 *   2. size <= 8: plain byte loop (.Lsmallchunk).
 *   3. otherwise copy single bytes until the destination is 32-bit
 *      aligned, then choose a path from (source & 3):
 *        0       - both aligned: LOADX/STOREX loop, unrolled four times
 *        1, 2, 3 - aligned word loads from the source are re-packed with
 *                  shifts into aligned word stores; 3, 2 or 1 byte(s)
 *                  respectively are carried in r5 between loads
 *      Every path finishes with a byte loop for the tail.
 *
 * Instructions carrying the .d suffix have a delay slot: the instruction
 * that follows is executed whether or not the branch/jump is taken.
 * lpnz sets up its loop only when the Z flag is clear; several places below
 * rely on flags surviving across instructions that have no .f suffix.
 */
0036 ENTRY_CFI(memcpy)
0037     mov.f   0, r2           ; only sets flags: Z when size is zero
0038 ;;; if size is zero
0039     jz.d    [blink]
0040     mov r3, r0      ; delay slot: r3 = working dst, do not clobber ret val
0041 
0042 ;;; if size <= 8
0043     cmp r2, 8
0044     bls.d   @.Lsmallchunk
0045     mov.f   lp_count, r2    ; delay slot: loop count = size, Z clear here
0046 
;;; Copy single bytes until the destination is 32bit aligned.
;;; r4 = dst & 3; when it is zero the loop is skipped, otherwise the loop
;;; runs 4 - r4 times and r2 is decremented once per byte.
0047     and.f   r4, r0, 0x03
0048     rsub    lp_count, r4, 4 ; lp_count = 4 - r4; flags of and.f are kept
0049     lpnz    @.Laligndestination
0050     ;; LOOP BEGIN
0051     ldb.ab  r5, [r1,1]
0052     sub r2, r2, 1
0053     stb.ab  r5, [r3,1]
0054 .Laligndestination:
0055 
0056 ;;; Check the alignment of the source
0057     and.f   r4, r1, 0x03
0058     bnz.d   @.Lsourceunaligned
0059 
;;; NOTE(review): the lsr.f below is the first instruction after bnz.d, so it
;;; sits in the delay slot and also runs when the branch is taken. This looks
;;; harmless: the unaligned path redoes cmp and reloads lp_count before use.
0060 ;;; CASE 0: Both source and destination are 32bit aligned
0061 ;;; Convert len to loop iterations (32 bytes each with LL64, else 16), unfold x4
0062     lsr.f   lp_count, r2, ZOLSHFT
0063     lpnz    @.Lcopy32_64bytes
0064     ;; LOOP START
0065     LOADX (r6, r1)
0066     LOADX (r8, r1)
0067     LOADX (r10, r1)
0068     LOADX (r4, r1)
0069     STOREX (r6, r3)
0070     STOREX (r8, r3)
0071     STOREX (r10, r3)
0072     STOREX (r4, r3)
0073 .Lcopy32_64bytes:
0074 
0075     and.f   lp_count, r2, ZOLAND ;Tail: 0..31 bytes (0..15 without LL64)
0076 .Lsmallchunk:
;;; Byte loop for the tail; also entered directly for size <= 8 with
;;; lp_count = size and Z clear.
0077     lpnz    @.Lcopyremainingbytes
0078     ;; LOOP START
0079     ldb.ab  r5, [r1,1]
0080     stb.ab  r5, [r3,1]
0081 .Lcopyremainingbytes:
0082 
0083     j   [blink]
0084 ;;; END CASE 0
0085 
0086 .Lsourceunaligned:
;;; r4 = src & 3, i.e. 1, 2 or 3. The flags of this single cmp drive both
;;; branches below: the sub and ldb.ab in the delay slots leave them alone.
0087     cmp r4, 2
0088     beq.d   @.LunalignedOffby2
0089     sub r2, r2, 1           ; delay slot: runs for all three cases
0090 
0091     bhi.d   @.LunalignedOffby3
0092     ldb.ab  r5, [r1, 1]     ; delay slot: runs for off by 1 and off by 3
0093 
0094 ;;; CASE 1: The source is unaligned, off by 1
0095     ;; The byte already read into r5 gives 16bit alignment,
0096     ;; one more halfword is needed to reach 32bit alignment
0097     ldh.ab  r6, [r1, 2]
0098     sub r2, r2, 2           ; 3 bytes in total are now held in registers
0099     ;; Convert to loop iterations of two words (8 bytes), unfold x2
0100     lsr.f   lp_count, r2, 3
    ;; Pack the byte and the halfword into r5 as a 3 byte carry.
    ;; None of these three instructions touches the flags set by lsr.f.
0101     MERGE_1 (r6, r6, 8)
0102     MERGE_2 (r5, r5, 24)
0103     or  r5, r5, r6
0104 
0105     ;; Both src and dst are aligned
0106     lpnz    @.Lcopy8bytes_1
0107     ;; LOOP START
    ;; Each stored word = 3 carried bytes + 1 byte of the loaded word;
    ;; the other 3 bytes of the loaded word become the new carry in r5.
0108     ld.ab   r6, [r1, 4]
0109     ld.ab   r8, [r1,4]
0110 
0111     SHIFT_1 (r7, r6, 24)
0112     or  r7, r7, r5
0113     SHIFT_2 (r5, r6, 8)
0114 
0115     SHIFT_1 (r9, r8, 24)
0116     or  r9, r9, r5
0117     SHIFT_2 (r5, r8, 8)
0118 
0119     st.ab   r7, [r3, 4]
0120     st.ab   r9, [r3, 4]
0121 .Lcopy8bytes_1:
0122 
0123     ;; Write back the first 16bits of the 3 byte carry
0124     EXTRACT_1 (r6, r5, 16)
0125     sth.ab  r6, [r3, 2]
0126     ;; Write back the last 8bits of the carry
0127     EXTRACT_2 (r5, r5, 16)
0128     stb.ab  r5, [r3, 1]
0129 
0130     and.f   lp_count, r2, 0x07 ;Tail: 0..7 bytes
0131     lpnz    @.Lcopybytewise_1
0132     ;; LOOP START
0133     ldb.ab  r6, [r1,1]
0134     stb.ab  r6, [r3,1]
0135 .Lcopybytewise_1:
0136     j   [blink]
0137 
0138 .LunalignedOffby2:
0139 ;;; CASE 2: The source is unaligned, off by 2
0140     ldh.ab  r5, [r1, 2]     ; one halfword reaches 32bit alignment
0141     sub r2, r2, 1           ; plus the delay slot sub: 2 bytes held in r5
0142 
0143     ;; Both src and dst are aligned
0144     ;; Convert to loop iterations of two words (8 bytes), unfold x2
0145     lsr.f   lp_count, r2, 3
0146 #ifdef __BIG_ENDIAN__
    ;; Only if the loop is going to run (flags of lsr.f): move the carried
    ;; halfword into the upper half, where the loop expects it
0147     asl.nz  r5, r5, 16
0148 #endif
0149     lpnz    @.Lcopy8bytes_2
0150     ;; LOOP START
    ;; Each stored word = 2 carried bytes + 2 bytes of the loaded word;
    ;; the other 2 bytes of the loaded word become the new carry in r5.
0151     ld.ab   r6, [r1, 4]
0152     ld.ab   r8, [r1,4]
0153 
0154     SHIFT_1 (r7, r6, 16)
0155     or  r7, r7, r5
0156     SHIFT_2 (r5, r6, 16)
0157 
0158     SHIFT_1 (r9, r8, 16)
0159     or  r9, r9, r5
0160     SHIFT_2 (r5, r8, 16)
0161 
0162     st.ab   r7, [r3, 4]
0163     st.ab   r9, [r3, 4]
0164 .Lcopy8bytes_2:
0165 
0166 #ifdef __BIG_ENDIAN__
    ;; Nothing since the lsr.f above has a .f suffix, so its flags still
    ;; hold: if the loop ran, bring the carry back to the lower half
0167     lsr.nz  r5, r5, 16
0168 #endif
0169     sth.ab  r5, [r3, 2]     ; write back the 2 carried bytes
0170 
0171     and.f   lp_count, r2, 0x07 ;Tail: 0..7 bytes
0172     lpnz    @.Lcopybytewise_2
0173     ;; LOOP START
0174     ldb.ab  r6, [r1,1]
0175     stb.ab  r6, [r3,1]
0176 .Lcopybytewise_2:
0177     j   [blink]
0178 
0179 .LunalignedOffby3:
0180 ;;; CASE 3: The source is unaligned, off by 3
0181 ;;; The single byte read into r5 by the delay slot above reaches 32bit alignment
0182 
0183     ;; Both src and dst are aligned
0184     ;; Convert to loop iterations of two words (8 bytes), unfold x2
0185     lsr.f   lp_count, r2, 3
0186 #ifdef __BIG_ENDIAN__
    ;; Only if the loop is going to run (flags of lsr.f): move the carried
    ;; byte into the top byte, where the loop expects it
0187     asl.ne  r5, r5, 24
0188 #endif
0189     lpnz    @.Lcopy8bytes_3
0190     ;; LOOP START
    ;; Each stored word = 1 carried byte + 3 bytes of the loaded word;
    ;; the remaining byte of the loaded word becomes the new carry in r5.
0191     ld.ab   r6, [r1, 4]
0192     ld.ab   r8, [r1,4]
0193 
0194     SHIFT_1 (r7, r6, 8)
0195     or  r7, r7, r5
0196     SHIFT_2 (r5, r6, 24)
0197 
0198     SHIFT_1 (r9, r8, 8)
0199     or  r9, r9, r5
0200     SHIFT_2 (r5, r8, 24)
0201 
0202     st.ab   r7, [r3, 4]
0203     st.ab   r9, [r3, 4]
0204 .Lcopy8bytes_3:
0205 
0206 #ifdef __BIG_ENDIAN__
    ;; Flags are still those of the lsr.f above: if the loop ran, bring
    ;; the carried byte back down to the low byte
0207     lsr.nz  r5, r5, 24
0208 #endif
0209     stb.ab  r5, [r3, 1]     ; write back the carried byte
0210 
0211     and.f   lp_count, r2, 0x07 ;Tail: 0..7 bytes
0212     lpnz    @.Lcopybytewise_3
0213     ;; LOOP START
0214     ldb.ab  r6, [r1,1]
0215     stb.ab  r6, [r3,1]
0216 .Lcopybytewise_3:
0217     j   [blink]
0218 
0219 END_CFI(memcpy)