0001
0002
0003
0004
0005
0006 #include <linux/linkage.h>
0007
;;; Endian-dependent helpers used by the unaligned-source paths of memcpy.
;;; SHIFT_1 and SHIFT_2 are opposite shifts used to split each loaded word
;;; between the word being stored and the bytes carried to the next store.
;;; MERGE_1 and MERGE_2 position the leading halfword and byte that are read
;;; before the word loop so they can be or-ed into one carry register.
;;; EXTRACT_1 and EXTRACT_2 pull the trailing halfword and byte back out of
;;; that carry register after the loop.
;;; Several variants ignore their IMM argument, and the little endian
;;; MERGE_2 expands to nothing.
0008 #ifdef __LITTLE_ENDIAN__
0009 # define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
0010 # define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
0011 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
0012 # define MERGE_2(RX,RY,IMM)
0013 # define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
0014 # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
0015 #else
0016 # define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
0017 # define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
0018 # define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
0019 # define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
0020 # define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
0021 # define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
0022 #endif
0023
;;; Transfer width of the aligned block-copy loop of memcpy.
;;; With CONFIG_ARC_HAS_LL64 each LOADX or STOREX uses ldd or std and
;;; advances its pointer by 8; otherwise it uses ld or st and advances by 4.
;;; The block loop issues four loads and four stores per iteration, so one
;;; iteration moves 32 or 16 bytes: ZOLSHFT is log2 of that block size and
;;; ZOLAND is the mask that yields the bytes left over after the blocks.
0024 #ifdef CONFIG_ARC_HAS_LL64
0025 # define LOADX(DST,RX) ldd.ab DST, [RX, 8]
0026 # define STOREX(SRC,RX) std.ab SRC, [RX, 8]
0027 # define ZOLSHFT 5
0028 # define ZOLAND 0x1F
0029 #else
0030 # define LOADX(DST,RX) ld.ab DST, [RX, 4]
0031 # define STOREX(SRC,RX) st.ab SRC, [RX, 4]
0032 # define ZOLSHFT 4
0033 # define ZOLAND 0xF
0034 #endif
0035
;;; memcpy: copy r2 bytes from the buffer at r1 to the buffer at r0.
;;;
;;; In:   r0 = destination, r1 = source, r2 = byte count
;;; Out:  r0 is never written, so it still holds the destination on return
;;; Uses: r3 = running destination pointer, r1 = running source pointer,
;;;       r2 = running byte count (not kept up to date in the small-copy
;;;       loop), r4-r10 = scratch, lp_count = hardware loop counter.
;;;       NOTE(review): with the ldd and std forms of the block-copy macros
;;;       the odd partner of each named register (r5, r7, r9, r11) is
;;;       presumably used as well - confirm against the ISA manual.
;;;
;;; Outline:
;;;   1. size == 0: return.  size <= 8: plain byte loop.
;;;   2. Copy single bytes until the destination is 32bit aligned.
;;;   3. Dispatch on the low two bits of the source pointer:
;;;        0       -> CASE 0, unrolled block copy plus byte tail
;;;        1, 2, 3 -> CASE 1, 2, 3: aligned word loads whose bytes are
;;;                   shifted and merged with bytes carried in r5, so that
;;;                   every store in the word loop is a full word.
;;;
;;; Instructions with the .d suffix have a delay slot: the instruction that
;;; follows them is executed whether or not the branch is taken.  Several
;;; paths below rely on that, and on flags set by an earlier .f instruction
;;; still being live at a later lpnz or conditional shift.
0036 ENTRY_CFI(memcpy)
0037 mov.f 0, r2 ; only sets the flags from the size
0038 ;;; if size is zero, return immediately
0039 jz.d [blink]
0040 mov r3, r0 ; delay slot: work on a copy so r0 (the return value) is not clobbered
0041
0042 ;;; if size <= 8, skip all alignment handling and copy byte by byte
0043 cmp r2, 8
0044 bls.d @.Lsmallchunk
0045 mov.f lp_count, r2 ; delay slot: loop count = size; its flags feed the lpnz at .Lsmallchunk
0046
;;; Byte-copy until the destination is 32bit aligned: 4 - (dst & 3)
;;; iterations, skipped altogether when dst & 3 is already zero.
0047 and.f r4, r0, 0x03
0048 rsub lp_count, r4, 4 ; lp_count = 4 - r4 (no .f, flags of the and.f stay live)
0049 lpnz @.Laligndestination
0050 ;; LOOP BEGIN
0051 ldb.ab r5, [r1,1]
0052 sub r2, r2, 1 ; keep r2 equal to the bytes still to copy
0053 stb.ab r5, [r3,1]
0054 .Laligndestination:
0055
0056 ;;; Check the alignment of the source; r4 = src & 3 selects the case
0057 and.f r4, r1, 0x03
0058 bnz.d @.Lsourceunaligned
0059
0060 ;;; CASE 0: Both source and destination are 32bit aligned
0061 ;;; lp_count = number of full blocks; the loop body is unrolled x4
;;; NOTE: this lsr.f sits in the delay slot of the bnz.d above, so it also
;;; runs when the branch is taken.  The unaligned cases set lp_count and
;;; the flags again before they use them.
0062 lsr.f lp_count, r2, ZOLSHFT
0063 lpnz @.Lcopy32_64bytes
0064 ;; LOOP START
0065 LOADX (r6, r1)
0066 LOADX (r8, r1)
0067 LOADX (r10, r1)
0068 LOADX (r4, r1)
0069 STOREX (r6, r3)
0070 STOREX (r8, r3)
0071 STOREX (r10, r3)
0072 STOREX (r4, r3)
0073 .Lcopy32_64bytes:
0074
0075 and.f lp_count, r2, ZOLAND ;Leftover bytes: at most 31 with LL64, at most 15 without
0076 .Lsmallchunk:
;; Byte tail, shared with the size <= 8 path, which arrives here with
;; lp_count = size and the flags set by the mov.f in its delay slot.
0077 lpnz @.Lcopyremainingbytes
0078 ;; LOOP START
0079 ldb.ab r5, [r1,1]
0080 stb.ab r5, [r3,1]
0081 .Lcopyremainingbytes:
0082
0083 j [blink]
0084 ;;; END CASE 0
0085
0086 .Lsourceunaligned:
;; r4 = src & 3, which is 1, 2 or 3 here.  Both delayed branches below
;; test the flags of this single cmp; the delay-slot instructions carry
;; no .f suffix and so leave those flags alone.
0087 cmp r4, 2
0088 beq.d @.LunalignedOffby2
0089 sub r2, r2, 1 ; delay slot, runs in all three cases: one leading source byte is counted as consumed
0090
0091 bhi.d @.LunalignedOffby3
0092 ldb.ab r5, [r1, 1] ; delay slot, runs in cases 1 and 3: r5 = first source byte
0093
0094 ;;; CASE 1: The source is unaligned, off by 1
0095 ;; One byte was read above, which gives 16bit alignment;
0096 ;; two more bytes are read here to reach 32bit alignment
0097 ldh.ab r6, [r1, 2]
0098 sub r2, r2, 2
0099 ;; lp_count = number of 8-byte iterations (two words per iteration)
0100 lsr.f lp_count, r2, 3
0101 MERGE_1 (r6, r6, 8)
0102 MERGE_2 (r5, r5, 24)
0103 or r5, r5, r6 ; r5 = the three leading bytes, carried into the loop
0104
0105 ;; Both src and dst are aligned; lpnz uses the flags of the lsr.f above
0106 lpnz @.Lcopy8bytes_1
0107 ;; LOOP START
0108 ld.ab r6, [r1, 4]
0109 ld.ab r8, [r1,4]
0110
0111 SHIFT_1 (r7, r6, 24)
0112 or r7, r7, r5 ; first output word = carried bytes merged with part of r6
0113 SHIFT_2 (r5, r6, 8) ; the rest of r6 becomes the new carry
0114
0115 SHIFT_1 (r9, r8, 24)
0116 or r9, r9, r5 ; second output word = carry merged with part of r8
0117 SHIFT_2 (r5, r8, 8) ; the rest of r8 is carried to the next iteration
0118
0119 st.ab r7, [r3, 4]
0120 st.ab r9, [r3, 4]
0121 .Lcopy8bytes_1:
0122
;; r5 still carries three bytes that were read but not yet stored
0123 ;; Write back the remaining 16bits
0124 EXTRACT_1 (r6, r5, 16)
0125 sth.ab r6, [r3, 2]
0126 ;; Write back the remaining 8bits
0127 EXTRACT_2 (r5, r5, 16)
0128 stb.ab r5, [r3, 1]
0129
0130 and.f lp_count, r2, 0x07 ;Leftover bytes after the 8-byte iterations (0-7)
0131 lpnz @.Lcopybytewise_1
0132 ;; LOOP START
0133 ldb.ab r6, [r1,1]
0134 stb.ab r6, [r3,1]
0135 .Lcopybytewise_1:
0136 j [blink]
0137
0138 .LunalignedOffby2:
0139 ;;; CASE 2: The source is unaligned, off by 2
0140 ldh.ab r5, [r1, 2] ; r5 = two leading bytes; src is 32bit aligned afterwards
0141 sub r2, r2, 1 ; second of those two bytes (the first was counted at .Lsourceunaligned)
0142
0143 ;; Both src and dst are aligned
0144 ;; lp_count = number of 8-byte iterations (two words per iteration)
0145 lsr.f lp_count, r2, 3
0146 #ifdef __BIG_ENDIAN__
0147 asl.nz r5, r5, 16 ; only when the loop will run: move the carry to the upper half
0148 #endif
0149 lpnz @.Lcopy8bytes_2
0150 ;; LOOP START
0151 ld.ab r6, [r1, 4]
0152 ld.ab r8, [r1,4]
0153
0154 SHIFT_1 (r7, r6, 16)
0155 or r7, r7, r5 ; first output word = carried halfword merged with half of r6
0156 SHIFT_2 (r5, r6, 16) ; other half of r6 becomes the new carry
0157
0158 SHIFT_1 (r9, r8, 16)
0159 or r9, r9, r5 ; second output word = carry merged with half of r8
0160 SHIFT_2 (r5, r8, 16) ; other half of r8 is carried to the next iteration
0161
0162 st.ab r7, [r3, 4]
0163 st.ab r9, [r3, 4]
0164 .Lcopy8bytes_2:
0165
;; r5 still carries one halfword that was read but not yet stored.
;; NOTE(review): the .nz below presumably still sees the flags of the
;; lsr.f before the loop, since nothing in between uses the .f suffix.
0166 #ifdef __BIG_ENDIAN__
0167 lsr.nz r5, r5, 16 ; loop ran: bring the carried halfword back to the low half for sth
0168 #endif
0169 sth.ab r5, [r3, 2]
0170
0171 and.f lp_count, r2, 0x07 ;Leftover bytes after the 8-byte iterations (0-7)
0172 lpnz @.Lcopybytewise_2
0173 ;; LOOP START
0174 ldb.ab r6, [r1,1]
0175 stb.ab r6, [r3,1]
0176 .Lcopybytewise_2:
0177 j [blink]
0178
0179 .LunalignedOffby3:
0180 ;;; CASE 3: The source is unaligned, off by 3
0181 ;;; The one byte needed to reach 32bit alignment is already in r5: it was
;;; loaded, and counted in r2, by the delay slots at .Lsourceunaligned
0182
0183 ;; Both src and dst are aligned
0184 ;; lp_count = number of 8-byte iterations (two words per iteration)
0185 lsr.f lp_count, r2, 3
0186 #ifdef __BIG_ENDIAN__
0187 asl.ne r5, r5, 24 ; only when the loop will run (.ne and .nz name the same condition)
0188 #endif
0189 lpnz @.Lcopy8bytes_3
0190 ;; LOOP START
0191 ld.ab r6, [r1, 4]
0192 ld.ab r8, [r1,4]
0193
0194 SHIFT_1 (r7, r6, 8)
0195 or r7, r7, r5 ; first output word = carried byte merged with three bytes of r6
0196 SHIFT_2 (r5, r6, 24) ; remaining byte of r6 becomes the new carry
0197
0198 SHIFT_1 (r9, r8, 8)
0199 or r9, r9, r5 ; second output word = carry merged with three bytes of r8
0200 SHIFT_2 (r5, r8, 24) ; remaining byte of r8 is carried to the next iteration
0201
0202 st.ab r7, [r3, 4]
0203 st.ab r9, [r3, 4]
0204 .Lcopy8bytes_3:
0205
;; r5 still carries one byte that was read but not yet stored.
;; NOTE(review): the .nz below presumably still sees the flags of the
;; lsr.f before the loop, since nothing in between uses the .f suffix.
0206 #ifdef __BIG_ENDIAN__
0207 lsr.nz r5, r5, 24 ; loop ran: bring the carried byte back to the low byte for stb
0208 #endif
0209 stb.ab r5, [r3, 1]
0210
0211 and.f lp_count, r2, 0x07 ;Leftover bytes after the 8-byte iterations (0-7)
0212 lpnz @.Lcopybytewise_3
0213 ;; LOOP START
0214 ldb.ab r6, [r1,1]
0215 stb.ab r6, [r3,1]
0216 .Lcopybytewise_3:
0217 j [blink]
0218
0219 END_CFI(memcpy)