// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/arch/alpha/lib/memcpy.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */

/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */
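
/*
 * Roughly, for illustration (register names are made up, and the exact
 * code depends on the gcc version): for the "while (n >= 0)" loops below
 * we would like the single conditional branch
 *
 *	bge	$n,loop		# branch back while n >= 0
 *
 * but gcc instead materializes the comparison, e.g.
 *
 *	cmplt	$n,0,$t		# t = (n < 0)
 *	beq	$t,loop		# branch back when t == 0, i.e. n >= 0
 */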

#include <linux/types.h>
#include <linux/export.h>

/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
#define ALIGN_DEST_TO8_UP(d,s,n) \
    while (d & 7) { \
        if (n <= 0) return; \
        n--; \
        *(char *) d = *(char *) s; \
        d++; s++; \
    }
#define ALIGN_DEST_TO8_DN(d,s,n) \
    while (d & 7) { \
        if (n <= 0) return; \
        n--; \
        d--; s--; \
        *(char *) d = *(char *) s; \
    }
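
/*
 * Worked example (illustrative numbers): with d == 0x1003 and n == 100,
 * ALIGN_DEST_TO8_UP copies the five bytes at 0x1003..0x1007 one at a
 * time, leaving d == 0x1008 (8-byte aligned) and n == 95, so the main
 * loops below can store whole quadwords to aligned addresses.
 */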

/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word
 */
#define DO_REST_UP(d,s,n) \
    while (n > 0) { \
        n--; \
        *(char *) d = *(char *) s; \
        d++; s++; \
    }
#define DO_REST_DN(d,s,n) \
    while (n > 0) { \
        n--; \
        d--; s--; \
        *(char *) d = *(char *) s; \
    }

/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word
 */
#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)

/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid reading each unaligned source word twice.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
                      long n)
{
    ALIGN_DEST_TO8_UP(d,s,n);
    n -= 8;         /* to avoid compare against 8 in the loop */
    if (n >= 0) {
        unsigned long low_word, high_word;
        __asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
        do {
            unsigned long tmp;
            __asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
            n -= 8;
            __asm__("extql %1,%2,%0"
                :"=r" (low_word)
                :"r" (low_word), "r" (s));
            __asm__("extqh %1,%2,%0"
                :"=r" (tmp)
                :"r" (high_word), "r" (s));
            s += 8;
            *(unsigned long *) d = low_word | tmp;
            d += 8;
            low_word = high_word;
        } while (n >= 0);
    }
    n += 8;
    DO_REST_UP(d,s,n);
}
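
/*
 * Sketch of what the loop above computes (illustrative): with the source
 * misaligned by k == s & 7 bytes, extql shifts low_word down by k bytes
 * so its upper 8-k bytes land at the bottom, and extqh shifts high_word
 * up so its low k bytes land at the top; OR-ing the two reassembles the
 * unaligned quadword starting at s, which is then written with a single
 * aligned store. Each ldq_u result is reused as the next iteration's
 * low_word, so no source quadword is loaded twice.
 */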

static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
                      long n)
{
    /* I don't understand AXP assembler well enough for this. -Tim */
    s += n;
    d += n;
    while (n--)
        * (char *) --d = * (char *) --s;
}
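
/*
 * Illustrative note: unlike the _up variant, this copies one byte per
 * iteration from the end of the buffers back toward the start, giving
 * up the quadword tricks above. A backward copy of this kind is also
 * safe for overlapping regions with dest > src, memmove()-style.
 */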

/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but using a floating point register
 * for the move seems to slow things down (very small difference, though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
                    long n)
{
    ALIGN_DEST_TO8_UP(d,s,n);
    n -= 8;
    while (n >= 0) {
        unsigned long tmp;
        __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
        n -= 8;
        s += 8;
        *(unsigned long *) d = tmp;
        d += 8;
    }
    n += 8;
    DO_REST_ALIGNED_UP(d,s,n);
}
static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
                    long n)
{
    s += n;
    d += n;
    ALIGN_DEST_TO8_DN(d,s,n);
    n -= 8;
    while (n >= 0) {
        unsigned long tmp;
        s -= 8;
        __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
        n -= 8;
        d -= 8;
        *(unsigned long *) d = tmp;
    }
    n += 8;
    DO_REST_ALIGNED_DN(d,s,n);
}

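/*
 * Dispatch on relative alignment (illustrative numbers): the XOR test
 * below checks whether dest and src are misaligned by the same amount
 * mod 8. E.g. with dest == 0x1003 and src == 0x2b0b, both addresses are
 * 3 mod 8, so (0x1003 ^ 0x2b0b) & 7 == 0: once the destination has been
 * byte-aligned the source is aligned too, and the plain ldq/stq loop
 * applies. Otherwise the extql/extqh path does the byte shifting.
 */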
void * memcpy(void * dest, const void *src, size_t n)
{
    if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
        __memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
                     n);
        return dest;
    }
    __memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
    return dest;
}
EXPORT_SYMBOL(memcpy);