0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025 #include <linux/kernel.h>
0026 #include <asm/fpu/api.h>
0027
0028 #include "i915_memcpy.h"
0029
0030 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
0031 #define CI_BUG_ON(expr) BUG_ON(expr)
0032 #else
0033 #define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
0034 #endif
0035
0036 static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
0037
0038 static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
0039 {
0040 kernel_fpu_begin();
0041
0042 while (len >= 4) {
0043 asm("movntdqa (%0), %%xmm0\n"
0044 "movntdqa 16(%0), %%xmm1\n"
0045 "movntdqa 32(%0), %%xmm2\n"
0046 "movntdqa 48(%0), %%xmm3\n"
0047 "movaps %%xmm0, (%1)\n"
0048 "movaps %%xmm1, 16(%1)\n"
0049 "movaps %%xmm2, 32(%1)\n"
0050 "movaps %%xmm3, 48(%1)\n"
0051 :: "r" (src), "r" (dst) : "memory");
0052 src += 64;
0053 dst += 64;
0054 len -= 4;
0055 }
0056 while (len--) {
0057 asm("movntdqa (%0), %%xmm0\n"
0058 "movaps %%xmm0, (%1)\n"
0059 :: "r" (src), "r" (dst) : "memory");
0060 src += 16;
0061 dst += 16;
0062 }
0063
0064 kernel_fpu_end();
0065 }
0066
0067 static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
0068 {
0069 kernel_fpu_begin();
0070
0071 while (len >= 4) {
0072 asm("movntdqa (%0), %%xmm0\n"
0073 "movntdqa 16(%0), %%xmm1\n"
0074 "movntdqa 32(%0), %%xmm2\n"
0075 "movntdqa 48(%0), %%xmm3\n"
0076 "movups %%xmm0, (%1)\n"
0077 "movups %%xmm1, 16(%1)\n"
0078 "movups %%xmm2, 32(%1)\n"
0079 "movups %%xmm3, 48(%1)\n"
0080 :: "r" (src), "r" (dst) : "memory");
0081 src += 64;
0082 dst += 64;
0083 len -= 4;
0084 }
0085 while (len--) {
0086 asm("movntdqa (%0), %%xmm0\n"
0087 "movups %%xmm0, (%1)\n"
0088 :: "r" (src), "r" (dst) : "memory");
0089 src += 16;
0090 dst += 16;
0091 }
0092
0093 kernel_fpu_end();
0094 }
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113 bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
0114 {
0115 if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
0116 return false;
0117
0118 if (static_branch_likely(&has_movntdqa)) {
0119 if (likely(len))
0120 __memcpy_ntdqa(dst, src, len >> 4);
0121 return true;
0122 }
0123
0124 return false;
0125 }
0126
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
/*
 * i915_unaligned_memcpy_from_wc() - MOVNTDQA copy without alignment demands
 * @dst: destination pointer
 * @src: source pointer
 * @len: number of bytes to copy
 *
 * If @src is not 16-byte aligned, the leading bytes up to the next 16-byte
 * boundary (or @len, whichever is smaller) are copied with memcpy(). The
 * rest is handed to __memcpy_ntdqu() as a count of 16-byte chunks.
 *
 * The chunk count is rounded up, so when the remaining length is not a
 * multiple of 16 the final chunk reads from @src and writes to @dst up to
 * 15 bytes beyond @len. Callers must provide buffers where that is safe.
 *
 * CI_BUG_ON() checks i915_has_memcpy_from_wc() first; no fallback path is
 * provided here.
 */
void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
	unsigned long start = (unsigned long)src;

	CI_BUG_ON(!i915_has_memcpy_from_wc());

	if (!IS_ALIGNED(start, 16)) {
		/* Bytes needed to bring src up to a 16-byte boundary. */
		unsigned long head = min(ALIGN(start, 16) - start, len);

		memcpy(dst, src, head);

		dst += head;
		src += head;
		len -= head;
	}

	/* Nothing left once the head copy consumed the whole request. */
	if (unlikely(!len))
		return;

	__memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
}
0158
0159 void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
0160 {
0161
0162
0163
0164
0165 if (static_cpu_has(X86_FEATURE_XMM4_1) &&
0166 !boot_cpu_has(X86_FEATURE_HYPERVISOR))
0167 static_branch_enable(&has_movntdqa);
0168 }