/*
 * AVX512 implementation of RAID-6 syndrome functions
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

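/*
 * 0x1d is the low byte of the RAID-6 generator polynomial
 * x^8 + x^4 + x^3 + x^2 + 1 (0x11d).  Broadcast across a 512-bit
 * register it supplies the conditional-XOR mask for the GF(2^8)
 * multiply-by-2 step used when accumulating the Q syndrome.
 */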
static const struct raid6_avx512_constants {
	u64 x1d[8];
} raid6_avx512_constants __aligned(512/8) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};

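/*
 * The syndrome routines below compare bytes into mask registers
 * (vpcmpgtb) and expand masks back to vectors (vpmovm2b); both are
 * AVX512BW instructions, so AVX512BW is required on top of AVX512F.
 */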
static int raid6_have_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

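/*
 * Unrolled-by-1 AVX512 implementation: one 64-byte vector per loop
 * iteration.  P is the plain XOR of all data blocks.  Q is the
 * GF(2^8) weighted sum Q = sum(g^z * D_z), evaluated by Horner's
 * rule from the highest data disk down:
 * Q = (...(D_z0*g + D_z0-1)*g + ...)*g + D_0.  Multiplying by
 * g (= 0x02) is a byte-wise add of a value to itself plus a
 * conditional XOR with 0x1d where the top bit overflowed
 * (the vpcmpgtb/vpmovm2b/vpaddb/vpandq/vpxorq sequence below).
 */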
static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1"	/* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 64) {
		asm volatile("prefetchnta %0\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"	/* P[0] */
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
			     "vmovdqa64 %1,%%zmm6"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
				     "vmovdqa64 %0,%%zmm6"
				     :
				     : "m" (dptr[z][d]));
		}
		asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
			     "vpmovm2b %%k1,%%zmm5\n\t"
			     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
			     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
			     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm4,%1\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4"
			     :
			     : "m" (p[d]), "m" (q[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

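/*
 * xor_syndrome: partial-stripe (read-modify-write) update.  The
 * blocks in [start, stop] are XORed into the existing P and folded
 * into Q at their proper GF(2^8) weights; the final loop over
 * z < start only multiplies the running Q term by g, since those
 * positions contribute no new data.
 */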
static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0"
		     : : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 64) {
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm2\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2"
			     :
			     : "m" (dptr[z0][d]), "m" (p[d]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4"
				     :
				     : "m" (dptr[z][d]));
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4"
				     :
				     : );
		}
		asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
			     /* Don't use movntdq for r/w memory area < cache line */
			     "vmovdqa64 %%zmm4,%0\n\t"
			     "vmovdqa64 %%zmm2,%1"
			     :
			     : "m" (q[d]), "m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x1 = {
	raid6_avx5121_gen_syndrome,
	raid6_avx5121_xor_syndrome,
	raid6_have_avx512,
	"avx512x1",
	.priority = 2		/* Prefer AVX512 over priority 1 (SSE2 and others) */
};
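/*
 * Unrolled-by-2 AVX512 implementation
 */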
static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1"	/* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

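	/* We uniformly assume a single prefetch covers at least 64 bytes */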
	for (d = 0; d < bytes; d += 128) {
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"	/* P[0] */
			     "vmovdqa64 %1,%%zmm3\n\t"	/* P[1] */
			     "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
			     "vmovdqa64 %%zmm3,%%zmm6"	/* Q[1] */
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "prefetchnta %1\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vmovntdq %%zmm4,%2\n\t"
			     "vmovntdq %%zmm6,%3"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
			       "m" (q[d+64]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0"
		     : : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 128) {
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm6\n\t"
			     "vmovdqa64 %2,%%zmm2\n\t"
			     "vmovdqa64 %3,%%zmm3\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm3,%%zmm3"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
			       "m" (p[d]), "m" (p[d+64]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : );
		}
		asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
			     "vpxorq %1,%%zmm6,%%zmm6\n\t"
			     /* Don't use movntdq for r/w
			      * memory area < cache line
			      */
			     "vmovdqa64 %%zmm4,%0\n\t"
			     "vmovdqa64 %%zmm6,%1\n\t"
			     "vmovdqa64 %%zmm2,%2\n\t"
			     "vmovdqa64 %%zmm3,%3"
			     :
			     : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
			       "m" (p[d+64]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x2 = {
	raid6_avx5122_gen_syndrome,
	raid6_avx5122_xor_syndrome,
	raid6_have_avx512,
	"avx512x2",
	.priority = 2		/* Prefer AVX512 over priority 1 (SSE2 and others) */
};

#ifdef CONFIG_X86_64

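/*
 * Unrolled-by-4 AVX512 implementation
 */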
static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t"	/* Zero temp */
		     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"	/* P[0] */
		     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"	/* P[1] */
		     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"	/* Q[0] */
		     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"	/* Q[1] */
		     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */
		     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */
		     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */
		     "vpxorq %%zmm14,%%zmm14,%%zmm14"	/* Q[3] */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 256) {
		for (z = z0; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "prefetchnta %1\n\t"
				     "prefetchnta %2\n\t"
				     "prefetchnta %3\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vmovdqa64 %2,%%zmm13\n\t"
				     "vmovdqa64 %3,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
				     "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
				       "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
			     "vmovntdq %%zmm10,%2\n\t"
			     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
			     "vmovntdq %%zmm11,%3\n\t"
			     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
			     "vmovntdq %%zmm4,%4\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm6,%5\n\t"
			     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
			     "vmovntdq %%zmm12,%6\n\t"
			     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
			     "vmovntdq %%zmm14,%7\n\t"
			     "vpxorq %%zmm14,%%zmm14,%%zmm14"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
			       "m" (q[d+128]), "m" (q[d+192]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

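/*
 * Unrolled-by-4 partial-stripe update: same scheme as
 * raid6_avx5121_xor_syndrome(), but processing 256 bytes per
 * iteration with four P/Q accumulator pairs.
 */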
static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0"
		     :: "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 256) {
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm6\n\t"
			     "vmovdqa64 %2,%%zmm12\n\t"
			     "vmovdqa64 %3,%%zmm14\n\t"
			     "vmovdqa64 %4,%%zmm2\n\t"
			     "vmovdqa64 %5,%%zmm3\n\t"
			     "vmovdqa64 %6,%%zmm10\n\t"
			     "vmovdqa64 %7,%%zmm11\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
			     "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
			     "vpxorq %%zmm14,%%zmm11,%%zmm11"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
			       "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
			       "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]));

		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
				     "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
				     "prefetchnta %0\n\t"
				     "prefetchnta %2\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vmovdqa64 %2,%%zmm13\n\t"
				     "vmovdqa64 %3,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
				     "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
				       "m" (dptr[z][d+128]),
				       "m" (dptr[z][d+192]));
		}
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     :
			     : "m" (q[d]), "m" (q[d+128]));

		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
				     "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : );
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vmovntdq %%zmm10,%2\n\t"
			     "vmovntdq %%zmm11,%3\n\t"
			     "vpxorq %4,%%zmm4,%%zmm4\n\t"
			     "vpxorq %5,%%zmm6,%%zmm6\n\t"
			     "vpxorq %6,%%zmm12,%%zmm12\n\t"
			     "vpxorq %7,%%zmm14,%%zmm14\n\t"
			     "vmovntdq %%zmm4,%4\n\t"
			     "vmovntdq %%zmm6,%5\n\t"
			     "vmovntdq %%zmm12,%6\n\t"
			     "vmovntdq %%zmm14,%7"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
			       "m" (q[d+128]), "m" (q[d+192]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x4 = {
	raid6_avx5124_gen_syndrome,
	raid6_avx5124_xor_syndrome,
	raid6_have_avx512,
	"avx512x4",
	.priority = 2		/* Prefer AVX512 over priority 1 (SSE2 and others) */
};
#endif /* CONFIG_X86_64 */

#endif /* CONFIG_AS_AVX512 */