0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /* -*- linux-c -*- --------------------------------------------------------
0003  *
0004  *   Copyright (C) 2016 Intel Corporation
0005  *
0006  *   Author: Gayatri Kammela <gayatri.kammela@intel.com>
0007  *   Author: Megha Dey <megha.dey@linux.intel.com>
0008  *
0009  *   Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
0010  *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
0011  *
0012  * -----------------------------------------------------------------------
0013  */
0014 
0015 /*
0016  * AVX512 implementation of RAID-6 syndrome functions
0017  *
0018  */
0019 
0020 #ifdef CONFIG_AS_AVX512
0021 
0022 #include <linux/raid/pq.h>
0023 #include "x86.h"
0024 
0025 static const struct raid6_avx512_constants {
0026     u64 x1d[8];
0027 } raid6_avx512_constants __aligned(512/8) = {
0028     { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
0029       0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
0030       0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
0031       0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
0032 };
0033 
0034 static int raid6_have_avx512(void)
0035 {
0036     return boot_cpu_has(X86_FEATURE_AVX2) &&
0037         boot_cpu_has(X86_FEATURE_AVX) &&
0038         boot_cpu_has(X86_FEATURE_AVX512F) &&
0039         boot_cpu_has(X86_FEATURE_AVX512BW) &&
0040         boot_cpu_has(X86_FEATURE_AVX512VL) &&
0041         boot_cpu_has(X86_FEATURE_AVX512DQ);
0042 }
0043 
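/*
 * gen_syndrome() computes P as the plain XOR of all data blocks and Q
 * as the Reed-Solomon syndrome over GF(2^8).  The recurring sequence
 *
 *	vpcmpgtb / vpmovm2b / vpaddb / vpandq / vpxorq
 *
 * multiplies the running Q accumulator by 2 in GF(2^8): the signed
 * compare against zero selects bytes whose top bit is set, vpaddb x,x
 * shifts every byte left by one, and the masked XOR with 0x1d folds
 * the carry back in (0x11d being the field's generator polynomial).
 * Per byte this is roughly:
 *
 *	wq = (u8)(wq << 1) ^ ((wq & 0x80) ? 0x1d : 0);
 *
 * Results are written with non-temporal stores (vmovntdq) and ordered
 * by a final sfence, so the freshly computed parity does not pollute
 * the cache.
 */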
0044 static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
0045 {
0046     u8 **dptr = (u8 **)ptrs;
0047     u8 *p, *q;
0048     int d, z, z0;
0049 
0050     z0 = disks - 3;         /* Highest data disk */
0051     p = dptr[z0+1];         /* XOR parity */
0052     q = dptr[z0+2];         /* RS syndrome */
0053 
0054     kernel_fpu_begin();
0055 
0056     asm volatile("vmovdqa64 %0,%%zmm0\n\t"
0057              "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
0058              :
0059              : "m" (raid6_avx512_constants.x1d[0]));
0060 
0061     for (d = 0; d < bytes; d += 64) {
0062         asm volatile("prefetchnta %0\n\t"
0063                  "vmovdqa64 %0,%%zmm2\n\t"     /* P[0] */
0064                  "prefetchnta %1\n\t"
0065                  "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
0066                  "vmovdqa64 %1,%%zmm6"
0067                  :
0068                  : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
0069         for (z = z0-2; z >= 0; z--) {
0070             asm volatile("prefetchnta %0\n\t"
0071                      "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
0072                      "vpmovm2b %%k1,%%zmm5\n\t"
0073                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
0074                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
0075                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0076                      "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
0077                      "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
0078                      "vmovdqa64 %0,%%zmm6"
0079                      :
0080                      : "m" (dptr[z][d]));
0081         }
0082         asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
0083                  "vpmovm2b %%k1,%%zmm5\n\t"
0084                  "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
0085                  "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
0086                  "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0087                  "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
0088                  "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
0089                  "vmovntdq %%zmm2,%0\n\t"
0090                  "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
0091                  "vmovntdq %%zmm4,%1\n\t"
0092                  "vpxorq %%zmm4,%%zmm4,%%zmm4"
0093                  :
0094                  : "m" (p[d]), "m" (q[d]));
0095     }
0096 
0097     asm volatile("sfence" : : : "memory");
0098     kernel_fpu_end();
0099 }
0100 
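/*
 * xor_syndrome() updates P and Q in place for the data-disk range
 * start..stop (inclusive): the combined contribution of those blocks
 * is accumulated exactly as in gen_syndrome() and then XORed into the
 * existing P/Q pages.  Disks below 'start' supply no data, so the
 * "left side" loop only applies the multiply-by-2 step once per
 * skipped disk.
 */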
0101 static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
0102                        size_t bytes, void **ptrs)
0103 {
0104     u8 **dptr = (u8 **)ptrs;
0105     u8 *p, *q;
0106     int d, z, z0;
0107 
0108     z0 = stop;      /* P/Q right side optimization */
0109     p = dptr[disks-2];  /* XOR parity */
0110     q = dptr[disks-1];  /* RS syndrome */
0111 
0112     kernel_fpu_begin();
0113 
0114     asm volatile("vmovdqa64 %0,%%zmm0"
0115              : : "m" (raid6_avx512_constants.x1d[0]));
0116 
0117     for (d = 0 ; d < bytes ; d += 64) {
0118         asm volatile("vmovdqa64 %0,%%zmm4\n\t"
0119                  "vmovdqa64 %1,%%zmm2\n\t"
0120                  "vpxorq %%zmm4,%%zmm2,%%zmm2"
0121                  :
0122                  : "m" (dptr[z0][d]),  "m" (p[d]));
0123         /* P/Q data pages */
0124         for (z = z0-1 ; z >= start ; z--) {
0125             asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
0126                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
0127                      "vpmovm2b %%k1,%%zmm5\n\t"
0128                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
0129                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
0130                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0131                      "vmovdqa64 %0,%%zmm5\n\t"
0132                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
0133                      "vpxorq %%zmm5,%%zmm4,%%zmm4"
0134                      :
0135                      : "m" (dptr[z][d]));
0136         }
0137         /* P/Q left side optimization */
0138         for (z = start-1 ; z >= 0 ; z--) {
0139             asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
0140                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
0141                      "vpmovm2b %%k1,%%zmm5\n\t"
0142                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
0143                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
0144                      "vpxorq %%zmm5,%%zmm4,%%zmm4"
0145                      :
0146                      : );
0147         }
0148         asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
0149         /* Don't use movntdq for r/w memory area < cache line */
0150                  "vmovdqa64 %%zmm4,%0\n\t"
0151                  "vmovdqa64 %%zmm2,%1"
0152                  :
0153                  : "m" (q[d]), "m" (p[d]));
0154     }
0155 
0156     asm volatile("sfence" : : : "memory");
0157     kernel_fpu_end();
0158 }
0159 
0160 const struct raid6_calls raid6_avx512x1 = {
0161     raid6_avx5121_gen_syndrome,
0162     raid6_avx5121_xor_syndrome,
0163     raid6_have_avx512,
0164     "avx512x1",
0165     .priority = 2       /* Prefer AVX512 over priority 1 (SSE2 and others) */
0166 };
0167 
0168 /*
0169  * Unrolled-by-2 AVX512 implementation
0170  */
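/*
 * Two independent P/Q accumulator pairs (zmm2/zmm4 and zmm3/zmm6)
 * cover 128 bytes per loop iteration, which helps hide instruction
 * latency relative to the single-lane version above.
 */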
0171 static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
0172 {
0173     u8 **dptr = (u8 **)ptrs;
0174     u8 *p, *q;
0175     int d, z, z0;
0176 
0177     z0 = disks - 3;         /* Highest data disk */
0178     p = dptr[z0+1];         /* XOR parity */
0179     q = dptr[z0+2];         /* RS syndrome */
0180 
0181     kernel_fpu_begin();
0182 
0183     asm volatile("vmovdqa64 %0,%%zmm0\n\t"
0184              "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
0185              :
0186              : "m" (raid6_avx512_constants.x1d[0]));
0187 
0188     /* We uniformly assume a single prefetch covers at least 64 bytes */
0189     for (d = 0; d < bytes; d += 128) {
0190         asm volatile("prefetchnta %0\n\t"
0191                  "prefetchnta %1\n\t"
0192                  "vmovdqa64 %0,%%zmm2\n\t"      /* P[0] */
0193                  "vmovdqa64 %1,%%zmm3\n\t"      /* P[1] */
0194                  "vmovdqa64 %%zmm2,%%zmm4\n\t"  /* Q[0] */
0195                  "vmovdqa64 %%zmm3,%%zmm6"      /* Q[1] */
0196                  :
0197                  : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
0198         for (z = z0-1; z >= 0; z--) {
0199             asm volatile("prefetchnta %0\n\t"
0200                      "prefetchnta %1\n\t"
0201                      "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
0202                      "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
0203                      "vpmovm2b %%k1,%%zmm5\n\t"
0204                      "vpmovm2b %%k2,%%zmm7\n\t"
0205                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
0206                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
0207                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
0208                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
0209                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0210                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
0211                      "vmovdqa64 %0,%%zmm5\n\t"
0212                      "vmovdqa64 %1,%%zmm7\n\t"
0213                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
0214                      "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
0215                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0216                      "vpxorq %%zmm7,%%zmm6,%%zmm6"
0217                      :
0218                      : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
0219         }
0220         asm volatile("vmovntdq %%zmm2,%0\n\t"
0221                  "vmovntdq %%zmm3,%1\n\t"
0222                  "vmovntdq %%zmm4,%2\n\t"
0223                  "vmovntdq %%zmm6,%3"
0224                  :
0225                  : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
0226                    "m" (q[d+64]));
0227     }
0228 
0229     asm volatile("sfence" : : : "memory");
0230     kernel_fpu_end();
0231 }
0232 
0233 static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
0234                        size_t bytes, void **ptrs)
0235 {
0236     u8 **dptr = (u8 **)ptrs;
0237     u8 *p, *q;
0238     int d, z, z0;
0239 
0240     z0 = stop;      /* P/Q right side optimization */
0241     p = dptr[disks-2];  /* XOR parity */
0242     q = dptr[disks-1];  /* RS syndrome */
0243 
0244     kernel_fpu_begin();
0245 
0246     asm volatile("vmovdqa64 %0,%%zmm0"
0247              : : "m" (raid6_avx512_constants.x1d[0]));
0248 
0249     for (d = 0 ; d < bytes ; d += 128) {
0250         asm volatile("vmovdqa64 %0,%%zmm4\n\t"
0251                  "vmovdqa64 %1,%%zmm6\n\t"
0252                  "vmovdqa64 %2,%%zmm2\n\t"
0253                  "vmovdqa64 %3,%%zmm3\n\t"
0254                  "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
0255                  "vpxorq %%zmm6,%%zmm3,%%zmm3"
0256                  :
0257                  : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
0258                    "m" (p[d]), "m" (p[d+64]));
0259         /* P/Q data pages */
0260         for (z = z0-1 ; z >= start ; z--) {
0261             asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
0262                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
0263                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
0264                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
0265                      "vpmovm2b %%k1,%%zmm5\n\t"
0266                      "vpmovm2b %%k2,%%zmm7\n\t"
0267                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
0268                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
0269                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
0270                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
0271                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0272                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
0273                      "vmovdqa64 %0,%%zmm5\n\t"
0274                      "vmovdqa64 %1,%%zmm7\n\t"
0275                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
0276                      "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
0277                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0278                      "vpxorq %%zmm7,%%zmm6,%%zmm6"
0279                      :
0280                      : "m" (dptr[z][d]),  "m" (dptr[z][d+64]));
0281         }
0282         /* P/Q left side optimization */
0283         for (z = start-1 ; z >= 0 ; z--) {
0284             asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
0285                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
0286                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
0287                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
0288                      "vpmovm2b %%k1,%%zmm5\n\t"
0289                      "vpmovm2b %%k2,%%zmm7\n\t"
0290                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
0291                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
0292                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
0293                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
0294                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0295                      "vpxorq %%zmm7,%%zmm6,%%zmm6"
0296                      :
0297                      : );
0298         }
0299         asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
0300                  "vpxorq %1,%%zmm6,%%zmm6\n\t"
0301                  /* Don't use movntdq for r/w
0302                   * memory area < cache line
0303                   */
0304                  "vmovdqa64 %%zmm4,%0\n\t"
0305                  "vmovdqa64 %%zmm6,%1\n\t"
0306                  "vmovdqa64 %%zmm2,%2\n\t"
0307                  "vmovdqa64 %%zmm3,%3"
0308                  :
0309                  : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
0310                    "m" (p[d+64]));
0311     }
0312 
0313     asm volatile("sfence" : : : "memory");
0314     kernel_fpu_end();
0315 }
0316 
0317 const struct raid6_calls raid6_avx512x2 = {
0318     raid6_avx5122_gen_syndrome,
0319     raid6_avx5122_xor_syndrome,
0320     raid6_have_avx512,
0321     "avx512x2",
0322     .priority = 2       /* Prefer AVX512 over priority 1 (SSE2 and others) */
0323 };
0324 
0325 #ifdef CONFIG_X86_64
0326 
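/*
 * The four-way unrolled variant keeps four 64-byte lanes of P and Q
 * resident in zmm2/zmm3/zmm10/zmm11 and zmm4/zmm6/zmm12/zmm14.
 * Registers zmm8-zmm31 exist only in 64-bit mode, hence the
 * CONFIG_X86_64 guard around this block.
 */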
0327 /*
0328  * Unrolled-by-4 AVX512 implementation
0329  */
0330 static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
0331 {
0332     u8 **dptr = (u8 **)ptrs;
0333     u8 *p, *q;
0334     int d, z, z0;
0335 
0336     z0 = disks - 3;         /* Highest data disk */
0337     p = dptr[z0+1];         /* XOR parity */
0338     q = dptr[z0+2];         /* RS syndrome */
0339 
0340     kernel_fpu_begin();
0341 
0342     asm volatile("vmovdqa64 %0,%%zmm0\n\t"
0343              "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t"       /* Zero temp */
0344              "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"       /* P[0] */
0345              "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"       /* P[1] */
0346              "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"       /* Q[0] */
0347              "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"       /* Q[1] */
0348              "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"    /* P[2] */
0349              "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"    /* P[3] */
0350              "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"    /* Q[2] */
0351              "vpxorq %%zmm14,%%zmm14,%%zmm14"        /* Q[3] */
0352              :
0353              : "m" (raid6_avx512_constants.x1d[0]));
0354 
0355     for (d = 0; d < bytes; d += 256) {
0356         for (z = z0; z >= 0; z--) {
0357         asm volatile("prefetchnta %0\n\t"
0358                  "prefetchnta %1\n\t"
0359                  "prefetchnta %2\n\t"
0360                  "prefetchnta %3\n\t"
0361                  "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
0362                  "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
0363                  "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
0364                  "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
0365                  "vpmovm2b %%k1,%%zmm5\n\t"
0366                  "vpmovm2b %%k2,%%zmm7\n\t"
0367                  "vpmovm2b %%k3,%%zmm13\n\t"
0368                  "vpmovm2b %%k4,%%zmm15\n\t"
0369                  "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
0370                  "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
0371                  "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
0372                  "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
0373                  "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
0374                  "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
0375                  "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
0376                  "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
0377                  "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0378                  "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
0379                  "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
0380                  "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
0381                  "vmovdqa64 %0,%%zmm5\n\t"
0382                  "vmovdqa64 %1,%%zmm7\n\t"
0383                  "vmovdqa64 %2,%%zmm13\n\t"
0384                  "vmovdqa64 %3,%%zmm15\n\t"
0385                  "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
0386                  "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
0387                  "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
0388                  "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
0389                  "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0390                  "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
0391                  "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
0392                  "vpxorq %%zmm15,%%zmm14,%%zmm14"
0393                  :
0394                  : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
0395                    "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
0396         }
0397         asm volatile("vmovntdq %%zmm2,%0\n\t"
0398                  "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
0399                  "vmovntdq %%zmm3,%1\n\t"
0400                  "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
0401                  "vmovntdq %%zmm10,%2\n\t"
0402                  "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
0403                  "vmovntdq %%zmm11,%3\n\t"
0404                  "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
0405                  "vmovntdq %%zmm4,%4\n\t"
0406                  "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
0407                  "vmovntdq %%zmm6,%5\n\t"
0408                  "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
0409                  "vmovntdq %%zmm12,%6\n\t"
0410                  "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
0411                  "vmovntdq %%zmm14,%7\n\t"
0412                  "vpxorq %%zmm14,%%zmm14,%%zmm14"
0413                  :
0414                  : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
0415                    "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
0416                    "m" (q[d+128]), "m" (q[d+192]));
0417     }
0418 
0419     asm volatile("sfence" : : : "memory");
0420     kernel_fpu_end();
0421 }
0422 
0423 static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
0424                        size_t bytes, void **ptrs)
0425 {
0426     u8 **dptr = (u8 **)ptrs;
0427     u8 *p, *q;
0428     int d, z, z0;
0429 
0430     z0 = stop;      /* P/Q right side optimization */
0431     p = dptr[disks-2];  /* XOR parity */
0432     q = dptr[disks-1];  /* RS syndrome */
0433 
0434     kernel_fpu_begin();
0435 
0436     asm volatile("vmovdqa64 %0,%%zmm0"
0437              :: "m" (raid6_avx512_constants.x1d[0]));
0438 
0439     for (d = 0 ; d < bytes ; d += 256) {
0440         asm volatile("vmovdqa64 %0,%%zmm4\n\t"
0441                  "vmovdqa64 %1,%%zmm6\n\t"
0442                  "vmovdqa64 %2,%%zmm12\n\t"
0443                  "vmovdqa64 %3,%%zmm14\n\t"
0444                  "vmovdqa64 %4,%%zmm2\n\t"
0445                  "vmovdqa64 %5,%%zmm3\n\t"
0446                  "vmovdqa64 %6,%%zmm10\n\t"
0447                  "vmovdqa64 %7,%%zmm11\n\t"
0448                  "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
0449                  "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
0450                  "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
0451                  "vpxorq %%zmm14,%%zmm11,%%zmm11"
0452                  :
0453                  : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
0454                    "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
0455                    "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
0456                    "m" (p[d+192]));
0457         /* P/Q data pages */
0458         for (z = z0-1 ; z >= start ; z--) {
0459             asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
0460                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
0461                      "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
0462                      "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
0463                      "prefetchnta %0\n\t"
0464                      "prefetchnta %2\n\t"
0465                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
0466                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
0467                      "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
0468                      "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
0469                      "vpmovm2b %%k1,%%zmm5\n\t"
0470                      "vpmovm2b %%k2,%%zmm7\n\t"
0471                      "vpmovm2b %%k3,%%zmm13\n\t"
0472                      "vpmovm2b %%k4,%%zmm15\n\t"
0473                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
0474                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
0475                      "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
0476                      "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
0477                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
0478                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
0479                      "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
0480                      "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
0481                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0482                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
0483                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
0484                      "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
0485                      "vmovdqa64 %0,%%zmm5\n\t"
0486                      "vmovdqa64 %1,%%zmm7\n\t"
0487                      "vmovdqa64 %2,%%zmm13\n\t"
0488                      "vmovdqa64 %3,%%zmm15\n\t"
0489                      "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
0490                      "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
0491                      "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
0492                      "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
0493                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0494                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
0495                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
0496                      "vpxorq %%zmm15,%%zmm14,%%zmm14"
0497                      :
0498                      : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
0499                        "m" (dptr[z][d+128]),
0500                        "m" (dptr[z][d+192]));
0501         }
0502         asm volatile("prefetchnta %0\n\t"
0503                  "prefetchnta %1\n\t"
0504                  :
0505                  : "m" (q[d]), "m" (q[d+128]));
0506         /* P/Q left side optimization */
0507         for (z = start-1 ; z >= 0 ; z--) {
0508             asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
0509                      "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
0510                      "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
0511                      "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
0512                      "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
0513                      "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
0514                      "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
0515                      "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
0516                      "vpmovm2b %%k1,%%zmm5\n\t"
0517                      "vpmovm2b %%k2,%%zmm7\n\t"
0518                      "vpmovm2b %%k3,%%zmm13\n\t"
0519                      "vpmovm2b %%k4,%%zmm15\n\t"
0520                      "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
0521                      "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
0522                      "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
0523                      "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
0524                      "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
0525                      "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
0526                      "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
0527                      "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
0528                      "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
0529                      "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
0530                      "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
0531                      "vpxorq %%zmm15,%%zmm14,%%zmm14"
0532                      :
0533                      : );
0534         }
0535         asm volatile("vmovntdq %%zmm2,%0\n\t"
0536                  "vmovntdq %%zmm3,%1\n\t"
0537                  "vmovntdq %%zmm10,%2\n\t"
0538                  "vmovntdq %%zmm11,%3\n\t"
0539                  "vpxorq %4,%%zmm4,%%zmm4\n\t"
0540                  "vpxorq %5,%%zmm6,%%zmm6\n\t"
0541                  "vpxorq %6,%%zmm12,%%zmm12\n\t"
0542                  "vpxorq %7,%%zmm14,%%zmm14\n\t"
0543                  "vmovntdq %%zmm4,%4\n\t"
0544                  "vmovntdq %%zmm6,%5\n\t"
0545                  "vmovntdq %%zmm12,%6\n\t"
0546                  "vmovntdq %%zmm14,%7"
0547                  :
0548                  : "m" (p[d]),  "m" (p[d+64]), "m" (p[d+128]),
0549                    "m" (p[d+192]), "m" (q[d]),  "m" (q[d+64]),
0550                    "m" (q[d+128]), "m" (q[d+192]));
0551     }
0552     asm volatile("sfence" : : : "memory");
0553     kernel_fpu_end();
0554 }
0555 const struct raid6_calls raid6_avx512x4 = {
0556     raid6_avx5124_gen_syndrome,
0557     raid6_avx5124_xor_syndrome,
0558     raid6_have_avx512,
0559     "avx512x4",
0560     .priority = 2       /* Prefer AVX512 over priority 1 (SSE2 and others) */
0561 };
0562 #endif /* CONFIG_X86_64 */
0563 
0564 #endif /* CONFIG_AS_AVX512 */