// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright (C) 2012 Intel Corporation
 *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
 *
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * AVX2 implementation of RAID-6 syndrome functions
 *
 */

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_avx2_constants {
    u64 x1d[4];
} raid6_avx2_constants __aligned(32) = {
    { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
      0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};
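
/*
 * Note: 0x1d is the low byte of the GF(2^8) generator polynomial
 * x^8 + x^4 + x^3 + x^2 + 1 (0x11d) used for the RAID-6 Q syndrome.
 * The vpcmpgtb/vpaddb/vpand/vpxor sequences below multiply 32 bytes by
 * {02} in GF(2^8) at once.  A minimal scalar sketch of that step, for
 * illustration only (the helper name is made up and nothing in this
 * file uses it):
 */
static inline u8 raid6_gf_mul2_sketch(u8 v)
{
    /* Double in GF(2), then reduce by 0x1d if bit 7 was set */
    return (u8)((v << 1) ^ ((v & 0x80) ? 0x1d : 0));
}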

static int raid6_have_avx2(void)
{
    return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
}

/*
 * Plain AVX2 implementation
 */
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = disks - 3;     /* Highest data disk */
    p = dptr[z0+1];     /* XOR parity */
    q = dptr[z0+2];     /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
    asm volatile("vpxor %ymm3,%ymm3,%ymm3");    /* Zero temp */

    for (d = 0; d < bytes; d += 32) {
        asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
        asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
        asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
        asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
        asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
        for (z = z0-2; z >= 0; z--) {
            asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
            asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
            asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
            asm volatile("vpand %ymm0,%ymm5,%ymm5");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm6,%ymm2,%ymm2");
            asm volatile("vpxor %ymm6,%ymm4,%ymm4");
            asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
        }
        asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
        asm volatile("vpand %ymm0,%ymm5,%ymm5");
        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
        asm volatile("vpxor %ymm6,%ymm2,%ymm2");
        asm volatile("vpxor %ymm6,%ymm4,%ymm4");

        asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
        asm volatile("vpxor %ymm2,%ymm2,%ymm2");
        asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
        asm volatile("vpxor %ymm4,%ymm4,%ymm4");
    }

    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}
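
/*
 * The loop above computes P as the plain XOR of all data blocks and Q
 * with Horner's rule, so Q = sum over z of {02}^z * D_z.  A minimal
 * scalar sketch of the same computation (illustrative only, not used
 * by this file; it relies on the raid6_gf_mul2_sketch() helper above):
 */
static inline void raid6_gen_syndrome_sketch(int disks, size_t bytes,
                                             u8 **dptr)
{
    u8 *p, *q, wp, wq;
    size_t d;
    int z, z0;

    z0 = disks - 3;     /* Highest data disk */
    p = dptr[z0+1];     /* XOR parity */
    q = dptr[z0+2];     /* RS syndrome */

    for (d = 0; d < bytes; d++) {
        wp = wq = dptr[z0][d];
        for (z = z0-1; z >= 0; z--) {
            wp ^= dptr[z][d];
            wq = raid6_gf_mul2_sketch(wq) ^ dptr[z][d];
        }
        p[d] = wp;
        q[d] = wq;
    }
}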

static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
                     size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = stop;      /* P/Q right side optimization */
    p = dptr[disks-2];  /* XOR parity */
    q = dptr[disks-1];  /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

    for (d = 0 ; d < bytes ; d += 32) {
        asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
        asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
        asm volatile("vpxor %ymm4,%ymm2,%ymm2");
        /* P/Q data pages */
        for (z = z0-1 ; z >= start ; z--) {
            asm volatile("vpxor %ymm5,%ymm5,%ymm5");
            asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
            asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
            asm volatile("vpand %ymm0,%ymm5,%ymm5");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
            asm volatile("vpxor %ymm5,%ymm2,%ymm2");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
        }
        /* P/Q left side optimization */
        for (z = start-1 ; z >= 0 ; z--) {
            asm volatile("vpxor %ymm5,%ymm5,%ymm5");
            asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
            asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
            asm volatile("vpand %ymm0,%ymm5,%ymm5");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
        }
        asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
        /* Don't use movntdq for r/w memory area < cache line */
        asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
        asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
    }

    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}
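
/*
 * The routine above folds the blocks passed for data disks start..stop
 * into the existing P and Q (partial updates, e.g. for read-modify-write).
 * Disks above stop are untouched, so the Horner evaluation starts at
 * z0 = stop ("right side optimization"); below start there is no data
 * to XOR in, so only the multiply-by-{02} steps remain ("left side
 * optimization").
 */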

const struct raid6_calls raid6_avx2x1 = {
    raid6_avx21_gen_syndrome,
    raid6_avx21_xor_syndrome,
    raid6_have_avx2,
    "avx2x1",
    .priority = 2       /* Prefer AVX2 over priority 1 (SSE2 and others) */
};
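
/*
 * The raid6_calls initializers in this file list, in order, the
 * syndrome generator, the xor_syndrome helper, the CPU feature check
 * and the algorithm name; .priority lets the algorithm selection code
 * in lib/raid6/algos.c prefer these AVX2 variants over priority-1
 * implementations such as SSE2.
 */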

/*
 * Unrolled-by-2 AVX2 implementation
 */
static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = disks - 3;     /* Highest data disk */
    p = dptr[z0+1];     /* XOR parity */
    q = dptr[z0+2];     /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
    asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */

    /* We uniformly assume a single prefetch covers at least 32 bytes */
    for (d = 0; d < bytes; d += 64) {
        asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
        asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
        asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
        asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
        asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
        asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
        for (z = z0-1; z >= 0; z--) {
            asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
            asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
            asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
            asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
            asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
            asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
            asm volatile("vpand %ymm0,%ymm5,%ymm5");
            asm volatile("vpand %ymm0,%ymm7,%ymm7");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm7,%ymm6,%ymm6");
            asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
            asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
            asm volatile("vpxor %ymm5,%ymm2,%ymm2");
            asm volatile("vpxor %ymm7,%ymm3,%ymm3");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm7,%ymm6,%ymm6");
        }
        asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
        asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
        asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
        asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
    }

    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}
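
/*
 * The unrolled-by-2 variant above is functionally identical to the
 * plain one; it processes 64 bytes per iteration with two independent
 * P/Q accumulator pairs so the two GF multiply chains can proceed in
 * parallel.
 */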

static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
                     size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = stop;      /* P/Q right side optimization */
    p = dptr[disks-2];  /* XOR parity */
    q = dptr[disks-1];  /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

    for (d = 0 ; d < bytes ; d += 64) {
        asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
        asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
        asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
        asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
        asm volatile("vpxor %ymm4,%ymm2,%ymm2");
        asm volatile("vpxor %ymm6,%ymm3,%ymm3");
        /* P/Q data pages */
        for (z = z0-1 ; z >= start ; z--) {
            asm volatile("vpxor %ymm5,%ymm5,%ymm5");
            asm volatile("vpxor %ymm7,%ymm7,%ymm7");
            asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
            asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
            asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
            asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
            asm volatile("vpand %ymm0,%ymm5,%ymm5");
            asm volatile("vpand %ymm0,%ymm7,%ymm7");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm7,%ymm6,%ymm6");
            asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
            asm volatile("vmovdqa %0,%%ymm7"
                     :: "m" (dptr[z][d+32]));
            asm volatile("vpxor %ymm5,%ymm2,%ymm2");
            asm volatile("vpxor %ymm7,%ymm3,%ymm3");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm7,%ymm6,%ymm6");
        }
        /* P/Q left side optimization */
        for (z = start-1 ; z >= 0 ; z--) {
            asm volatile("vpxor %ymm5,%ymm5,%ymm5");
            asm volatile("vpxor %ymm7,%ymm7,%ymm7");
            asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
            asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
            asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
            asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
            asm volatile("vpand %ymm0,%ymm5,%ymm5");
            asm volatile("vpand %ymm0,%ymm7,%ymm7");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm7,%ymm6,%ymm6");
        }
        asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
        asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
        /* Don't use movntdq for r/w memory area < cache line */
        asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
        asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
        asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
        asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
    }

    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x2 = {
    raid6_avx22_gen_syndrome,
    raid6_avx22_xor_syndrome,
    raid6_have_avx2,
    "avx2x2",
    .priority = 2       /* Prefer AVX2 over priority 1 (SSE2 and others) */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX2 implementation
 */
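/*
 * The by-4 variants use ymm8-ymm15 and are therefore only built on
 * x86-64, where all sixteen ymm registers are available.  Their
 * 128-byte stride also spans full cache lines, which is why the
 * xor_syndrome path below can use non-temporal vmovntdq stores, unlike
 * the x1/x2 variants above.
 */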
static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = disks - 3;     /* Highest data disk */
    p = dptr[z0+1];     /* XOR parity */
    q = dptr[z0+2];     /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
    asm volatile("vpxor %ymm1,%ymm1,%ymm1");    /* Zero temp */
    asm volatile("vpxor %ymm2,%ymm2,%ymm2");    /* P[0] */
    asm volatile("vpxor %ymm3,%ymm3,%ymm3");    /* P[1] */
    asm volatile("vpxor %ymm4,%ymm4,%ymm4");    /* Q[0] */
    asm volatile("vpxor %ymm6,%ymm6,%ymm6");    /* Q[1] */
    asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */
    asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */
    asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */
    asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */

    for (d = 0; d < bytes; d += 128) {
        for (z = z0; z >= 0; z--) {
            asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
            asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
            asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
            asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
            asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
            asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
            asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
            asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
            asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
            asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
            asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
            asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
            asm volatile("vpand %ymm0,%ymm5,%ymm5");
            asm volatile("vpand %ymm0,%ymm7,%ymm7");
            asm volatile("vpand %ymm0,%ymm13,%ymm13");
            asm volatile("vpand %ymm0,%ymm15,%ymm15");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm7,%ymm6,%ymm6");
            asm volatile("vpxor %ymm13,%ymm12,%ymm12");
            asm volatile("vpxor %ymm15,%ymm14,%ymm14");
            asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
            asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
            asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
            asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
            asm volatile("vpxor %ymm5,%ymm2,%ymm2");
            asm volatile("vpxor %ymm7,%ymm3,%ymm3");
            asm volatile("vpxor %ymm13,%ymm10,%ymm10");
            asm volatile("vpxor %ymm15,%ymm11,%ymm11");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm7,%ymm6,%ymm6");
            asm volatile("vpxor %ymm13,%ymm12,%ymm12");
            asm volatile("vpxor %ymm15,%ymm14,%ymm14");
        }
        asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
        asm volatile("vpxor %ymm2,%ymm2,%ymm2");
        asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
        asm volatile("vpxor %ymm3,%ymm3,%ymm3");
        asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
        asm volatile("vpxor %ymm10,%ymm10,%ymm10");
        asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
        asm volatile("vpxor %ymm11,%ymm11,%ymm11");
        asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
        asm volatile("vpxor %ymm4,%ymm4,%ymm4");
        asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
        asm volatile("vpxor %ymm6,%ymm6,%ymm6");
        asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
        asm volatile("vpxor %ymm12,%ymm12,%ymm12");
        asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
        asm volatile("vpxor %ymm14,%ymm14,%ymm14");
    }

    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}
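
/*
 * Unlike the x1/x2 gen_syndrome routines, the loop above starts from
 * zeroed P/Q accumulators and walks every data disk, including the
 * highest one; the multiply-by-{02} of an all-zero accumulator on the
 * first pass is a no-op, so the result is the same Horner evaluation.
 */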

static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
                     size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = stop;      /* P/Q right side optimization */
    p = dptr[disks-2];  /* XOR parity */
    q = dptr[disks-1];  /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));

    for (d = 0 ; d < bytes ; d += 128) {
        asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
        asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
        asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
        asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
        asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
        asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
        asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
        asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
        asm volatile("vpxor %ymm4,%ymm2,%ymm2");
        asm volatile("vpxor %ymm6,%ymm3,%ymm3");
        asm volatile("vpxor %ymm12,%ymm10,%ymm10");
        asm volatile("vpxor %ymm14,%ymm11,%ymm11");
        /* P/Q data pages */
        for (z = z0-1 ; z >= start ; z--) {
            asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
            asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
            asm volatile("vpxor %ymm5,%ymm5,%ymm5");
            asm volatile("vpxor %ymm7,%ymm7,%ymm7");
            asm volatile("vpxor %ymm13,%ymm13,%ymm13");
            asm volatile("vpxor %ymm15,%ymm15,%ymm15");
            asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
            asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
            asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
            asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
            asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
            asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
            asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
            asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
            asm volatile("vpand %ymm0,%ymm5,%ymm5");
            asm volatile("vpand %ymm0,%ymm7,%ymm7");
            asm volatile("vpand %ymm0,%ymm13,%ymm13");
            asm volatile("vpand %ymm0,%ymm15,%ymm15");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm7,%ymm6,%ymm6");
            asm volatile("vpxor %ymm13,%ymm12,%ymm12");
            asm volatile("vpxor %ymm15,%ymm14,%ymm14");
            asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
            asm volatile("vmovdqa %0,%%ymm7"
                     :: "m" (dptr[z][d+32]));
            asm volatile("vmovdqa %0,%%ymm13"
                     :: "m" (dptr[z][d+64]));
            asm volatile("vmovdqa %0,%%ymm15"
                     :: "m" (dptr[z][d+96]));
            asm volatile("vpxor %ymm5,%ymm2,%ymm2");
            asm volatile("vpxor %ymm7,%ymm3,%ymm3");
            asm volatile("vpxor %ymm13,%ymm10,%ymm10");
            asm volatile("vpxor %ymm15,%ymm11,%ymm11");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm7,%ymm6,%ymm6");
            asm volatile("vpxor %ymm13,%ymm12,%ymm12");
            asm volatile("vpxor %ymm15,%ymm14,%ymm14");
        }
        asm volatile("prefetchnta %0" :: "m" (q[d]));
        asm volatile("prefetchnta %0" :: "m" (q[d+64]));
        /* P/Q left side optimization */
        for (z = start-1 ; z >= 0 ; z--) {
            asm volatile("vpxor %ymm5,%ymm5,%ymm5");
            asm volatile("vpxor %ymm7,%ymm7,%ymm7");
            asm volatile("vpxor %ymm13,%ymm13,%ymm13");
            asm volatile("vpxor %ymm15,%ymm15,%ymm15");
            asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
            asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
            asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
            asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
            asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
            asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
            asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
            asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
            asm volatile("vpand %ymm0,%ymm5,%ymm5");
            asm volatile("vpand %ymm0,%ymm7,%ymm7");
            asm volatile("vpand %ymm0,%ymm13,%ymm13");
            asm volatile("vpand %ymm0,%ymm15,%ymm15");
            asm volatile("vpxor %ymm5,%ymm4,%ymm4");
            asm volatile("vpxor %ymm7,%ymm6,%ymm6");
            asm volatile("vpxor %ymm13,%ymm12,%ymm12");
            asm volatile("vpxor %ymm15,%ymm14,%ymm14");
        }
        asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
        asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
        asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
        asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
        asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
        asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
        asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
        asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
        asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
        asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
        asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
        asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
    }
    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x4 = {
    raid6_avx24_gen_syndrome,
    raid6_avx24_xor_syndrome,
    raid6_have_avx2,
    "avx2x4",
    .priority = 2       /* Prefer AVX2 over priority 1 (SSE2 and others) */
};
#endif /* CONFIG_X86_64 */