/*
 * AVX2 implementation of the RAID-6 syndrome functions.
 */

#include <linux/raid/pq.h>
#include "x86.h"

/*
 * 0x1d repeated across a full ymm register: the low byte of the RAID-6
 * field polynomial 0x11d, used to reduce after doubling in GF(2^8).
 */
static const struct raid6_avx2_constants {
	u64 x1d[4];
} raid6_avx2_constants __aligned(32) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};

/* Report whether the CPU supports both the AVX and AVX2 instruction sets */
static int raid6_have_avx2(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
}

/*
 * Plain AVX2 implementation: one 32-byte chunk of P and Q per loop pass
 */
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* Zero temp */

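	/*
	 * Accumulate P as the plain XOR of all data disks and Q as the
	 * GF(2^8) weighted sum.  Multiplying the Q accumulator by 2 is done
	 * branchlessly: vpcmpgtb against zero yields 0xff in every byte
	 * whose top bit is set, vpaddb doubles each byte (shift left by 1),
	 * and vpand/vpxor fold the 0x1d reduction constant back in for the
	 * bytes that overflowed.
	 */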
	for (d = 0; d < bytes; d += 32) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("vmovdqa %ymm2,%ymm4");	/* Q[0] */
		asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm6,%ymm2,%ymm2");
			asm volatile("vpxor %ymm6,%ymm4,%ymm4");
			asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
		}
		asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
		asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
		asm volatile("vpand %ymm0,%ymm5,%ymm5");
		asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		asm volatile("vpxor %ymm6,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm4,%ymm4");

		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

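/*
 * Fold the contribution of data disks start..stop (inclusive) into the
 * existing P and Q buffers: P ^= D_z for each z in the range, and
 * Q ^= 2^z * D_z in GF(2^8).  The loop below disk 'start' only keeps
 * doubling the Q accumulator so each disk's data ends up weighted by the
 * correct power of two.
 */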
static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 32) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		}
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x1 = {
	raid6_avx21_gen_syndrome,
	raid6_avx21_xor_syndrome,
	raid6_have_avx2,
	"avx2x1",
	.priority = 2
};

/*
 * Unrolled-by-2 AVX2 implementation
 */
static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */

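	/*
	 * Unrolling by two: each pass handles 64 bytes using two independent
	 * P/Q accumulator pairs (ymm2/ymm4 and ymm3/ymm6), which gives the
	 * out-of-order core more parallel work per loop iteration.
	 */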
	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for (d = 0; d < bytes; d += 64) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
		asm volatile("vmovdqa %ymm2,%ymm4");	/* Q[0] */
		asm volatile("vmovdqa %ymm3,%ymm6");	/* Q[1] */
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 64) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm3,%ymm3");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7"
				     :: "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x2 = {
	raid6_avx22_gen_syndrome,
	raid6_avx22_xor_syndrome,
	raid6_have_avx2,
	"avx2x2",
	.priority = 2
};

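/*
 * The 4-way unrolled routines below keep four P and four Q accumulators
 * live at once and therefore need ymm8-ymm15, which only exist in 64-bit
 * mode.
 */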
#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX2 implementation
 */
static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */
	asm volatile("vpxor %ymm2,%ymm2,%ymm2");	/* P[0] */
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* P[1] */
	asm volatile("vpxor %ymm4,%ymm4,%ymm4");	/* Q[0] */
	asm volatile("vpxor %ymm6,%ymm6,%ymm6");	/* Q[1] */
	asm volatile("vpxor %ymm10,%ymm10,%ymm10");	/* P[2] */
	asm volatile("vpxor %ymm11,%ymm11,%ymm11");	/* P[3] */
	asm volatile("vpxor %ymm12,%ymm12,%ymm12");	/* Q[2] */
	asm volatile("vpxor %ymm14,%ymm14,%ymm14");	/* Q[3] */

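	/*
	 * Each pass covers 128 bytes: four P accumulators (ymm2/3/10/11)
	 * and four Q accumulators (ymm4/6/12/14) are kept live, and they
	 * are cleared again right after being streamed out below, so every
	 * block starts accumulating from zero.
	 */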
	for (d = 0; d < bytes; d += 128) {
		for (z = z0; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vpxor %ymm3,%ymm3,%ymm3");
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vpxor %ymm10,%ymm10,%ymm10");
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		asm volatile("vpxor %ymm11,%ymm11,%ymm11");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vpxor %ymm6,%ymm6,%ymm6");
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vpxor %ymm12,%ymm12,%ymm12");
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
		asm volatile("vpxor %ymm14,%ymm14,%ymm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 128) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
		asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
		asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
		asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm3,%ymm3");
		asm volatile("vpxor %ymm12,%ymm10,%ymm10");
		asm volatile("vpxor %ymm14,%ymm11,%ymm11");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpxor %ymm13,%ymm13,%ymm13");
			asm volatile("vpxor %ymm15,%ymm15,%ymm15");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7"
				     :: "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13"
				     :: "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15"
				     :: "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+64]));
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpxor %ymm13,%ymm13,%ymm13");
			asm volatile("vpxor %ymm15,%ymm15,%ymm15");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
		asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
		asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x4 = {
	raid6_avx24_gen_syndrome,
	raid6_avx24_xor_syndrome,
	raid6_have_avx2,
	"avx2x4",
	.priority = 2
};
#endif