0001
0002
0003
0004
0005
0006
0007
0008
0009 #ifdef CONFIG_AS_AVX512
0010
0011 #include <linux/raid/pq.h>
0012 #include "x86.h"
0013
0014 static int raid6_has_avx512(void)
0015 {
0016 return boot_cpu_has(X86_FEATURE_AVX2) &&
0017 boot_cpu_has(X86_FEATURE_AVX) &&
0018 boot_cpu_has(X86_FEATURE_AVX512F) &&
0019 boot_cpu_has(X86_FEATURE_AVX512BW) &&
0020 boot_cpu_has(X86_FEATURE_AVX512VL) &&
0021 boot_cpu_has(X86_FEATURE_AVX512DQ);
0022 }
0023
0024 static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
0025 int failb, void **ptrs)
0026 {
0027 u8 *p, *q, *dp, *dq;
0028 const u8 *pbmul;
0029 const u8 *qmul;
0030 const u8 x0f = 0x0f;
0031
0032 p = (u8 *)ptrs[disks-2];
0033 q = (u8 *)ptrs[disks-1];
0034
0035
0036
0037
0038
0039
0040
0041 dp = (u8 *)ptrs[faila];
0042 ptrs[faila] = (void *)raid6_empty_zero_page;
0043 ptrs[disks-2] = dp;
0044 dq = (u8 *)ptrs[failb];
0045 ptrs[failb] = (void *)raid6_empty_zero_page;
0046 ptrs[disks-1] = dq;
0047
0048 raid6_call.gen_syndrome(disks, bytes, ptrs);
0049
0050
0051 ptrs[faila] = dp;
0052 ptrs[failb] = dq;
0053 ptrs[disks-2] = p;
0054 ptrs[disks-1] = q;
0055
0056
0057 pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
0058 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
0059 raid6_gfexp[failb]]];
0060
0061 kernel_fpu_begin();
0062
0063
0064 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
0065
0066 while (bytes) {
0067 #ifdef CONFIG_X86_64
0068 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
0069 "vmovdqa64 %1, %%zmm9\n\t"
0070 "vmovdqa64 %2, %%zmm0\n\t"
0071 "vmovdqa64 %3, %%zmm8\n\t"
0072 "vpxorq %4, %%zmm1, %%zmm1\n\t"
0073 "vpxorq %5, %%zmm9, %%zmm9\n\t"
0074 "vpxorq %6, %%zmm0, %%zmm0\n\t"
0075 "vpxorq %7, %%zmm8, %%zmm8"
0076 :
0077 : "m" (q[0]), "m" (q[64]), "m" (p[0]),
0078 "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
0079 "m" (dp[0]), "m" (dp[64]));
0080
0081
0082
0083
0084
0085
0086
0087
0088 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
0089 "vbroadcasti64x2 %1, %%zmm5"
0090 :
0091 : "m" (qmul[0]), "m" (qmul[16]));
0092
0093 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
0094 "vpsraw $4, %%zmm9, %%zmm12\n\t"
0095 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
0096 "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
0097 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
0098 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
0099 "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
0100 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
0101 "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
0102 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
0103 "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
0104 "vpxorq %%zmm4, %%zmm5, %%zmm5"
0105 :
0106 : );
0107
0108
0109
0110
0111
0112
0113 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
0114 "vbroadcasti64x2 %1, %%zmm1\n\t"
0115 "vpsraw $4, %%zmm0, %%zmm2\n\t"
0116 "vpsraw $4, %%zmm8, %%zmm6\n\t"
0117 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
0118 "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
0119 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
0120 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
0121 "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
0122 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
0123 "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
0124 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
0125 "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
0126 "vpxorq %%zmm12, %%zmm13, %%zmm13"
0127 :
0128 : "m" (pbmul[0]), "m" (pbmul[16]));
0129
0130
0131
0132
0133
0134 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
0135 "vpxorq %%zmm15, %%zmm13, %%zmm13"
0136 :
0137 : );
0138
0139
0140
0141
0142
0143 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
0144 "vmovdqa64 %%zmm13,%1\n\t"
0145 "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
0146 "vpxorq %%zmm13, %%zmm8, %%zmm8"
0147 :
0148 : "m" (dq[0]), "m" (dq[64]));
0149
0150 asm volatile("vmovdqa64 %%zmm0, %0\n\t"
0151 "vmovdqa64 %%zmm8, %1"
0152 :
0153 : "m" (dp[0]), "m" (dp[64]));
0154
0155 bytes -= 128;
0156 p += 128;
0157 q += 128;
0158 dp += 128;
0159 dq += 128;
0160 #else
0161 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
0162 "vmovdqa64 %1, %%zmm0\n\t"
0163 "vpxorq %2, %%zmm1, %%zmm1\n\t"
0164 "vpxorq %3, %%zmm0, %%zmm0"
0165 :
0166 : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));
0167
0168
0169
0170 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
0171 "vbroadcasti64x2 %1, %%zmm5"
0172 :
0173 : "m" (qmul[0]), "m" (qmul[16]));
0174
0175
0176
0177
0178
0179 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
0180 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
0181 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
0182 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
0183 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
0184 "vpxorq %%zmm4, %%zmm5, %%zmm5"
0185 :
0186 : );
0187
0188
0189
0190 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
0191 "vbroadcasti64x2 %1, %%zmm1"
0192 :
0193 : "m" (pbmul[0]), "m" (pbmul[16]));
0194
0195 asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
0196 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
0197 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
0198 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
0199 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
0200 "vpxorq %%zmm4, %%zmm1, %%zmm1"
0201 :
0202 : );
0203
0204
0205 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
0206
0207 "vmovdqa64 %%zmm1, %0\n\t"
0208 :
0209 : "m" (dq[0]));
0210
0211 asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
0212 "vmovdqa64 %%zmm0, %0"
0213 :
0214 : "m" (dp[0]));
0215
0216 bytes -= 64;
0217 p += 64;
0218 q += 64;
0219 dp += 64;
0220 dq += 64;
0221 #endif
0222 }
0223
0224 kernel_fpu_end();
0225 }
0226
0227 static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
0228 void **ptrs)
0229 {
0230 u8 *p, *q, *dq;
0231 const u8 *qmul;
0232 const u8 x0f = 0x0f;
0233
0234 p = (u8 *)ptrs[disks-2];
0235 q = (u8 *)ptrs[disks-1];
0236
0237
0238
0239
0240
0241
0242 dq = (u8 *)ptrs[faila];
0243 ptrs[faila] = (void *)raid6_empty_zero_page;
0244 ptrs[disks-1] = dq;
0245
0246 raid6_call.gen_syndrome(disks, bytes, ptrs);
0247
0248
0249 ptrs[faila] = dq;
0250 ptrs[disks-1] = q;
0251
0252
0253 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
0254
0255 kernel_fpu_begin();
0256
0257 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
0258
0259 while (bytes) {
0260 #ifdef CONFIG_X86_64
0261 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
0262 "vmovdqa64 %1, %%zmm8\n\t"
0263 "vpxorq %2, %%zmm3, %%zmm3\n\t"
0264 "vpxorq %3, %%zmm8, %%zmm8"
0265 :
0266 : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
0267 "m" (q[64]));
0268
0269
0270
0271
0272
0273 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
0274 "vmovapd %%zmm0, %%zmm13\n\t"
0275 "vbroadcasti64x2 %1, %%zmm1\n\t"
0276 "vmovapd %%zmm1, %%zmm14"
0277 :
0278 : "m" (qmul[0]), "m" (qmul[16]));
0279
0280 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
0281 "vpsraw $4, %%zmm8, %%zmm12\n\t"
0282 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
0283 "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
0284 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
0285 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
0286 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
0287 "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
0288 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
0289 "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
0290 "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
0291 "vpxorq %%zmm13, %%zmm14, %%zmm14"
0292 :
0293 : );
0294
0295
0296
0297
0298
0299 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
0300 "vmovdqa64 %1, %%zmm12\n\t"
0301 "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
0302 "vpxorq %%zmm14, %%zmm12, %%zmm12"
0303 :
0304 : "m" (p[0]), "m" (p[64]));
0305
0306
0307
0308
0309
0310
0311 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
0312 "vmovdqa64 %%zmm14, %1\n\t"
0313 "vmovdqa64 %%zmm2, %2\n\t"
0314 "vmovdqa64 %%zmm12,%3"
0315 :
0316 : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
0317 "m" (p[64]));
0318
0319 bytes -= 128;
0320 p += 128;
0321 q += 128;
0322 dq += 128;
0323 #else
0324 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
0325 "vpxorq %1, %%zmm3, %%zmm3"
0326 :
0327 : "m" (dq[0]), "m" (q[0]));
0328
0329
0330
0331 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
0332 "vbroadcasti64x2 %1, %%zmm1"
0333 :
0334 : "m" (qmul[0]), "m" (qmul[16]));
0335
0336 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
0337 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
0338 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
0339 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
0340 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
0341 "vpxorq %%zmm0, %%zmm1, %%zmm1"
0342 :
0343 : );
0344
0345
0346
0347 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
0348 "vpxorq %%zmm1, %%zmm2, %%zmm2"
0349 :
0350 : "m" (p[0]));
0351
0352
0353
0354 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
0355 "vmovdqa64 %%zmm2, %1"
0356 :
0357 : "m" (dq[0]), "m" (p[0]));
0358
0359 bytes -= 64;
0360 p += 64;
0361 q += 64;
0362 dq += 64;
0363 #endif
0364 }
0365
0366 kernel_fpu_end();
0367 }
0368
0369 const struct raid6_recov_calls raid6_recov_avx512 = {
0370 .data2 = raid6_2data_recov_avx512,
0371 .datap = raid6_datap_recov_avx512,
0372 .valid = raid6_has_avx512,
0373 #ifdef CONFIG_X86_64
0374 .name = "avx512x2",
0375 #else
0376 .name = "avx512x1",
0377 #endif
0378 .priority = 3,
0379 };
0380
0381 #else
0382 #warning "your version of binutils lacks AVX512 support"
0383 #endif