0001
0002
0003
0004
0005
0006
0007 #include <linux/raid/pq.h>
0008 #include "x86.h"
0009
0010 static int raid6_has_avx2(void)
0011 {
0012 return boot_cpu_has(X86_FEATURE_AVX2) &&
0013 boot_cpu_has(X86_FEATURE_AVX);
0014 }
0015
/*
 * raid6_2data_recov_avx2 - recover two failed data blocks using AVX2.
 *
 * @disks:	total number of devices; ptrs[disks-2] is the P block and
 *		ptrs[disks-1] is the Q block
 * @bytes:	bytes per block; the loop consumes 64 bytes (x86-64) or
 *		32 bytes (i386) per iteration, so @bytes must be a multiple
 *		of that stride, and vmovdqa requires the buffers to be
 *		32-byte aligned -- TODO confirm alignment against callers
 * @faila:	index of the first failed data block
 * @failb:	index of the second failed data block; presumably
 *		faila < failb, since raid6_gfexi is indexed with
 *		failb-faila -- confirm callers guarantee this
 * @ptrs:	table of per-device data pointers
 */
static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
	int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;	/* per-byte nibble mask, broadcast to ymm7 */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute the syndrome with zero for the missing data pages.
	 * Use the dead data pages as temporary storage for delta p
	 * and delta q.
	 */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore the pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper GF(256) multiplication tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	/* The vector code clobbers YMM state, fence it off */
	kernel_fpu_begin();

	/* ymm7 = 0x0f in every byte lane (isolates low nibbles below) */
	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		/*
		 * x86-64 has ymm8-ymm15 available, so process two 32-byte
		 * chunks per iteration:
		 *   ymm1/ymm9 = q ^ dq  (Q delta, chunks 0/32)
		 *   ymm0/ymm8 = p ^ dp  (P delta, chunks 0/32)
		 */
		asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
		asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
		asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
		asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
		asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));

		/*
		 * qmul lookup tables, one 16-byte table per nibble,
		 * replicated into both 128-bit lanes:
		 *   ymm4 = qmul table for low nibbles
		 *   ymm5 = qmul table for high nibbles
		 */
		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));

		/*
		 * GF(256)-multiply the Q delta by qmul: split each byte
		 * into low/high nibble, look both up via vpshufb, XOR the
		 * partial products:
		 *   ymm5  = qmul * (q[0]  ^ dq[0])
		 *   ymm15 = qmul * (q[32] ^ dq[32])
		 */
		asm volatile("vpsraw $4, %ymm1, %ymm3");
		asm volatile("vpsraw $4, %ymm9, %ymm12");
		asm volatile("vpand %ymm7, %ymm1, %ymm1");
		asm volatile("vpand %ymm7, %ymm9, %ymm9");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpand %ymm7, %ymm12, %ymm12");
		asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
		asm volatile("vpxor %ymm14, %ymm15, %ymm15");
		asm volatile("vpxor %ymm4, %ymm5, %ymm5");

		/*
		 * Same nibble-table multiply for the P delta with pbmul:
		 *   ymm1  = pbmul * (p[0]  ^ dp[0])
		 *   ymm13 = pbmul * (p[32] ^ dp[32])
		 */
		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
		asm volatile("vpsraw $4, %ymm0, %ymm2");
		asm volatile("vpsraw $4, %ymm8, %ymm6");
		asm volatile("vpand %ymm7, %ymm0, %ymm3");
		asm volatile("vpand %ymm7, %ymm8, %ymm14");
		asm volatile("vpand %ymm7, %ymm2, %ymm2");
		asm volatile("vpand %ymm7, %ymm6, %ymm6");
		asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
		asm volatile("vpxor %ymm12, %ymm13, %ymm13");

		/*
		 * Per the RAID-6 recovery equations the failb data is
		 *   ymm1/ymm13 = pbmul*(P delta) ^ qmul*(Q delta)
		 */
		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
		asm volatile("vpxor %ymm15, %ymm13, %ymm13");

		/*
		 * Store the failb data into dq, then recover the faila
		 * data as (failb data) ^ (P delta) and store it into dp.
		 */
		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
		asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
		asm volatile("vpxor %ymm13, %ymm8, %ymm8");

		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
		asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#else
		/*
		 * i386 only has ymm0-ymm7: one 32-byte chunk per pass.
		 * ymm1 = q ^ dq, ymm0 = p ^ dp
		 */
		asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
		asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));

		/* ymm4/ymm5 = qmul tables for low/high nibbles */
		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));

		/* ymm5 = qmul * (q ^ dq) via nibble table lookups */
		asm volatile("vpsraw $4, %ymm1, %ymm3");
		asm volatile("vpand %ymm7, %ymm1, %ymm1");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
		asm volatile("vpxor %ymm4, %ymm5, %ymm5");

		/* ymm1 = pbmul * (p ^ dp) */
		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));

		asm volatile("vpsraw $4, %ymm0, %ymm2");
		asm volatile("vpand %ymm7, %ymm0, %ymm3");
		asm volatile("vpand %ymm7, %ymm2, %ymm2");
		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
		asm volatile("vpxor %ymm4, %ymm1, %ymm1");

		/* ymm1 = failb data = pbmul*(P delta) ^ qmul*(Q delta) */
		asm volatile("vpxor %ymm5, %ymm1, %ymm1");

		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));

		/* ymm0 = faila data = (failb data) ^ (P delta) */
		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));

		bytes -= 32;
		p += 32;
		q += 32;
		dp += 32;
		dq += 32;
#endif
	}

	kernel_fpu_end();
}
0185
/*
 * raid6_datap_recov_avx2 - recover one failed data block plus the P
 * block using AVX2.
 *
 * @disks:	total number of devices; ptrs[disks-2] is the P block and
 *		ptrs[disks-1] is the Q block
 * @bytes:	bytes per block; the loop consumes 64 bytes (x86-64) or
 *		32 bytes (i386) per iteration, so @bytes must be a multiple
 *		of that stride, and vmovdqa requires the buffers to be
 *		32-byte aligned -- TODO confirm alignment against callers
 * @faila:	index of the failed data block
 * @ptrs:	table of per-device data pointers
 */
static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
	void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;	/* per-byte nibble mask, broadcast to ymm7 */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute the syndrome with zero for the missing data page.
	 * Use the dead data page as temporary storage for delta q.
	 * Note ptrs[disks-2] still points at p, so gen_syndrome also
	 * rewrites P in place (computed with the missing block zeroed);
	 * the loop below fixes P up with the recovered data.
	 */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore the pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper GF(256) multiplication table */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	/* The vector code clobbers YMM state, fence it off */
	kernel_fpu_begin();

	/* ymm7 = 0x0f in every byte lane (isolates low nibbles below) */
	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		/* ymm3/ymm8 = dq ^ q  (Q delta, chunks 0/32) */
		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
		asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));

		/*
		 * qmul nibble lookup tables replicated into both 128-bit
		 * lanes.  vpshufb below overwrites its table register, so
		 * keep second copies (ymm13/ymm14) for the second chunk.
		 */
		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
		asm volatile("vmovapd %ymm0, %ymm13");
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
		asm volatile("vmovapd %ymm1, %ymm14");

		/*
		 * GF(256) multiply via low/high nibble table lookups:
		 *   ymm1/ymm14 = qmul * (dq ^ q) = recovered faila data
		 */
		asm volatile("vpsraw $4, %ymm3, %ymm6");
		asm volatile("vpsraw $4, %ymm8, %ymm12");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpand %ymm7, %ymm8, %ymm8");
		asm volatile("vpand %ymm7, %ymm6, %ymm6");
		asm volatile("vpand %ymm7, %ymm12, %ymm12");
		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
		asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
		asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
		asm volatile("vpxor %ymm13, %ymm14, %ymm14");

		/*
		 * ymm2/ymm12 = p ^ (recovered data): p currently holds
		 * the syndrome computed with faila zeroed, so this XOR
		 * yields the true P.
		 */
		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
		asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
		asm volatile("vpxor %ymm14, %ymm12, %ymm12");

		/* Write back the recovered data and the fixed-up P */
		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
		asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
		asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#else
		/* i386: one 32-byte chunk per pass; ymm3 = dq ^ q */
		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));

		/* ymm0/ymm1 = qmul tables for low/high nibbles */
		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));

		/* ymm1 = qmul * (dq ^ q) = recovered faila data */
		asm volatile("vpsraw $4, %ymm3, %ymm6");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpand %ymm7, %ymm6, %ymm6");
		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
		asm volatile("vpxor %ymm0, %ymm1, %ymm1");

		/* ymm2 = p ^ (recovered data) = true P */
		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
		asm volatile("vpxor %ymm1, %ymm2, %ymm2");

		/* Write back the recovered data and the fixed-up P */
		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));

		bytes -= 32;
		p += 32;
		q += 32;
		dq += 32;
#endif
	}

	kernel_fpu_end();
}
0302
/*
 * Algorithm descriptor for the AVX2 recovery routines.  .valid gates
 * availability at runtime via the CPU feature check; presumably the
 * raid6 algorithm selection code picks among descriptors by .priority
 * -- confirm against lib/raid6 core.
 */
const struct raid6_recov_calls raid6_recov_avx2 = {
	.data2 = raid6_2data_recov_avx2,
	.datap = raid6_datap_recov_avx2,
	.valid = raid6_has_avx2,
#ifdef CONFIG_X86_64
	.name = "avx2x2",	/* two 32-byte chunks per loop iteration */
#else
	.name = "avx2x1",	/* one 32-byte chunk per loop iteration */
#endif
	.priority = 2,
};