Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) 2016 Intel Corporation
0004  *
0005  * Author: Gayatri Kammela <gayatri.kammela@intel.com>
0006  * Author: Megha Dey <megha.dey@linux.intel.com>
0007  */
0008 
0009 #ifdef CONFIG_AS_AVX512
0010 
0011 #include <linux/raid/pq.h>
0012 #include "x86.h"
0013 
0014 static int raid6_has_avx512(void)
0015 {
0016     return boot_cpu_has(X86_FEATURE_AVX2) &&
0017         boot_cpu_has(X86_FEATURE_AVX) &&
0018         boot_cpu_has(X86_FEATURE_AVX512F) &&
0019         boot_cpu_has(X86_FEATURE_AVX512BW) &&
0020         boot_cpu_has(X86_FEATURE_AVX512VL) &&
0021         boot_cpu_has(X86_FEATURE_AVX512DQ);
0022 }
0023 
/*
 * raid6_2data_recov_avx512() - recover two lost data blocks with AVX-512.
 * @disks: total number of disks (data disks + P + Q)
 * @bytes: bytes per block; the loop consumes 128 bytes per iteration on
 *         x86_64 (64 otherwise), so @bytes must be a multiple of that
 *         stride for `while (bytes)` to terminate — presumably guaranteed
 *         by the caller (RAID-6 operates on page-sized, aligned blocks);
 *         the vmovdqa64 loads also require 64-byte alignment
 * @faila: index of the first failed data disk
 * @failb: index of the second failed data disk (the code indexes
 *         raid6_gfexi[failb-faila], so it assumes faila < failb —
 *         TODO confirm the caller enforces this ordering)
 * @ptrs:  per-disk block pointers; ptrs[disks-2] is P, ptrs[disks-1] is Q
 *
 * Strategy: regenerate the syndrome with the two dead blocks replaced by
 * the zero page, which leaves "delta P"/"delta Q" in the dead slots, then
 * solve the resulting two-equation GF(2^8) system per byte using
 * nibble-wide table lookups (vpshufb) into the raid6_vgfmul tables.
 */
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
                     int failb, void **ptrs)
{
    u8 *p, *q, *dp, *dq;
    const u8 *pbmul;    /* P multiplier table for B data */
    const u8 *qmul;     /* Q multiplier table (for both) */
    const u8 x0f = 0x0f;    /* low-nibble mask, broadcast into zmm7 */

    p = (u8 *)ptrs[disks-2];
    q = (u8 *)ptrs[disks-1];

    /*
     * Compute syndrome with zero for the missing data pages
     * Use the dead data pages as temporary storage for
     * delta p and delta q
     */

    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[disks-2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

    /* Restore pointer table */
    ptrs[faila]   = dp;
    ptrs[failb]   = dq;
    ptrs[disks-2] = p;
    ptrs[disks-1] = q;

    /* Now, pick the proper data tables */
    pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
    qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
        raid6_gfexp[failb]]];

    /* AVX-512 register use requires kernel FPU context protection. */
    kernel_fpu_begin();

    /* zmm7 = x0f[64]: mask to split each byte into 4-bit table indices */
    asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

    /*
     * NOTE(review): throughout this loop, store destinations (dq[], dp[])
     * are passed to the asm as "m" *input* operands with no "memory"
     * clobber; the `asm volatile` ordering is what keeps the sequence
     * correct — do not reorder or merge these asm blocks.
     */
    while (bytes) {
#ifdef CONFIG_X86_64
        /* x86_64 path: two 64-byte lanes (offsets 0 and 64) per pass. */
        asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                 "vmovdqa64 %1, %%zmm9\n\t"
                 "vmovdqa64 %2, %%zmm0\n\t"
                 "vmovdqa64 %3, %%zmm8\n\t"
                 "vpxorq %4, %%zmm1, %%zmm1\n\t"
                 "vpxorq %5, %%zmm9, %%zmm9\n\t"
                 "vpxorq %6, %%zmm0, %%zmm0\n\t"
                 "vpxorq %7, %%zmm8, %%zmm8"
                 :
                 : "m" (q[0]), "m" (q[64]), "m" (p[0]),
                   "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
                   "m" (dp[0]), "m" (dp[64]));

        /*
         * 1 = dq[0]  ^ q[0]
         * 9 = dq[64] ^ q[64]
         * 0 = dp[0]  ^ p[0]
         * 8 = dp[64] ^ p[64]
         */

        /* Replicate the 16-byte qmul lo/hi nibble tables to all lanes. */
        asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                 "vbroadcasti64x2 %1, %%zmm5"
                 :
                 : "m" (qmul[0]), "m" (qmul[16]));

        /* GF multiply by qmul: per-nibble vpshufb lookups, then XOR. */
        asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                 "vpsraw $4, %%zmm9, %%zmm12\n\t"
                 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                 "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
                 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                 "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
                 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                 "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
                 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                 "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
                 "vpxorq %%zmm4, %%zmm5, %%zmm5"
                 :
                 : );

        /*
         * 5 = qx[0]
         * 15 = qx[64]
         */

        /* Same per-nibble lookup trick with the pbmul tables. */
        asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                 "vbroadcasti64x2 %1, %%zmm1\n\t"
                 "vpsraw $4, %%zmm0, %%zmm2\n\t"
                 "vpsraw $4, %%zmm8, %%zmm6\n\t"
                 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                 "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
                 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                 "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
                 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                 "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
                 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                 "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
                 "vpxorq %%zmm12, %%zmm13, %%zmm13"
                 :
                 : "m" (pbmul[0]), "m" (pbmul[16]));

        /*
         * 1  = pbmul[px[0]]
         * 13 = pbmul[px[64]]
         */
        asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                 "vpxorq %%zmm15, %%zmm13, %%zmm13"
                 :
                 : );

        /*
         * 1 = db = DQ
         * 13 = db[64] = DQ[64]
         */
        /* Store recovered B (DQ); derive A as DP = px ^ DQ. */
        asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                 "vmovdqa64 %%zmm13,%1\n\t"
                 "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                 "vpxorq %%zmm13, %%zmm8, %%zmm8"
                 :
                 : "m" (dq[0]), "m" (dq[64]));

        asm volatile("vmovdqa64 %%zmm0, %0\n\t"
                 "vmovdqa64 %%zmm8, %1"
                 :
                 : "m" (dp[0]), "m" (dp[64]));

        bytes -= 128;
        p += 128;
        q += 128;
        dp += 128;
        dq += 128;
#else
        /* Non-x86_64 path: single 64-byte lane per pass, same algorithm. */
        asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                 "vmovdqa64 %1, %%zmm0\n\t"
                 "vpxorq %2, %%zmm1, %%zmm1\n\t"
                 "vpxorq %3, %%zmm0, %%zmm0"
                 :
                 : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

        /* 1 = dq ^ q;  0 = dp ^ p */

        asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                 "vbroadcasti64x2 %1, %%zmm5"
                 :
                 : "m" (qmul[0]), "m" (qmul[16]));

        /*
         * 1 = dq ^ q
         * 3 = dq ^ p >> 4
         */
        asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                 "vpxorq %%zmm4, %%zmm5, %%zmm5"
                 :
                 : );

        /* 5 = qx */

        asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                 "vbroadcasti64x2 %1, %%zmm1"
                 :
                 : "m" (pbmul[0]), "m" (pbmul[16]));

        asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
                 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                 "vpxorq %%zmm4, %%zmm1, %%zmm1"
                 :
                 : );

        /* 1 = pbmul[px] */
        asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                 /* 1 = db = DQ */
                 "vmovdqa64 %%zmm1, %0\n\t"
                 :
                 : "m" (dq[0]));

        asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                 "vmovdqa64 %%zmm0, %0"
                 :
                 : "m" (dp[0]));

        bytes -= 64;
        p += 64;
        q += 64;
        dp += 64;
        dq += 64;
#endif
    }

    kernel_fpu_end();
}
0226 
/*
 * raid6_datap_recov_avx512() - recover one data block plus P with AVX-512.
 * @disks: total number of disks (data disks + P + Q)
 * @bytes: bytes per block; consumed 128 bytes per iteration on x86_64
 *         (64 otherwise), so @bytes must be a multiple of that stride —
 *         presumably guaranteed by the caller; the vmovdqa64 accesses
 *         also require 64-byte alignment
 * @faila: index of the failed data disk
 * @ptrs:  per-disk block pointers; ptrs[disks-2] is P, ptrs[disks-1] is Q
 *
 * Strategy: regenerate Q with the dead block zeroed (leaving "delta Q" in
 * the dead slot), multiply q^dq by the inverse of the disk's generator
 * power to recover the data, then XOR it into P to rebuild parity.
 */
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
                     void **ptrs)
{
    u8 *p, *q, *dq;
    const u8 *qmul;     /* Q multiplier table */
    const u8 x0f = 0x0f;    /* low-nibble mask, broadcast into zmm7 */

    p = (u8 *)ptrs[disks-2];
    q = (u8 *)ptrs[disks-1];

    /*
     * Compute syndrome with zero for the missing data page
     * Use the dead data page as temporary storage for delta q
     */

    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);

    /* Restore pointer table */
    ptrs[faila]   = dq;
    ptrs[disks-1] = q;

    /* Now, pick the proper data tables */
    qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

    /* AVX-512 register use requires kernel FPU context protection. */
    kernel_fpu_begin();

    /* zmm7 = x0f[64]: mask to split each byte into 4-bit table indices */
    asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

    /*
     * NOTE(review): store destinations (dq[], p[]) are passed to the asm
     * as "m" *input* operands with no "memory" clobber; the `asm volatile`
     * ordering is what keeps the sequence correct — do not reorder or
     * merge these asm blocks.
     */
    while (bytes) {
#ifdef CONFIG_X86_64
        /* x86_64 path: two 64-byte lanes (offsets 0 and 64) per pass. */
        asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                 "vmovdqa64 %1, %%zmm8\n\t"
                 "vpxorq %2, %%zmm3, %%zmm3\n\t"
                 "vpxorq %3, %%zmm8, %%zmm8"
                 :
                 : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
                   "m" (q[64]));

        /*
         * 3 = q[0] ^ dq[0]
         * 8 = q[64] ^ dq[64]
         */
        /*
         * Broadcast the 16-byte qmul lo/hi nibble tables, duplicating
         * them into zmm13/zmm14 for the second lane (vpshufb below
         * overwrites its table register).
         */
        asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                 "vmovapd %%zmm0, %%zmm13\n\t"
                 "vbroadcasti64x2 %1, %%zmm1\n\t"
                 "vmovapd %%zmm1, %%zmm14"
                 :
                 : "m" (qmul[0]), "m" (qmul[16]));

        /* GF multiply by qmul: per-nibble vpshufb lookups, then XOR. */
        asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                 "vpsraw $4, %%zmm8, %%zmm12\n\t"
                 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                 "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
                 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                 "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
                 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                 "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
                 "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
                 "vpxorq %%zmm13, %%zmm14, %%zmm14"
                 :
                 : );

        /*
         * 1  = qmul[q[0]  ^ dq[0]]
         * 14 = qmul[q[64] ^ dq[64]]
         */
        asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                 "vmovdqa64 %1, %%zmm12\n\t"
                 "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
                 "vpxorq %%zmm14, %%zmm12, %%zmm12"
                 :
                 : "m" (p[0]), "m" (p[64]));

        /*
         * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
         * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
         */

        /* Store recovered data into dq[] and corrected parity into p[]. */
        asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                 "vmovdqa64 %%zmm14, %1\n\t"
                 "vmovdqa64 %%zmm2, %2\n\t"
                 "vmovdqa64 %%zmm12,%3"
                 :
                 : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
                   "m" (p[64]));

        bytes -= 128;
        p += 128;
        q += 128;
        dq += 128;
#else
        /* Non-x86_64 path: single 64-byte lane per pass, same algorithm. */
        asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                 "vpxorq %1, %%zmm3, %%zmm3"
                 :
                 : "m" (dq[0]), "m" (q[0]));

        /* 3 = q ^ dq */

        asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                 "vbroadcasti64x2 %1, %%zmm1"
                 :
                 : "m" (qmul[0]), "m" (qmul[16]));

        asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                 "vpxorq %%zmm0, %%zmm1, %%zmm1"
                 :
                 : );

        /* 1 = qmul[q ^ dq] */

        asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                 "vpxorq %%zmm1, %%zmm2, %%zmm2"
                 :
                 : "m" (p[0]));

        /* 2 = p ^ qmul[q ^ dq] */

        asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                 "vmovdqa64 %%zmm2, %1"
                 :
                 : "m" (dq[0]), "m" (p[0]));

        bytes -= 64;
        p += 64;
        q += 64;
        dq += 64;
#endif
    }

    kernel_fpu_end();
}
0368 
0369 const struct raid6_recov_calls raid6_recov_avx512 = {
0370     .data2 = raid6_2data_recov_avx512,
0371     .datap = raid6_datap_recov_avx512,
0372     .valid = raid6_has_avx512,
0373 #ifdef CONFIG_X86_64
0374     .name = "avx512x2",
0375 #else
0376     .name = "avx512x1",
0377 #endif
0378     .priority = 3,
0379 };
0380 
0381 #else
0382 #warning "your version of binutils lacks AVX512 support"
0383 #endif