// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 */

#include <linux/raid/pq.h>
#include "x86.h"

static int raid6_has_avx2(void)
{
    return boot_cpu_has(X86_FEATURE_AVX2) &&
        boot_cpu_has(X86_FEATURE_AVX);
}

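/*
 * Recover two failed data blocks. Per H. Peter Anvin's "The
 * mathematics of RAID-6", with Pxy/Qxy the syndromes computed over
 * the surviving data (failed blocks taken as zero) and g the GF(2^8)
 * generator:
 *
 *   DB = (g^faila + g^failb)^-1 * (Q + Qxy)
 *      + (g^(failb-faila) + 1)^-1 * (P + Pxy)
 *   DA = DB + (P + Pxy)
 *
 * where + is XOR. The two constant factors become the qmul and pbmul
 * lookup tables below.
 */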
static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
        int failb, void **ptrs)
{
    u8 *p, *q, *dp, *dq;
    const u8 *pbmul;    /* P multiplier table for B data */
    const u8 *qmul;     /* Q multiplier table (for both) */
    const u8 x0f = 0x0f;

    p = (u8 *)ptrs[disks-2];
    q = (u8 *)ptrs[disks-1];

    /* Compute syndrome with zero for the missing data pages
       Use the dead data pages as temporary storage for
       delta p and delta q */
    dp = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[disks-2] = dp;
    dq = (u8 *)ptrs[failb];
    ptrs[failb] = (void *)raid6_empty_zero_page;
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);
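
    /*
     * dp and dq now hold the partial syndromes Pxy and Qxy, computed
     * with zeros in place of the failed blocks; the loop below forms
     * P ^ Pxy and Q ^ Qxy from them on the fly.
     */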

    /* Restore pointer table */
    ptrs[faila]   = dp;
    ptrs[failb]   = dq;
    ptrs[disks-2] = p;
    ptrs[disks-1] = q;

    /* Now, pick the proper data tables */
    pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
    qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
        raid6_gfexp[failb]]];
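
    /*
     * pbmul multiplies by (g^(failb-faila) + 1)^-1 and qmul by
     * (g^faila + g^failb)^-1, the two recovery coefficients above.
     * Each raid6_vgfmul entry is a pair of 16-byte tables: products
     * for the low nibble and for the high nibble of each byte.
     */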
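    /* SIMD registers may only be touched between kernel_fpu_begin()
       and kernel_fpu_end() */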
    kernel_fpu_begin();

    /* ymm7 = x0f in each byte: mask to split bytes into nibbles */
    asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));

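    /*
     * Each GF(256) multiply below is two vpshufb lookups: the low
     * nibble indexes one 16-byte table, the high nibble the other,
     * and the two results are XORed. The x86-64 build keeps two
     * 32-byte lanes in flight per iteration (ymm8-ymm15 exist only
     * in 64-bit mode), so it consumes 64 bytes per pass versus 32.
     */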
    while (bytes) {
#ifdef CONFIG_X86_64
        asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
        asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
        asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
        asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
        asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
        asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
        asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
        asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));

        /*
         * 1 = dq[0]  ^ q[0]
         * 9 = dq[32] ^ q[32]
         * 0 = dp[0]  ^ p[0]
         * 8 = dp[32] ^ p[32]
         */

        asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
        asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));

        asm volatile("vpsraw $4, %ymm1, %ymm3");
        asm volatile("vpsraw $4, %ymm9, %ymm12");
        asm volatile("vpand %ymm7, %ymm1, %ymm1");
        asm volatile("vpand %ymm7, %ymm9, %ymm9");
        asm volatile("vpand %ymm7, %ymm3, %ymm3");
        asm volatile("vpand %ymm7, %ymm12, %ymm12");
        asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
        asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
        asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
        asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
        asm volatile("vpxor %ymm14, %ymm15, %ymm15");
        asm volatile("vpxor %ymm4, %ymm5, %ymm5");

        /*
         * 5 = qx[0]
         * 15 = qx[32]
         */

        asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
        asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
        asm volatile("vpsraw $4, %ymm0, %ymm2");
        asm volatile("vpsraw $4, %ymm8, %ymm6");
        asm volatile("vpand %ymm7, %ymm0, %ymm3");
        asm volatile("vpand %ymm7, %ymm8, %ymm14");
        asm volatile("vpand %ymm7, %ymm2, %ymm2");
        asm volatile("vpand %ymm7, %ymm6, %ymm6");
        asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
        asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
        asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
        asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
        asm volatile("vpxor %ymm4, %ymm1, %ymm1");
        asm volatile("vpxor %ymm12, %ymm13, %ymm13");

        /*
         * 1  = pbmul[px[0]]
         * 13 = pbmul[px[32]]
         */
        asm volatile("vpxor %ymm5, %ymm1, %ymm1");
        asm volatile("vpxor %ymm15, %ymm13, %ymm13");

        /*
         * 1 = db = DQ
         * 13 = db[32] = DQ[32]
         */
        asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
        asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
        asm volatile("vpxor %ymm1, %ymm0, %ymm0");
        asm volatile("vpxor %ymm13, %ymm8, %ymm8");

        asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
        asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));

        bytes -= 64;
        p += 64;
        q += 64;
        dp += 64;
        dq += 64;
#else
        asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
        asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
        asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
        asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));

        /* 1 = dq ^ q;  0 = dp ^ p */

        asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
        asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));

        /*
         * 1 = dq ^ q
         * 3 = (dq ^ q) >> 4
         */
        asm volatile("vpsraw $4, %ymm1, %ymm3");
        asm volatile("vpand %ymm7, %ymm1, %ymm1");
        asm volatile("vpand %ymm7, %ymm3, %ymm3");
        asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
        asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
        asm volatile("vpxor %ymm4, %ymm5, %ymm5");

        /* 5 = qx */

        asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
        asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));

        asm volatile("vpsraw $4, %ymm0, %ymm2");
        asm volatile("vpand %ymm7, %ymm0, %ymm3");
        asm volatile("vpand %ymm7, %ymm2, %ymm2");
        asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
        asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
        asm volatile("vpxor %ymm4, %ymm1, %ymm1");

        /* 1 = pbmul[px] */
        asm volatile("vpxor %ymm5, %ymm1, %ymm1");
        /* 1 = db = DQ */
        asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));

        asm volatile("vpxor %ymm1, %ymm0, %ymm0");
        asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));

        bytes -= 32;
        p += 32;
        q += 32;
        dp += 32;
        dq += 32;
#endif
    }

    kernel_fpu_end();
}

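/*
 * Recover one failed data block plus P. With Qx the Q syndrome
 * computed over the surviving data, Q ^ Qx = g^faila * DA, so
 * DA = g^-faila * (Q ^ Qx); the lost P is then rebuilt by XORing
 * DA into the partial P that gen_syndrome leaves behind.
 */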
static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
        void **ptrs)
{
    u8 *p, *q, *dq;
    const u8 *qmul;     /* Q multiplier table */
    const u8 x0f = 0x0f;

    p = (u8 *)ptrs[disks-2];
    q = (u8 *)ptrs[disks-1];

    /* Compute syndrome with zero for the missing data page
       Use the dead data page as temporary storage for delta q */
    dq = (u8 *)ptrs[faila];
    ptrs[faila] = (void *)raid6_empty_zero_page;
    ptrs[disks-1] = dq;

    raid6_call.gen_syndrome(disks, bytes, ptrs);
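
    /*
     * p now holds the XOR of the surviving data blocks (the failed
     * P was simply overwritten) and dq holds Qx.
     */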

    /* Restore pointer table */
    ptrs[faila]   = dq;
    ptrs[disks-1] = q;

    /* Now, pick the proper data table: qmul multiplies by g^-faila */
    qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

    kernel_fpu_begin();

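    /* ymm7 = x0f in each byte: mask to split bytes into nibbles */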
    asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));

    while (bytes) {
#ifdef CONFIG_X86_64
        asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
        asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
        asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
        asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));

        /*
         * 3 = q[0] ^ dq[0]
         * 8 = q[32] ^ dq[32]
         */
        asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
        asm volatile("vmovapd %ymm0, %ymm13");
        asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
        asm volatile("vmovapd %ymm1, %ymm14");
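
        /*
         * ymm13/ymm14 are copies of the two lookup tables: vpshufb
         * below overwrites its table register with the result, and
         * each 32-byte lane needs its own copy.
         */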

        asm volatile("vpsraw $4, %ymm3, %ymm6");
        asm volatile("vpsraw $4, %ymm8, %ymm12");
        asm volatile("vpand %ymm7, %ymm3, %ymm3");
        asm volatile("vpand %ymm7, %ymm8, %ymm8");
        asm volatile("vpand %ymm7, %ymm6, %ymm6");
        asm volatile("vpand %ymm7, %ymm12, %ymm12");
        asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
        asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
        asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
        asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
        asm volatile("vpxor %ymm0, %ymm1, %ymm1");
        asm volatile("vpxor %ymm13, %ymm14, %ymm14");

        /*
         * 1  = qmul[q[0]  ^ dq[0]]
         * 14 = qmul[q[32] ^ dq[32]]
         */
        asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
        asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
        asm volatile("vpxor %ymm1, %ymm2, %ymm2");
        asm volatile("vpxor %ymm14, %ymm12, %ymm12");

        /*
         * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
         * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
         */

        asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
        asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
        asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
        asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));

        bytes -= 64;
        p += 64;
        q += 64;
        dq += 64;
#else
        asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
        asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));

        /* 3 = q ^ dq */

        asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
        asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));

        asm volatile("vpsraw $4, %ymm3, %ymm6");
        asm volatile("vpand %ymm7, %ymm3, %ymm3");
        asm volatile("vpand %ymm7, %ymm6, %ymm6");
        asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
        asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
        asm volatile("vpxor %ymm0, %ymm1, %ymm1");

        /* 1 = qmul[q ^ dq] */

        asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
        asm volatile("vpxor %ymm1, %ymm2, %ymm2");

        /* 2 = p ^ qmul[q ^ dq] */

        asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
        asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));

        bytes -= 32;
        p += 32;
        q += 32;
        dq += 32;
#endif
    }

    kernel_fpu_end();
}

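/*
 * Descriptor picked up by the RAID-6 core, which selects the valid
 * recovery implementation with the highest priority. "x2"/"x1" in
 * the name records how many 32-byte lanes each loop iteration
 * handles; priority 2 ranks this above the SSSE3 recovery routines.
 */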
const struct raid6_recov_calls raid6_recov_avx2 = {
    .data2 = raid6_2data_recov_avx2,
    .datap = raid6_datap_recov_avx2,
    .valid = raid6_has_avx2,
#ifdef CONFIG_X86_64
    .name = "avx2x2",
#else
    .name = "avx2x1",
#endif
    .priority = 2,
};