// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6/sse2.c
 *
 * SSE-2 implementation of RAID-6 syndrome functions
 *
 */

#include <linux/raid/pq.h>
#include "x86.h"

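/*
 * 0x1d is the low byte of the RAID-6 generator polynomial
 * x^8 + x^4 + x^3 + x^2 + 1 (0x11d); it is the value XORed in when a
 * byte overflows while being multiplied by {02} in GF(2^8).
 */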
static const struct raid6_sse_constants {
    u64 x1d[2];
} raid6_sse_constants __attribute__((aligned(16))) = {
    { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
};

static int raid6_have_sse2(void)
{
    /* Not really boot_cpu but "all_cpus" */
    return boot_cpu_has(X86_FEATURE_MMX) &&
        boot_cpu_has(X86_FEATURE_FXSR) &&
        boot_cpu_has(X86_FEATURE_XMM) &&
        boot_cpu_has(X86_FEATURE_XMM2);
}

/*
 * Plain SSE2 implementation
 */
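/*
 * gen_syndrome computes both parities over one stripe: P is the plain
 * XOR of all data blocks, and Q is the Reed-Solomon syndrome
 * Q = D_z0*g^z0 + ... + D_1*g + D_0 over GF(2^8), evaluated by Horner's
 * rule (multiply the running Q by g = {02}, then XOR in the next block).
 */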
static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = disks - 3;     /* Highest data disk */
    p = dptr[z0+1];     /* XOR parity */
    q = dptr[z0+2];     /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
    asm volatile("pxor %xmm5,%xmm5");   /* Zero temp */

    for ( d = 0 ; d < bytes ; d += 16 ) {
        asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
        asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
        asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
        asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
        asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
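        /*
         * The pcmpgtb/paddb/pand/pxor sequence below multiplies every
         * byte of Q (%xmm4) by {02} in GF(2^8): pcmpgtb against the
         * zero register sets %xmm5 to 0xff in lanes whose top bit is
         * set, paddb shifts each byte left by one, and the masked XOR
         * with 0x1d folds the overflow back in.
         */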
        for ( z = z0-2 ; z >= 0 ; z-- ) {
            asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
            asm volatile("pcmpgtb %xmm4,%xmm5");
            asm volatile("paddb %xmm4,%xmm4");
            asm volatile("pand %xmm0,%xmm5");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm5,%xmm5");
            asm volatile("pxor %xmm6,%xmm2");
            asm volatile("pxor %xmm6,%xmm4");
            asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
        }
        asm volatile("pcmpgtb %xmm4,%xmm5");
        asm volatile("paddb %xmm4,%xmm4");
        asm volatile("pand %xmm0,%xmm5");
        asm volatile("pxor %xmm5,%xmm4");
        asm volatile("pxor %xmm5,%xmm5");
        asm volatile("pxor %xmm6,%xmm2");
        asm volatile("pxor %xmm6,%xmm4");

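        /*
         * Stream P and Q out with non-temporal stores: the freshly
         * computed parity will not be read again here, so keep it out
         * of the cache.  The sfence after the loop orders these stores.
         */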
        asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
        asm volatile("pxor %xmm2,%xmm2");
        asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
        asm volatile("pxor %xmm4,%xmm4");
    }

    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}


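/*
 * xor_syndrome XORs the partial P/Q contribution of data disks
 * start..stop into the existing parity blocks (used for partial stripe
 * updates).  Disks below 'start' contribute nothing to P and only the
 * repeated multiplication by g to Q, hence the "left side" loop below.
 */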
static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
                     size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = stop;      /* P/Q right side optimization */
    p = dptr[disks-2];  /* XOR parity */
    q = dptr[disks-1];  /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

    for ( d = 0 ; d < bytes ; d += 16 ) {
        asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
        asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
        asm volatile("pxor %xmm4,%xmm2");
        /* P/Q data pages */
        for ( z = z0-1 ; z >= start ; z-- ) {
            asm volatile("pxor %xmm5,%xmm5");
            asm volatile("pcmpgtb %xmm4,%xmm5");
            asm volatile("paddb %xmm4,%xmm4");
            asm volatile("pand %xmm0,%xmm5");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
            asm volatile("pxor %xmm5,%xmm2");
            asm volatile("pxor %xmm5,%xmm4");
        }
        /* P/Q left side optimization */
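        /* Only the multiply-by-g steps remain; no data is loaded here. */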
        for ( z = start-1 ; z >= 0 ; z-- ) {
            asm volatile("pxor %xmm5,%xmm5");
            asm volatile("pcmpgtb %xmm4,%xmm5");
            asm volatile("paddb %xmm4,%xmm4");
            asm volatile("pand %xmm0,%xmm5");
            asm volatile("pxor %xmm5,%xmm4");
        }
        asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
        /* Don't use movntdq for r/w memory area < cache line */
        asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
        asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
    }

    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x1 = {
    raid6_sse21_gen_syndrome,
    raid6_sse21_xor_syndrome,
    raid6_have_sse2,
    "sse2x1",
    1           /* Has cache hints */
};

/*
 * Unrolled-by-2 SSE2 implementation
 */
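/*
 * Same algorithm as above, but two 16-byte vectors (32 bytes of each
 * block) are processed per loop iteration with a second set of
 * registers, which helps hide load and instruction latency.
 */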
static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = disks - 3;     /* Highest data disk */
    p = dptr[z0+1];     /* XOR parity */
    q = dptr[z0+2];     /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
    asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
    asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */

    /* We uniformly assume a single prefetch covers at least 32 bytes */
    for ( d = 0 ; d < bytes ; d += 32 ) {
        asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
        asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
        asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
        asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
        asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
        for ( z = z0-1 ; z >= 0 ; z-- ) {
            asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
            asm volatile("pcmpgtb %xmm4,%xmm5");
            asm volatile("pcmpgtb %xmm6,%xmm7");
            asm volatile("paddb %xmm4,%xmm4");
            asm volatile("paddb %xmm6,%xmm6");
            asm volatile("pand %xmm0,%xmm5");
            asm volatile("pand %xmm0,%xmm7");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm7,%xmm6");
            asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
            asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
            asm volatile("pxor %xmm5,%xmm2");
            asm volatile("pxor %xmm7,%xmm3");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm7,%xmm6");
            asm volatile("pxor %xmm5,%xmm5");
            asm volatile("pxor %xmm7,%xmm7");
        }
        asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
        asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
        asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
        asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
    }

    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}

static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
                     size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = stop;      /* P/Q right side optimization */
    p = dptr[disks-2];  /* XOR parity */
    q = dptr[disks-1];  /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

    for ( d = 0 ; d < bytes ; d += 32 ) {
        asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
        asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
        asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
        asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
        asm volatile("pxor %xmm4,%xmm2");
        asm volatile("pxor %xmm6,%xmm3");
        /* P/Q data pages */
        for ( z = z0-1 ; z >= start ; z-- ) {
            asm volatile("pxor %xmm5,%xmm5");
            asm volatile("pxor %xmm7,%xmm7");
            asm volatile("pcmpgtb %xmm4,%xmm5");
            asm volatile("pcmpgtb %xmm6,%xmm7");
            asm volatile("paddb %xmm4,%xmm4");
            asm volatile("paddb %xmm6,%xmm6");
            asm volatile("pand %xmm0,%xmm5");
            asm volatile("pand %xmm0,%xmm7");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm7,%xmm6");
            asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
            asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
            asm volatile("pxor %xmm5,%xmm2");
            asm volatile("pxor %xmm7,%xmm3");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm7,%xmm6");
        }
        /* P/Q left side optimization */
        for ( z = start-1 ; z >= 0 ; z-- ) {
            asm volatile("pxor %xmm5,%xmm5");
            asm volatile("pxor %xmm7,%xmm7");
            asm volatile("pcmpgtb %xmm4,%xmm5");
            asm volatile("pcmpgtb %xmm6,%xmm7");
            asm volatile("paddb %xmm4,%xmm4");
            asm volatile("paddb %xmm6,%xmm6");
            asm volatile("pand %xmm0,%xmm5");
            asm volatile("pand %xmm0,%xmm7");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm7,%xmm6");
        }
        asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
        asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
        /* Don't use movntdq for r/w memory area < cache line */
        asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
        asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
        asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
        asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
    }

    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x2 = {
    raid6_sse22_gen_syndrome,
    raid6_sse22_xor_syndrome,
    raid6_have_sse2,
    "sse2x2",
    1           /* Has cache hints */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 SSE2 implementation
 */
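/*
 * This variant uses %xmm8-%xmm15, which exist only in 64-bit mode,
 * hence the CONFIG_X86_64 guard.
 */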
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = disks - 3;     /* Highest data disk */
    p = dptr[z0+1];     /* XOR parity */
    q = dptr[z0+2];     /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
    asm volatile("pxor %xmm2,%xmm2");   /* P[0] */
    asm volatile("pxor %xmm3,%xmm3");   /* P[1] */
    asm volatile("pxor %xmm4,%xmm4");   /* Q[0] */
    asm volatile("pxor %xmm5,%xmm5");   /* Zero temp */
    asm volatile("pxor %xmm6,%xmm6");   /* Q[1] */
    asm volatile("pxor %xmm7,%xmm7");   /* Zero temp */
    asm volatile("pxor %xmm10,%xmm10"); /* P[2] */
    asm volatile("pxor %xmm11,%xmm11"); /* P[3] */
    asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */
    asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */
    asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */
    asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */

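    /*
     * With sixteen XMM registers available, the P and Q accumulators
     * for all four 16-byte lanes stay live across the whole data loop
     * and are stored and re-zeroed only after each 64-byte chunk.
     */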
    for ( d = 0 ; d < bytes ; d += 64 ) {
        for ( z = z0 ; z >= 0 ; z-- ) {
            /* The second prefetch seems to improve performance... */
            asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
            asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
            asm volatile("pcmpgtb %xmm4,%xmm5");
            asm volatile("pcmpgtb %xmm6,%xmm7");
            asm volatile("pcmpgtb %xmm12,%xmm13");
            asm volatile("pcmpgtb %xmm14,%xmm15");
            asm volatile("paddb %xmm4,%xmm4");
            asm volatile("paddb %xmm6,%xmm6");
            asm volatile("paddb %xmm12,%xmm12");
            asm volatile("paddb %xmm14,%xmm14");
            asm volatile("pand %xmm0,%xmm5");
            asm volatile("pand %xmm0,%xmm7");
            asm volatile("pand %xmm0,%xmm13");
            asm volatile("pand %xmm0,%xmm15");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm7,%xmm6");
            asm volatile("pxor %xmm13,%xmm12");
            asm volatile("pxor %xmm15,%xmm14");
            asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
            asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
            asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
            asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
            asm volatile("pxor %xmm5,%xmm2");
            asm volatile("pxor %xmm7,%xmm3");
            asm volatile("pxor %xmm13,%xmm10");
            asm volatile("pxor %xmm15,%xmm11");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm7,%xmm6");
            asm volatile("pxor %xmm13,%xmm12");
            asm volatile("pxor %xmm15,%xmm14");
            asm volatile("pxor %xmm5,%xmm5");
            asm volatile("pxor %xmm7,%xmm7");
            asm volatile("pxor %xmm13,%xmm13");
            asm volatile("pxor %xmm15,%xmm15");
        }
        asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
        asm volatile("pxor %xmm2,%xmm2");
        asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
        asm volatile("pxor %xmm3,%xmm3");
        asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
        asm volatile("pxor %xmm10,%xmm10");
        asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
        asm volatile("pxor %xmm11,%xmm11");
        asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
        asm volatile("pxor %xmm4,%xmm4");
        asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
        asm volatile("pxor %xmm6,%xmm6");
        asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
        asm volatile("pxor %xmm12,%xmm12");
        asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
        asm volatile("pxor %xmm14,%xmm14");
    }

    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}

static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
                     size_t bytes, void **ptrs)
{
    u8 **dptr = (u8 **)ptrs;
    u8 *p, *q;
    int d, z, z0;

    z0 = stop;      /* P/Q right side optimization */
    p = dptr[disks-2];  /* XOR parity */
    q = dptr[disks-1];  /* RS syndrome */

    kernel_fpu_begin();

    asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));

    for ( d = 0 ; d < bytes ; d += 64 ) {
        asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
        asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
        asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
        asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
        asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
        asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
        asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
        asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
        asm volatile("pxor %xmm4,%xmm2");
        asm volatile("pxor %xmm6,%xmm3");
        asm volatile("pxor %xmm12,%xmm10");
        asm volatile("pxor %xmm14,%xmm11");
        /* P/Q data pages */
        for ( z = z0-1 ; z >= start ; z-- ) {
            asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
            asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
            asm volatile("pxor %xmm5,%xmm5");
            asm volatile("pxor %xmm7,%xmm7");
            asm volatile("pxor %xmm13,%xmm13");
            asm volatile("pxor %xmm15,%xmm15");
            asm volatile("pcmpgtb %xmm4,%xmm5");
            asm volatile("pcmpgtb %xmm6,%xmm7");
            asm volatile("pcmpgtb %xmm12,%xmm13");
            asm volatile("pcmpgtb %xmm14,%xmm15");
            asm volatile("paddb %xmm4,%xmm4");
            asm volatile("paddb %xmm6,%xmm6");
            asm volatile("paddb %xmm12,%xmm12");
            asm volatile("paddb %xmm14,%xmm14");
            asm volatile("pand %xmm0,%xmm5");
            asm volatile("pand %xmm0,%xmm7");
            asm volatile("pand %xmm0,%xmm13");
            asm volatile("pand %xmm0,%xmm15");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm7,%xmm6");
            asm volatile("pxor %xmm13,%xmm12");
            asm volatile("pxor %xmm15,%xmm14");
            asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
            asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
            asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
            asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
            asm volatile("pxor %xmm5,%xmm2");
            asm volatile("pxor %xmm7,%xmm3");
            asm volatile("pxor %xmm13,%xmm10");
            asm volatile("pxor %xmm15,%xmm11");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm7,%xmm6");
            asm volatile("pxor %xmm13,%xmm12");
            asm volatile("pxor %xmm15,%xmm14");
        }
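        /*
         * Prefetch the old Q blocks; they are XORed in after the
         * left-side multiplications below.
         */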
        asm volatile("prefetchnta %0" :: "m" (q[d]));
        asm volatile("prefetchnta %0" :: "m" (q[d+32]));
        /* P/Q left side optimization */
        for ( z = start-1 ; z >= 0 ; z-- ) {
            asm volatile("pxor %xmm5,%xmm5");
            asm volatile("pxor %xmm7,%xmm7");
            asm volatile("pxor %xmm13,%xmm13");
            asm volatile("pxor %xmm15,%xmm15");
            asm volatile("pcmpgtb %xmm4,%xmm5");
            asm volatile("pcmpgtb %xmm6,%xmm7");
            asm volatile("pcmpgtb %xmm12,%xmm13");
            asm volatile("pcmpgtb %xmm14,%xmm15");
            asm volatile("paddb %xmm4,%xmm4");
            asm volatile("paddb %xmm6,%xmm6");
            asm volatile("paddb %xmm12,%xmm12");
            asm volatile("paddb %xmm14,%xmm14");
            asm volatile("pand %xmm0,%xmm5");
            asm volatile("pand %xmm0,%xmm7");
            asm volatile("pand %xmm0,%xmm13");
            asm volatile("pand %xmm0,%xmm15");
            asm volatile("pxor %xmm5,%xmm4");
            asm volatile("pxor %xmm7,%xmm6");
            asm volatile("pxor %xmm13,%xmm12");
            asm volatile("pxor %xmm15,%xmm14");
        }
        asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
        asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
        asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
        asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
        asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
        asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
        asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
        asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
        asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
        asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
        asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
        asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
    }
    asm volatile("sfence" : : : "memory");
    kernel_fpu_end();
}


const struct raid6_calls raid6_sse2x4 = {
    raid6_sse24_gen_syndrome,
    raid6_sse24_xor_syndrome,
    raid6_have_sse2,
    "sse2x4",
    1           /* Has cache hints */
};

#endif /* CONFIG_X86_64 */