0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019 #ifdef CONFIG_X86_32
0020
0021 #include <linux/raid/pq.h>
0022 #include "x86.h"
0023
0024
0025 extern const struct raid6_mmx_constants {
0026 u64 x1d;
0027 } raid6_mmx_constants;
0028
0029 static int raid6_have_sse1_or_mmxext(void)
0030 {
0031
0032 return boot_cpu_has(X86_FEATURE_MMX) &&
0033 (boot_cpu_has(X86_FEATURE_XMM) ||
0034 boot_cpu_has(X86_FEATURE_MMXEXT));
0035 }
0036
0037
0038
0039
/*
 * Generate the P (XOR parity) and Q (Reed-Solomon) RAID-6 syndromes,
 * one 8-byte MMX register wide per iteration.
 *
 * @disks: total number of disks, including the P and Q destinations;
 *         ptrs[0..disks-3] are data pages, ptrs[disks-2] is P,
 *         ptrs[disks-1] is Q.
 * @bytes: length of each page; assumed to be a multiple of 8
 *         (the loop steps by 8 with no tail handling — verify at callers).
 * @ptrs:  array of @disks page pointers.
 *
 * NOTE(review): each asm statement carries implicit MMX register state
 * into the next (mm0 = multiplier constant, mm2 = running P, mm4 =
 * running Q, mm5 = scratch mask, mm6 = prefetched next data word), so
 * statement order here is load-bearing; do not reorder.
 */
static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* index of the highest data disk */
	p = dptr[z0+1];		/* XOR parity destination */
	q = dptr[z0+2];		/* RS syndrome destination */

	/* MMX clobbers the FPU state; bracket with kernel_fpu_begin/end */
	kernel_fpu_begin();

	/* mm0 = 0x1d repeated in every byte — presumably the GF(2^8)
	 * reduction polynomial constant (field is named x1d); mm5 = 0 */
	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 8 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/*
			 * Multiply Q (mm4) by 2 in GF(2^8):
			 * mm5 = 0xff in each byte whose top bit is set
			 * (signed 0 > byte), shift left via paddb, then
			 * conditionally xor in the 0x1d constant.
			 */
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm5,%mm5");	/* re-zero the mask */
			/* Fold the previously loaded data word into P and Q */
			asm volatile("pxor %mm6,%mm2");
			asm volatile("pxor %mm6,%mm4");
			/* Load the next data word for the following pass */
			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
		}
		/* Final multiply-by-2 + fold for the last loaded word */
		asm volatile("pcmpgtb %mm4,%mm5");
		asm volatile("paddb %mm4,%mm4");
		asm volatile("pand %mm0,%mm5");
		asm volatile("pxor %mm5,%mm4");
		asm volatile("pxor %mm5,%mm5");
		asm volatile("pxor %mm6,%mm2");
		asm volatile("pxor %mm6,%mm4");

		/* Non-temporal stores: bypass the cache for P/Q output */
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
	}

	/* Order the movntq stores before anyone reads P/Q */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
0087
/*
 * Algorithm descriptor for the one-register-wide SSE1/MMXEXT variant.
 * Positional initializers — presumably gen_syndrome, xor_syndrome
 * (not implemented), valid, name, priority; verify against
 * struct raid6_calls in <linux/raid/pq.h>.
 */
const struct raid6_calls raid6_sse1x1 = {
	raid6_sse11_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_sse1_or_mmxext,
	"sse1x1",
	1			/* Has cache hints */
};
0095
0096
0097
0098
/*
 * Two-register-wide (16 bytes/iteration) variant of the SSE1/MMXEXT
 * syndrome generator; unrolls the inner loop across two 8-byte lanes
 * to hide instruction latency.
 *
 * @disks: total number of disks, including the P and Q destinations;
 *         ptrs[0..disks-3] are data pages, ptrs[disks-2] is P,
 *         ptrs[disks-1] is Q.
 * @bytes: length of each page; assumed to be a multiple of 16
 *         (the loop steps by 16 with no tail handling — verify at callers).
 * @ptrs:  array of @disks page pointers.
 *
 * Register roles: mm0 = x1d constant; mm2/mm3 = running P for the two
 * lanes; mm4/mm6 = running Q; mm5/mm7 serve double duty as the
 * high-bit compare masks AND as the data-load registers, so they must
 * be re-zeroed at the bottom of every inner-loop pass.  Statement
 * order between the asm statements is load-bearing; do not reorder.
 */
static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* index of the highest data disk */
	p = dptr[z0+1];		/* XOR parity destination */
	q = dptr[z0+2];		/* RS syndrome destination */

	/* MMX clobbers the FPU state; bracket with kernel_fpu_begin/end */
	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp, lane 0 */
	asm volatile("pxor %mm7,%mm7");	/* Zero temp, lane 1 */

	/* Process two 8-byte lanes per iteration */
	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %mm3,%mm6");	/* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/* GF(2^8) multiply-by-2 of both Q lanes:
			 * mask bytes with the top bit set, shift left,
			 * conditionally xor in the 0x1d constant. */
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("pcmpgtb %mm6,%mm7");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("paddb %mm6,%mm6");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pand %mm0,%mm7");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			/* Reuse mm5/mm7 to load this disk's data words */
			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
			/* Fold data into P and Q for both lanes */
			asm volatile("pxor %mm5,%mm2");
			asm volatile("pxor %mm7,%mm3");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			/* Re-zero mm5/mm7 so the next pass's pcmpgtb
			 * starts from a clean mask */
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm7,%mm7");
		}
		/* Non-temporal stores: bypass the cache for P/Q output */
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
	}

	/* Order the movntq stores before anyone reads P/Q */
	asm volatile("sfence" : :: "memory");
	kernel_fpu_end();
}
0150
/*
 * Algorithm descriptor for the two-register-wide SSE1/MMXEXT variant.
 * Positional initializers — presumably gen_syndrome, xor_syndrome
 * (not implemented), valid, name, priority; verify against
 * struct raid6_calls in <linux/raid/pq.h>.
 */
const struct raid6_calls raid6_sse1x2 = {
	raid6_sse12_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_sse1_or_mmxext,
	"sse1x2",
	1			/* Has cache hints */
};
0158
0159 #endif