/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6/sse2.c
 *
 * SSE-2 implementation of the RAID-6 syndrome functions.
 */

#include <linux/raid/pq.h>
#include "x86.h"

/*
 * 0x1d is the low byte of the RAID-6 field polynomial 0x11d
 * (x^8 + x^4 + x^3 + x^2 + 1), replicated across a 16-byte vector.
 */
static const struct raid6_sse_constants {
        u64 x1d[2];
} raid6_sse_constants __attribute__((aligned(16))) = {
        { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
};

static int raid6_have_sse2(void)
{
        /* Not really boot_cpu, but "all_cpus" */
        return boot_cpu_has(X86_FEATURE_MMX) &&
                boot_cpu_has(X86_FEATURE_FXSR) &&
                boot_cpu_has(X86_FEATURE_XMM) &&
                boot_cpu_has(X86_FEATURE_XMM2);
}
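
/*
 * All of the routines below multiply the running Q accumulator by two in
 * GF(2^8) with the same branch-free four-instruction sequence (shown here
 * with the register roles used by the one-way routine; xmm0 holds the 0x1d
 * constant, xmm4 the accumulator, xmm5 a zeroed temporary):
 *
 *      pcmpgtb %xmm4,%xmm5   ; xmm5 = 0xff in lanes whose top bit is set
 *      paddb   %xmm4,%xmm4   ; shift every byte left by one
 *      pand    %xmm0,%xmm5   ; keep 0x1d only in the lanes that overflowed
 *      pxor    %xmm5,%xmm4   ; reduce by the field polynomial
 *
 * A scalar sketch of the same per-byte operation, for reference only; the
 * helper name mul2() is made up for the example and is not part of this
 * file's build:
 *
 *      static inline u8 mul2(u8 v)
 *      {
 *              u8 mask = (v & 0x80) ? 0x1d : 0;        // pcmpgtb + pand
 *              return (u8)(v << 1) ^ mask;             // paddb + pxor
 *      }
 */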

/*
 * Plain SSE2 implementation: one 16-byte chunk of P and Q per iteration.
 */
static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
        asm volatile("pxor %xmm5,%xmm5");       /* Zero temp */

        for ( d = 0 ; d < bytes ; d += 16 ) {
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
                asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
                asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
                asm volatile("movdqa %xmm2,%xmm4");     /* Q[0] */
                asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
                for ( z = z0-2 ; z >= 0 ; z-- ) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm6,%xmm2");
                        asm volatile("pxor %xmm6,%xmm4");
                        asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
                }
                asm volatile("pcmpgtb %xmm4,%xmm5");
                asm volatile("paddb %xmm4,%xmm4");
                asm volatile("pand %xmm0,%xmm5");
                asm volatile("pxor %xmm5,%xmm4");
                asm volatile("pxor %xmm5,%xmm5");
                asm volatile("pxor %xmm6,%xmm2");
                asm volatile("pxor %xmm6,%xmm4");

                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
                asm volatile("pxor %xmm2,%xmm2");
                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
                asm volatile("pxor %xmm4,%xmm4");
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}
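
/*
 * What the vector loop above computes, written as portable C.  A minimal
 * sketch for reference only; it assumes the hypothetical mul2() helper from
 * the comment near the top of the file and is not part of this file's build:
 *
 *      for (d = 0; d < bytes; d++) {
 *              u8 wp = 0, wq = 0;
 *              for (z = z0; z >= 0; z--) {
 *                      u8 wd = dptr[z][d];
 *                      wp ^= wd;               // P: plain XOR parity
 *                      wq = mul2(wq) ^ wd;     // Q: Horner evaluation of
 *              }                               //    sum(2^z * D_z) in GF(2^8)
 *              p[d] = wp;
 *              q[d] = wq;
 *      }
 *
 * The non-temporal movntdq stores keep the P/Q output from polluting the
 * cache, which is why the sfence is needed before kernel_fpu_end().
 */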

static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

        for ( d = 0 ; d < bytes ; d += 16 ) {
                asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
                asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
                asm volatile("pxor %xmm4,%xmm2");
                /* P/Q data pages */
                for ( z = z0-1 ; z >= start ; z-- ) {
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
                        asm volatile("pxor %xmm5,%xmm2");
                        asm volatile("pxor %xmm5,%xmm4");
                }
                /* P/Q left side optimization */
                for ( z = start-1 ; z >= 0 ; z-- ) {
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pxor %xmm5,%xmm4");
                }
                asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
                /* Don't use movntdq for r/w memory area < cache line */
                asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
                asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}
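
/*
 * xor_syndrome() folds the P/Q contribution of data disks start..stop into
 * an existing P and Q (addition and subtraction are both XOR in GF(2^8), so
 * the same routine can remove old data or add new data).  A scalar sketch of
 * the per-byte update, again assuming the hypothetical mul2() helper and
 * shown for illustration only:
 *
 *      for (d = 0; d < bytes; d++) {
 *              u8 wp = p[d];
 *              u8 wq = dptr[stop][d];
 *              wp ^= wq;
 *              for (z = stop - 1; z >= start; z--) {
 *                      wq = mul2(wq) ^ dptr[z][d];
 *                      wp ^= dptr[z][d];
 *              }
 *              for (z = start - 1; z >= 0; z--)
 *                      wq = mul2(wq);  // align the delta to weight 2^z
 *              p[d] = wp;
 *              q[d] = wq ^ q[d];
 *      }
 */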

const struct raid6_calls raid6_sse2x1 = {
        raid6_sse21_gen_syndrome,
        raid6_sse21_xor_syndrome,
        raid6_have_sse2,
        "sse2x1",
        1                       /* Has cache hints */
};

/*
 * Unrolled-by-2 SSE2 implementation
 */
static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
        asm volatile("pxor %xmm5,%xmm5");       /* Zero temp */
        asm volatile("pxor %xmm7,%xmm7");       /* Zero temp */

        /* We uniformly assume a single prefetch covers at least 32 bytes */
        for ( d = 0 ; d < bytes ; d += 32 ) {
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
                asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
                asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
                asm volatile("movdqa %xmm2,%xmm4");     /* Q[0] */
                asm volatile("movdqa %xmm3,%xmm6");     /* Q[1] */
                for ( z = z0-1 ; z >= 0 ; z-- ) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
                        asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
                        asm volatile("pxor %xmm5,%xmm2");
                        asm volatile("pxor %xmm7,%xmm3");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                }
                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
                asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
                asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;
        p = dptr[disks-2];
        q = dptr[disks-1];

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

        for ( d = 0 ; d < bytes ; d += 32 ) {
                asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
                asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
                asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
                asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
                asm volatile("pxor %xmm4,%xmm2");
                asm volatile("pxor %xmm6,%xmm3");

                for ( z = z0-1 ; z >= start ; z-- ) {
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
                        asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
                        asm volatile("pxor %xmm5,%xmm2");
                        asm volatile("pxor %xmm7,%xmm3");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                }

                for ( z = start-1 ; z >= 0 ; z-- ) {
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                }
                asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
                asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));

                asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
                asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
                asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
                asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x2 = {
        raid6_sse22_gen_syndrome,
        raid6_sse22_xor_syndrome,
        raid6_have_sse2,
        "sse2x2",
        1                       /* Has cache hints */
};
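
/*
 * The 4-way unrolled routines below keep eight P/Q accumulators live at once
 * and therefore need %xmm8-%xmm15, which are only available in 64-bit mode;
 * hence the CONFIG_X86_64 guard.
 */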

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 SSE2 implementation
 */
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
        asm volatile("pxor %xmm2,%xmm2");       /* P[0] */
        asm volatile("pxor %xmm3,%xmm3");       /* P[1] */
        asm volatile("pxor %xmm4,%xmm4");       /* Q[0] */
        asm volatile("pxor %xmm5,%xmm5");       /* Zero temp */
        asm volatile("pxor %xmm6,%xmm6");       /* Q[1] */
        asm volatile("pxor %xmm7,%xmm7");       /* Zero temp */
        asm volatile("pxor %xmm10,%xmm10");     /* P[2] */
        asm volatile("pxor %xmm11,%xmm11");     /* P[3] */
        asm volatile("pxor %xmm12,%xmm12");     /* Q[2] */
        asm volatile("pxor %xmm13,%xmm13");     /* Zero temp */
        asm volatile("pxor %xmm14,%xmm14");     /* Q[3] */
        asm volatile("pxor %xmm15,%xmm15");     /* Zero temp */

        for ( d = 0 ; d < bytes ; d += 64 ) {
                for ( z = z0 ; z >= 0 ; z-- ) {
                        /* The second prefetch seems to improve performance... */
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("pcmpgtb %xmm12,%xmm13");
                        asm volatile("pcmpgtb %xmm14,%xmm15");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("paddb %xmm12,%xmm12");
                        asm volatile("paddb %xmm14,%xmm14");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pand %xmm0,%xmm13");
                        asm volatile("pand %xmm0,%xmm15");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm13,%xmm12");
                        asm volatile("pxor %xmm15,%xmm14");
                        asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
                        asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
                        asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
                        asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
                        asm volatile("pxor %xmm5,%xmm2");
                        asm volatile("pxor %xmm7,%xmm3");
                        asm volatile("pxor %xmm13,%xmm10");
                        asm volatile("pxor %xmm15,%xmm11");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm13,%xmm12");
                        asm volatile("pxor %xmm15,%xmm14");
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                        asm volatile("pxor %xmm13,%xmm13");
                        asm volatile("pxor %xmm15,%xmm15");
                }
                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
                asm volatile("pxor %xmm2,%xmm2");
                asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
                asm volatile("pxor %xmm3,%xmm3");
                asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
                asm volatile("pxor %xmm10,%xmm10");
                asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
                asm volatile("pxor %xmm11,%xmm11");
                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
                asm volatile("pxor %xmm4,%xmm4");
                asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
                asm volatile("pxor %xmm6,%xmm6");
                asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
                asm volatile("pxor %xmm12,%xmm12");
                asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
                asm volatile("pxor %xmm14,%xmm14");
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;
        p = dptr[disks-2];
        q = dptr[disks-1];

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));

        for ( d = 0 ; d < bytes ; d += 64 ) {
                asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
                asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
                asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
                asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
                asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
                asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
                asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
                asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
                asm volatile("pxor %xmm4,%xmm2");
                asm volatile("pxor %xmm6,%xmm3");
                asm volatile("pxor %xmm12,%xmm10");
                asm volatile("pxor %xmm14,%xmm11");

                for ( z = z0-1 ; z >= start ; z-- ) {
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                        asm volatile("pxor %xmm13,%xmm13");
                        asm volatile("pxor %xmm15,%xmm15");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("pcmpgtb %xmm12,%xmm13");
                        asm volatile("pcmpgtb %xmm14,%xmm15");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("paddb %xmm12,%xmm12");
                        asm volatile("paddb %xmm14,%xmm14");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pand %xmm0,%xmm13");
                        asm volatile("pand %xmm0,%xmm15");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm13,%xmm12");
                        asm volatile("pxor %xmm15,%xmm14");
                        asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
                        asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
                        asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
                        asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
                        asm volatile("pxor %xmm5,%xmm2");
                        asm volatile("pxor %xmm7,%xmm3");
                        asm volatile("pxor %xmm13,%xmm10");
                        asm volatile("pxor %xmm15,%xmm11");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm13,%xmm12");
                        asm volatile("pxor %xmm15,%xmm14");
                }
                asm volatile("prefetchnta %0" :: "m" (q[d]));
                asm volatile("prefetchnta %0" :: "m" (q[d+32]));

                for ( z = start-1 ; z >= 0 ; z-- ) {
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                        asm volatile("pxor %xmm13,%xmm13");
                        asm volatile("pxor %xmm15,%xmm15");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("pcmpgtb %xmm12,%xmm13");
                        asm volatile("pcmpgtb %xmm14,%xmm15");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("paddb %xmm12,%xmm12");
                        asm volatile("paddb %xmm14,%xmm14");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pand %xmm0,%xmm13");
                        asm volatile("pand %xmm0,%xmm15");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm13,%xmm12");
                        asm volatile("pxor %xmm15,%xmm14");
                }
                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
                asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
                asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
                asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
                asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
                asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
                asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
                asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
                asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
                asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
                asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
        }
        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x4 = {
        raid6_sse24_gen_syndrome,
        raid6_sse24_xor_syndrome,
        raid6_have_sse2,
        "sse2x4",
        1                       /* Has cache hints */
};

#endif /* CONFIG_X86_64 */
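
/*
 * Note: the raid6_calls tables defined above are not invoked directly from
 * this file.  They are expected to be listed in the raid6_algos[] table in
 * lib/raid6/algos.c, where the RAID-6 core benchmarks every routine whose
 * validity hook (raid6_have_sse2 here) returns true and selects the fastest
 * one at initialization time.
 */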