0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
/*
 * Normal builds take vec_xor() and the `vector` keyword from the
 * compiler's <altivec.h>.  When __CHECKER__ is defined that header is
 * skipped and plain-C stand-ins for those two names are supplied instead.
 */
#if !defined(__CHECKER__)
#include <altivec.h>
#else
/* Fully parenthesised so the expansion stays one operand in any context. */
#define vec_xor(a, b) ((a) ^ (b))
#define vector __attribute__((vector_size(16)))
#endif
0019
0020 #include "xor_vmx.h"
0021
/*
 * Element type used by the helper macros and XOR routines in this file:
 * one `vector signed char` (16 bytes under the __CHECKER__ definition of
 * `vector` above).
 */
0022 typedef vector signed char unative_t;
0023
/*
 * DEFINE(V) - declare the locals needed to walk buffer V.
 *
 * Requires a pointer named V_in to be in scope.  Declares V, a unative_t
 * pointer initialised from V_in (the cast also discards any const
 * qualifier on V_in), and four unative_t temporaries V_0 .. V_3.
 * The expansion ends without a semicolon; the caller provides it.
 */
#define DEFINE(V)					\
	unative_t *V = (unative_t *)V##_in;		\
	unative_t V##_0;				\
	unative_t V##_1;				\
	unative_t V##_2;				\
	unative_t V##_3
0027
/*
 * LOAD(V) - copy the four elements at V[0..3] into the temporaries
 * V_0 .. V_3 (as declared by DEFINE(V)).
 */
#define LOAD(V)					\
	do {					\
		V##_0 = (V)[0];			\
		V##_1 = (V)[1];			\
		V##_2 = (V)[2];			\
		V##_3 = (V)[3];			\
	} while (0)
0035
/*
 * STORE(V) - write the temporaries V_0 .. V_3 back to V[0..3].
 */
#define STORE(V)				\
	do {					\
		(V)[0] = V##_0;			\
		(V)[1] = V##_1;			\
		(V)[2] = V##_2;			\
		(V)[3] = V##_3;			\
	} while (0)
0043
/*
 * XOR(V1, V2) - fold V2's temporaries into V1's, element by element:
 * V1_i = vec_xor(V1_i, V2_i) for i = 0..3.  V2's temporaries are only
 * read.
 */
#define XOR(V1, V2)					\
	do {						\
		V1##_0 = vec_xor(V1##_0, V2##_0);	\
		V1##_1 = vec_xor(V1##_1, V2##_1);	\
		V1##_2 = vec_xor(V1##_2, V2##_2);	\
		V1##_3 = vec_xor(V1##_3, V2##_3);	\
	} while (0)
0051
0052 void __xor_altivec_2(unsigned long bytes,
0053 unsigned long * __restrict v1_in,
0054 const unsigned long * __restrict v2_in)
0055 {
0056 DEFINE(v1);
0057 DEFINE(v2);
0058 unsigned long lines = bytes / (sizeof(unative_t)) / 4;
0059
0060 do {
0061 LOAD(v1);
0062 LOAD(v2);
0063 XOR(v1, v2);
0064 STORE(v1);
0065
0066 v1 += 4;
0067 v2 += 4;
0068 } while (--lines > 0);
0069 }
0070
0071 void __xor_altivec_3(unsigned long bytes,
0072 unsigned long * __restrict v1_in,
0073 const unsigned long * __restrict v2_in,
0074 const unsigned long * __restrict v3_in)
0075 {
0076 DEFINE(v1);
0077 DEFINE(v2);
0078 DEFINE(v3);
0079 unsigned long lines = bytes / (sizeof(unative_t)) / 4;
0080
0081 do {
0082 LOAD(v1);
0083 LOAD(v2);
0084 LOAD(v3);
0085 XOR(v1, v2);
0086 XOR(v1, v3);
0087 STORE(v1);
0088
0089 v1 += 4;
0090 v2 += 4;
0091 v3 += 4;
0092 } while (--lines > 0);
0093 }
0094
0095 void __xor_altivec_4(unsigned long bytes,
0096 unsigned long * __restrict v1_in,
0097 const unsigned long * __restrict v2_in,
0098 const unsigned long * __restrict v3_in,
0099 const unsigned long * __restrict v4_in)
0100 {
0101 DEFINE(v1);
0102 DEFINE(v2);
0103 DEFINE(v3);
0104 DEFINE(v4);
0105 unsigned long lines = bytes / (sizeof(unative_t)) / 4;
0106
0107 do {
0108 LOAD(v1);
0109 LOAD(v2);
0110 LOAD(v3);
0111 LOAD(v4);
0112 XOR(v1, v2);
0113 XOR(v3, v4);
0114 XOR(v1, v3);
0115 STORE(v1);
0116
0117 v1 += 4;
0118 v2 += 4;
0119 v3 += 4;
0120 v4 += 4;
0121 } while (--lines > 0);
0122 }
0123
0124 void __xor_altivec_5(unsigned long bytes,
0125 unsigned long * __restrict v1_in,
0126 const unsigned long * __restrict v2_in,
0127 const unsigned long * __restrict v3_in,
0128 const unsigned long * __restrict v4_in,
0129 const unsigned long * __restrict v5_in)
0130 {
0131 DEFINE(v1);
0132 DEFINE(v2);
0133 DEFINE(v3);
0134 DEFINE(v4);
0135 DEFINE(v5);
0136 unsigned long lines = bytes / (sizeof(unative_t)) / 4;
0137
0138 do {
0139 LOAD(v1);
0140 LOAD(v2);
0141 LOAD(v3);
0142 LOAD(v4);
0143 LOAD(v5);
0144 XOR(v1, v2);
0145 XOR(v3, v4);
0146 XOR(v1, v5);
0147 XOR(v1, v3);
0148 STORE(v1);
0149
0150 v1 += 4;
0151 v2 += 4;
0152 v3 += 4;
0153 v4 += 4;
0154 v5 += 4;
0155 } while (--lines > 0);
0156 }