Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *
0004  * Copyright (C) IBM Corporation, 2012
0005  *
0006  * Author: Anton Blanchard <anton@au.ibm.com>
0007  */
0008 
0009 /*
0010  * Sparse (as at v0.5.0) gets very, very confused by this file.
0011  * Make it a bit simpler for it.
0012  */
0013 #if !defined(__CHECKER__)
0014 #include <altivec.h>
0015 #else
0016 #define vec_xor(a, b) a ^ b
0017 #define vector __attribute__((vector_size(16)))
0018 #endif
0019 
0020 #include "xor_vmx.h"
0021 
/*
 * Unit of work for the macros and functions below: a vector of signed chars
 * (declared with vector_size(16) when this file is checked by sparse).
 */
typedef vector signed char unative_t;
0023 
/*
 * Declare the working set for one buffer called NAME:
 *   - NAME itself, a unative_t cursor initialised from the NAME_in parameter
 *     (the cast also drops any const qualifier NAME_in carries);
 *   - NAME_0 .. NAME_3, four unative_t temporaries used by LOAD/XOR/STORE.
 * The caller supplies the terminating semicolon.
 */
#define DEFINE(NAME)                                \
    unative_t *NAME = (unative_t *)NAME##_in;       \
    unative_t NAME##_0,                             \
              NAME##_1,                             \
              NAME##_2,                             \
              NAME##_3
0027 
/*
 * Copy the four consecutive elements at cursor V into the temporaries
 * V_0 .. V_3. Wrapped in do/while (0) so it behaves as a single statement.
 */
#define LOAD(V)                     \
    do {                            \
        V##_0 = *((V) + 0);         \
        V##_1 = *((V) + 1);         \
        V##_2 = *((V) + 2);         \
        V##_3 = *((V) + 3);         \
    } while (0)
0035 
/*
 * Write the temporaries V_0 .. V_3 back to the four consecutive elements at
 * cursor V. Wrapped in do/while (0) so it behaves as a single statement.
 */
#define STORE(V)                    \
    do {                            \
        *((V) + 0) = V##_0;         \
        *((V) + 1) = V##_1;         \
        *((V) + 2) = V##_2;         \
        *((V) + 3) = V##_3;         \
    } while (0)
0043 
/*
 * Fold the temporaries of SRC into those of DST, lane by lane:
 * DST_n = vec_xor(DST_n, SRC_n) for n = 0..3. SRC's temporaries are left
 * unchanged. Wrapped in do/while (0) so it behaves as a single statement.
 */
#define XOR(DST, SRC)                                   \
    do {                                                \
        DST##_0 = vec_xor(DST##_0, SRC##_0);            \
        DST##_1 = vec_xor(DST##_1, SRC##_1);            \
        DST##_2 = vec_xor(DST##_2, SRC##_2);            \
        DST##_3 = vec_xor(DST##_3, SRC##_3);            \
    } while (0)
0051 
0052 void __xor_altivec_2(unsigned long bytes,
0053              unsigned long * __restrict v1_in,
0054              const unsigned long * __restrict v2_in)
0055 {
0056     DEFINE(v1);
0057     DEFINE(v2);
0058     unsigned long lines = bytes / (sizeof(unative_t)) / 4;
0059 
0060     do {
0061         LOAD(v1);
0062         LOAD(v2);
0063         XOR(v1, v2);
0064         STORE(v1);
0065 
0066         v1 += 4;
0067         v2 += 4;
0068     } while (--lines > 0);
0069 }
0070 
0071 void __xor_altivec_3(unsigned long bytes,
0072              unsigned long * __restrict v1_in,
0073              const unsigned long * __restrict v2_in,
0074              const unsigned long * __restrict v3_in)
0075 {
0076     DEFINE(v1);
0077     DEFINE(v2);
0078     DEFINE(v3);
0079     unsigned long lines = bytes / (sizeof(unative_t)) / 4;
0080 
0081     do {
0082         LOAD(v1);
0083         LOAD(v2);
0084         LOAD(v3);
0085         XOR(v1, v2);
0086         XOR(v1, v3);
0087         STORE(v1);
0088 
0089         v1 += 4;
0090         v2 += 4;
0091         v3 += 4;
0092     } while (--lines > 0);
0093 }
0094 
0095 void __xor_altivec_4(unsigned long bytes,
0096              unsigned long * __restrict v1_in,
0097              const unsigned long * __restrict v2_in,
0098              const unsigned long * __restrict v3_in,
0099              const unsigned long * __restrict v4_in)
0100 {
0101     DEFINE(v1);
0102     DEFINE(v2);
0103     DEFINE(v3);
0104     DEFINE(v4);
0105     unsigned long lines = bytes / (sizeof(unative_t)) / 4;
0106 
0107     do {
0108         LOAD(v1);
0109         LOAD(v2);
0110         LOAD(v3);
0111         LOAD(v4);
0112         XOR(v1, v2);
0113         XOR(v3, v4);
0114         XOR(v1, v3);
0115         STORE(v1);
0116 
0117         v1 += 4;
0118         v2 += 4;
0119         v3 += 4;
0120         v4 += 4;
0121     } while (--lines > 0);
0122 }
0123 
0124 void __xor_altivec_5(unsigned long bytes,
0125              unsigned long * __restrict v1_in,
0126              const unsigned long * __restrict v2_in,
0127              const unsigned long * __restrict v3_in,
0128              const unsigned long * __restrict v4_in,
0129              const unsigned long * __restrict v5_in)
0130 {
0131     DEFINE(v1);
0132     DEFINE(v2);
0133     DEFINE(v3);
0134     DEFINE(v4);
0135     DEFINE(v5);
0136     unsigned long lines = bytes / (sizeof(unative_t)) / 4;
0137 
0138     do {
0139         LOAD(v1);
0140         LOAD(v2);
0141         LOAD(v3);
0142         LOAD(v4);
0143         LOAD(v5);
0144         XOR(v1, v2);
0145         XOR(v3, v4);
0146         XOR(v1, v5);
0147         XOR(v1, v3);
0148         STORE(v1);
0149 
0150         v1 += 4;
0151         v2 += 4;
0152         v3 += 4;
0153         v4 += 4;
0154         v5 += 4;
0155     } while (--lines > 0);
0156 }