powerpc/kernel/vecemu.c

0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Routines to emulate some Altivec/VMX instructions, specifically
0004  * those that can trap when given denormalized operands in Java mode.
0005  */
0006 #include <linux/kernel.h>
0007 #include <linux/errno.h>
0008 #include <linux/sched.h>
0009 #include <asm/ptrace.h>
0010 #include <asm/processor.h>
0011 #include <asm/switch_to.h>
0012 #include <linux/uaccess.h>
0013 #include <asm/inst.h>
0014
0015 /* Functions in vector.S */
0016 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
0017 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
0018 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
0019 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
0020 extern void vrefp(vector128 *dst, vector128 *src);
0021 extern void vrsqrtefp(vector128 *dst, vector128 *src);
0022 extern void vexptep(vector128 *dst, vector128 *src);
0023
/*
 * Mantissas of 2^(i/8) for i = 0..7, scaled by 2^23 so that the leading
 * one sits in bit 23.  eexp2() indexes this table with the top three
 * fraction bits of its fixed-point argument.  The table is never written.
 */
static const unsigned int exp2s[8] = {
    0x800000,
    0x8b95c2,
    0x9837f0,
    0xa5fed7,
    0xb504f3,
    0xc5672a,
    0xd744fd,
    0xeac0c7
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return value
 * is the single-precision representation of the estimate.
 *
 * Special cases:
 *   NaN           -> the same NaN with the quiet bit set
 *   |x| >= 2^8    -> +Inf for positive x, 0 for negative x
 *   |x| <  2^-23  -> 1.0
 * Results that overflow return +Inf; results below the smallest
 * denormal return 0; denormal results are rounded by adding half an ulp.
 */
static unsigned int eexp2(unsigned int s)
{
    int exp, pwr;
    unsigned int mant, frac;

    /* extract exponent field from input */
    exp = ((s >> 23) & 0xff) - 127;
    if (exp > 7) {
        /* check for NaN input */
        if (exp == 128 && (s & 0x7fffff) != 0)
            return s | 0x400000;    /* return QNaN */
        /* 2^-big = 0, 2^+big = +Inf */
        return (s & 0x80000000) ? 0 : 0x7f800000;
    }
    if (exp < -23)
        return 0x3f800000;  /* 1.0 */

    /* convert to fixed point integer in 9.23 representation */
    pwr = (s & 0x7fffff) | 0x800000;
    if (exp > 0)
        pwr <<= exp;
    else
        pwr >>= -exp;
    if (s & 0x80000000)
        pwr = -pwr;

    /*
     * Extract integer part, which becomes exponent part of result.
     * This relies on >> of a negative int being an arithmetic shift
     * (floor division), as the original code did.  The bias used is 126
     * rather than 127 because the mantissa added below still carries
     * its leading one in bit 23.
     */
    exp = (pwr >> 23) + 126;
    if (exp >= 254)
        return 0x7f800000;
    if (exp < -23)
        return 0;

    /* table lookup on top 3 bits of fraction to get mantissa */
    mant = exp2s[(pwr >> 20) & 7];

    /*
     * Linear interpolation using the remaining 20 bits of fraction.
     * The shift is done on an unsigned value so that a negative pwr
     * does not invoke undefined behaviour; it moves those 20 bits to
     * the top of a 32-bit word.  Each step keeps the high 32 bits of a
     * 32x32 unsigned product.  0x172b83ff is (2^0.125 - 1) * 2^32.
     */
    frac = (unsigned int)(((unsigned long long)((unsigned int)pwr << 12) *
                           0x172b83ffULL) >> 32);
    frac = (unsigned int)(((unsigned long long)frac * mant) >> 32);
    mant += frac;

    if (exp >= 0)
        return mant + (exp << 23);

    /* denormalized result: round to nearest by adding half an ulp */
    exp = -exp;
    mant += 1 << (exp - 1);
    return mant >> exp;
}
0089
/* High 32 bits of the 64-bit product of two unsigned 32-bit values. */
static inline unsigned int mulhi_u32(unsigned int a, unsigned int b)
{
    return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return value
 * is the single-precision representation of the estimate.
 *
 * Special cases:
 *   NaN            -> the same NaN with the quiet bit set
 *   +Inf           -> +Inf
 *   +0 or -0       -> -Inf
 *   -Inf, x < 0    -> QNaN 0x7fc00000 (the logarithm is undefined)
 */
static unsigned int elog2(unsigned int s)
{
    int exp, mant, lz, frac;

    exp = s & 0x7f800000;
    mant = s & 0x7fffff;
    if (exp == 0x7f800000) {    /* Inf or NaN */
        if (mant != 0)
            return s | 0x400000;    /* turn NaN into QNaN */
        /* log2(+Inf) = +Inf, log2(-Inf) is invalid */
        return (s & 0x80000000) ? 0x7fc00000 : s;
    }
    if ((exp | mant) == 0)      /* +0 or -0 */
        return 0xff800000;  /* return -Inf */
    if (s & 0x80000000)         /* negative, non-zero */
        return 0x7fc00000;  /* invalid operation -> QNaN */

    /*
     * From here on `exp' accumulates the result as a signed fixed-point
     * number with 23 fraction bits, and `mant' is the significand
     * scaled by 2^23 (leading one in bit 23).
     */
    if (exp == 0) {
        /* denormalized: mant is non-zero here, so clz is defined */
        lz = __builtin_clz((unsigned int)mant);
        mant <<= lz - 8;
        /* multiply rather than shift: the value is negative */
        exp = (-118 - lz) * (1 << 23);
    } else {
        mant |= 0x800000;
        exp -= 127 << 23;
    }

    /*
     * Peel off factors of 2^0.5, 2^0.25 and 2^0.125 from the
     * significand, recording each one as a fraction bit of the result.
     * The low 23 bits of exp are still zero, so |= acts as an addition
     * even when exp is negative.
     */
    if (mant >= 0xb504f3) {             /* 2^0.5 * 2^23 */
        exp |= 0x400000;            /* 0.5 * 2^23 */
        mant = mulhi_u32(mant, 0xb504f334); /* 2^-0.5 * 2^32 */
    }
    if (mant >= 0x9837f0) {             /* 2^0.25 * 2^23 */
        exp |= 0x200000;            /* 0.25 * 2^23 */
        mant = mulhi_u32(mant, 0xd744fccb); /* 2^-0.25 * 2^32 */
    }
    if (mant >= 0x8b95c2) {             /* 2^0.125 * 2^23 */
        exp |= 0x100000;            /* 0.125 * 2^23 */
        mant = mulhi_u32(mant, 0xeac0c6e8); /* 2^-0.125 * 2^32 */
    }
    if (mant > 0x800000) {              /* 1.0 * 2^23 */
        /* calculate (mant - 1) * 1.381097463 */
        /* 1.381097463 == 0.125 / (2^0.125 - 1) */
        frac = mulhi_u32((unsigned int)(mant - 0x800000) << 1,
                         0xb0c7cd3a);
        exp += frac;
    }

    /* convert the signed fixed-point value in exp to single precision */
    s = exp & 0x80000000;
    if (exp != 0) {
        if (s)
            exp = -exp;
        /* normalize so that the leading one lands in bit 23 */
        lz = 8 - __builtin_clz((unsigned int)exp);
        if (lz > 0)
            exp >>= lz;
        else if (lz < 0)
            exp <<= -lz;
        /* the leading one in bit 23 supplies the 127th unit of bias */
        s += ((lz + 126) << 23) + exp;
    }
    return s;
}
0154
#define VSCR_SAT    1

/*
 * Convert the single-precision value whose bit pattern is `x', multiplied
 * by 2^scale, to a signed 32-bit integer, truncating towards zero.
 *
 * NaN converts to 0.  Values that do not fit saturate to the nearest
 * representable integer and set VSCR_SAT in *vscrp, except when the
 * scaled bit pattern is exactly that of -2^31.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
    const unsigned int negative = x & 0x80000000;
    int biased = (x >> 23) & 0xff;
    int magnitude = x & 0x7fffff;
    int power;

    /* NaN -> 0 */
    if (biased == 255 && magnitude != 0)
        return 0;

    power = biased - 127 + scale;

    /* magnitude below 1: round towards zero */
    if (power < 0)
        return 0;

    if (power >= 31) {
        /* saturate, unless the result would be -2^31 */
        if (x + (scale << 23) != 0xcf000000)
            *vscrp |= VSCR_SAT;
        if (negative)
            return 0x80000000;
        return 0x7fffffff;
    }

    /* put the leading one in bit 30, then shift the fraction out */
    magnitude = ((magnitude | 0x800000) << 7) >> (30 - power);
    if (negative)
        return -magnitude;
    return magnitude;
}
0178
0179 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
0180 {
0181     int exp;
0182     unsigned int mant;
0183
0184     exp = (x >> 23) & 0xff;
0185     mant = x & 0x7fffff;
0186     if (exp == 255 && mant != 0)
0187         return 0;       /* NaN -> 0 */
0188     exp = exp - 127 + scale;
0189     if (exp < 0)
0190         return 0;       /* round towards zero */
0191     if (x & 0x80000000) {
0192         /* negative => saturate to 0 */
0193         *vscrp |= VSCR_SAT;
0194         return 0;
0195     }
0196     if (exp >= 32) {
0197         /* saturate */
0198         *vscrp |= VSCR_SAT;
0199         return 0xffffffff;
0200     }
0201     mant |= 0x800000;
0202     mant = (mant << 8) >> (31 - exp);
0203     return mant;
0204 }
0205
/*
 * Round to floating integer, towards 0.
 *
 * `x' and the return value are single-precision bit patterns.  A NaN is
 * returned with its quiet bit set; the sign of the input is preserved.
 */
static unsigned int rfiz(unsigned int x)
{
    const int unbiased = ((x >> 23) & 0xff) - 127;

    /* NaN -> make it a QNaN */
    if (unbiased == 128 && (x & 0x7fffff) != 0)
        return x | 0x400000;

    /* |x| < 1.0 rounds to 0, keeping the sign */
    if (unbiased < 0)
        return x & 0x80000000;

    /* no fraction bits left: already an integer (or Inf) */
    if (unbiased >= 23)
        return x;

    /* clear the mantissa bits that lie below the binary point */
    return x & ~(0x7fffff >> unbiased);
}
0220
/*
 * Round to floating integer, towards +/- Inf (away from zero).
 *
 * `x' and the return value are single-precision bit patterns.  A NaN is
 * returned with its quiet bit set; zeros are returned unchanged.
 */
static unsigned int rfii(unsigned int x)
{
    const unsigned int sign = x & 0x80000000;
    const int unbiased = ((x >> 23) & 0xff) - 127;
    int below_point;

    /* NaN -> make it a QNaN */
    if (unbiased == 128 && (x & 0x7fffff) != 0)
        return x | 0x400000;

    /* already an integer (or Inf), or +/-0 */
    if (unbiased >= 23 || (x & 0x7fffffff) == 0)
        return x;

    /* 0 < |x| < 1.0 rounds to +/- 1.0 */
    if (unbiased < 0)
        return sign | 0x3f800000;

    /*
     * Bump any value with fraction bits past the next integer, then
     * clear those bits.  A carry out of the mantissa just increments
     * the exponent; it cannot reach the sign bit.
     */
    below_point = 0x7fffff >> unbiased;
    return (x + below_point) & ~below_point;
}
0241
/*
 * Round to floating integer, to nearest, with ties going to the even
 * integer.
 *
 * `x' and the return value are single-precision bit patterns.  A NaN is
 * returned with its quiet bit set; the sign of the input is preserved,
 * so values that round to zero yield a zero of the same sign.
 */
static unsigned int rfin(unsigned int x)
{
    int exp;
    unsigned int frac_mask, unit, half, frac;

    exp = ((x >> 23) & 0xff) - 127;
    if (exp == 128 && (x & 0x7fffff) != 0)
        return x | 0x400000;    /* NaN -> make it a QNaN */
    if (exp >= 23)
        return x;       /* it's an integer already (or Inf) */
    if (exp < -1)
        return x & 0x80000000;  /* |x| < 0.5 -> +/-0 */
    if (exp == -1) {
        /* exactly 0.5 is a tie between 0 and 1: 0 is even */
        if ((x & 0x7fffff) == 0)
            return x & 0x80000000;
        /* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
        return (x & 0x80000000) | 0x3f800000;
    }

    frac_mask = 0x7fffff >> exp;    /* bits below the binary point */
    unit = frac_mask + 1;           /* weight of the integer's low bit */
    half = unit >> 1;               /* 0.5 in the same scale */

    frac = x & frac_mask;
    x &= ~frac_mask;                /* truncate towards zero */

    /*
     * Round the magnitude up when the fraction exceeds one half, or
     * equals one half and the truncated integer is odd.  For exp == 0
     * `unit' is the low exponent bit, which is set (the integer part is
     * 1, which is odd).  A carry out of the mantissa just increments
     * the exponent; it cannot reach the sign bit.
     */
    if (frac > half || (frac == half && (x & unit) != 0))
        x += unit;
    return x;
}
0261
0262 int emulate_altivec(struct pt_regs *regs)
0263 {
0264     ppc_inst_t instr;
0265     unsigned int i, word;
0266     unsigned int va, vb, vc, vd;
0267     vector128 *vrs;
0268
0269     if (get_user_instr(instr, (void __user *)regs->nip))
0270         return -EFAULT;
0271
0272     word = ppc_inst_val(instr);
0273     if (ppc_inst_primary_opcode(instr) != 4)
0274         return -EINVAL;     /* not an altivec instruction */
0275     vd = (word >> 21) & 0x1f;
0276     va = (word >> 16) & 0x1f;
0277     vb = (word >> 11) & 0x1f;
0278     vc = (word >> 6) & 0x1f;
0279
0280     vrs = current->thread.vr_state.vr;
0281     switch (word & 0x3f) {
0282     case 10:
0283         switch (vc) {
0284         case 0: /* vaddfp */
0285             vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
0286             break;
0287         case 1: /* vsubfp */
0288             vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
0289             break;
0290         case 4: /* vrefp */
0291             vrefp(&vrs[vd], &vrs[vb]);
0292             break;
0293         case 5: /* vrsqrtefp */
0294             vrsqrtefp(&vrs[vd], &vrs[vb]);
0295             break;
0296         case 6: /* vexptefp */
0297             for (i = 0; i < 4; ++i)
0298                 vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
0299             break;
0300         case 7: /* vlogefp */
0301             for (i = 0; i < 4; ++i)
0302                 vrs[vd].u[i] = elog2(vrs[vb].u[i]);
0303             break;
0304         case 8:     /* vrfin */
0305             for (i = 0; i < 4; ++i)
0306                 vrs[vd].u[i] = rfin(vrs[vb].u[i]);
0307             break;
0308         case 9:     /* vrfiz */
0309             for (i = 0; i < 4; ++i)
0310                 vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
0311             break;
0312         case 10:    /* vrfip */
0313             for (i = 0; i < 4; ++i) {
0314                 u32 x = vrs[vb].u[i];
0315                 x = (x & 0x80000000)? rfiz(x): rfii(x);
0316                 vrs[vd].u[i] = x;
0317             }
0318             break;
0319         case 11:    /* vrfim */
0320             for (i = 0; i < 4; ++i) {
0321                 u32 x = vrs[vb].u[i];
0322                 x = (x & 0x80000000)? rfii(x): rfiz(x);
0323                 vrs[vd].u[i] = x;
0324             }
0325             break;
0326         case 14:    /* vctuxs */
0327             for (i = 0; i < 4; ++i)
0328                 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
0329                     &current->thread.vr_state.vscr.u[3]);
0330             break;
0331         case 15:    /* vctsxs */
0332             for (i = 0; i < 4; ++i)
0333                 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
0334                     &current->thread.vr_state.vscr.u[3]);
0335             break;
0336         default:
0337             return -EINVAL;
0338         }
0339         break;
0340     case 46:    /* vmaddfp */
0341         vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
0342         break;
0343     case 47:    /* vnmsubfp */
0344         vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
0345         break;
0346     default:
0347         return -EINVAL;
0348     }
0349
0350     return 0;
0351 }