Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  *  linux/arch/arm/vfp/vfp.h
0004  *
0005  *  Copyright (C) 2004 ARM Limited.
0006  *  Written by Deep Blue Solutions Limited.
0007  */
0008 
0009 static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift)
0010 {
0011     if (shift) {
0012         if (shift < 32)
0013             val = val >> shift | ((val << (32 - shift)) != 0);
0014         else
0015             val = val != 0;
0016     }
0017     return val;
0018 }
0019 
0020 static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift)
0021 {
0022     if (shift) {
0023         if (shift < 64)
0024             val = val >> shift | ((val << (64 - shift)) != 0);
0025         else
0026             val = val != 0;
0027     }
0028     return val;
0029 }
0030 
/*
 * Narrow a 64-bit value to 32 bits with jamming: the result is the high
 * word of 'val', with bit 0 forced on if any bit of the low word was set
 * (sticky bit), so discarded low bits still affect rounding.
 *
 * ARM inline asm: %Q1/%R1 name the low/high 32-bit halves of the 64-bit
 * operand.  "cmp low, #1" sets carry iff low != 0 (unsigned low >= 1),
 * selecting between a plain move of the high word (movcc) and the high
 * word OR-ed with 1 (orrcs).  Clobbers the condition flags.
 */
static inline u32 vfp_hi64to32jamming(u64 val)
{
    u32 v;

    asm(
    "cmp    %Q1, #1     @ vfp_hi64to32jamming\n\t"
    "movcc  %0, %R1\n\t"
    "orrcs  %0, %R1, #1"
    : "=r" (v) : "r" (val) : "cc");

    return v;
}
0043 
/*
 * 128-bit addition: *resh:*resl = (nh:nl) + (mh:ml).
 *
 * Implemented as a four-word carry chain: adds/adcs on the two 32-bit
 * halves of the low 64 bits, then adcs/adc on the halves of the high
 * 64 bits.  %Q/%R select the low/high 32-bit halves of each u64
 * operand; the "0"/"1" constraints tie the outputs to the inputs.
 * Clobbers the condition flags ("cc").  Overflow wraps modulo 2^128.
 */
static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
{
    asm(    "adds   %Q0, %Q2, %Q4\n\t"
        "adcs   %R0, %R2, %R4\n\t"
        "adcs   %Q1, %Q3, %Q5\n\t"
        "adc    %R1, %R3, %R5"
        : "=r" (nl), "=r" (nh)
        : "0" (nl), "1" (nh), "r" (ml), "r" (mh)
        : "cc");
    *resh = nh;
    *resl = nl;
}
0056 
/*
 * 128-bit subtraction: *resh:*resl = (nh:nl) - (mh:ml).
 *
 * Four-word borrow chain (subs/sbcs/sbcs/sbc), with %Q/%R naming the
 * low/high 32-bit halves of each u64 operand.  The result wraps modulo
 * 2^128 when mh:ml > nh:nl; vfp_estimate_div128to64() relies on this,
 * testing the sign of the high word to detect an overshoot.  Clobbers
 * the condition flags ("cc").
 */
static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
{
    asm(    "subs   %Q0, %Q2, %Q4\n\t"
        "sbcs   %R0, %R2, %R4\n\t"
        "sbcs   %Q1, %Q3, %Q5\n\t"
        "sbc    %R1, %R3, %R5\n\t"
        : "=r" (nl), "=r" (nh)
        : "0" (nl), "1" (nh), "r" (ml), "r" (mh)
        : "cc");
    *resh = nh;
    *resl = nl;
}
0069 
0070 static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m)
0071 {
0072     u32 nh, nl, mh, ml;
0073     u64 rh, rma, rmb, rl;
0074 
0075     nl = n;
0076     ml = m;
0077     rl = (u64)nl * ml;
0078 
0079     nh = n >> 32;
0080     rma = (u64)nh * ml;
0081 
0082     mh = m >> 32;
0083     rmb = (u64)nl * mh;
0084     rma += rmb;
0085 
0086     rh = (u64)nh * mh;
0087     rh += ((u64)(rma < rmb) << 32) + (rma >> 32);
0088 
0089     rma <<= 32;
0090     rl += rma;
0091     rh += (rl < rma);
0092 
0093     *resl = rl;
0094     *resh = rh;
0095 }
0096 
0097 static inline void shift64left(u64 *resh, u64 *resl, u64 n)
0098 {
0099     *resh = n >> 63;
0100     *resl = n << 1;
0101 }
0102 
0103 static inline u64 vfp_hi64multiply64(u64 n, u64 m)
0104 {
0105     u64 rh, rl;
0106     mul64to128(&rh, &rl, n, m);
0107     return rh | (rl != 0);
0108 }
0109 
/*
 * Estimate the 64-bit quotient of the 128-bit value nh:nl divided by m
 * (same approach as SoftFloat's estimateDiv128To64): derive the upper
 * 32 quotient bits from a 64/32 divide, correct any overshoot, then
 * derive the lower 32 bits the same way.  The result is the true
 * quotient or very close to it; callers handle residual error.
 */
static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m)
{
    u64 mh, ml, remh, reml, termh, terml, z;

    /* Quotient would not fit in 64 bits: saturate. */
    if (nh >= m)
        return ~0ULL;
    mh = m >> 32;
    if (mh << 32 <= nh) {
        /* Upper quotient bits would overflow: saturate them. */
        z = 0xffffffff00000000ULL;
    } else {
        /* do_div() divides the u64 in place by a 32-bit divisor. */
        z = nh;
        do_div(z, mh);
        z <<= 32;
    }
    /* remainder = nh:nl - z * m */
    mul64to128(&termh, &terml, m, z);
    sub128(&remh, &reml, nh, nl, termh, terml);
    ml = m << 32;
    /* Overshoot wrapped negative: step the estimate down and add back m<<32. */
    while ((s64)remh < 0) {
        z -= 0x100000000ULL;
        add128(&remh, &reml, remh, reml, mh, ml);
    }
    remh = (remh << 32) | (reml >> 32);
    /* Repeat the procedure for the low 32 quotient bits. */
    if (mh << 32 <= remh) {
        z |= 0xffffffff;
    } else {
        do_div(remh, mh);
        z |= remh;
    }
    return z;
}
0140 
0141 /*
0142  * Operations on unpacked elements
0143  */
0144 #define vfp_sign_negate(sign)   (sign ^ 0x8000)
0145 
0146 /*
0147  * Single-precision
0148  */
/*
 * Unpacked single-precision value.  'sign' holds the sign bit at bit 15
 * (the packed bit 31 shifted right by 16 — see vfp_single_unpack()),
 * 'exponent' is the biased exponent, and 'significand' is the mantissa
 * aligned to bit 30, with the implicit leading 1 made explicit for
 * normalised numbers.
 */
struct vfp_single {
    s16 exponent;
    u16 sign;
    u32 significand;
};
0154 
0155 asmlinkage s32 vfp_get_float(unsigned int reg);
0156 asmlinkage void vfp_put_float(s32 val, unsigned int reg);
0157 
0158 /*
0159  * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa
0160  * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent
0161  * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand
0162  *  which are not propagated to the float upon packing.
0163  */
0164 #define VFP_SINGLE_MANTISSA_BITS    (23)
0165 #define VFP_SINGLE_EXPONENT_BITS    (8)
0166 #define VFP_SINGLE_LOW_BITS     (32 - VFP_SINGLE_MANTISSA_BITS - 2)
0167 #define VFP_SINGLE_LOW_BITS_MASK    ((1 << VFP_SINGLE_LOW_BITS) - 1)
0168 
0169 /*
0170  * The bit in an unpacked float which indicates that it is a quiet NaN
0171  */
0172 #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS))
0173 
0174 /*
0175  * Operations on packed single-precision numbers
0176  */
0177 #define vfp_single_packed_sign(v)   ((v) & 0x80000000)
0178 #define vfp_single_packed_negate(v) ((v) ^ 0x80000000)
0179 #define vfp_single_packed_abs(v)    ((v) & ~0x80000000)
0180 #define vfp_single_packed_exponent(v)   (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
0181 #define vfp_single_packed_mantissa(v)   ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
0182 
0183 /*
0184  * Unpack a single-precision float.  Note that this returns the magnitude
0185  * of the single-precision float mantissa with the 1. if necessary,
0186  * aligned to bit 30.
0187  */
0188 static inline void vfp_single_unpack(struct vfp_single *s, s32 val)
0189 {
0190     u32 significand;
0191 
0192     s->sign = vfp_single_packed_sign(val) >> 16,
0193     s->exponent = vfp_single_packed_exponent(val);
0194 
0195     significand = (u32) val;
0196     significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
0197     if (s->exponent && s->exponent != 255)
0198         significand |= 0x40000000;
0199     s->significand = significand;
0200 }
0201 
0202 /*
0203  * Re-pack a single-precision float.  This assumes that the float is
0204  * already normalised such that the MSB is bit 30, _not_ bit 31.
0205  */
0206 static inline s32 vfp_single_pack(struct vfp_single *s)
0207 {
0208     u32 val;
0209     val = (s->sign << 16) +
0210           (s->exponent << VFP_SINGLE_MANTISSA_BITS) +
0211           (s->significand >> VFP_SINGLE_LOW_BITS);
0212     return (s32)val;
0213 }
0214 
0215 #define VFP_NUMBER      (1<<0)
0216 #define VFP_ZERO        (1<<1)
0217 #define VFP_DENORMAL        (1<<2)
0218 #define VFP_INFINITY        (1<<3)
0219 #define VFP_NAN         (1<<4)
0220 #define VFP_NAN_SIGNAL      (1<<5)
0221 
0222 #define VFP_QNAN        (VFP_NAN)
0223 #define VFP_SNAN        (VFP_NAN|VFP_NAN_SIGNAL)
0224 
0225 static inline int vfp_single_type(struct vfp_single *s)
0226 {
0227     int type = VFP_NUMBER;
0228     if (s->exponent == 255) {
0229         if (s->significand == 0)
0230             type = VFP_INFINITY;
0231         else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN)
0232             type = VFP_QNAN;
0233         else
0234             type = VFP_SNAN;
0235     } else if (s->exponent == 0) {
0236         if (s->significand == 0)
0237             type |= VFP_ZERO;
0238         else
0239             type |= VFP_DENORMAL;
0240     }
0241     return type;
0242 }
0243 
0244 #ifndef DEBUG
0245 #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
0246 u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions);
0247 #else
0248 u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func);
0249 #endif
0250 
0251 /*
0252  * Double-precision
0253  */
/*
 * Unpacked double-precision value.  'sign' holds the sign bit at bit 15
 * (packed bit 63 shifted right by 48 — see vfp_double_unpack()),
 * 'exponent' is the biased exponent, and 'significand' is the mantissa
 * aligned to bit 62 with the implicit leading 1 made explicit for
 * normalised numbers.
 */
struct vfp_double {
    s16 exponent;
    u16 sign;
    u64 significand;
};
0259 
0260 /*
0261  * VFP_REG_ZERO is a special register number for vfp_get_double
0262  * which returns (double)0.0.  This is useful for the compare with
0263  * zero instructions.
0264  */
0265 #ifdef CONFIG_VFPv3
0266 #define VFP_REG_ZERO    32
0267 #else
0268 #define VFP_REG_ZERO    16
0269 #endif
0270 asmlinkage u64 vfp_get_double(unsigned int reg);
0271 asmlinkage void vfp_put_double(u64 val, unsigned int reg);
0272 
0273 #define VFP_DOUBLE_MANTISSA_BITS    (52)
0274 #define VFP_DOUBLE_EXPONENT_BITS    (11)
0275 #define VFP_DOUBLE_LOW_BITS     (64 - VFP_DOUBLE_MANTISSA_BITS - 2)
0276 #define VFP_DOUBLE_LOW_BITS_MASK    ((1 << VFP_DOUBLE_LOW_BITS) - 1)
0277 
0278 /*
0279  * The bit in an unpacked double which indicates that it is a quiet NaN
0280  */
0281 #define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS))
0282 
/*
 * Operations on packed double-precision numbers
 */
0286 #define vfp_double_packed_sign(v)   ((v) & (1ULL << 63))
0287 #define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63))
0288 #define vfp_double_packed_abs(v)    ((v) & ~(1ULL << 63))
0289 #define vfp_double_packed_exponent(v)   (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
0290 #define vfp_double_packed_mantissa(v)   ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
0291 
0292 /*
0293  * Unpack a double-precision float.  Note that this returns the magnitude
0294  * of the double-precision float mantissa with the 1. if necessary,
0295  * aligned to bit 62.
0296  */
0297 static inline void vfp_double_unpack(struct vfp_double *s, s64 val)
0298 {
0299     u64 significand;
0300 
0301     s->sign = vfp_double_packed_sign(val) >> 48;
0302     s->exponent = vfp_double_packed_exponent(val);
0303 
0304     significand = (u64) val;
0305     significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
0306     if (s->exponent && s->exponent != 2047)
0307         significand |= (1ULL << 62);
0308     s->significand = significand;
0309 }
0310 
0311 /*
0312  * Re-pack a double-precision float.  This assumes that the float is
0313  * already normalised such that the MSB is bit 30, _not_ bit 31.
0314  */
0315 static inline s64 vfp_double_pack(struct vfp_double *s)
0316 {
0317     u64 val;
0318     val = ((u64)s->sign << 48) +
0319           ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) +
0320           (s->significand >> VFP_DOUBLE_LOW_BITS);
0321     return (s64)val;
0322 }
0323 
0324 static inline int vfp_double_type(struct vfp_double *s)
0325 {
0326     int type = VFP_NUMBER;
0327     if (s->exponent == 2047) {
0328         if (s->significand == 0)
0329             type = VFP_INFINITY;
0330         else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN)
0331             type = VFP_QNAN;
0332         else
0333             type = VFP_SNAN;
0334     } else if (s->exponent == 0) {
0335         if (s->significand == 0)
0336             type |= VFP_ZERO;
0337         else
0338             type |= VFP_DENORMAL;
0339     }
0340     return type;
0341 }
0342 
0343 u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func);
0344 
0345 u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
0346 
0347 /*
0348  * A special flag to tell the normalisation code not to normalise.
0349  */
0350 #define VFP_NAN_FLAG    0x100
0351 
0352 /*
0353  * A bit pattern used to indicate the initial (unset) value of the
0354  * exception mask, in case nothing handles an instruction.  This
 * doesn't include the NAN flag, which gets masked out before
0356  * we check for an error.
0357  */
0358 #define VFP_EXCEPTION_ERROR ((u32)-1 & ~VFP_NAN_FLAG)
0359 
0360 /*
0361  * A flag to tell vfp instruction type.
0362  *  OP_SCALAR - this operation always operates in scalar mode
0363  *  OP_SD - the instruction exceptionally writes to a single precision result.
0364  *  OP_DD - the instruction exceptionally writes to a double precision result.
0365  *  OP_SM - the instruction exceptionally reads from a single precision operand.
0366  */
0367 #define OP_SCALAR   (1 << 0)
0368 #define OP_SD       (1 << 1)
0369 #define OP_DD       (1 << 1)
0370 #define OP_SM       (1 << 2)
0371 
/*
 * Dispatch entry for a VFP operation: the handler function plus OP_*
 * flags describing its operand/result precision behaviour.
 * NOTE(review): dd/dn/dm appear to be VFP register indices (cf.
 * vfp_get_float()/vfp_put_float()) and fpscr the status register value
 * — confirm against the callers of this table.
 */
struct op {
    u32 (* const fn)(int dd, int dn, int dm, u32 fpscr);
    u32 flags;
};
0376 
0377 asmlinkage void vfp_save_state(void *location, u32 fpexc);