Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright 2012-15 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  * Authors: AMD
0023  *
0024  */
0025 
0026 #include "dm_services.h"
0027 #include "include/fixed31_32.h"
0028 
0029 static const struct fixed31_32 dc_fixpt_two_pi = { 26986075409LL };
0030 static const struct fixed31_32 dc_fixpt_ln2 = { 2977044471LL };
0031 static const struct fixed31_32 dc_fixpt_ln2_div_2 = { 1488522236LL };
0032 
0033 static inline unsigned long long abs_i64(
0034     long long arg)
0035 {
0036     if (arg > 0)
0037         return (unsigned long long)arg;
0038     else
0039         return (unsigned long long)(-arg);
0040 }
0041 
0042 /*
0043  * @brief
0044  * result = dividend / divisor
0045  * *remainder = dividend % divisor
0046  */
0047 static inline unsigned long long complete_integer_division_u64(
0048     unsigned long long dividend,
0049     unsigned long long divisor,
0050     unsigned long long *remainder)
0051 {
0052     unsigned long long result;
0053 
0054     ASSERT(divisor);
0055 
0056     result = div64_u64_rem(dividend, divisor, remainder);
0057 
0058     return result;
0059 }
0060 
0061 
0062 #define FRACTIONAL_PART_MASK \
0063     ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
0064 
0065 #define GET_INTEGER_PART(x) \
0066     ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
0067 
0068 #define GET_FRACTIONAL_PART(x) \
0069     (FRACTIONAL_PART_MASK & (x))
0070 
0071 struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator)
0072 {
0073     struct fixed31_32 res;
0074 
0075     bool arg1_negative = numerator < 0;
0076     bool arg2_negative = denominator < 0;
0077 
0078     unsigned long long arg1_value = arg1_negative ? -numerator : numerator;
0079     unsigned long long arg2_value = arg2_negative ? -denominator : denominator;
0080 
0081     unsigned long long remainder;
0082 
0083     /* determine integer part */
0084 
0085     unsigned long long res_value = complete_integer_division_u64(
0086         arg1_value, arg2_value, &remainder);
0087 
0088     ASSERT(res_value <= LONG_MAX);
0089 
0090     /* determine fractional part */
0091     {
0092         unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
0093 
0094         do {
0095             remainder <<= 1;
0096 
0097             res_value <<= 1;
0098 
0099             if (remainder >= arg2_value) {
0100                 res_value |= 1;
0101                 remainder -= arg2_value;
0102             }
0103         } while (--i != 0);
0104     }
0105 
0106     /* round up LSB */
0107     {
0108         unsigned long long summand = (remainder << 1) >= arg2_value;
0109 
0110         ASSERT(res_value <= LLONG_MAX - summand);
0111 
0112         res_value += summand;
0113     }
0114 
0115     res.value = (long long)res_value;
0116 
0117     if (arg1_negative ^ arg2_negative)
0118         res.value = -res.value;
0119 
0120     return res;
0121 }
0122 
0123 struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2)
0124 {
0125     struct fixed31_32 res;
0126 
0127     bool arg1_negative = arg1.value < 0;
0128     bool arg2_negative = arg2.value < 0;
0129 
0130     unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value;
0131     unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value;
0132 
0133     unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
0134     unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);
0135 
0136     unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
0137     unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);
0138 
0139     unsigned long long tmp;
0140 
0141     res.value = arg1_int * arg2_int;
0142 
0143     ASSERT(res.value <= LONG_MAX);
0144 
0145     res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
0146 
0147     tmp = arg1_int * arg2_fra;
0148 
0149     ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
0150 
0151     res.value += tmp;
0152 
0153     tmp = arg2_int * arg1_fra;
0154 
0155     ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
0156 
0157     res.value += tmp;
0158 
0159     tmp = arg1_fra * arg2_fra;
0160 
0161     tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
0162         (tmp >= (unsigned long long)dc_fixpt_half.value);
0163 
0164     ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
0165 
0166     res.value += tmp;
0167 
0168     if (arg1_negative ^ arg2_negative)
0169         res.value = -res.value;
0170 
0171     return res;
0172 }
0173 
0174 struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg)
0175 {
0176     struct fixed31_32 res;
0177 
0178     unsigned long long arg_value = abs_i64(arg.value);
0179 
0180     unsigned long long arg_int = GET_INTEGER_PART(arg_value);
0181 
0182     unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);
0183 
0184     unsigned long long tmp;
0185 
0186     res.value = arg_int * arg_int;
0187 
0188     ASSERT(res.value <= LONG_MAX);
0189 
0190     res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
0191 
0192     tmp = arg_int * arg_fra;
0193 
0194     ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
0195 
0196     res.value += tmp;
0197 
0198     ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
0199 
0200     res.value += tmp;
0201 
0202     tmp = arg_fra * arg_fra;
0203 
0204     tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
0205         (tmp >= (unsigned long long)dc_fixpt_half.value);
0206 
0207     ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
0208 
0209     res.value += tmp;
0210 
0211     return res;
0212 }
0213 
0214 struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg)
0215 {
0216     /*
0217      * @note
0218      * Good idea to use Newton's method
0219      */
0220 
0221     ASSERT(arg.value);
0222 
0223     return dc_fixpt_from_fraction(
0224         dc_fixpt_one.value,
0225         arg.value);
0226 }
0227 
0228 struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg)
0229 {
0230     struct fixed31_32 square;
0231 
0232     struct fixed31_32 res = dc_fixpt_one;
0233 
0234     int n = 27;
0235 
0236     struct fixed31_32 arg_norm = arg;
0237 
0238     if (dc_fixpt_le(
0239         dc_fixpt_two_pi,
0240         dc_fixpt_abs(arg))) {
0241         arg_norm = dc_fixpt_sub(
0242             arg_norm,
0243             dc_fixpt_mul_int(
0244                 dc_fixpt_two_pi,
0245                 (int)div64_s64(
0246                     arg_norm.value,
0247                     dc_fixpt_two_pi.value)));
0248     }
0249 
0250     square = dc_fixpt_sqr(arg_norm);
0251 
0252     do {
0253         res = dc_fixpt_sub(
0254             dc_fixpt_one,
0255             dc_fixpt_div_int(
0256                 dc_fixpt_mul(
0257                     square,
0258                     res),
0259                 n * (n - 1)));
0260 
0261         n -= 2;
0262     } while (n > 2);
0263 
0264     if (arg.value != arg_norm.value)
0265         res = dc_fixpt_div(
0266             dc_fixpt_mul(res, arg_norm),
0267             arg);
0268 
0269     return res;
0270 }
0271 
0272 struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg)
0273 {
0274     return dc_fixpt_mul(
0275         arg,
0276         dc_fixpt_sinc(arg));
0277 }
0278 
0279 struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg)
0280 {
0281     /* TODO implement argument normalization */
0282 
0283     const struct fixed31_32 square = dc_fixpt_sqr(arg);
0284 
0285     struct fixed31_32 res = dc_fixpt_one;
0286 
0287     int n = 26;
0288 
0289     do {
0290         res = dc_fixpt_sub(
0291             dc_fixpt_one,
0292             dc_fixpt_div_int(
0293                 dc_fixpt_mul(
0294                     square,
0295                     res),
0296                 n * (n - 1)));
0297 
0298         n -= 2;
0299     } while (n != 0);
0300 
0301     return res;
0302 }
0303 
0304 /*
0305  * @brief
0306  * result = exp(arg),
0307  * where abs(arg) < 1
0308  *
0309  * Calculated as Taylor series.
0310  */
0311 static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg)
0312 {
0313     unsigned int n = 9;
0314 
0315     struct fixed31_32 res = dc_fixpt_from_fraction(
0316         n + 2,
0317         n + 1);
0318     /* TODO find correct res */
0319 
0320     ASSERT(dc_fixpt_lt(arg, dc_fixpt_one));
0321 
0322     do
0323         res = dc_fixpt_add(
0324             dc_fixpt_one,
0325             dc_fixpt_div_int(
0326                 dc_fixpt_mul(
0327                     arg,
0328                     res),
0329                 n));
0330     while (--n != 1);
0331 
0332     return dc_fixpt_add(
0333         dc_fixpt_one,
0334         dc_fixpt_mul(
0335             arg,
0336             res));
0337 }
0338 
0339 struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg)
0340 {
0341     /*
0342      * @brief
0343      * Main equation is:
0344      * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
0345      * where m = round(x / ln(2)), r = x - m * ln(2)
0346      */
0347 
0348     if (dc_fixpt_le(
0349         dc_fixpt_ln2_div_2,
0350         dc_fixpt_abs(arg))) {
0351         int m = dc_fixpt_round(
0352             dc_fixpt_div(
0353                 arg,
0354                 dc_fixpt_ln2));
0355 
0356         struct fixed31_32 r = dc_fixpt_sub(
0357             arg,
0358             dc_fixpt_mul_int(
0359                 dc_fixpt_ln2,
0360                 m));
0361 
0362         ASSERT(m != 0);
0363 
0364         ASSERT(dc_fixpt_lt(
0365             dc_fixpt_abs(r),
0366             dc_fixpt_one));
0367 
0368         if (m > 0)
0369             return dc_fixpt_shl(
0370                 fixed31_32_exp_from_taylor_series(r),
0371                 (unsigned char)m);
0372         else
0373             return dc_fixpt_div_int(
0374                 fixed31_32_exp_from_taylor_series(r),
0375                 1LL << -m);
0376     } else if (arg.value != 0)
0377         return fixed31_32_exp_from_taylor_series(arg);
0378     else
0379         return dc_fixpt_one;
0380 }
0381 
0382 struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg)
0383 {
0384     struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one);
0385     /* TODO improve 1st estimation */
0386 
0387     struct fixed31_32 error;
0388 
0389     ASSERT(arg.value > 0);
0390     /* TODO if arg is negative, return NaN */
0391     /* TODO if arg is zero, return -INF */
0392 
0393     do {
0394         struct fixed31_32 res1 = dc_fixpt_add(
0395             dc_fixpt_sub(
0396                 res,
0397                 dc_fixpt_one),
0398             dc_fixpt_div(
0399                 arg,
0400                 dc_fixpt_exp(res)));
0401 
0402         error = dc_fixpt_sub(
0403             res,
0404             res1);
0405 
0406         res = res1;
0407         /* TODO determine max_allowed_error based on quality of exp() */
0408     } while (abs_i64(error.value) > 100ULL);
0409 
0410     return res;
0411 }
0412 
0413 
0414 /* this function is a generic helper to translate fixed point value to
0415  * specified integer format that will consist of integer_bits integer part and
0416  * fractional_bits fractional part. For example it is used in
0417  * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
0418  * part in 32 bits. It is used in hw programming (scaler)
0419  */
0420 
0421 static inline unsigned int ux_dy(
0422     long long value,
0423     unsigned int integer_bits,
0424     unsigned int fractional_bits)
0425 {
0426     /* 1. create mask of integer part */
0427     unsigned int result = (1 << integer_bits) - 1;
0428     /* 2. mask out fractional part */
0429     unsigned int fractional_part = FRACTIONAL_PART_MASK & value;
0430     /* 3. shrink fixed point integer part to be of integer_bits width*/
0431     result &= GET_INTEGER_PART(value);
0432     /* 4. make space for fractional part to be filled in after integer */
0433     result <<= fractional_bits;
0434     /* 5. shrink fixed point fractional part to of fractional_bits width*/
0435     fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
0436     /* 6. merge the result */
0437     return result | fractional_part;
0438 }
0439 
0440 static inline unsigned int clamp_ux_dy(
0441     long long value,
0442     unsigned int integer_bits,
0443     unsigned int fractional_bits,
0444     unsigned int min_clamp)
0445 {
0446     unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits);
0447 
0448     if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
0449         return (1 << (integer_bits + fractional_bits)) - 1;
0450     else if (truncated_val > min_clamp)
0451         return truncated_val;
0452     else
0453         return min_clamp;
0454 }
0455 
0456 unsigned int dc_fixpt_u4d19(struct fixed31_32 arg)
0457 {
0458     return ux_dy(arg.value, 4, 19);
0459 }
0460 
0461 unsigned int dc_fixpt_u3d19(struct fixed31_32 arg)
0462 {
0463     return ux_dy(arg.value, 3, 19);
0464 }
0465 
0466 unsigned int dc_fixpt_u2d19(struct fixed31_32 arg)
0467 {
0468     return ux_dy(arg.value, 2, 19);
0469 }
0470 
0471 unsigned int dc_fixpt_u0d19(struct fixed31_32 arg)
0472 {
0473     return ux_dy(arg.value, 0, 19);
0474 }
0475 
0476 unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg)
0477 {
0478     return clamp_ux_dy(arg.value, 0, 14, 1);
0479 }
0480 
0481 unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg)
0482 {
0483     return clamp_ux_dy(arg.value, 0, 10, 1);
0484 }
0485 
0486 int dc_fixpt_s4d19(struct fixed31_32 arg)
0487 {
0488     if (arg.value < 0)
0489         return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19);
0490     else
0491         return ux_dy(arg.value, 4, 19);
0492 }