contrib/arm-optimized-routines/math/v_sinf.c

   1 /*
   2  * Single-precision vector sin function.
   3  *
   4  * Copyright (c) 2019, Arm Limited.
   5  * SPDX-License-Identifier: MIT
   6  */
   7
   8 #include "mathlib.h"
   9 #include "v_math.h"
  10 #if V_SUPPORTED
  11
  /* Coefficients of the odd polynomial that V_NAME(sinf) evaluates on the
     reduced argument r:
       sin(r) ~= r + A3*r^3 + A5*r^5 + A7*r^7 + A9*r^9.
     The table is indexed highest degree first: Poly[0] is A9 and Poly[3]
     is A3.  The initializers below are listed lowest degree first purely
     for readability; the designators fix the actual slots.  */
  static const float Poly[] = {
    /* 1.886 ulp error */
    [3] = -0x1.555548p-3f,  /* A3 */
    [2] = 0x1.110df4p-7f,   /* A5 */
    [1] = -0x1.9f42eap-13f, /* A7 */
    [0] = 0x1.5b2e76p-19f,  /* A9 */
  };

  /* Polynomial coefficients, wrapped with v_f32 (declared in v_math.h;
     presumably a broadcast of the scalar to every lane -- confirm there).  */
  #define A3 v_f32 (Poly[3])
  #define A5 v_f32 (Poly[2])
  #define A7 v_f32 (Poly[1])
  #define A9 v_f32 (Poly[0])

  /* Range reduction.  InvPi is 1/pi rounded to float.  Shift is 1.5 * 2^23:
     V_NAME(sinf) adds it to |x|/pi and subtracts it again to obtain the
     nearest integer n.  */
  #define InvPi v_f32 (0x1.45f306p-2f)
  #define Shift v_f32 (0x1.8p+23f)

  /* pi split into three floats of decreasing magnitude.  Pi1 is pi rounded
     to float (slightly above pi), so the correction terms Pi2 and Pi3 are
     negative.  n*pi is subtracted from |x| one piece at a time.  */
  #define Pi1 v_f32 (0x1.921fb6p+1f)
  #define Pi2 v_f32 (-0x1.777a5cp-24f)
  #define Pi3 v_f32 (-0x1.ee59dap-49f)

  /* Lanes with |x| >= 2^20 (compared on the raw bit patterns) are handed to
     the scalar fallback.  */
  #define RangeVal v_f32 (0x1p20f)

  /* Every bit except the IEEE-754 single-precision sign bit.  */
  #define AbsMask v_u32 (0x7fffffff)
  30
  31 VPCS_ATTR
  32 static v_f32_t
  33 specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
  34 {
  35   /* Fall back to scalar code.  */
  36   return v_call_f32 (sinf, x, y, cmp);
  37 }
  38
  39 VPCS_ATTR
  40 v_f32_t
  41 V_NAME(sinf) (v_f32_t x)
  42 {
  43   v_f32_t n, r, r2, y;
  44   v_u32_t sign, odd, cmp;
  45
  46   r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
  47   sign = v_as_u32_f32 (x) & ~AbsMask;
  48   cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
  49
  50   /* n = rint(|x|/pi) */
  51   n = v_fma_f32 (InvPi, r, Shift);
  52   odd = v_as_u32_f32 (n) << 31;
  53   n -= Shift;
  54
  55   /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2) */
  56   r = v_fma_f32 (-Pi1, n, r);
  57   r = v_fma_f32 (-Pi2, n, r);
  58   r = v_fma_f32 (-Pi3, n, r);
  59
  60   /* y = sin(r) */
  61   r2 = r * r;
  62   y = v_fma_f32 (A9, r2, A7);
  63   y = v_fma_f32 (y, r2, A5);
  64   y = v_fma_f32 (y, r2, A3);
  65   y = v_fma_f32 (y * r2, r, r);
  66
  67   /* sign fix */
  68   y = v_as_f32_u32 (v_as_u32_f32 (y) ^ sign ^ odd);
  69
  70   if (unlikely (v_any_u32 (cmp)))
  71     return specialcase (x, y, cmp);
  72   return y;
  73 }
  74 VPCS_ALIAS
  75 #endif