contrib/arm-optimized-routines/math/v_math.h

   1 /*
   2  * Vector math abstractions.
   3  *
   4  * Copyright (c) 2019-2020, Arm Limited.
   5  * SPDX-License-Identifier: MIT
   6  */
   7
   8 #ifndef _V_MATH_H
   9 #define _V_MATH_H
  10
#ifndef WANT_VMATH
/* Enable the build of vector math code.  This is only the default: a value
   of WANT_VMATH defined before this point is kept, and a value of 0
   compiles out the rest of this header.  */
# define WANT_VMATH 1
#endif
  15 #if WANT_VMATH
  16
/* The goal of this header is to allow vector and scalar builds
   of the same algorithm.  The provided intrinsic wrappers are
   also vector-length agnostic, so they can be implemented for
   SVE (or other SIMD architectures) too, and the code should
   then work on those targets as well.  */
  22
/* V_NAME(x) builds the symbol name of a routine from its base name x:
   __s_x in the scalar build, __vn_x in the AArch64 vector-PCS build and
   __v_x otherwise.  */
#if SCALAR
#define V_NAME(x) __s_##x
#elif VPCS && __aarch64__
#define V_NAME(x) __vn_##x
/* Only the vector-PCS build attaches the aarch64_vector_pcs attribute.  */
#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
#else
#define V_NAME(x) __v_##x
#endif

/* In every other configuration VPCS_ATTR expands to nothing.  */
#ifndef VPCS_ATTR
#define VPCS_ATTR
#endif
/* VPCS_ALIAS expands to nothing unless it was defined before this point.  */
#ifndef VPCS_ALIAS
#define VPCS_ALIAS
#endif
  38
  39 #include <stdint.h>
  40 #include "math_config.h"
  41
  42 typedef float f32_t;
  43 typedef uint32_t u32_t;
  44 typedef int32_t s32_t;
  45 typedef double f64_t;
  46 typedef uint64_t u64_t;
  47 typedef int64_t s64_t;
  48
  49 /* reinterpret as type1 from type2.  */
  50 static inline u32_t
  51 as_u32_f32 (f32_t x)
  52 {
  53   union { f32_t f; u32_t u; } r = {x};
  54   return r.u;
  55 }
  56 static inline f32_t
  57 as_f32_u32 (u32_t x)
  58 {
  59   union { u32_t u; f32_t f; } r = {x};
  60   return r.f;
  61 }
  62 static inline s32_t
  63 as_s32_u32 (u32_t x)
  64 {
  65   union { u32_t u; s32_t i; } r = {x};
  66   return r.i;
  67 }
  68 static inline u32_t
  69 as_u32_s32 (s32_t x)
  70 {
  71   union { s32_t i; u32_t u; } r = {x};
  72   return r.u;
  73 }
  74 static inline u64_t
  75 as_u64_f64 (f64_t x)
  76 {
  77   union { f64_t f; u64_t u; } r = {x};
  78   return r.u;
  79 }
  80 static inline f64_t
  81 as_f64_u64 (u64_t x)
  82 {
  83   union { u64_t u; f64_t f; } r = {x};
  84   return r.f;
  85 }
  86 static inline s64_t
  87 as_s64_u64 (u64_t x)
  88 {
  89   union { u64_t u; s64_t i; } r = {x};
  90   return r.i;
  91 }
  92 static inline u64_t
  93 as_u64_s64 (s64_t x)
  94 {
  95   union { s64_t i; u64_t u; } r = {x};
  96   return r.u;
  97 }
  98
  99 #if SCALAR
/* Marks that the v_* wrappers are implemented for this configuration.  */
#define V_SUPPORTED 1
/* Scalar build: every "vector" type is simply its scalar element type.  */
typedef f32_t v_f32_t;
typedef u32_t v_u32_t;
typedef s32_t v_s32_t;
typedef f64_t v_f64_t;
typedef u64_t v_u64_t;
typedef s64_t v_s64_t;
 107
 108 static inline int
 109 v_lanes32 (void)
 110 {
 111   return 1;
 112 }
 113
 114 static inline v_f32_t
 115 v_f32 (f32_t x)
 116 {
 117   return x;
 118 }
 119 static inline v_u32_t
 120 v_u32 (u32_t x)
 121 {
 122   return x;
 123 }
 124 static inline v_s32_t
 125 v_s32 (s32_t x)
 126 {
 127   return x;
 128 }
 129
 130 static inline f32_t
 131 v_get_f32 (v_f32_t x, int i)
 132 {
 133   return x;
 134 }
 135 static inline u32_t
 136 v_get_u32 (v_u32_t x, int i)
 137 {
 138   return x;
 139 }
 140 static inline s32_t
 141 v_get_s32 (v_s32_t x, int i)
 142 {
 143   return x;
 144 }
 145
 146 static inline void
 147 v_set_f32 (v_f32_t *x, int i, f32_t v)
 148 {
 149   *x = v;
 150 }
 151 static inline void
 152 v_set_u32 (v_u32_t *x, int i, u32_t v)
 153 {
 154   *x = v;
 155 }
 156 static inline void
 157 v_set_s32 (v_s32_t *x, int i, s32_t v)
 158 {
 159   *x = v;
 160 }
 161
 162 /* true if any elements of a v_cond result is non-zero.  */
 163 static inline int
 164 v_any_u32 (v_u32_t x)
 165 {
 166   return x != 0;
 167 }
 168 /* to wrap the result of relational operators.  */
 169 static inline v_u32_t
 170 v_cond_u32 (v_u32_t x)
 171 {
 172   return x ? -1 : 0;
 173 }
 174 static inline v_f32_t
 175 v_abs_f32 (v_f32_t x)
 176 {
 177   return __builtin_fabsf (x);
 178 }
 179 static inline v_f32_t
 180 v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
 181 {
 182   return __builtin_fmaf (x, y, z);
 183 }
 184 static inline v_f32_t
 185 v_round_f32 (v_f32_t x)
 186 {
 187   return __builtin_roundf (x);
 188 }
 189 static inline v_s32_t
 190 v_round_s32 (v_f32_t x)
 191 {
 192   return __builtin_lroundf (x); /* relies on -fno-math-errno.  */
 193 }
 194 /* convert to type1 from type2.  */
 195 static inline v_f32_t
 196 v_to_f32_s32 (v_s32_t x)
 197 {
 198   return x;
 199 }
 200 static inline v_f32_t
 201 v_to_f32_u32 (v_u32_t x)
 202 {
 203   return x;
 204 }
 205 /* reinterpret as type1 from type2.  */
 206 static inline v_u32_t
 207 v_as_u32_f32 (v_f32_t x)
 208 {
 209   union { v_f32_t f; v_u32_t u; } r = {x};
 210   return r.u;
 211 }
 212 static inline v_f32_t
 213 v_as_f32_u32 (v_u32_t x)
 214 {
 215   union { v_u32_t u; v_f32_t f; } r = {x};
 216   return r.f;
 217 }
 218 static inline v_s32_t
 219 v_as_s32_u32 (v_u32_t x)
 220 {
 221   union { v_u32_t u; v_s32_t i; } r = {x};
 222   return r.i;
 223 }
 224 static inline v_u32_t
 225 v_as_u32_s32 (v_s32_t x)
 226 {
 227   union { v_s32_t i; v_u32_t u; } r = {x};
 228   return r.u;
 229 }
 230 static inline v_f32_t
 231 v_lookup_f32 (const f32_t *tab, v_u32_t idx)
 232 {
 233   return tab[idx];
 234 }
 235 static inline v_u32_t
 236 v_lookup_u32 (const u32_t *tab, v_u32_t idx)
 237 {
 238   return tab[idx];
 239 }
 240 static inline v_f32_t
 241 v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
 242 {
 243   return f (x);
 244 }
 245 static inline v_f32_t
 246 v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
 247              v_u32_t p)
 248 {
 249   return f (x1, x2);
 250 }
 251
 252 static inline int
 253 v_lanes64 (void)
 254 {
 255   return 1;
 256 }
 257 static inline v_f64_t
 258 v_f64 (f64_t x)
 259 {
 260   return x;
 261 }
 262 static inline v_u64_t
 263 v_u64 (u64_t x)
 264 {
 265   return x;
 266 }
 267 static inline v_s64_t
 268 v_s64 (s64_t x)
 269 {
 270   return x;
 271 }
 272 static inline f64_t
 273 v_get_f64 (v_f64_t x, int i)
 274 {
 275   return x;
 276 }
 277 static inline void
 278 v_set_f64 (v_f64_t *x, int i, f64_t v)
 279 {
 280   *x = v;
 281 }
 282 /* true if any elements of a v_cond result is non-zero.  */
 283 static inline int
 284 v_any_u64 (v_u64_t x)
 285 {
 286   return x != 0;
 287 }
 288 /* to wrap the result of relational operators.  */
 289 static inline v_u64_t
 290 v_cond_u64 (v_u64_t x)
 291 {
 292   return x ? -1 : 0;
 293 }
 294 static inline v_f64_t
 295 v_abs_f64 (v_f64_t x)
 296 {
 297   return __builtin_fabs (x);
 298 }
 299 static inline v_f64_t
 300 v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
 301 {
 302   return __builtin_fma (x, y, z);
 303 }
 304 static inline v_f64_t
 305 v_round_f64 (v_f64_t x)
 306 {
 307   return __builtin_round (x);
 308 }
 309 static inline v_s64_t
 310 v_round_s64 (v_f64_t x)
 311 {
 312   return __builtin_lround (x); /* relies on -fno-math-errno.  */
 313 }
 314 /* convert to type1 from type2.  */
 315 static inline v_f64_t
 316 v_to_f64_s64 (v_s64_t x)
 317 {
 318   return x;
 319 }
 320 static inline v_f64_t
 321 v_to_f64_u64 (v_u64_t x)
 322 {
 323   return x;
 324 }
 325 /* reinterpret as type1 from type2.  */
 326 static inline v_u64_t
 327 v_as_u64_f64 (v_f64_t x)
 328 {
 329   union { v_f64_t f; v_u64_t u; } r = {x};
 330   return r.u;
 331 }
 332 static inline v_f64_t
 333 v_as_f64_u64 (v_u64_t x)
 334 {
 335   union { v_u64_t u; v_f64_t f; } r = {x};
 336   return r.f;
 337 }
 338 static inline v_s64_t
 339 v_as_s64_u64 (v_u64_t x)
 340 {
 341   union { v_u64_t u; v_s64_t i; } r = {x};
 342   return r.i;
 343 }
 344 static inline v_u64_t
 345 v_as_u64_s64 (v_s64_t x)
 346 {
 347   union { v_s64_t i; v_u64_t u; } r = {x};
 348   return r.u;
 349 }
 350 static inline v_f64_t
 351 v_lookup_f64 (const f64_t *tab, v_u64_t idx)
 352 {
 353   return tab[idx];
 354 }
 355 static inline v_u64_t
 356 v_lookup_u64 (const u64_t *tab, v_u64_t idx)
 357 {
 358   return tab[idx];
 359 }
 360 static inline v_f64_t
 361 v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
 362 {
 363   return f (x);
 364 }
 365
 366 #elif __aarch64__
/* Marks that the v_* wrappers are implemented for this configuration.  */
#define V_SUPPORTED 1
#include <arm_neon.h>
/* AArch64 build: the vector types are the NEON types holding four 32-bit
   lanes or two 64-bit lanes.  */
typedef float32x4_t v_f32_t;
typedef uint32x4_t v_u32_t;
typedef int32x4_t v_s32_t;
typedef float64x2_t v_f64_t;
typedef uint64x2_t v_u64_t;
typedef int64x2_t v_s64_t;
 375
 376 static inline int
 377 v_lanes32 (void)
 378 {
 379   return 4;
 380 }
 381
 382 static inline v_f32_t
 383 v_f32 (f32_t x)
 384 {
 385   return (v_f32_t){x, x, x, x};
 386 }
 387 static inline v_u32_t
 388 v_u32 (u32_t x)
 389 {
 390   return (v_u32_t){x, x, x, x};
 391 }
 392 static inline v_s32_t
 393 v_s32 (s32_t x)
 394 {
 395   return (v_s32_t){x, x, x, x};
 396 }
 397
 398 static inline f32_t
 399 v_get_f32 (v_f32_t x, int i)
 400 {
 401   return x[i];
 402 }
 403 static inline u32_t
 404 v_get_u32 (v_u32_t x, int i)
 405 {
 406   return x[i];
 407 }
 408 static inline s32_t
 409 v_get_s32 (v_s32_t x, int i)
 410 {
 411   return x[i];
 412 }
 413
 414 static inline void
 415 v_set_f32 (v_f32_t *x, int i, f32_t v)
 416 {
 417   (*x)[i] = v;
 418 }
 419 static inline void
 420 v_set_u32 (v_u32_t *x, int i, u32_t v)
 421 {
 422   (*x)[i] = v;
 423 }
 424 static inline void
 425 v_set_s32 (v_s32_t *x, int i, s32_t v)
 426 {
 427   (*x)[i] = v;
 428 }
 429
 430 /* true if any elements of a v_cond result is non-zero.  */
 431 static inline int
 432 v_any_u32 (v_u32_t x)
 433 {
 434   /* assume elements in x are either 0 or -1u.  */
 435   return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
 436 }
 437 /* to wrap the result of relational operators.  */
 438 static inline v_u32_t
 439 v_cond_u32 (v_u32_t x)
 440 {
 441   return x;
 442 }
 443 static inline v_f32_t
 444 v_abs_f32 (v_f32_t x)
 445 {
 446   return vabsq_f32 (x);
 447 }
 448 static inline v_f32_t
 449 v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
 450 {
 451   return vfmaq_f32 (z, x, y);
 452 }
 453 static inline v_f32_t
 454 v_round_f32 (v_f32_t x)
 455 {
 456   return vrndaq_f32 (x);
 457 }
 458 static inline v_s32_t
 459 v_round_s32 (v_f32_t x)
 460 {
 461   return vcvtaq_s32_f32 (x);
 462 }
 463 /* convert to type1 from type2.  */
 464 static inline v_f32_t
 465 v_to_f32_s32 (v_s32_t x)
 466 {
 467   return (v_f32_t){x[0], x[1], x[2], x[3]};
 468 }
 469 static inline v_f32_t
 470 v_to_f32_u32 (v_u32_t x)
 471 {
 472   return (v_f32_t){x[0], x[1], x[2], x[3]};
 473 }
 474 /* reinterpret as type1 from type2.  */
 475 static inline v_u32_t
 476 v_as_u32_f32 (v_f32_t x)
 477 {
 478   union { v_f32_t f; v_u32_t u; } r = {x};
 479   return r.u;
 480 }
 481 static inline v_f32_t
 482 v_as_f32_u32 (v_u32_t x)
 483 {
 484   union { v_u32_t u; v_f32_t f; } r = {x};
 485   return r.f;
 486 }
 487 static inline v_s32_t
 488 v_as_s32_u32 (v_u32_t x)
 489 {
 490   union { v_u32_t u; v_s32_t i; } r = {x};
 491   return r.i;
 492 }
 493 static inline v_u32_t
 494 v_as_u32_s32 (v_s32_t x)
 495 {
 496   union { v_s32_t i; v_u32_t u; } r = {x};
 497   return r.u;
 498 }
 499 static inline v_f32_t
 500 v_lookup_f32 (const f32_t *tab, v_u32_t idx)
 501 {
 502   return (v_f32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
 503 }
 504 static inline v_u32_t
 505 v_lookup_u32 (const u32_t *tab, v_u32_t idx)
 506 {
 507   return (v_u32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
 508 }
 509 static inline v_f32_t
 510 v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
 511 {
 512   return (v_f32_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
 513                    p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]};
 514 }
 515 static inline v_f32_t
 516 v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
 517              v_u32_t p)
 518 {
 519   return (
 520     v_f32_t){p[0] ? f (x1[0], x2[0]) : y[0], p[1] ? f (x1[1], x2[1]) : y[1],
 521              p[2] ? f (x1[2], x2[2]) : y[2], p[3] ? f (x1[3], x2[3]) : y[3]};
 522 }
 523
 524 static inline int
 525 v_lanes64 (void)
 526 {
 527   return 2;
 528 }
 529 static inline v_f64_t
 530 v_f64 (f64_t x)
 531 {
 532   return (v_f64_t){x, x};
 533 }
 534 static inline v_u64_t
 535 v_u64 (u64_t x)
 536 {
 537   return (v_u64_t){x, x};
 538 }
 539 static inline v_s64_t
 540 v_s64 (s64_t x)
 541 {
 542   return (v_s64_t){x, x};
 543 }
 544 static inline f64_t
 545 v_get_f64 (v_f64_t x, int i)
 546 {
 547   return x[i];
 548 }
 549 static inline void
 550 v_set_f64 (v_f64_t *x, int i, f64_t v)
 551 {
 552   (*x)[i] = v;
 553 }
 554 /* true if any elements of a v_cond result is non-zero.  */
 555 static inline int
 556 v_any_u64 (v_u64_t x)
 557 {
 558   /* assume elements in x are either 0 or -1u.  */
 559   return vpaddd_u64 (x) != 0;
 560 }
 561 /* to wrap the result of relational operators.  */
 562 static inline v_u64_t
 563 v_cond_u64 (v_u64_t x)
 564 {
 565   return x;
 566 }
 567 static inline v_f64_t
 568 v_abs_f64 (v_f64_t x)
 569 {
 570   return vabsq_f64 (x);
 571 }
 572 static inline v_f64_t
 573 v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
 574 {
 575   return vfmaq_f64 (z, x, y);
 576 }
 577 static inline v_f64_t
 578 v_round_f64 (v_f64_t x)
 579 {
 580   return vrndaq_f64 (x);
 581 }
 582 static inline v_s64_t
 583 v_round_s64 (v_f64_t x)
 584 {
 585   return vcvtaq_s64_f64 (x);
 586 }
 587 /* convert to type1 from type2.  */
 588 static inline v_f64_t
 589 v_to_f64_s64 (v_s64_t x)
 590 {
 591   return (v_f64_t){x[0], x[1]};
 592 }
 593 static inline v_f64_t
 594 v_to_f64_u64 (v_u64_t x)
 595 {
 596   return (v_f64_t){x[0], x[1]};
 597 }
 598 /* reinterpret as type1 from type2.  */
 599 static inline v_u64_t
 600 v_as_u64_f64 (v_f64_t x)
 601 {
 602   union { v_f64_t f; v_u64_t u; } r = {x};
 603   return r.u;
 604 }
 605 static inline v_f64_t
 606 v_as_f64_u64 (v_u64_t x)
 607 {
 608   union { v_u64_t u; v_f64_t f; } r = {x};
 609   return r.f;
 610 }
 611 static inline v_s64_t
 612 v_as_s64_u64 (v_u64_t x)
 613 {
 614   union {  v_u64_t u; v_s64_t i; } r = {x};
 615   return r.i;
 616 }
 617 static inline v_u64_t
 618 v_as_u64_s64 (v_s64_t x)
 619 {
 620   union { v_s64_t i; v_u64_t u; } r = {x};
 621   return r.u;
 622 }
 623 static inline v_f64_t
 624 v_lookup_f64 (const f64_t *tab, v_u64_t idx)
 625 {
 626   return (v_f64_t){tab[idx[0]], tab[idx[1]]};
 627 }
 628 static inline v_u64_t
 629 v_lookup_u64 (const u64_t *tab, v_u64_t idx)
 630 {
 631   return (v_u64_t){tab[idx[0]], tab[idx[1]]};
 632 }
 633 static inline v_f64_t
 634 v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
 635 {
 636   return (v_f64_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1]};
 637 }
 638 #endif
 639
 640 #endif
 641 #endif