2 * Vector math abstractions.
4 * Copyright (c) 2019-2020, Arm Limited.
5 * SPDX-License-Identifier: MIT
12 /* Enable the build of vector math code. */
17 /* The goal of this header is to allow vector and scalar
18 builds of the same algorithm.  The provided intrinsic
19 wrappers are also vector-length agnostic, so they can
20 be implemented for SVE (or other SIMD architectures)
21 and the code should then work on those targets too. */
24 #define V_NAME(x) __s_##x
25 #elif VPCS && __aarch64__
26 #define V_NAME(x) __vn_##x
27 #define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
29 #define V_NAME(x) __v_##x
40 #include "math_config.h"
43 typedef uint32_t u32_t;
44 typedef int32_t s32_t;
46 typedef uint64_t u64_t;
47 typedef int64_t s64_t;
49 /* reinterpret as type1 from type2. */
53 union { f32_t f; u32_t u; } r = {x};
59 union { u32_t u; f32_t f; } r = {x};
65 union { u32_t u; s32_t i; } r = {x};
71 union { s32_t i; u32_t u; } r = {x};
77 union { f64_t f; u64_t u; } r = {x};
83 union { u64_t u; f64_t f; } r = {x};
89 union { u64_t u; s64_t i; } r = {x};
95 union { s64_t i; u64_t u; } r = {x};
100 #define V_SUPPORTED 1
101 typedef f32_t v_f32_t;
102 typedef u32_t v_u32_t;
103 typedef s32_t v_s32_t;
104 typedef f64_t v_f64_t;
105 typedef u64_t v_u64_t;
106 typedef s64_t v_s64_t;
114 static inline v_f32_t
119 static inline v_u32_t
124 static inline v_s32_t
131 v_get_f32 (v_f32_t x, int i)
136 v_get_u32 (v_u32_t x, int i)
141 v_get_s32 (v_s32_t x, int i)
147 v_set_f32 (v_f32_t *x, int i, f32_t v)
152 v_set_u32 (v_u32_t *x, int i, u32_t v)
157 v_set_s32 (v_s32_t *x, int i, s32_t v)
162 /* true if any element of a v_cond result is non-zero. */
164 v_any_u32 (v_u32_t x)
168 /* to wrap the result of relational operators. */
169 static inline v_u32_t
170 v_cond_u32 (v_u32_t x)
174 static inline v_f32_t
175 v_abs_f32 (v_f32_t x)
177 return __builtin_fabsf (x);
179 static inline v_f32_t
180 v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
182 return __builtin_fmaf (x, y, z);
184 static inline v_f32_t
185 v_round_f32 (v_f32_t x)
187 return __builtin_roundf (x);
189 static inline v_s32_t
190 v_round_s32 (v_f32_t x)
192 return __builtin_lroundf (x); /* relies on -fno-math-errno. */
194 /* convert to type1 from type2. */
195 static inline v_f32_t
196 v_to_f32_s32 (v_s32_t x)
200 static inline v_f32_t
201 v_to_f32_u32 (v_u32_t x)
205 /* reinterpret as type1 from type2. */
206 static inline v_u32_t
207 v_as_u32_f32 (v_f32_t x)
209 union { v_f32_t f; v_u32_t u; } r = {x};
212 static inline v_f32_t
213 v_as_f32_u32 (v_u32_t x)
215 union { v_u32_t u; v_f32_t f; } r = {x};
218 static inline v_s32_t
219 v_as_s32_u32 (v_u32_t x)
221 union { v_u32_t u; v_s32_t i; } r = {x};
224 static inline v_u32_t
225 v_as_u32_s32 (v_s32_t x)
227 union { v_s32_t i; v_u32_t u; } r = {x};
230 static inline v_f32_t
231 v_lookup_f32 (const f32_t *tab, v_u32_t idx)
235 static inline v_u32_t
236 v_lookup_u32 (const u32_t *tab, v_u32_t idx)
240 static inline v_f32_t
241 v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
245 static inline v_f32_t
246 v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
257 static inline v_f64_t
262 static inline v_u64_t
267 static inline v_s64_t
273 v_get_f64 (v_f64_t x, int i)
278 v_set_f64 (v_f64_t *x, int i, f64_t v)
282 /* true if any element of a v_cond result is non-zero. */
284 v_any_u64 (v_u64_t x)
288 /* to wrap the result of relational operators. */
289 static inline v_u64_t
290 v_cond_u64 (v_u64_t x)
294 static inline v_f64_t
295 v_abs_f64 (v_f64_t x)
297 return __builtin_fabs (x);
299 static inline v_f64_t
300 v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
302 return __builtin_fma (x, y, z);
304 static inline v_f64_t
305 v_round_f64 (v_f64_t x)
307 return __builtin_round (x);
309 static inline v_s64_t
310 v_round_s64 (v_f64_t x)
312 return __builtin_lround (x); /* relies on -fno-math-errno. */
314 /* convert to type1 from type2. */
315 static inline v_f64_t
316 v_to_f64_s64 (v_s64_t x)
320 static inline v_f64_t
321 v_to_f64_u64 (v_u64_t x)
325 /* reinterpret as type1 from type2. */
326 static inline v_u64_t
327 v_as_u64_f64 (v_f64_t x)
329 union { v_f64_t f; v_u64_t u; } r = {x};
332 static inline v_f64_t
333 v_as_f64_u64 (v_u64_t x)
335 union { v_u64_t u; v_f64_t f; } r = {x};
338 static inline v_s64_t
339 v_as_s64_u64 (v_u64_t x)
341 union { v_u64_t u; v_s64_t i; } r = {x};
344 static inline v_u64_t
345 v_as_u64_s64 (v_s64_t x)
347 union { v_s64_t i; v_u64_t u; } r = {x};
350 static inline v_f64_t
351 v_lookup_f64 (const f64_t *tab, v_u64_t idx)
355 static inline v_u64_t
356 v_lookup_u64 (const u64_t *tab, v_u64_t idx)
360 static inline v_f64_t
361 v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
367 #define V_SUPPORTED 1
368 #include <arm_neon.h>
369 typedef float32x4_t v_f32_t;
370 typedef uint32x4_t v_u32_t;
371 typedef int32x4_t v_s32_t;
372 typedef float64x2_t v_f64_t;
373 typedef uint64x2_t v_u64_t;
374 typedef int64x2_t v_s64_t;
382 static inline v_f32_t
385 return (v_f32_t){x, x, x, x};
387 static inline v_u32_t
390 return (v_u32_t){x, x, x, x};
392 static inline v_s32_t
395 return (v_s32_t){x, x, x, x};
399 v_get_f32 (v_f32_t x, int i)
404 v_get_u32 (v_u32_t x, int i)
409 v_get_s32 (v_s32_t x, int i)
415 v_set_f32 (v_f32_t *x, int i, f32_t v)
420 v_set_u32 (v_u32_t *x, int i, u32_t v)
425 v_set_s32 (v_s32_t *x, int i, s32_t v)
430 /* true if any element of a v_cond result is non-zero. */
432 v_any_u32 (v_u32_t x)
434 /* assume elements in x are either 0 or -1u, so that the pairwise 64-bit add below cannot wrap around to zero. */
435 return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
437 /* to wrap the result of relational operators. */
438 static inline v_u32_t
439 v_cond_u32 (v_u32_t x)
443 static inline v_f32_t
444 v_abs_f32 (v_f32_t x)
446 return vabsq_f32 (x);
448 static inline v_f32_t
449 v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
451 return vfmaq_f32 (z, x, y);
453 static inline v_f32_t
454 v_round_f32 (v_f32_t x)
456 return vrndaq_f32 (x);
458 static inline v_s32_t
459 v_round_s32 (v_f32_t x)
461 return vcvtaq_s32_f32 (x);
463 /* convert to type1 from type2. */
464 static inline v_f32_t
465 v_to_f32_s32 (v_s32_t x)
467 return (v_f32_t){x[0], x[1], x[2], x[3]};
469 static inline v_f32_t
470 v_to_f32_u32 (v_u32_t x)
472 return (v_f32_t){x[0], x[1], x[2], x[3]};
474 /* reinterpret as type1 from type2. */
475 static inline v_u32_t
476 v_as_u32_f32 (v_f32_t x)
478 union { v_f32_t f; v_u32_t u; } r = {x};
481 static inline v_f32_t
482 v_as_f32_u32 (v_u32_t x)
484 union { v_u32_t u; v_f32_t f; } r = {x};
487 static inline v_s32_t
488 v_as_s32_u32 (v_u32_t x)
490 union { v_u32_t u; v_s32_t i; } r = {x};
493 static inline v_u32_t
494 v_as_u32_s32 (v_s32_t x)
496 union { v_s32_t i; v_u32_t u; } r = {x};
499 static inline v_f32_t
500 v_lookup_f32 (const f32_t *tab, v_u32_t idx)
502 return (v_f32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
504 static inline v_u32_t
505 v_lookup_u32 (const u32_t *tab, v_u32_t idx)
507 return (v_u32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
509 static inline v_f32_t
510 v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
512 return (v_f32_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
513 p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]};
515 static inline v_f32_t
516 v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
520 v_f32_t){p[0] ? f (x1[0], x2[0]) : y[0], p[1] ? f (x1[1], x2[1]) : y[1],
521 p[2] ? f (x1[2], x2[2]) : y[2], p[3] ? f (x1[3], x2[3]) : y[3]};
529 static inline v_f64_t
532 return (v_f64_t){x, x};
534 static inline v_u64_t
537 return (v_u64_t){x, x};
539 static inline v_s64_t
542 return (v_s64_t){x, x};
545 v_get_f64 (v_f64_t x, int i)
550 v_set_f64 (v_f64_t *x, int i, f64_t v)
554 /* true if any element of a v_cond result is non-zero. */
556 v_any_u64 (v_u64_t x)
558 /* assume elements in x are either 0 or all-ones (-1ull). */
559 return vpaddd_u64 (x) != 0;
561 /* to wrap the result of relational operators. */
562 static inline v_u64_t
563 v_cond_u64 (v_u64_t x)
567 static inline v_f64_t
568 v_abs_f64 (v_f64_t x)
570 return vabsq_f64 (x);
572 static inline v_f64_t
573 v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
575 return vfmaq_f64 (z, x, y);
577 static inline v_f64_t
578 v_round_f64 (v_f64_t x)
580 return vrndaq_f64 (x);
582 static inline v_s64_t
583 v_round_s64 (v_f64_t x)
585 return vcvtaq_s64_f64 (x);
587 /* convert to type1 from type2. */
588 static inline v_f64_t
589 v_to_f64_s64 (v_s64_t x)
591 return (v_f64_t){x[0], x[1]};
593 static inline v_f64_t
594 v_to_f64_u64 (v_u64_t x)
596 return (v_f64_t){x[0], x[1]};
598 /* reinterpret as type1 from type2. */
599 static inline v_u64_t
600 v_as_u64_f64 (v_f64_t x)
602 union { v_f64_t f; v_u64_t u; } r = {x};
605 static inline v_f64_t
606 v_as_f64_u64 (v_u64_t x)
608 union { v_u64_t u; v_f64_t f; } r = {x};
611 static inline v_s64_t
612 v_as_s64_u64 (v_u64_t x)
614 union { v_u64_t u; v_s64_t i; } r = {x};
617 static inline v_u64_t
618 v_as_u64_s64 (v_s64_t x)
620 union { v_s64_t i; v_u64_t u; } r = {x};
623 static inline v_f64_t
624 v_lookup_f64 (const f64_t *tab, v_u64_t idx)
626 return (v_f64_t){tab[idx[0]], tab[idx[1]]};
628 static inline v_u64_t
629 v_lookup_u64 (const u64_t *tab, v_u64_t idx)
631 return (v_u64_t){tab[idx[0]], tab[idx[1]]};
633 static inline v_f64_t
634 v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
636 return (v_f64_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1]};