一呼百應, "one call, a hundred responses"
Loading...
Searching...
No Matches
simd.hpp
Go to the documentation of this file.
1#pragma once
2
11#include <concepts>
12#include <cstdint>
13#include <initializer_list>
14#include <type_traits>
15#include <algorithm>
16#include <immintrin.h>
17#include "attributes/common.hpp"
18#include "numerics.hpp"
19#include "types.hpp"
20#include "simd_data.hpp"
21
22
23using namespace std;
24
25namespace ein {
26
namespace {

/// Extracts the type of the first parameter of a function type or a
/// pointer-to-function type.
///
/// The primary template is intentionally empty, so `arg1<F>::type` only exists
/// when `F` is a function (pointer) taking at least one argument.
// NOTE(review): an unnamed namespace inside a header gives each translation
// unit its own copy of these helpers; a named detail namespace may be preferable.
template <typename T>
struct arg1 {};

/// Plain function type: `Ret(Arg, Args...)`.
template <typename Ret, typename Arg, typename... Args>
struct arg1<Ret(Arg, Args...)> {
  using type = Arg;
};

/// Pointer to function: defers to the plain function-type case.
template <typename Ret, typename Arg, typename... Args>
struct arg1<Ret (*)(Arg, Args...)> : arg1<Ret(Arg, Args...)> {};

/// Convenience alias for `arg1<F>::type`.
template <typename F>
using arg1_t = typename arg1<F>::type;

} // namespace
33
36
38template <typename T, size_t N> requires (has_simd_type<T,N>)
40private:
44 static constexpr size_t bytesize = N*sizeof(T);
45
46 template <typename U>
48
49 template <typename U>
50 using cast_t = simd<U,N*sizeof(T)/sizeof(U)>;
51
52 using value_type = T;
53 static constexpr size_t size = N;
54public:
55
57
60
63 constexpr simd() noexcept = default;
64
67 constexpr simd(simd const &) noexcept = default;
68
71 constexpr simd(simd &&) noexcept = default;
72
75 template <std::convertible_to<T> ... Args>
77 constexpr simd(Args && ... args) noexcept
78 requires (sizeof...(Args) == N)
79 : data(std::forward<Args>(args)...) {}
80
84 constexpr simd(T value) noexcept
85 : data(__extension__(data_t){value}) {}
86
89 constexpr simd(data_t const & data) noexcept
90 : data(data) {}
91
94 constexpr simd(data_t && data) noexcept
95 : data(std::move(data)) {}
96
97 // constexpr simd(const T (&list)[N]) :data(list) {}
98
103 constexpr simd(std::initializer_list<T> init) {
104 // NB: initializer_lists are janky af
105 std::copy_n(init.begin(),std::min(N,init.size()),begin());
106 }
107
108 // \todo append
109 // concat
110 //constexpr simd(simd<T,N/2> a, simd<T,N/2> b) : data(__builtin_shufflevector(a,b,...,...) {}
111
114 constexpr simd(intrinsic_t const & data) noexcept
115 requires (!std::is_same_v<data_t, intrinsic_t>)
116 : data(reinterpret_cast<data_t>(data)) {}
117
120 constexpr simd(intrinsic_t && data) noexcept
121 requires (!std::is_same_v<data_t, intrinsic_t>)
122 : data(reinterpret_cast<data_t>(std::move(data))) {}
123
125
128
130 constexpr simd & operator = (simd &&) noexcept = default;
131
133 constexpr simd & operator = (simd const &) noexcept = default;
134
136 template <typename U>
137 requires (!std::is_same_v<U,T> && has_builtin_convertvector<U,T,N>)
139 constexpr simd & operator = (simd_t<U> other) noexcept {
140 if consteval {
141 for (int i=0;i<N;++i)
142 data[i] = T(other.data[i]);
143 } else {
144 data = __builtin_convertvector(other.data,data_t);
145 }
146 }
147
151
154 constexpr operator intrinsic_t & () noexcept {
155 if constexpr (std::is_same_v<intrinsic_t,data_t>) return data;
156 else return reinterpret_cast<intrinsic_t &>(data);
157 }
158
162 constexpr operator intrinsic_t const & () const noexcept {
163 if constexpr (std::is_same_v<intrinsic_t,data_t>) return data;
164 else return reinterpret_cast<intrinsic_t const &>(data);
165 }
166
170 constexpr intrinsic_t & it() noexcept {
171 if constexpr (std::is_same_v<intrinsic_t,data_t>) return data;
172 else return reinterpret_cast<intrinsic_t &>(data);
173 }
174
178 constexpr intrinsic_t const & it() const noexcept {
179 if constexpr (std::is_same_v<intrinsic_t,data_t>) return data;
180 else return reinterpret_cast<intrinsic_t const &>(data);
181 }
182
186
189 constexpr T & operator[](std::size_t i) noexcept { return reinterpret_cast<T *>(&data)[i]; }
190
193 constexpr T const & operator[](std::size_t i) const noexcept { return reinterpret_cast<T const *>(&data)[i]; }
194
197 constexpr T * begin() noexcept { return reinterpret_cast<T*>(&data); }
198
201 constexpr T * end() noexcept { return begin() + N; }
202
205 constexpr T const * cbegin() const noexcept { return reinterpret_cast<T const *>(&data); }
206
209 constexpr T const * cend() const noexcept { return begin() + N; }
210
213 constexpr T const * begin() const noexcept { return cbegin(); }
214
217 constexpr T const * end() const noexcept { return cend(); }
218
221 constexpr std::reverse_iterator<T*> rbegin() noexcept { return std::reverse_iterator<T*>(end()); }
222
225 constexpr std::reverse_iterator<T*> rend() noexcept { return std::reverse_iterator<T*>(begin()); }
226
229 constexpr std::reverse_iterator<const T*> crbegin() const noexcept { return std::reverse_iterator<const T*>(cend()); }
230
233 constexpr std::reverse_iterator<const T*> crend() const noexcept { return std::reverse_iterator<const T*>(cbegin()); }
234
237 constexpr std::reverse_iterator<const T*> rbegin() const noexcept { return crbegin(); }
238
241 constexpr std::reverse_iterator<const T*> rend() const noexcept { return crend(); }
242
246
253 template <size_t I> requires (I < N) friend
255 constexpr T & get(simd & s) noexcept {
256 return s[I];
257 }
258
265 template <size_t I> requires (I < N) friend
267 constexpr T const & get(simd const & s) noexcept {
268 return s[I];
269 }
270
277 template <size_t I> requires (I < N) friend
279 constexpr T && get(simd && s) noexcept {
280 return std::move(s[I]);
281 }
282
286
289 constexpr simd operator +(simd x, simd y) noexcept
290 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a + b; }) {
291 return x.data + y.data;
292 }
293
296 constexpr simd & operator +=(simd other) noexcept
297 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a += b; }) {
298 data += other.data;
299 return *this;
300 }
301
304 constexpr simd operator -(simd x, simd y) noexcept
305 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a - b; }) {
306 return x.data - y.data;
307 }
308
311 constexpr simd & operator -=(simd other) noexcept
312 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a -= b; }) {
313 data -= other.data;
314 return *this;
315 }
316
319 constexpr simd operator *(simd x, simd y) noexcept
320 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a * b; }) {
321 return x.data * y.data;
322 }
323
326 constexpr simd & operator *=(simd other) noexcept
327 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a *= b; }) {
328 data *= other.data;
329 return *this;
330 }
331
334 constexpr simd operator / (simd x, simd y) noexcept
335 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a / b; }) {
336 return x.data / y.data;
337 }
338
341 constexpr simd & operator /= (simd other) noexcept
342 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a /= b; }) {
343 data /= other.data;
344 return *this;
345 }
346
348 template <typename U> friend
350 constexpr simd operator &(simd x, simd y) noexcept
351 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a & b; }) {
352 return x.data & y.data;
353 }
354
357 constexpr simd & operator &=(simd other) noexcept
358 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a &= b; }) {
359 data &= other.data;
360 return *this;
361 }
362
364 template <typename U> friend
366 constexpr simd operator |(simd x, simd y) noexcept
367 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a | b; }) {
368 return x.data | y.data;
369 }
370
373 constexpr simd & operator |= (simd other) noexcept
374 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a |= b; }) {
375 data |= other.data;
376 return *this;
377 }
378
380 friend
382 constexpr simd operator ^(simd x, simd y) noexcept
383 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a ^ b; }) {
384 return x.data ^ y.data;
385 }
386
389 constexpr simd & operator ^= (simd other) noexcept
390 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a ^= b; }) {
391 data ^= other.data;
392 return *this;
393 }
394
397 constexpr simd operator + () const noexcept
398 requires (has_lifted_operations<T> && requires (data_t x) { + x; }) {
399 return + data;
400 }
401
404 constexpr simd operator - () const noexcept
405 requires (has_lifted_operations<T> && requires (data_t x) { - x; }) {
406 return - data;
407 }
408
409 //EIN_UNARY_OP(~)
410 //EIN_UNARY_OP(!)
411
415 constexpr simd & operator--() noexcept
416 requires (has_lifted_operations<T> && requires (data_t x) { --x; }) {
417 --data;
418 return *this;
419 }
420
424 constexpr simd operator--(int) noexcept
425 requires (has_lifted_operations<T> && requires (data_t x) { x--; }) {
426 simd t = *this;
427 data--;
428 return t;
429 }
430
434 constexpr simd & operator++() noexcept
435 requires (has_lifted_operations<T> && requires (data_t x) { ++x; }) {
436 ++data;
437 return *this;
438 }
439
443 constexpr simd operator++(int) noexcept
444 requires (has_lifted_operations<T> && requires (data_t x) { x++; }) {
445 simd t = *this;
446 data++;
447 return t;
448 }
449
452 template <size_t K>
454 friend constexpr simd operator >>(simd x, imm_t<K>) noexcept
455 requires (has_lifted_operations<T> && requires (data_t a) { a >> static_cast<T>(K); }) {
456 return x.data >> static_cast<T>(K);
457 }
458
462 friend constexpr simd operator >>(simd x, T y) noexcept
463 requires (has_lifted_operations<T> && requires (data_t a, T y) { a >> y; }) {
464 return x.data >> y;
465 }
466
470 friend constexpr simd operator >>(simd x, simd y) noexcept
471 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a >> b; }) {
472 return x.data >> y.data;
473 }
474
475 // <<
476
479 template <size_t K>
481 friend constexpr simd operator <<(simd x, imm_t<K>) noexcept
482 requires (has_lifted_operations<T> && requires (data_t a) { a << static_cast<T>(K); }) {
483 return x.data << static_cast<T>(K);
484 }
485
489 friend constexpr simd operator <<(simd x, T y) noexcept
490 requires (has_lifted_operations<T> && requires (data_t a, T b) { a << b; }) {
491 return x.data << y;
492 }
493
497 friend constexpr simd operator <<(simd x, simd y) noexcept
498 requires (has_lifted_operations<T> && requires (data_t a, data_t b) { a << b; }) {
499 return x.data << y.data;
500 }
501
502 // >>=
503
506 template <size_t K>
508 constexpr simd & operator >>=(imm_t<K>) noexcept
509 requires (has_lifted_operations<T> && requires (data_t a) { a >>= static_cast<T>(K); }) {
510 data >>= static_cast<T>(K);
511 return *this;
512 }
513
517 constexpr simd & operator >>=(T y) noexcept
518 requires (has_lifted_operations<T> && requires (data_t a, T b) { a >>= b; }) {
519 data >>= y;
520 return *this;
521 }
522
526 constexpr simd & operator >>=(simd y) noexcept
527 requires (has_lifted_operations<T> && requires (data_t a) { a >>= a; }) {
528 data >>= y.data;
529 return *this;
530 }
531
534 template <size_t K>
536 constexpr simd & operator <<=(imm_t<K>) noexcept
537 requires (has_lifted_operations<T> && requires (data_t a) { a <<= static_cast<T>(K); }) {
538 data <<= static_cast<T>(K);
539 return *this;
540 }
541
542 // <<=
543
547 constexpr simd & operator <<=(T y) noexcept
548 requires (has_lifted_operations<T> && requires (data_t a, T b) { a <<= b; }) {
549 data <<= y;
550 return *this;
551 }
552
556 constexpr simd & operator <<=(simd y) noexcept
557 requires (has_lifted_operations<T> && requires (data_t a) { a <<= a; }) {
558 data <<= y.data;
559 return *this;
560 }
561
563 // end of operators
566
568 template <size_t ... is>
570 constexpr simd<T,sizeof...(is)> shuffle() noexcept
571 requires (((is < N) && ... && has_simd_type<T,sizeof...(is)>) &&
572 requires (data_t x) { simd<T,sizeof...(is)>(__builtin_shufflevector(x, is...)); }) {
573 if consteval {
574 return { data[is]... };
575 } else {
576 return __builtin_shufflevector(data,is...);
577 }
578 }
579
582 template <size_t ... is>
584 constexpr simd<T,sizeof...(is)> shuffle(simd<T,N> b) noexcept
585 requires (((is < N*2) && ... && has_simd_type<T,sizeof...(is)>) &&
586 requires (data_t x) { simd<T,sizeof...(is)>(__builtin_shufflevector(x, x, is...)); }) {
587 if consteval {
588 return { (is < N ? data[is] : b[is-N])... };
589 } else {
590 return __builtin_shufflevector(data,b.data,is...);
591 }
592 }
593
595 // end of shuffles
598
601 friend constexpr mask_t operator < (simd x, simd y) noexcept
602 requires (!has_mmask<T,N> && (has_lifted_operations<T> && requires (data_t a) { { a < a } -> std::same_as<mask_t>; })) {
603 return x.data < y.data;
604 }
605
608 friend constexpr mask_t operator > (simd x, simd y) noexcept
609 requires (!has_mmask<T,N> && (has_lifted_operations<T> && requires (data_t a) { { a > a } -> std::same_as<mask_t>; })) {
610 return x.data > y.data;
611 }
612
615 friend constexpr mask_t operator <= (simd x, simd y) noexcept
616 requires (!has_mmask<T,N> && (has_lifted_operations<T> && requires (data_t a) { { a <= a } -> std::same_as<mask_t>; }) ) {
617 return x.data <= y.data;
618 }
619
622 friend constexpr mask_t operator >= (simd x, simd y) noexcept
623 requires (!has_mmask<T,N> && (has_lifted_operations<T> && requires (data_t a) { { a >= a } -> std::same_as<mask_t>; })) {
624 return x.data >= y.data;
625 }
626
629 friend constexpr mask_t operator == (simd x, simd y) noexcept
630 requires (!has_mmask<T,N> && (has_lifted_operations<T> && requires (data_t a) { { a == a } -> std::same_as<mask_t>; })) {
631 return x.data == y.data;
632 }
633
636 friend constexpr mask_t operator != (simd x, simd y) noexcept
637 requires (!has_mmask<T,N> && (has_lifted_operations<T> && requires (data_t a) { { a != a } -> std::same_as<mask_t>; })) {
638 return x.data != y.data;
639 }
640
641// avx512 comparisons
642
643#ifdef __AVX512F__
644 // implement masked compares
645
// Helper macros that generate the bodies of the AVX-512 mask comparisons.
// They expect `a`, `b`, `T`, `N`, `bytesize` and `mask_t` to be in scope.

// EIN_COMPARE_OP_SIZE: if the register is `bs` bytes wide, return cmd(a,b);
// otherwise fall through to whatever follows the trailing `else`.
#define EIN_COMPARE_OP_SIZE(bs,cmd) \
  if constexpr (bytesize == bs) return cmd(a.it(),b.it()); \
  else

// EIN_COMPARE_OP_TY: if the storage type is `ty`, pick the 128/256/512-bit
// `_mm*_cmp<opfix>_<infix>_mask` intrinsic by register width.
#define EIN_COMPARE_OP_TY(opfix,ty,infix) \
  if constexpr (std::is_same_v<storage_type<T>,ty>) { \
    EIN_COMPARE_OP_SIZE(16,_mm_cmp##opfix##_##infix##_mask) \
    EIN_COMPARE_OP_SIZE(32,_mm256_cmp##opfix##_##infix##_mask) \
    EIN_COMPARE_OP_SIZE(64,_mm512_cmp##opfix##_##infix##_mask) \
    static_assert(false); \
  } else

// EIN_COMPARE_OP: full comparison body. During constant evaluation the mask is
// built one lane at a time; the bit is shifted as mask_t because N can exceed
// the width of int. At run time the matching intrinsic is selected.
// (The line after ON512FP16( previously lacked its continuation backslash,
// which cut the macro definition short.)
#define EIN_COMPARE_OP(op,opfix) \
  if consteval { \
    mask_t result = 0; \
    for (size_t i=0;i<N;++i) \
      if (a[i] op b[i]) \
        result |= (mask_t(1) << i); \
    return result; \
  } else { \
    EIN_COMPARE_OP_TY(opfix,float,ps) \
    EIN_COMPARE_OP_TY(opfix,double,pd) \
    EIN_COMPARE_OP_TY(opfix,int8_t,epi8) \
    EIN_COMPARE_OP_TY(opfix,int16_t,epi16) \
    EIN_COMPARE_OP_TY(opfix,int32_t,epi32) \
    EIN_COMPARE_OP_TY(opfix,int64_t,epi64) \
    EIN_COMPARE_OP_TY(opfix,uint8_t,epu8) \
    EIN_COMPARE_OP_TY(opfix,uint16_t,epu16) \
    EIN_COMPARE_OP_TY(opfix,uint32_t,epu32) \
    EIN_COMPARE_OP_TY(opfix,uint64_t,epu64) \
    ON512FP16( \
    EIN_COMPARE_OP_TY(opfix,_Float16,ph) \
    EIN_COMPARE_OP_TY(opfix,__fp16,ph) \
    ) \
    static_assert(false); \
  }
683
685 friend constexpr mask_t operator < (simd a, simd b) noexcept
687 EIN_COMPARE_OP(<,lt)
688 }
690 friend constexpr mask_t operator > (simd a, simd b) noexcept
692 EIN_COMPARE_OP(>,gt)
693 }
695 friend constexpr mask_t operator <= (simd a, simd b) noexcept
697 EIN_COMPARE_OP(<=,le)
698 }
700 friend constexpr mask_t operator >= (simd a, simd b) noexcept
702 EIN_COMPARE_OP(>=,ge)
703 }
705 friend constexpr mask_t operator == (simd a, simd b) noexcept
707 EIN_COMPARE_OP(==,eq)
708 }
710 friend constexpr mask_t operator != (simd a, simd b) noexcept
712 EIN_COMPARE_OP(!=,ne)
713 }
714
// Retire the comparison helper macros so they do not leak out of this header.
// The size helper is named EIN_COMPARE_OP_SIZE; the previous #undef targeted a
// name that was never defined and left the real macro behind.
#undef EIN_COMPARE_OP
#undef EIN_COMPARE_OP_TY
#undef EIN_COMPARE_OP_SIZE
718#endif
719
720// CMP(FLOAT)
721
723 template <CMP imm8>
724 requires one_of_t<T,float,double> && (size_t(imm8) < max_fp_comparison_predicate)
726 friend constexpr
727 mask_t cmp(simd a, simd b) noexcept {
728 if consteval {
729 // compile time polyfill
730#ifdef __AVX512F__
731 mask_t result = 0;
732 for (size_t i=0;i<N;++i)
733 if (cmp<imm8>(a[i],b[i]))
734 result |= 1 << i;
735 return result;
737#define ein_suffix(x) x##_mask
739#else
740 mask_t result = 0;
741 for (size_t i=0;i<N;++i)
742 result[i] = cmp<imm8>(a[i],b[i]);
743 return result;
745#define ein_suffix(x) x
747#endif
748
749 } else {
750 if constexpr(std::is_same_v<T,float>) {
751 if constexpr (bytesize==16) return ein_suffix(_mm_cmp_ps)(a.it(),b.it(),static_cast<int>(imm8));
752 else if constexpr (bytesize==32) return ein_suffix(_mm256_cmp_ps)(a.it(),b.it(),static_cast<int>(imm8));
753 ON512(else if constexpr (bytesize==64) return ein_suffix(_mm512_cmp_ps)(a.it(),b.it(),static_cast<int>(imm8));)
754 else static_assert(false);
755 } else if constexpr (std::is_same_v<T,double>) {
756 if constexpr (bytesize==16) return ein_suffix(_mm_cmp_pd)(a.it(),b.it(),static_cast<int>(imm8));
757 else if constexpr (bytesize==32) return ein_suffix(_mm256_cmp_pd)(a.it(),b.it(),static_cast<int>(imm8));
758 ON512(else if constexpr (bytesize==64) return ein_suffix(_mm512_cmp_pd)(a.it(),b.it(),static_cast<int>(imm8));)
759 } else
760ON512FP16( if constexpr (one_of_t<T,__fp16,_Float16>) {
761 else if constexpr (bytesize==16) return _mm_cmp_ph_mask(a.it(),b.it(),static_cast<int>(imm8));
762 else if constexpr (bytesize==32) return _mm256_cmp_ph(a.it(),b.it(),static_cast<int>(imm8));
763 else if constexpr (bytesize==64) return _mm512_cmp_ph_mask(a.it(),b.it(),static_cast<int>(imm8));
764 } else)
766#undef ein_suffix
768 {
769 static_assert(false);
770 }
771 }
772 }
773
774// CMPINT
775
777 template <CMPINT imm8>
778 requires one_of_t<storage_type<T>,uint8_t,int8_t,uint16_t,int16_t,uint32_t,int32_t,uint64_t,int64_t> && has_lifted_operations<T> && (size_t(imm8) < 8uz)
780 friend constexpr
781 mask_t cmpint(simd a, simd b) noexcept {
782 using enum CMPINT;
783 if consteval {
784 // compile time polyfill
785#ifdef __AVX512F__
786 mask_t result = 0;
787 for (size_t i=0;i<N;++i)
788 if (cmpint<imm8>(a[i],b[i]))
789 result |= 1 << i;
790 return result;
792#define ein_suffix _mask
794#else
795 mask_t result = 0;
796 for (size_t i=0;i<N;++i)
797 if (cmpint<imm8>(a[i],b[i]))
798 result[i] = -1;
799 return result;
801#define ein_suffix
803#endif
804 } else {
805#ifdef __AVX512F__
807#define EIN_HANDLE(type,infix) \
808 if constexpr(std::is_same_v<T,type>) { \
809 if constexpr (bytesize==16) return _mm_cmp_ ## infix ## _mask(a.it(),b.it(),imm8); \
810 else if constexpr (bytesize==32) return _mm256_cmp_ ## infix ## _mask(a.it(),b.it(),imm8); \
811 else if constexpr (bytesize==64) return _mm512_cmp_ ## infix ## _mask(a.it(),b.it(),imm8); \
812 else static_assert(false); \
813 } else
815 EIN_HANDLE(int8_t,epi8)
816 EIN_HANDLE(uint8_t,epu8)
817 EIN_HANDLE(int16_t,epi16)
818 EIN_HANDLE(uint16_t,epu16)
819 EIN_HANDLE(int32_t,epi32)
820 EIN_HANDLE(uint32_t,epu32)
821 EIN_HANDLE(int64_t,epi64)
822 EIN_HANDLE(uint64_t,epu64)
823 static_assert(false);
824#undef EIN_HANDLE
825#else
826 // AVX-2 polyfill
827 if constexpr (imm8 == FALSE) {
828 if constexpr (bytesize==16) return _mm_setzero_si128();
829 else if constexpr (bytesize==32) return _mm256_setzero_si256();
830 else static_assert(false);
831 } else if constexpr (imm8 == TRUE) {
832 if constexpr (bytesize==16) return _mm_set1_epi32(-1);
833 else if constexpr (bytesize==32) return _mm256_set1_epi32(-1);
834 else static_assert(false);
835 } else if constexpr (imm8 == EQ) return a == b;
836 else if constexpr (imm8 == NE) return a != b;
837 else if constexpr (imm8 == LT) return a < b;
838 else if constexpr (imm8 == NLE) return a > b;
839 else if constexpr (imm8 == LE) return a <= b;
840 else if constexpr (imm8 == NLT) return a >= b;
841 else static_assert(false);
842#endif
843 }
844 }
845
846#ifdef __AVX512F__
// Helper macros for the AVX-512 comparison operators that follow.
// They expect `a`, `b`, `T`, `N`, `bytesize` and `mask_t` to be in scope.
//
// EIN_COMPARE512_CASE(type,infix,op): if T is `type`, call the
// `_mm*_cmp<op>_<infix>_mask` intrinsic that matches the register width.
// The earlier version always called the cmpeq intrinsic whatever the operator
// was, and routed unsigned element types through the signed (epi) compares.
#define EIN_COMPARE512_CASE(type,infix,op) \
  if constexpr (std::is_same_v<T,type>) { \
    if constexpr (bytesize==16) return _mm_cmp##op##_##infix##_mask(a.it(),b.it()); \
    else if constexpr (bytesize==32) return _mm256_cmp##op##_##infix##_mask(a.it(),b.it()); \
    else if constexpr (bytesize==64) return _mm512_cmp##op##_##infix##_mask(a.it(),b.it()); \
    else static_assert(false); \
  } else

// EIN_COMPARE512(op,infix,imm8): complete operator body.
//  - constant evaluation: build the mask lane by lane, starting from zero and
//    shifting as mask_t (the mask used to be uninitialized and shifted as int);
//  - run time, floating point: use the predicate form `_mm*_cmp_*_mask` with
//    `imm8`, selected by register width in bytes (lane counts differ between
//    float, double and half precision);
//  - run time, integers: use the named `_mm*_cmp<infix>_*_mask` intrinsics.
#define EIN_COMPARE512(op,infix,imm8) \
  if consteval { \
    mask_t mask = 0; \
    for (size_t i=0;i<N;++i) \
      if (a[i] op b[i]) \
        mask |= mask_t(1) << i; \
    return mask; \
  } else { \
    if constexpr (one_of_t<T,float>) { \
      if constexpr (bytesize==16) return _mm_cmp_ps_mask(a.it(),b.it(),static_cast<int>(imm8)); \
      else if constexpr (bytesize==32) return _mm256_cmp_ps_mask(a.it(),b.it(),static_cast<int>(imm8)); \
      else if constexpr (bytesize==64) return _mm512_cmp_ps_mask(a.it(),b.it(),static_cast<int>(imm8)); \
      else static_assert(false); \
    } else if constexpr (one_of_t<T,double>) { \
      if constexpr (bytesize==16) return _mm_cmp_pd_mask(a.it(),b.it(),static_cast<int>(imm8)); \
      else if constexpr (bytesize==32) return _mm256_cmp_pd_mask(a.it(),b.it(),static_cast<int>(imm8)); \
      else if constexpr (bytesize==64) return _mm512_cmp_pd_mask(a.it(),b.it(),static_cast<int>(imm8)); \
      else static_assert(false); \
ON512FP16( \
    } else if constexpr (one_of_t<T,_Float16,__fp16>) { \
      if constexpr (bytesize==16) return _mm_cmp_ph_mask(a.it(),b.it(),static_cast<int>(imm8)); \
      else if constexpr (bytesize==32) return _mm256_cmp_ph_mask(a.it(),b.it(),static_cast<int>(imm8)); \
      else if constexpr (bytesize==64) return _mm512_cmp_ph_mask(a.it(),b.it(),static_cast<int>(imm8)); \
      else static_assert(false);) \
    } else \
    EIN_COMPARE512_CASE(int8_t,epi8,infix) \
    EIN_COMPARE512_CASE(uint8_t,epu8,infix) \
    EIN_COMPARE512_CASE(int16_t,epi16,infix) \
    EIN_COMPARE512_CASE(uint16_t,epu16,infix) \
    EIN_COMPARE512_CASE(int32_t,epi32,infix) \
    EIN_COMPARE512_CASE(uint32_t,epu32,infix) \
    EIN_COMPARE512_CASE(int64_t,epi64,infix) \
    EIN_COMPARE512_CASE(uint64_t,epu64,infix) \
    static_assert(false); \
  }
887
890 friend constexpr mask_t operator == (simd a, simd b) noexcept
892 EIN_COMPARE512(==,eq,CMP::EQ_OQ)
893 }
894
897 friend constexpr mask_t operator /= (simd a, simd b) noexcept
898 requires (has_mmask<T,N> && simd_builtin<T>) {
899 EIN_COMPARE512(/=,neq,CMP::NEQ_UQ)
900 }
901
904 friend constexpr mask_t operator < (simd a, simd b) noexcept
905 requires (has_mmask<T,N> && simd_builtin<T>) {
906 EIN_COMPARE512(<,lt,CMP::LT_OQ)
907 }
908
911 friend constexpr mask_t operator <= (simd a, simd b) noexcept
912 requires (has_mmask<T,N> && simd_builtin<T>) {
913 EIN_COMPARE512(<=,le,CMP::LE_OQ)
914 }
915
918 friend constexpr mask_t operator > (simd a, simd b) noexcept
919 requires (has_mmask<T,N> && simd_builtin<T>) {
920 EIN_COMPARE512(>,gt,CMP::GT_OQ)
921 }
922
925 friend constexpr mask_t operator >= (simd a, simd b) noexcept
926 requires (has_mmask<T,N> && simd_builtin<T>) {
927 EIN_COMPARE512(>=,ge,CMP::GE_OQ)
928 }
929
930 #undef EIN_COMPARE512
931#endif
933 // end of comparisons
934
937
940 friend constexpr simd scalef(simd x, simd y) noexcept
941 requires one_of_t<T,float,double> {
942 if consteval {
943 simd result;
944 for (size_t i=0;i<N;++i)
945 result[i] = scalef(x[i],y[i]);
946 return result;
947 } else {
948 if constexpr (std::is_same_v<T,float>) {
949 if constexpr (bytesize==16) return _mm_scalef_ps(x,y);
950 else if constexpr (bytesize==32) return _mm256_scalef_ps(x,y);
951#ifdef __AVX512F__
952 else if constexpr (bytesize==64) return _mm512_scalef_ps(x,y);
953#endif
954 else static_assert(false);
955 } else if constexpr (std::is_same_v<T,double>) {
956 if constexpr (bytesize==16) return _mm_scalef_pd(x,y);
957 else if constexpr (bytesize==32) return _mm256_scalef_pd(x,y);
958#ifdef __AVX512F__
959 else if constexpr (bytesize==64) return _mm512_scalef_pd(x,y);
960#endif
961#if defined(__AVX512FP16__) && defined(__AVX512VL__)
962 } else if constexpr (std::is_same_v<T,__fp16,_Float16>) {
963 if constexpr (bytesize==16) return _mm_scalef_ph(x,y);
964 else if constexpr (bytesize==32) return _mm256_scalef_ph(x,y);
965 else if constexpr (bytesize==64) return _mm512_scalef_ph(x,y);
966#endif
967 } else {
968 static_assert(false);
969 }
970 }
971 }
972
976
978 friend constexpr void swap(simd & x, simd & y) noexcept {
979 if consteval {
980 for (int i=0;i<N;++i)
981 swap(x[i],y[i]);
982 } else {
983 swap(x.data,y.data);
984 }
985 }
987}; // struct simd
988
991
993template <typename T, size_t N>
994requires (
995 has_simd_type<T,N>
996 && (N % sizeof(T) == 0)
997#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
998 && !one_of_t<T, long long>
999#endif
1000)
1001simd(T __attribute ((__vector_size__(N)))) -> simd<T,N/sizeof(T)>;
1002
1004template <typename ... Args>
1005requires has_simd_type<std::common_type<Args...>, sizeof...(Args)>
1006simd(Args&&...) -> simd<std::common_type<Args...>,sizeof...(Args)>;
1007
1010template <typename T>
1011requires has_simd_type<T,max_simd_size/sizeof(T)>
1012simd(T) -> simd<T,has_simd_type<T,max_simd_size/sizeof(T)>>;
1013
1015// end of ctads
1016
1017#if 0
1020
// EIN_SWITCH dispatches on the `intrinsic_t` alias visible at the point of use
// and expands EIN_CASE with the callable chosen for that register type. The
// arguments are ordered ps, pd, integer, ph for 128, 256 and 512 bits.
// Fixes: the `else` line before ON512FP16 lacked its continuation backslash
// (ending the macro early), and the final static_assert had its semicolon
// outside the braces.
#define EIN_SWITCH(on_m128,on_m128d,on_m128i,on_m128ph,on_m256,on_m256d,on_m256i,on_m256ph,on_m512,on_m512d,on_m512i,on_m512ph) \
  if constexpr (std::is_same_v<intrinsic_t,__m128>) { EIN_CASE(on_m128) } \
  else if constexpr (std::is_same_v<intrinsic_t,__m128d>) { EIN_CASE(on_m128d) } \
  else if constexpr (std::is_same_v<intrinsic_t,__m128i>) { EIN_CASE(on_m128i) } \
  else if constexpr (std::is_same_v<intrinsic_t,__m256>) { EIN_CASE(on_m256) } \
  else if constexpr (std::is_same_v<intrinsic_t,__m256d>) { EIN_CASE(on_m256d) } \
  else if constexpr (std::is_same_v<intrinsic_t,__m256i>) { EIN_CASE(on_m256i) } \
  ON512(else if constexpr (std::is_same_v<intrinsic_t,__m512>) { EIN_CASE(on_m512) } \
  else if constexpr (std::is_same_v<intrinsic_t,__m512d>) { EIN_CASE(on_m512d) } \
  else if constexpr (std::is_same_v<intrinsic_t,__m512i>) { EIN_CASE(on_m512i) }) \
  else \
  ON512FP16( if constexpr (std::is_same_v<intrinsic_t,__m128ph>) { EIN_CASE(on_m128ph) } \
  else if constexpr (std::is_same_v<intrinsic_t,__m256ph>) { EIN_CASE(on_m256ph) } \
  else if constexpr (std::is_same_v<intrinsic_t,__m512ph>) { EIN_CASE(on_m512ph) } \
  else ) \
  { static_assert(false); }

// For loads: call the intrinsic with `p` cast to the intrinsic's own first
// parameter type.
#define EIN_CASE(f) return f(reinterpret_cast<arg1_t<decltype(f)>>(p));
1041
1043template <size_t N, typename T>
1045static constexpr simd<T,N> load(T const * p) noexcept {
1046 if consteval {
1047 simd<T,N> result;
1048 for (size_t i = 0;i<N;++i)
1049 result[i] = p[i];
1050 return result;
1051 } else {
1052 using intrinsic_t = simd_intrinsic_t<T,N>;
1053 // No CPU has cared about the distinction in the datatype
1054 // being loaded in almost 25 years, but here we are.
1055 // I think the Pentium III was the last cpu that needed this distinction!
1056 EIN_SWITCH(
1057 _mm_load_ps, _mm_load_pd, _mm_load_epi32, _mm_load_ph,
1058 _mm256_load_ps, _mm256_load_pd, _mm256_load_epi32, _mm256_load_ph,
1059 _mm512_load_ps, _mm512_load_pd, _mm512_load_epi32, _mm512_load_ph
1060 )
1061 }
1062}
1063
1064template <size_t N, typename T>
1066static constexpr simd<T,N> loadu(T const * p) noexcept {
1067 if consteval {
1068 simd<T,N> result;
1069 for (size_t i = 0;i<N;++i)
1070 result[i] = p[i];
1071 return result;
1072 } else {
1073 using intrinsic_t = simd_intrinsic_t<T,N>;
1074 EIN_SWITCH(
1075 _mm_loadu_ps, _mm_loadu_pd, _mm_loadu_epi32, _mm_loadu_ph,
1076 _mm256_loadu_ps, _mm256_loadu_pd, _mm256_loadu_epi32, _mm256_loadu_ph,
1077 _mm512_loadu_ps, _mm512_loadu_pd, _mm512_loadu_epi32, _mm512_loadu_ph
1078 )
1079 }
1080}
1081
1083template <size_t N, typename T>
1085static constexpr simd<T,N> lddqu(T const * p) noexcept {
1086 if consteval {
1087 simd<T,N> result;
1088 for (size_t i = 0;i<N;++i)
1089 result[i] = p[i];
1090 return result;
1091 } else {
1092 using intrinsic_t = simd_intrinsic_t<T,N>;
1093 EIN_SWITCH(
1094 _mm_loadu_ps, _mm_loadu_pd, _mm_lddqu_si128, _mm_loadu_ph,
1095 _mm256_loadu_ps, _mm256_loadu_pd, _mm256_lddqu_si256, _mm256_loadu_ph,
1096 _mm512_loadu_ps, _mm512_loadu_pd, _mm512_lddqu_si512, _mm512_loadu_ph
1097 )
1098 }
1099}
1100
1102#undef EIN_CASE
1103#define EIN_CASE(f) return f(p);
1105
1108template <size_t N, typename T>
1110static constexpr simd<T,N> stream_load(T const * p) noexcept {
1111 if consteval {
1112 simd<T,N> result;
1113 for (size_t i = 0;i<N;++i)
1114 result[i] = p[i];
1115 return result;
1116 } else {
1117 using intrinsic_t = simd_intrinsic_t<T,N>;
1119 #define ein_mm_stream_load_ps(x) cast_ps(_mm_stream_load_si128(x))
1120 #define ein_mm256_stream_load_ps(x) cast_ps(_mm256_stream_load_si256(x))
1121 #define ein_mm512_stream_load_ps(x) cast_ps(_mm512_stream_load_si512(x))
1122 #define ein_mm_stream_load_pd(x) cast_pd(_mm_stream_load_si128(x))
1123 #define ein_mm256_stream_load_pd(x) cast_pd(_mm256_stream_load_si256(x))
1124 #define ein_mm512_stream_load_pd(x) cast_pd(_mm512_stream_load_si512(x))
1125 #define ein_mm_stream_load_ph(x) cast_ph(_mm_stream_load_si128(x))
1126 #define ein_mm256_stream_load_ph(x) cast_ph(_mm256_stream_load_si256(x))
1127 #define ein_mm512_stream_load_ph(x) cast_ph(_mm512_stream_load_si512(x))
1128 EIN_SWITCH(
1129 ein_mm_stream_load_ps, ein_mm_stream_load_pd, _mm_stream_load_si128, ein_mm_stream_load_ph,
1130 ein_mm256_stream_load_ps, ein_mm256_stream_load_pd, _mm256_stream_load_si256, ein_mm256_stream_load_ph,
1131 ein_mm512_stream_load_ps, ein_mm512_stream_load_pd, _mm512_stream_load_si512, ein_mm512_stream_load_ph
1132 )
1133 #undef ein_mm_stream_load_ps
1134 #undef ein_mm256_stream_load_ps
1135 #undef ein_mm512_stream_load_ps
1136 #undef ein_mm_stream_load_pd
1137 #undef ein_mm256_stream_load_pd
1138 #undef ein_mm512_stream_load_pd
1139 #undef ein_mm_stream_load_ph
1140 #undef ein_mm256_stream_load_ph
1141 #undef ein_mm512_stream_load_ph
1143 }
1144}
1145
1147#undef EIN_CASE
1148#define EIN_CASE(f) f(p,x.it());
1150
1152 // end of loads
1153
1156
1157template <typename T, size_t N>
1159static constexpr void store(T * p, simd<T,N> x) noexcept {
1160 if consteval {
1161 for (size_t i = 0;i<N;++i)
1162 p[i] = x.data[i];
1163 } else {
1164 using intrinsic_t = simd_intrinsic_t<T,N>;
1165 EIN_SWITCH(
1166 _mm_store_ps, _mm_store_pd, _mm_store_epi32, _mm_store_ph,
1167 _mm256_store_ps, _mm256_store_pd, _mm256_store_epi32, _mm256_store_ph,
1168 _mm512_store_ps, _mm512_store_pd, _mm512_store_epi32, _mm512_store_ph
1169 )
1170 }
1171}
1172
1173template <typename T, size_t N>
1175static constexpr void storeu(T * p, simd<T,N> x) noexcept {
1176 if consteval {
1177 for (size_t i = 0;i<N;++i)
1178 p[i] = x.data[i];
1179 } else {
1180 using intrinsic_t = simd_intrinsic_t<T,N>;
1181 EIN_SWITCH(
1182 _mm_storeu_ps, _mm_storeu_pd, _mm_storeu_epi32, _mm_storeu_ph,
1183 _mm256_storeu_ps, _mm256_storeu_pd, _mm256_storeu_epi32, _mm256_storeu_ph,
1184 _mm512_storeu_ps, _mm512_storeu_pd, _mm512_storeu_epi32, _mm512_storeu_ph
1185 )
1186 }
1187}
1188
1189template <typename T, size_t N>
1191static constexpr void stream(T * p, simd<T,N> x) noexcept {
1192 if consteval {
1193 for (size_t i = 0;i<N;++i)
1194 p[i] = x.data[i];
1195 } else {
1196 using intrinsic_t = simd_intrinsic_t<T,N>;
1198#define ein_mm_stream_ph(x,y) _mm_stream_si128(x,cast_si(y))
1199#define ein_mm256_stream_ph(x,y) _mm256_stream_si256(x,cast_si(y))
1200#define ein_mm512_stream_ph(x,y) _mm512_stream_si512(x,cast_si(y))
1201 EIN_SWITCH(
1202 _mm_stream_ps, _mm_stream_pd, _mm_stream_si128, ein_mm_stream_ph,
1203 _mm256_stream_ps, _mm256_stream_pd, _mm256_stream_si256, ein_mm256_stream_ph,
1204 _mm512_stream_ps, _mm512_stream_pd, _mm512_stream_si512, ein_mm512_stream_ph
1205 )
1206#undef ein_mm_stream_ph
1207#undef ein_mm256_stream_ph
1208#undef ein_mm512_stream_ph
1210 }
1211}
1212
1213#undef EIN_CASE
1214#undef EIN_SWITCH
1215
1217// end of stores
1218#endif
1221
1222namespace detail{
1223
1224template <typename T>
1225struct simd_type_impl : std::false_type {};
1226
1227template <typename T, size_t N>
1228requires has_simd_type<T,N>
1229struct simd_type_impl<simd<T,N>> : std::true_type {};
1230
1231}
1232
1234template <typename SIMD>
1236
1239
1241template <size_t ... is>
1243auto shuffle(simd_type auto x) {
1244 return x.template shuffle<is...>();
1245}
1246
1248template <size_t ... is>
1250auto shuffle(simd_type auto x, simd_type auto y) {
1251 return x.template shuffle<is...>(y);
1252}
1253
1255
1257extern template struct simd<int8_t,16>;
1258extern template struct simd<int8_t,32>;
1259extern template struct simd<uint8_t,16>;
1260extern template struct simd<uint8_t,32>;
1261extern template struct simd<int16_t,8>;
1262extern template struct simd<int16_t,16>;
1263extern template struct simd<uint16_t,8>;
1264extern template struct simd<uint16_t,16>;
1265extern template struct simd<int32_t,4>;
1266extern template struct simd<int32_t,8>;
1267extern template struct simd<uint32_t,4>;
1268extern template struct simd<uint32_t,8>;
1269extern template struct simd<float,4>;
1270extern template struct simd<float,8>;
1271extern template struct simd<int64_t,2>;
1272extern template struct simd<int64_t,4>;
1273extern template struct simd<uint64_t,2>;
1274extern template struct simd<uint64_t,4>;
1275extern template struct simd<double,2>;
1276extern template struct simd<double,4>;
1277#ifdef __AVX512F__
1278extern template struct simd<int16_t,32>;
1279extern template struct simd<uint16_t,32>;
1280extern template struct simd<int32_t,16>;
1281extern template struct simd<uint32_t,16>;
1282extern template struct simd<float,16>;
1283extern template struct simd<int64_t,8>;
1284extern template struct simd<uint64_t,8>;
1285extern template struct simd<double,8>;
1286extern template struct simd<int8_t,64>;
1287extern template struct simd<uint8_t,64>;
1288// todo ein::fp16, ein::bf16, __fp16, __Float16, __bf16
1289#endif
1290
1292} // namespace ein
1293
1294namespace std {
1300 template <typename T, size_t N>
1301 struct tuple_size<ein::simd<T, N>> : integral_constant<size_t, N> {};
1302
1304 template <size_t I, typename T, size_t N>
1305 requires (I < N)
1306 struct tuple_element<I, ein::simd<T, N>> {
1307 using type = T;
1308 };
1311}
1312
1313
1314#if defined(EIN_TESTING) || defined(EIN_TESTING_SIMD)
1315#include <string_view>
1316#include "types.hpp"
1317
1318TEMPLATE_TEST_CASE("simd","[simd]",int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,int64_t,uint64_t,float,double) {
1319 using namespace ein;
1320
1321 constexpr size_t N128 = 16/sizeof(TestType);
1322 constexpr size_t N256 = 32/sizeof(TestType);
1323#ifdef __AVX512F__
1324 constexpr size_t N512 = 64/sizeof(TestType);
1325#endif
1326 using t128 = simd<TestType,N128>;
1327 using t256 = simd<TestType,N256>;
1328#ifdef __AVX512F__
1329 using t512 = simd<TestType,N512>;
1330#endif
1331
1332 SECTION("default constructors") {
1333 [[maybe_unused]] t128 x128;
1334 [[maybe_unused]] t256 x256;
1335#ifdef __AVX512F__
1336 [[maybe_unused]] t512 x512;
1337#endif
1338 }
1339 SECTION("broadcast constructor") {
1340 TestType x{}; // = GENERATE(take(10,random<TestType>()));
1341 [[maybe_unused]] t128 x128(x);
1342 [[maybe_unused]] t256 x256(x);
1343#ifdef __AVX512F__
1344 [[maybe_unused]] t512 x512(x);
1345#endif
1346 }
1347
1348}
1349#endif // EIN_TESTING || EIN_TESTING_SIMD
can we convert simd_data_t<U,N> -> simd_data_t<T,N> automatically using gcc vector extensions?
Does this type have operations that are semantically correct when lifted to the simd_data_t level?
Definition simd_data.hpp:82
Do we want to use AVX512's notion of an _mmask8, _mmask16, _mmask32, or _mmask64 for masking operations?
ein::simd_data_t<T,N> is defined
Definition simd_data.hpp:86
type T is not one of the candidates
Definition types.hpp:52
type T is one of the candidates
Definition types.hpp:48
is this type one of the types that is handled well automatically by clang/gcc vector extensions?
recognizes any valid simd type
Definition simd.hpp:1235
#define ein_reinitializes
[[clang::reinitializes]]
Definition common.hpp:418
#define ein_artificial
[[artificial]].
Definition common.hpp:220
#define ein_inline
inline [[always_inline]]
Definition common.hpp:188
#define ein_hidden
[[visibility("hidden")]] [[exclude_from_explicit_instantiations]]
Definition common.hpp:365
#define ein_nodiscard
C++17 [[nodiscard]].
Definition common.hpp:165
constexpr size_t max_fp_comparison_predicate
AVX512 added many more floating point comparison types. Do we have them?
Definition numerics.hpp:190
constexpr bool cmpint(T a, T b) noexcept
Definition numerics.hpp:175
constexpr bool cmp(T a, T b) noexcept
perform an avx512 style floating point comparison for scalar values.
Definition numerics.hpp:232
@ FALSE
always false
@ TRUE
always true
@ LE_OQ
Less-than-or-equal (ordered, nonsignaling) (AVX-512)
@ GE_OQ
Greater-than-or-equal (ordered, nonsignaling) (AVX-512)
@ GT_OQ
Greater-than (ordered, nonsignaling) (AVX-512)
@ EQ_OQ
Equal (ordered, nonsignaling)
@ NEQ_UQ
Not-equal (unordered, nonsignaling)
@ LT_OQ
Less-than (ordered, nonsignaling) (AVX-512)
A compile time constant passed as an empty struct.
Definition numerics.hpp:61
#define ein_const
[[const]] is not const
Definition common.hpp:84
#define ein_pure
[[pure]]
Definition common.hpp:102
constexpr std::reverse_iterator< const T * > crend() const noexcept
const reverse end iterator
Definition simd.hpp:233
friend constexpr T const & get(simd const &s) noexcept
enables destructuring constant reference
Definition simd.hpp:267
friend constexpr T && get(simd &&s) noexcept
enable destructuring move
Definition simd.hpp:279
constexpr intrinsic_t & it() noexcept
provide compatibility with Intel intrinsics by freely using this as simd_intrinsic_t<T,...
Definition simd.hpp:170
constexpr T const * begin() const noexcept
const start iterator
Definition simd.hpp:213
constexpr std::reverse_iterator< const T * > rend() const noexcept
const reverse end iterator
Definition simd.hpp:241
constexpr std::reverse_iterator< const T * > rbegin() const noexcept
const reverse start iterator
Definition simd.hpp:237
friend constexpr mask_t cmpint(simd a, simd b) noexcept
Definition simd.hpp:781
friend constexpr mask_t cmp(simd a, simd b) noexcept
Definition simd.hpp:727
constexpr simd & operator--() noexcept
--x
Definition simd.hpp:415
constexpr simd(std::initializer_list< T > init)
initialize the first init.size() values (at most N) from an initializer_list
Definition simd.hpp:103
constexpr simd & operator++() noexcept
++x
Definition simd.hpp:434
constexpr simd() noexcept=default
default initialization
T value_type
Definition simd.hpp:52
constexpr simd operator++(int) noexcept
x++
Definition simd.hpp:443
constexpr std::reverse_iterator< T * > rend() noexcept
reverse end iterator
Definition simd.hpp:225
simd_intrinsic_t< T, N > intrinsic_t
Definition simd.hpp:43
friend constexpr T & get(simd &s) noexcept
enables destructuring reference
Definition simd.hpp:255
data_t data
Definition simd.hpp:56
simd_data_t< T, N > data_t
Definition simd.hpp:41
constexpr T const * cend() const noexcept
const end iterator
Definition simd.hpp:209
constexpr T const * cbegin() const noexcept
const start iterator
Definition simd.hpp:205
constexpr simd(intrinsic_t &&data) noexcept
move construct from the corresponding intel intrinsic type (if different than the gcc/clang one)
Definition simd.hpp:120
constexpr T const & operator[](std::size_t i) const noexcept
access the element in the i th lane
Definition simd.hpp:193
constexpr simd(intrinsic_t const &data) noexcept
copy construct from the corresponding intel intrinsic type (if different than the gcc/clang one)
Definition simd.hpp:114
constexpr simd(data_t const &data) noexcept
copy construction from clang/gcc vector intrinsics
Definition simd.hpp:89
simd_mask_t< T, N > mask_t
Definition simd.hpp:42
constexpr T & operator[](std::size_t i) noexcept
access the element in the i th lane
Definition simd.hpp:189
constexpr simd< T, sizeof...(is)> shuffle() noexcept
shuffle selected elements to produce a new simd register
Definition simd.hpp:570
constexpr simd(T value) noexcept
broadcast construction
Definition simd.hpp:84
friend constexpr simd scalef(simd x, simd y) noexcept
Definition simd.hpp:940
constexpr T const * end() const noexcept
const end iterator
Definition simd.hpp:217
constexpr simd operator--(int) noexcept
x--
Definition simd.hpp:424
constexpr T * end() noexcept
end iterator
Definition simd.hpp:201
constexpr T * begin() noexcept
start iterator
Definition simd.hpp:197
constexpr intrinsic_t const & it() const noexcept
provide compatibility with Intel intrinsics by freely using this as simd_intrinsic_t<T,...
Definition simd.hpp:178
friend constexpr void swap(simd &x, simd &y) noexcept
Definition simd.hpp:978
constexpr simd(data_t &&data) noexcept
move construction from clang/gcc vector intrinsics
Definition simd.hpp:94
constexpr simd< T, sizeof...(is)> shuffle(simd< T, N > b) noexcept
Use elements taken from this and another simd register to construct another. If an index i in is is l...
Definition simd.hpp:584
constexpr std::reverse_iterator< T * > rbegin() noexcept
reverse start iterator
Definition simd.hpp:221
constexpr std::reverse_iterator< const T * > crbegin() const noexcept
const reverse start iterator
Definition simd.hpp:229
typename detail::simd_intrinsic< T, N >::type simd_intrinsic_t
Returns the Intel intrinsic type associated with a simd register full of N values of type T.
storage_type< T > __attribute__((__vector_size__(N *sizeof(storage_type< T >)), __aligned__(N *sizeof(storage_type< T >)))) simd_data_t
Definition simd_data.hpp:97
auto shuffle(simd_type auto x)
create a new simd register with contents drawn from this one
Definition simd.hpp:1243
std::conditional_t< has_mmask< T, N >, mmask< N >, simd_intrinsic_t< T, N > > simd_mask_t
What type of mask should I use?
constexpr size_t max_simd_size
largest simd register width supported on this platform in bytes
Definition simd_data.hpp:43
simd primitive definition
Definition simd.hpp:39
const string_view type
returns the unmangled name of the type T
Definition types.hpp:30
Definition cpuid.cpp:16
X template float scalef(float, float) noexcept
cond xmacro
Definition bf16.cpp:11