13#include <initializer_list>
/// \brief Extracts the type of the first parameter of a function type.
///
/// The primary template is deliberately empty: a type that is neither a
/// function nor a pointer to function with at least one parameter has no
/// nested \c type, so \c arg1_t of it is ill-formed.
template <typename T>
struct arg1 {};

/// \brief Plain function type: \c type names the first parameter, \c Arg.
template <typename Ret, typename Arg, typename ... Args>
struct arg1<Ret(Arg, Args...)> {
  using type = Arg;
};

/// \brief Pointer to function: defers to the plain function type case.
template <typename Ret, typename Arg, typename ... Args>
struct arg1<Ret(*)(Arg, Args...)> : arg1<Ret(Arg, Args...)> {};

/// \brief Shorthand for the nested \c type of \c arg1<F>.
///
/// \c typename is written explicitly because \c arg1<F>::type is a dependent
/// name; omitting it is only accepted by compilers implementing the C++20
/// relaxation of that rule.
template <typename F>
using arg1_t = typename arg1<F>::type;
38template <
typename T,
size_t N>
requires (has_simd_type<T,N>)
44 static constexpr size_t bytesize = N*
sizeof(T);
53 static constexpr size_t size = N;
63 constexpr simd() noexcept = default;
67 constexpr
simd(
simd const &) noexcept = default;
75 template <
std::convertible_to<T> ... Args>
77 constexpr
simd(Args && ... args) noexcept
78 requires (sizeof...(Args) == N)
79 : data(
std::forward<Args>(args)...) {}
84 constexpr simd(T value) noexcept
85 : data(__extension__(
data_t){value}) {}
95 : data(std::move(data)) {}
103 constexpr simd(std::initializer_list<T> init) {
105 std::copy_n(init.begin(),std::min(N,init.size()),begin());
115 requires (!std::is_same_v<data_t, intrinsic_t>)
116 : data(reinterpret_cast<
data_t>(data)) {}
121 requires (!std::is_same_v<data_t, intrinsic_t>)
122 : data(reinterpret_cast<
data_t>(
std::move(data))) {}
130 constexpr simd & operator = (
simd &&) noexcept = default;
133 constexpr
simd & operator = (
simd const &) noexcept = default;
136 template <typename U>
141 for (
int i=0;i<N;++i)
142 data[i] = T(other.data[i]);
144 data = __builtin_convertvector(other.data,
data_t);
155 if constexpr (std::is_same_v<intrinsic_t,data_t>)
return data;
163 if constexpr (std::is_same_v<intrinsic_t,data_t>)
return data;
164 else return reinterpret_cast<intrinsic_t const &
>(data);
171 if constexpr (std::is_same_v<intrinsic_t,data_t>)
return data;
179 if constexpr (std::is_same_v<intrinsic_t,data_t>)
return data;
180 else return reinterpret_cast<intrinsic_t const &
>(data);
189 constexpr T &
operator[](std::size_t i)
noexcept {
return reinterpret_cast<T *
>(&data)[i]; }
193 constexpr T
const &
operator[](std::size_t i)
const noexcept {
return reinterpret_cast<T
const *
>(&data)[i]; }
197 constexpr T *
begin() noexcept {
return reinterpret_cast<T*
>(&data); }
201 constexpr T *
end() noexcept {
return begin() + N; }
205 constexpr T
const *
cbegin() const noexcept {
return reinterpret_cast<T
const *
>(&data); }
209 constexpr T
const *
cend() const noexcept {
return begin() + N; }
213 constexpr T
const *
begin() const noexcept {
return cbegin(); }
217 constexpr T
const *
end() const noexcept {
return cend(); }
221 constexpr std::reverse_iterator<T*>
rbegin() noexcept {
return std::reverse_iterator<T*>(end()); }
225 constexpr std::reverse_iterator<T*>
rend() noexcept {
return std::reverse_iterator<T*>(begin()); }
229 constexpr std::reverse_iterator<const T*>
crbegin() const noexcept {
return std::reverse_iterator<const T*>(cend()); }
233 constexpr std::reverse_iterator<const T*>
crend() const noexcept {
return std::reverse_iterator<const T*>(cbegin()); }
237 constexpr std::reverse_iterator<const T*>
rbegin() const noexcept {
return crbegin(); }
241 constexpr std::reverse_iterator<const T*>
rend() const noexcept {
return crend(); }
253 template <
size_t I>
requires (I < N)
friend
265 template <
size_t I>
requires (I < N)
friend
267 constexpr T
const &
get(
simd const & s)
noexcept {
277 template <
size_t I>
requires (I < N)
friend
280 return std::move(s[I]);
291 return x.data + y.data;
306 return x.data - y.data;
321 return x.data * y.data;
336 return x.data / y.data;
348 template <
typename U>
friend
352 return x.data & y.data;
364 template <
typename U>
friend
368 return x.data | y.data;
384 return x.data ^ y.data;
397 constexpr simd operator + () const noexcept
404 constexpr simd operator - () const noexcept
456 return x.data >>
static_cast<T
>(K);
462 friend constexpr simd operator >>(
simd x, T y)
noexcept
472 return x.data >> y.data;
483 return x.data <<
static_cast<T
>(K);
489 friend constexpr simd operator <<(
simd x, T y)
noexcept
499 return x.data << y.data;
510 data >>=
static_cast<T
>(K);
517 constexpr simd & operator >>=(T y)
noexcept
538 data <<= static_cast<T>(K);
547 constexpr simd & operator <<=(T y)
noexcept
568 template <
size_t ... is>
571 requires (((is < N) && ... &&
has_simd_type<T,
sizeof...(is)>) &&
572 requires (
data_t x) {
simd<T,
sizeof...(is)>(__builtin_shufflevector(x, is...)); }) {
574 return { data[is]... };
576 return __builtin_shufflevector(data,is...);
582 template <
size_t ... is>
585 requires (((is < N*2) && ... &&
has_simd_type<T,
sizeof...(is)>) &&
586 requires (
data_t x) {
simd<T,
sizeof...(is)>(__builtin_shufflevector(x, x, is...)); }) {
588 return { (is < N ? data[is] : b[is-N])... };
590 return __builtin_shufflevector(data,b.
data,is...);
603 return x.data < y.data;
610 return x.data > y.data;
617 return x.data <= y.data;
624 return x.data >= y.data;
631 return x.data == y.data;
638 return x.data != y.data;
647#define EIN_COMPARE_OP_SIZE(bs,cmd) \
648 if constexpr (bytesize == bs) return cmd(a.it(),b.it()); \
650#define EIN_COMPARE_OP_TY(opfix,ty,infix) \
651 if constexpr (std::is_same_v<storage_type<T>,ty>) { \
652 EIN_COMPARE_OP_SIZE(16,_mm_cmp##opfix##_##infix##_mask) \
653 EIN_COMPARE_OP_SIZE(32,_mm256_cmp##opfix##_##infix##_mask) \
654 EIN_COMPARE_OP_SIZE(64,_mm512_cmp##opfix##_##infix##_mask) \
655 static_assert(false); \
658#define EIN_COMPARE_OP(op,opfix) \
661 for (size_t i=0;i<N;++i) \
663 result |= (1 << i); \
666 EIN_COMPARE_OP_TY(opfix,float,ps) \
667 EIN_COMPARE_OP_TY(opfix,double,pd) \
668 EIN_COMPARE_OP_TY(opfix,int8_t,epi8) \
669 EIN_COMPARE_OP_TY(opfix,int16_t,epi16) \
670 EIN_COMPARE_OP_TY(opfix,int32_t,epi32) \
671 EIN_COMPARE_OP_TY(opfix,int64_t,epi64) \
672 EIN_COMPARE_OP_TY(opfix,uint8_t,epu8) \
673 EIN_COMPARE_OP_TY(opfix,uint16_t,epu16) \
674 EIN_COMPARE_OP_TY(opfix,uint32_t,epu32) \
675 EIN_COMPARE_OP_TY(opfix,uint64_t,epu64) \
677 EIN_COMPARE_OP_TY(opfix,_Float16,ph) \
678 EIN_COMPARE_OP_TY(opfix,__fp16,ph) \
680 static_assert(
false); \
697 EIN_COMPARE_OP(<=,le)
702 EIN_COMPARE_OP(>=,ge)
707 EIN_COMPARE_OP(==,eq)
712 EIN_COMPARE_OP(!=,ne)
// Retire the comparison helper macros once the operators have been generated.
// The per-width helper is named EIN_COMPARE_OP_SIZE (not ..._TY_SIZE), so that
// is the name that has to be undefined to keep it from leaking out of this header.
#undef EIN_COMPARE_OP_TY
#undef EIN_COMPARE_OP_SIZE
732 for (
size_t i=0;i<N;++i)
737#define ein_suffix(x) x##_mask
741 for (
size_t i=0;i<N;++i)
745#define ein_suffix(x) x
750 if constexpr(std::is_same_v<T,float>) {
751 if constexpr (bytesize==16)
return ein_suffix(_mm_cmp_ps)(a.
it(),b.
it(),
static_cast<int>(imm8));
752 else if constexpr (bytesize==32)
return ein_suffix(_mm256_cmp_ps)(a.
it(),b.
it(),
static_cast<int>(imm8));
753 ON512(
else if constexpr (bytesize==64)
return ein_suffix(_mm512_cmp_ps)(a.
it(),b.
it(),
static_cast<int>(imm8));)
754 else static_assert(
false);
755 }
else if constexpr (std::is_same_v<T,double>) {
756 if constexpr (bytesize==16)
return ein_suffix(_mm_cmp_pd)(a.
it(),b.
it(),
static_cast<int>(imm8));
757 else if constexpr (bytesize==32)
return ein_suffix(_mm256_cmp_pd)(a.
it(),b.
it(),
static_cast<int>(imm8));
758 ON512(
else if constexpr (bytesize==64)
return ein_suffix(_mm512_cmp_pd)(a.
it(),b.
it(),
static_cast<int>(imm8));)
761 else if constexpr (bytesize==16)
return _mm_cmp_ph_mask(a.
it(),b.
it(),
static_cast<int>(imm8));
762 else if constexpr (bytesize==32)
return _mm256_cmp_ph(a.
it(),b.
it(),
static_cast<int>(imm8));
763 else if constexpr (bytesize==64)
return _mm512_cmp_ph_mask(a.
it(),b.
it(),
static_cast<int>(imm8));
769 static_assert(
false);
777 template <CMPINT imm8>
787 for (
size_t i=0;i<N;++i)
792#define ein_suffix _mask
796 for (
size_t i=0;i<N;++i)
807#define EIN_HANDLE(type,infix) \
808 if constexpr(std::is_same_v<T,type>) { \
809 if constexpr (bytesize==16) return _mm_cmp_ ## infix ## _mask(a.it(),b.it(),imm8); \
810 else if constexpr (bytesize==32) return _mm256_cmp_ ## infix ## _mask(a.it(),b.it(),imm8); \
811 else if constexpr (bytesize==64) return _mm512_cmp_ ## infix ## _mask(a.it(),b.it(),imm8); \
812 else static_assert(false); \
815 EIN_HANDLE(int8_t,epi8)
816 EIN_HANDLE(uint8_t,epu8)
817 EIN_HANDLE(int16_t,epi16)
818 EIN_HANDLE(uint16_t,epu16)
819 EIN_HANDLE(int32_t,epi32)
820 EIN_HANDLE(uint32_t,epu32)
821 EIN_HANDLE(int64_t,epi64)
822 EIN_HANDLE(uint64_t,epu64)
823 static_assert(
false);
827 if constexpr (imm8 ==
FALSE) {
828 if constexpr (bytesize==16)
return _mm_setzero_si128();
829 else if constexpr (bytesize==32)
return _mm256_setzero_si256();
830 else static_assert(
false);
831 }
else if constexpr (imm8 ==
TRUE) {
832 if constexpr (bytesize==16)
return _mm_set1_epi32(-1);
833 else if constexpr (bytesize==32)
return _mm256_set1_epi32(-1);
834 else static_assert(
false);
835 }
else if constexpr (imm8 ==
EQ)
return a == b;
836 else if constexpr (imm8 ==
NE)
return a != b;
837 else if constexpr (imm8 ==
LT)
return a < b;
838 else if constexpr (imm8 ==
NLE)
return a > b;
839 else if constexpr (imm8 ==
LE)
return a <= b;
840 else if constexpr (imm8 ==
NLT)
return a >= b;
841 else static_assert(
false);
848#define EIN_COMPARE512_CASE(type,infix) \
849 if constexpr(one_of_t<T,u##type,type>) { \
850 if constexpr (bytesize==16) return _mm_cmpeq_##infix##_mask(a.it(),b.it()); \
851 else if constexpr (bytesize==32) return _mm256_cmpeq_##infix##_mask(a.it(),b.it()); \
852 else if constexpr (bytesize==64) return _mm512_cmpeq_##infix##_mask(a.it(),b.it()); \
853 else static_assert(false); \
855#define EIN_COMPARE512(op,infix,imm8) \
858 for (size_t i=0;i<N;++i) \
863 if constexpr (one_of_t<T,float>) { \
864 if constexpr (N==4) return _mm_cmp##infix##_ps_mask(a.it(),b.it()); \
865 else if constexpr (N==8) return _mm256_cmp##infix##_ps_mask(a.it(),b.it()); \
866 else if constexpr (N==16) return _mm512_cmp##infix##_ps_mask(a.it(),b.it()); \
867 else static_assert(false); \
} else if constexpr (one_of_t<T,double>) { \
  /* dispatch on register width: a 16-byte register holds 2 doubles, so lane counts 4/8/16 picked the wrong intrinsic */ \
  if constexpr (bytesize==16) return _mm_cmp##infix##_pd_mask(a.it(),b.it()); \
  else if constexpr (bytesize==32) return _mm256_cmp##infix##_pd_mask(a.it(),b.it()); \
  else if constexpr (bytesize==64) return _mm512_cmp##infix##_pd_mask(a.it(),b.it()); \
  else static_assert(false); \
} else if constexpr (one_of_t<T,_Float16,__fp16>) { \
  /* same dispatch for 2-byte elements: a 16-byte register holds 8 of them */ \
  if constexpr (bytesize==16) return _mm_cmp_ph_mask(a.it(),b.it(),static_cast<int>(imm8)); \
  else if constexpr (bytesize==32) return _mm256_cmp_ph_mask(a.it(),b.it(),static_cast<int>(imm8)); \
  else if constexpr (bytesize==64) return _mm512_cmp_ph_mask(a.it(),b.it(),static_cast<int>(imm8)); \
  else static_assert(false);) \
880 EIN_COMPARE512_CASE(int8_t,epi8) \
881 EIN_COMPARE512_CASE(int16_t,epi16) \
882 EIN_COMPARE512_CASE(int32_t,epi32) \
883 EIN_COMPARE512_CASE(int64_t,epi64) \
884 static_assert(false); \
930 #undef EIN_COMPARE512
944 for (
size_t i=0;i<N;++i)
945 result[i] =
scalef(x[i],y[i]);
948 if constexpr (std::is_same_v<T,float>) {
949 if constexpr (bytesize==16)
return _mm_scalef_ps(x,y);
950 else if constexpr (bytesize==32)
return _mm256_scalef_ps(x,y);
952 else if constexpr (bytesize==64)
return _mm512_scalef_ps(x,y);
954 else static_assert(
false);
955 }
else if constexpr (std::is_same_v<T,double>) {
956 if constexpr (bytesize==16)
return _mm_scalef_pd(x,y);
957 else if constexpr (bytesize==32)
return _mm256_scalef_pd(x,y);
959 else if constexpr (bytesize==64)
return _mm512_scalef_pd(x,y);
961#if defined(__AVX512FP16__) && defined(__AVX512VL__)
962 }
else if constexpr (std::is_same_v<T,__fp16,_Float16>) {
963 if constexpr (bytesize==16)
return _mm_scalef_ph(x,y);
964 else if constexpr (bytesize==32)
return _mm256_scalef_ph(x,y);
965 else if constexpr (bytesize==64)
return _mm512_scalef_ph(x,y);
968 static_assert(
false);
980 for (
int i=0;i<N;++i)
993template <
typename T,
size_t N>
996 && (N %
sizeof(T) == 0)
997#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
998 && !one_of_t<T, long long>
1001simd(T __attribute ((__vector_size__(N)))) ->
simd<T,N/
sizeof(T)>;
1004template <
typename ... Args>
1005requires has_simd_type<std::common_type<Args...>,
sizeof...(Args)>
1006simd(Args&&...) ->
simd<std::common_type<Args...>,
sizeof...(Args)>;
1010template <
typename T>
1022 #define EIN_SWITCH(on_m128,on_m128d,on_m128i,on_m128ph,on_m256,on_m256d,on_m256i,on_m256ph,on_m512,on_m512d,on_m512i,on_m512ph) \
1023 if constexpr (std::is_same_v<intrinsic_t,__m128>) { EIN_CASE(on_m128) } \
1024 else if constexpr (std::is_same_v<intrinsic_t,__m128d>) { EIN_CASE(on_m128d) } \
1025 else if constexpr (std::is_same_v<intrinsic_t,__m128i>) { EIN_CASE(on_m128i) } \
1026 else if constexpr (std::is_same_v<intrinsic_t,__m256>) { EIN_CASE(on_m256) } \
1027 else if constexpr (std::is_same_v<intrinsic_t,__m256d>) { EIN_CASE(on_m256d) } \
1028 else if constexpr (std::is_same_v<intrinsic_t,__m256i>) { EIN_CASE(on_m256i) } \
1029 ON512(else if constexpr (std::is_same_v<intrinsic_t,__m512>) { EIN_CASE(on_m512) } \
1030 else if constexpr (std::is_same_v<intrinsic_t,__m512d>) { EIN_CASE(on_m512d) } \
1031 else if constexpr (std::is_same_v<intrinsic_t,__m512i>) { EIN_CASE(on_m512i) }) \
1033ON512FP16(
if constexpr (std::is_same_v<intrinsic_t,__m128ph>) { EIN_CASE(on_m128ph) } \
1034 else if constexpr (std::is_same_v<intrinsic_t,__m256ph>) { EIN_CASE(on_m256ph) } \
1035 else if constexpr (std::is_same_v<intrinsic_t,__m512ph>) { EIN_CASE(on_m512ph) } \
1037 {
static_assert(
false) };
1039#define EIN_CASE(f) return f(reinterpret_cast<arg1_t<decltype(f)>>(p));
1043template <
size_t N,
typename T>
1045static constexpr simd<T,N> load(T
const * p)
noexcept {
1048 for (
size_t i = 0;i<N;++i)
1057 _mm_load_ps, _mm_load_pd, _mm_load_epi32, _mm_load_ph,
1058 _mm256_load_ps, _mm256_load_pd, _mm256_load_epi32, _mm256_load_ph,
1059 _mm512_load_ps, _mm512_load_pd, _mm512_load_epi32, _mm512_load_ph
1064template <
size_t N,
typename T>
1066static constexpr simd<T,N> loadu(T
const * p)
noexcept {
1069 for (
size_t i = 0;i<N;++i)
1075 _mm_loadu_ps, _mm_loadu_pd, _mm_loadu_epi32, _mm_loadu_ph,
1076 _mm256_loadu_ps, _mm256_loadu_pd, _mm256_loadu_epi32, _mm256_loadu_ph,
1077 _mm512_loadu_ps, _mm512_loadu_pd, _mm512_loadu_epi32, _mm512_loadu_ph
1083template <
size_t N,
typename T>
1085static constexpr simd<T,N> lddqu(T
const * p)
noexcept {
1088 for (
size_t i = 0;i<N;++i)
1094 _mm_loadu_ps, _mm_loadu_pd, _mm_lddqu_si128, _mm_loadu_ph,
1095 _mm256_loadu_ps, _mm256_loadu_pd, _mm256_lddqu_si256, _mm256_loadu_ph,
1096 _mm512_loadu_ps, _mm512_loadu_pd, _mm512_lddqu_si512, _mm512_loadu_ph
1103#define EIN_CASE(f) return f(p);
1108template <
size_t N,
typename T>
1110static constexpr simd<T,N> stream_load(T
const * p)
noexcept {
1113 for (
size_t i = 0;i<N;++i)
1119 #define ein_mm_stream_load_ps(x) cast_ps(_mm_stream_load_si128(x))
1120 #define ein_mm256_stream_load_ps(x) cast_ps(_mm256_stream_load_si256(x))
1121 #define ein_mm512_stream_load_ps(x) cast_ps(_mm512_stream_load_si512(x))
1122 #define ein_mm_stream_load_pd(x) cast_pd(_mm_stream_load_si128(x))
1123 #define ein_mm256_stream_load_pd(x) cast_pd(_mm256_stream_load_si256(x))
1124 #define ein_mm512_stream_load_pd(x) cast_pd(_mm512_stream_load_si512(x))
1125 #define ein_mm_stream_load_ph(x) cast_ph(_mm_stream_load_si128(x))
1126 #define ein_mm256_stream_load_ph(x) cast_ph(_mm256_stream_load_si256(x))
1127 #define ein_mm512_stream_load_ph(x) cast_ph(_mm512_stream_load_si512(x))
1129 ein_mm_stream_load_ps, ein_mm_stream_load_pd, _mm_stream_load_si128, ein_mm_stream_load_ph,
1130 ein_mm256_stream_load_ps, ein_mm256_stream_load_pd, _mm256_stream_load_si256, ein_mm256_stream_load_ph,
1131 ein_mm512_stream_load_ps, ein_mm512_stream_load_pd, _mm512_stream_load_si512, ein_mm512_stream_load_ph
1133 #undef ein_mm_stream_load_ps
1134 #undef ein_mm256_stream_load_ps
1135 #undef ein_mm512_stream_load_ps
1136 #undef ein_mm_stream_load_pd
1137 #undef ein_mm256_stream_load_pd
1138 #undef ein_mm512_stream_load_pd
1139 #undef ein_mm_stream_load_ph
1140 #undef ein_mm256_stream_load_ph
1141 #undef ein_mm512_stream_load_ph
1148#define EIN_CASE(f) f(p,x.it());
1157template <
typename T,
size_t N>
1159static constexpr void store(T * p,
simd<T,N> x)
noexcept {
1161 for (
size_t i = 0;i<N;++i)
1166 _mm_store_ps, _mm_store_pd, _mm_store_epi32, _mm_store_ph,
1167 _mm256_store_ps, _mm256_store_pd, _mm256_store_epi32, _mm256_store_ph,
1168 _mm512_store_ps, _mm512_store_pd, _mm512_store_epi32, _mm512_store_ph
1173template <
typename T,
size_t N>
1175static constexpr void storeu(T * p,
simd<T,N> x)
noexcept {
1177 for (
size_t i = 0;i<N;++i)
1182 _mm_storeu_ps, _mm_storeu_pd, _mm_storeu_epi32, _mm_storeu_ph,
1183 _mm256_storeu_ps, _mm256_storeu_pd, _mm256_storeu_epi32, _mm256_storeu_ph,
1184 _mm512_storeu_ps, _mm512_storeu_pd, _mm512_storeu_epi32, _mm512_storeu_ph
1189template <
typename T,
size_t N>
1191static constexpr void stream(T * p,
simd<T,N> x)
noexcept {
1193 for (
size_t i = 0;i<N;++i)
1198#define ein_mm_stream_ph(x,y) _mm_stream_si128(x,cast_si(y))
1199#define ein_mm256_stream_ph(x,y) _mm256_stream_si256(x,cast_si(y))
1200#define ein_mm512_stream_ph(x,y) _mm512_stream_si512(x,cast_si(y))
1202 _mm_stream_ps, _mm_stream_pd, _mm_stream_si128, ein_mm_stream_ph,
1203 _mm256_stream_ps, _mm256_stream_pd, _mm256_stream_si256, ein_mm256_stream_ph,
1204 _mm512_stream_ps, _mm512_stream_pd, _mm512_stream_si512, ein_mm512_stream_ph
1206#undef ein_mm_stream_ph
1207#undef ein_mm256_stream_ph
1208#undef ein_mm512_stream_ph
1224template <
typename T>
1227template <
typename T,
size_t N>
1234template <
typename SIMD>
1241template <
size_t ... is>
1244 return x.template
shuffle<is...>();
1248template <
size_t ... is>
1251 return x.template
shuffle<is...>(y);
1300 template <
typename T,
size_t N>
1301 struct tuple_size<ein::simd<T, N>> : integral_constant<size_t, N> {};
1304 template <
size_t I,
typename T,
size_t N>
1314#if defined(EIN_TESTING) || defined(EIN_TESTING_SIMD)
1315#include <string_view>
1318TEMPLATE_TEST_CASE(
"simd",
"[simd]",int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,int64_t,uint64_t,
float,
double) {
1319 using namespace ein;
1321 constexpr size_t N128 = 16/
sizeof(TestType);
1322 constexpr size_t N256 = 32/
sizeof(TestType);
1324 constexpr size_t N512 = 64/
sizeof(TestType);
1332 SECTION(
"default constructors") {
1333 [[maybe_unused]] t128 x128;
1334 [[maybe_unused]] t256 x256;
1336 [[maybe_unused]] t512 x512;
1339 SECTION(
"broadcast constructor") {
1341 [[maybe_unused]] t128 x128(x);
1342 [[maybe_unused]] t256 x256(x);
1344 [[maybe_unused]] t512 x512(x);
can we convert simd_data_t<U,N> -> simd_data_t<T,N> automatically using gcc vector extensions?
Does this type have operations that are semantically correct when lifted to the simd_data_t level?
Do we want to use AVX512's notion of an _mmask8, _mmask16, _mmask32, or _mmask64 for masking operatio...
ein::simd_data_t<T,N> is defined
type T is not one of the candidates
type T is one of the candidates
Is this type one of the types that are handled well automatically by clang/gcc vector extensions?
recognizes any valid simd type
#define ein_reinitializes
[[clang::reinitializes]]
#define ein_artificial
[[artificial]].
#define ein_inline
inline [[always_inline]]
#define ein_hidden
[[visibility("hidden")]] [[exclude_from_explicit_instantiations]]
#define ein_nodiscard
C++17 [[nodiscard]].
constexpr size_t max_fp_comparison_predicate
AVX512 added many more floating point comparison types. Do we have them?
constexpr bool cmpint(T a, T b) noexcept
constexpr bool cmp(T a, T b) noexcept
perform an avx512 style floating point comparison for scalar values.
@ LE_OQ
Less-than-or-equal (ordered, nonsignaling) (AVX-512)
@ GE_OQ
Greater-than-or-equal (ordered, nonsignaling) (AVX-512)
@ GT_OQ
Greater-than (ordered, nonsignaling) (AVX-512)
@ EQ_OQ
Equal (ordered, nonsignaling)
@ NEQ_UQ
Not-equal (unordered, nonsignaling)
@ LT_OQ
Less-than (ordered, nonsignaling) (AVX-512)
A compile time constant passed as an empty struct.
#define ein_const
[[const]] is not const
constexpr std::reverse_iterator< const T * > crend() const noexcept
const reverse end iterator
friend constexpr T const & get(simd const &s) noexcept
enables destructuring constant reference
friend constexpr T && get(simd &&s) noexcept
enable destructuring move
constexpr intrinsic_t & it() noexcept
provide compatibility with Intel intrinsics by freely using this as simd_intrinsic_t<T,...
constexpr T const * begin() const noexcept
const start iterator
constexpr std::reverse_iterator< const T * > rend() const noexcept
const reverse end iterator
constexpr std::reverse_iterator< const T * > rbegin() const noexcept
const reverse start iterator
friend constexpr mask_t cmpint(simd a, simd b) noexcept
friend constexpr mask_t cmp(simd a, simd b) noexcept
constexpr simd & operator--() noexcept
--x
constexpr simd(std::initializer_list< T > init)
initialize the first init.size() values (at most N) from an initializer_list
constexpr simd & operator++() noexcept
++x
constexpr simd() noexcept=default
default initialization
constexpr simd operator++(int) noexcept
x++
constexpr std::reverse_iterator< T * > rend() noexcept
reverse end iterator
simd_intrinsic_t< T, N > intrinsic_t
friend constexpr T & get(simd &s) noexcept
enables destructuring reference
simd_data_t< T, N > data_t
constexpr T const * cend() const noexcept
const end iterator
constexpr T const * cbegin() const noexcept
const start iterator
constexpr simd(intrinsic_t &&data) noexcept
move construct from the corresponding intel intrinsic type (if different than the gcc/clang one)
constexpr T const & operator[](std::size_t i) const noexcept
access the element in the i-th lane
constexpr simd(intrinsic_t const &data) noexcept
copy construct from the corresponding intel intrinsic type (if different than the gcc/clang one)
constexpr simd(data_t const &data) noexcept
copy construction from clang/gcc vector intrinsics
simd_mask_t< T, N > mask_t
constexpr T & operator[](std::size_t i) noexcept
access the element in the i-th lane
constexpr simd< T, sizeof...(is)> shuffle() noexcept
shuffle selected elements to produce a new simd register
constexpr simd(T value) noexcept
broadcast construction
friend constexpr simd scalef(simd x, simd y) noexcept
constexpr T const * end() const noexcept
const end iterator
constexpr simd operator--(int) noexcept
x--
constexpr T * end() noexcept
end iterator
constexpr T * begin() noexcept
start iterator
constexpr intrinsic_t const & it() const noexcept
provide compatibility with Intel intrinsics by freely using this as simd_intrinsic_t<T,...
friend constexpr void swap(simd &x, simd &y) noexcept
constexpr simd(data_t &&data) noexcept
move construction from clang/gcc vector intrinsics
constexpr simd< T, sizeof...(is)> shuffle(simd< T, N > b) noexcept
Use elements taken from this and another simd register to construct another. If an index i in is is l...
constexpr std::reverse_iterator< T * > rbegin() noexcept
reverse start iterator
constexpr std::reverse_iterator< const T * > crbegin() const noexcept
const reverse start iterator
typename detail::simd_intrinsic< T, N >::type simd_intrinsic_t
Returns the Intel intrinsic type associated with a simd register full of N values of type T.
storage_type< T > __attribute__((__vector_size__(N *sizeof(storage_type< T >)), __aligned__(N *sizeof(storage_type< T >)))) simd_data_t
auto shuffle(simd_type auto x)
create a new simd register with contents drawn from this one
std::conditional_t< has_mmask< T, N >, mmask< N >, simd_intrinsic_t< T, N > > simd_mask_t
What type of mask should I use?
constexpr size_t max_simd_size
largest simd register width supported on this platform in bytes
simd primitive definition
const string_view type
returns the unmangled name of the type T
X template float scalef(float, float) noexcept
cond xmacro