21#define ON512(...) __VA_ARGS__
27#if defined(__AVX512VL__) && defined(__AVX512FP16__)
28#define ON512FP16(...) __VA_ARGS__
29#define IF512FP16(x,y) x
32#define IF512FP16(x,y) y
67 std::bit_cast<storage_type<T>>(t);
85template <
typename T,
size_t N>
88 && std::is_pod_v<storage_type<T>>
90 &&
one_of<
sizeof(T),1,2,4,8>
91 &&
one_of<
sizeof(T)*N,16,32,64>
95template <
typename T,
size_t N>
100template <
typename U,
typename T,
size_t N>
/// Constrains T through one_of_t against the fixed list of element types spelled out below.
/// NOTE(review): presumably these are the element types that clang/gcc vector extensions handle
/// natively; int64_t/uint64_t do not appear in the list — confirm that omission is intentional.
113concept simd_builtin =
one_of_t<T,int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,float,double,__fp16,__bf16,_Float16>;
/// Integer-vector trait keyed by register width in bits.
/// The primary template is deliberately empty: only explicitly specialized
/// widths provide a nested `type`.
template <size_t Bits>
struct si {
};

/// Single-precision-vector trait keyed by register width in bits.
/// Empty unless specialized for a given width.
template <size_t Bits>
struct ps {
};

/// Double-precision-vector trait keyed by register width in bits.
/// Empty unless specialized for a given width.
template <size_t Bits>
struct pd {
};
120template <>
struct si<128> {
using type = __m128i; };
121template <>
struct ps<128> {
using type = __m128; };
122template <>
struct pd<128> {
using type = __m128d; };
124template <>
struct si<256> {
using type = __m256i; };
125template <>
struct ps<256> {
using type = __m256; };
126template <>
struct pd<256> {
using type = __m256d; };
129template <>
struct si<512> {
using type = __m512i; };
130template <>
struct ps<512> {
using type = __m512; };
131template <>
struct pd<512> {
using type = __m512d; };
134#if defined(__AVX512FP16__) && defined(__AVX512VL__)
135template <
size_t N>
struct ph {};
136template <>
struct ph<128> {
using type = __m128ph; };
137template <>
struct ph<256> {
using type = __m256ph; };
138template <>
struct ph<512> {
using type = __m512ph; };
153#if defined(__AVX512FP16__) && defined(__AVX512VL__)
156 using type =
typename ph<N*
sizeof(__fp16)*8>
::type;
160struct simd_intrinsic<_Float16,N> {
161 using type =
typename ph<N*
sizeof(_Float16)*8>
::type;
165struct simd_intrinsic<
ein::
fp16,N> {
171#if defined(__AVX512FP16__) && defined(__AVX512VL__)
172 not_one_of_t<float,double,__fp16,_Float16,ein::fp16> T,
174 not_one_of_t<float,double> T,
186template <
typename T,
size_t N>
193template <
typename T,
size_t N>
203template <
size_t N>
struct mmask {};
204template <>
struct mmask<8> {
using type = __mmask8; };
205template <>
struct mmask<16> {
using type = __mmask16; };
206template <>
struct mmask<32> {
using type = __mmask32; };
207template <>
struct mmask<64> {
using type = __mmask64; };
215 =
typename mmask<N>::type;
220template <
typename T,
size_t N>
230__m128
cast_ps(__m128i a)
noexcept {
return _mm_castsi128_ps(a); }
233__m128
cast_ps(__m128 a)
noexcept {
return a; }
236__m256
cast_ps(__m256i a)
noexcept {
return _mm256_castsi256_ps(a); }
239__m256
cast_ps(__m256 a)
noexcept {
return a; }
242__m128d
cast_pd(__m128i a)
noexcept {
return _mm_castsi128_pd(a); }
245__m128d
cast_pd(__m128d a)
noexcept {
return a; }
248__m256d
cast_pd(__m256i a)
noexcept {
return _mm256_castsi256_pd(a); }
251__m256d
cast_pd(__m256d a)
noexcept {
return a; }
254__m128i
cast_si(__m128 a)
noexcept {
return _mm_castps_si128(a); }
257__m128i
cast_si(__m128d a)
noexcept {
return _mm_castpd_si128(a); }
260__m128i
cast_si(__m128i a)
noexcept {
return a; }
263__m256i
cast_si(__m256 a)
noexcept {
return _mm256_castps_si256(a); }
266__m256i
cast_si(__m256d a)
noexcept {
return _mm256_castpd_si256(a); }
269__m256i
cast_si(__m256i a)
noexcept {
return a; }
274__m512
cast_ps(__m512i a)
noexcept {
return _mm512_castsi512_ps(a); }
277__m512d
cast_pd(__m512i a)
noexcept {
return _mm512_castsi512_pd(a); }
280__m512
cast_ps(__m512 a)
noexcept {
return a; }
283__m512d
cast_pd(__m512d a)
noexcept {
return a; }
286__m512i
cast_si(__m512 a)
noexcept {
return _mm512_castps_si512(a); }
289__m512i
cast_si(__m512d a)
noexcept {
return _mm512_castpd_si512(a); }
292__m512i
cast_si(__m512i a)
noexcept {
return a; }
294#if defined(__AVX512FP16__) && defined(__AVX512VL__)
295__m128i
cast_si(__m128ph a)
noexcept {
return _mm_castph_si128(a); }
296__m128
cast_ps(__m128ph a)
noexcept {
return _mm_castph_ps(a); }
297__m128d
cast_pd(__m128ph a)
noexcept {
return _mm_castph_pd(a); }
298__m128ph cast_ph(__m128ph a)
noexcept {
return a; }
300__m256i
cast_si(__m256ph a)
noexcept {
return _mm256_castph_si256(a); }
301__m256
cast_ps(__m256ph a)
noexcept {
return _mm256_castph_ps(a); }
302__m256d
cast_pd(__m256ph a)
noexcept {
return _mm256_castph_pd(a); }
303__m256ph cast_ph(__m256ph a)
noexcept {
return a; }
305__m512i
cast_si(__m512ph a)
noexcept {
return _mm512_castph_si512(a); }
306__m512
cast_ps(__m512ph a)
noexcept {
return _mm512_castph_ps(a); }
307__m512d
cast_pd(__m512ph a)
noexcept {
return _mm512_castph_pd(a); }
308__m512ph cast_ph(__m512ph a)
noexcept {
return a; }
310__m128ph cast_ph(__m128 a)
noexcept {
return _mm_castps_ph(a); }
311__m256ph cast_ph(__m256 a)
noexcept {
return _mm256_castps_ph(a); }
312__m512ph cast_ph(__m512 a)
noexcept {
return _mm512_castps_ph(a); }
314__m128ph cast_ph(__m128i a)
noexcept {
return _mm_castsi128_ph(a); }
315__m256ph cast_ph(__m256i a)
noexcept {
return _mm256_castsi256_ph(a); }
316__m512ph cast_ph(__m512i a)
noexcept {
return _mm512_castsi512_ph(a); }
318__m128ph cast_ph(__m128d a)
noexcept {
return _mm_castpd_ph(a); }
319__m256ph cast_ph(__m256d a)
noexcept {
return _mm256_castpd_ph(a); }
320__m512ph cast_ph(__m512d a)
noexcept {
return _mm512_castpd_ph(a); }
328#if defined(EIN_TESTING) || defined(EIN_TESTING_SIMD_DATA)
329#include <string_view>
332TEMPLATE_TEST_CASE(
"simd_data",
"[simd_data]",int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,int64_t,uint64_t,
float,
double) {
335 constexpr size_t N128 = 16/
sizeof(TestType);
336 constexpr size_t N256 = 32/
sizeof(TestType);
338 constexpr size_t N512 = 64/
sizeof(TestType);
340 SECTION(
"has_simd_data") {
341 STATIC_REQUIRE(has_simd_type<TestType,N128>);
342 STATIC_REQUIRE(has_simd_type<TestType,N256>);
344 STATIC_REQUIRE(has_simd_type<TestType,N512>);
347 SECTION(
"simd_data_t") {
353 SECTION(
"has the right size") {
354 CHECK(
sizeof(d128) == 16);
355 CHECK(
sizeof(d256) == 32);
357 CHECK(
sizeof(d512) == 64);
360 d128 x128{TestType{}};
361 d256 x256{TestType{}};
363 d512 x512{TestType{}};
365 SECTION(
"can be indexed at the right type") {
372 SECTION(
"can be indexed with the right value") {
381 SECTION(
"simd_intrinsic_t") {
388 [[maybe_unused]] t128 x128{};
389 [[maybe_unused]] t256 x256{};
391 [[maybe_unused]] t512 x512{};
can we convert simd_data_t<U,N> -> simd_data_t<T,N> automatically using gcc vector extensions?
Does this type have operations that are semantically correct when lifted to the simd_data_t level?
Do we want to use AVX512's notion of an _mmask8, _mmask16, _mmask32, or _mmask64 for masking operatio...
ein::simd_data_t<T,N> is defined
type T is one of the candidates
N is one of the candidates
is this type one of the types that is handled well automatically by clang/gcc vector extensions?
#define ein_artificial
[[artificial]].
#define ein_inline
inline [[always_inline]]
#define ein_nodiscard
C++17 [[nodiscard]].
#define ein_const
[[const]] is not const
typename ps< N *sizeof(float) *8 >::type type
typename pd< N *sizeof(double) *8 >::type type
typename ps< N *sizeof(T) *8 >::type type
__m128d cast_pd(__m128i a) noexcept
typename mmask< N >::type mmask_t
If AVX512 is enabled returns the type of an n-bit mmask.
__m128 cast_ps(__m128i a) noexcept
typename detail::simd_intrinsic< T, N >::type simd_intrinsic_t
Returns the Intel intrinsic type associated with a simd register full of N values of type T.
storage_type< T > __attribute__((__vector_size__(N *sizeof(storage_type< T >)), __aligned__(N *sizeof(storage_type< T >)))) simd_data_t
__m128i cast_si(__m128 a) noexcept
typename detail::storage_type_impl< T >::type storage_type
The type used to store T in a simd_data_t.
std::conditional_t< has_mmask< T, N >, mmask< N >, simd_intrinsic_t< T, N > > simd_mask_t
What type of mask should I use?
constexpr size_t max_simd_size
largest simd register width supported on this platform in bytes
const string_view type
returns the unmangled name of the type T