// Simd fixed_size ABI specific implementations -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
//
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/*
 * The fixed_size ABI gives the following guarantees:
 *  - simd objects are passed via the stack
 *  - memory layout of `simd<_Tp, _Np>` is equivalent to `array<_Tp, _Np>`
 *  - alignment of `simd<_Tp, _Np>` is `_Np * sizeof(_Tp)` if _Np is a
 *    power-of-2 value, otherwise `std::__bit_ceil(_Np * sizeof(_Tp))` (Note:
 *    if the alignment were to exceed the system/compiler maximum, it is
 *    bounded to that maximum)
 *  - simd_mask objects are passed like bitset<_Np>
 *  - memory layout of `simd_mask<_Tp, _Np>` is equivalent to `bitset<_Np>`
 *  - alignment of `simd_mask<_Tp, _Np>` is equal to the alignment of
 *    `bitset<_Np>`
 */

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_

#if __cplusplus >= 201703L

#include <array>

_GLIBCXX_SIMD_BEGIN_NAMESPACE

// __simd_tuple_element {{{
template <size_t _I, typename _Tp>
  struct __simd_tuple_element;

template <typename _Tp, typename _A0, typename... _As>
  struct __simd_tuple_element<0, _SimdTuple<_Tp, _A0, _As...>>
  { using type = simd<_Tp, _A0>; };

template <size_t _I, typename _Tp, typename _A0, typename... _As>
  struct __simd_tuple_element<_I, _SimdTuple<_Tp, _A0, _As...>>
  {
    using type
      = typename __simd_tuple_element<_I - 1, _SimdTuple<_Tp, _As...>>::type;
  };

template <size_t _I, typename _Tp>
  using __simd_tuple_element_t = typename __simd_tuple_element<_I, _Tp>::type;

// }}}
// __simd_tuple_concat {{{
template <typename _Tp, typename... _A0s, typename... _A1s>
  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0s..., _A1s...>
  __simd_tuple_concat(const _SimdTuple<_Tp, _A0s...>& __left,
                      const _SimdTuple<_Tp, _A1s...>& __right)
  {
    if constexpr (sizeof...(_A0s) == 0)
      return __right;
    else if constexpr (sizeof...(_A1s) == 0)
      return __left;
    else
      return {__left.first, __simd_tuple_concat(__left.second, __right)};
  }

template <typename _Tp, typename _A10, typename... _A1s>
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _SimdTuple<_Tp, simd_abi::scalar, _A10, _A1s...>
  __simd_tuple_concat(const _Tp& __left,
                      const _SimdTuple<_Tp, _A10, _A1s...>& __right)
  { return {__left, __right}; }

// }}}
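// Exposition-only sketch (not part of the original header): concatenation
// appends the ABI lists of the two tuples, e.g.
//   _SimdTuple<float, simd_abi::scalar> __a{1.f};
//   _SimdTuple<float, simd_abi::scalar> __b{2.f};
//   auto __ab = __simd_tuple_concat(__a, __b);
//   static_assert(is_same_v<decltype(__ab),
//                           _SimdTuple<float, simd_abi::scalar,
//                                      simd_abi::scalar>>);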
// __simd_tuple_pop_front {{{
// Returns the next _SimdTuple in __x that has _Np elements less.
// Precondition: _Np must match the number of elements in __first (recursively)
template <size_t _Np, typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto)
  __simd_tuple_pop_front(_Tp&& __x)
  {
    if constexpr (_Np == 0)
      return static_cast<_Tp&&>(__x);
    else
      {
        using _Up = __remove_cvref_t<_Tp>;
        static_assert(_Np >= _Up::_S_first_size);
        return __simd_tuple_pop_front<_Np - _Up::_S_first_size>(__x.second);
      }
  }

// }}}
// __get_simd_at<_Np> {{{1
struct __as_simd {};

struct __as_simd_tuple {};

template <typename _Tp, typename _A0, typename... _Abis>
  _GLIBCXX_SIMD_INTRINSIC constexpr simd<_Tp, _A0>
  __simd_tuple_get_impl(__as_simd, const _SimdTuple<_Tp, _A0, _Abis...>& __t,
                        _SizeConstant<0>)
  { return {__private_init, __t.first}; }

template <typename _Tp, typename _A0, typename... _Abis>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __simd_tuple_get_impl(__as_simd_tuple,
                        const _SimdTuple<_Tp, _A0, _Abis...>& __t,
                        _SizeConstant<0>)
  { return __t.first; }

template <typename _Tp, typename _A0, typename... _Abis>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _A0, _Abis...>& __t,
                        _SizeConstant<0>)
  { return __t.first; }

template <typename _R, size_t _Np, typename _Tp, typename... _Abis>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __simd_tuple_get_impl(_R, const _SimdTuple<_Tp, _Abis...>& __t,
                        _SizeConstant<_Np>)
  { return __simd_tuple_get_impl(_R(), __t.second, _SizeConstant<_Np - 1>()); }

template <size_t _Np, typename _Tp, typename... _Abis>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _Abis...>& __t,
                        _SizeConstant<_Np>)
  {
    return __simd_tuple_get_impl(__as_simd_tuple(), __t.second,
                                 _SizeConstant<_Np - 1>());
  }

template <size_t _Np, typename _Tp, typename... _Abis>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __get_simd_at(const _SimdTuple<_Tp, _Abis...>& __t)
  { return __simd_tuple_get_impl(__as_simd(), __t, _SizeConstant<_Np>()); }

// }}}
// __get_tuple_at<_Np> {{{
template <size_t _Np, typename _Tp, typename... _Abis>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __get_tuple_at(const _SimdTuple<_Tp, _Abis...>& __t)
  { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); }

template <size_t _Np, typename _Tp, typename... _Abis>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __get_tuple_at(_SimdTuple<_Tp, _Abis...>& __t)
  { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); }
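// Exposition-only (not part of the original header): for a
// _SimdTuple<float, _A0, _A1> __t, __get_simd_at<1>(__t) returns a
// simd<float, _A1> copy of the second chunk, while __get_tuple_at<1>(__t)
// returns a (const or mutable) reference to the raw chunk __t.second.first.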
// __tuple_element_meta {{{1
template <typename _Tp, typename _Abi, size_t _Offset>
  struct __tuple_element_meta : public _Abi::_SimdImpl
  {
    static_assert(is_same_v<typename _Abi::_SimdImpl::abi_type,
                            _Abi>); // this fails e.g. when _SimdImpl is an
                                    // alias for _SimdImplBuiltin<_DifferentAbi>
    using value_type = _Tp;
    using abi_type = _Abi;
    using _Traits = _SimdTraits<_Tp, _Abi>;
    using _MaskImpl = typename _Abi::_MaskImpl;
    using _MaskMember = typename _Traits::_MaskMember;
    using simd_type = simd<_Tp, _Abi>;

    static constexpr size_t _S_offset = _Offset;

    static constexpr size_t
    _S_size()
    { return simd_size<_Tp, _Abi>::value; }

    static constexpr _MaskImpl _S_mask_impl = {};

    template <size_t _Np, bool _Sanitized>
      _GLIBCXX_SIMD_INTRINSIC static auto
      _S_submask(_BitMask<_Np, _Sanitized> __bits)
      { return __bits.template _M_extract<_Offset, _S_size()>(); }

    template <size_t _Np, bool _Sanitized>
      _GLIBCXX_SIMD_INTRINSIC static _MaskMember
      _S_make_mask(_BitMask<_Np, _Sanitized> __bits)
      {
        return _MaskImpl::template _S_convert<_Tp>(
          __bits.template _M_extract<_Offset, _S_size()>()._M_sanitized());
      }

    _GLIBCXX_SIMD_INTRINSIC static _ULLong
    _S_mask_to_shifted_ullong(_MaskMember __k)
    { return _MaskImpl::_S_to_bits(__k).to_ullong() << _Offset; }
  };

template <size_t _Offset, typename _Tp, typename _Abi, typename... _As>
  __tuple_element_meta<_Tp, _Abi, _Offset>
  __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&)
  { return {}; }

// }}}1
// _WithOffset wrapper class {{{
template <size_t _Offset, typename _Base>
  struct _WithOffset : public _Base
  {
    static inline constexpr size_t _S_offset = _Offset;

    _GLIBCXX_SIMD_INTRINSIC char*
    _M_as_charptr()
    {
      return reinterpret_cast<char*>(this)
             + _S_offset * sizeof(typename _Base::value_type);
    }

    _GLIBCXX_SIMD_INTRINSIC const char*
    _M_as_charptr() const
    {
      return reinterpret_cast<const char*>(this)
             + _S_offset * sizeof(typename _Base::value_type);
    }
  };

// make _WithOffset<_WithOffset> ill-formed to use:
template <size_t _O0, size_t _O1, typename _Base>
  struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {};

template <size_t _Offset, typename _Tp>
  decltype(auto)
  __add_offset(_Tp& __base)
  { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); }

template <size_t _Offset, typename _Tp>
  decltype(auto)
  __add_offset(const _Tp& __base)
  {
    return static_cast<const _WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(
      __base);
  }

template <size_t _Offset, size_t _ExistingOffset, typename _Tp>
  decltype(auto)
  __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base)
  {
    return static_cast<_WithOffset<_Offset + _ExistingOffset, _Tp>&>(
      static_cast<_Tp&>(__base));
  }

template <size_t _Offset, size_t _ExistingOffset, typename _Tp>
  decltype(auto)
  __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base)
  {
    return static_cast<const _WithOffset<_Offset + _ExistingOffset, _Tp>&>(
      static_cast<const _Tp&>(__base));
  }

template <typename _Tp>
  constexpr inline size_t __offset = 0;

template <size_t _Offset, typename _Tp>
  constexpr inline size_t __offset<_WithOffset<_Offset, _Tp>>
    = _WithOffset<_Offset, _Tp>::_S_offset;

template <typename _Tp>
  constexpr inline size_t __offset<const _Tp> = __offset<_Tp>;

template <typename _Tp>
  constexpr inline size_t __offset<_Tp&> = __offset<_Tp>;

template <typename _Tp>
  constexpr inline size_t __offset<_Tp&&> = __offset<_Tp>;

// }}}
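// Exposition-only (not part of the original header): __offset looks through
// cv- and ref-qualifiers, so with
//   using _O4 = _WithOffset<4, _SimdTuple<float, simd_abi::scalar>>;
// __offset<_O4>, __offset<const _O4&>, and __offset<_O4&&> are all 4, while
// __offset<_SimdTuple<float, simd_abi::scalar>> is 0.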
// _SimdTuple specializations {{{1
// empty {{{2
template <typename _Tp>
  struct _SimdTuple<_Tp>
  {
    using value_type = _Tp;
    static constexpr size_t _S_tuple_size = 0;
    static constexpr size_t _S_size() { return 0; }
  };

// _SimdTupleData {{{2
template <typename _FirstType, typename _SecondType>
  struct _SimdTupleData
  {
    _FirstType first;
    _SecondType second;

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    {
      if constexpr (is_class_v<_FirstType>)
        return first._M_is_constprop() && second._M_is_constprop();
      else
        return __builtin_constant_p(first) && second._M_is_constprop();
    }
  };

template <typename _FirstType, typename _Tp>
  struct _SimdTupleData<_FirstType, _SimdTuple<_Tp>>
  {
    _FirstType first;
    static constexpr _SimdTuple<_Tp> second = {};

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    {
      if constexpr (is_class_v<_FirstType>)
        return first._M_is_constprop();
      else
        return __builtin_constant_p(first);
    }
  };

// 1 or more {{{2
template <typename _Tp, typename _Abi0, typename... _Abis>
  struct _SimdTuple<_Tp, _Abi0, _Abis...>
    : _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember,
                     _SimdTuple<_Tp, _Abis...>>
  {
    static_assert(!__is_fixed_size_abi_v<_Abi0>);

    using value_type = _Tp;
    using _FirstType = typename _SimdTraits<_Tp, _Abi0>::_SimdMember;
    using _FirstAbi = _Abi0;
    using _SecondType = _SimdTuple<_Tp, _Abis...>;

    static constexpr size_t _S_tuple_size = sizeof...(_Abis) + 1;

    static constexpr size_t
    _S_size()
    { return simd_size_v<_Tp, _Abi0> + _SecondType::_S_size(); }

    static constexpr size_t _S_first_size = simd_size_v<_Tp, _Abi0>;
    static constexpr bool _S_is_homogeneous = (is_same_v<_Abi0, _Abis> && ...);

    using _Base = _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember,
                                 _SimdTuple<_Tp, _Abis...>>;
    using _Base::first;
    using _Base::second;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(const _SimdTuple&) = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple&
    operator=(const _SimdTuple&) = default;

    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(_Up&& __x)
      : _Base{static_cast<_Up&&>(__x)} {}

    template <typename _Up, typename _Up2>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(_Up&& __x, _Up2&& __y)
      : _Base{static_cast<_Up&&>(__x), static_cast<_Up2&&>(__y)} {}

    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(_Up&& __x, _SimdTuple<_Tp>)
      : _Base{static_cast<_Up&&>(__x)} {}

    _GLIBCXX_SIMD_INTRINSIC char*
    _M_as_charptr()
    { return reinterpret_cast<char*>(this); }

    _GLIBCXX_SIMD_INTRINSIC const char*
    _M_as_charptr() const
    { return reinterpret_cast<const char*>(this); }

    template <size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC constexpr auto&
      _M_at()
      {
        if constexpr (_Np == 0)
          return first;
        else
          return second.template _M_at<_Np - 1>();
      }

    template <size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
      _M_at() const
      {
        if constexpr (_Np == 0)
          return first;
        else
          return second.template _M_at<_Np - 1>();
      }

    template <size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC constexpr auto
      _M_simd_at() const
      {
        if constexpr (_Np == 0)
          return simd<_Tp, _Abi0>(__private_init, first);
        else
          return second.template _M_simd_at<_Np - 1>();
      }

    template <size_t _Offset = 0, typename _Fp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple
      _S_generate(_Fp&& __gen, _SizeConstant<_Offset> = {})
      {
        auto&& __first = __gen(__tuple_element_meta<_Tp, _Abi0, _Offset>());
        if constexpr (_S_tuple_size == 1)
          return {__first};
        else
          return {__first,
                  _SecondType::_S_generate(
                    static_cast<_Fp&&>(__gen),
                    _SizeConstant<_Offset + simd_size_v<_Tp, _Abi0>>())};
      }
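    // Exposition-only (not part of the original header): _S_generate visits
    // one __tuple_element_meta per chunk, e.g.
    //   using _Tup = __fixed_size_storage_t<float, 5>;
    //   auto __iota0 = _Tup::_S_generate([](auto __meta) {
    //     return __meta._S_broadcast(float(__meta._S_offset));
    //   });
    // broadcasts each chunk from its element offset within the tuple.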
    template <size_t _Offset = 0, typename _Fp, typename... _More>
      _GLIBCXX_SIMD_INTRINSIC _SimdTuple
      _M_apply_wrapped(_Fp&& __fun, const _More&... __more) const
      {
        auto&& __first
          = __fun(__make_meta<_Offset>(*this), first, __more.first...);
        if constexpr (_S_tuple_size == 1)
          return {__first};
        else
          return {__first,
                  second.template _M_apply_wrapped<
                    _Offset + simd_size_v<_Tp, _Abi0>>(
                    static_cast<_Fp&&>(__fun), __more.second...)};
      }

    template <typename _Tup>
      _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto)
      _M_extract_argument(_Tup&& __tup) const
      {
        using _TupT = typename __remove_cvref_t<_Tup>::value_type;
        if constexpr (is_same_v<_SimdTuple, __remove_cvref_t<_Tup>>)
          return __tup.first;
        else if (__builtin_is_constant_evaluated())
          return __fixed_size_storage_t<_TupT, _S_first_size>::_S_generate(
            [&](auto __meta) constexpr {
              return __meta._S_generator(
                [&](auto __i) constexpr { return __tup[__i]; },
                static_cast<_TupT*>(nullptr));
            });
        else
          return [&]() {
            __fixed_size_storage_t<_TupT, _S_first_size> __r;
            __builtin_memcpy(__r._M_as_charptr(), __tup._M_as_charptr(),
                             sizeof(__r));
            return __r;
          }();
      }

    template <typename _Tup>
      _GLIBCXX_SIMD_INTRINSIC constexpr auto&
      _M_skip_argument(_Tup&& __tup) const
      {
        static_assert(_S_tuple_size > 1);
        using _Up = __remove_cvref_t<_Tup>;
        constexpr size_t __off = __offset<_Up>;
        if constexpr (_S_first_size == _Up::_S_first_size && __off == 0)
          return __tup.second;
        else if constexpr (_S_first_size > _Up::_S_first_size
                           && _S_first_size % _Up::_S_first_size == 0
                           && __off == 0)
          return __simd_tuple_pop_front<_S_first_size>(__tup);
        else if constexpr (_S_first_size + __off < _Up::_S_first_size)
          return __add_offset<_S_first_size>(__tup);
        else if constexpr (_S_first_size + __off == _Up::_S_first_size)
          return __tup.second;
        else
          __assert_unreachable<_Tup>();
      }

    template <size_t _Offset, typename... _More>
      _GLIBCXX_SIMD_INTRINSIC constexpr void
      _M_assign_front(const _SimdTuple<_Tp, _Abi0, _More...>& __x) &
      {
        static_assert(_Offset == 0);
        first = __x.first;
        if constexpr (sizeof...(_More) > 0)
          {
            static_assert(sizeof...(_Abis) >= sizeof...(_More));
            second.template _M_assign_front<0>(__x.second);
          }
      }

    template <size_t _Offset>
      _GLIBCXX_SIMD_INTRINSIC constexpr void
      _M_assign_front(const _FirstType& __x) &
      {
        static_assert(_Offset == 0);
        first = __x;
      }

    template <size_t _Offset, typename... _As>
      _GLIBCXX_SIMD_INTRINSIC constexpr void
      _M_assign_front(const _SimdTuple<_Tp, _As...>& __x) &
      {
        __builtin_memcpy(_M_as_charptr() + _Offset * sizeof(value_type),
                         __x._M_as_charptr(),
                         sizeof(_Tp) * _SimdTuple<_Tp, _As...>::_S_size());
      }
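    // Exposition-only (not part of the original header): _M_extract_argument
    // materializes the leading _S_first_size values of a differently-chunked
    // argument tuple (via memcpy, or element-wise in constant evaluation),
    // and _M_skip_argument yields the remainder for the recursive step. E.g.
    // applying a {4,2}-chunked tuple to a {2,2,2}-chunked argument first
    // extracts the leading {2,2} and then skips ahead to the final {2}.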
    /*
     * Iterate over the first objects in this _SimdTuple and call __fun for
     * each of them. If additional arguments are passed via __more, chunk them
     * into _SimdTuple or __vector_type_t objects of the same number of
     * values.
     */
    template <typename _Fp, typename... _More>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple
      _M_apply_per_chunk(_Fp&& __fun, _More&&... __more) const
      {
        if constexpr ((...
                       || conjunction_v<
                            is_lvalue_reference<_More>,
                            negation<is_const<remove_reference_t<_More>>>>))
          {
            // need to write back at least one of __more after calling __fun
            auto&& __first = [&](auto... __args) constexpr {
              auto __r = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first,
                               __args...);
              [[maybe_unused]] auto&& __ignore_me = {(
                [](auto&& __dst, const auto& __src) {
                  if constexpr (is_assignable_v<decltype(__dst),
                                                decltype(__src)>)
                    {
                      __dst.template _M_assign_front<__offset<decltype(__dst)>>(
                        __src);
                    }
                }(static_cast<_More&&>(__more), __args),
                0)...};
              return __r;
            }(_M_extract_argument(__more)...);
            if constexpr (_S_tuple_size == 1)
              return {__first};
            else
              return {__first,
                      second._M_apply_per_chunk(static_cast<_Fp&&>(__fun),
                                                _M_skip_argument(__more)...)};
          }
        else
          {
            auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first,
                                   _M_extract_argument(__more)...);
            if constexpr (_S_tuple_size == 1)
              return {__first};
            else
              return {__first,
                      second._M_apply_per_chunk(static_cast<_Fp&&>(__fun),
                                                _M_skip_argument(__more)...)};
          }
      }

    template <typename _R = _Tp, typename _Fp, typename... _More>
      _GLIBCXX_SIMD_INTRINSIC auto
      _M_apply_r(_Fp&& __fun, const _More&... __more) const
      {
        auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first,
                               __more.first...);
        if constexpr (_S_tuple_size == 1)
          return __first;
        else
          return __simd_tuple_concat<_R>(
            __first, second.template _M_apply_r<_R>(static_cast<_Fp&&>(__fun),
                                                    __more.second...));
      }

    template <typename _Fp, typename... _More>
      _GLIBCXX_SIMD_INTRINSIC constexpr friend _SanitizedBitMask<_S_size()>
      _M_test(const _Fp& __fun, const _SimdTuple& __x, const _More&... __more)
      {
        const _SanitizedBitMask<_S_first_size> __first
          = _Abi0::_MaskImpl::_S_to_bits(
              __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), __x.first,
                    __more.first...));
        if constexpr (_S_tuple_size == 1)
          return __first;
        else
          return _M_test(__fun, __x.second, __more.second...)
                   ._M_prepend(__first);
      }

    template <typename _Up, _Up _I>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
      operator[](integral_constant<_Up, _I>) const noexcept
      {
        if constexpr (_I < simd_size_v<_Tp, _Abi0>)
          return _M_subscript_read(_I);
        else
          return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()];
      }

    _Tp
    operator[](size_t __i) const noexcept
    {
      if constexpr (_S_tuple_size == 1)
        return _M_subscript_read(__i);
      else
        {
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
          return reinterpret_cast<const __may_alias<_Tp>*>(this)[__i];
#else
          if constexpr (__is_scalar_abi<_Abi0>())
            {
              const _Tp* ptr = &first;
              return ptr[__i];
            }
          else
            return __i < simd_size_v<_Tp, _Abi0>
                     ? _M_subscript_read(__i)
                     : second[__i - simd_size_v<_Tp, _Abi0>];
#endif
        }
    }

    void
    _M_set(size_t __i, _Tp __val) noexcept
    {
      if constexpr (_S_tuple_size == 1)
        return _M_subscript_write(__i, __val);
      else
        {
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
          reinterpret_cast<__may_alias<_Tp>*>(this)[__i] = __val;
#else
          if (__i < simd_size_v<_Tp, _Abi0>)
            _M_subscript_write(__i, __val);
          else
            second._M_set(__i - simd_size_v<_Tp, _Abi0>, __val);
#endif
        }
    }

  private:
    // _M_subscript_read/_write {{{
    _Tp
    _M_subscript_read([[maybe_unused]] size_t __i) const noexcept
    {
      if constexpr (__is_vectorizable_v<_FirstType>)
        return first;
      else
        return first[__i];
    }

    void
    _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept
    {
      if constexpr (__is_vectorizable_v<_FirstType>)
        first = __y;
      else
        first._M_set(__i, __y);
    }

    // }}}
  };
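// Exposition-only (not part of the original header): without
// _GLIBCXX_SIMD_USE_ALIASING_LOADS, element access walks the chunks; e.g.
// for _SimdTuple<float, _A0, _A1> with simd_size_v<float, _A0> == 4,
// __t[5] forwards to __t.second[1], i.e. lane 1 of the second chunk.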
// __make_simd_tuple {{{1
template <typename _Tp, typename _A0>
  _GLIBCXX_SIMD_INTRINSIC _SimdTuple<_Tp, _A0>
  __make_simd_tuple(simd<_Tp, _A0> __x0)
  { return {__data(__x0)}; }

template <typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC _SimdTuple<_Tp, _A0, _As...>
  __make_simd_tuple(const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
  { return {__data(__x0), __make_simd_tuple(__xs...)}; }

template <typename _Tp, typename _A0>
  _GLIBCXX_SIMD_INTRINSIC _SimdTuple<_Tp, _A0>
  __make_simd_tuple(const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0)
  { return {__arg0}; }

template <typename _Tp, typename _A0, typename _A1, typename... _Abis>
  _GLIBCXX_SIMD_INTRINSIC _SimdTuple<_Tp, _A0, _A1, _Abis...>
  __make_simd_tuple(
    const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0,
    const typename _SimdTraits<_Tp, _A1>::_SimdMember& __arg1,
    const typename _SimdTraits<_Tp, _Abis>::_SimdMember&... __args)
  { return {__arg0, __make_simd_tuple<_Tp, _A1, _Abis...>(__arg1, __args...)}; }

// __to_simd_tuple {{{1
template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX>
  _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np>
  __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX);

template <typename _Tp, size_t _Np,
          size_t _Offset = 0, // skip this many elements in __from0
          typename _R = __fixed_size_storage_t<_Tp, _Np>, typename _V0,
          typename _V0VT = _VectorTraits<_V0>, typename... _VX>
  _GLIBCXX_SIMD_INTRINSIC _R constexpr
  __to_simd_tuple(const _V0 __from0, const _VX... __fromX)
  {
    static_assert(is_same_v<typename _V0VT::value_type, _Tp>);
    static_assert(_Offset < _V0VT::_S_full_size);
    using _R0 = __vector_type_t<_Tp, _R::_S_first_size>;
    if constexpr (_R::_S_tuple_size == 1)
      {
        if constexpr (_Np == 1)
          return _R{__from0[_Offset]};
        else if constexpr (_Offset == 0 && _V0VT::_S_full_size >= _Np)
          return _R{__intrin_bitcast<_R0>(__from0)};
        else if constexpr (_Offset * 2 == _V0VT::_S_full_size
                           && _V0VT::_S_full_size / 2 >= _Np)
          return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0))};
        else if constexpr (_Offset * 4 == _V0VT::_S_full_size
                           && _V0VT::_S_full_size / 4 >= _Np)
          return _R{__intrin_bitcast<_R0>(__extract_part<1, 4>(__from0))};
        else
          __assert_unreachable<_Tp>();
      }
    else
      {
        if constexpr (1 == _R::_S_first_size)
          { // extract one scalar and recurse
            if constexpr (_Offset + 1 < _V0VT::_S_full_size)
              return _R{__from0[_Offset],
                        __to_simd_tuple<_Tp, _Np - 1, _Offset + 1>(__from0,
                                                                   __fromX...)};
            else
              return _R{__from0[_Offset],
                        __to_simd_tuple<_Tp, _Np - 1, 0>(__fromX...)};
          }

        // place __from0 into _R::first and recurse for __fromX -> _R::second
        else if constexpr (_V0VT::_S_full_size == _R::_S_first_size
                           && _Offset == 0)
          return _R{__from0,
                    __to_simd_tuple<_Tp, _Np - _R::_S_first_size>(__fromX...)};

        // place lower part of __from0 into _R::first and recurse with _Offset
        else if constexpr (_V0VT::_S_full_size > _R::_S_first_size
                           && _Offset == 0)
          return _R{__intrin_bitcast<_R0>(__from0),
                    __to_simd_tuple<_Tp, _Np - _R::_S_first_size,
                                    _R::_S_first_size>(__from0, __fromX...)};

        // place lower part of second quarter of __from0 into _R::first and
        // recurse with _Offset
        else if constexpr (_Offset * 4 == _V0VT::_S_full_size
                           && _V0VT::_S_full_size >= 4 * _R::_S_first_size)
          return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)),
                    __to_simd_tuple<_Tp, _Np - _R::_S_first_size,
                                    _Offset + _R::_S_first_size>(__from0,
                                                                 __fromX...)};

        // place lower half of high half of __from0 into _R::first and recurse
        // with _Offset
        else if constexpr (_Offset * 2 == _V0VT::_S_full_size
                           && _V0VT::_S_full_size >= 4 * _R::_S_first_size)
          return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)),
                    __to_simd_tuple<_Tp, _Np - _R::_S_first_size,
                                    _Offset + _R::_S_first_size>(__from0,
                                                                 __fromX...)};

        // place high half of __from0 into _R::first and recurse with __fromX
        else if constexpr (_Offset * 2 == _V0VT::_S_full_size
                           && _V0VT::_S_full_size / 2 >= _R::_S_first_size)
          return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0)),
                    __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 0>(
                      __fromX...)};

        // ill-formed if some unforeseen pattern is needed
        else
          __assert_unreachable<_Tp>();
      }
  }
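// Exposition-only sketch (not part of the original header; assumes a target
// where __fixed_size_storage_t<float, 8> is two 4-wide chunks, e.g. SSE):
// two __vector_type_t<float, 4> arguments then map one-to-one onto the
// resulting tuple:
//   __vector_type_t<float, 4> __lo = ..., __hi = ...;
//   auto __tup = __to_simd_tuple<float, 8>(__lo, __hi);
//   // __tup.first holds __lo, __tup.second.first holds __hi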
template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX>
  _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np>
  __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX)
  {
    if constexpr (is_same_v<_Tp, _V>)
      {
        static_assert(
          sizeof...(_VX) == 0,
          "An array of scalars must be the last argument to __to_simd_tuple");
        return __call_with_subscripts(
          __from, make_index_sequence<_NV>(),
          [&](const auto... __args) constexpr {
            return __simd_tuple_concat(
              _SimdTuple<_Tp, simd_abi::scalar>{__args}..., _SimdTuple<_Tp>());
          });
      }
    else
      return __call_with_subscripts(
        __from, make_index_sequence<_NV>(),
        [&](const auto... __args) constexpr {
          return __to_simd_tuple<_Tp, _Np>(__args..., __fromX...);
        });
  }

template <size_t, typename _Tp>
  using __to_tuple_helper = _Tp;

template <typename _Tp, typename _A0, size_t _NOut, size_t _Np,
          size_t... _Indexes>
  _GLIBCXX_SIMD_INTRINSIC __fixed_size_storage_t<_Tp, _NOut>
  __to_simd_tuple_impl(index_sequence<_Indexes...>,
                       const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>,
                                   _Np>& __args)
  {
    return __make_simd_tuple<_Tp, __to_tuple_helper<_Indexes, _A0>...>(
      __args[_Indexes]...);
  }

template <typename _Tp, typename _A0, size_t _NOut, size_t _Np,
          typename _R = __fixed_size_storage_t<_Tp, _NOut>>
  _GLIBCXX_SIMD_INTRINSIC _R
  __to_simd_tuple_sized(
    const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args)
  {
    static_assert(_Np * simd_size_v<_Tp, _A0> >= _NOut);
    return __to_simd_tuple_impl<_Tp, _A0, _NOut>(
      make_index_sequence<_R::_S_tuple_size>(), __args);
  }
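// Exposition-only (not part of the original header): __to_simd_tuple_sized
// may consume fewer input chunks than given; it only reads the first
// _R::_S_tuple_size array members, so e.g. three 4-wide chunks (12 values)
// can be narrowed to a two-chunk __fixed_size_storage_t<float, 8>.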
// __optimize_simd_tuple {{{1
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC _SimdTuple<_Tp>
  __optimize_simd_tuple(const _SimdTuple<_Tp>)
  { return {}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC const _SimdTuple<_Tp, _Ap>&
  __optimize_simd_tuple(const _SimdTuple<_Tp, _Ap>& __x)
  { return __x; }

template <typename _Tp, typename _A0, typename _A1, typename... _Abis,
          typename _R = __fixed_size_storage_t<
            _Tp, _SimdTuple<_Tp, _A0, _A1, _Abis...>::_S_size()>>
  _GLIBCXX_SIMD_INTRINSIC _R
  __optimize_simd_tuple(const _SimdTuple<_Tp, _A0, _A1, _Abis...>& __x)
  {
    using _Tup = _SimdTuple<_Tp, _A0, _A1, _Abis...>;
    if constexpr (is_same_v<_R, _Tup>)
      return __x;
    else if constexpr (is_same_v<typename _R::_FirstType,
                                 typename _Tup::_FirstType>)
      return {__x.first, __optimize_simd_tuple(__x.second)};
    else if constexpr (__is_scalar_abi<_A0>()
                       || _A0::template _S_is_partial<_Tp>)
      return {__generate_from_n_evaluations<_R::_S_first_size,
                                            typename _R::_FirstType>(
                [&](auto __i) { return __x[__i]; }),
              __optimize_simd_tuple(
                __simd_tuple_pop_front<_R::_S_first_size>(__x))};
    else if constexpr (is_same_v<_A0, _A1>
                       && _R::_S_first_size
                            == simd_size_v<_Tp, _A0> + simd_size_v<_Tp, _A1>)
      return {__concat(__x.template _M_at<0>(), __x.template _M_at<1>()),
              __optimize_simd_tuple(__x.second.second)};
    else if constexpr (sizeof...(_Abis) >= 2
                       && _R::_S_first_size == (4 * simd_size_v<_Tp, _A0>)
                       && simd_size_v<_Tp, _A0>
                            == __simd_tuple_element_t<
                                 (sizeof...(_Abis) >= 2 ? 3 : 0),
                                 _Tup>::size())
      return {__concat(__concat(__x.template _M_at<0>(),
                                __x.template _M_at<1>()),
                       __concat(__x.template _M_at<2>(),
                                __x.template _M_at<3>())),
              __optimize_simd_tuple(__x.second.second.second.second)};
    else
      {
        static_assert(sizeof(_R) == sizeof(__x));
        _R __r;
        __builtin_memcpy(__r._M_as_charptr(), __x._M_as_charptr(),
                         sizeof(_Tp) * _R::_S_size());
        return __r;
      }
  }

// __for_each(const _SimdTuple&, Fun) {{{1
template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __for_each(const _SimdTuple<_Tp, _A0>& __t, _Fp&& __fun)
  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); }

template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
          typename... _As, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun)
  {
    __fun(__make_meta<_Offset>(__t), __t.first);
    __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second,
                                                     static_cast<_Fp&&>(__fun));
  }

// __for_each(_SimdTuple&, Fun) {{{1
template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __for_each(_SimdTuple<_Tp, _A0>& __t, _Fp&& __fun)
  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); }

template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
          typename... _As, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun)
  {
    __fun(__make_meta<_Offset>(__t), __t.first);
    __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second,
                                                     static_cast<_Fp&&>(__fun));
  }

// __for_each(_SimdTuple&, const _SimdTuple&, Fun) {{{1
template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __for_each(_SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b,
             _Fp&& __fun)
  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); }

template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
          typename... _As, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __a,
             const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun)
  {
    __fun(__make_meta<_Offset>(__a), __a.first, __b.first);
    __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second,
                                                     static_cast<_Fp&&>(__fun));
  }

// __for_each(const _SimdTuple&, const _SimdTuple&, Fun) {{{1
template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __for_each(const _SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b,
             _Fp&& __fun)
  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); }

template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
          typename... _As, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __a,
             const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun)
  {
    __fun(__make_meta<_Offset>(__a), __a.first, __b.first);
    __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second,
                                                     static_cast<_Fp&&>(__fun));
  }

// }}}1
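// Exposition-only (not part of the original header): __for_each visits the
// chunks in order, passing the per-chunk meta object, e.g.
//   __for_each(__tup, [](auto __meta, auto& __chunk) {
//     // __meta._S_offset is the element offset of __chunk in the tuple
//   });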
// __extract_part(_SimdTuple) {{{
template <int _Index, int _Total, int _Combine, typename _Tp, typename _A0,
          typename... _As>
  _GLIBCXX_SIMD_INTRINSIC auto // __vector_type_t or _SimdTuple
  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x)
  {
    // worst cases:
    // (a) 4, 4, 4 => 3, 3, 3, 3 (_Total = 4)
    // (b) 2, 2, 2 => 3, 3 (_Total = 2)
    // (c) 4, 2 => 2, 2, 2 (_Total = 3)
    using _Tuple = _SimdTuple<_Tp, _A0, _As...>;
    static_assert(_Index + _Combine <= _Total && _Index >= 0 && _Total >= 1);
    constexpr size_t _Np = _Tuple::_S_size();
    static_assert(_Np >= _Total && _Np % _Total == 0);
    constexpr size_t __values_per_part = _Np / _Total;
    [[maybe_unused]] constexpr size_t __values_to_skip
      = _Index * __values_per_part;
    constexpr size_t __return_size = __values_per_part * _Combine;
    using _RetAbi = simd_abi::deduce_t<_Tp, __return_size>;

    // handle (optimize) the simple cases
    if constexpr (_Index == 0 && _Tuple::_S_first_size == __return_size)
      return __x.first._M_data;
    else if constexpr (_Index == 0 && _Total == _Combine)
      return __x;
    else if constexpr (_Index == 0 && _Tuple::_S_first_size >= __return_size)
      return __intrin_bitcast<__vector_type_t<_Tp, __return_size>>(
        __as_vector(__x.first));

    // recurse to skip unused data members at the beginning of _SimdTuple
    else if constexpr (__values_to_skip >= _Tuple::_S_first_size)
      { // recurse
        if constexpr (_Tuple::_S_first_size % __values_per_part == 0)
          {
            constexpr int __parts_in_first
              = _Tuple::_S_first_size / __values_per_part;
            return __extract_part<_Index - __parts_in_first,
                                  _Total - __parts_in_first, _Combine>(
              __x.second);
          }
        else
          return __extract_part<__values_to_skip - _Tuple::_S_first_size,
                                _Np - _Tuple::_S_first_size, __return_size>(
            __x.second);
      }

    // extract from multiple _SimdTuple data members
    else if constexpr (__return_size > _Tuple::_S_first_size - __values_to_skip)
      {
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
        const __may_alias<_Tp>* const element_ptr
          = reinterpret_cast<const __may_alias<_Tp>*>(&__x) + __values_to_skip;
        return __as_vector(simd<_Tp, _RetAbi>(element_ptr, element_aligned));
#else
        [[maybe_unused]] constexpr size_t __offset = __values_to_skip;
        return __as_vector(simd<_Tp, _RetAbi>([&](auto __i) constexpr {
          constexpr _SizeConstant<__i + __offset> __k;
          return __x[__k];
        }));
#endif
      }

    // all of the return values are in __x.first
    else if constexpr (_Tuple::_S_first_size % __values_per_part == 0)
      return __extract_part<_Index, _Tuple::_S_first_size / __values_per_part,
                            _Combine>(__x.first);
    else
      return __extract_part<__values_to_skip, _Tuple::_S_first_size,
                            _Combine * __values_per_part>(__x.first);
  }

// }}}
// __fixed_size_storage_t<_Tp, _Np> {{{
template <typename _Tp, int _Np, typename _Tuple,
          typename _Next = simd<_Tp, _AllNativeAbis::_BestAbi<_Tp, _Np>>,
          int _Remain = _Np - int(_Next::size())>
  struct __fixed_size_storage_builder;

template <typename _Tp, int _Np>
  struct __fixed_size_storage
    : public __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp>> {};

template <typename _Tp, int _Np, typename... _As, typename _Next>
  struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next,
                                      0>
  { using type = _SimdTuple<_Tp, _As..., typename _Next::abi_type>; };

template <typename _Tp, int _Np, typename... _As, typename _Next, int _Remain>
  struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next,
                                      _Remain>
  {
    using type = typename __fixed_size_storage_builder<
      _Tp, _Remain, _SimdTuple<_Tp, _As..., typename _Next::abi_type>>::type;
  };

// }}}
// _AbisInSimdTuple {{{
template <typename _Tp>
  struct _SeqOp;

template <size_t _I0, size_t... _Is>
  struct _SeqOp<index_sequence<_I0, _Is...>>
  {
    using _FirstPlusOne = index_sequence<_I0 + 1, _Is...>;
    using _NotFirstPlusOne = index_sequence<_I0, (_Is + 1)...>;
    template <size_t _First, size_t _Add>
      using _Prepend = index_sequence<_First, _I0 + _Add, (_Is + _Add)...>;
  };

template <typename _Tp>
  struct _AbisInSimdTuple;

template <typename _Tp>
  struct _AbisInSimdTuple<_SimdTuple<_Tp>>
  {
    using _Counts = index_sequence<0>;
    using _Begins = index_sequence<0>;
  };

template <typename _Tp, typename _Ap>
  struct _AbisInSimdTuple<_SimdTuple<_Tp, _Ap>>
  {
    using _Counts = index_sequence<1>;
    using _Begins = index_sequence<0>;
  };

template <typename _Tp, typename _A0, typename... _As>
  struct _AbisInSimdTuple<_SimdTuple<_Tp, _A0, _A0, _As...>>
  {
    using _Counts = typename _SeqOp<typename _AbisInSimdTuple<
      _SimdTuple<_Tp, _A0, _As...>>::_Counts>::_FirstPlusOne;
    using _Begins = typename _SeqOp<typename _AbisInSimdTuple<
      _SimdTuple<_Tp, _A0, _As...>>::_Begins>::_NotFirstPlusOne;
  };

template <typename _Tp, typename _A0, typename _A1, typename... _As>
  struct _AbisInSimdTuple<_SimdTuple<_Tp, _A0, _A1, _As...>>
  {
    using _Counts = typename _SeqOp<typename _AbisInSimdTuple<
      _SimdTuple<_Tp, _A1, _As...>>::_Counts>::template _Prepend<1, 0>;
    using _Begins = typename _SeqOp<typename _AbisInSimdTuple<
      _SimdTuple<_Tp, _A1, _As...>>::_Begins>::template _Prepend<0, 1>;
  };

// }}}
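// Exposition-only (not part of the original header): _AbisInSimdTuple groups
// runs of equal ABIs; for _SimdTuple<float, _A, _A, _B> the result is
// _Counts = index_sequence<2, 1> and _Begins = index_sequence<0, 2>, i.e.
// two _A chunks starting at tuple index 0 and one _B chunk at index 2.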
// __autocvt_to_simd {{{
template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>>
  struct __autocvt_to_simd
  {
    _Tp _M_data;
    using _TT = __remove_cvref_t<_Tp>;

    operator _TT()
    { return _M_data; }

    operator _TT&()
    {
      static_assert(is_lvalue_reference<_Tp>::value, "");
      static_assert(!is_const<_Tp>::value, "");
      return _M_data;
    }

    operator _TT*()
    {
      static_assert(is_lvalue_reference<_Tp>::value, "");
      static_assert(!is_const<_Tp>::value, "");
      return &_M_data;
    }

    constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd) {}

    template <typename _Abi>
      operator simd<typename _TT::value_type, _Abi>()
      { return {__private_init, _M_data}; }

    template <typename _Abi>
      operator simd<typename _TT::value_type, _Abi>&()
      {
        return *reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(
          &_M_data);
      }

    template <typename _Abi>
      operator simd<typename _TT::value_type, _Abi>*()
      {
        return reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(
          &_M_data);
      }
  };

template <typename _Tp>
  __autocvt_to_simd(_Tp&&) -> __autocvt_to_simd<_Tp>;

template <typename _Tp>
  struct __autocvt_to_simd<_Tp, true>
  {
    using _TT = __remove_cvref_t<_Tp>;
    _Tp _M_data;
    fixed_size_simd<_TT, 1> _M_fd;

    constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {}

    ~__autocvt_to_simd()
    { _M_data = __data(_M_fd).first; }

    operator fixed_size_simd<_TT, 1>()
    { return _M_fd; }

    operator fixed_size_simd<_TT, 1>&()
    {
      static_assert(is_lvalue_reference<_Tp>::value, "");
      static_assert(!is_const<_Tp>::value, "");
      return _M_fd;
    }

    operator fixed_size_simd<_TT, 1>*()
    {
      static_assert(is_lvalue_reference<_Tp>::value, "");
      static_assert(!is_const<_Tp>::value, "");
      return &_M_fd;
    }
  };

// }}}
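// Exposition-only (not part of the original header): __autocvt_to_simd lets
// a raw chunk (or, via the arithmetic specialization, a scalar) bind to a
// function parameter expecting a simd, e.g. __autocvt_to_simd(__chunk) where
// a simd<_Tp, _Abi>& is required; the scalar specialization writes the value
// back from its fixed_size_simd<_TT, 1> member on destruction.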
struct _CommonImplFixedSize;

template <int _Np> struct _SimdImplFixedSize;

template <int _Np> struct _MaskImplFixedSize;

// simd_abi::_Fixed {{{
template <int _Np>
  struct simd_abi::_Fixed
  {
    template <typename _Tp> static constexpr size_t _S_size = _Np;

    template <typename _Tp> static constexpr size_t _S_full_size = _Np;

    // validity traits {{{
    struct _IsValidAbiTag : public __bool_constant<(_Np > 0)> {};

    template <typename _Tp>
      struct _IsValidSizeFor
        : __bool_constant<(_Np <= simd_abi::max_fixed_size<_Tp>)> {};

    template <typename _Tp>
      struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>,
                                    _IsValidSizeFor<_Tp>> {};

    template <typename _Tp>
      static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;

    // }}}
    // _S_masked {{{
    _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
    _S_masked(_BitMask<_Np> __x)
    { return __x._M_sanitized(); }

    _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
    _S_masked(_SanitizedBitMask<_Np> __x)
    { return __x; }

    // }}}
    // _*Impl {{{
    using _CommonImpl = _CommonImplFixedSize;
    using _SimdImpl = _SimdImplFixedSize<_Np>;
    using _MaskImpl = _MaskImplFixedSize<_Np>;

    // }}}
    // __traits {{{
    template <typename _Tp, bool = _S_is_valid_v<_Tp>>
      struct __traits : _InvalidTraits {};

    template <typename _Tp>
      struct __traits<_Tp, true>
      {
        using _IsValid = true_type;
        using _SimdImpl = _SimdImplFixedSize<_Np>;
        using _MaskImpl = _MaskImplFixedSize<_Np>;

        // simd and simd_mask member types {{{
        using _SimdMember = __fixed_size_storage_t<_Tp, _Np>;
        using _MaskMember = _SanitizedBitMask<_Np>;

        static constexpr size_t _S_simd_align
          = std::__bit_ceil(_Np * sizeof(_Tp));

        static constexpr size_t _S_mask_align = alignof(_MaskMember);

        // }}}
        // _SimdBase / base class for simd, providing extra conversions {{{
        struct _SimdBase
        {
          // The following ensures that function arguments are passed via the
          // stack. This is important for ABI compatibility across TU
          // boundaries.
          _SimdBase(const _SimdBase&) {}

          _SimdBase() = default;

          explicit
          operator const _SimdMember&() const
          { return static_cast<const simd<_Tp, _Fixed>*>(this)->_M_data; }

          explicit
          operator array<_Tp, _Np>() const
          {
            array<_Tp, _Np> __r;
            // _SimdMember can be larger because of higher alignment
            static_assert(sizeof(__r) <= sizeof(_SimdMember), "");
            __builtin_memcpy(__r.data(),
                             &static_cast<const _SimdMember&>(*this),
                             sizeof(__r));
            return __r;
          }
        };

        // }}}
        // _MaskBase {{{
        // empty. The bitset interface suffices
        struct _MaskBase {};

        // }}}
        // _SimdCastType {{{
        struct _SimdCastType
        {
          _SimdCastType(const array<_Tp, _Np>&);

          _SimdCastType(const _SimdMember& dd) : _M_data(dd) {}

          explicit
          operator const _SimdMember&() const { return _M_data; }

        private:
          const _SimdMember& _M_data;
        };

        // }}}
        // _MaskCastType {{{
        class _MaskCastType
        { _MaskCastType() = delete; };
        // }}}
      };
    // }}}
  };

// }}}
// _CommonImplFixedSize {{{
struct _CommonImplFixedSize
{
  // _S_store {{{
  template <typename _Tp, typename... _As>
    _GLIBCXX_SIMD_INTRINSIC static void
    _S_store(const _SimdTuple<_Tp, _As...>& __x, void* __addr)
    {
      constexpr size_t _Np = _SimdTuple<_Tp, _As...>::_S_size();
      __builtin_memcpy(__addr, &__x, _Np * sizeof(_Tp));
    }
  // }}}
};

// }}}
// _SimdImplFixedSize {{{1
// fixed_size should not inherit from _SimdMathFallback in order for
// specializations in the used _SimdTuple Abis to get used
template <int _Np>
  struct _SimdImplFixedSize
  {
    // member types {{{2
    using _MaskMember = _SanitizedBitMask<_Np>;

    template <typename _Tp>
      using _SimdMember = __fixed_size_storage_t<_Tp, _Np>;

    template <typename _Tp>
      static constexpr size_t _S_tuple_size = _SimdMember<_Tp>::_S_tuple_size;

    template <typename _Tp>
      using _Simd = simd<_Tp, simd_abi::fixed_size<_Np>>;

    template <typename _Tp>
      using _TypeTag = _Tp*;

    // broadcast {{{2
    template <typename _Tp>
      static constexpr inline _SimdMember<_Tp>
      _S_broadcast(_Tp __x) noexcept
      {
        return _SimdMember<_Tp>::_S_generate(
          [&](auto __meta) constexpr { return __meta._S_broadcast(__x); });
      }

    // _S_generator {{{2
    template <typename _Fp, typename _Tp>
      static constexpr inline _SimdMember<_Tp>
      _S_generator(_Fp&& __gen, _TypeTag<_Tp>)
      {
        return _SimdMember<_Tp>::_S_generate([&__gen](auto __meta) constexpr {
          return __meta._S_generator(
            [&](auto __i) constexpr {
              return __i < _Np ? __gen(_SizeConstant<__meta._S_offset + __i>())
                               : 0;
            },
            _TypeTag<_Tp>());
        });
      }

    // _S_load {{{2
    template <typename _Tp, typename _Up>
      static inline _SimdMember<_Tp>
      _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
      {
        return _SimdMember<_Tp>::_S_generate([&](auto __meta) {
          return __meta._S_load(&__mem[__meta._S_offset], _TypeTag<_Tp>());
        });
      }

    // _S_masked_load {{{2
    template <typename _Tp, typename... _As, typename _Up>
      static inline _SimdTuple<_Tp, _As...>
      _S_masked_load(const _SimdTuple<_Tp, _As...>& __old,
                     const _MaskMember __bits, const _Up* __mem) noexcept
      {
        auto __merge = __old;
        __for_each(__merge, [&](auto __meta, auto& __native) {
          if (__meta._S_submask(__bits).any())
#pragma GCC diagnostic push
            // __mem + __meta._S_offset could be UB ([expr.add]/4.3), but it
            // punts the responsibility for avoiding UB to the caller of the
            // masked load via the mask. Consequently, the compiler may assume
            // this branch is unreachable, if the pointer arithmetic is UB.
#pragma GCC diagnostic ignored "-Warray-bounds"
            __native
              = __meta._S_masked_load(__native, __meta._S_make_mask(__bits),
                                      __mem + __meta._S_offset);
#pragma GCC diagnostic pop
        });
        return __merge;
      }
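    // Exposition-only (not part of the original header): chunks whose
    // submask is all-false are skipped entirely; e.g. a masked load on a
    // {4,4} tuple with mask bits 0b00001111 dereferences memory only for
    // the first chunk.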
    // _S_store {{{2
    template <typename _Tp, typename _Up>
      static inline void
      _S_store(const _SimdMember<_Tp>& __v, _Up* __mem, _TypeTag<_Tp>) noexcept
      {
        __for_each(__v, [&](auto __meta, auto __native) {
          __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>());
        });
      }

    // _S_masked_store {{{2
    template <typename _Tp, typename... _As, typename _Up>
      static inline void
      _S_masked_store(const _SimdTuple<_Tp, _As...>& __v, _Up* __mem,
                      const _MaskMember __bits) noexcept
      {
        __for_each(__v, [&](auto __meta, auto __native) {
          if (__meta._S_submask(__bits).any())
#pragma GCC diagnostic push
            // __mem + __meta._S_offset could be UB ([expr.add]/4.3), but it
            // punts the responsibility for avoiding UB to the caller of the
            // masked store via the mask. Consequently, the compiler may
            // assume this branch is unreachable, if the pointer arithmetic
            // is UB.
#pragma GCC diagnostic ignored "-Warray-bounds"
            __meta._S_masked_store(__native, __mem + __meta._S_offset,
                                   __meta._S_make_mask(__bits));
#pragma GCC diagnostic pop
        });
      }

    // negation {{{2
    template <typename _Tp, typename... _As>
      static inline _MaskMember
      _S_negate(const _SimdTuple<_Tp, _As...>& __x) noexcept
      {
        _MaskMember __bits = 0;
        __for_each(
          __x, [&__bits](auto __meta, auto __native) constexpr {
            __bits
              |= __meta._S_mask_to_shifted_ullong(__meta._S_negate(__native));
          });
        return __bits;
      }

    // reductions {{{2
    template <typename _Tp, typename _BinaryOperation>
      static constexpr inline _Tp
      _S_reduce(const _Simd<_Tp>& __x, const _BinaryOperation& __binary_op)
      {
        using _Tup = _SimdMember<_Tp>;
        const _Tup& __tup = __data(__x);
        if constexpr (_Tup::_S_tuple_size == 1)
          return _Tup::_FirstAbi::_SimdImpl::_S_reduce(
            __tup.template _M_simd_at<0>(), __binary_op);
        else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 2
                           && _Tup::_SecondType::_S_size() == 1)
          {
            return __binary_op(
              simd<_Tp, simd_abi::scalar>(
                reduce(__tup.template _M_simd_at<0>(), __binary_op)),
              __tup.template _M_simd_at<1>())[0];
          }
        else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 4
                           && _Tup::_SecondType::_S_size() == 2)
          {
            return __binary_op(
              simd<_Tp, simd_abi::scalar>(
                reduce(__tup.template _M_simd_at<0>(), __binary_op)),
              simd<_Tp, simd_abi::scalar>(
                reduce(__tup.template _M_simd_at<1>(), __binary_op)))[0];
          }
        else
          {
            const auto& __x2 = __call_with_n_evaluations<
              __div_roundup(_Tup::_S_tuple_size, 2)>(
              [](auto __first_simd, auto... __remaining) {
                if constexpr (sizeof...(__remaining) == 0)
                  return __first_simd;
                else
                  {
                    using _Tup2
                      = _SimdTuple<_Tp,
                                   typename decltype(__first_simd)::abi_type,
                                   typename decltype(__remaining)::abi_type...>;
                    return fixed_size_simd<_Tp, _Tup2::_S_size()>(
                      __private_init,
                      __make_simd_tuple(__first_simd, __remaining...));
                  }
              },
              [&](auto __i) {
                auto __left = __tup.template _M_simd_at<2 * __i>();
                if constexpr (2 * __i + 1 == _Tup::_S_tuple_size)
                  return __left;
                else
                  {
                    auto __right = __tup.template _M_simd_at<2 * __i + 1>();
                    using _LT = decltype(__left);
                    using _RT = decltype(__right);
                    if constexpr (_LT::size() == _RT::size())
                      return __binary_op(__left, __right);
                    else
                      {
                        _GLIBCXX_SIMD_USE_CONSTEXPR_API
                        typename _LT::mask_type __k(
                          __private_init,
                          [](auto __j) constexpr { return __j < _RT::size(); });
                        _LT __ext_right = __left;
                        where(__k, __ext_right)
                          = __proposed::resizing_simd_cast<_LT>(__right);
                        where(__k, __left) = __binary_op(__left, __ext_right);
                        return __left;
                      }
                  }
              });
            return reduce(__x2, __binary_op);
          }
      }

    // _S_min, _S_max {{{2
    template <typename _Tp, typename... _As>
      static inline constexpr _SimdTuple<_Tp, _As...>
      _S_min(const _SimdTuple<_Tp, _As...>& __a,
             const _SimdTuple<_Tp, _As...>& __b)
      {
        return __a._M_apply_per_chunk(
          [](auto __impl, auto __aa, auto __bb) constexpr {
            return __impl._S_min(__aa, __bb);
          },
          __b);
      }

    template <typename _Tp, typename... _As>
      static inline constexpr _SimdTuple<_Tp, _As...>
      _S_max(const _SimdTuple<_Tp, _As...>& __a,
             const _SimdTuple<_Tp, _As...>& __b)
      {
        return __a._M_apply_per_chunk(
          [](auto __impl, auto __aa, auto __bb) constexpr {
            return __impl._S_max(__aa, __bb);
          },
          __b);
      }

    // _S_complement {{{2
    template <typename _Tp, typename... _As>
      static inline constexpr _SimdTuple<_Tp, _As...>
      _S_complement(const _SimdTuple<_Tp, _As...>& __x) noexcept
      {
        return __x._M_apply_per_chunk(
          [](auto __impl, auto __xx) constexpr {
            return __impl._S_complement(__xx);
          });
      }

    // _S_unary_minus {{{2
    template <typename _Tp, typename... _As>
      static inline constexpr _SimdTuple<_Tp, _As...>
      _S_unary_minus(const _SimdTuple<_Tp, _As...>& __x) noexcept
      {
        return __x._M_apply_per_chunk(
          [](auto __impl, auto __xx) constexpr {
            return __impl._S_unary_minus(__xx);
          });
      }
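    // Exposition-only (not part of the original header): in the
    // heterogeneous pairing inside _S_reduce, the narrower right-hand simd
    // is widened to the left-hand type; the extra lanes are seeded from
    // __left and the masked assignment only overwrites lanes that have a
    // right-hand value, so the remaining lanes carry __left through
    // unchanged.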
    // arithmetic operators {{{2
#define _GLIBCXX_SIMD_FIXED_OP(name_, op_)                                     \
    template <typename _Tp, typename... _As>                                   \
      static inline constexpr _SimdTuple<_Tp, _As...>                          \
      name_(const _SimdTuple<_Tp, _As...> __x,                                 \
            const _SimdTuple<_Tp, _As...> __y)                                 \
      {                                                                        \
        return __x._M_apply_per_chunk(                                         \
          [](auto __impl, auto __xx, auto __yy) constexpr {                    \
            return __impl.name_(__xx, __yy);                                   \
          },                                                                   \
          __y);                                                                \
      }

    _GLIBCXX_SIMD_FIXED_OP(_S_plus, +)
    _GLIBCXX_SIMD_FIXED_OP(_S_minus, -)
    _GLIBCXX_SIMD_FIXED_OP(_S_multiplies, *)
    _GLIBCXX_SIMD_FIXED_OP(_S_divides, /)
    _GLIBCXX_SIMD_FIXED_OP(_S_modulus, %)
    _GLIBCXX_SIMD_FIXED_OP(_S_bit_and, &)
    _GLIBCXX_SIMD_FIXED_OP(_S_bit_or, |)
    _GLIBCXX_SIMD_FIXED_OP(_S_bit_xor, ^)
    _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_left, <<)
    _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_right, >>)
#undef _GLIBCXX_SIMD_FIXED_OP

    template <typename _Tp, typename... _As>
      static inline constexpr _SimdTuple<_Tp, _As...>
      _S_bit_shift_left(const _SimdTuple<_Tp, _As...>& __x, int __y)
      {
        return __x._M_apply_per_chunk(
          [__y](auto __impl, auto __xx) constexpr {
            return __impl._S_bit_shift_left(__xx, __y);
          });
      }

    template <typename _Tp, typename... _As>
      static inline constexpr _SimdTuple<_Tp, _As...>
      _S_bit_shift_right(const _SimdTuple<_Tp, _As...>& __x, int __y)
      {
        return __x._M_apply_per_chunk(
          [__y](auto __impl, auto __xx) constexpr {
            return __impl._S_bit_shift_right(__xx, __y);
          });
      }
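    // Exposition-only (not part of the original header):
    // _GLIBCXX_SIMD_FIXED_OP(_S_plus, +) expands to an _S_plus that forwards
    // chunk-wise, i.e. the result's first member is
    // __impl._S_plus(__x.first, __y.first), and so on down the tuple.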
    // math {{{2
#define _GLIBCXX_SIMD_APPLY_ON_TUPLE(_RetTp, __name)                           \
    template <typename _Tp, typename... _As, typename... _More>                \
      static inline __fixed_size_storage_t<_RetTp, _Np>                        \
      _S_##__name(const _SimdTuple<_Tp, _As...>& __x, const _More&... __more)  \
      {                                                                        \
        if constexpr (sizeof...(_More) == 0)                                   \
          {                                                                    \
            if constexpr (is_same_v<_Tp, _RetTp>)                              \
              return __x._M_apply_per_chunk(                                   \
                [](auto __impl, auto __xx) constexpr {                         \
                  using _V = typename decltype(__impl)::simd_type;             \
                  return __data(__name(_V(__private_init, __xx)));             \
                });                                                            \
            else                                                               \
              return __optimize_simd_tuple(                                    \
                __x.template _M_apply_r<_RetTp>(                               \
                  [](auto __impl, auto __xx) {                                 \
                    return __impl._S_##__name(__xx);                           \
                  }));                                                         \
          }                                                                    \
        else if constexpr (                                                    \
          is_same_v<                                                           \
            _Tp, _RetTp> && (... && is_same_v<_SimdTuple<_Tp, _As...>, _More>))\
          return __x._M_apply_per_chunk(                                       \
            [](auto __impl, auto __xx, auto... __pack) constexpr {             \
              using _V = typename decltype(__impl)::simd_type;                 \
              return __data(__name(_V(__private_init, __xx),                   \
                                   _V(__private_init, __pack)...));            \
            },                                                                 \
            __more...);                                                        \
        else if constexpr (is_same_v<_Tp, _RetTp>)                             \
          return __x._M_apply_per_chunk(                                       \
            [](auto __impl, auto __xx, auto... __pack) constexpr {             \
              using _V = typename decltype(__impl)::simd_type;                 \
              return __data(__name(_V(__private_init, __xx),                   \
                                   __autocvt_to_simd(__pack)...));             \
            },                                                                 \
            __more...);                                                        \
        else                                                                   \
          __assert_unreachable<_Tp>();                                         \
      }

    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acos)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asin)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan2)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cos)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sin)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tan)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acosh)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asinh)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atanh)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cosh)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sinh)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tanh)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp2)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, expm1)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, ilogb)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log10)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log1p)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log2)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, logb)
    // modf implemented in simd_math.h
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbn) // double scalbn(double x, int exp);
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbln)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cbrt)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, abs)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fabs)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, pow)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sqrt)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erf)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erfc)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, lgamma)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tgamma)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, trunc)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ceil)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, floor)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nearbyint)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, rint)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lrint)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llrint)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, round)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lround)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llround)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ldexp)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmod)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, remainder)
    // copysign in simd_math.h
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nextafter)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fdim)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmax)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmin)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fma)
    _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, fpclassify)
#undef _GLIBCXX_SIMD_APPLY_ON_TUPLE

    template <typename _Tp, typename... _Abis>
      static _SimdTuple<_Tp, _Abis...>
      _S_remquo(const _SimdTuple<_Tp, _Abis...>& __x,
                const _SimdTuple<_Tp, _Abis...>& __y,
                __fixed_size_storage_t<int,
                                       _SimdTuple<_Tp, _Abis...>::_S_size()>*
                  __z)
      {
        return __x._M_apply_per_chunk(
          [](auto __impl, const auto __xx, const auto __yy, auto& __zz) {
            return __impl._S_remquo(__xx, __yy, &__zz);
          },
          __y, *__z);
      }

    template <typename _Tp, typename... _As>
      static inline _SimdTuple<_Tp, _As...>
      _S_frexp(const _SimdTuple<_Tp, _As...>& __x,
               __fixed_size_storage_t<int, _Np>& __exp) noexcept
      {
        return __x._M_apply_per_chunk(
          [](auto __impl, const auto& __a, auto& __b) {
            return __data(
              frexp(typename decltype(__impl)::simd_type(__private_init, __a),
                    __autocvt_to_simd(__b)));
          },
          __exp);
      }

#define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_)                                    \
    template <typename _Tp, typename... _As>                                   \
      static inline _MaskMember                                                \
      _S_##name_(const _SimdTuple<_Tp, _As...>& __x) noexcept                  \
      {                                                                        \
        return _M_test(                                                        \
          [](auto __impl, auto __xx) { return __impl._S_##name_(__xx); },      \
          __x);                                                                \
      }

    _GLIBCXX_SIMD_TEST_ON_TUPLE_(isinf)
    _GLIBCXX_SIMD_TEST_ON_TUPLE_(isfinite)
    _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnan)
    _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnormal)
    _GLIBCXX_SIMD_TEST_ON_TUPLE_(signbit)
#undef _GLIBCXX_SIMD_TEST_ON_TUPLE_
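    // Exposition-only (not part of the original header): _M_test chains the
    // per-chunk mask bits into one _SanitizedBitMask via _M_prepend, so bit
    // __i of, e.g., _S_isnan(__x) corresponds to element __i of __x.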
    // _S_increment & _S_decrement {{{2
    template <typename... _Ts>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_increment(_SimdTuple<_Ts...>& __x)
      {
        __for_each(
          __x, [](auto __meta, auto& native) constexpr {
            __meta._S_increment(native);
          });
      }

    template <typename... _Ts>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_decrement(_SimdTuple<_Ts...>& __x)
      {
        __for_each(
          __x, [](auto __meta, auto& native) constexpr {
            __meta._S_decrement(native);
          });
      }

    // compares {{{2
#define _GLIBCXX_SIMD_CMP_OPERATIONS(__cmp)                                    \
    template <typename _Tp, typename... _As>                                   \
      _GLIBCXX_SIMD_INTRINSIC constexpr static _MaskMember                     \
      __cmp(const _SimdTuple<_Tp, _As...>& __x,                                \
            const _SimdTuple<_Tp, _As...>& __y)                                \
      {                                                                        \
        return _M_test(                                                        \
          [](auto __impl, auto __xx, auto __yy) constexpr {                    \
            return __impl.__cmp(__xx, __yy);                                   \
          },                                                                   \
          __x, __y);                                                           \
      }

    _GLIBCXX_SIMD_CMP_OPERATIONS(_S_equal_to)
    _GLIBCXX_SIMD_CMP_OPERATIONS(_S_not_equal_to)
    _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less)
    _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less_equal)
    _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isless)
    _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessequal)
    _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreater)
    _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreaterequal)
    _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessgreater)
    _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isunordered)
#undef _GLIBCXX_SIMD_CMP_OPERATIONS

    // smart_reference access {{{2
    template <typename _Tp, typename... _As, typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_set(_SimdTuple<_Tp, _As...>& __v, int __i, _Up&& __x) noexcept
      { __v._M_set(__i, static_cast<_Up&&>(__x)); }

    // _S_masked_assign {{{2
    template <typename _Tp, typename... _As>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_assign(const _MaskMember __bits,
                       _SimdTuple<_Tp, _As...>& __lhs,
                       const __type_identity_t<_SimdTuple<_Tp, _As...>>& __rhs)
      {
        __for_each(
          __lhs, __rhs,
          [&](auto __meta, auto& __native_lhs, auto __native_rhs) constexpr {
            __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
                                    __native_rhs);
          });
      }

    // Optimization for the case where the RHS is a scalar. No need to
    // broadcast the scalar to a simd first.
    template <typename _Tp, typename... _As>
      _GLIBCXX_SIMD_INTRINSIC static void
      _S_masked_assign(const _MaskMember __bits,
                       _SimdTuple<_Tp, _As...>& __lhs,
                       const __type_identity_t<_Tp> __rhs)
      {
        __for_each(
          __lhs, [&](auto __meta, auto& __native_lhs) constexpr {
            __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
                                    __rhs);
          });
      }

    // _S_masked_cassign {{{2
    template <typename _Op, typename _Tp, typename... _As>
      static inline void
      _S_masked_cassign(const _MaskMember __bits,
                        _SimdTuple<_Tp, _As...>& __lhs,
                        const _SimdTuple<_Tp, _As...>& __rhs, _Op __op)
      {
        __for_each(
          __lhs, __rhs,
          [&](auto __meta, auto& __native_lhs, auto __native_rhs) constexpr {
            __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
                                              __native_lhs, __native_rhs,
                                              __op);
          });
      }

    // Optimization for the case where the RHS is a scalar. No need to
    // broadcast the scalar to a simd first.
    template <typename _Op, typename _Tp, typename... _As>
      static inline void
      _S_masked_cassign(const _MaskMember __bits,
                        _SimdTuple<_Tp, _As...>& __lhs, const _Tp& __rhs,
                        _Op __op)
      {
        __for_each(
          __lhs, [&](auto __meta, auto& __native_lhs) constexpr {
            __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
                                              __native_lhs, __rhs, __op);
          });
      }

    // _S_masked_unary {{{2
    template