head 1.1;
branch 1.1.1;
access;
symbols
netbsd-11-0-RC5:1.1.1.2
netbsd-11-0-RC4:1.1.1.2
netbsd-11-0-RC3:1.1.1.2
netbsd-11-0-RC2:1.1.1.2
netbsd-11-0-RC1:1.1.1.2
gcc-14-3-0:1.1.1.3
perseant-exfatfs-base-20250801:1.1.1.2
netbsd-11:1.1.1.2.0.2
netbsd-11-base:1.1.1.2
gcc-12-5-0:1.1.1.2
perseant-exfatfs-base-20240630:1.1.1.2
gcc-12-4-0:1.1.1.2
perseant-exfatfs:1.1.1.1.0.2
perseant-exfatfs-base:1.1.1.1
gcc-12-3-0:1.1.1.1
FSF:1.1.1;
locks; strict;
comment @ * @;
1.1
date 2023.07.30.05.21.20; author mrg; state Exp;
branches
1.1.1.1;
next ;
commitid tk6nV4mbc9nVEMyE;
1.1.1.1
date 2023.07.30.05.21.20; author mrg; state Exp;
branches
1.1.1.1.2.1;
next 1.1.1.2;
commitid tk6nV4mbc9nVEMyE;
1.1.1.2
date 2024.06.30.07.35.40; author mrg; state Exp;
branches;
next 1.1.1.3;
commitid m7BwZsPdfJvuHYfF;
1.1.1.3
date 2025.09.13.23.45.49; author mrg; state Exp;
branches;
next ;
commitid KwhwN4krNWa6XBaG;
1.1.1.1.2.1
date 2024.07.01.01.00.58; author perseant; state Exp;
branches;
next ;
commitid NkoYLLCQWWw9v4gF;
desc
@@
1.1
log
@Initial revision
@
text
@// Simd NEON specific implementations -*- C++ -*-
// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_NEON_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_NEON_H_
#if __cplusplus >= 201703L
#if !_GLIBCXX_SIMD_HAVE_NEON
#error "simd_neon.h may only be included when NEON on ARM is available"
#endif
_GLIBCXX_SIMD_BEGIN_NAMESPACE
// _CommonImplNeon {{{
// NEON flavour of the common implementation helpers.  It derives from
// _CommonImplBuiltin and adds no code of its own: its only member is a
// using-declaration naming the inherited _S_store.
struct _CommonImplNeon : _CommonImplBuiltin
{
// _S_store {{{
using _CommonImplBuiltin::_S_store;
// }}}
};
// }}}
// _SimdImplNeon {{{
template
struct _SimdImplNeon : _SimdImplBuiltin<_Abi>
{
// Shorthand for the builtin implementation this class derives from; every
// operation that has no NEON-specific version below is forwarded to it.
using _Base = _SimdImplBuiltin<_Abi>;
// Mask storage type for element type _Tp, taken unchanged from _Base.
template
using _MaskMember = typename _Base::template _MaskMember<_Tp>;
// The same value, 16, for every element type (presumably a size in bytes,
// i.e. one 128-bit NEON register -- TODO confirm against the users of this
// constant).
template
static constexpr size_t _S_max_store_size = 16;
// _S_masked_load {{{
// Masked load with element conversion.  For every lane index __i visited by
// __execute_n_times<_Np> (presumably 0 .. _Np-1) whose mask entry __k[__i] is
// non-zero, __mem[__i] is converted to _Tp and stored in lane __i of __merge.
// Lanes with a zero mask entry keep the value they had on entry and their
// __mem element is not read.  Returns the updated __merge.
template
static inline _SimdWrapper<_Tp, _Np>
_S_masked_load(_SimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k,
const _Up* __mem) noexcept
{
__execute_n_times<_Np>([&](auto __i) {
// Only selected lanes touch memory.
if (__k[__i] != 0)
__merge._M_set(__i, static_cast<_Tp>(__mem[__i]));
});
return __merge;
}
// }}}
// _S_masked_store_nocvt {{{
// Masked store without type conversion.  For every lane index __i visited by
// __execute_n_times<_Np> (presumably 0 .. _Np-1) whose mask entry __k[__i] is
// non-zero, lane __i of __v is written to __mem[__i].  Memory belonging to
// lanes with a zero mask entry is not written.
template
_GLIBCXX_SIMD_INTRINSIC static void
_S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem,
_MaskMember<_Tp> __k)
{
__execute_n_times<_Np>([&](auto __i) {
if (__k[__i] != 0)
__mem[__i] = __v[__i];
});
}
// }}}
// _S_reduce {{{
// Reduces the lanes of __x to a single value with __binary_op.  In the
// branches implemented here __binary_op is always applied to two simd
// objects:
//  - a 16-byte, non-partial vector with at least 4 lanes is split into two
//    halves, the halves are combined once, and the result is reduced by a
//    recursive call on the half-sized vector;
//  - vectors of 8, 4 or 2 lanes are combined with permuted copies of
//    themselves (3, 2 and 1 steps respectively) and lane 0 is returned;
//  - every other case is forwarded to _Base::_S_reduce.
// The permutation comments below assume __vector_permute<I0, I1, ...> puts
// source lane Ik into result lane k -- TODO confirm.
template
_GLIBCXX_SIMD_INTRINSIC static _Tp
_S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
{
constexpr size_t _Np = __x.size();
if constexpr (sizeof(__x) == 16 && _Np >= 4
&& !_Abi::template _S_is_partial<_Tp>)
{
// Halve the problem first, then reduce the half-sized vector.
const auto __halves = split>>(__x);
const auto __y = __binary_op(__halves[0], __halves[1]);
return _SimdImplNeon>::_S_reduce(
__y, static_cast<_BinaryOperation&&>(__binary_op));
}
else if constexpr (_Np == 8)
{
// Step 1: combine each lane with its neighbour (pairs 0/1, 2/3, ...).
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<1, 0, 3, 2, 5, 4, 7, 6>(
__x._M_data)));
// Step 2: combine within each group of four lanes.
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<3, 2, 1, 0, 7, 6, 5, 4>(
__x._M_data)));
// Step 3: combine the two groups of four.
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<7, 6, 5, 4, 3, 2, 1, 0>(
__x._M_data)));
return __x[0];
}
else if constexpr (_Np == 4)
{
// Step 1: combine neighbouring lanes.
__x
= __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<1, 0, 3, 2>(__x._M_data)));
// Step 2: combine the two pairs.
__x
= __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<3, 2, 1, 0>(__x._M_data)));
return __x[0];
}
else if constexpr (_Np == 2)
{
// A single swap-and-combine is enough for two lanes.
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<1, 0>(__x._M_data)));
return __x[0];
}
else
return _Base::_S_reduce(__x,
static_cast<_BinaryOperation&&>(__binary_op));
}
// }}}
// math {{{
// _S_sqrt {{{
// Lane-wise square root of the vector __x.  When __have_neon_a64 is set, the
// vector is converted with __to_intrin and passed to the vsqrt intrinsic
// chosen by the _TVT::_S_is checks (vsqrt_f32, vsqrtq_f32, vsqrt_f64 or
// vsqrtq_f64); a vector type matching none of the checks reaches
// __assert_unreachable.  Without __have_neon_a64 the call is forwarded to
// _Base::_S_sqrt.
template >
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_sqrt(_Tp __x)
{
if constexpr (__have_neon_a64)
{
const auto __intrin = __to_intrin(__x);
if constexpr (_TVT::template _S_is)
return vsqrt_f32(__intrin);
else if constexpr (_TVT::template _S_is)
return vsqrtq_f32(__intrin);
else if constexpr (_TVT::template _S_is)
return vsqrt_f64(__intrin);
else if constexpr (_TVT::template _S_is)
return vsqrtq_f64(__intrin);
else
__assert_unreachable<_Tp>();
}
else
return _Base::_S_sqrt(__x);
}
// }}}
// _S_trunc {{{
// Lane-wise truncation of the vector __x.  Three strategies:
//  - with __have_neon_a32, the vrnd intrinsic chosen by the _TVT::_S_is checks
//    (vrnd_f32, vrndq_f32, vrnd_f64 or vrndq_f64) is applied; a vector type
//    matching none of the checks reaches __assert_unreachable;
//  - otherwise, for float elements, the value is converted to a 32-bit
//    integer and back, and that result is used only for lanes whose magnitude
//    is below 0x1p23f; the remaining lanes are returned unchanged;
//  - otherwise the call is forwarded to _Base::_S_trunc.
template >
_GLIBCXX_SIMD_INTRINSIC static _TW _S_trunc(_TW __x)
{
using _Tp = typename _TVT::value_type;
if constexpr (__have_neon_a32)
{
const auto __intrin = __to_intrin(__x);
if constexpr (_TVT::template _S_is)
return vrnd_f32(__intrin);
else if constexpr (_TVT::template _S_is)
return vrndq_f32(__intrin);
else if constexpr (_TVT::template _S_is)
return vrnd_f64(__intrin);
else if constexpr (_TVT::template _S_is)
return vrndq_f64(__intrin);
else
__assert_unreachable<_Tp>();
}
else if constexpr (is_same_v<_Tp, float>)
{
auto __intrin = __to_intrin(__x);
// Round trip through s32: the q-suffixed intrinsics for a 16-byte vector,
// the plain ones otherwise.
if constexpr (sizeof(__x) == 16)
__intrin = vcvtq_f32_s32(vcvtq_s32_f32(__intrin));
else
__intrin = vcvt_f32_s32(vcvt_s32_f32(__intrin));
// Per-lane select: keep the round-trip result only where |x| < 2^23.  A
// float of larger magnitude has no fractional bits left, so the input
// already is the answer; a lane for which the comparison is false for any
// other reason likewise keeps its input.
return _Base::_S_abs(__x)._M_data < 0x1p23f
? __vector_bitcast(__intrin)
: __x._M_data;
}
else
return _Base::_S_trunc(__x);
}
// }}}
// _S_round {{{
// Lane-wise rounding of __x.  With __have_neon_a32 the vrnda intrinsic is
// selected from the element size and the total vector size:
//   4-byte elements,  8-byte vector -> vrnda_f32
//   4-byte elements, 16-byte vector -> vrndaq_f32
//   8-byte elements,  8-byte vector -> vrnda_f64
//   8-byte elements, 16-byte vector -> vrndaq_f64
// Any other combination reaches __assert_unreachable.  Without
// __have_neon_a32 the call is forwarded to _Base::_S_round.
template
_GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
_S_round(_SimdWrapper<_Tp, _Np> __x)
{
if constexpr (__have_neon_a32)
{
const auto __intrin = __to_intrin(__x);
if constexpr (sizeof(_Tp) == 4 && sizeof(__x) == 8)
return vrnda_f32(__intrin);
else if constexpr (sizeof(_Tp) == 4 && sizeof(__x) == 16)
return vrndaq_f32(__intrin);
else if constexpr (sizeof(_Tp) == 8 && sizeof(__x) == 8)
return vrnda_f64(__intrin);
else if constexpr (sizeof(_Tp) == 8 && sizeof(__x) == 16)
return vrndaq_f64(__intrin);
else
__assert_unreachable<_Tp>();
}
else
return _Base::_S_round(__x);
}
// }}}
// _S_floor {{{
// Lane-wise floor of the vector __x.  With __have_neon_a32 the vector is
// converted with __to_intrin and passed to the vrndm intrinsic chosen by the
// _TVT::_S_is checks (vrndm_f32, vrndmq_f32, vrndm_f64 or vrndmq_f64); a
// vector type matching none of the checks reaches __assert_unreachable.
// Without __have_neon_a32 the call is forwarded to _Base::_S_floor.
template >
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_floor(_Tp __x)
{
if constexpr (__have_neon_a32)
{
const auto __intrin = __to_intrin(__x);
if constexpr (_TVT::template _S_is)
return vrndm_f32(__intrin);
else if constexpr (_TVT::template _S_is)
return vrndmq_f32(__intrin);
else if constexpr (_TVT::template _S_is)
return vrndm_f64(__intrin);
else if constexpr (_TVT::template _S_is)
return vrndmq_f64(__intrin);
else
__assert_unreachable<_Tp>();
}
else
return _Base::_S_floor(__x);
}
// }}}
// _S_ceil {{{
// Lane-wise ceiling of the vector __x.  With __have_neon_a32 the vector is
// converted with __to_intrin and passed to the vrndp intrinsic chosen by the
// _TVT::_S_is checks (vrndp_f32, vrndpq_f32, vrndp_f64 or vrndpq_f64); a
// vector type matching none of the checks reaches __assert_unreachable.
// Without __have_neon_a32 the call is forwarded to _Base::_S_ceil.
template >
_GLIBCXX_SIMD_INTRINSIC static _Tp _S_ceil(_Tp __x)
{
if constexpr (__have_neon_a32)
{
const auto __intrin = __to_intrin(__x);
if constexpr (_TVT::template _S_is)
return vrndp_f32(__intrin);
else if constexpr (_TVT::template _S_is)
return vrndpq_f32(__intrin);
else if constexpr (_TVT::template _S_is)
return vrndp_f64(__intrin);
else if constexpr (_TVT::template _S_is)
return vrndpq_f64(__intrin);
else
__assert_unreachable<_Tp>();
}
else
return _Base::_S_ceil(__x);
}
//}}} }}}
}; // }}}
// _MaskImplNeonMixin {{{
// Mixin supplying a NEON-specific _S_to_bits.  Every case it does not handle
// itself is forwarded to _MaskImplBuiltinMixin (aliased as _Base).
struct _MaskImplNeonMixin
{
using _Base = _MaskImplBuiltinMixin;
// Packs the vector mask __x into a _SanitizedBitMask<_Np>, one bit per lane.
//
// General scheme for 16-byte and 8-byte vectors: reinterpret the lanes as
// integers, AND lane i with the constant 1 << i (lanes at index _Np or above
// are ANDed with 0 and so drop out), then add all lanes together so that the
// surviving bits end up in one integer.  This relies on a set lane having bit
// i set (presumably set lanes are all-ones -- TODO confirm).
//
// During constant evaluation, and for vectors that are neither 16 nor 8 bytes
// wide, _Base::_S_to_bits is used instead.
template
_GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
_S_to_bits(_SimdWrapper<_Tp, _Np> __x)
{
if (__builtin_is_constant_evaluated())
return _Base::_S_to_bits(__x);
using _I = __int_for_sizeof_t<_Tp>;
if constexpr (sizeof(__x) == 16)
{
auto __asint = __vector_bitcast<_I>(__x);
// Zero operand for the pairwise additions: a full 16-byte vector on
// AArch64 (vpaddq_*), an 8-byte half everywhere else (vpadd_*).
#ifdef __aarch64__
[[maybe_unused]] constexpr auto __zero = decltype(__asint)();
#else
[[maybe_unused]] constexpr auto __zero = decltype(__lo64(__asint))();
#endif
if constexpr (sizeof(_Tp) == 1)
{
// 16 one-byte lanes need 16 result bits but a lane holds only 8, so the
// bit weights restart at lane 8: lanes 0-7 and lanes 8-15 each use
// 1 << 0 .. 1 << 7.
constexpr auto __bitsel
= __generate_from_n_evaluations<16, __vector_type_t<_I, 16>>(
[&](auto __i) {
return static_cast<_I>(
__i < _Np ? (__i < 8 ? 1 << __i : 1 << (__i - 8)) : 0);
});
__asint &= __bitsel;
// Three rounds of pairwise addition sum lanes 0-7 into byte 0 and lanes
// 8-15 into byte 1; both bytes are then read as element 0 of a _UShort
// vector.
#ifdef __aarch64__
return __vector_bitcast<_UShort>(
vpaddq_s8(vpaddq_s8(vpaddq_s8(__asint, __zero), __zero),
__zero))[0];
#else
return __vector_bitcast<_UShort>(
vpadd_s8(vpadd_s8(vpadd_s8(__lo64(__asint), __hi64(__asint)),
__zero),
__zero))[0];
#endif
}
else if constexpr (sizeof(_Tp) == 2)
{
// 8 two-byte lanes: lane i contributes 1 << i.
constexpr auto __bitsel
= __generate_from_n_evaluations<8, __vector_type_t<_I, 8>>(
[&](auto __i) {
return static_cast<_I>(__i < _Np ? 1 << __i : 0);
});
__asint &= __bitsel;
#ifdef __aarch64__
// Single across-vector add.
return vaddvq_s16(__asint);
#else
// Three pairwise additions leave the total in lane 0.
return vpadd_s16(
vpadd_s16(vpadd_s16(__lo64(__asint), __hi64(__asint)), __zero),
__zero)[0];
#endif
}
else if constexpr (sizeof(_Tp) == 4)
{
// 4 four-byte lanes: lane i contributes 1 << i.
constexpr auto __bitsel
= __generate_from_n_evaluations<4, __vector_type_t<_I, 4>>(
[&](auto __i) {
return static_cast<_I>(__i < _Np ? 1 << __i : 0);
});
__asint &= __bitsel;
#ifdef __aarch64__
return vaddvq_s32(__asint);
#else
// Two pairwise additions leave the total in lane 0.
return vpadd_s32(vpadd_s32(__lo64(__asint), __hi64(__asint)),
__zero)[0];
#endif
}
else if constexpr (sizeof(_Tp) == 8)
// Only two lanes: bit 0 comes from lane 0, bit 1 from lane 1.
return (__asint[0] & 1) | (__asint[1] & 2);
else
__assert_unreachable<_Tp>();
}
else if constexpr (sizeof(__x) == 8)
{
// Same scheme as above on an 8-byte vector.
auto __asint = __vector_bitcast<_I>(__x);
[[maybe_unused]] constexpr auto __zero = decltype(__asint)();
if constexpr (sizeof(_Tp) == 1)
{
// 8 one-byte lanes: lane i contributes 1 << i.
constexpr auto __bitsel
= __generate_from_n_evaluations<8, __vector_type_t<_I, 8>>(
[&](auto __i) {
return static_cast<_I>(__i < _Np ? 1 << __i : 0);
});
__asint &= __bitsel;
#ifdef __aarch64__
return vaddv_s8(__asint);
#else
return vpadd_s8(vpadd_s8(vpadd_s8(__asint, __zero), __zero),
__zero)[0];
#endif
}
else if constexpr (sizeof(_Tp) == 2)
{
// 4 two-byte lanes: lane i contributes 1 << i.
constexpr auto __bitsel
= __generate_from_n_evaluations<4, __vector_type_t<_I, 4>>(
[&](auto __i) {
return static_cast<_I>(__i < _Np ? 1 << __i : 0);
});
__asint &= __bitsel;
#ifdef __aarch64__
return vaddv_s16(__asint);
#else
return vpadd_s16(vpadd_s16(__asint, __zero), __zero)[0];
#endif
}
else if constexpr (sizeof(_Tp) == 4)
{
// Two four-byte lanes with fixed weights 1 and 2.  Unlike the other
// branches the weights are not clipped against _Np here.
__asint &= __make_vector<_I>(0x1, 0x2);
#ifdef __aarch64__
return vaddv_s32(__asint);
#else
return vpadd_s32(__asint, __zero)[0];
#endif
}
else
__assert_unreachable<_Tp>();
}
else
return _Base::_S_to_bits(__x);
}
};
// }}}
// _MaskImplNeon {{{
template
struct _MaskImplNeon : _MaskImplNeonMixin, _MaskImplBuiltin<_Abi>
{
// Use _S_to_maskvector as declared by _MaskImplBuiltinMixin.
using _MaskImplBuiltinMixin::_S_to_maskvector;
// Use the NEON-specific _S_to_bits from the mixin base (presumably this also
// settles which _S_to_bits wins over one reachable through
// _MaskImplBuiltin<_Abi> -- TODO confirm).
using _MaskImplNeonMixin::_S_to_bits;
// Shorthand for the builtin mask implementation; operations without a
// NEON-specific version below forward to it.
using _Base = _MaskImplBuiltin<_Abi>;
using _Base::_S_convert;
// _S_all_of {{{
// Returns true if every element of the mask __k is set.
//
// The mask is first OR-ed with the complement of _Abi::_S_implicit_mask, so
// every lane the implicit mask leaves unselected (presumably the padding
// lanes of a partial vector) counts as set and cannot make the test fail.
// A 16-byte mask is then viewed as two integers, both of which must be -1
// (checked by comparing their sum with -2).  A mask of at most 8 bytes is
// viewed as one integer and compared with -1.  Other sizes are unreachable.
template
_GLIBCXX_SIMD_INTRINSIC static bool _S_all_of(simd_mask<_Tp, _Abi> __k)
{
const auto __kk
= __vector_bitcast(__k._M_data)
| ~__vector_bitcast(_Abi::template _S_implicit_mask<_Tp>());
if constexpr (sizeof(__k) == 16)
{
const auto __x = __vector_bitcast(__kk);
return __x[0] + __x[1] == -2;
}
else if constexpr (sizeof(__k) <= 8)
return __bit_cast<__int_for_sizeof_t>(__kk) == -1;
else
__assert_unreachable<_Tp>();
}
// }}}
// _S_any_of {{{
template
_GLIBCXX_SIMD_INTRINSIC static bool _S_any_of(simd_mask<_Tp, _Abi> __k)
{
const auto __kk
= __vector_bitcast(__k._M_data)
| ~__vector_bitcast(_Abi::template _S_implicit_mask<_Tp>());
if constexpr (sizeof(__k) == 16)
{
const auto __x = __vector_bitcast(__kk);
return (__x[0] | __x[1]) != 0;
}
else if constexpr (sizeof(__k) <= 8)
return __bit_cast<__int_for_sizeof_t>(__kk) != 0;
else
__assert_unreachable<_Tp>();
}
// }}}
// _S_none_of {{{
// Returns true if no element of the mask __k is set.
//
// _Abi::_S_masked is applied to the mask data first (presumably it clears the
// lanes outside the ABI's implicit mask, so padding cannot make the test
// fail -- TODO confirm).  A 16-byte mask is then viewed as two integers whose
// bitwise OR must be 0; a mask of at most 8 bytes is viewed as one integer
// and compared with 0.  Other sizes are unreachable.
template
_GLIBCXX_SIMD_INTRINSIC static bool _S_none_of(simd_mask<_Tp, _Abi> __k)
{
const auto __kk = _Abi::_S_masked(__k._M_data);
if constexpr (sizeof(__k) == 16)
{
const auto __x = __vector_bitcast(__kk);
return (__x[0] | __x[1]) == 0;
}
else if constexpr (sizeof(__k) <= 8)
return __bit_cast<__int_for_sizeof_t>(__kk) == 0;
else
__assert_unreachable<_Tp>();
}
// }}}
// _S_some_of {{{
template
_GLIBCXX_SIMD_INTRINSIC static bool _S_some_of(simd_mask<_Tp, _Abi> __k)
{
if constexpr (sizeof(__k) <= 8)
{
const auto __kk = __vector_bitcast(__k._M_data)
| ~__vector_bitcast(
_Abi::template _S_implicit_mask<_Tp>());
using _Up = make_unsigned_t<__int_for_sizeof_t>;
return __bit_cast<_Up>(__kk) + 1 > 1;
}
else
return _Base::_S_some_of(__k);
}
// }}}
// _S_popcount {{{
// Counts the set elements of the mask __k.
//
// The lanes are reinterpreted as signed integers and summed; the negated sum
// is returned.  This yields the count provided a set lane reads as -1 and a
// clear lane as 0 (presumably guaranteed by the mask representation -- TODO
// confirm).  For 1-, 2- and 4-byte elements the low and high 64-bit halves
// are added first (__hi64z presumably yields zeros when the vector has no
// high half -- TODO confirm), then pairwise additions against zero vectors
// fold the remaining lanes into lane 0.  For 8-byte elements the vector must
// be 16 bytes wide and its two lanes are added directly.
//
// NOTE(review): lanes outside the ABI's implicit mask are not cleared here,
// unlike in _S_none_of; confirm that they are always zero on entry.
// NOTE(review): no branch covers an element size other than 1, 2, 4 or 8; in
// that case the function would end without returning a value.
template
_GLIBCXX_SIMD_INTRINSIC static int _S_popcount(simd_mask<_Tp, _Abi> __k)
{
if constexpr (sizeof(_Tp) == 1)
{
const auto __s8 = __vector_bitcast<_SChar>(__k._M_data);
// Per lane: 0, -1 or -2 after adding the two halves.
int8x8_t __tmp = __lo64(__s8) + __hi64z(__s8);
// Three pairwise additions reduce 8 lanes to lane 0.
return -vpadd_s8(vpadd_s8(vpadd_s8(__tmp, int8x8_t()), int8x8_t()),
int8x8_t())[0];
}
else if constexpr (sizeof(_Tp) == 2)
{
const auto __s16 = __vector_bitcast(__k._M_data);
int16x4_t __tmp = __lo64(__s16) + __hi64z(__s16);
// Two pairwise additions reduce 4 lanes to lane 0.
return -vpadd_s16(vpadd_s16(__tmp, int16x4_t()), int16x4_t())[0];
}
else if constexpr (sizeof(_Tp) == 4)
{
const auto __s32 = __vector_bitcast(__k._M_data);
int32x2_t __tmp = __lo64(__s32) + __hi64z(__s32);
// One pairwise addition reduces 2 lanes to lane 0.
return -vpadd_s32(__tmp, int32x2_t())[0];
}
else if constexpr (sizeof(_Tp) == 8)
{
static_assert(sizeof(__k) == 16);
const auto __s64 = __vector_bitcast(__k._M_data);
return -(__s64[0] + __s64[1]);
}
}
// }}}
// _S_find_first_set {{{
// Forwards unchanged to _Base::_S_find_first_set and returns its result; no
// NEON-specific code path exists yet (see the TODO below).
template
_GLIBCXX_SIMD_INTRINSIC static int
_S_find_first_set(simd_mask<_Tp, _Abi> __k)
{
// TODO: the _Base implementation is not optimal for NEON
return _Base::_S_find_first_set(__k);
}
// }}}
// _S_find_last_set {{{
// Forwards unchanged to _Base::_S_find_last_set and returns its result; no
// NEON-specific code path exists yet (see the TODO below).
template
_GLIBCXX_SIMD_INTRINSIC static int
_S_find_last_set(simd_mask<_Tp, _Abi> __k)
{
// TODO: the _Base implementation is not optimal for NEON
return _Base::_S_find_last_set(__k);
}
// }}}
}; // }}}
_GLIBCXX_SIMD_END_NAMESPACE
#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_NEON_H_
// vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80
@
1.1.1.1
log
@initial import of GCC 12.3.0.
major changes in GCC 11 included:
- The default mode for C++ is now -std=gnu++17 instead of -std=gnu++14.
- When building GCC itself, the host compiler must now support C++11,
rather than C++98.
- Some short options of the gcov tool have been renamed: -i to -j and
-j to -H.
- ThreadSanitizer improvements.
- Introduce Hardware-assisted AddressSanitizer support.
- For targets that produce DWARF debugging information GCC now defaults
to DWARF version 5. This can produce up to 25% more compact debug
information compared to earlier versions.
- Many optimisations.
- The existing malloc attribute has been extended so that it can be
used to identify allocator/deallocator API pairs. A pair of new
-Wmismatched-dealloc and -Wmismatched-new-delete warnings are added.
- Other new warnings:
-Wsizeof-array-div, enabled by -Wall, warns about divisions of two
sizeof operators when the first one is applied to an array and the
divisor does not equal the size of the array element.
-Wstringop-overread, enabled by default, warns about calls to string
functions reading past the end of the arrays passed to them as
arguments.
-Wtsan, enabled by default, warns about unsupported features in
ThreadSanitizer (currently std::atomic_thread_fence).
- Enhanced warnings:
-Wfree-nonheap-object detects many more instances of calls to
deallocation functions with pointers not returned from a dynamic
memory allocation function.
-Wmaybe-uninitialized diagnoses passing pointers or references to
uninitialized memory to functions taking const-qualified arguments.
-Wuninitialized detects reads from uninitialized dynamically
allocated memory.
-Warray-parameter warns about functions with inconsistent array forms.
-Wvla-parameter warns about functions with inconsistent VLA forms.
- Several new features from the upcoming C2X revision of the ISO C
standard are supported with -std=c2x and -std=gnu2x.
- Several C++20 features have been implemented.
- The C++ front end has experimental support for some of the upcoming
C++23 draft.
- Several new C++ warnings.
- Enhanced Arm, AArch64, x86, and RISC-V CPU support.
- The implementation of how program state is tracked within
-fanalyzer has been completely rewritten with many enhancements.
see https://gcc.gnu.org/gcc-11/changes.html for a full list.
major changes in GCC 12 include:
- An ABI incompatibility between C and C++ when passing or returning
by value certain aggregates containing zero width bit-fields has
been discovered on various targets. x86-64, ARM and AArch64
will always ignore them (so there is a C ABI incompatibility
between GCC 11 and earlier with GCC 12 or later), PowerPC64 ELFv2
always take them into account (so there is a C++ ABI
incompatibility, GCC 4.4 and earlier compatible with GCC 12 or
later, incompatible with GCC 4.5 through GCC 11). RISC-V has
changed the handling of these already starting with GCC 10. As
the ABI requires, MIPS takes them into account handling function
return values so there is a C++ ABI incompatibility with GCC 4.5
through 11.
- STABS: Support for emitting the STABS debugging format is
deprecated and will be removed in the next release. All ports now
default to emit DWARF (version 2 or later) debugging info or are
obsoleted.
- Vectorization is enabled at -O2 which is now equivalent to the
original -O2 -ftree-vectorize -fvect-cost-model=very-cheap.
- GCC now supports the ShadowCallStack sanitizer.
- Support for __builtin_shufflevector compatible with the clang
language extension was added.
- Support for attribute unavailable was added.
- Support for __builtin_dynamic_object_size compatible with the
clang language extension was added.
- New warnings:
-Wbidi-chars warns about potentially misleading UTF-8
bidirectional control characters.
-Warray-compare warns about comparisons between two operands of
array type.
- Some new features from the upcoming C2X revision of the ISO C
standard are supported with -std=c2x and -std=gnu2x.
- Several C++23 features have been implemented.
- Many C++ enhancements across warnings and -f options.
see https://gcc.gnu.org/gcc-12/changes.html for a full list.
@
text
@@
1.1.1.1.2.1
log
@Sync with HEAD.
@
text
@d64 1
a64 1
__execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d78 1
a78 1
__execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d87 1
a87 1
_GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
d90 37
a126 35
if (not __builtin_is_constant_evaluated())
{
constexpr size_t _Np = __x.size();
if constexpr (sizeof(__x) == 16 && _Np >= 4
&& !_Abi::template _S_is_partial<_Tp>)
{
const auto __halves = split>>(__x);
const auto __y = __binary_op(__halves[0], __halves[1]);
return _SimdImplNeon>::_S_reduce(
__y, static_cast<_BinaryOperation&&>(__binary_op));
}
else if constexpr (_Np == 8)
{
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<1, 0, 3, 2, 5, 4, 7, 6>(__x._M_data)));
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<3, 2, 1, 0, 7, 6, 5, 4>(__x._M_data)));
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<7, 6, 5, 4, 3, 2, 1, 0>(__x._M_data)));
return __x[0];
}
else if constexpr (_Np == 4)
{
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<1, 0, 3, 2>(__x._M_data)));
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<3, 2, 1, 0>(__x._M_data)));
return __x[0];
}
else if constexpr (_Np == 2)
{
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<1, 0>(__x._M_data)));
return __x[0];
}
d128 3
a130 1
return _Base::_S_reduce(__x, static_cast<_BinaryOperation&&>(__binary_op));
d137 1
a137 2
_GLIBCXX_SIMD_INTRINSIC static _Tp
_S_sqrt(_Tp __x)
d160 1
a160 2
_GLIBCXX_SIMD_INTRINSIC static _TW
_S_trunc(_TW __x)
d219 1
a219 2
_GLIBCXX_SIMD_INTRINSIC static _Tp
_S_floor(_Tp __x)
d242 1
a242 2
_GLIBCXX_SIMD_INTRINSIC static _Tp
_S_ceil(_Tp __x)
d289 1
a289 1
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d309 1
a309 1
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d325 1
a325 1
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d349 1
a349 1
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d364 1
a364 1
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d403 1
a403 2
_GLIBCXX_SIMD_INTRINSIC static bool
_S_all_of(simd_mask<_Tp, _Abi> __k)
d422 1
a422 2
_GLIBCXX_SIMD_INTRINSIC static bool
_S_any_of(simd_mask<_Tp, _Abi> __k)
d441 1
a441 2
_GLIBCXX_SIMD_INTRINSIC static bool
_S_none_of(simd_mask<_Tp, _Abi> __k)
d475 1
a475 2
_GLIBCXX_SIMD_INTRINSIC static int
_S_popcount(simd_mask<_Tp, _Abi> __k)
@
1.1.1.2
log
@import GCC 12.4.0.
this includes at least 85 GCC PRs fixed, 2 C, 17 C++, 16 libstdc++-v3,
at least 13 target-specific (x86, arm64, riscv mostly), and at least
24 optimisation PRs.
@
text
@d64 1
a64 1
__execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d78 1
a78 1
__execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d87 1
a87 1
_GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
d90 37
a126 35
if (not __builtin_is_constant_evaluated())
{
constexpr size_t _Np = __x.size();
if constexpr (sizeof(__x) == 16 && _Np >= 4
&& !_Abi::template _S_is_partial<_Tp>)
{
const auto __halves = split>>(__x);
const auto __y = __binary_op(__halves[0], __halves[1]);
return _SimdImplNeon>::_S_reduce(
__y, static_cast<_BinaryOperation&&>(__binary_op));
}
else if constexpr (_Np == 8)
{
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<1, 0, 3, 2, 5, 4, 7, 6>(__x._M_data)));
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<3, 2, 1, 0, 7, 6, 5, 4>(__x._M_data)));
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<7, 6, 5, 4, 3, 2, 1, 0>(__x._M_data)));
return __x[0];
}
else if constexpr (_Np == 4)
{
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<1, 0, 3, 2>(__x._M_data)));
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<3, 2, 1, 0>(__x._M_data)));
return __x[0];
}
else if constexpr (_Np == 2)
{
__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
__vector_permute<1, 0>(__x._M_data)));
return __x[0];
}
d128 3
a130 1
return _Base::_S_reduce(__x, static_cast<_BinaryOperation&&>(__binary_op));
d137 1
a137 2
_GLIBCXX_SIMD_INTRINSIC static _Tp
_S_sqrt(_Tp __x)
d160 1
a160 2
_GLIBCXX_SIMD_INTRINSIC static _TW
_S_trunc(_TW __x)
d219 1
a219 2
_GLIBCXX_SIMD_INTRINSIC static _Tp
_S_floor(_Tp __x)
d242 1
a242 2
_GLIBCXX_SIMD_INTRINSIC static _Tp
_S_ceil(_Tp __x)
d289 1
a289 1
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d309 1
a309 1
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d325 1
a325 1
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d349 1
a349 1
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d364 1
a364 1
[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
d403 1
a403 2
_GLIBCXX_SIMD_INTRINSIC static bool
_S_all_of(simd_mask<_Tp, _Abi> __k)
d422 1
a422 2
_GLIBCXX_SIMD_INTRINSIC static bool
_S_any_of(simd_mask<_Tp, _Abi> __k)
d441 1
a441 2
_GLIBCXX_SIMD_INTRINSIC static bool
_S_none_of(simd_mask<_Tp, _Abi> __k)
d475 1
a475 2
_GLIBCXX_SIMD_INTRINSIC static int
_S_popcount(simd_mask<_Tp, _Abi> __k)
@
1.1.1.3
log
@initial import of GCC 14.3.0.
major changes in GCC 13:
- improved sanitizer
- zstd debug info compression
- LTO improvements
- SARIF based diagnostic support
- new warnings: -Wxor-used-as-pow, -Wenum-int-mismatch, -Wself-move,
-Wdangling-reference
- many new -Wanalyzer* specific warnings
- enhanced warnings: -Wpessimizing-move, -Wredundant-move
- new attributes to mark file descriptors, c++23 "assume"
- several C23 features added
- several C++23 features added
- many new features for Arm, x86, RISC-V
major changes in GCC 14:
- more strict C99 or newer support
- ia64* marked deprecated (but seemingly still in GCC 15.)
- several new hardening features
- support for "hardbool", which can have user supplied values of true/false
- explicit support for stack scrubbing upon function exit
- better auto-vectorisation support
- added clang-compatible __has_feature and __has_extension
- more C23, including -std=c23
- several C++26 features added
- better diagnostics in C++ templates
- new warnings: -Wnrvo, -Welaborated-enum-base
- many new features for Arm, x86, RISC-V
- possible ABI breaking change for SPARC64 and small structures with arrays
of floats.
@
text
@d3 1
a3 1
// Copyright (C) 2020-2024 Free Software Foundation, Inc.
@