// Copyright (c) 2020 Nicholas Corgan // SPDX-License-Identifier: BSL-1.0 #include "SIMDConvert.hpp" #include #include #include #include #include #include #include #include #include #include // SIMDPP_USER_ARCH_INFO used by SIMDPP_MAKE_DISPATCHER below #if SIMDPP_HAS_GET_ARCH_RAW_CPUID #define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_raw_cpuid() #elif SIMDPP_HAS_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS #define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_gcc_builtin_cpu_supports() #elif SIMDPP_HAS_GET_ARCH_LINUX_CPUINFO #define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_linux_cpuinfo() #else #error "Unsupported platform" #endif /* NOTE(review): the #include targets and the template parameter and argument lists appear to be missing from this copy of the file; restore them from the upstream source before building. */ namespace SIMDPP_ARCH_NAMESPACE { namespace detail { /* * Make SFINAE structs out of arch-dependent #defines. Each copy * of this file will be compiled with different compiler flags, * so this SFINAE will behave as needed for each arch. */ template struct SIMDTraits { static constexpr bool Supported = false; template static constexpr bool canConvertTo() {return false;} }; template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_INT8_SIZE; template using SIMDPPType = simdpp::int8; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT8_SIMD; template static constexpr bool canConvertTo() {return true;} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::int8::FastSize> outReg = simdpp::to_int8(inReg); return outReg; } }; template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_INT8_SIZE; template using SIMDPPType = simdpp::int8; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT8_SIMD; template static constexpr bool canConvertTo() {return true;} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::int8::FastSize> outReg = simdpp::to_int8(inReg); return outReg; } }; template <> struct SIMDTraits { 
static constexpr size_t FastSize = SIMDPP_FAST_INT16_SIZE; template using SIMDPPType = simdpp::int16; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT16_SIMD; template static constexpr bool canConvertTo() {return true;} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::int16::FastSize> outReg = simdpp::to_int16(inReg); return outReg; } }; template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_INT32_SIZE; template using SIMDPPType = simdpp::int32; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT32_SIMD; template static constexpr bool canConvertTo() {return true;} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::int32::FastSize> outReg = simdpp::to_int32(inReg); return outReg; } }; // Buggy template <> constexpr bool SIMDTraits::canConvertTo() {return false;} // Buggy template <> constexpr bool SIMDTraits::canConvertTo() {return false;} // Buggy template <> constexpr bool SIMDTraits::canConvertTo() {return false;} template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_INT64_SIZE; template using SIMDPPType = simdpp::int64; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT64_SIMD; template static constexpr bool canConvertTo() {return false;} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::int64::FastSize> outReg = simdpp::to_int64(inReg); return outReg; } }; // Depends on instruction set template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_INT64_TO_FLOAT32_CONVERSION);} // Depends on instruction set template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_INT64_TO_FLOAT64_CONVERSION);} template <> struct SIMDTraits { static constexpr 
size_t FastSize = SIMDPP_FAST_INT64_SIZE; template using SIMDPPType = simdpp::int64; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT64_SIMD; template static constexpr bool canConvertTo() {return SIMDTraits::canConvertTo();} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::int64::FastSize> outReg = simdpp::to_int64(inReg); return outReg; } }; template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_INT8_SIZE; template using SIMDPPType = simdpp::uint8; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT8_SIMD; template static constexpr bool canConvertTo() {return true;} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::uint8::FastSize> outReg = simdpp::to_uint8(inReg); return outReg; } }; template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_INT16_SIZE; template using SIMDPPType = simdpp::uint16; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT16_SIMD; template static constexpr bool canConvertTo() {return true;} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::uint16::FastSize> outReg = simdpp::to_uint16(inReg); return outReg; } }; template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_INT32_SIZE; template using SIMDPPType = simdpp::uint32; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT32_SIMD; template static constexpr bool canConvertTo() {return true;} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::uint32::FastSize> outReg = simdpp::to_uint32(inReg); return outReg; } }; // Buggy template <> constexpr bool SIMDTraits::canConvertTo() {return 
false;} // Buggy template <> constexpr bool SIMDTraits::canConvertTo() {return false;} // Buggy template <> constexpr bool SIMDTraits::canConvertTo() {return false;} template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_INT64_SIZE; template using SIMDPPType = simdpp::uint64; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT64_SIMD; template static constexpr bool canConvertTo() {return false;} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::uint64::FastSize> outReg = simdpp::to_uint64(inReg); return outReg; } }; // Depends on instruction set template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_INT64_TO_FLOAT32_CONVERSION);} // Depends on instruction set template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_INT64_TO_FLOAT64_CONVERSION);} template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_INT64_SIZE; template using SIMDPPType = simdpp::uint64; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_INT64_SIMD; template static constexpr bool canConvertTo() {return SIMDTraits::canConvertTo();} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::uint64::FastSize> outReg = simdpp::to_uint64(inReg); return outReg; } }; template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_INT64_TO_FLOAT32_CONVERSION);} template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_INT64_TO_FLOAT64_CONVERSION);} template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_FLOAT32_SIZE; template using SIMDPPType = simdpp::float32; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_FLOAT32_SIMD; template static constexpr bool canConvertTo() {return true;} template static 
SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::float32::FastSize> outReg = simdpp::to_float32(inReg); return outReg; } }; template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_FLOAT32_TO_INT64_CONVERSION);} template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_FLOAT32_TO_INT64_CONVERSION);} template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_FLOAT32_TO_UINT64_CONVERSION);} template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_FLOAT32_TO_UINT64_CONVERSION);} template <> struct SIMDTraits { static constexpr size_t FastSize = SIMDPP_FAST_FLOAT64_SIZE; template using SIMDPPType = simdpp::float64; using FastType = SIMDPPType; // Depends on instruction set static constexpr bool Supported = SIMDPP_HAS_FLOAT64_SIMD; template static constexpr bool canConvertTo() {return true;} template static SIMDPPType::FastSize> convertFromFastType(typename SIMDTraits::FastType inReg) { simdpp::float64::FastSize> outReg = simdpp::to_float64(inReg); return outReg; } }; template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_FLOAT64_TO_INT64_CONVERSION);} template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_FLOAT64_TO_INT64_CONVERSION);} template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_FLOAT64_TO_UINT32_CONVERSION);} template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_FLOAT64_TO_UINT64_CONVERSION);} template <> constexpr bool SIMDTraits::canConvertTo() {return bool(SIMDPP_HAS_FLOAT64_TO_UINT64_CONVERSION);} template struct BothComplex: std::integral_constant::value && Pothos::Util::is_complex::value > {}; template struct NeitherComplex: std::integral_constant::value && !Pothos::Util::is_complex::value > {}; // See: libsimdpp/simdpp/capabilities.h template struct CanInstructionSetConvert: std::integral_constant::value && !std::is_same::value && // For 
some reason, double -> integral conversions don't compile with some // architectures. #if SIMDPP_ARCH_PP_NS_USE_POPCNT_INSN !(std::is_same::value && std::is_integral::value) && #endif !(std::is_floating_point::value && std::is_integral::value) && SIMDTraits::Supported && SIMDTraits::Supported && SIMDTraits::template canConvertTo()> {}; template using EnableIfInstructionSetCanConvert = typename std::enable_if< CanInstructionSetConvert::value, Ret >::type; template using EnableIfNonComplexTypesMatch = typename std::enable_if< std::is_same::value && !Pothos::Util::is_complex::value, Ret >::type; template using EnableIfNeitherComplex = typename std::enable_if< CanInstructionSetConvert::value && NeitherComplex::value, Ret >::type; template using EnableIfBothComplex = typename std::enable_if::value, Ret>::type; template using EnableIfConversionUnsupported = typename std::enable_if< !CanInstructionSetConvert::value && NeitherComplex::value && !std::is_same::value, Ret >::type; /* SIMD overload: converts bufferLen / FastSize whole frames through simdpp::load_u, convertFromFastType and simdpp::store_u, then converts the leftover elements one at a time with static_cast. */ template static EnableIfInstructionSetCanConvert simdConvertBuffer( const InType* in, OutType* out, size_t bufferLen) { static constexpr size_t FrameSize = SIMDTraits::FastSize; using SIMDPPTypeIn = typename SIMDTraits::FastType; const auto numFrames = bufferLen / FrameSize; const InType* inPtr = in; OutType* outPtr = out; for(size_t frameIndex = 0; frameIndex < numFrames; ++frameIndex) { SIMDPPTypeIn inReg = simdpp::load_u(inPtr); auto outReg = SIMDTraits::template convertFromFastType(inReg); simdpp::store_u(outPtr, outReg); inPtr += FrameSize; outPtr += FrameSize; } // Perform remaining conversions manually. 
for(size_t i = (FrameSize * numFrames); i < bufferLen; ++i) { out[i] = static_cast(in[i]); } } /* Matching non-complex types: copies bufferLen elements with a single memcpy. */ template static EnableIfNonComplexTypesMatch simdConvertBuffer( const InType* in, OutType* out, size_t bufferLen) { std::memcpy(out, in, (bufferLen*sizeof(InType))); } /* Fallback for non-complex pairs that CanInstructionSetConvert rejects: element-wise static_cast. */ template static EnableIfConversionUnsupported simdConvertBuffer( const InType* in, OutType* out, size_t bufferLen) { static_assert(NeitherComplex::value, "Unsupported overload called with complex types"); for(size_t i = 0; i < bufferLen; ++i) { out[i] = static_cast(in[i]); } } /* Complex to complex: reinterprets both buffers as arrays of 2 * bufferLen scalars (the value_type of each complex type) and forwards to the scalar overloads. */ template static EnableIfBothComplex simdConvertBuffer( const InType* in, OutType* out, size_t bufferLen) { using ScalarInType = typename InType::value_type; using ScalarOutType = typename OutType::value_type; static_assert(NeitherComplex::value, "Complex overload called with double-complex types"); simdConvertBuffer( (const ScalarInType*)in, (ScalarOutType*)out, (bufferLen*2)); } } /* Type-erased entry point: casts the void pointers to InType and OutType and forwards to the detail overloads. */ template void simdConvertBuffer(const void* in, void* out, size_t bufferLen) { detail::simdConvertBuffer( (const InType*) in, (OutType*)out, bufferLen); } } // This generates the underlying code that queries the runnable instruction // sets and chooses the most optimal to use. This code is only generated in // the first of the files generated by SIMDPP's CMake module. SIMDPP_MAKE_DISPATCHER( (template) () (void)(simdConvertBuffer) ((const void*) in, (void*) out, (size_t) bufferLen)) // Separate dispatcher macros because there are only // underlying overloads for so many template specializations // at once. 
#define INSTANTIATE_DISPATCHERS(T) \ SIMDPP_INSTANTIATE_DISPATCHER( \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer(const void* in, void* out, size_t bufferLen)) \ ) \ SIMDPP_INSTANTIATE_DISPATCHER( \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, 
std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)), \ (template void simdConvertBuffer, std::complex>(const void* in, void* out, size_t bufferLen)) \ ) INSTANTIATE_DISPATCHERS(char) INSTANTIATE_DISPATCHERS(std::int8_t) INSTANTIATE_DISPATCHERS(std::int16_t) INSTANTIATE_DISPATCHERS(std::int32_t) INSTANTIATE_DISPATCHERS(long) INSTANTIATE_DISPATCHERS(long long) INSTANTIATE_DISPATCHERS(std::uint8_t) INSTANTIATE_DISPATCHERS(std::uint16_t) INSTANTIATE_DISPATCHERS(std::uint32_t) INSTANTIATE_DISPATCHERS(unsigned long) INSTANTIATE_DISPATCHERS(unsigned long long) INSTANTIATE_DISPATCHERS(float) INSTANTIATE_DISPATCHERS(double)