%PDF- %PDF-
Direktori : /usr/include/xsimd/memory/ |
Current File : //usr/include/xsimd/memory/xsimd_load_store.hpp |
/*************************************************************************** * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * * Martin Renou * * Copyright (c) QuantStack * * * * Distributed under the terms of the BSD 3-Clause License. * * * * The full license is in the file LICENSE, distributed with this software. * ****************************************************************************/ #ifndef XSIMD_LOAD_STORE_HPP #define XSIMD_LOAD_STORE_HPP #include "../config/xsimd_config.hpp" #include "../types/xsimd_traits.hpp" namespace xsimd { /****************************** * Data transfer instructions * ******************************/ /** * @defgroup data_transfer Data Transfer Instructions */ /** * @ingroup data_transfer * Returns a batch with all values initialized to \c value. * @param value the scalar used to initialize the batch. * @return the batch wrapping the highest available instruction set. */ template <class T1, class T2 = T1> simd_return_type<T1, T2> set_simd(const T1& value); /** * @ingroup data_transfer * Loads the memory array pointed to by \c src into a batch and returns it. * \c src is required to be aligned. * @param src the pointer to the memory array to load. * @return the batch wrapping the highest available instruction set. */ template <class T1, class T2 = T1> simd_return_type<T1, T2> load_aligned(const T1* src); /** * @ingroup data_transfer * Loads the memory array pointed to by \c src into the batch \c dst. * \c src is required to be aligned. * @param src the pointer to the memory array to load. * @param dst the destination batch. */ template <class T1, class T2 = T1> void load_aligned(const T1* src, simd_type<T2>& dst); /** * @ingroup data_transfer * Loads the memory arrays pointed to by \c real_src and \c imag_src * into a batch of complex numbers and returns it. \c real_src and * \c imag_src are required to be aligned. * @param real_src the pointer to the memory array containing the real part. * @param imag_src the pointer to the memory array containing the imaginary part. * @return the batch of complex wrapping the highest available instruction set. */ template <class T1, class T2> simd_return_type<T1, T2> load_aligned(const T1* real_src, const T1* imag_src); /** * @ingroup data_transfer * Loads the memory arrays pointed to by \c real_src and \c imag_src * into the batch \c dst. \c real_src and \c imag_src are required to be aligned. * @param real_src the pointer to the memory array containing the real part. * @param imag_src the pointer to the memory array containing the imaginary part. * @param dst the destination batch. */ template <class T1, class T2> void load_aligned(const T1* real_src, const T1* imag_src, simd_type<T2>& dst); /** * @ingroup data_transfer * Loads the memory array pointed to by \c src into a batch and returns it. * \c src is not required to be aligned. * @param src the pointer to the memory array to load. * @return the batch wrapping the highest available instruction set. */ template <class T1, class T2 = T1> simd_return_type<T1, T2> load_unaligned(const T1* src); /** * @ingroup data_transfer * Loads the memory array pointed to by \c src into the batch \c dst. * \c src is not required to be aligned. * @param src the pointer to the memory array to load. * @param dst the destination batch. */ template <class T1, class T2 = T1> void load_unaligned(const T1* src, simd_type<T2>& dst); /** * @ingroup data_transfer * Loads the memory arrays pointed to by \c real_src and \c imag_src * into a batch of complex numbers and returns it. \c real_src and * \c imag_src are not required to be aligned. * @param real_src the pointer to the memory array containing the real part. * @param imag_src the pointer to the memory array containing the imaginary part. * @return the batch of complex wrapping the highest available instruction set. */ template <class T1, class T2> simd_return_type<T1, T2> load_unaligned(const T1* real_src, const T1* imag_src); /** * @ingroup data_transfer * Loads the memory arrays pointed to by \c real_src and \c imag_src * into the batch \c dst. \c real_src and \c imag_src are not required to be aligned. * @param real_src the pointer to the memory array containing the real part. * @param imag_src the pointer to the memory array containing the imaginary part. * @param dst the destination batch. */ template <class T1, class T2> void load_unaligned(const T1* real_src, const T1* imag_src, simd_type<T2>& dst); /** * @ingroup data_transfer * Stores the batch \c src into the memory array pointed to by \c dst. * \c dst is required to be aligned. * @param dst the pointer to the memory array. * @param src the batch to store. */ template <class T1, class T2 = T1> void store_aligned(T1* dst, const simd_type<T2>& src); /** * @ingroup data_transfer * Stores the boolean batch \c src into the memory array pointed to by \c dst. * \c dst is required to be aligned. * @param dst the pointer to the memory array. * @param src the boolean batch to store. */ template <class T1, class T2 = T1> void store_aligned(T1* dst, const simd_bool_type<T2>& src); /** * @ingroup data_transfer * Stores the batch \c src into the memory array pointed to by \c dst. * \c dst is not required to be aligned. * @param dst the pointer to the memory array. * @param src the batch to store. */ template <class T1, class T2 = T1> void store_unaligned(T1* dst, const simd_type<T2>& src); /** * @ingroup data_transfer * Stores the boolean batch \c src into the memory array pointed to by \c dst. * \c dst is not required to be aligned. * @param dst the pointer to the memory array. * @param src the boolean batch to store. */ template <class T1, class T2 = T1> void store_unaligned(T1* dst, const simd_bool_type<T2>& src); /** * @ingroup data_transfer * Stores the batch of complex numbers \c src into the memory arrays pointed * to by \c real_dst and \c imag_dst. \c real_dst and \c imag_dst are required * to be aligned. * @param real_dst the pointer to the memory array of the real part. * @param imag_dst the pointer to the memory array of the imaginary part. * @param src the batch to store. */ template <class T1, class T2> void store_aligned(T1* real_dst, T1* imag_dst, const simd_type<T2>& src); /** * @ingroup data_transfer * Stores the batch of complex numbers \c src into the memory arrays pointed * to by \c real_dst and \c imag_dst. \c real_dst and \c imag_dst are not required * to be aligned. * @param real_dst the pointer to the memory array of the real part. * @param imag_dst the pointer to the memory array of the imaginary part. * @param src the batch to store. */ template <class T1, class T2> void store_unaligned(T1* real_dst, T1* imag_dst, const simd_type<T2>& src); // Load / store generic functions /** * @defgroup generic_load_store Generic load and store */ /** * @ingroup generic_load_store * Loads the memory array pointed to by \c src into a batch and returns it. * \c src is required to be aligned. * @param src the pointer to the memory array to load. * @return the batch wrapping the highest available instruction set. */ template <class T1, class T2 = T1> simd_return_type<T1, T2> load_simd(const T1* src, aligned_mode); /** * @ingroup generic_load_store * Loads the memory array pointed to by \c src into the batch \c dst. * \c src is required to be aligned. * @param src the pointer to the memory array to load. * @param dst the destination batch. */ template <class T1, class T2 = T1> void load_simd(const T1* src, simd_type<T2>& dst, aligned_mode); /** * @ingroup generic_load_store * Loads the memory arrays pointed to by \c real_src and \c imag_src * into a batch of complex numbers and returns it. \c real_src and * \c imag_src are required to be aligned. * @param real_src the pointer to the memory array containing the real part. * @param imag_src the pointer to the memory array containing the imaginary part. * @return the batch of complex wrapping the highest available instruction set. */ template <class T1, class T2> simd_return_type<T1, T2> load_simd(const T1* real_src, const T1* imag_src, aligned_mode); /** * @ingroup generic_load_store * Loads the memory arrays pointed to by \c real_src and \c imag_src * into the batch \c dst. \c real_src and \c imag_src are required to be aligned. * @param real_src the pointer to the memory array containing the real part. * @param imag_src the pointer to the memory array containing the imaginary part. * @param dst the destination batch. */ template <class T1, class T2> void load_simd(const T1* real_src, const T1* imag_src, simd_type<T2>& dst, aligned_mode); /** * @ingroup generic_load_store * Loads the memory array pointed to by \c src into a batch and returns it. * \c src is not required to be aligned. * @param src the pointer to the memory array to load. * @return the batch wrapping the highest available instruction set. */ template <class T1, class T2 = T1> simd_return_type<T1, T2> load_simd(const T1* src, unaligned_mode); /** * @ingroup generic_load_store * Loads the memory array pointed to by \c src into the batch \c dst. * \c src is not required to be aligned. * @param src the pointer to the memory array to load. * @param dst the destination batch. */ template <class T1, class T2 = T1> void load_simd(const T1* src, simd_type<T2>& dst, unaligned_mode); /** * @ingroup generic_load_store * Loads the memory arrays pointed to by \c real_src and \c imag_src * into a batch of complex numbers and returns it. \c real_src and * \c imag_src are not required to be aligned. * @param real_src the pointer to the memory array containing the real part. * @param imag_src the pointer to the memory array containing the imaginary part. * @return the batch of complex wrapping the highest available instruction set. */ template <class T1, class T2> simd_return_type<T1, T2> load_simd(const T1* real_src, const T1* imag_src, unaligned_mode); /** * @ingroup generic_load_store * Loads the memory arrays pointed to by \c real_src and \c imag_src * into the batch \c dst. \c real_src and \c imag_src are not required to be aligned. * @param real_src the pointer to the memory array containing the real part. * @param imag_src the pointer to the memory array containing the imaginary part. * @param dst the destination batch. */ template <class T1, class T2> void load_simd(const T1* real_src, const T1* imag_src, simd_type<T2>& dst, unaligned_mode); /** * @ingroup generic_load_store * Stores the batch \c src into the memory array pointed to by \c dst. * \c dst is required to be aligned. * @param dst the pointer to the memory array. * @param src the batch to store. */ template <class T1, class T2 = T1> void store_simd(T1* dst, const simd_type<T2>& src, aligned_mode); /** * @ingroup generic_load_store * Stores the boolean batch \c src into the memory array pointed to by \c dst. * \c dst is required to be aligned. * @param dst the pointer to the memory array. * @param src the boolean batch to store. */ template <class T1, class T2 = T1> void store_simd(T1* dst, const simd_bool_type<T2>& src, aligned_mode); /** * @ingroup generic_load_store * Stores the batch \c src into the memory array pointed to by \c dst. * \c dst is not required to be aligned. * @param dst the pointer to the memory array. * @param src the batch to store. */ template <class T1, class T2 = T1> void store_simd(T1* dst, const simd_type<T2>& src, unaligned_mode); /** * @ingroup generic_load_store * Stores the boolean batch \c src into the memory array pointed to by \c dst. * \c dst is not required to be aligned. * @param dst the pointer to the memory array. * @param src the boolean batch to store. */ template <class T1, class T2 = T1> void store_simd(T1* dst, const simd_bool_type<T2>& src, unaligned_mode); /** * @ingroup generic_load_store * Stores the batch of complex numbers \c src into the memory arrays pointed * to by \c real_dst and \c imag_dst. \c real_dst and \c imag_dst are required * to be aligned. * @param real_dst the pointer to the memory array of the real part. * @param imag_dst the pointer to the memory array of the imaginary part. * @param src the batch to store. */ template <class T1, class T2> void store_simd(T1* real_dst, T1* imag_dst, const simd_type<T2>& src, aligned_mode); /** * @ingroup generic_load_store * Stores the batch of complex numbers \c src into the memory arrays pointed * to by \c real_dst and \c imag_dst. \c real_dst and \c imag_dst are not required * to be aligned. * @param real_dst the pointer to the memory array of the real part. * @param imag_dst the pointer to the memory array of the imaginary part. * @param src the batch to store. */ template <class T1, class T2> void store_simd(T1* real_dst, T1* imag_dst, const simd_type<T2>& src, unaligned_mode); // Prefetch template <class T> void prefetch(const T* address); /*************************** * detail implementation ***************************/ namespace detail { // Common implementation of SIMD functions for types supported // by vectorization. template <class T, class V> struct simd_function_invoker { inline static V set_simd(const T& value) { using batch_value_type = typename V::value_type; using value_type = typename std::conditional<std::is_same<T, bool>::value, bool, batch_value_type>::type; return V(value_type(value)); } inline static V load_aligned(const T* src) { V res; return res.load_aligned(src); } inline static void load_aligned(const T* src, V& dst) { dst.load_aligned(src); } inline static V load_unaligned(const T* src) { V res; return res.load_unaligned(src); } inline static void load_unaligned(const T* src, V& dst) { dst.load_unaligned(src); } inline static void store_aligned(T* dst, const V& src) { src.store_aligned(dst); } inline static void store_unaligned(T* dst, const V& src) { src.store_unaligned(dst); } }; template <class T, class V> struct simd_complex_invoker { inline static V load_aligned(const T* real_src, const T* imag_src) { V res; return res.load_aligned(real_src, imag_src); } inline static void load_aligned(const T* real_src, const T* imag_src, V& dst) { dst.load_aligned(real_src, imag_src); } inline static V load_unaligned(const T* real_src, const T* imag_src) { V res; return res.load_unaligned(real_src, imag_src); } inline static void load_unaligned(const T* real_src, const T* imag_src, V& dst) { dst.load_unaligned(real_src, imag_src); } inline static void store_aligned(T* real_dst, T* imag_dst, const V& src) { src.store_aligned(real_dst, imag_dst); } inline static void store_unaligned(T* real_dst, T* imag_dst, const V& src) { src.store_unaligned(real_dst, imag_dst); } }; // Default implementation of SIMD functions for types not supported // by vectorization. template <class T> struct simd_function_invoker<T, T> { inline static T set_simd(const T& value) { return value; } inline static T load_aligned(const T* src) { return *src; } inline static void load_aligned(const T* src, T& dst) { dst = *src; } inline static T load_unaligned(const T* src) { return *src; } inline static void load_unaligned(const T* src, T& dst) { dst = *src; } inline static void store_aligned(T* dst, const T& src) { *dst = src; } inline static void store_unaligned(T* dst, const T& src) { *dst = src; } }; } /*********************************************** * Data transfer instructions implementation ***********************************************/ template <class T1, class T2> inline simd_return_type<T1, T2> set_simd(const T1& value) { return detail::simd_function_invoker<T1, simd_return_type<T1, T2>>::set_simd(value); } template <class T1, class T2> inline simd_return_type<T1, T2> load_aligned(const T1* src) { return detail::simd_function_invoker<T1, simd_return_type<T1, T2>>::load_aligned(src); } template <class T1, class T2> inline void load_aligned(const T1* src, simd_type<T2>& dst) { detail::simd_function_invoker<T1, simd_type<T2>>::load_aligned(src, dst); } template <class T1, class T2> inline simd_return_type<T1, T2> load_aligned(const T1* real_src, const T1* imag_src) { return detail::simd_complex_invoker<T1, simd_return_type<T1, T2>>::load_aligned(real_src, imag_src); } template <class T1, class T2> inline void load_aligned(const T1* real_src, const T1* imag_src, simd_type<T2>& dst) { detail::simd_complex_invoker<T1, simd_type<T2>>::load_aligned(real_src, imag_src, dst); } template <class T1, class T2> inline simd_return_type<T1, T2> load_unaligned(const T1* src) { return detail::simd_function_invoker<T1, simd_return_type<T1, T2>>::load_unaligned(src); } template <class T1, class T2> inline void load_unaligned(const T1* src, simd_type<T2>& dst) { detail::simd_function_invoker<T1, simd_type<T2>>::load_unaligned(src, dst); } template <class T1, class T2> inline simd_return_type<T1, T2> load_unaligned(const T1* real_src, const T1* imag_src) { return detail::simd_complex_invoker<T1, simd_return_type<T1, T2>>::load_unaligned(real_src, imag_src); } template <class T1, class T2> inline void load_unaligned(const T1* real_src, const T1* imag_src, simd_type<T2>& dst) { detail::simd_complex_invoker<T1, simd_type<T2>>::load_unaligned(real_src, imag_src, dst); } template <class T1, class T2> inline void store_aligned(T1* dst, const simd_type<T2>& src) { detail::simd_function_invoker<T1, simd_type<T2>>::store_aligned(dst, src); } template <class T1, class T2> inline void store_aligned(T1* dst, const simd_bool_type<T2>& src) { detail::simd_function_invoker<T1, simd_bool_type<T2>>::store_aligned(dst, src); } template <class T1, class T2> inline void store_unaligned(T1* dst, const simd_type<T2>& src) { detail::simd_function_invoker<T1, simd_type<T2>>::store_unaligned(dst, src); } template <class T1, class T2> inline void store_unaligned(T1* dst, const simd_bool_type<T2>& src) { detail::simd_function_invoker<T1, simd_bool_type<T2>>::store_unaligned(dst, src); } template <class T1, class T2> inline void store_aligned(T1* real_dst, T1* imag_dst, const simd_type<T2>& src) { detail::simd_complex_invoker<T1, simd_type<T2>>::store_aligned(real_dst, imag_dst, src); } template <class T1, class T2> inline void store_unaligned(T1* real_dst, T1* imag_dst, const simd_type<T2>& src) { detail::simd_complex_invoker<T1, simd_type<T2>>::store_unaligned(real_dst, imag_dst, src); } /*************************************************** * Load / store generic functions implementation ***************************************************/ template <class T1, class T2> inline simd_return_type<T1, T2> load_simd(const T1* src, aligned_mode) { return load_aligned<T1, T2>(src); } template <class T1, class T2> inline void load_simd(const T1* src, simd_type<T2>& dst, aligned_mode) { load_aligned<T1, T2>(src, dst); } template <class T1, class T2> inline simd_return_type<T1, T2> load_simd(const T1* real_src, const T1* imag_src, aligned_mode) { return load_aligned<T1, T2>(real_src, imag_src); } template <class T1, class T2> inline void load_simd(const T1* real_src, const T1* imag_src, simd_type<T2>& dst, aligned_mode) { load_aligned<T1, T2>(real_src, imag_src, dst); } template <class T1, class T2> inline simd_return_type<T1, T2> load_simd(const T1* src, unaligned_mode) { return load_unaligned<T1, T2>(src); } template <class T1, class T2> inline void load_simd(const T1* src, simd_type<T2>& dst, unaligned_mode) { load_unaligned<T1, T2>(src, dst); } template <class T1, class T2> inline simd_return_type<T1, T2> load_simd(const T1* real_src, const T1* imag_src, unaligned_mode) { return load_unaligned<T1, T2>(real_src, imag_src); } template <class T1, class T2> inline void load_simd(const T1* real_src, const T1* imag_src, simd_type<T2>& dst, unaligned_mode) { load_unaligned<T1, T2>(real_src, imag_src, dst); } template <class T1, class T2> inline void store_simd(T1* dst, const simd_type<T2>& src, aligned_mode) { store_aligned<T1, T2>(dst, src); } template <class T1, class T2> inline void store_simd(T1* dst, const simd_bool_type<T2>& src, aligned_mode) { store_aligned<T1, T2>(dst, src); } template <class T1, class T2> inline void store_simd(T1* dst, const simd_type<T2>& src, unaligned_mode) { store_unaligned<T1, T2>(dst, src); } template <class T1, class T2> inline void store_simd(T1* dst, const simd_bool_type<T2>& src, unaligned_mode) { store_unaligned<T1, T2>(dst, src); } template <class T1, class T2> inline void store_simd(T1* real_dst, T1* imag_dst, const simd_type<T2>& src, aligned_mode) { store_aligned<T1, T2>(real_dst, imag_dst, src); } template <class T1, class T2> inline void store_simd(T1* real_dst, T1* imag_dst, const simd_type<T2>& src, unaligned_mode) { store_unaligned<T1, T2>(real_dst, imag_dst, src); } /***************************** * Prefetch implementation *****************************/ template <class T> inline void prefetch(const T* /*address*/) { } #if XSIMD_X86_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE template <> inline void prefetch<int32_t>(const int32_t* address) { _mm_prefetch(reinterpret_cast<const char*>(address), _MM_HINT_T0); } template <> inline void prefetch<int64_t>(const int64_t* address) { _mm_prefetch(reinterpret_cast<const char*>(address), _MM_HINT_T0); } template <> inline void prefetch<float>(const float* address) { _mm_prefetch(reinterpret_cast<const char*>(address), _MM_HINT_T0); } template <> inline void prefetch<double>(const double* address) { _mm_prefetch(reinterpret_cast<const char*>(address), _MM_HINT_T0); } #endif } #endif