%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /proc/309157/task/309157/root/usr/include/xsimd/stl/
Upload File :
Create Path :
Current File : //proc/309157/task/309157/root/usr/include/xsimd/stl/algorithms.hpp

/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
* Martin Renou                                                             *
* Copyright (c) QuantStack                                                 *
*                                                                          *
* Distributed under the terms of the BSD 3-Clause License.                 *
*                                                                          *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_ALGORITHMS_HPP
#define XSIMD_ALGORITHMS_HPP

#include <array>
#include <cstddef>
#include <iterator>
#include <type_traits>

#include "../memory/xsimd_load_store.hpp"

namespace xsimd
{
    /**
     * Applies the unary functor @p f to each element of [first, last) and
     * writes the results to the range beginning at @p out_first.
     *
     * The range is processed in three phases: a scalar head up to the first
     * index reported by get_alignment_offset for the input, a batch loop
     * handling simd_size elements per iteration, and a scalar tail for the
     * remaining elements.
     *
     * @param first     start of the input range; must support operator[] and,
     *                  since whole batches are loaded from &first[i], is
     *                  assumed to refer to contiguous storage.
     * @param last      end of the input range.
     * @param out_first start of the output range; same requirements as
     *                  @p first, with room for at least as many elements as
     *                  the input range.
     * @param f         functor invoked both with a single value and with a
     *                  batch of values.
     */
    template <class I1, class I2, class O1, class UF>
    void transform(I1 first, I2 last, O1 out_first, UF&& f)
    {
        using value_type = typename std::decay<decltype(*first)>::type;
        using traits = simd_traits<value_type>;
        using batch_type = typename traits::type;

        const std::size_t size = static_cast<std::size_t>(std::distance(first, last));
        if (size == 0)
        {
            // Nothing to transform. Returning here also avoids dereferencing
            // the iterators of an empty range when taking addresses below.
            return;
        }

        // Local copy so the static member is only ever used by value.
        const std::size_t simd_size = traits::size;

        const auto* ptr_begin = &(*first);
        auto* ptr_out = &(*out_first);

        const std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size);
        const std::size_t out_align = xsimd::get_alignment_offset(ptr_out, size, simd_size);
        // Round the number of elements after the head down to a multiple of
        // simd_size (the mask trick requires simd_size to be a power of two).
        const std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));

        // Scalar head: elements before the first aligned input position.
        for (std::size_t i = 0; i < align_begin; ++i)
        {
            out_first[i] = f(first[i]);
        }

        // Batch body: the input is always loaded aligned; the output may use
        // an aligned store only if it shares the input's alignment offset.
        batch_type batch;
        if (align_begin == out_align)
        {
            for (std::size_t i = align_begin; i < align_end; i += simd_size)
            {
                xsimd::load_aligned(&first[i], batch);
                xsimd::store_aligned(&out_first[i], f(batch));
            }
        }
        else
        {
            for (std::size_t i = align_begin; i < align_end; i += simd_size)
            {
                xsimd::load_aligned(&first[i], batch);
                xsimd::store_unaligned(&out_first[i], f(batch));
            }
        }

        // Scalar tail: elements that do not fill a complete batch.
        for (std::size_t i = align_end; i < size; ++i)
        {
            out_first[i] = f(first[i]);
        }
    }

    /**
     * Applies the binary functor @p f to corresponding elements of
     * [first_1, last_1) and the range beginning at @p first_2, writing the
     * results to the range beginning at @p out_first.
     *
     * The first input drives the partitioning: a scalar head up to its
     * alignment offset, a batch loop handling simd_size elements per
     * iteration, and a scalar tail. The second input and the output use
     * aligned accesses only when their alignment offset equals that of the
     * first input; otherwise unaligned accesses are used for them.
     *
     * @param first_1   start of the first input range; must support
     *                  operator[] and is assumed to refer to contiguous
     *                  storage, since whole batches are loaded from it.
     * @param last_1    end of the first input range.
     * @param first_2   start of the second input range; same requirements,
     *                  with at least as many elements as the first range.
     * @param out_first start of the output range; same requirements.
     * @param f         functor invoked both with two single values and with
     *                  two batches.
     */
    template <class I1, class I2, class I3, class O1, class UF>
    void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
    {
        using value_type = typename std::decay<decltype(*first_1)>::type;
        using traits = simd_traits<value_type>;
        using batch_type = typename traits::type;

        const std::size_t size = static_cast<std::size_t>(std::distance(first_1, last_1));
        if (size == 0)
        {
            // Nothing to transform. Returning here also avoids dereferencing
            // the iterators of empty ranges when taking addresses below.
            return;
        }

        // Local copy so the static member is only ever used by value.
        const std::size_t simd_size = traits::size;

        const auto* ptr_begin_1 = &(*first_1);
        const auto* ptr_begin_2 = &(*first_2);
        auto* ptr_out = &(*out_first);

        const std::size_t align_begin_1 = xsimd::get_alignment_offset(ptr_begin_1, size, simd_size);
        const std::size_t align_begin_2 = xsimd::get_alignment_offset(ptr_begin_2, size, simd_size);
        const std::size_t out_align = xsimd::get_alignment_offset(ptr_out, size, simd_size);
        // Round the number of elements after the head down to a multiple of
        // simd_size (the mask trick requires simd_size to be a power of two).
        const std::size_t align_end = align_begin_1 + ((size - align_begin_1) & ~(simd_size - 1));

        const bool second_shares_alignment = (align_begin_1 == align_begin_2);
        const bool output_shares_alignment = (align_begin_1 == out_align);

        // Scalar head: elements before the first aligned position of input 1.
        for (std::size_t i = 0; i < align_begin_1; ++i)
        {
            out_first[i] = f(first_1[i], first_2[i]);
        }

        // Batch body. The macro stamps out the loop with the load/store
        // flavour chosen once, outside the loop, for each of the three ranges.
        #define XSIMD_LOOP_MACRO(A1, A2, A3)                                    \
            for (std::size_t i = align_begin_1; i < align_end; i += simd_size)  \
            {                                                                   \
                xsimd::A1(&first_1[i], batch_1);                                \
                xsimd::A2(&first_2[i], batch_2);                                \
                xsimd::A3(&out_first[i], f(batch_1, batch_2));                  \
            }

        batch_type batch_1, batch_2;
        if (output_shares_alignment && second_shares_alignment)
        {
            XSIMD_LOOP_MACRO(load_aligned, load_aligned, store_aligned);
        }
        else if (output_shares_alignment)
        {
            XSIMD_LOOP_MACRO(load_aligned, load_unaligned, store_aligned);
        }
        else if (second_shares_alignment)
        {
            XSIMD_LOOP_MACRO(load_aligned, load_aligned, store_unaligned);
        }
        else
        {
            XSIMD_LOOP_MACRO(load_aligned, load_unaligned, store_unaligned);
        }

        #undef XSIMD_LOOP_MACRO

        // Scalar tail: elements that do not fill a complete batch.
        for (std::size_t i = align_end; i < size; ++i)
        {
            out_first[i] = f(first_1[i], first_2[i]);
        }
    }


    // TODO: Remove this once we drop C++11 support
    namespace detail
    {
        /**
         * Function object returning x + y for arbitrary operand types.
         *
         * The call operator is const so that the functor can be invoked
         * through const objects and const references as well as temporaries.
         */
        struct plus
        {
            template <class X, class Y>
            auto operator()(X&& x, Y&& y) const -> decltype(x + y) { return x + y; }
        };
    }


    /**
     * Folds the elements of [first, last) into @p init using @p binfun.
     *
     * Ranges shorter than one batch are folded element by element. Longer
     * ranges are split into a scalar head (up to the input's alignment
     * offset), a batch section accumulated lane-wise, and a scalar tail.
     * Because the batch section combines elements lane by lane before
     * folding the lanes into the result, elements are not combined in strict
     * left-to-right order; the result matches a sequential fold only for
     * operations where the grouping and order of operands do not matter.
     *
     * @param first  start of the input range; must support operator[] and is
     *               assumed to refer to contiguous storage, since whole
     *               batches are loaded from it.
     * @param last   end of the input range.
     * @param init   initial value of the accumulation.
     * @param binfun binary functor invoked as binfun(init, value) and as
     *               binfun(batch, batch); defaults to addition.
     * @return the accumulated value.
     */
    template <class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus>
    Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
    {
        using value_type = typename std::decay<decltype(*first)>::type;
        using traits = simd_traits<value_type>;
        using batch_type = typename traits::type;

        std::size_t size = static_cast<std::size_t>(std::distance(first, last));
        constexpr std::size_t simd_size = traits::size;

        // Too few elements for even one batch: plain scalar fold. This also
        // covers the empty range, so `first` is never dereferenced below
        // unless at least one element exists.
        if(size < simd_size)
        {
            while(first != last)
            {
                init = binfun(init, *first++);
            }
            return init;
        }

        const auto* const ptr_begin = &(*first);

        std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size);
        // Round the number of elements after the head down to a multiple of
        // simd_size (the mask trick requires simd_size to be a power of two).
        std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));

        // reduce initial unaligned part
        for (std::size_t i = 0; i < align_begin; ++i)
        {
            init = binfun(init, first[i]);
        }

        // reduce aligned part
        // Only entered when at least one complete batch lies in
        // [align_begin, align_end). Without this guard, a range with
        // size >= simd_size but fewer than simd_size elements after the
        // alignment offset would load a batch reaching past the end of the
        // range, and the tail loop would then count those elements again.
        if (align_begin < align_end)
        {
            batch_type batch_init, batch;
            auto ptr = ptr_begin + align_begin;
            xsimd::load_aligned(ptr, batch_init);
            ptr += simd_size;
            for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
            {
                xsimd::load_aligned(ptr, batch);
                batch_init = binfun(batch_init, batch);
            }

            // reduce across batch
            alignas(batch_type) std::array<value_type, simd_size> arr;
            xsimd::store_aligned(arr.data(), batch_init);
            for (auto x : arr) init = binfun(init, x);
        }

        // reduce final unaligned part
        for (std::size_t i = align_end; i < size; ++i)
        {
            init = binfun(init, first[i]);
        }

        return init;
    }

}

#endif

Zerion Mini Shell 1.0