%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /proc/985914/root/lib/python3/dist-packages/pythran/pythonic/utils/
Upload File :
Create Path :
Current File : //proc/985914/root/lib/python3/dist-packages/pythran/pythonic/utils/broadcast_copy.hpp

#ifndef PYTHONIC_UTILS_BROADCAST_COPY_HPP
#define PYTHONIC_UTILS_BROADCAST_COPY_HPP

#include "pythonic/include/utils/broadcast_copy.hpp"

#include "pythonic/types/tuple.hpp"

#ifdef _OPENMP
#include <omp.h>

// as a macro so that an enlightened user can modify this variable :-)
#ifndef PYTHRAN_OPENMP_MIN_ITERATION_COUNT
#define PYTHRAN_OPENMP_MIN_ITERATION_COUNT 1000
#endif

#endif

PYTHONIC_NS_BEGIN

namespace utils
{

  /* helper for specialization of the broadcasting, vectorizing copy operator
   * due to expression templates, this may also trigger a lot of
   * computations!
   *
   * ``vector_form'' names the copy strategy; the SIMD strategies are only
   * specialized when USE_XSIMD is defined (xsimd)
   *
   * the generic call operator has two template parameters:
   *
   * template <class E, class F>
   * void operator()(E &&self, F const &other)
   *
   * (the ``fast_novectorize'' specializations additionally receive two tuples
   * holding the indices accumulated so far for ``self'' and ``other'')
   *
   * ``E'' is the type of the object to which the data are copied
   *
   * ``F'' is the type of the object from which the data are copied
   *
   * ``N'' is the depth of the loop nest. When it reaches ``1'', we have a raw
   *       loop that may be vectorizable
   *
   * ``D'' is the delta between the number of dimensions of E and F. When set
   *       to a value greater than ``0'', some broadcasting is needed
   */

  // Primary copy template: only declared here, it is specialized on the
  // strategy tag ``vector_form'', the loop depth ``N'' and the dimension
  // delta ``D''.
  template <class vector_form, size_t N, size_t D>
  struct _broadcast_copy;

  // Empty tag type selecting the index-tuple based specializations.
  struct fast_novectorize {};

  // Innermost step of the index-tuple based copy: all indices are known, so
  // one element is loaded from ``other'' and stored into ``self''.
  template <>
  struct _broadcast_copy<fast_novectorize, 0, 0> {
    // Expands both index tuples into ``long'' argument packs. The loaded value
    // is cast to the dtype of ``self'' before being handed to ``store''.
    template <class E, class F, class SelfIndices, class OtherIndices,
              size_t... Is>
    void helper(E &&self, F const &other, SelfIndices &&self_indices,
                OtherIndices &&other_indices, utils::index_sequence<Is...>)
    {
      using target_dtype = typename std::decay<E>::type::dtype;
      std::forward<E>(self).store(
          (target_dtype)other.load(
              static_cast<long>(std::get<Is>(other_indices))...),
          static_cast<long>(std::get<Is>(self_indices))...);
    }

    // Entry point: builds the index sequence matching the number of indices
    // held by ``self_indices'' and delegates to ``helper''.
    template <class E, class F, class SelfIndices, class OtherIndices>
    void operator()(E &&self, F const &other, SelfIndices &&self_indices,
                    OtherIndices &&other_indices)
    {
      using index_count =
          std::tuple_size<typename std::decay<SelfIndices>::type>;
      helper(std::forward<E>(self), other, self_indices, other_indices,
             utils::make_index_sequence<index_count::value>());
    }
  };
  template <size_t N>
  struct _broadcast_copy<fast_novectorize, N, 0> {
    template <class E, class F, class SelfIndices, class OtherIndices>
    void operator()(E &&self, F const &other, SelfIndices &&self_indices,
                    OtherIndices &&other_indices)
    {
      long const other_size =
          other.template shape<std::decay<E>::type::value - N>();
      long const self_size =
          self.template shape<std::decay<E>::type::value - N>();
      if (self_size == other_size)
        for (long i = 0; i < self_size; ++i)
          _broadcast_copy<fast_novectorize, N - 1, 0>{}(
              std::forward<E>(self), other,
              std::tuple_cat(self_indices, std::make_tuple(i)),
              std::tuple_cat(other_indices, std::make_tuple(i)));
      else
        for (long i = 0; i < self_size; ++i)
          _broadcast_copy<fast_novectorize, N - 1, 0>{}(
              std::forward<E>(self), other,
              std::tuple_cat(self_indices, std::make_tuple(i)),
              std::tuple_cat(other_indices, std::make_tuple(0)));
    }
  };

  template <size_t N, size_t D>
  struct _broadcast_copy<fast_novectorize, N, D> {
    template <class E, class F, class SelfIndices, class OtherIndices>
    void operator()(E &&self, F const &other, SelfIndices &&self_indices,
                    OtherIndices &&other_indices)
    {
      using broadcaster = typename std::conditional<
          types::is_dtype<F>::value,
          types::broadcast<F, typename std::decay<E>::type::dtype>,
          types::broadcasted<F>>::type;
      _broadcast_copy<fast_novectorize, N, D - 1>{}(
          std::forward<E>(self), broadcaster(other),
          std::forward<SelfIndices>(self_indices),
          std::forward<OtherIndices>(other_indices));
    }
  };

  // Iterator based copy for ``D == 0'': first copies ``other'' onto the
  // beginning of ``self'', then replicates that leading chunk over the rest
  // of ``self''. With OpenMP, each phase runs in parallel once it reaches
  // PYTHRAN_OPENMP_MIN_ITERATION_COUNT iterations.
  template <size_t N, class vectorizer>
  struct _broadcast_copy<vectorizer, N, 0> {
    template <class E, class F, class... Indices>
    void operator()(E &&self, F const &other, Indices...)
    {
      long const dst_len = std::distance(self.begin(), self.end());
      long const src_len = std::distance(other.begin(), other.end());

      // phase 1: copy the source once
#ifdef _OPENMP
      if (src_len >= PYTHRAN_OPENMP_MIN_ITERATION_COUNT) {
        auto dst = self.begin();
        auto src = other.begin();
#pragma omp parallel for
        for (long k = 0; k < src_len; ++k)
          *(dst + k) = *(src + k);
      } else
#endif
      {
        std::copy(other.begin(), other.end(), self.begin());
      }

      // phase 2: eventually repeat the pattern
#ifdef _OPENMP
      if (dst_len >= PYTHRAN_OPENMP_MIN_ITERATION_COUNT * src_len) {
#pragma omp parallel for
        for (long offset = src_len; offset < dst_len; offset += src_len)
          std::copy_n(self.begin(), src_len, self.begin() + offset);
      } else
#endif
      {
        for (long offset = src_len; offset < dst_len; offset += src_len)
          std::copy_n(self.begin(), src_len, self.begin() + offset);
      }
    }
  };

  // ``D'' is not ``0'' so we should broadcast
  template <class vectorizer, size_t N, size_t D>
  struct _broadcast_copy {
    // Plain form: fill every top-level entry of ``self'' from ``other''.
    template <class E, class F>
    void operator()(E &&self, F const &other)
    {
      replicate(self, other);
    }

    // Form taking two extra arguments; they are accepted but not used.
    template <class E, class F, class ES, class FS>
    void operator()(E &&self, F const &other, ES, FS)
    {
      replicate(self, other);
    }

  private:
    // Shared implementation of both call operators. A dtype value is written
    // with std::fill. Otherwise ``other'' is assigned to the first entry of
    // ``self'' and that entry is then copied to all the following ones (in
    // parallel with OpenMP when the first dimension is large enough).
    template <class E, class F>
    void replicate(E &self, F const &other)
    {
      if (types::is_dtype<F>::value) {
        std::fill(self.begin(), self.end(), other);
        return;
      }

      auto head = self.begin();
      *head = other;
#ifdef _OPENMP
      auto cursor = head;
      long const rows = self.template shape<0>();
      if (rows >= PYTHRAN_OPENMP_MIN_ITERATION_COUNT) {
#pragma omp parallel for
        for (long k = 1; k < rows; ++k)
          *(cursor + k) = *head;
        return;
      }
#endif
      std::fill(self.begin() + 1, self.end(), *head);
    }
  };

#ifdef USE_XSIMD
  // specialize for SIMD only if available
  // otherwise use the std::copy fallback
  //
  // Copies ``other'' into ``self'' in three steps:
  //   1. ``bound'' full SIMD vectors, ``bound'' being the length of the
  //      vectorized range of ``other'';
  //   2. a scalar tail for the elements of ``other'' not covered by step 1;
  //   3. replication of the first ``other_size'' elements of ``self'' over the
  //      remainder of ``self''.
  // With OpenMP, steps 1 and 3 run in parallel once they reach
  // PYTHRAN_OPENMP_MIN_ITERATION_COUNT iterations.
  template <class vectorizer, class E, class F>
  void vbroadcast_copy(E &&self, F const &other)
  {
    using T = typename F::dtype;
    using vT = xsimd::simd_type<T>;

    static const std::size_t vN = vT::size;

    long self_size = std::distance(self.begin(), self.end()),
         other_size = std::distance(other.begin(), other.end());
    auto oiter = vectorizer::vbegin(other);
    const long bound =
        std::distance(vectorizer::vbegin(other), vectorizer::vend(other));

#ifdef _OPENMP
    if (bound >= PYTHRAN_OPENMP_MIN_ITERATION_COUNT) {
      auto iter = vectorizer::vbegin(self);
#pragma omp parallel for
      for (long i = 0; i < bound; ++i) {
        (iter + i).store(*(oiter + i));
      }
    } else
#endif
    {
      // Bounded by the vectorized range of ``other'' (like the OpenMP path),
      // not by the one of ``self'': when ``self'' is longer than ``other'' the
      // source iterator must not be advanced past its own range. The extra
      // elements of ``self'' are filled by the replication step below.
      auto iter = vectorizer::vbegin(self);
      for (long i = 0; i < bound; ++i, ++iter, ++oiter) {
        iter.store(*oiter);
      }
    }
    // tail
    {
      auto siter = self.begin();
      auto oiter = other.begin();
      for (long i = bound * vN; i < other_size; ++i)
        *(siter + i) = *(oiter + i);
    }

    // eventually repeat the pattern
#ifdef _OPENMP
    if (self_size >= PYTHRAN_OPENMP_MIN_ITERATION_COUNT * other_size)
#pragma omp parallel for
      for (long i = other_size; i < self_size; i += other_size)
        std::copy_n(self.begin(), other_size, self.begin() + i);
    else
#endif
      for (long i = other_size; i < self_size; i += other_size)
        std::copy_n(self.begin(), other_size, self.begin() + i);
  }

  template <>
  struct _broadcast_copy<types::vectorizer, 1, 0> {
    template <class E, class F>
    void operator()(E &&self, F const &other)
    {
      return vbroadcast_copy<types::vectorizer>(std::forward<E>(self), other);
    }
  };
  template <>
  struct _broadcast_copy<types::vectorizer_nobroadcast, 1, 0> {
    template <class E, class F>
    void operator()(E &&self, F const &other)
    {
      return vbroadcast_copy<types::vectorizer_nobroadcast>(
          std::forward<E>(self), other);
    }
  };

#endif
  // Picks the copy strategy at run time; ``vector_form'' tells whether the
  // vectorized strategy may be used.
  template <class E, class F, size_t N, size_t D, bool vector_form>
  struct broadcast_copy_dispatcher;

  // Non-vectorized dispatch.
  template <class E, class F, size_t N, size_t D>
  struct broadcast_copy_dispatcher<E, F, N, D, false> {
    void operator()(E &self, F const &other)
    {
      if (!utils::no_broadcast_ex(other)) {
        _broadcast_copy<types::novectorize, N, D>{}(self, other);
        return;
      }
      // index-tuple based copy, starting with empty index tuples
      _broadcast_copy<fast_novectorize, N, D>{}(
          self, other, std::make_tuple(), std::make_tuple());
    }
  };
  // Vectorized dispatch: ``types::vectorizer'' is used unless
  // ``utils::no_broadcast_ex(other)'' holds, in which case the index-tuple
  // based copy is used.
  template <class E, class F, size_t N, size_t D>
  struct broadcast_copy_dispatcher<E, F, N, D, true> {
    void operator()(E &self, F const &other)
    {
      if (!utils::no_broadcast_ex(other)) {
        _broadcast_copy<types::vectorizer, N, D>{}(self, other);
        return;
      }
      // index-tuple based copy, starting with empty index tuples
      _broadcast_copy<fast_novectorize, N, D>{}(
          self, other, std::make_tuple(), std::make_tuple());
    }
  };

  template <class E, class F, size_t N, size_t D, bool vector_form>
  E &broadcast_copy(E &self, F const &other)
  {
    if (self.size())
      broadcast_copy_dispatcher<E, F, N, D, vector_form>{}(self, other);
    return self;
  }

  /* update
   */
  // ``D'' is not ``0'' so we should broadcast: ``Op'' is applied to every
  // top-level entry of ``self'' with the whole of ``other'' as second
  // operand. With OpenMP, the loop runs in parallel once the first dimension
  // reaches PYTHRAN_OPENMP_MIN_ITERATION_COUNT.
  template <class Op, typename vector_form, size_t N, size_t D>
  struct _broadcast_update {

    template <class E, class F>
    void operator()(E &&self, F const &other)
    {
      long const rows = self.template shape<0>();
      auto row = self.begin();
#ifdef _OPENMP
      if (rows >= PYTHRAN_OPENMP_MIN_ITERATION_COUNT) {
#pragma omp parallel for
        for (long k = 0; k < rows; ++k)
          Op{}(*(row + k), other);
        return;
      }
#endif
      for (long k = 0; k < rows; ++k)
        Op{}(*(row + k), other);
    }
  };

  // Update specialization for ``D == 0'': ``Op'' is applied entry by entry
  // through the iterators of ``self'' and ``other''.
  template <class Op, size_t N, class vector_form>
  struct _broadcast_update<Op, vector_form, N, 0> {
    // Generic source.
    //  - with OpenMP and a large enough ``other'', the first ``other_size''
    //    entries are updated in parallel;
    //  - a single-entry ``other'' is applied to every entry of ``self'';
    //  - otherwise ``other'' is cycled over ``self'' in chunks of
    //    ``other_size'' entries.
    template <class E, class F>
    void operator()(E &&self, F const &other)
    {
      long other_size = std::distance(other.begin(), other.end());
      auto siter = self.begin();
      auto oiter = other.begin();
#ifdef _OPENMP
      if (other_size >= PYTHRAN_OPENMP_MIN_ITERATION_COUNT) {
#pragma omp parallel for
        for (long i = 0; i < other_size; ++i)
          Op{}(*(siter + i), *(oiter + i));
        return;
      }
#endif
      if (other_size == 1) {
        auto value = *oiter;
        for (auto send = self.end(); siter != send; ++siter)
          Op{}(*siter, value);
      } else {
        for (auto send = self.end(); siter != send;) {
          auto ooiter = oiter;
          for (long i = 0; i < other_size; ++i, ++siter, ++ooiter)
            Op{}(*siter, *ooiter);
        }
      }
    }

    // Source is a ``types::broadcast'': its first value is applied to every
    // entry of ``self''.
    template <class E, class F0, class F1>
    void operator()(E &&self, types::broadcast<F0, F1> const &other)
    {
      auto value = *other.begin();
      for (auto siter = self.begin(), send = self.end(); siter != send; ++siter)
        Op{}(*siter, value);
    }

    // Source is a ``types::broadcasted'': its first value is applied to every
    // entry of ``self''.
    template <class E, class F>
    void operator()(E &&self, types::broadcasted<F> const &other)
    {
      // Read through begin(), like the overload above: end() denotes a
      // past-the-end position and must not be dereferenced.
      auto value = *other.begin();
      for (auto siter = self.begin(), send = self.end(); siter != send; ++siter)
        Op{}(*siter, value);
    }
  };

  // Innermost step of the index-tuple based update: all indices are known,
  // so one element of ``self'' is updated with one element of ``other''.
  template <class Op>
  struct _broadcast_update<Op, fast_novectorize, 0, 0> {
    // Expands both index tuples into ``long'' argument packs and forwards the
    // loaded value to ``self.update<Op>''.
    template <class E, class F, class SelfIndices, class OtherIndices,
              size_t... Is>
    void helper(E &&self, F const &other, SelfIndices &&self_indices,
                OtherIndices &&other_indices, utils::index_sequence<Is...>)
    {
      self.template update<Op>(
          other.load(static_cast<long>(std::get<Is>(other_indices))...),
          static_cast<long>(std::get<Is>(self_indices))...);
    }

    // Entry point: builds the index sequence matching the number of indices
    // held by ``self_indices'' and delegates to ``helper''.
    template <class E, class F, class SelfIndices, class OtherIndices>
    void operator()(E &&self, F const &other, SelfIndices &&self_indices,
                    OtherIndices &&other_indices)
    {
      using index_count =
          std::tuple_size<typename std::decay<SelfIndices>::type>;
      helper(std::forward<E>(self), other, self_indices, other_indices,
             utils::make_index_sequence<index_count::value>());
    }
  };
  template <class Op, size_t N>
  struct _broadcast_update<Op, fast_novectorize, N, 0> {
    template <class E, class F, class SelfIndices, class OtherIndices>
    void operator()(E &&self, F const &other, SelfIndices &&self_indices,
                    OtherIndices &&other_indices)
    {
      auto const other_size =
          other.template shape<std::decay<E>::type::value - N>();
      auto const self_size =
          self.template shape<std::decay<E>::type::value - N>();
      if (self_size == other_size)
        for (long i = 0; i < self_size; ++i)
          _broadcast_update<Op, fast_novectorize, N - 1, 0>{}(
              std::forward<E>(self), other,
              std::tuple_cat(self_indices, std::make_tuple(i)),
              std::tuple_cat(other_indices, std::make_tuple(i)));
      else
        for (long i = 0; i < self_size; ++i)
          _broadcast_update<Op, fast_novectorize, N - 1, 0>{}(
              std::forward<E>(self), other,
              std::tuple_cat(self_indices, std::make_tuple(i)),
              std::tuple_cat(other_indices, std::make_tuple(0)));
    }
  };
  template <class Op, size_t N, size_t D>
  struct _broadcast_update<Op, fast_novectorize, N, D> {
    template <class E, class F, class SelfIndices, class OtherIndices>
    void operator()(E &&self, F const &other, SelfIndices &&self_indices,
                    OtherIndices &&other_indices)
    {
      using broadcaster = typename std::conditional<
          types::is_dtype<F>::value,
          types::broadcast<F, typename std::decay<E>::type::dtype>,
          types::broadcasted<F>>::type;
      _broadcast_update<Op, fast_novectorize, N, D - 1>{}(
          std::forward<E>(self), broadcaster(other),
          std::forward<SelfIndices>(self_indices),
          std::forward<OtherIndices>(other_indices));
    }
  };

#ifdef USE_XSIMD
  // specialize for SIMD only if available
  // otherwise use the std::copy fallback
  template <class Op, class vectorizer, class E, class F>
  void vbroadcast_update(E &&self, F const &other)
  {
    using T = typename F::dtype;
    using vT = typename xsimd::simd_type<T>;
    long other_size = std::distance(other.begin(), other.end());

    static const std::size_t vN = vT::size;
    auto oiter = vectorizer::vbegin(other);
    auto iter = vectorizer::vbegin(self);
    const long bound =
        std::distance(vectorizer::vbegin(other), vectorizer::vend(other));

#ifdef _OPENMP
    if (bound >= PYTHRAN_OPENMP_MIN_ITERATION_COUNT)
#pragma omp parallel for
      for (long i = 0; i < bound; i++) {
        (iter + i).store(Op{}(*(iter + i), *(oiter + i)));
      }
    else
#endif
      for (auto end = vectorizer::vend(self); iter != end; ++iter, ++oiter) {
        iter.store(Op{}(*iter, *oiter));
      }
    // tail
    {
      auto siter = self.begin();
      auto oiter = other.begin();
      for (long i = bound * vN; i < other_size; ++i)
        Op{}(*(siter + i), *(oiter + i));
    }
  }

  // Overload for a ``types::broadcast'' source: its first value is read once
  // and ``Op'' is applied with it to every entry of ``self''.
  template <class Op, class vectorizer, class E, class F0, class F1>
  void vbroadcast_update(E &&self, types::broadcast<F0, F1> const &other)
  {
    auto value = *other.begin();
    auto cursor = self.begin();
    auto last = self.end();
    while (cursor != last) {
      Op{}(*cursor, value);
      ++cursor;
    }
  }

  // Overload for a ``types::broadcasted'' source: its first value is read
  // once and ``Op'' is applied with it to every entry of ``self''.
  template <class Op, class vectorizer, class E, class F>
  void vbroadcast_update(E &&self, types::broadcasted<F> const &other)
  {
    // Read through begin(), like the ``types::broadcast'' overload: end()
    // denotes a past-the-end position and must not be dereferenced.
    auto value = *other.begin();
    for (auto siter = self.begin(), send = self.end(); siter != send; ++siter)
      Op{}(*siter, value);
  }

  template <class Op>
  struct _broadcast_update<Op, types::vectorizer, 1, 0> {
    template <class... Args>
    void operator()(Args &&... args)
    {
      vbroadcast_update<Op, types::vectorizer>(std::forward<Args>(args)...);
    }
  };
  template <class Op>
  struct _broadcast_update<Op, types::vectorizer_nobroadcast, 1, 0> {
    template <class... Args>
    void operator()(Args &&... args)
    {
      vbroadcast_update<Op, types::vectorizer_nobroadcast>(
          std::forward<Args>(args)...);
    }
  };

#endif
  // Picks the update strategy at run time; ``vector_form'' tells whether the
  // vectorized strategy may be used.
  template <class Op, bool vector_form, class E, class F, size_t N, size_t D>
  struct broadcast_update_dispatcher;

  // Non-vectorized dispatch.
  template <class Op, class E, class F, size_t N, size_t D>
  struct broadcast_update_dispatcher<Op, false, E, F, N, D> {
    void operator()(E &self, F const &other)
    {
      if (!utils::no_broadcast_ex(other)) {
        _broadcast_update<Op, types::novectorize, N, D>{}(self, other);
        return;
      }
      // index-tuple based update, starting with empty index tuples
      _broadcast_update<Op, fast_novectorize, N, D>{}(
          self, other, std::make_tuple(), std::make_tuple());
    }
  };
  // Vectorized dispatch: ``types::vectorizer'' is used unless
  // ``utils::no_broadcast_ex(other)'' holds, in which case the index-tuple
  // based update is used.
  template <class Op, class E, class F, size_t N, size_t D>
  struct broadcast_update_dispatcher<Op, true, E, F, N, D> {
    void operator()(E &self, F const &other)
    {
      if (!utils::no_broadcast_ex(other)) {
        _broadcast_update<Op, types::vectorizer, N, D>{}(self, other);
        return;
      }
      // index-tuple based update, starting with empty index tuples
      _broadcast_update<Op, fast_novectorize, N, D>{}(
          self, other, std::make_tuple(), std::make_tuple());
    }
  };

  template <class Op, class E, class F, size_t N, size_t D, bool vector_form>
  E &broadcast_update(E &self, F const &other)
  {
    if (self.size())
      broadcast_update_dispatcher<Op, vector_form, E, F, N, D>{}(self, other);
    return self;
  }
}
PYTHONIC_NS_END

#endif

Zerion Mini Shell 1.0