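// NOTE: the following lines are residue of the web page this file was
// captured from ("Mini Shell" file browser); they are not part of the header.
//   Directory    : /usr/include/xsimd/math/
//   Upload File  :
//   Create Path  :
//   Current File : /usr/include/xsimd/math/xsimd_error.hpp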

/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
* Martin Renou                                                             *
* Copyright (c) QuantStack                                                 *
*                                                                          *
* Distributed under the terms of the BSD 3-Clause License.                 *
*                                                                          *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_ERROR_HPP
#define XSIMD_ERROR_HPP

#include "xsimd_basic_math.hpp"
#include "xsimd_exponential.hpp"
#include "xsimd_fp_sign.hpp"
#include "xsimd_horner.hpp"

namespace xsimd
{
    /**
     * Computes the error function of the batch \c x.
     *
     * The work is forwarded to \c detail::erf_impl, which this file
     * specializes for \c float and \c double value types.
     * @param x batch of floating point values.
     * @return the error function of \c x.
     */
    template <class B>
    batch_type_t<B> erf(const simd_base<B>& x);

    /**
     * Computes the complementary error function of the batch \c x.
     *
     * The work is forwarded to \c detail::erfc_impl, which this file
     * specializes for \c float and \c double value types.
     * @param x batch of floating point values.
     * @return the complementary error function of \c x.
     */
    template <class B>
    batch_type_t<B> erfc(const simd_base<B>& x);

    /**********************
     * erf implementation *
     **********************/

    namespace detail
    {
        /* origin: boost/simd/arch/common/detail/generic/erf_kernel.hpp */
        /*
         * ====================================================
         * copyright 2016 NumScale SAS
         *
         * Distributed under the Boost Software License, Version 1.0.
         * (See copy at http://boost.org/LICENSE_1_0.txt)
         * ====================================================
         */
        // Primary template, declared but not defined here: the polynomial
        // kernels live in the partial specializations on the scalar type T
        // (float and double) that follow.
        template <class B, class T = typename B::value_type>
        struct erf_kernel;

        /**
         * Polynomial kernels used by the single precision \c erf and \c erfc
         * implementations of this file.
         *
         * Each kernel evaluates a fixed polynomial through \c horner, which is
         * declared in xsimd_horner.hpp and not shown here. The hexadecimal
         * template arguments are presumably the IEEE-754 binary32 bit patterns
         * of the decimal values listed in the trailing comments.
         */
        template <class B>
        struct erf_kernel<B, float>
        {
            // computes erf(a0)/a0
            // x is sqr(a0) and 0 <= abs(a0) <= 2/3
            // Callers in this file multiply the result by a0 to obtain erf(a0).
            static inline B erf1(const B& x)
            {
                return horner<B,
                              0x3f906eba,  //   1.128379154774254e+00
                              0xbec0937e,  //  -3.761252839094832e-01
                              0x3de70f22,  //   1.128218315189123e-01
                              0xbcdb61f4,  //  -2.678010670585737e-02
                              0x3ba4468d,  //   5.013293006147870e-03
                              0xba1fc83b  //  -6.095205117313012e-04
                              >(x);
            }

            // computes erfc(x)*exp(sqr(x))
            // x >=  2/3
            // Note: the "x" above is the original abscissa. The callers in this
            // file pass the transformed value x / (1 + x) - 0.4 as the argument
            // and multiply the result by exp(-x * x).
            static inline B erfc2(const B& x)
            {
                return horner<B,
                              0x3f0a0e8b,  //   5.392844046572836e-01
                              0xbf918a62,  //  -1.137035586823118e+00
                              0x3e243828,  //   1.603704761054187e-01
                              0x3ec4ca6e,  //   3.843569094305250e-01
                              0x3e1175c7,  //   1.420508523645926e-01
                              0x3e2006f0,  //   1.562764709849380e-01
                              0xbfaea865,  //  -1.364514006347145e+00
                              0x4050b063,  //   3.260765682222576e+00
                              0xc0cd1a85,  //  -6.409487379234005e+00
                              0x40d67e3b,  //   6.702908785399893e+00
                              0xc0283611  //  -2.628299919293280e+00
                              >(x);
            }

            // evaluates (1 - x) * P(x), P being the polynomial below.
            // The single precision erfc implementation of this file calls it
            // with x = abs(a) / (1 + abs(a)) for abs(a) < 2/3 and uses the
            // result directly as erfc(abs(a)), without an exponential factor.
            static inline B erfc3(const B& x)
            {
                return (B(1.) - x) * horner<B,
                                            0x3f7ffffe,  //   9.9999988e-01
                                            0xbe036d7e,  //  -1.2834737e-01
                                            0xbfa11698,  //  -1.2585020e+00
                                            0xbffc9284,  //  -1.9732213e+00
                                            0xc016c985,  //  -2.3560498e+00
                                            0x3f2cff3b,  //   6.7576951e-01
                                            0xc010d956,  //  -2.2632651e+00
                                            0x401b5680,  //   2.4271545e+00
                                            0x41aa8e55  //   2.1319498e+01
                                            >(x);
            }
        };

        /**
         * Kernels used by the double precision \c erf and \c erfc
         * implementations of this file.
         *
         * \c erf1, \c erfc2 and \c erfc3 are rational approximations: a
         * numerator polynomial divided by a denominator polynomial, both
         * evaluated through \c horner (declared in xsimd_horner.hpp, not shown
         * here). \c erfc4 is a plain polynomial. The hexadecimal template
         * arguments are presumably the IEEE-754 binary64 bit patterns of the
         * decimal values listed in the trailing comments.
         */
        template <class B>
        struct erf_kernel<B, double>
        {
            // computes erf(a0)/a0
            // x is sqr(a0) and 0 <= abs(a0) <= 0.65
            // Callers in this file multiply the result by a0 (erf) or by
            // abs(a0) and subtract it from 1 (erfc).
            static inline B erf1(const B& x)
            {
                return horner<B,
                              0x3ff20dd750429b61ull,  // 1.12837916709551
                              0x3fc16500f106c0a5ull,  // 0.135894887627278
                              0x3fa4a59a4f02579cull,  // 4.03259488531795E-02
                              0x3f53b7664358865aull,  // 1.20339380863079E-03
                              0x3f110512d5b20332ull  // 6.49254556481904E-05
                              >(x) /
                    horner<B,
                           0x3ff0000000000000ull,  // 1
                           0x3fdd0a84eb1ca867ull,  // 0.453767041780003
                           0x3fb64536ca92ea2full,  // 8.69936222615386E-02
                           0x3f8166f75999dbd1ull,  // 8.49717371168693E-03
                           0x3f37ea4332348252ull  // 3.64915280629351E-04
                           >(x);
            }

            // computes erfc(x)*exp(x*x)
            // 0.65 <= abs(x) <= 2.2
            // Callers in this file pass abs(a) and multiply by exp(-a*a).
            static inline B erfc2(const B& x)
            {
                return horner<B,
                              0x3feffffffbbb552bull,  // 0.999999992049799
                              0x3ff54dfe9b258a60ull,  // 1.33154163936765
                              0x3fec1986509e687bull,  // 0.878115804155882
                              0x3fd53dd7a67c7e9full,  // 0.331899559578213
                              0x3fb2488a6b5cb5e5ull,  // 7.14193832506776E-02
                              0x3f7cf4cfe0aacbb4ull,  // 7.06940843763253E-03
                              0x0ull  // 0
                              >(x) /
                    horner<B,
                           0x3ff0000000000000ull,  // 1
                           0x4003adeae79b9708ull,  // 2.45992070144246
                           0x40053b1052dca8bdull,  // 2.65383972869776
                           0x3ff9e677c2777c3cull,  // 1.61876655543871
                           0x3fe307622fcff772ull,  // 0.594651311286482
                           0x3fc033c113a7deeeull,  // 0.126579413030178
                           0x3f89a996639b0d00ull  // 1.25304936549413E-02
                           >(x);
            }

            // computes erfc(x)*exp(x*x)
            // 2.2 <= abs(x) <= 6
            // Callers in this file use this kernel for every lane with
            // abs(a) >= 2.2 (no upper bound is checked there) and multiply the
            // result by exp(-a*a).
            static inline B erfc3(const B& x)
            {
                return horner<B,
                              0x3fefff5a9e697ae2ull,  //0.99992114009714
                              0x3ff9fa202deb88e5ull,  //1.62356584489367
                              0x3ff44744306832aeull,  //1.26739901455873
                              0x3fe29be1cff90d94ull,  //0.581528574177741
                              0x3fc42210f88b9d43ull,  //0.157289620742839
                              0x3f971d0907ea7a92ull,  //2.25716982919218E-02
                              0x0ll  //0
                              >(x) /
                    horner<B,
                           0x3ff0000000000000ull,  //1
                           0x400602f24bf3fdb6ull,  //2.75143870676376
                           0x400afd487397568full,  //3.37367334657285
                           0x400315ffdfd5ce91ull,  //2.38574194785344
                           0x3ff0cfd4cb6cde9full,  //1.05074004614827
                           0x3fd1d7ab774bb837ull,  //0.278788439273629
                           0x3fa47bd61bbb3843ull  //4.00072964526861E-02
                           >(x);
            }

            // computes erfc(rx)*exp(rx*rx)
            // x >=  6 rx = 1/x
            // NOTE(review): no code in this file calls erfc4; confirm whether
            // it is used elsewhere before relying on or removing it.
            static inline B erfc4(const B& x)
            {
                return horner<B,
                              0xbc7e4ad1ec7d0000ll,  // -2.627435221016534e-17
                              0x3fe20dd750429a16ll,  // 5.641895835477182e-01
                              0x3db60000e984b501ll,  // 2.000889609806154e-11
                              0xbfd20dd753ae5dfdll,  // -2.820947949598745e-01
                              0x3e907e71e046a820ll,  // 2.457786367990903e-07
                              0x3fdb1494cac06d39ll,  // 4.231311779019112e-01
                              0x3f34a451701654f1ll,  // 3.149699042180451e-04
                              0xbff105e6b8ef1a63ll,  // -1.063940737150596e+00
                              0x3fb505a857e9ccc8ll,  // 8.211757799454056e-02
                              0x40074fbabc514212ll,  // 2.913930388669777e+00
                              0x4015ac7631f7ac4fll,  // 5.418419628850713e+00
                              0xc0457e03041e9d8bll,  // -4.298446704382794e+01
                              0x4055803d26c4ec4fll,  // 8.600373238783617e+01
                              0xc0505fce04ec4ec5ll  // -6.549694941594051e+01
                              >(x);
            }
        };

        /* origin: boost/simd/arch/common/simd/function/erf.hpp */
        /*
         * ====================================================
         * copyright 2016 NumScale SAS
         *
         * Distributed under the Boost Software License, Version 1.0.
         * (See copy at http://boost.org/LICENSE_1_0.txt)
         * ====================================================
         */

        // Primary template, declared but not defined here: erf is implemented
        // by the partial specializations on the scalar type T (float and
        // double) that follow.
        template <class B, class T = typename B::value_type>
        struct erf_impl;

        template <class B>
        struct erf_impl<B, float>
        {
            static inline B compute(const B& a)
            {
                B x = abs(a);
                B r1 = B(0.);
                auto test1 = x < B(2.f / 3.f);
                if (any(test1))
                {
                    r1 = a * erf_kernel<B>::erf1(x * x);
                    if (all(test1))
                        return r1;
                }
                B z = x / (B(1.) + x);
                z -= B(0.4f);
                B r2 = B(1.) - exp(-x * x) * erf_kernel<B>::erfc2(z);
                r2 = select(a < B(0.), -r2, r2);
                r1 = select(test1, r1, r2);
#ifndef XSIMD_NO_INFINITIES
                r1 = select(xsimd::isinf(a), sign(a), r1);
#endif
                return r1;
            }
        };

        template <class B>
        struct erf_impl<B, double>
        {
            static inline B compute(const B& a)
            {
                B x = abs(a);
                B xx = x * x;
                B lim1 = B(0.65);
                B lim2 = B(2.2);
                auto test1 = x < lim1;
                B r1 = B(0.);
                if (any(test1))
                {
                    r1 = a * erf_kernel<B>::erf1(xx);
                    if (all(test1))
                        return r1;
                }
                auto test2 = x < lim2;
                auto test3 = test2 && !test1;
                B ex = exp(-xx);
                if (any(test3))
                {
                    B z = B(1.) - ex * erf_kernel<B>::erfc2(x);
                    B r2 = select(a < B(0.), -z, z);
                    r1 = select(test1, r1, r2);
                    if (all(test1 || test3))
                        return r1;
                }
                B z = B(1.) - ex * erf_kernel<B>::erfc3(x);
                z = select(a < B(0.), -z, z);
#ifndef XSIMD_NO_INFINITIES
                z = select(xsimd::isinf(a), sign(a), z);
#endif
                return select(test2, r1, z);
            }
        };
    }

    // Definition of erf: passes x() to the compute function of the
    // detail::erf_impl specialization selected for batch_type_t<B>.
    template <class B>
    inline batch_type_t<B> erf(const simd_base<B>& x)
    {
        using batch_type = batch_type_t<B>;
        using impl_type = detail::erf_impl<batch_type>;
        return impl_type::compute(x());
    }

    /***********************
     * erfc implementation *
     ***********************/

    namespace detail
    {
        // Primary template, declared but not defined here: erfc is implemented
        // by the partial specializations on the scalar type T (float and
        // double) that follow.
        template <class B, class T = typename B::value_type>
        struct erfc_impl;

        /**
         * Single precision implementation of \c erfc.
         *
         * The kernels are evaluated on |a|. Lanes with |a| < 2/3 use
         * erfc3(|a| / (1 + |a|)); the remaining lanes use
         * exp(-a * a) * erfc2(|a| / (1 + |a|) - 0.4). Negative lanes are then
         * mapped to 2 minus the value obtained for |a|.
         */
        template <class B>
        struct erfc_impl<B, float>
        {
            /**
             * @param a batch of single precision values.
             * @return the complementary error function of \c a, lane by lane.
             */
            static inline B compute(const B& a)
            {
                const B abs_a = abs(a);
                const auto is_negative = a < B(0.);
                const auto is_small = abs_a < B(2.f / 3.f);
                B mapped = abs_a / (B(1.) + abs_a);

                B merged = B(0.);
                if (any(is_small))
                {
                    merged = erf_kernel<B>::erfc3(mapped);
                    // Nothing else to evaluate when every lane is in the small range.
                    if (all(is_small))
                    {
                        return reflect(is_negative, merged);
                    }
                }

                // Argument expected by the erfc2 kernel: |a| / (1 + |a|) - 0.4.
                mapped -= B(0.4f);
                const B large_part = exp(-abs_a * abs_a) * erf_kernel<B>::erfc2(mapped);
                merged = select(is_small, merged, large_part);
#ifndef XSIMD_NO_INFINITIES
                // Lanes where |a| is infinite are forced to 0 before the reflection.
                merged = select(abs_a == infinity<B>(), B(0.), merged);
#endif
                return reflect(is_negative, merged);
            }

        private:
            // Returns 2 - value on lanes flagged in negative_mask and value elsewhere.
            template <class M>
            static inline B reflect(const M& negative_mask, const B& value)
            {
                return select(negative_mask, B(2.) - value, value);
            }
        };

        template <class B>
        struct erfc_impl<B, double>
        {
            static inline B compute(const B& a)
            {
                B x = abs(a);
                B xx = x * x;
                B lim1 = B(0.65);
                B lim2 = B(2.2);
                auto test0 = a < B(0.);
                auto test1 = x < lim1;
                B r1 = B(0.);
                if (any(test1))
                {
                    r1 = B(1.) - x * erf_kernel<B>::erf1(xx);
                    if (all(test1))
                        return select(test0, B(2.) - r1, r1);
                }
                auto test2 = x < lim2;
                auto test3 = test2 && !test1;
                B ex = exp(-xx);
                if (any(test3))
                {
                    B z = ex * erf_kernel<B>::erfc2(x);
                    r1 = select(test1, r1, z);
                    if (all(test1 || test3))
                        return select(test0, B(2.) - r1, r1);
                }
                B z = ex * erf_kernel<B>::erfc3(x);
                r1 = select(test2, r1, z);
#ifndef XSIMD_NO_INFINITIES
                r1 = select(x == infinity<B>(), B(0.), r1);
#endif
                return select(test0, B(2.) - r1, r1);
            }
        };
    }

    // Definition of erfc: passes x() to the compute function of the
    // detail::erfc_impl specialization selected for batch_type_t<B>.
    template <class B>
    inline batch_type_t<B> erfc(const simd_base<B>& x)
    {
        using batch_type = batch_type_t<B>;
        using impl_type = detail::erfc_impl<batch_type>;
        return impl_type::compute(x());
    }
}

#endif

// Zerion Mini Shell 1.0 (web page residue, not part of the header)