%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /backups/router/usr/local/lib/python3.11/site-packages/numexpr/
Upload File :
Create Path :
Current File : //backups/router/usr/local/lib/python3.11/site-packages/numexpr/module.cpp

// Numexpr - Fast numerical array expression evaluator for NumPy.
//
//      License: MIT
//      Author:  See AUTHORS.txt
//
//  See LICENSE.txt for details about copyright and rights to use.
//
// module.cpp contains the CPython-specific module exposure.

#define DO_NUMPY_IMPORT_ARRAY

#include "module.hpp"
#include <structmember.h>
#include <vector>

#include <signal.h>

#include "interpreter.hpp"
#include "numexpr_object.hpp"

using namespace std;

// Global state. The file interpreter.hpp also has some global state
// in its 'th_params' variable
global_state gs;
long global_max_threads=DEFAULT_MAX_THREADS;

/* Do the worker job for a certain thread */
void *th_worker(void *tidptr)
{
    int tid = *(int *)tidptr;
    /* Parameters for threads */
    npy_intp start;
    npy_intp vlen;
    npy_intp block_size;
    NpyIter *iter;
    vm_params params;
    int *pc_error;
    int ret;
    int n_inputs;
    int n_constants;
    int n_temps;
    size_t memsize;
    char **mem;
    npy_intp *memsteps;
    npy_intp istart, iend;
    char **errmsg;
    // For output buffering if needed
    vector<char> out_buffer;

    while (1) {

        /* Sentinels have to be initialised yet */
        gs.init_sentinels_done = 0;

        /* Meeting point for all threads (wait for initialization) */
        pthread_mutex_lock(&gs.count_threads_mutex);
        if (gs.count_threads < gs.nthreads) {
            gs.count_threads++;
            /* Beware of spurious wakeups. See issue pydata/numexpr#306. */
            do {
                pthread_cond_wait(&gs.count_threads_cv,
                                  &gs.count_threads_mutex);
            } while (!gs.barrier_passed);
        }
        else {
            gs.barrier_passed = 1;
            pthread_cond_broadcast(&gs.count_threads_cv);
        }
        pthread_mutex_unlock(&gs.count_threads_mutex);

        /* Check if thread has been asked to return */
        if (gs.end_threads) {
            return(0);
        }

        /* Get parameters for this thread before entering the main loop */
        start = th_params.start;
        vlen = th_params.vlen;
        block_size = th_params.block_size;
        params = th_params.params;
        pc_error = th_params.pc_error;

        // If output buffering is needed, allocate it
        if (th_params.need_output_buffering) {
            out_buffer.resize(params.memsizes[0] * BLOCK_SIZE1);
            params.out_buffer = &out_buffer[0];
        } else {
            params.out_buffer = NULL;
        }

        /* Populate private data for each thread */
        n_inputs = params.n_inputs;
        n_constants = params.n_constants;
        n_temps = params.n_temps;
        memsize = (1+n_inputs+n_constants+n_temps) * sizeof(char *);
        /* XXX malloc seems thread safe for POSIX, but for Win? */
        mem = (char **)malloc(memsize);
        memcpy(mem, params.mem, memsize);

        errmsg = th_params.errmsg;

        params.mem = mem;

        /* Loop over blocks */
        pthread_mutex_lock(&gs.count_mutex);
        if (!gs.init_sentinels_done) {
            /* Set sentinels and other global variables */
            gs.gindex = start;
            istart = gs.gindex;
            iend = istart + block_size;
            if (iend > vlen) {
                iend = vlen;
            }
            gs.init_sentinels_done = 1;  /* sentinels have been initialised */
            gs.giveup = 0;            /* no giveup initially */
        } else {
            gs.gindex += block_size;
            istart = gs.gindex;
            iend = istart + block_size;
            if (iend > vlen) {
                iend = vlen;
            }
        }
        /* Grab one of the iterators */
        iter = th_params.iter[tid];
        if (iter == NULL) {
            th_params.ret_code = -1;
            gs.giveup = 1;
        }
        memsteps = th_params.memsteps[tid];
        /* Get temporary space for each thread */
        ret = get_temps_space(params, mem, BLOCK_SIZE1);
        if (ret < 0) {
            /* Propagate error to main thread */
            th_params.ret_code = ret;
            gs.giveup = 1;
        }
        pthread_mutex_unlock(&gs.count_mutex);

        while (istart < vlen && !gs.giveup) {
            /* Reset the iterator to the range for this task */
            ret = NpyIter_ResetToIterIndexRange(iter, istart, iend,
                                                errmsg);
            /* Execute the task */
            if (ret >= 0) {
                ret = vm_engine_iter_task(iter, memsteps, params, pc_error, errmsg);
            }

            if (ret < 0) {
                pthread_mutex_lock(&gs.count_mutex);
                gs.giveup = 1;
                /* Propagate error to main thread */
                th_params.ret_code = ret;
                pthread_mutex_unlock(&gs.count_mutex);
                break;
            }

            pthread_mutex_lock(&gs.count_mutex);
            gs.gindex += block_size;
            istart = gs.gindex;
            iend = istart + block_size;
            if (iend > vlen) {
                iend = vlen;
            }
            pthread_mutex_unlock(&gs.count_mutex);
        }

        /* Meeting point for all threads (wait for finalization) */
        pthread_mutex_lock(&gs.count_threads_mutex);
        if (gs.count_threads > 0) {
            gs.count_threads--;
            do {
                pthread_cond_wait(&gs.count_threads_cv,
                                  &gs.count_threads_mutex);
            } while (gs.barrier_passed);
        }
        else {
            gs.barrier_passed = 0;
            pthread_cond_broadcast(&gs.count_threads_cv);
        }
        pthread_mutex_unlock(&gs.count_threads_mutex);

        /* Release resources */
        free_temps_space(params, mem);
        free(mem);

    }  /* closes while(1) */

    /* This should never be reached, but anyway */
    return(0);
}

/* Initialize threads */
int init_threads(void)
{
    int tid, rc;

    if ( !(gs.nthreads > 1 && (!gs.init_threads_done || gs.pid != getpid())) ) {
        /* Thread pool must always be initialized once and once only. */
        return(0);
    }

    /* Initialize mutex and condition variable objects */
    pthread_mutex_init(&gs.count_mutex, NULL);
    pthread_mutex_init(&gs.parallel_mutex, NULL);

    /* Barrier initialization */
    pthread_mutex_init(&gs.count_threads_mutex, NULL);
    pthread_cond_init(&gs.count_threads_cv, NULL);
    gs.count_threads = 0;      /* Reset threads counter */
    gs.barrier_passed = 0;

    /*
     * Our worker threads should not deal with signals from the rest of the
     * application - mask everything temporarily in this thread, so our workers
     * can inherit that mask
     */
    sigset_t sigset_block_all, sigset_restore;
    rc = sigfillset(&sigset_block_all);
    if (rc != 0) {
        fprintf(stderr, "ERROR; failed to block signals: sigfillset: %s",
                strerror(rc));
        exit(-1);
    }
    rc = pthread_sigmask( SIG_BLOCK, &sigset_block_all, &sigset_restore);
    if (rc != 0) {
        fprintf(stderr, "ERROR; failed to block signals: pthread_sigmask: %s",
                strerror(rc));
        exit(-1);
    }

    /* Now create the threads */
    for (tid = 0; tid < gs.nthreads; tid++) {
        gs.tids[tid] = tid;
        rc = pthread_create(&gs.threads[tid], NULL, th_worker,
                            (void *)&gs.tids[tid]);
        if (rc) {
            fprintf(stderr,
                    "ERROR; return code from pthread_create() is %d\n", rc);
            fprintf(stderr, "\tError detail: %s\n", strerror(rc));
            exit(-1);
        }
    }

    /*
     * Restore the signal mask so the main thread can process signals as
     * expected
     */
    rc = pthread_sigmask( SIG_SETMASK, &sigset_restore, NULL);
    if (rc != 0) {
        fprintf(stderr,
                "ERROR: failed to restore signal mask: pthread_sigmask: %s",
                strerror(rc));
        exit(-1);
    }

    gs.init_threads_done = 1;                 /* Initialization done! */
    gs.pid = (int)getpid();                   /* save the PID for this process */

    return(0);
}

/* Set the number of threads in numexpr's VM */
int numexpr_set_nthreads(int nthreads_new)
{
    int nthreads_old = gs.nthreads;
    int t, rc;
    void *status;

    // if (nthreads_new > MAX_THREADS) {
    //     fprintf(stderr,
    //             "Error.  nthreads cannot be larger than MAX_THREADS (%d)",
    //             MAX_THREADS);
    //     return -1;
    // }
    if (nthreads_new > global_max_threads) {
        fprintf(stderr,
                "Error.  nthreads cannot be larger than environment variable \"NUMEXPR_MAX_THREADS\" (%ld)",
                global_max_threads);
        return -1;
    }
    else if (nthreads_new <= 0) {
        fprintf(stderr, "Error.  nthreads must be a positive integer");
        return -1;
    }

    /* Only join threads if they are not initialized or if our PID is
       different from that in pid var (probably means that we are a
       subprocess, and thus threads are non-existent). */
    if (gs.nthreads > 1 && gs.init_threads_done && gs.pid == getpid()) {
        /* Tell all existing threads to finish */
        gs.end_threads = 1;
        pthread_mutex_lock(&gs.count_threads_mutex);
        if (gs.count_threads < gs.nthreads) {
            gs.count_threads++;
            do {
                pthread_cond_wait(&gs.count_threads_cv,
                                  &gs.count_threads_mutex);
            } while (!gs.barrier_passed);
        }
        else {
            gs.barrier_passed = 1;
            pthread_cond_broadcast(&gs.count_threads_cv);
        }
        pthread_mutex_unlock(&gs.count_threads_mutex);

        /* Join exiting threads */
        for (t=0; t<gs.nthreads; t++) {
            rc = pthread_join(gs.threads[t], &status);
            if (rc) {
                fprintf(stderr,
                        "ERROR; return code from pthread_join() is %d\n",
                        rc);
                fprintf(stderr, "\tError detail: %s\n", strerror(rc));
                exit(-1);
            }
        }
        gs.init_threads_done = 0;
        gs.end_threads = 0;
    }

    /* Launch a new pool of threads (if necessary) */
    gs.nthreads = nthreads_new;
    init_threads();

    return nthreads_old;
}


#ifdef USE_VML

static PyObject *
_get_vml_version(PyObject *self, PyObject *args)
{
    int len=198;
    char buf[198];
    mkl_get_version_string(buf, len);
    return Py_BuildValue("s", buf);
}

static PyObject *
_set_vml_accuracy_mode(PyObject *self, PyObject *args)
{
    int mode_in, mode_old;
    if (!PyArg_ParseTuple(args, "i", &mode_in))
    return NULL;
    mode_old = vmlGetMode() & VML_ACCURACY_MASK;
    vmlSetMode((mode_in & VML_ACCURACY_MASK) | VML_ERRMODE_IGNORE );
    return Py_BuildValue("i", mode_old);
}

static PyObject *
_set_vml_num_threads(PyObject *self, PyObject *args)
{
    int max_num_threads;
    if (!PyArg_ParseTuple(args, "i", &max_num_threads))
    return NULL;
    mkl_domain_set_num_threads(max_num_threads, MKL_DOMAIN_VML);
    Py_RETURN_NONE;
}

static PyObject *
_get_vml_num_threads(PyObject *self, PyObject *args)
{
    int max_num_threads = mkl_domain_get_max_threads (MKL_DOMAIN_VML);
    return Py_BuildValue("i", max_num_threads);
}

#endif

static PyObject*
Py_set_num_threads(PyObject *self, PyObject *args)
{
    int num_threads, nthreads_old;
    if (!PyArg_ParseTuple(args, "i", &num_threads))
    return NULL;
    nthreads_old = numexpr_set_nthreads(num_threads);
    return Py_BuildValue("i", nthreads_old);
}

static PyObject*
Py_get_num_threads(PyObject *self, PyObject *args) 
{
    int n_thread;
    n_thread = gs.nthreads;
    return Py_BuildValue("i", n_thread);
}

static PyMethodDef module_methods[] = {
#ifdef USE_VML
    {"_get_vml_version", _get_vml_version, METH_VARARGS,
     "Get the VML/MKL library version."},
    {"_set_vml_accuracy_mode", _set_vml_accuracy_mode, METH_VARARGS,
     "Set accuracy mode for VML functions."},
    {"_set_vml_num_threads", _set_vml_num_threads, METH_VARARGS,
     "Suggests a maximum number of threads to be used in VML operations."},
    {"_get_vml_num_threads", _get_vml_num_threads, METH_VARARGS,
     "Gets the maximum number of threads to be used in VML operations."},
#endif
    {"_set_num_threads", Py_set_num_threads, METH_VARARGS,
     "Suggests a maximum number of threads to be used in operations."},
    {"_get_num_threads", Py_get_num_threads, METH_VARARGS,
     "Gets the maximum number of threads currently in use for operations."},
    {NULL}
};

static int
add_symbol(PyObject *d, const char *sname, int name, const char* routine_name)
{
    PyObject *o, *s;
    int r;

    if (!sname) {
        return 0;
    }

    o = PyLong_FromLong(name);
    s = PyBytes_FromString(sname);
    if (!o || !s) {
        PyErr_SetString(PyExc_RuntimeError, routine_name);
        r = -1;
    }
    else {
        r = PyDict_SetItem(d, s, o);
    }
    Py_XDECREF(o);
    Py_XDECREF(s);
    return r;
}

#ifdef __cplusplus
extern "C" {
#endif

/* XXX: handle the "global_state" state via moduledef */
static struct PyModuleDef moduledef = {
        PyModuleDef_HEAD_INIT,
        "interpreter",
        NULL,
        -1,                 /* sizeof(struct global_state), */
        module_methods,
        NULL,
        NULL,               /* module_traverse, */
        NULL,               /* module_clear, */
        NULL
};

#define INITERROR return NULL

PyObject *
PyInit_interpreter(void) {
    PyObject *m, *d;


    char *max_thread_str = getenv("NUMEXPR_MAX_THREADS");
    char *end;
    if (max_thread_str != NULL) {
        global_max_threads = strtol(max_thread_str, &end, 10);
    }



    th_params.memsteps    = (npy_intp**)calloc(sizeof(npy_intp*), global_max_threads);
    th_params.iter        = (NpyIter**)calloc(sizeof(NpyIter*),   global_max_threads);
    th_params.reduce_iter = (NpyIter**)calloc(sizeof(NpyIter*),   global_max_threads);
    gs.threads            = (pthread_t*)calloc(sizeof(pthread_t), global_max_threads);
    gs.tids               = (int*)calloc(sizeof(int),             global_max_threads);
    // TODO: for Py3, deallocate: https://docs.python.org/3/c-api/module.html#c.PyModuleDef.m_free
    // For Python 2.7, people have to exit the process to reclaim the memory.

    if (PyType_Ready(&NumExprType) < 0)
        INITERROR;

    m = PyModule_Create(&moduledef);

    if (m == NULL)
        INITERROR;

    Py_INCREF(&NumExprType);
    PyModule_AddObject(m, "NumExpr", (PyObject *)&NumExprType);

    import_array();

    d = PyDict_New();
    if (!d) INITERROR;

#define OPCODE(n, name, sname, ...)                              \
    if (add_symbol(d, sname, name, "add_op") < 0) { INITERROR; }
#include "opcodes.hpp"
#undef OPCODE

    if (PyModule_AddObject(m, "opcodes", d) < 0) INITERROR;

    d = PyDict_New();
    if (!d) INITERROR;

#define add_func(name, sname)                           \
    if (add_symbol(d, sname, name, "add_func") < 0) { INITERROR; }
#define FUNC_FF(name, sname, ...)  add_func(name, sname);
#define FUNC_FFF(name, sname, ...) add_func(name, sname);
#define FUNC_DD(name, sname, ...)  add_func(name, sname);
#define FUNC_DDD(name, sname, ...) add_func(name, sname);
#define FUNC_CC(name, sname, ...)  add_func(name, sname);
#define FUNC_CCC(name, sname, ...) add_func(name, sname);
#include "functions.hpp"
#undef FUNC_CCC
#undef FUNC_CC
#undef FUNC_DDD
#undef FUNC_DD
#undef FUNC_DD
#undef FUNC_FFF
#undef FUNC_FF
#undef add_func

    if (PyModule_AddObject(m, "funccodes", d) < 0) INITERROR;

    if (PyModule_AddObject(m, "allaxes", PyLong_FromLong(255)) < 0) INITERROR;
    if (PyModule_AddObject(m, "maxdims", PyLong_FromLong(NPY_MAXDIMS)) < 0) INITERROR;

    if(PyModule_AddIntConstant(m, "MAX_THREADS", global_max_threads) < 0) INITERROR;

    // Let's export the block sizes to Python side for benchmarking comparisons
    if(PyModule_AddIntConstant(m, "__BLOCK_SIZE1__", BLOCK_SIZE1) < 0) INITERROR;
    // Export if we are using VML or not
#ifdef USE_VML
    if(PyModule_AddObject(m, "use_vml", Py_True) < 0) INITERROR;
#else
    if(PyModule_AddObject(m, "use_vml", Py_False) < 0) INITERROR;
#endif

    return m;
}

#ifdef __cplusplus
}  // extern "C"
#endif

Zerion Mini Shell 1.0