ó
àÆ÷Xc           @` s'  d  d l  m Z m Z m Z d  d l m Z d  d l m Z d  d l m	 Z	 m
 Z
 m Z d  d l m Z m Z m Z d  d l m Z m Z m Z d  d l m Z m Z m Z d  d l m Z d	 e f d
 „  ƒ  YZ d „  Z d e e f d „  ƒ  YZ e e ƒ Z e e ƒ Z e e e g ƒ d „  ƒ Z  e e e ƒ g ƒ d „  ƒ Z! e d „ Z" d e e f d „  ƒ  YZ# e# d e ƒ Z$ e# d e ƒ Z% d „  Z& e' e& _( e e e g ƒ d „  ƒ Z) e e# d e ƒ g ƒ d „  ƒ Z* e j+ d e e  e) ƒ d d d ƒ e j+ d e e! e* d d ƒd d d d ƒ d S(   i    (   t   absolute_importt   print_functiont   division(   t   config(   t   in2out(   t   ldflagst   blas_header_textt   blas_header_version(   t
   blas_optdbt   optdbt   local_optimizer(   t   Gert   gert   ger_destructive(   t   Gemvt   gemv_inplacet   gemv_no_inplace(   t   basict   BaseBLASc           B` s5   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z RS(   c         C` s   t  ƒ  S(   N(   R   (   t   self(    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt   c_libraries   s    c         C` s   t  d t d t ƒ S(   Nt   libst   flags(   R   t   Falset   True(   R   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt   c_compile_args   s    c         C` s   t  d t d t ƒ S(   NR   t   libs_dir(   R   R   R   (   R   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt
   c_lib_dirs   s    c         C` s   t  d t d t ƒ S(   NR   t   include_dir(   R   R   R   (   R   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt   c_header_dirs   s    c         C` s   t  ƒ  S(   N(   R   (   R   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt   c_support_code   s    (   t   __name__t
   __module__R   R   R   R   R   (    (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyR      s
   				c         C` s   d t  ƒ  S(   Ns¬+  

    int elemsize ;

    if (PyArray_NDIM(%(A)s) != 2)
    {PyErr_SetString(PyExc_NotImplementedError, "rank(A) != 2"); %(fail)s;}
    if (PyArray_NDIM(%(x)s) != 1)
    {PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 1"); %(fail)s;}
    if (PyArray_NDIM(%(y)s) != 1)
    {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 1"); %(fail)s;}
    if (PyArray_NDIM(%(a)s) != 0)
    {PyErr_SetString(PyExc_NotImplementedError, "rank(a) != 0"); %(fail)s;}

    if (PyArray_DESCR(%(A)s)->type_num != PyArray_DESCR(%(x)s)->type_num)
    { PyErr_SetString(PyExc_TypeError, "A vs. x"); %(fail)s; }
    if (PyArray_DESCR(%(A)s)->type_num != PyArray_DESCR(%(y)s)->type_num)
    { PyErr_SetString(PyExc_TypeError, "A vs. y"); %(fail)s; }

    if (PyArray_DIMS(%(A)s)[0] != PyArray_DIMS(%(x)s)[0])
    {
        PyErr_SetString(PyExc_ValueError,
                        "Shape mismatch: A.shape[0] != x.shape[0]");
        %(fail)s;
    }
    if (PyArray_DIMS(%(A)s)[1] != PyArray_DIMS(%(y)s)[0])
    {
        PyErr_SetString(PyExc_ValueError,
                        "Shape mismatch: A.shape[1] != y.shape[0]");
        %(fail)s;
    }

    if  (PyArray_DESCR(%(A)s)->type_num == NPY_DOUBLE) { elemsize = 8; }
    else if (PyArray_DESCR(%(A)s)->type_num == NPY_FLOAT) { elemsize = 4;}
    else
    {
        PyErr_SetString(PyExc_NotImplementedError, "complex CGer");
        %(fail)s;
    }

    // copy A if !self.destructive or A is fully strided
    if (!%(destructive)s
        || (PyArray_STRIDES(%(A)s)[0] < 0)
        || (PyArray_STRIDES(%(A)s)[1] < 0)
        || ((PyArray_STRIDES(%(A)s)[0] != elemsize)
            && (PyArray_STRIDES(%(A)s)[1] != elemsize)))
    {
        npy_intp dims[2];
        dims[0] = PyArray_DIMS(%(A)s)[0];
        dims[1] = PyArray_DIMS(%(A)s)[1];

        if ((NULL == %(Z)s)
            || (PyArray_DIMS(%(Z)s)[0] != PyArray_DIMS(%(A)s)[0])
            || (PyArray_DIMS(%(Z)s)[1] != PyArray_DIMS(%(A)s)[1])
            || (PyArray_STRIDES(%(Z)s)[0] < 0)
            || (PyArray_STRIDES(%(Z)s)[1] < 0)
            || ((PyArray_STRIDES(%(Z)s)[0] != elemsize)
                && (PyArray_STRIDES(%(Z)s)[1] != elemsize)))
        {
            Py_XDECREF(%(Z)s);
            %(Z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims,
                                                       PyArray_TYPE(%(A)s));
            if(!%(Z)s) {
                PyErr_SetString(PyExc_MemoryError,
                                "failed to alloc ger output");
                %(fail)s
            }
        }
        if (%(Z)s == %(A)s)
        {
            PyErr_SetString(PyExc_AssertionError, "%(Z)s != %(A)s");
            %(fail)s
        }
        if (PyArray_DESCR(%(Z)s)->type_num == NPY_FLOAT)
        {
            float * zoutdata = (float*)PyArray_DATA(%(Z)s);
            const float * zdata = (float*)PyArray_DATA(%(A)s);
            const float * xdata = (float*)PyArray_DATA(%(x)s);
            const float * ydata = (float*)PyArray_DATA(%(y)s);
            const float * adata = (float*)PyArray_DATA(%(a)s);
            const float alpha = adata[0];
            float tmp, xx;
            int Ai = PyArray_STRIDES(%(A)s)[0]/sizeof(float);
            int Aj = PyArray_STRIDES(%(A)s)[1]/sizeof(float);
            int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(float);
            int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(float);
            int xi = PyArray_STRIDES(%(x)s)[0]/sizeof(float);
            int yj = PyArray_STRIDES(%(y)s)[0]/sizeof(float);
            for (int i = 0; i < dims[0]; ++i)
            {
                xx = alpha * xdata[xi * i];
                for (int j = 0; j < dims[1]; ++j)
                {
                    tmp = zdata[Ai*i+Aj*j];
                    tmp += xx * ydata[yj * j];
                    zoutdata[Zi*i+Zj*j] = tmp;
                }
            }
        }
        else if (PyArray_DESCR(%(Z)s)->type_num == NPY_DOUBLE)
        {
            double * zoutdata = (double*) PyArray_DATA(%(Z)s);
            const double * zdata = (double*)PyArray_DATA(%(A)s);
            const double * xdata = (double*)PyArray_DATA(%(x)s);
            const double * ydata = (double*)PyArray_DATA(%(y)s);
            const double * adata = (double*)PyArray_DATA(%(a)s);
            const double alpha = adata[0];
            double tmp, xx;

            int Ai = PyArray_STRIDES(%(A)s)[0]/sizeof(double);
            int Aj = PyArray_STRIDES(%(A)s)[1]/sizeof(double);
            int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(double);
            int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(double);
            int xi = PyArray_STRIDES(%(x)s)[0]/sizeof(double);
            int yj = PyArray_STRIDES(%(y)s)[0]/sizeof(double);
            for (int i = 0; i < dims[0]; ++i)
            {
                xx = alpha * xdata[xi * i];
                for (int j = 0; j < dims[1]; ++j)
                {
                    tmp = zdata[Ai*i+Aj*j];
                    tmp += xx * ydata[yj * j];
                    zoutdata[Zi*i+Zj*j] = tmp;
                }
            }
        }
        else
        {
            PyErr_SetString(PyExc_AssertionError,
                            "neither float nor double dtype");
            %(fail)s
        }
    }
    else
    {
        if (%(Z)s != %(A)s)
        {
            if (%(Z)s) { Py_DECREF(%(Z)s); }
            %(Z)s = %(A)s;
            Py_INCREF(%(Z)s);
        }
        npy_intp dims[2];
        dims[0] = PyArray_DIMS(%(A)s)[0];
        dims[1] = PyArray_DIMS(%(A)s)[1];
        if ((dims[0] * dims[1]) < 100000)
        {
            if (PyArray_DESCR(%(Z)s)->type_num == NPY_FLOAT)
            {
                float * zoutdata = (float*)PyArray_DATA(%(Z)s);
                const float * xdata = (float*)PyArray_DATA(%(x)s);
                const float * ydata = (float*)PyArray_DATA(%(y)s);
                const float * adata = (float*)PyArray_DATA(%(a)s);
                const float alpha = adata[0];
                float tmp, axi;
                int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(float);
                int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(float);
                int xi = PyArray_STRIDES(%(x)s)[0]/sizeof(float);
                int yj = PyArray_STRIDES(%(y)s)[0]/sizeof(float);
                for (int i = 0; i < dims[0]; ++i)
                {
                    axi = alpha * xdata[xi * i];
                    for (int j = 0; j < dims[1]; ++j)
                    {
                        zoutdata[Zi*i+Zj*j] += axi * ydata[yj * j];
                    }
                }
            }
            else if (PyArray_DESCR(%(Z)s)->type_num == NPY_DOUBLE)
            {
                double * zoutdata = (double*) PyArray_DATA(%(Z)s);
                const double * xdata = (double*)PyArray_DATA(%(x)s);
                const double * ydata = (double*)PyArray_DATA(%(y)s);
                const double * adata = (double*)PyArray_DATA(%(a)s);
                const double alpha = adata[0];
                double tmp, axi;

                int Zi = PyArray_STRIDES(%(Z)s)[0]/sizeof(double);
                int Zj = PyArray_STRIDES(%(Z)s)[1]/sizeof(double);
                int xi = PyArray_STRIDES(%(x)s)[0]/sizeof(double);
                int yj = PyArray_STRIDES(%(y)s)[0]/sizeof(double);
                for (int i = 0; i < dims[0]; ++i)
                {
                    axi = alpha * xdata[xi * i];
                    for (int j = 0; j < dims[1]; ++j)
                    {
                        zoutdata[Zi*i+Zj*j] += axi * ydata[yj * j];
                    }
                }
            }
        }
        else
        {
            int Nz0 = PyArray_DIMS(%(Z)s)[0];
            int Nz1 = PyArray_DIMS(%(Z)s)[1];
            int Sx = PyArray_STRIDES(%(x)s)[0] / elemsize;
            int Sy = PyArray_STRIDES(%(y)s)[0] / elemsize;

            /* create appropriate strides for Z, if it is a row or column matrix.
             * In that case, the value of the stride does not really matter, but
             * some versions of BLAS insist that:
             *  - they are not smaller than the number of elements in the array,
             *  - they are not 0.
             */
            int Sz0 = (Nz0 > 1) ? (PyArray_STRIDES(%(Z)s)[0] / elemsize) : (Nz1 + 1);
            int Sz1 = (Nz1 > 1) ? (PyArray_STRIDES(%(Z)s)[1] / elemsize) : (Nz0 + 1);

            dtype_%(x)s* x_data = (dtype_%(x)s*) PyArray_DATA(%(x)s);
            dtype_%(y)s* y_data = (dtype_%(y)s*) PyArray_DATA(%(y)s);
            // gemv expects pointers to the beginning of memory arrays,
            // but numpy provides provides a pointer to the first element,
            // so when the stride is negative, we need to get the last one.
            if (Sx < 0)
                x_data += (Nz0 - 1) * Sx;
            if (Sy < 0)
                y_data += (Nz1 - 1) * Sy;

            if (PyArray_STRIDES(%(Z)s)[0] == elemsize)
            {
                if (PyArray_DESCR(%(Z)s)->type_num == NPY_FLOAT)
                {
                    float alpha = ((dtype_%(a)s*)PyArray_DATA(%(a)s))[0];
                    sger_(&Nz0, &Nz1, &alpha,
                        (float*)x_data, &Sx,
                        (float*)y_data, &Sy,
                        (float*)(PyArray_DATA(%(Z)s)), &Sz1);
                }
                else if (PyArray_DESCR(%(Z)s)->type_num == NPY_DOUBLE)
                {
                    double alpha = ((dtype_%(a)s*)PyArray_DATA(%(a)s))[0];
                    dger_(&Nz0, &Nz1, &alpha,
                        (double*)x_data, &Sx,
                        (double*)y_data, &Sy,
                        (double*)(PyArray_DATA(%(Z)s)), &Sz1);


                }
                else {
                    PyErr_SetString(PyExc_NotImplementedError,
                                    "not float nor double");
                    %(fail)s
                }
            }
            else if (PyArray_STRIDES(%(Z)s)[1] == elemsize)
            {
                if (PyArray_DESCR(%(Z)s)->type_num == NPY_FLOAT)
                {
                    float alpha = ((dtype_%(a)s*)(PyArray_DATA(%(a)s)))[0];
                    sger_(&Nz1, &Nz0, &alpha,
                        (float*)y_data, &Sy,
                        (float*)x_data, &Sx,
                        (float*)(PyArray_DATA(%(Z)s)), &Sz0);
                }
                else if (PyArray_DESCR(%(Z)s)->type_num == NPY_DOUBLE)
                {
                    double alpha = ((dtype_%(a)s*)PyArray_DATA(%(a)s))[0];
                    dger_(&Nz1, &Nz0, &alpha,
                        (double*)y_data, &Sy,
                        (double*)x_data, &Sx,
                        (double*)(PyArray_DATA(%(Z)s)), &Sz0);
                }
                else
                {
                    PyErr_SetString(PyExc_NotImplementedError,
                                    "not float nor double");
                    %(fail)s
                }
            }
            else
            {
                PyErr_SetString(PyExc_AssertionError,
                    "A is a double-strided matrix, and should have been copied "
                    "into a memory-contiguous one.");
                %(fail)s
            }
        }
    }

    (   t   locals(   t   At   at   xt   yt   Zt   destructivet   fail(    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt
   ger_c_code!   s    ÿ t   CGerc           B` s   e  Z d  „  Z d „  Z RS(   c         C` sP   | \ } } } }	 | \ }
 t  | | | |	 |
 d t |  j ƒ d | d ƒ} | S(   NR'   R(   (   R)   t   intR'   (   R   t   nodet   namet   inpt   outt   subR"   R#   R$   R%   R&   t   code(    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt   c_code:  s    	c         C` s   d t  ƒ  f S(   Ni
   (   R   (   R   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt   c_code_cache_versionB  s    (   R   R    R2   R3   (    (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyR*   9  s   	c         C` sŠ   t  j j s d  S|  j t k rK |  j d j d k rK t t ƒ |  j	 Œ  g S|  j t
 k r† |  j d j d k r† t t ƒ |  j	 Œ  g Sd  S(   Ni    t   float32t   float64(   R4   R5   (   R4   R5   (   R   t   blasR   t   opR   t   outputst   dtypeR*   R   t   inputsR   R   (   R,   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt	   use_c_gerH  s    c         C` s3   t  |  j t ƒ r/ |  j j r/ t |  j Œ  g Sd  S(   N(   t
   isinstanceR7   R*   R'   t   cger_inplaceR:   (   R,   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt   make_c_ger_destructiveU  s    c	   
      C` s   d }	 |	 t  ƒ  S(   sl   
    z <- beta * y + alpha * dot(A, x)

    where A is a matrix, y and x are vectors (ergo z is vector)
    s„"  

    int elemsize;
    float fbeta;
    double dbeta;

    if (PyArray_DIMS(%(A)s)[0] != PyArray_DIMS(%(y)s)[0])
    {
        PyErr_SetString(PyExc_ValueError,
                        "Shape mismatch: A.shape[0] != y.shape[0]");
        %(fail)s;
    }
    if (PyArray_DIMS(%(A)s)[1] != PyArray_DIMS(%(x)s)[0])
    {
        PyErr_SetString(PyExc_ValueError,
                        "Shape mismatch: A.shape[1] != x.shape[0]");
        %(fail)s;
    }

    if  (PyArray_DESCR(%(y)s)->type_num == NPY_DOUBLE) { elemsize = 8; }
    else if (PyArray_DESCR(%(y)s)->type_num == NPY_FLOAT) { elemsize = 4;}
    else {
        PyErr_SetString(PyExc_NotImplementedError, "complex Gemv");
        %(fail)s;
    }

    fbeta = dbeta = ((dtype_%(beta)s*)PyArray_DATA(%(beta)s))[0];

    // copy y if not destructive
    if (!%(destructive)s)
    {
        if ((NULL == %(z)s)
            || (PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(y)s)[0]))
        {
            Py_XDECREF(%(z)s);
            %(z)s = (PyArrayObject*)PyArray_SimpleNew(1,
                PyArray_DIMS(%(y)s), PyArray_TYPE(%(y)s));
            if(!%(z)s) {
                PyErr_SetString(PyExc_MemoryError,
                                "failed to alloc gemv output");
                %(fail)s
            }
        }
        if (dbeta != 0)
        {
            if (PyArray_CopyInto(%(z)s, %(y)s) != 0) {
                %(fail)s
            }
        }
        else if (%(force_init_beta)d)
        {
            PyObject *zero = PyFloat_FromDouble(0.);
            if (zero == NULL) %(fail)s;
            if (PyArray_FillWithScalar(%(z)s, zero) != 0) %(fail)s;
            Py_DECREF(zero);
        }
    }
    else
    {
        if (%(z)s != %(y)s)
        {
            Py_XDECREF(%(z)s);
            %(z)s = %(y)s;
            Py_INCREF(%(z)s);
        }
    }
    {
        char TRANS = 'T';
        char NOTRANS = 'N';
        int NA0 = PyArray_DIMS(%(A)s)[0];
        int NA1 = PyArray_DIMS(%(A)s)[1];
        /* This formula is needed in the case where A is actually a row or
         * column matrix, because BLAS sometimes insists that the strides:
         *  - are not smaller than the number of elements in the array
         *  - are not 0.
         */
        int SA0 = (NA0 > 1) ? (PyArray_STRIDES(%(A)s)[0] / elemsize) : (NA1 + 1);
        int SA1 = (NA1 > 1) ? (PyArray_STRIDES(%(A)s)[1] / elemsize) : (NA0 + 1);
        int Sz = PyArray_STRIDES(%(z)s)[0] / elemsize;
        int Sx = PyArray_STRIDES(%(x)s)[0] / elemsize;

        dtype_%(x)s* x_data = (dtype_%(x)s*) PyArray_DATA(%(x)s);
        dtype_%(z)s* z_data = (dtype_%(z)s*) PyArray_DATA(%(z)s);
        // gemv expects pointers to the beginning of memory arrays,
        // but numpy provides provides a pointer to the first element,
        // so when the stride is negative, we need to get the last one.
        if (Sx < 0)
            x_data += (NA1 - 1) * Sx;
        if (Sz < 0)
            z_data += (NA0 - 1) * Sz;

        if (NA0 * NA1)
        {
            // If A is neither C- nor F-contiguous, we make a copy.
            // TODO:
            // - if one stride is equal to "- elemsize", we can still call
            //   gemv on reversed matrix and vectors
            // - if the copy is too long, maybe call vector/vector dot on
            //   each row instead
            if ((PyArray_STRIDES(%(A)s)[0] < 0)
                || (PyArray_STRIDES(%(A)s)[1] < 0)
                || ((PyArray_STRIDES(%(A)s)[0] != elemsize)
                    && (PyArray_STRIDES(%(A)s)[1] != elemsize)))
            {
                npy_intp dims[2];
                dims[0] = NA0;
                dims[1] = NA1;

                PyArrayObject * A_copy = (PyArrayObject *) PyArray_Copy(
                                                                   %(A)s);
                if (!A_copy)
                    %(fail)s
                Py_XDECREF(%(A)s);
                %(A)s = A_copy;
                SA0 = (NA0 > 1) ? (PyArray_STRIDES(%(A)s)[0] / elemsize) : (NA1 + 1);
                SA1 = (NA1 > 1) ? (PyArray_STRIDES(%(A)s)[1] / elemsize) : (NA0 + 1);
            }

            if (PyArray_STRIDES(%(A)s)[0] == elemsize)
            {
                if (PyArray_DESCR(%(A)s)->type_num == NPY_FLOAT)
                {
                    float alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
                    sgemv_(&NOTRANS, &NA0, &NA1,
                        &alpha,
                        (float*)(PyArray_DATA(%(A)s)), &SA1,
                        (float*)x_data, &Sx,
                        &fbeta,
                        (float*)z_data, &Sz);
                }
                else if (PyArray_DESCR(%(A)s)->type_num == NPY_DOUBLE)
                {
                    double alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];
                    dgemv_(&NOTRANS, &NA0, &NA1,
                        &alpha,
                        (double*)(PyArray_DATA(%(A)s)), &SA1,
                        (double*)x_data, &Sx,
                        &dbeta,
                        (double*)z_data, &Sz);
                }
                else
                {
                    PyErr_SetString(PyExc_AssertionError,
                                    "neither float nor double dtype");
                    %(fail)s
                }
            }
            else if (PyArray_STRIDES(%(A)s)[1] == elemsize)
            {
                if (PyArray_DESCR(%(A)s)->type_num == NPY_FLOAT)
                {
                    float alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];

                    // Check for vector-vector dot (NA0 == 1). The code may work
                    // for SA1 != 1 as well, but has not been tested for this case,
                    // so SA1 == 1 is required for safety.
                    if (NA0 == 1 && SA1 == 1)
                    {
                        if (fbeta != 0.f) {
                          z_data[0] = fbeta*z_data[0];
                        } else {
                          z_data[0] = 0.f;
                        }
                        z_data[0] += alpha*sdot_(&NA1,
                              (float*)(PyArray_DATA(%(A)s)), &SA1,
                              (float*)x_data, &Sx);
                    }
                    else
                    {
                        sgemv_(&TRANS, &NA1, &NA0,
                            &alpha,
                            (float*)(PyArray_DATA(%(A)s)), &SA0,
                            (float*)x_data, &Sx,
                            &fbeta,
                            (float*)z_data, &Sz);
                    }
                }
                else if (PyArray_DESCR(%(A)s)->type_num == NPY_DOUBLE)
                {
                    double alpha = ((dtype_%(alpha)s*)PyArray_DATA(%(alpha)s))[0];

                    // Check for vector-vector dot (NA0 == 1). The code may work
                    // for SA1 != 1 as well, but has not been tested for this case,
                    // so SA1 == 1 is required for safety.
                    if (NA0 == 1 && SA1 == 1)
                    {
                        if (dbeta != 0.) {
                          z_data[0] = dbeta*z_data[0];
                        } else {
                          z_data[0] = 0.;
                        }
                        z_data[0] += alpha*ddot_(&NA1,
                              (double*)(PyArray_DATA(%(A)s)), &SA1,
                              (double*)x_data, &Sx);
                    }
                    else
                    {
                        dgemv_(&TRANS, &NA1, &NA0,
                            &alpha,
                            (double*)(PyArray_DATA(%(A)s)), &SA0,
                            (double*)x_data, &Sx,
                            &dbeta,
                            (double*)z_data, &Sz);
                    }
                }
                else
                {
                    PyErr_SetString(PyExc_AssertionError,
                                    "neither float nor double dtype");
                    %(fail)s
                }
            }
            else
            {
                PyErr_SetString(PyExc_AssertionError,
                    "xx is a double-strided matrix, and should have been "
                    "copied into a memory-contiguous one.");
                %(fail)s
            }
        }
        else if (dbeta != 1.0)
        {
            // the matrix has at least one dim of length 0
            // so we do this loop, which either iterates over 0 elements
            // or else it does the right thing for length-0 A.
            dtype_%(z)s * zptr = (dtype_%(z)s*)(PyArray_DATA(%(z)s));
            for (int i = 0; i < NA0; ++i)
            {
                zptr[i * Sz] = (dbeta == 0.0 ? 0.0 : zptr[i * Sz] * dbeta);
            }
        }
    }
    (   R!   (
   R%   R"   R$   t   zt   alphat   betaR'   R(   t   force_init_betaR1   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt   gemv_c_code`  s    ït   CGemvc           B` s#   e  Z d  „  Z d „  Z d „  Z RS(   c         C` s   t  t |  ƒ j | ƒ d  S(   N(   t   superRD   t   __init__(   R   t   inplace(    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyRF   T  s    c         C` s_   | \ } } } }	 }
 | \ } t  | | |	 | | |
 d t |  j ƒ d | d d t ƒ  ƒ} | S(   NR'   R(   RB   (   RC   R+   RG   t   check_force_gemv_init(   R   R,   R-   R.   R/   R0   R%   R@   R"   R$   RA   R?   R1   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyR2   W  s    	
c         C` s   d t  ƒ  t ƒ  f S(   Ni   (   R   RH   (   R   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyR3   b  s    (   R   R    RF   R2   R3   (    (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyRD   S  s   		RG   c          C` sš   t  j d  k r“ d d l m }  d } |  j | d d d t d t d t d t ƒ d	 t ƒ} | r‡ | d r{ | d
 t  _ q t t  _ q“ t t  _ n  t  j S(   Ni    (   t   GCC_compilersö  
#include <math.h>
extern "C" void dgemv_(char*, const int*, const int*, const double *, const double *, const int*, const double *, const int*, const double *, double *, const int *);
int main() {
  double A[2][2] = {{1., 1.}, {1., 1.}};
  double x[2] = {1., 1.};
  double y[2] = {NAN, NAN};
  const int s = 2;
  const int inc = 1;
  const double alpha = 1.0;
  const double beta = 0.0;

  dgemv_("T", &s, &s, &alpha, A, &s, x, &inc, &beta, &y, &inc);

  return (isnan(y[0]) || isnan(y[1]) ? 1 : 0;
}
t
   tmp_prefixt   check_beta_R   R   R   t   try_runi   (	   RH   t   _force_init_betat   Nonet   theano.gof.cmoduleRI   t   try_compile_tmpR   R   R   (   RI   t	   test_codet   res(    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyRH   i  s    #		
c         C` s~   t  j j s d  S|  j t k rE |  j d j d k rE t |  j Œ  g S|  j t	 k rz |  j d j d k rz t
 |  j Œ  g Sd  S(   Ni    R4   R5   (   s   float32s   float64(   s   float32s   float64(   R   R6   R   R7   R   R8   R9   t   cgemv_no_inplaceR:   R   t   cgemv_inplace(   R,   (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt
   use_c_gemv   s    c         C` s¤   t  |  j t ƒ r  |  j j r  t |  j ƒ } | d } | j r“ t  | j j t j ƒ r“ t	 | j
 ƒ d k r“ t j | j ƒ | j j Œ  | d <n  t | Œ  g Sd  S(   Ni    i   (   R<   R7   RD   RG   t   listR:   t   ownert   Tt
   AllocEmptyt   lent   clientsR9   RT   (   R,   R:   t   dest(    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt   make_c_gemv_destructive­  s    
	%t
   use_c_blasi   t   fast_runt   c_blast   c_blas_destructiveR-   g     €Q@N(,   t
   __future__R    R   R   t   theanoR   t   theano.tensor.optR   t   theano.tensor.blasR   R   R   R   R	   R
   R   R   R   R   R   R   t   theano.tensorR   RX   t   objectR   R)   R*   R   R=   R   t   cger_no_inplaceR;   R>   RC   RD   RT   RS   RH   RN   RM   RU   R]   t   register(    (    (    s4   /tmp/pip-build-X4mzal/theano/theano/tensor/blas_c.pyt   <module>   s>   	ÿ ò	4	!		