
Xc           @` s  d  Z  d d l m Z m Z m Z d d l Z d d l Z d d l Z d d l m	 Z	 d d l
 Z
 d d l
 m Z d d l
 m Z d d l m Z m Z d d l m Z d d	 l m Z m Z m Z m Z d d
 l m Z m Z d d l m Z d d l m Z d d l m Z m  Z  d d l! m" Z" d d l! m# Z# d d l$ m% Z% d e j& f d     YZ' e'   Z( d e j& f d     YZ) e)   Z* d e j& f d     YZ+ e+   Z, d e j& f d     YZ- e-   Z. e j/ d d  e j0 e j1 g  d     Z2 e j/ d d  e j0 e) g  d     Z3 d   Z4 d   Z5 d   Z6 e j/ d   e j0 e, g  d!     Z7 d"   Z8 e j9 j: e8 d#  d$ e j& f d%     YZ; d& e j& f d'     YZ< e;   Z= e<   Z> d(   Z? d)   Z@ d*   ZA d+   ZB d, e j& f d-     YZC eC   ZD d. e j& f d/     YZE eE   ZF e jG d   e j/ d   e jH d0      ZI e jH d1    ZJ e jK d2 eJ d3 d4 d5 d   e j/ d  d6  e j0 e* g  d7     ZL e j/ d   e j0 e jM g  d8     ZN d9   ZO eP d:  ZQ e j/ d   e j0 e jR e jS g  d;     ZT e j/ d   e j0 e* g  d<     ZU e j/ d   e j0 e( g  d=     ZV e j/ e jG e jW e j0 e< g  d>       ZX d?   ZY d@   ZZ dA e j& f dB     YZ[ dC e j& f dD     YZ\ e\   Z] e[ dE  Z^ e[ dF  Z_ d dG  Z` d dH  Zb dI dJ  Zc dK e
 j jd f dL     YZe ee e
 j jf dM dN Zg e j1 eg dM dO Zh dP   Zi d S(Q   s  
Provides neural-network specific Ops.

Notes
-----
TODO: factor this out into a neural-network toolbox.

We register all optimization with the gpu tag as we don't
implement all the intermediate case on the GPU (in particular
AdvancedSubtensor). So to make sure it run well on the gpu with
fast_compile, we register them as needed for the GPU. This can be
revisited later when all the intermediate part are on the GPU.

i    (   t   absolute_importt   print_functiont   divisionN(   t   xrange(   t   gof(   t   scalar(   t	   extra_opst   as_tensor_variable(   t   copy_stack_trace(   t   basict	   subtensort   optt   elemwise(   t   values_eq_approx_remove_inft   values_eq_approx_remove_nan(   t   optdb(   t   Apply(   t   sigmoidt   softplus(   t   DisconnectedType(   t   grad_not_implemented(   t   sparse_block_dott   SoftmaxWithBiasc           B` st   e  Z d  Z d Z d Z d Z d   Z d   Z d   Z d   Z	 d   Z
 e d    Z d	   Z e d
    Z RS(   sE  
    An L{Op} for the output of neural-net multiclass classifiers.

    Attributes
    ----------
    x : a matrix of floats (32 or 64)
    b : a [row] vector of floats (32 or 64), length is number of cols in x

    This L{Op}'s output is softmax(x+b).
    softmax(x[i]) is the i'th distribution over len(x[i]) options.

    i   i   c         C` s   t  j |  } t  j |  } | j j d k sE | j j t  j k rT t d   n  | j j d k s{ | j j t  j k r t d   n  | j   } t |  | | g | g  S(   Ni   s   x must be 2-d tensor of floatsi   s   b must be 1-d tensor of floats(   t   tensorR   t   typet   ndimt   dtypet   float_dtypest
   ValueErrorR   (   t   selft   xt   bt   sm(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt	   make_node=   s    c   	      C` s)  | \ } } | j  d | j  d k r5 t d   n  | j d k rk t j | j  d | j | d d <d  S| j } | j d k r | j d  } n  | | d  d  d   f } t j | | j	 d d  d  d   d  f  } | d | j
 d d  d  d   d  f 9} | j | d	 t | d d <d  S(
   Ni    i   s'   b must have same number of columns as xR   t   float16t   float32t   axisg      ?t   copy(   t   shapeR   t   sizet   numpyt   zerosR   t   astypet   Nonet   expt   maxt   sumt   False(	   R   t   nodet   input_storaget   output_storageR   R   t   x_dtypet   x_plus_bt   e_x(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   performJ   s    	#	/*c   	      C` sz   | \ } } | \ } t  | j t  r= t     t     g St | |  } t | |  } t j | d d } | | f S(   NR$   i    (   t
   isinstanceR   R   t   softmax_with_biast   softmax_gradR   R.   (	   R   t   inpt   gradsR   R   t   g_smR    t   dxt   db(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   gradg   s    	c         C` s   | d g S(   Ni    (    (   R   R0   R&   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   infer_shapes   s    c         C` s
   d d g S(   Ns
   <iostream>s   <cmath>(    (   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt	   c_headersv   s    c         C` s   d } d } d } y< t  j j j |  d d d  } d t   } d t   } Wn t  j j j k
 rj n Xd } | | | | f S(	   Nsi  
        npy_intp* Nx = PyArray_DIMS(%(x)s);
        npy_intp Sx = 0;
        npy_intp Sb = 0;
        npy_intp Ssm = 0;


        if (PyArray_NDIM(%(x)s) != 2)
        {
            PyErr_SetString(PyExc_ValueError, "not a 2d tensor");
            %(fail)s;
        }
        if (PyArray_NDIM(%(b)s) != 1)
        {
            PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
            %(fail)s;
        }
        if ((PyArray_TYPE(%(x)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(x)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError, "not a float");
            %(fail)s;
        }
        if ((PyArray_TYPE(%(b)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(b)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError, "b not float");
            %(fail)s;
        }
        if ((PyArray_DIMS(%(x)s)[1] != PyArray_DIMS(%(b)s)[0]))
        {
            PyErr_Format(PyExc_ValueError,
                         "number of columns in x (%%ld) does not match length of b (%%ld)",
                (long int)PyArray_DIMS(%(x)s)[1], (long int)PyArray_DIMS(%(b)s)[0]);
            %(fail)s;
        }

        if ((NULL == %(sm)s)
            || (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0])
            || (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1]))
        {
            if (NULL != %(sm)s) Py_XDECREF(%(sm)s);
            %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s),
                                                       PyArray_TYPE(%(x)s));
            if(!%(sm)s) {
                PyErr_SetString(PyExc_MemoryError,
                     "failed to alloc sm output");
                %(fail)s
            }
        }
        Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
        Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
        Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);

        s  
        for (size_t i = 0; i < Nx[0]; ++i)
        {
            size_t j;
            double sum = 0.0;

            const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
            const dtype_%(b)s* __restrict__ b_i = (dtype_%(b)s*)(PyArray_BYTES(%(b)s));
            dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);

            npy_intp Sx = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
            npy_intp Sb = PyArray_STRIDES(%(b)s)[0]/sizeof(dtype_%(b)s);
            npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);

            size_t row_max_j=0;
            dtype_%(sm)s row_max = x_i[0] + b_i[0];
            //std::cout << "0 " << row_max << "\n";
            // Get the maximum value of the row
            for (j = 1; j < Nx[1]; ++j)
            {
                dtype_%(sm)s row_ij = x_i[j * Sx] +  b_i[j * Sb];
                //std::cout << "1 " << row_ij << "\n";
                row_max_j = (row_ij > row_max) ? j : row_max_j;
                row_max   = (row_ij > row_max) ? row_ij : row_max;
            }

        s  
            for (j = 0; j < Nx[1]; ++j)
            {
                dtype_%(sm)s row_ij = x_i[j * Sx] +  b_i[j * Sb];
                //std::cout << "2 " << j << " " << row_ij << " " << row_max << "\n";
                dtype_%(sm)s sm_ij = exp(row_ij - row_max);
                //std::cout << "3 " << j << " " << sm_ij << "\n";
                sum += sm_ij;
                sm_i[j * Ssm] = sm_ij;
            }

            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
            double sum_inv = 1.0 / sum;
            for (j = 0; j < Nx[1]; ++j)
            {
                sm_i[j * Ssm] *= sum_inv;
            }

        s   Nx[1]t   sm_is  
            for (j = 0; j < Nx[1]; ++j)
            {
                dtype_%%(sm)s row_ij = x_i[j * Sx] +  b_i[j * Sb];
                //std::cout << "2 " << j << " " << row_ij << " " << row_max << "\n";
                dtype_%%(sm)s sm_ij = row_ij - row_max;
                //std::cout << "3 " << j << " " << sm_ij << "\n";
                sm_i[j * Ssm] = sm_ij;
            }
            %(vec_exp)s;
            for (j = 0; j < Nx[1]; ++j)
            {
                sum += sm_i[j * Ssm];
            }

            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
            double sum_inv = 1.0 / sum;
            for (j = 0; j < Nx[1]; ++j)
            {
                sm_i[j * Ssm] *= sum_inv;
            }

        s   
            if(Ssm == 1){
                %(inside_row_loop_contig)s
            }else{
                %(inside_row_loop)s
            }
            s   
        }
        (   t   theanoR   R,   t   c_code_contiguous_rawt   localsR   t   utilst   MethodNotDefined(   R   t	   init_declt   begin_row_loopt   inside_row_loopt   vec_expt   inside_row_loop_contigt   end_row_loop(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   c_code_templatey   s    Bc   
      C` sU   | \ } } | \ } d j  |  j | j d j j   d   }	 |	 t t   |  S(   Nt    i    i   (   t   joinRN   t   inputsR   t   dtype_specst   dictRE   (
   R   R0   t   nameR:   t   outt   subR   R   R    t   code_template(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   c_code  s
    	 c           C` s   d S(   Ni   (   i   (    (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   c_code_cache_version  s    (    (   t   __name__t
   __module__t   __doc__t   nint   noutt	   __props__R!   R6   R?   R@   RA   t   staticmethodRN   RX   RY   (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR   +   s   						t   SoftmaxGradc           B` sV   e  Z d  Z d Z d Z d	 Z d   Z d   Z d   Z d   Z	 d   Z
 d   Z RS(
   s,   
    Gradient wrt x of the Softmax Op.

    i   i   c         C` s   t  j |  } t  j |  } | j j d k sE | j j t  j k rZ t d | j   n  | j d k r t  j | d d } n  | j d k r t  j | d d } n  t |  | | g | j   g  S(   Ni   i   s,   dy must be 1-d or 2-d tensor of floats. Got t   n_ones(   i   i   (	   R   R   R   R   R   R   R   t   shape_padleftR   (   R   t   dyR    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR!   /  s    c   	      C` sy   | \ } } t  j |  } xI t | j d  D]4 } | | | | } | t |  | | | | <q/ W| | d d <d  S(   Ni    (   R(   t
   zeros_likeR   R&   R.   (	   R   R0   R1   R2   Rd   R    R=   t   it   dy_times_sm_i(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR6   <  s     c   
      C` s   | \ } } | \ } | t  j t  j | | d d j d   } | | } t  j | | d d j d  } | | | | }	 | |	 f S(   NR$   i   i    R   (   i    R   (   i    R   (   R   t   negR.   t
   dimshuffle(
   R   R:   R;   Rd   R    t   gt   tmpt   g_dyt   tmp2R<   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR?   E  s    	/
"c         C` s   | d g S(   Ni   (    (   R   R0   R&   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR@   Q  s    c         C` s   d S(   Ni   (   i   (    (   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRY   T  s    c   	      C` s)   | \ } } | \ } d t  t   |  S(   Ns
  
        if ((PyArray_TYPE(%(dy)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(dy)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError,
                 "types should be float or float64");
            %(fail)s;
        }
        if ((PyArray_TYPE(%(sm)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(sm)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError,
                 "types should be float or float64");
            %(fail)s;
        }
        if ((PyArray_NDIM(%(dy)s) != 2)
            || (PyArray_NDIM(%(sm)s) != 2))
        {
            PyErr_SetString(PyExc_ValueError, "rank error");
            %(fail)s;
        }
        if (PyArray_DIMS(%(dy)s)[0] != PyArray_DIMS(%(sm)s)[0])
        {
            PyErr_SetString(PyExc_ValueError, "dy.shape[0] != sm.shape[0]");
            %(fail)s;
        }
        if ((NULL == %(dx)s)
            || (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0])
            || (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1]))
        {
            Py_XDECREF(%(dx)s);
            %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2,
                                                        PyArray_DIMS(%(sm)s),
                                                        PyArray_TYPE(%(sm)s));
            if (!%(dx)s)
            {
                PyErr_SetString(PyExc_MemoryError,
                     "failed to alloc dx output");
                %(fail)s;
            }
        }

        for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
        {
            const dtype_%(dy)s* __restrict__ dy_i = (dtype_%(dy)s*) (PyArray_BYTES(%(dy)s) + PyArray_STRIDES(%(dy)s)[0] * i);
            npy_intp Sdy = PyArray_STRIDES(%(dy)s)[1]/sizeof(dtype_%(dy)s);
            const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*) (PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
            npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
            dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*) (PyArray_BYTES(%(dx)s) + PyArray_STRIDES(%(dx)s)[0] * i);
            npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);

            double sum_dy_times_sm = 0.;
            for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
            {
                dx_i[j * Sdx] = dy_i[j * Sdy] * sm_i[j * Ssm];
                sum_dy_times_sm += dx_i[j * Sdx];
            }
            for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
            {
                dx_i[j * Sdx] -= sum_dy_times_sm * sm_i[j * Ssm];
            }
        }
        (   RS   RE   (	   R   R0   RT   R:   RU   RV   Rd   R    R=   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRX   W  s    	?(    (   RZ   R[   R\   R]   R^   R_   R!   R6   R?   R@   RY   RX   (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRa   %  s   						t   Softmaxc           B` s}   e  Z d  Z d Z d Z d Z d   Z d   Z d   Z d   Z	 d   Z
 d   Z e d    Z d	   Z e d
    Z RS(   s   
    Softmax activation function
    :math:`\varphi(\mathbf{x})_j =
    \frac{e^{\mathbf{x}_j}}{\sum_{k=1}^K e^{\mathbf{x}_k}}`
    where :math:`K` is the total number of neurons in the layer. This
    activation function gets applied row-wise.

    i   c         C` s   t  j |  } | j j d k s6 | j j t  j k rL t d | j   n  | j d k rs t  j | d d } n  t |  | g | j   g  S(   Ni   i   s-   x must be 1-d or 2-d tensor of floats. Got %sRb   (   i   i   (	   R   R   R   R   R   R   R   Rc   R   (   R   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR!     s    c         C` sp   | \ } t  j | | j d d  d  d   d  f  } | | j d d  d  d   d  f } | | d d <d  S(   NR$   i   i    (   R(   R,   R-   R+   R.   (   R   R0   R1   R2   R   R5   R    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR6     s    	/&c         C` s.   | \ } | \ } t  |  } t | |  g S(   N(   t
   softmax_opR9   (   R   R:   R;   R   R<   R    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR?     s    		c         C` s#   d  | k r d  g S|  j | |  S(   N(   R+   R?   (   R   RQ   t   eval_points(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   R_op  s    c         C` s   | S(   N(    (   R   R0   R&   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR@     s    c         C` s
   d d g S(   Ns
   <iostream>s   <cmath>(    (   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRA     s    c         C` s   d } d } d } y< t  j j j |  d d d  } d t   } d t   } Wn t  j j j k
 rj n Xd } | | | | f S(	   Ns`  
        npy_intp* Nx = PyArray_DIMS(%(x)s);
        npy_intp Sx1 = 0;
        npy_intp Ssm1 = 0;

        if (PyArray_NDIM(%(x)s) != 2)
        {
            PyErr_SetString(PyExc_ValueError, "not a 2d tensor");
            %(fail)s;
        }
        if ((PyArray_TYPE(%(x)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(x)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError, "not a float");
            %(fail)s;
        }

        if ((NULL == %(sm)s)
            || (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0])
            || (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1]))
        {
            Py_XDECREF(%(sm)s);
            %(sm)s = (PyArrayObject*)PyArray_SimpleNew(2, PyArray_DIMS(%(x)s),
                                                       PyArray_TYPE(%(x)s));
            if(!%(sm)s) {
                PyErr_SetString(PyExc_MemoryError,
                     "failed to alloc sm output");
                %(fail)s
            }
        }
        Sx1 = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
        Ssm1 = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
        s  
        for (size_t i = 0; i < Nx[0]; ++i)
        {
            size_t j;
            double sum = 0.0;

            const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
            dtype_%(sm) s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);

            dtype_%(sm)s row_max = x_i[0];
            //std::cout << "0 " << row_max << "\n";
            // Get the maximum value of the row
            for (j = 1; j < Nx[1]; ++j)
            {
                dtype_%(sm)s row_ij = x_i[j * Sx1] ;
                //std::cout << "1 " << row_ij << "\n";
                row_max   = (row_ij > row_max) ? row_ij : row_max;
            }

        st  
            for (j = 0; j < Nx[1]; ++j)
            {
                dtype_%(sm)s row_ij = x_i[j * Sx1] ;
                //std::cout << "2 " << j << " " << row_ij << " " << row_max << "\n";
                dtype_%(sm)s sm_ij = exp(row_ij - row_max);
                //std::cout << "3 " << j << " " << sm_ij << "\n";
                sum += sm_ij;
                sm_i[j * Ssm1] = sm_ij;
            }

            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
            double sum_inv = 1.0 / sum;
            for (j = 0; j < Nx[1]; ++j)
            {
                sm_i[j * Ssm1] *= sum_inv;
            }

        s   Nx[1]RB   s  
            for (j = 0; j < Nx[1]; ++j)
            {
                sm_i[j * Ssm1] = x_i[j * Sx1] - row_max;
            }
            %(vec_exp)s;
            for (j = 0; j < Nx[1]; ++j)
            {
                sum += sm_i[j * Ssm1];
            }

            //cblas_dscal(x.N, 1.0 / sum, &mat_at(s,i,0), s.n);
            double sum_inv = 1.0 / sum;
            for (j = 0; j < Nx[1]; ++j)
            {
                sm_i[j * Ssm1] *= sum_inv;
            }

            s   
            if(Ssm1 == 1){
                %(inside_row_loop_contig)s
            }else{
                %(inside_row_loop)s
            }
            s   
        }
        (   RC   R   R,   RD   RE   R   RF   RG   (   R   RH   RI   RJ   RK   RL   RM   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRN     s    +c   	      C` sR   | \ } | \ } d j  |  j | j d j j   d   } | t t   |  S(   NRO   i    i   (   RP   RN   RQ   R   RR   RS   RE   (	   R   R0   RT   R:   RU   RV   R   R    RW   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRX   J  s
    		 c           C` s   d S(   Ni   (   i   (    (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRY   Q  s    (    (   RZ   R[   R\   R]   R^   R_   R!   R6   R?   Rq   R@   RA   R`   RN   RX   RY   (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRn     s   						|	t
   LogSoftmaxc           B` sq   e  Z d  Z d
 Z d   Z d   Z d   Z d   Z d   Z d   Z	 e
 d    Z d   Z e
 d	    Z RS(   s   
    LogSoftmax activation function
    :math:`\varphi(\mathbf{x})_j =
    \e^{(\mathbf{x}_j - log{\sum_{k=1}^K e^{\mathbf{x}_k})}}
    where :math:`K` is the total number of neurons in the layer. This
    activation function gets applied row-wise.

    c         C` s   t  j |  } | j j d k s6 | j j t  j k rL t d | j   n  | j d k rs t  j | d d } n  t |  | g | j   g  S(   Ni   i   s-   x must be 1-d or 2-d tensor of floats. Got %sRb   (   i   i   (	   R   R   R   R   R   R   R   Rc   R   (   R   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR!   c  s    c         C` sr   | \ } | | j  d d  d  d   d  f } | t j t j t j |  d d d t  } | | d d <d  S(   NR$   i   t   keepdimsi    (   R-   R+   R(   t   logR.   R,   t   True(   R   R0   R1   R2   R   t   xdevt   lsm(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR6   n  s
    	&$c         C` sA   | \ } t  |  } | d t j | d d d d t | g S(   Ni    R$   i   Rs   (   Ro   R   R.   Ru   (   R   R:   R;   R   R    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR?   u  s    	c         C` s#   d  | k r d  g S|  j | |  S(   N(   R+   R?   (   R   RQ   Rp   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRq   z  s    c         C` s   | S(   N(    (   R   R0   R&   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR@     s    c         C` s   d g S(   Ns   <cmath>(    (   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRA     s    c         C` s(   d } d } d } d } | | | | f S(   Ns  
          npy_intp* Nx = PyArray_DIMS(%(x)s);
          npy_intp Sx1 = 0;
          npy_intp Ssm1 = 0;

          if (PyArray_NDIM(%(x)s) != 2)
          {
              PyErr_SetString(PyExc_ValueError, "not a 2d tensor");
              %(fail)s;
          }
          if ((PyArray_TYPE(%(x)s) != NPY_DOUBLE) &&
              (PyArray_TYPE(%(x)s) != NPY_FLOAT))
          {
              PyErr_SetString(PyExc_TypeError, "not a float");
              %(fail)s;
          }

          if ((NULL == %(sm)s)
              || (PyArray_DIMS(%(sm)s)[0] != PyArray_DIMS(%(x)s)[0])
              || (PyArray_DIMS(%(sm)s)[1] != PyArray_DIMS(%(x)s)[1]))
          {
              Py_XDECREF(%(sm)s);
              %(sm)s = (PyArrayObject*)PyArray_SimpleNew(
                  2, PyArray_DIMS(%(x)s),
                  PyArray_TYPE(%(x)s));
              if(!%(sm)s) {
                  PyErr_SetString(PyExc_MemoryError,
                       "failed to alloc sm output");
                  %(fail)s
              }
          }
          Sx1 = PyArray_STRIDES(%(x)s)[1]/sizeof(dtype_%(x)s);
          Ssm1 = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);
          s  
          // minibatch loop
          for (size_t i = 0; i < Nx[0]; ++i)
          {
              size_t j;
              double sum = 0.0;

              const dtype_%(x)s* __restrict__ x_i = (dtype_%(x)s*)(
                  PyArray_BYTES(%(x)s) + PyArray_STRIDES(%(x)s)[0] * i);
              dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(
                  PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);

              dtype_%(sm)s row_max = x_i[0];
              // Get the maximum value of the row
              for (j = 1; j < Nx[1]; ++j)
              {
                  dtype_%(sm)s x_ij = x_i[j * Sx1] ;
                  row_max = (x_ij > row_max) ? x_ij : row_max;
              }
              s  
              // Compute xdev and sum(exp(xdev), axis=1)
              double xdev_exp_row_sum = 0.0;
              for (j = 0; j < Nx[1]; j++)
              {
                  // use sm_i to temporary store xdev
                  sm_i[j * Ssm1] = (dtype_%(sm)s) (x_i[j * Sx1] - row_max);
                  xdev_exp_row_sum += exp(sm_i[j * Ssm1]);
              }

              // Write sm = xdev - log(sum(exp(xdev), axis=1))
              xdev_exp_row_sum = log(xdev_exp_row_sum);
              for (j = 0; j < Nx[1]; ++j)
              {
                  sm_i[j * Ssm1] -= (dtype_%(sm)s) xdev_exp_row_sum;
              }
              s   
          }
          (    (   R   RH   RI   RJ   RM   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRN     s
    #c   	      C` sR   | \ } | \ } d j  |  j | j d j j   d   } | t t   |  S(   NRO   i    i   (   RP   RN   RQ   R   RR   RS   RE   (	   R   R0   RT   R:   RU   RV   R   R    RW   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRX     s
    		 c           C` s   d S(   Ni    (   i    (    (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRY     s    (    (   RZ   R[   R\   R_   R!   R6   R?   Rq   R@   RA   R`   RN   RX   RY   (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRr   X  s   						P	t	   stabilizet   fast_compilec         C` s   t  |  j t j  r t  |  j j t j j  r t |  j	  d k r |  j	 d j
 d k	 r t  |  j	 d j
 j t  r |  j	 d j
 j	 d } t   } | |  } t | j _ t |  j	 d |  j d g |  | g Sd S(   sk   
    Detect Log(Softmax(x)) and replace it with LogSoftmax(x)

    Note: only forward pass is affected
    i   i    N(   R7   t   opR   t   Elemwiset	   scalar_opR   R	   t   Logt   lenRQ   t   ownerR+   Rn   Rr   R   t   tagt   values_eq_approxR   t   outputs(   R0   t   inVarst   new_opt   ret(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   local_logsoftmax  s    	!c         C` s  t  |  j t  rt |  j  d k r|  j d j d k	 r|  j d j j t j k rt |  j d j j  d k r|  j d j j d j d k	 r|  j d j j d j j t	 k r|  j d |  j d j j d k r|  j d j j t j k oA|  j d j j d j d k	 oAt  |  j d j j d j j t
 j  r|  j d j j d } |  j d j j d } | j d r| j d rt j | | j d | j d  } n  | t j | d d d t | } t | j _ t |  j d |  | g Sd S(   sq   
    Detect Log(Softmax(x))'s grad and replace it with LogSoftmax(x)'s grad

    Note: only grad is affected
    i   i    i   R$   Rs   N(   R7   Rz   Ra   R~   RQ   R   R+   R   t   true_divRo   R
   t   AdvancedIncSubtensort   broadcastablet   allocR&   R.   Ru   R   R   R   R   R   (   R0   R    R;   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   local_logsoftmax_grad  s(     #$ &#c         C` s,   t  j |   t  j |   j d d d t  S(   NR$   iRs   (   R   R,   R.   Ru   (   t   c(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   softmax_graph!  s    c         C` s3   t  |   }  |  j d r) t j d  n  t |   S(   NisY   The softmax is applied on a dimension of shape 1, which does not have a semantic meaning.(   R   R   t   warningst   warnRo   (   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   softmax%  s    c         C` s
   t  |   S(   N(   t   logsoftmax_op(   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt
   logsoftmax,  s    t   fast_compile_gpuc   	      C` sO  |  j  t k rK|  j \ } | j rK| j j  t j k rKg  } g  } x | j j D] } t | j j  t	 t
 g k r | j r t | j j  t j  r t | j j d j j  t
 g k r | j | j j d  q| j t j t	 t
 f d  |   qR | j |  qR Wt |  d k r^t |  d k s9t  | j   } | j t j |   n  | sjt  | rHt |  d k rt j |   } t | |  n
 | d } t |  d k rt j |   } t | |  n
 | d } y' t | |  } t |  j d |  Wn t k
 r!d SX| j |  j d j k rE| g SqHqKn  d S(   sM   
    Try to turn softmax(sum_of_stuff) -> softmax_w_bias(matrix, bias).

    i    i   N(   i   (   Rz   Ro   RQ   R   R   t   addt   listR   R   Ru   R/   R7   t
   DimShufflet   appendR~   t   AssertionErrort   popRc   R   R8   R   t	   Exception(	   R0   R   t   vectorst   non_vectorst   x_int   promoted_vectort
   vector_sumt   non_vector_sumt   sm_bias(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   local_softmax_with_bias0  sF    	%

c         C` s  xt  |   D]r} | j j j d  s. q n  | j d k rC q n  | j r | j j t j k r | j j	 d } n q d  } x | D] } | j r t | j j t j  r | j j j d k r2| j j	 d } | j r/t | j j t j  r/| j j j d k r,| j j	 d | k r)| } Pq)q,q/q2q q W| r t |  } t | |  |  j |  | j |  |  j |  q q W|  | f S(   Nt   floati   i    R   i   (   i    R   (   i   (   R   R   R   t
   startswithR   R   Rz   R   R,   RQ   R+   R7   R   t	   new_ordert   SumR$   Ro   R   t   removeR   (   t
   numeratorst   denominatorst	   numeratorR   t   matching_denomt   denominatort   zR   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   softmax_simplifierr  s4    !R   t%   CrossentropySoftmaxArgmax1HotWithBiasc           B` s   e  Z d  Z d Z d Z d Z d   Z d   Z d   Z d   Z	 d   Z
 d   Z d   Z e d	    Z d
   Z d   Z RS(   s  
    A special compound L{Op} for the output of neural-net classifiers.

    Parameters
    ----------
    x : a matrix of floats (32 or 64)
    b : a [row] vector of floats (32 or 64), length is number of cols in x
    y_idx : a [column] vector of int (32 or 64), length is number of rows in x

    Returns
    -------
    object
        row-wise NLL, softmax(x+b), row-wise argmax of (x+b).

    @precondition: every entry in y_idx is a valid (non-negative)
                   column index into x

    This L{Op} has three outputs:
     - KL(softmax(x+b), y)
     - softmax(x+b)
     - argmax(x+b)

    softmax(x[i]) is the i'th distribution over len(x[i]) options
    argmax(x) is the index of x's greatest element
    y_idx[i] is an integer index, encoding a 1-hot distribution.

    In practice, when we are trying to do classification, we have one row in x
    and y_idx per example, and y[i] is the index of the (correct) class of the
    i'th example.

    i   c         K` s   t  j j |  |  d  S(   N(   R   t   Opt   __init__(   R   t   kwargs(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR     s    c         C` s?  t  j |  } t  j |  } t  j |  } | j j d k sT | j j t  j k ri t d | j   n  | j j d k s | j j t  j k r t d | j   n  | j j d k s | j j t  j k r t d | j   n  t  j | j j | j j	  j
   } | j   } | j   } t |  | | | g | | | g  S(   Ni   s   x must be 2-d tensor of floatsi   s   b must be 1-d tensor of floatss#   y_idx must be 1-d tensor of [u]ints(   R   R   R   R   R   R   R   t   discrete_dtypest
   TensorTypeR   t   make_variableR   (   R   R   R   t   y_idxt   nllR    t   am(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR!     s"    c         C` s  | \ } } } | j  d | j  d k r8 t d   n  | j  d | j  d k ra t d   n  t | d k   r t d   n  t j |  } t j | j  d d | j d j j } t j |  }	 x t	 | j  d  D] }
 | |
 | } t j
 |  |	 |
 <| |	 |
 } t j | |  | |
 <t j | |
  } | |
 c d | 9<| | |
 | t j |  | |
 <q W| | d d <| | d d <|	 | d d <d	 S(
   s  
        The math, where x is an input vector, and t is a target index:

            softmax(x)[i] = exp(x[i]) / sum_j(exp(x[j]))
            nll(x,t) = -log(softmax(x)[t])

        We compute this by subtracting off the max of x. This avoids
        numerical instability.

            m = max_j x[j]
            softmax(x)[i] = exp(x[i] -m) / sum_j(exp(x[j] - m))

            nll = -log(exp(x[t] -m) / sum_j(exp(x[j] - m)))
                = -x[t] + m + log( sum_j(exp(x[j] - m)))

        i    i   s'   b must have same number of columns as xs(   y_idx must have same number of rows as xs   y_i value out of boundsR   g      ?i   N(   R&   R   t   anyR(   Re   R)   R   R   R   R   t   argmaxR,   R.   Rt   (   R   R0   R1   R2   R   R   R   R    R   R   Rf   t   rowt   mt   sum_j(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR6     s*    )(c   	      C` s5   | \ } } } | d f } | } | } | | | g S(   Ni    (    (	   R   R0   t   shapest   x_shpt   b_shpt   idx_shpt   nll_shpt   sm_shpt   am_shp(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR@   
  s
    c         C` s(   t  t  t  g t  t  t  g t t t  g g S(   N(   Ru   R/   (   R   R0   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   connection_pattern  s    c         C` sn  | \ } } } | \ } } } g  }	 g  }
 g  } t  | j t  s t | | |  \ } } t | | |  } t j | d d g } |	 j |  |
 j |  n  t  | j t  s t j	 | | f | f  \ } } |	 j |  |
 j |  n  t  | j t  s?|	 j | j
    |
 j | j
    | j | j
    n  d   } g  |	 |
 | g D] } | |  ^ qXS(   NR$   i    c         S` sI   t  |   d k r t     S|  d } x |  d D] } | | } q1 W| S(   Ni    i   (   R~   R   (   t   termst   rvalt   term(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt	   fancy_sum0  s    

(   R7   R   R   t#   crossentropy_softmax_1hot_with_biast&   crossentropy_softmax_1hot_with_bias_dxR   R.   R   R8   R?   Re   (   R   R:   R;   R   R   R   t   g_nllR<   t   g_amt   dx_termst   db_termst   d_idx_termsR   R    R=   R>   R   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR?     s,    !	c         C` s
   d d g S(   Ns
   <iostream>s   <cmath>(    (   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRA   ;  s    c         C` s4   t  j |   \ } } } } | d | d | d | f S(   Ns  
        if (PyArray_NDIM(%(y_idx)s) != 1)
        {
            PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
            %(fail)s;
        }
        if (PyArray_DIMS(%(x)s)[0] != PyArray_DIMS(%(y_idx)s)[0])
        {
            PyErr_Format(PyExc_ValueError,
                "number of rows in x (%%ld) does not match length of y (%%ld)",
                (long int)PyArray_DIMS(%(x)s)[0],
                (long int)PyArray_DIMS(%(y_idx)s)[0]);
            %(fail)s;
        }

        if ((NULL == %(nll)s) //initial condition
            || (PyArray_DIMS(%(nll)s)[0] != PyArray_DIMS(%(y_idx)s)[0]))
        {
            if (NULL != %(nll)s) Py_XDECREF(%(nll)s);
            %(nll)s = (PyArrayObject*)PyArray_SimpleNew(1,
                PyArray_DIMS(%(y_idx)s), PyArray_TYPE(%(x)s));
            if(!%(nll)s)
            {
                PyErr_SetString(PyExc_MemoryError,
                     "failed to alloc nll output");
                %(fail)s;
            }
        }
        if ((NULL == %(am)s)
            || (PyArray_DIMS(%(am)s)[0] != PyArray_DIMS(%(y_idx)s)[0]))
        {
            Py_XDECREF(%(am)s);
            %(am)s = (PyArrayObject*) PyArray_SimpleNew(1,
                PyArray_DIMS(%(y_idx)s), PyArray_TYPE(%(y_idx)s));
            if(!%(am)s)
            {
                PyErr_SetString(PyExc_MemoryError,
                     "failed to alloc am output");
                %(fail)s;
            }
        }
                s  
            const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(PyArray_BYTES(%(y_idx)s) + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];
            dtype_%(nll) s* __restrict__ nll_i = (dtype_%(nll)s*)(PyArray_BYTES(%(nll)s) + PyArray_STRIDES(%(nll)s)[0] * i);
            %(am_type)s* __restrict__ am_i = (%(am_type)s*) (PyArray_BYTES(%(am)s) + PyArray_STRIDES(%(am)s)[0] * i);
                s  
            if ((y_i >= PyArray_DIMS(%(x)s)[1]) || (y_i < 0))
            {
                PyErr_SetString(PyExc_ValueError, "y_i value out of bounds");
                %(fail)s;
            }
            nll_i[0] = - x_i[y_i*Sx]
                       - b_i[y_i*Sb]
                       + row_max
                       + log(sum);
            am_i[0] = row_max_j;
                (   R   RN   (   R   RH   RI   RJ   RM   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRN   >  s    *c         C` s   d t  j   S(   Ni   (   i   (   R   RY   (   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRY     s    c         C` s   | \ } } } | \ }	 }
 } | j  d j j   d } | } | j  d j j   d } d j |  j |   } | t t   |  S(   Ni   i   i    RO   (   RQ   R   RR   RP   RN   RS   RE   (   R   R0   RT   R:   RU   RV   R   R   R   R   R    R   t
   y_idx_typet   am_typeR   RW   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRX     s    (    (   RZ   R[   R\   R]   R^   R_   R   R!   R6   R@   R   R?   RA   R`   RN   RY   RX   (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR     s   			2			$	M	t!   CrossentropySoftmax1HotWithBiasDxc           B` sV   e  Z d  Z d Z d Z d	 Z d   Z d   Z d   Z d   Z	 d   Z
 d   Z RS(
   sJ   
    Gradient wrt x of the CrossentropySoftmaxArgmax1HotWithBias Op.

    i   i   c         K` s  t  j |  } t  j |  } t  j |  } | j j d k sT | j j t  j k ri t d | j   n  | j j d k s | j j t  j k r t d | j   n  | j j d k s | j j t  j k r t d | j   n  t |  | | | g | j   g  S(   Ni   s#   dy must be {0,1}-d tensor of floatsi   s   sm must be 2-d tensor of floatss#   y_idx must be 1-d tensor of [u]ints(	   R   R   R   R   R   R   R   R   R   (   R   Rd   R    R   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR!     s    c         C` s   | \ } } } t  | d k   r0 t d   n  t j |  } | j d k r[ | d  } n  t | j d d k  } xU t | j d  D]@ }	 | |	 | }
 |
 | |	 | |	 <| |	 | |	 f c |
 8<q W| | d d <d  S(   Ni    s   y_i value out of boundsi   (	   R   R   R(   Re   R   R+   t   intR&   R   (   R   R0   R1   R2   Rd   R    R   R=   t   incrRf   t   dy_i(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR6     s    c         C` s   | d g S(   Ni   (    (   R   R0   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR@     s    c         C` s   | \ } } } | \ } t  j | j d  } t  j | t j   | t  j | d  | |  d d } | j d d  | }	 t |  d |  }
 | |	 |
 g S(   Ni    iR$   i   R   i   (	   R   t   arangeR&   R.   R
   R   t   fillRi   R   (   R   R:   R;   Rd   R    R   t   g_dxt   y_idx_rangeRl   R<   t   g_y_idx(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR?     s    	(c         C` s   d S(   Ni   (   i   (    (   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRY     s    c         C` sF   | \ } } } | \ }	 | j  d j j   d }
 d t t   |  S(   Ni   i   s&  
        if ((PyArray_TYPE(%(dnll)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(dnll)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError,
                 "dnll type should be float32 or float64");
            %(fail)s;
        }
        if ((PyArray_TYPE(%(sm)s) != NPY_DOUBLE) &&
            (PyArray_TYPE(%(sm)s) != NPY_FLOAT))
        {
            PyErr_SetString(PyExc_TypeError,
                 "sm type should be float32 or float64");
            %(fail)s;
        }

        // new scope because of variable declaration
        // TODO: proper indentation, but the diff will get messy
        {
        // Get `dnll.shape[0]` or set it to zero if `dnll` is a scalar.
        const npy_intp %(dnll)s_dims0 = (PyArray_NDIM(%(dnll)s) > 0 ?
                                         PyArray_DIMS(%(dnll)s)[0] :
                                         (npy_intp) 0);

        // Get `dnll.strides[0]` and set it to zero if `dnll` is a scalar
        // or a vector with just one element.
        const npy_intp %(dnll)s_strides0 = (%(dnll)s_dims0 > 1 ?
                                            PyArray_STRIDES(%(dnll)s)[0] :
                                            (npy_intp) 0);

        if ((PyArray_NDIM(%(dnll)s) > 1)
            || (PyArray_NDIM(%(sm)s) != 2)
            || (PyArray_NDIM(%(y_idx)s) != 1))
        {
            PyErr_SetString(PyExc_ValueError, "rank error");
            %(fail)s;
        }
        if (%(dnll)s_dims0 != PyArray_DIMS(%(sm)s)[0] && %(dnll)s_dims0 > 1)
        {
            PyErr_Format(PyExc_ValueError,
                         "dnll.shape[0] (%%ld) != sm.shape[0] (%%ld)",
                         (long int)%(dnll)s_dims0,
                         (long int)PyArray_DIMS(%(sm)s)[0]);
            %(fail)s;
        }
        if (%(dnll)s_dims0 != PyArray_DIMS(%(y_idx)s)[0] && %(dnll)s_dims0 > 1)
        {
            PyErr_Format(PyExc_ValueError,
                         "dnll.shape[0] (%%ld) != y_idx.shape[0] (%%ld)",
                         (long int)%(dnll)s_dims0,
                         (long int)PyArray_DIMS(%(y_idx)s)[0]);
            %(fail)s;
        }
        if (PyArray_DIMS(%(sm)s)[0] !=
            PyArray_DIMS(%(y_idx)s)[0])
        {
            PyErr_SetString(PyExc_ValueError,
                            "sm.shape[0] != y_idx.shape[0]");
            %(fail)s;
        }
        if ((NULL == %(dx)s)
            || (PyArray_DIMS(%(dx)s)[0] != PyArray_DIMS(%(sm)s)[0])
            || (PyArray_DIMS(%(dx)s)[1] != PyArray_DIMS(%(sm)s)[1]))
        {
            if (NULL != %(dx)s) Py_XDECREF(%(dx)s);
            %(dx)s = (PyArrayObject*) PyArray_SimpleNew(2,
                                                        PyArray_DIMS(%(sm)s),
                                                        PyArray_TYPE(%(sm)s));
            if(!%(dx)s) {
                PyErr_SetString(PyExc_MemoryError,
                     "failed to alloc dx output");
                %(fail)s
            }
        }

        for (size_t i = 0; i < PyArray_DIMS(%(dx)s)[0]; ++i)
        {
            const dtype_%(dnll)s dnll_i = ((dtype_%(dnll)s*)(PyArray_BYTES(%(dnll)s) + %(dnll)s_strides0 * i))[0];

            const %(y_idx_type) s y_i = ((%(y_idx_type)s*)(PyArray_BYTES(%(y_idx)s) + PyArray_STRIDES(%(y_idx)s)[0] * i))[0];

            const dtype_%(sm)s* __restrict__ sm_i = (dtype_%(sm)s*)(PyArray_BYTES(%(sm)s) + PyArray_STRIDES(%(sm)s)[0] * i);
            npy_intp Ssm = PyArray_STRIDES(%(sm)s)[1]/sizeof(dtype_%(sm)s);

            dtype_%(dx) s* __restrict__ dx_i = (dtype_%(dx)s*)(PyArray_BYTES(%(dx)s) + PyArray_STRIDES(%(dx)s)[0] * i);
            npy_intp Sdx = PyArray_STRIDES(%(dx)s)[1]/sizeof(dtype_%(dx)s);

            for (size_t j = 0; j < PyArray_DIMS(%(dx)s)[1]; ++j)
            {
                dx_i[j * Sdx] = dnll_i * sm_i[j * Ssm];
            }
            if (y_i >= PyArray_DIMS(%(dx)s)[1] || (y_i < 0))
            {
                PyErr_SetString(PyExc_ValueError, "y_i >= dx dimensions[1] or y_i < 0.");
                %(fail)s;
            }
            dx_i[y_i * Sdx] -= dnll_i;
        }
        }
        (   RQ   R   RR   RS   RE   (   R   R0   RT   R:   RU   RV   t   dnllR    R   R=   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyRX     s    	d(    (   RZ   R[   R\   R]   R^   R_   R!   R6   R@   R?   RY   RX   (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR     s   					c         K` s   t  |  | | |  d d !S(   Ni    i   (   t*   crossentropy_softmax_argmax_1hot_with_bias(   R   R   R   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR   D  s    c         K` s2   t  j |  d d  d   f  } t |  | | |  S(   Ni    (   R   Re   R   (   R   R   R   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   crossentropy_softmax_1hotI  s    c         K` sF   t  |  | | |  \ } } t j | d d \ } } | | | | f S(   s  
    Returns
    -------
    object
        The cross-entropy, the softmax output, the max probability,
        and the argmax index.

    TODO: Since we are recomputing the argmax,
           we might as well assert that it is correct.

    TODO: Make this entire function is
    unnecessary? e.g. CrossentropySoftmaxArgmax1HotWithBias should return
    the appropriate information (i.e. the max probability)?

    R$   i(   R   R   t   max_and_argmax(   R   R   R   R   t   xentR   t   max_prR   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt2   crossentropy_softmax_max_and_argmax_1hot_with_biasN  s    c         K` s2   t  j |  d d  d   f  } t |  | | |  S(   Ni    (   R   Re   R   (   R   R   R   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt(   crossentropy_softmax_max_and_argmax_1hotc  s    t   CrossentropyCategorical1HotGradc           B` s)   e  Z d Z d    Z d   Z d   Z RS(   c         C` s"   t  |  | | | g | j   g  S(   N(   R   R   (   R   t   g_yt   coding_distt   true_one_of_n(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR!   m  s    c   
      C` s}   | \ } } } | \ } t  j |  } xE t t |   D]1 }	 | |	 | |	 | |	 f | |	 | |	 f <q: W| | d <d  S(   Ni    (   R(   Re   R   R~   (
   R   R0   R:   RU   R   R   R   t   g_coding_strgt   g_codingRf   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR6   q  s    	'c         C` s   | d g S(   Ni   (    (   R   R0   t	   in_shapes(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR@   z  s    (    (   RZ   R[   R_   R!   R6   R@   (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR   i  s   			t   CrossentropyCategorical1Hotc           B` s8   e  Z d  Z d Z d   Z d   Z d   Z d   Z RS(   s  
    Compute the cross entropy between a coding distribution and
    a true distribution of the form [0, 0, ... 0, 1, 0, ..., 0].

    .. math::

        y[i] = - \log(coding_dist[i, one_of_n[i])

    Notes
    -----
    In the case that the coding distribution is the output of a
    softmax, an application of this Op will probably be optimized
    away in favour of one with a C implementation.

    c         C` s   t  j |  } t  j |  } | j j d k r? t d   n  | j t  j t  j f k ry t d | j t  j f   n  t |  | | g t  j d | j	 d t
 g    g  S(   s   
        Parameters
        ----------
        coding_dist : dense matrix
        true_one_of_n : lvector

        Returns
        -------
        dvector

        i   s)   matrix required for argument: coding_distsP   integer vector required for argument: true_one_of_n(got type: %s instead of: %s)R   R   (   R   R   R   R   t	   TypeErrort   lvectort   ivectorR   t   TensorR   R/   (   R   R   R   t   _coding_distt   _true_one_of_n(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR!     s    c   	      C` s   | \ } } | \ } t  j | d  d   d f  } x< t t |   D]( } t  j | | | | f  | | <qG W| | d <d  S(   Ni    (   R(   Re   R   R~   Rt   (	   R   R0   R:   RU   t   codingt   one_of_nt   y_outt   yRf   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR6     s    	&c         C` s   | d d f g S(   Ni    (    (   R   R0   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR@     s    c         C` s7   | \ } } | \ } t  | | |  t |  d |  g S(   Ni   (   t"   crossentropy_categorical_1hot_gradR   (   R   R:   R;   R   R   R   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR?     s    	(    (   RZ   R[   R\   R_   R!   R6   R@   R?   (    (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR     s   			c         ` s#     f d   } x |   r q Wd S(   s   
    This is a stabilization optimization.

    Notes
    -----
    Not a local optimization because we are replacing outputs
    from several nodes at once.

    c    	      ` s   x   j    D] }  |  j t k r |  j \ } |  j \ } } | j r | j j t k r | j j \ } } t | | |  \ } } }   j | | f | | f g d d t	 Sq q Wt
 S(   Nt   reasont3   crossentropy_to_crossentropy_with_softmax_with_bias(   t   toposortRz   t   crossentropy_categorical_1hotR   RQ   R   R8   R   t   replace_all_validateRu   R/   (	   R0   R   R    R   R   R   t   new_nllt   new_smt   new_am(   t   fgraph(    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   search_make_one_sub  s    N(    (   R   R   (    (   R   s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR     s    c         ` s#     f d   } x |   r q Wd S(   s  
    This is a stabilization optimization that is more general than
    crossentropy_to_crossentropy_with_softmax_with_bias.

    It must be executed after local_softmax_with_bias optimization in
    specialize.

    TODO : This is a stabilization optimization! How to make this more cleanly?

    Notes
    -----
    Not a local optimization because we are replacing outputs from several
    nodes at once.

    c    	      ` s4  x-  j    D]}  |  j t k r |  j \ } |  j \ } } | j r | j j t k r | j j \ } t | t j	 | d  |  \ } } }   j
 | | f | | f g d d t S| j r,| j j t k r,| j j \ } } t | | |  \ } } }   j
 | | f | | f g d d t Sq q Wt S(   Ni    R   t)   crossentropy_to_crossentropy_with_softmax(   R   Rz   R   R   RQ   R   Ro   R   R   Re   R   Ru   R8   R/   (	   R0   R   R    R   R   R   R   R   R   (   R   (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR     s*    %	N(    (   R   R   (    (   R   s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR    s    R  gGz @t   fast_runR   t4   local_crossentropy_to_crossentropy_with_softmax_gradc         C` s   |  j  t k r~ |  j \ } } | j r~ | j j  t k r~ | j j \ } } } t | | |  } t |  j d |  | g Sn  d  S(   Ni    (   Rz   R9   RQ   R   R   R   R   R   (   R0   t   g_coding_distR   R   R   R=   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt4   local_softmax_grad_to_crossentropy_with_softmax_grad  s    			c         C` s  t  |  j t j  r |  j d j r t |  j d j  d k r |  j d j j t	 t
 t j t j t j t t f k r t j j j r t j d  j d  q n  t  |  j t j  r|  j d j rt |  j d j  d k r|  j \ } } |  j d } |  j j |   } | j rz| j j t	 t
 t j t j t j t f k rz| j j \ } t j | |  } t | |  | S| j r| j j t k r| j j \ } } t j | t j | j d  |  |  } t | |  | Sn  d  S(   Ni    s   theano.tensor.nnet.nnets+  WARNING: there was a bug in Theano fixed on May 27th, 2010 in this case. I.E. when we take the max of a softplus, softmax, exp, log, tanh, sigmoid, softmax_with_bias op, we were doing the max of the parent of the input. To remove this warning set the Theano flags 'warn.argmax_pushdown_bug' to FalseR   (   R   i    (   R7   Rz   R   t   MaxAndArgmaxRQ   R   R~   R   t   clientsRo   R   R,   Rt   t   tanhR   R8   RC   t   configR   t   argmax_pushdown_bugt   loggingt	   getLoggert
   get_paramsR   R   R   R   (   R0   t   x_maxt   x_argmaxR   R$   t   pre_xR   t   pre_bias(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt   local_argmax_pushdown&  s4    %,,	c         C` s  | j  rm t | j  j d  rm | j  j j j } t | |  d k rm t | | d d  rm t |  d  Sn  |  j  rt |  j  j t	 j
  r|  j  j \ } } } t | d d  d k r t St | d d  d k r t S| j  s t St | j  j t j  rr| j  } | j j | j d t d g k r| j d } | j  ro| j  j t	 j k ro| j  j d | k Sqq| j  j j j } | | d | k Sn  d S(   s   
    Check that 'rows' is the same node as T.arange(labels.shape[0]).

    Also considers the case where labels.shape[0] is constant and equal
    to 1, and T.arange(labels.shape[0]) has been constant-folded into 0.

    t   shape_featurei   i    t   datat   allow_partialN(   R   t   hasattrR   R  t   shape_ofR~   t	   _is_constR7   Rz   R   t   ARangeRQ   t   getattrR+   R/   R
   t	   Subtensort   get_constant_idxRu   R&   (   t   rowst   labelsR  t   startt   stopt   stept   shape_subtensort	   shape_var(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt    _check_rows_is_arange_len_labelsP  s*    	-!		c         C` sX   y t  j |   } Wn t j k
 r* t SX| rA t j | |  St j | | k  Sd  S(   N(   R   t   get_scalar_constant_valueR   t   NotScalarConstantErrorR/   R(   t   allcloset   all(   R   t   valt   approxt   maybe(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyR  z  s    c         C` s  d  } d  } t |  j t j  r y |  j \ } } } Wn t k
 rJ n X| r | j r | j j t j	 k r | j j d } q nm |  j t j	 k r |  j d j } | r t | j t j  r y | j \ } } } Wq t k
 r q Xq n  | d  k	 r | j r | j j t
 t f k r t j | j  } | rl| d j j t k sSt  | d j j \ } } n# | j j d } t j | d  } t | |  r | j d k r| j d k rt | | |  d }	 |	 }
 t |  j d |	 |
 g  |
 g Sq n  d  S(   Ni    i   i   (   R+   R7   Rz   R
   t   AdvancedSubtensorRQ   R   R   R   Rt   Ro   R8   R   t	   transformR   Re   R$  R   R   R   R   (   R0   Rt   R    R  R  t   pre_logt	   sm_w_biast   x_vart   b_vart	   minus_retR   (    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt+   local_advanced_indexing_crossentropy_onehot  s@    $
@opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_grad])
def local_advanced_indexing_crossentropy_onehot_grad(node):
    if not (node.op == softmax_grad):
        return

    sm = None
    try:
        d_sm, sm = node.inputs
    except Exception:
        return

    if ((sm is not None) and sm.owner and
            (sm.owner.op in (softmax_op, softmax_with_bias))):
        sm_w_bias = local_softmax_with_bias.transform(sm.owner)
        if sm_w_bias:
            assert sm_w_bias[0].owner.op == softmax_with_bias
            x_var, b_var = sm_w_bias[0].owner.inputs
        else:
            x_var = sm.owner.inputs[0]
    else:
        return

    # Two cases are supported:
    # 1. AdvancedIncSubtensor(
    #        zeros_like(softmax(x)),
    #        -out_grad / AdvancedSubtensor(softmax(x), arange, labels),
    #        arange, labels)
    #    which arises from the gradient of log(softmax(x))[arange, labels]
    # 2. AdvancedIncSubtensor(
    #        zeros_like(softmax(x)), -out_grad, arange, labels) / softmax(x)
    #    which arises from the gradient of log(softmax(x)[arange, labels])

    if d_sm.owner and isinstance(d_sm.owner.op,
                                 subtensor.AdvancedIncSubtensor):
        # First case
        try:
            z, incr, rows, labels = d_sm.owner.inputs
        except Exception:
            return
        if not _is_const(z, 0):
            return

        adv_subtensor = None
        out_grad = 1.

        # If incr is a negation, strip it and fold the sign into out_grad
        if incr.owner and incr.owner.op == tensor.neg:
            out_grad = -out_grad
            incr = incr.owner.inputs[0]

        if incr.owner and incr.owner.op == tensor.true_div:
            num, denom = incr.owner.inputs

            # Set out_grad according to the numerator; it must be a scalar
            # or a vector.
            if num.ndim == 1 or numpy.all(num.broadcastable):
                out_grad *= -num
            else:
                return

            if not denom.owner:
                return

            if isinstance(denom.owner.op, subtensor.AdvancedSubtensor):
                # Base case
                adv_subtensor = denom
            elif denom.owner.op == tensor.mul:
                # Find the AdvancedSubtensor among the factors of the
                # denominator and fold the remaining factors into out_grad.
                for i, input in enumerate(denom.owner.inputs):
                    if input.owner and isinstance(
                            input.owner.op, subtensor.AdvancedSubtensor):
                        other_inputs = [
                            in_ for (j, in_) in
                            enumerate(denom.owner.inputs) if j != i]
                        if len(other_inputs) == 1:
                            rest = other_inputs[0]
                        else:
                            rest = tensor.mul(*other_inputs)

                        # Check that rest is a vector or a scalar
                        if rest.ndim == 1 or numpy.all(rest.broadcastable):
                            adv_subtensor = input
                            out_grad = out_grad / rest
                            break
            else:
                return

            # The output gradient needs to be a vector
            out_grad = tensor.fill(sm[:, 0], out_grad)

            if adv_subtensor is not None:
                try:
                    maybe_sm, maybe_rows, maybe_labels = \
                        adv_subtensor.owner.inputs
                except Exception:
                    return
                if not (maybe_sm is sm and maybe_rows is rows and
                        maybe_labels is labels):
                    return
            else:
                return
        else:
            return

        if not _check_rows_is_arange_len_labels(rows, labels):
            return

    elif d_sm.owner and d_sm.owner.op == tensor.true_div:
        # Second case
        try:
            num, denom = d_sm.owner.inputs
        except Exception:
            return
        if denom != sm:
            return

        if num.owner and isinstance(num.owner.op,
                                    subtensor.AdvancedIncSubtensor):
            try:
                z, incr, rows, labels = num.owner.inputs
            except Exception:
                return
            if not _is_const(z, 0):
                return
            if incr.broadcastable not in [(False,), (True,)]:
                return
            if labels.ndim != 1 or labels.dtype not in tensor.int_dtypes:
                return
            out_grad = -incr
            if not _check_rows_is_arange_len_labels(rows, labels):
                return
        else:
            return
    else:
        return

    if labels.ndim == 1 and x_var.ndim == 2:
        ret = crossentropy_softmax_1hot_with_bias_dx(out_grad, sm, labels)
        copy_stack_trace(node.outputs[0], ret)
        return [ret]
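
# Companion sketch for the gradient case (illustrative; the helper name is
# hypothetical): differentiating the indexed log-softmax produces exactly
# the AdvancedIncSubtensor graphs matched above, which this rewrite turns
# into a single CrossentropySoftmax1HotWithBiasDx node.
def _example_crossentropy_grad_fusion():
    import theano
    import theano.tensor as T
    x = T.matrix('x')
    y = T.ivector('y')
    nll = -T.log(T.nnet.softmax(x))[T.arange(y.shape[0]), y]
    return theano.function([x, y], T.grad(nll.sum(), x))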
!		4!
"!!!c   
      C` s   |  j  t k r |  j \ } } x | j D] } | d j  t k r( | d } | g  | j D] } | d ^ qX k r | j \ } } } | j  | | |  }	 t |  j d |	 d  |	 d g Sq( q( Wn  d  S(   Ni    i   (   Rz   R8   RQ   R  R   R   R   (
   R0   R   R   t   x_clientt
   big_clientt   b_clientt   xxt   bbt   llt   mergeable_client(    (    s7   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/nnet.pyt-   graph_merge_softmax_with_crossentropy_softmax  s    
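
# Sketch of a graph that benefits from the merge above (illustrative; the
# helper name is hypothetical): when both the probabilities and the fused
# NLL are requested, the softmax can be read off the fused Op instead of
# being recomputed.
def _example_softmax_and_nll():
    import theano
    import theano.tensor as T
    x = T.matrix('x')
    b = T.vector('b')
    y = T.ivector('y')
    sm = T.nnet.softmax(x + b)
    nll = -T.log(sm)[T.arange(y.shape[0]), y]
    return theano.function([x, b, y], [sm, nll.sum()])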
@opt.register_canonicalize
@gof.local_optimizer([CrossentropySoftmax1HotWithBiasDx])
def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node):
    """
    Replace a CrossentropySoftmax1HotWithBiasDx op, whose incoming gradient is
    an `alloc` of a scalar variable or one that has either broadcastable or
    matching dimensions with the output variable, by one that skips the
    intermediate `alloc`.

    """
    if isinstance(node.op, CrossentropySoftmax1HotWithBiasDx):
        dy, sm, y_idx = node.inputs

        # These cases are directly handled by the internal broadcasting of
        # the `CrossentropySoftmax1HotWithBiasDx` op.
        if dy.ndim == 0:
            return False
        if dy.ndim == 1 and dy.broadcastable[0]:
            return False

        assert dy.ndim == 1

        if dy.owner is not None and isinstance(dy.owner.op, tensor.Alloc):
            # dz is the input of the Alloc op, i.e. T.alloc(dz, <shape>)
            dz = dy.owner.inputs[0]

            try:
                shape_feature = node.fgraph.shape_feature
            except AttributeError:
                # The shape feature may not be available in some mode, but
                # we need it for this optimization, so don't continue.
                return False

            shape_of = shape_feature.shape_of
            same_shape = shape_feature.same_shape

            # Build `dz_broad` explicitly to include extra implicit
            # dimensions.
            dz_broad = (True,) * (dy.ndim - dz.ndim) + dz.broadcastable

            # If we cannot infer statically that the shapes of `sm` and `dy`
            # match in dimension 0, or that the shape of `dy` is 1 (which
            # triggers the internal broadcasting of the op), check at runtime.
            if (dz_broad[0] and
                    not same_shape(sm, dy, dim_x=0, dim_y=0) and
                    shape_of[dy][0] != 1):
                cond = tensor.or_(tensor.eq(dy.shape[0], 1),
                                  tensor.eq(dy.shape[0], sm.shape[0]))
                msg = '`sm` and `dy` do not have the same shape.'
                dz = opt.Assert(msg)(dz, cond)

            ret = node.op(dz, sm, y_idx)
            copy_stack_trace(node.outputs[0], ret)
            return [ret]
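
# Sketch of where the `alloc` pattern comes from (illustrative; the helper
# name is hypothetical): the gradient of a *mean* loss arrives as
# alloc(1/batch_size, ...), and the rewrite above passes the scalar
# directly to CrossentropySoftmax1HotWithBiasDx.
def _example_grad_of_mean_nll():
    import theano
    import theano.tensor as T
    x = T.matrix('x')
    y = T.ivector('y')
    cost = T.nnet.categorical_crossentropy(T.nnet.softmax(x), y).mean()
    return theano.function([x, y], T.grad(cost, x))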
#c         C` s+   | t  j |   d | t  j d |   S(   s  
    Compute the crossentropy of binary random variables.

    Output and target are each expectations of binary random
    variables; target may be exactly 0 or 1 but output must
    lie strictly between 0 and 1.

    Notes
    -----
    We could use the x log y op to support output=0 and output=1.
    The gradient would still be undefined though.

    We do not sum; crossentropy is computed per component.
    TODO : Rewrite as a scalar, and then broadcast to tensor.

    """
    return -(target * tensor.log(output) +
             (1.0 - target) * tensor.log(1.0 - output))


def categorical_crossentropy(coding_dist, true_dist):
    r"""
    Return the cross-entropy between an approximating distribution and a true
    distribution.

    .. warning:: THIS FUNCTION IS UNNECESSARILY POLYMORPHIC.
    We ultimately don't want the polymorphism, and will move this function
    to pylearn.algorithms.cost. The 1hot version will be removed.
    The length of the documentation here is a form of code smell.

    The cross entropy between two probability distributions measures the average
    number of bits needed to identify an event from a set of possibilities, if a
    coding scheme is used based on a given probability distribution q, rather
    than the "true" distribution p.

    Mathematically it is defined as follows:

    .. math::

        H(p,q) = - \sum_x p(x) \log(q(x))

    Parameters
    ----------
    coding_dist : a dense matrix
        Each slice along axis represents one distribution.
    true_dist : a dense matrix or sparse matrix or integer vector
        In the case of a matrix argument, each slice along axis represents one
        distribution. In the case of an integer vector argument, each element
        represents the position of the '1' in a 1-of-N encoding.

    Returns
    -------
    tensor of rank one-less-than `coding_dist`
        The cross entropy between each coding and true distribution.

    Notes
    -----
    axis : int
        The dimension over which each distribution runs
        (1 for row distributions, 0 for column distributions).

    """
    if true_dist.ndim == coding_dist.ndim:
        return -tensor.sum(true_dist * tensor.log(coding_dist),
                           axis=coding_dist.ndim - 1)
    elif true_dist.ndim == coding_dist.ndim - 1:
        return crossentropy_categorical_1hot(coding_dist, true_dist)
    else:
        raise TypeError('rank mismatch between coding and true distributions')


class Prepend_scalar_constant_to_each_row(gof.Op):
    def __init__(self, val=0):
        if isinstance(val, float):
            val = scalar.constant(val)
        self.val = val

    def __str__(self):
        return '%s{%s}' % (self.__class__.__name__, self.val)

    def make_node(self, mat):
        # check type of input
        x = tensor.as_tensor_variable(mat)
        if not mat.type.broadcastable == (False, False):
            raise TypeError('Expected a matrix as input')
        y = tensor.as_tensor_variable(self.val)
        assert y.ndim == 0
        if x.type.dtype != y.type.dtype:
            raise TypeError("the value to prepend doesn't have the same "
                            "type as the matrix")
        node = Apply(op=self, inputs=[mat], outputs=[mat.type()])
        return node

    def perform(self, node, inp, out):
        mat, = inp
        output, = out
        new_shape = (mat.shape[0], mat.shape[1] + 1)
        if output[0] is None:
            output[0] = numpy.empty(new_shape, dtype=mat.dtype)
        elif output[0].shape != new_shape:
            try:
                output[0].resize(new_shape)
            except Exception:
                output[0] = numpy.empty(new_shape, dtype=mat.dtype)
        out = output[0]
        out[:, 0].fill(self.val.data)
        out[:, 1:] = mat

    def infer_shape(self, node, in_shapes):
        shp = (in_shapes[0][0], in_shapes[0][1] + 1)
        return [shp]

    def grad(self, inp, grads):
        mat, = inp
        goutput, = grads
        return goutput[:, 1:]


class Prepend_scalar_to_each_row(gof.Op):
    def make_node(self, val, mat):
        # check type of input
        x = tensor.as_tensor_variable(mat)
        if isinstance(val, float):
            val = scalar.constant(val)
        if not mat.type.broadcastable == (False, False):
            raise TypeError('Expected a matrix as input')
        y = tensor.as_tensor_variable(val)
        assert y.ndim == 0
        if x.type.dtype != y.type.dtype:
            raise TypeError("the value to prepend doesn't have the same "
                            "type as the matrix")
        node = Apply(op=self, inputs=[val, mat], outputs=[mat.type()])
        return node

    def perform(self, node, inp, out):
        val, mat = inp
        output, = out
        new_shape = (mat.shape[0], mat.shape[1] + 1)
        if output[0] is None:
            output[0] = numpy.empty(new_shape, dtype=mat.dtype)
        elif output[0].shape != new_shape:
            try:
                output[0].resize(new_shape)
            except Exception:
                output[0] = numpy.empty(new_shape, dtype=mat.dtype)
        out = output[0]
        out[:, 0].fill(val)
        out[:, 1:] = mat

    def infer_shape(self, node, in_shapes):
        shp = (in_shapes[1][0], in_shapes[1][1] + 1)
        return [shp]

    def grad(self, inp, grads):
        val, mat = inp
        goutput, = grads
        return goutput[:, 0], goutput[:, 1:]


prepend_scalar_to_each_row = Prepend_scalar_to_each_row()
prepend_0_to_each_row = Prepend_scalar_constant_to_each_row(0.)
prepend_1_to_each_row = Prepend_scalar_constant_to_each_row(1.)
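
# Usage sketch for the two crossentropy helpers defined above (illustrative;
# the helper name is hypothetical). `binary_crossentropy` is elementwise,
# while `categorical_crossentropy` reduces over the distribution axis.
def _example_crossentropies():
    import theano
    import theano.tensor as T
    p = T.matrix('p')          # predictions strictly in (0, 1)
    t = T.matrix('t')          # binary targets, may be exactly 0 or 1
    bce = binary_crossentropy(p, t).mean()

    probs = T.matrix('probs')  # rows are probability distributions
    y = T.ivector('y')         # integer class labels (1-of-N)
    cce = categorical_crossentropy(probs, y).mean()
    return theano.function([p, t], bce), theano.function([probs, y], cce)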
def relu(x, alpha=0):
    """
    Compute the element-wise rectified linear activation function.

    .. versionadded:: 0.7.1

    Parameters
    ----------
    x : symbolic tensor
        Tensor to compute the activation function for.
    alpha : `scalar or tensor, optional`
        Slope for negative input, usually between 0 and 1. The default value
        of 0 will lead to the standard rectifier, 1 will lead to
        a linear activation function, and any value in between will give a
        leaky rectifier. A shared variable (broadcastable against `x`) will
        result in a parameterized rectifier with learnable slope(s).

    Returns
    -------
    symbolic tensor
        Element-wise rectifier applied to `x`.

    Notes
    -----
    This is numerically equivalent to ``T.switch(x > 0, x, alpha * x)``
    (or ``T.maximum(x, alpha * x)`` for ``alpha < 1``), but uses a faster
    formulation or an optimized Op, so we encourage to use this function.

    """
    if alpha == 0:
        return 0.5 * (x + abs(x))
    else:
        # We can't use the constants 0.5 and 1 directly: if alpha is a
        # numpy dtype they would be treated as float64 and cause an upcast.
        alpha = tensor.as_tensor_variable(alpha)
        f1 = 0.5 * (1 + alpha)
        f2 = 0.5 * (1 - alpha)
        return f1 * x + f2 * abs(x)
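
# Short usage sketch (illustrative; the helper name is hypothetical):
# a standard rectifier and a leaky one with slope 0.01 for x < 0.
def _example_relu():
    import theano
    import theano.tensor as T
    v = T.matrix('v')
    return theano.function([v], [relu(v), relu(v, alpha=0.01)])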
def h_softmax(x, batch_size, n_outputs, n_classes, n_outputs_per_class,
              W1, b1, W2, b2, target=None):
    """Two-level hierarchical softmax.

    This function implements a two-layer hierarchical softmax. It is commonly
    used as an alternative to the softmax when the number of outputs is
    large (it is common to use it for millions of outputs). See
    reference [1]_ for more information about the computational gains.

    The `n_outputs` outputs are organized in `n_classes` classes, each class
    containing the same number `n_outputs_per_class` of outputs.
    For an input `x` (last hidden activation), the first softmax layer predicts
    its class and the second softmax layer predicts its output among its class.

    If `target` is specified, it will only compute the outputs of the
    corresponding targets. Otherwise, if `target` is `None`, it will compute
    all the outputs.

    The outputs are grouped in classes in the same order as they are initially
    defined: if `n_outputs=10` and `n_classes=2`, then the first class is
    composed of the outputs labeled `{0,1,2,3,4}` while the second class is
    composed of `{5,6,7,8,9}`. If you need to change the classes, you have to
    re-label your outputs.

    .. versionadded:: 0.7.1

    Parameters
    ----------
    x: tensor of shape (batch_size, number of features)
        the minibatch input of the two-layer hierarchical softmax.
    batch_size: int
        the size of the minibatch input x.
    n_outputs: int
        the number of outputs.
    n_classes: int
        the number of classes of the two-layer hierarchical softmax. It
        corresponds to the number of outputs of the first softmax. See note at
        the end.
    n_outputs_per_class: int
        the number of outputs per class. See note at the end.
    W1: tensor of shape (number of features of the input x, n_classes)
        the weight matrix of the first softmax, which maps the input x to the
        probabilities of the classes.
    b1: tensor of shape (n_classes,)
        the bias vector of the first softmax layer.
    W2: tensor of shape (n_classes, number of features of the input x,
            n_outputs_per_class)
        the weight matrix of the second softmax, which maps the input x to
        the probabilities of the outputs.
    b2: tensor of shape (n_classes, n_outputs_per_class)
        the bias vector of the second softmax layer.
    target: tensor of shape either (batch_size,) or (batch_size, 1)
        (optional, default None)
        contains the indices of the targets for the minibatch
        input x. For each input, the function computes the output for its
        corresponding target. If target is None, then all the outputs are
        computed for each input.

    Returns
    -------
    tensor of shape (`batch_size`, `n_outputs`) or (`batch_size`, 1)
        Output tensor of the two-layer hierarchical softmax for input `x`.
        Depending on argument `target`, it can have two different shapes.
        If `target` is not specified (`None`), then all the outputs are
        computed and the returned tensor has shape (`batch_size`, `n_outputs`).
        Otherwise, when `target` is specified, only the corresponding outputs
        are computed and the returned tensor has thus shape (`batch_size`, 1).

    Notes
    -----
    The product of `n_outputs_per_class` and `n_classes` has to be greater than
    or equal to `n_outputs`. If it is strictly greater, then the irrelevant
    outputs will be ignored.
    `n_outputs_per_class` and `n_classes` have to be the same as the
    corresponding dimensions of the tensors of `W1`, `b1`, `W2` and `b2`.
    The most computationally efficient configuration is when
    `n_outputs_per_class` and `n_classes` are equal to the square root of
    `n_outputs`.

    Examples
    --------
    The following example builds a simple hierarchical softmax layer.

    >>> import numpy as np
    >>> import theano
    >>> from theano import tensor
    >>> from theano.tensor.nnet import h_softmax
    >>>
    >>> # Parameters
    >>> batch_size = 32
    >>> n_outputs = 100
    >>> dim_x = 10  # dimension of the input
    >>> n_classes = int(np.ceil(np.sqrt(n_outputs)))
    >>> n_outputs_per_class = n_classes
    >>> output_size = n_outputs_per_class * n_outputs_per_class
    >>>
    >>> # First level of h_softmax
    >>> floatX = theano.config.floatX
    >>> W1 = theano.shared(
    ...     np.random.normal(0, 0.001, (dim_x, n_classes)).astype(floatX))
    >>> b1 = theano.shared(np.zeros((n_classes,), floatX))
    >>>
    >>> # Second level of h_softmax
    >>> W2 = np.random.normal(0, 0.001,
    ...     size=(n_classes, dim_x, n_outputs_per_class)).astype(floatX)
    >>> W2 = theano.shared(W2)
    >>> b2 = theano.shared(np.zeros((n_classes, n_outputs_per_class), floatX))
    >>>
    >>> # We can now build the graph to compute a loss function, typically the
    >>> # negative log-likelihood:
    >>>
    >>> x = tensor.imatrix('x')
    >>> target = tensor.imatrix('target')
    >>>
    >>> # This only computes the output corresponding to the target.
    >>> # The complexity is O(n_classes + n_outputs_per_class).
    >>> y_hat_tg = h_softmax(x, batch_size, output_size, n_classes,
    ...                      n_outputs_per_class, W1, b1, W2, b2, target)
    >>>
    >>> negll = -tensor.mean(tensor.log(y_hat_tg))
    >>>
    >>> # We may need to compute all the outputs (at test time usually):
    >>>
    >>> # This computes all the outputs.
    >>> # The complexity is O(n_classes * n_outputs_per_class).
    >>> output = h_softmax(x, batch_size, output_size, n_classes,
    ...                    n_outputs_per_class, W1, b1, W2, b2)


    References
    ----------
    .. [1] J. Goodman, "Classes for Fast Maximum Entropy Training,"
        ICASSP, 2001, <http://arxiv.org/abs/cs/0108006>`.
    i   ii    R   Ni   R   t   int32(   i   i   (   RC   R   t   nnetR   t   dotR+   t	   tensordott   reshapeRi   t   flattenR   R)   R   (   R   t
    """
    # First softmax that computes the probabilities of belonging to each
    # class
    class_probs = theano.tensor.nnet.softmax(tensor.dot(x, W1) + b1)

    if target is None:
        # Compute the probabilities of all the outputs with a second softmax
        activations = tensor.tensordot(x, W2, (1, 1)) + b2
        output_probs = theano.tensor.nnet.softmax(
            activations.reshape((-1, n_outputs_per_class)))
        output_probs = output_probs.reshape((batch_size, n_classes, -1))
        output_probs = class_probs.dimshuffle(0, 1, 'x') * output_probs
        output_probs = output_probs.reshape((batch_size, -1))
        # output_probs.shape[1] is n_classes * n_outputs_per_class, which
        # might be greater than n_outputs, so ignore the potential
        # irrelevant outputs with the next line:
        output_probs = output_probs[:, :n_outputs]

    else:
        # Compute the probabilities of the outputs specified by the targets
        target = target.flatten()

        # Class to which each target belongs
        target_classes = target // n_outputs_per_class

        # Output to which each target belongs inside its class
        target_outputs_in_class = target % n_outputs_per_class

        # First softmax
        target_class_probs = class_probs[tensor.arange(batch_size),
                                         target_classes]

        # Second softmax, restricted to the target classes
        activations = sparse_block_dot(
            W2.dimshuffle('x', 0, 1, 2), x.dimshuffle(0, 'x', 1),
            tensor.zeros((batch_size, 1), dtype='int32'), b2,
            target_classes.dimshuffle(0, 'x'))

        output_probs = theano.tensor.nnet.softmax(
            activations.dimshuffle(0, 2))
        target_output_probs = output_probs[tensor.arange(batch_size),
                                           target_outputs_in_class]

        output_probs = target_class_probs * target_output_probs

    return output_probs
def elu(x, alpha=1):
    """
    Compute the element-wise exponential linear activation function.

    .. versionadded:: 0.8.0

    Parameters
    ----------
    x : symbolic tensor
        Tensor to compute the activation function for.
    alpha : scalar
        Scale for the negative part of the activation; for inputs far below
        zero the output saturates to ``-alpha``. Defaults to 1.

    Returns
    -------
    symbolic tensor
        Element-wise exponential linear activation function applied to `x`.

    References
    ----------
    .. [1] Djork-Arne Clevert,  Thomas Unterthiner, Sepp Hochreiter
        "Fast and Accurate Deep Network Learning by
        Exponential Linear Units (ELUs)" <http://arxiv.org/abs/1511.07289>.
    """
    return tensor.switch(x > 0, x, alpha * tensor.expm1(x))


class ScalarSoftsign(scalar.UnaryScalarOp):
    """
    Softsign activation function
    :math:`\varphi(\mathbf{x}) = \frac{\mathbf{x}}{1+|\mathbf{x}|}`

    """

    @staticmethod
    def static_impl(x):
        return x / (1.0 + abs(x))

    def impl(self, x):
        return ScalarSoftsign.static_impl(x)

    def grad(self, inp, grads):
        x, = inp
        gz, = grads
        if 'float' in x.type.dtype:
            d = 1.0 + abs(x)
            return [gz / (d * d)]
        else:
            return NotImplemented

    def c_code(self, node, name, inp, out, sub):
        x, = inp
        z, = out
        if node.inputs[0].type in [scalar.float32, scalar.float64]:
            return "%(z)s = %(x)s / (1.0+fabs(%(x)s));" % locals()
        raise NotImplementedError('only floating point x is implemented')
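
# Usage sketch (illustrative; the helper name is hypothetical): `elu`
# defined above is the element-wise ELU; for alpha=1 it returns x for
# x > 0 and exp(x) - 1 for x <= 0.
def _example_elu():
    import theano
    import theano.tensor as T
    v = T.vector('v')
    return theano.function([v], elu(v))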
scalar_softsign = ScalarSoftsign(scalar.upgrade_to_float,
                                 name='scalar_softsign')
softsign = elemwise.Elemwise(scalar_softsign, name='softsign')


def confusion_matrix(actual, pred):
    """
    Computes the confusion matrix of given vectors containing
    actual observations and predicted observations.

    Parameters
    ----------
    actual : 1-d tensor variable
    pred : 1-d tensor variable

    Returns
    -------
    conf_mat : Confusion matrix of actual and predicted observations, as shown below.

               | Predicted
    ___________|___________
       Actual  |
               |

    order : 1-d array of order of entries in rows and columns

    Examples
    --------
    >>> import theano
    >>> from theano.tensor.nnet import confusion_matrix

    >>> x = theano.tensor.vector()
    >>> y = theano.tensor.vector()
    >>> f = theano.function([x, y], confusion_matrix(x, y))
    >>> y_true = [2, 0, 2, 2, 0, 1]
    >>> y_pred = [0, 0, 2, 2, 0, 2]
    >>> print(f(y_true, y_pred))
    [array([[2, 0, 0],
       [0, 0, 1],
       [1, 0, 2]]), array([ 0.,  1.,  2.])]
    """
    if actual.ndim != 1:
        raise ValueError('actual must be 1-d tensor variable')
    if pred.ndim != 1:
        raise ValueError('pred must be 1-d tensor variable')

    order = extra_ops.Unique(False, False, False)(
        tensor.concatenate([actual, pred]))

    colA = actual.dimshuffle(0, 'x')
    colP = pred.dimshuffle(0, 'x')

    oneHotA = tensor.eq(colA, order).astype('int64')
    oneHotP = tensor.eq(colP, order).astype('int64')

    conf_mat = tensor.dot(oneHotA.T, oneHotP)
    return [conf_mat, order]