ó
àÆ÷Xc           @` s{   d  d l  m Z m Z m Z d  d l m Z d  d l Z d „  Z d „  Z d d „ Z	 d d „ Z d d	 „ Z d
 „  Z d S(   i    (   t   absolute_importt   print_functiont   division(   t   xrangeNc   
      C` s£   d } x– t  t |  | ƒ ƒ D] \ } \ } } | d | } | d t ƒ  7} xK t  | ƒ D]= \ } }	 |	 d k r† | d t ƒ  7} qZ | d t ƒ  7} qZ Wq W| S(   s;   
    Produce code to declare all necessary variables.

    t    s   lv%is*   
        %(dtype)s* %(var)s_iter;
        t   xs    
                npy_intp %(var)s_n%(value)i;
                ssize_t %(var)s_stride%(value)i;
                int %(var)s_jump%(value)i_%(j)i;
                sB   
                int %(var)s_jump%(value)s_%(j)i;
                (   t	   enumeratet   zipt   locals(
   t   loop_orderst   dtypest   subt   declt   it
   loop_ordert   dtypet   vart   jt   value(    (    s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pyt   make_declare   s    (c         C` s×  d } xt  t |  | ƒ ƒ D]ÿ \ } \ } } d | } g  | D] } | d k r? | ^ q? }	 |	 r‡ t |	 ƒ d }
 | d t ƒ  7} n  d } x‹ t t t  | ƒ ƒ ƒ D]q \ } } | d k rö d d t ƒ  | f } | d	 t ƒ  7} d
 t ƒ  } q¦ d | } | d t ƒ  7} d } q¦ Wq Wd } xŸ t |  Œ  D]‘ } g  t  | ƒ D]$ \ } } | d k rE| | f ^ qE} t | ƒ d k  r‡q2n  | d \ } } x) | d D] \ } } | d t ƒ  7} q¢Wq2W| | | | S(   NR   s	   %%(lv%i)sR   i   sÁ   
            if (PyArray_NDIM(%(var)s) < %(min_nd)s) {
                PyErr_SetString(PyExc_ValueError, "Not enough dimensions on input.");
                %%(fail)s
            }
            t   0s   (%s) - (%s)s   %(var)s_stride%(index)ssy  
                %(var)s_n%(index)s = PyArray_DIMS(%(var)s)[%(index)s];
                %(var)s_stride%(index)s = PyArray_STRIDES(%(var)s)[%(index)s] / sizeof(%(dtype)s);
                %(var)s_jump%(index)s_%(j)s = %(jump)s;
                //printf("%(var)s_jump%(index)s_%(j)s is:");
                //std::cout << %(var)s_jump%(index)s_%(j)s << std::endl;
                s*   %(var)s_n%(index)s*%(var)s_stride%(index)ss   -(%s)sÏ   
                %(var)s_jump%(index)s_%(j)s = %(jump)s;
                //printf("%(var)s_jump%(index)s_%(j)s is:");
                //std::cout << %(var)s_jump%(index)s_%(j)s << std::endl;
                i   i    sã  
            if (%%(lv%(j0)s)s_n%(x0)s != %%(lv%(j)s)s_n%(x)s)
            {
                PyErr_Format(PyExc_ValueError, "Input dimension mis-match. (input[%%%%i].shape[%%%%i] = %%%%i, input[%%%%i].shape[%%%%i] = %%%%i)",
                   %(j0)s,
                   %(x0)s,
                   %%(lv%(j0)s)s_n%(x0)s,
                   %(j)s,
                   %(x)s,
                   %%(lv%(j)s)s_n%(x)s
                );
                %%(fail)s
            }
            (   R   R   t   maxR   t   reversedt   listt   len(   R	   R
   R   t   initR   R   R   R   R   t   nonxt   min_ndt   adjustR   t   indext   jumpt   checkt   matchest
   to_comparet   j0t   x0(    (    s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pyt   make_checks(   s:    (
%%
7R   c         C` sÚ   | j  ƒ  } | j d ƒ r0 | j d d ƒ } n  t |  d ƒ } d } x} t t |  Œ  ƒ D]i \ } } xZ t | ƒ D]; \ }	 }
 |
 d k rr | d |	 } | d t ƒ  7} Pqr qr W| d t ƒ  7} qY Wd	 t t ƒ  |  S(
   s  Generate C code to allocate outputs.

    Parameters
    ----------
    fortran : str
        A string included in the generated code. If it
        evaluate to non-zero, an ndarray in fortran order will be
        created, otherwise it will be c order.

    t   THEANO_COMPLEXt   NPY_COMPLEXi    R   R   s   lv%is&   dims[%(i)s] = %(var)s_n%(candidate)s;
s   dims[%(i)s] = 1;
s”  
    {
        npy_intp dims[%(nd)s];
        //npy_intp* dims = (npy_intp*)malloc(%(nd)s * sizeof(npy_intp));
        %(init_dims)s
        if (!%(olv)s) {
            %(olv)s = (PyArrayObject*)PyArray_EMPTY(%(nd)s, dims,
                                                    %(type)s,
                                                    %(fortran)s);
        }
        else {
            PyArray_Dims new_dims;
            new_dims.len = %(nd)s;
            new_dims.ptr = dims;
            PyObject* success = PyArray_Resize(%(olv)s, &new_dims, 0, NPY_CORDER);
            if (!success) {
                // If we can't resize the ndarray we have we can allocate a new one.
                PyErr_Clear();
                Py_XDECREF(%(olv)s);
                %(olv)s = (PyArrayObject*)PyArray_EMPTY(%(nd)s, dims, %(type)s, 0);
            }
        }
        if (!%(olv)s) {
            %(fail)s
        }
    }
    (   t   uppert
   startswitht   replaceR   R   R   R   t   dict(   R	   R   R   t   fortrant   typet   ndt	   init_dimsR   t
   candidatesR   t	   candidateR   (    (    s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pyt
   make_allocv   s     c         ` sc  ‡  ‡ ‡ f d †  } i  } x¬ t  t |  ˆ  ƒ ƒ D]• \ } \ } }	 x€ t  | ƒ D]G \ }
 } | d k rP | j |
 d ƒ | |
 c d t ƒ  ˆ 7<PqP qP W| j d d ƒ | d c d t ƒ  ˆ 7<q1 Wd } xz t t t t t | ƒ d ƒ | t t |  Œ  ƒ ƒ ƒ ƒ D]> \ } \ } } } | | j | d ƒ | | | | | ƒ } qW| | d 7} d | S(	   s/  
    Make a nested loop over several arrays and associate specific code
    to each level of nesting.

    Parameters
    ----------
    loop_orders : list of N tuples of length M
        Each value of each tuple can be either the index of a dimension to
        loop over or the letter 'x' which means there is no looping to be done
        over that variable at that point (in other words we broadcast
        over that dimension). If an entry is an integer, it will become
        an alias of the entry of that rank.
    loop_tasks : list of M+1 pieces of code
        The ith loop_task is a pair of strings, the first
        string is code to be executed before the ith loop starts, the second
        one contains code to be executed just before going to the next element
        of the ith dimension.
        The last element if loop_tasks is a single string, containing code
        to be executed at the very end.
    sub : dictionary
        Maps 'lv#' to a suitable variable name.
        The 'lvi' variable corresponds to the ith element of loop_orders.

    c         ` s¼   d | } d } d } x_ t  | ƒ D]Q \ } } ˆ d | }	 ˆ  | }
 | d t ƒ  7} | d k r# d t ƒ  } q# q# Wˆ rš t j j } d t ƒ  } n d } | d	 t ƒ  7} d
 t ƒ  S(   Ns   ITER_%iR   t   1s   lv%isU   %(dtype)s &%(var)s_i = * ( %(var)s_iter + %(iterv)s * %(var)s_jump%(index)s_%(i)s );
R   s   %(var)s_n%(index)ssK   #pragma omp parallel for if( %(suitable_n)s >=%(openmp_elemwise_minsize)s)
s>   for (int %(iterv)s = 0; %(iterv)s<%(suitable_n)s; %(iterv)s++)si   
        %(preloop)s
        %(forloop)s {
            %(update)s
            %(code)s
        }
        (   R   R   t   theanot   configt   openmp_elemwise_minsize(   t   preloopt   codet   indicesR   t   itervt   updatet
   suitable_nR   R   R   R   R5   t   forloop(   R
   t   openmpR   (    s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pyt	   loop_overÎ   s    

R   R   s>   %%(lv%(i)s)s_iter = (%(dtype)s*)(PyArray_DATA(%%(lv%(i)s)s));
i    i   iÿÿÿÿs   {%s}(	   R   R   t
   setdefaultR   R   R   R   R   t   get(   R	   R
   t
   loop_tasksR   R=   R>   t   preloopsR   R   R   R   R   t   st   pre_taskt   taskR8   (    (   R
   R=   R   s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pyt	   make_loopµ   s    (P-c         ` sé  t  |  ƒ } t  |  d ƒ } ˆ d | } d t ƒ  }	 x` t |  | ƒ D]N \ }
 } | d k rt |	 d t ƒ  7}	 n |	 d t ƒ  7}	 |	 d t ƒ  7}	 qH W|	 d t ƒ  7}	 g  } x{ t t |  Œ  ƒ D]g \ }
 } xK t | ƒ D]7 \ } } | d k rÝ ˆ d | } d	 t ƒ  } PqÝ qÝ Wd
 } | j | ƒ qÄ Wd t d | d d j | ƒ ƒ } | d t ƒ  7} x% t | ƒ D] }
 | d t ƒ  7} qoW‡ f d †  ‰  d t d | d | d d j ‡  f d †  t |  ƒ Dƒ ƒ ƒ } | d t ƒ  7} xa t | ƒ D]S }
 ˆ d |
 } | d t ƒ  7} x+ t t | ƒ ƒ D] } | d t ƒ  7} q-WqõWd } x9 t | ƒ D]+ \ }
 } ˆ d |
 } | d t ƒ  7} q_Wd } x t | ƒ D]s \ } } ˆ d | } | d t ƒ  7} d } x5 t t	 | ƒ ƒ D]! }
 d |
 } | d t ƒ  7} qåW| d 7} q¡W| } x¦ t t	 | ƒ ƒ D]’ }
 d |
 } d  |
 } d } d } |
 | d! k rp| } n  |
 d k r¥| r¥t
 j j } | d" t ƒ  7} q¥n  | d# t ƒ  7} d$ t ƒ  } q1Wd% j d& |	 | | | | d' g ƒ S((   sÂ  A bit like make_loop, but when only the inner-most loop executes code.

    All the loops will be reordered so that the loops over the output tensor
    are executed with memory access as contiguous as possible.
    For instance, if the output tensor is c_contiguous, the inner-most loop
    will be on its rows; if it's f_contiguous, it will be on its columns.

    The output tensor's index among the loop variables is indicated by olv_index.

    i    s   lv%is¦   
    std::vector< std::pair<int, int> > %(ovar)s_loops(%(nnested)i);
    std::vector< std::pair<int, int> >::iterator %(ovar)s_loops_it = %(ovar)s_loops.begin();
    R   s_   
            %(ovar)s_loops_it->first = abs(PyArray_STRIDES(%(ovar)s)[%(index)i]);
            s7   
            %(ovar)s_loops_it->first = 0;
            sQ   
        %(ovar)s_loops_it->second = %(i)i;
        ++%(ovar)s_loops_it;
        s•   
    // rbegin and rend are reversed iterators, so this sorts in decreasing order
    std::sort(%(ovar)s_loops.rbegin(), %(ovar)s_loops.rend());
    s   %(var)s_n%(candidate)sR2   s6   
    int init_totals[%(nnested)s] = {%(totals)s};
    t   nnestedt   totalss   , s5   
    %(ovar)s_loops_it = %(ovar)s_loops.begin();
    sh   
        int TOTAL_%(i)i = init_totals[%(ovar)s_loops_it->second];
        ++%(ovar)s_loops_it;
        c         ` sY   ˆ  d | } g  } x> |  D]6 } | d k rD | j  d t ƒ  ƒ q | j  d ƒ q W| S(   s¨   
        Returns a list containing a C expression representing the
        stride for each dimension of the ith variable, in the
        specified loop_order.

        s   lv%iR   s   %(var)s_stride%(index)sR   (   t   appendR   (   R   R   R   t   rR   (   R   (    s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pyt   get_loop_stridesU  s    sL   
    int init_strides[%(nvars)i][%(nnested)i] = {
        %(strides)s
    };t   nvarst   stridess   , 
c         3` s?   |  ]5 \ } } t  | ƒ d  k r d j ˆ  | | ƒ ƒ Vq d S(   i    s   , N(   R   t   join(   t   .0R   t   lo(   RK   (    s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pys	   <genexpr>l  s   	sR   
    std::vector< std::pair<int, int> >::reverse_iterator %(ovar)s_loops_rit;
    s6   
        %(ovar)s_loops_rit = %(ovar)s_loops.rbegin();sˆ   
            int %(var)s_stride_l%(j)i = init_strides[%(i)i][%(ovar)s_loops_rit->second];
            ++%(ovar)s_loops_rit;
            R   s4   %(var)s_iter = (%(dtype)s*)(PyArray_DATA(%(var)s));
s'   %(dtype)s &%(var)s_i = * ( %(var)s_iters   ITER_%is    +%(var)s_stride_l%(i)i*%(iterv)ss   );
s   TOTAL_%ii   sF   #pragma omp parallel for if( %(total)s >=%(openmp_elemwise_minsize)s)
s8   for(int %(iterv)s = 0; %(iterv)s<%(total)s; %(iterv)s++)sƒ   
        %(forloop)s
        { // begin loop %(i)i
            %(update)s
            %(loop)s
        } // end loop %(i)i
        s   
t   {s   }
(   R   R   R   R   RI   R*   RN   R   R   t   rangeR3   R4   R5   (   t   init_loop_orderst	   olv_indexR
   t
   inner_taskR   R=   RL   RG   t   ovart   order_loopsR   R   RH   R/   R   R0   R   t   totalt   declare_totalst   declare_stridest   declare_iterR   t   pointer_updatet   tot_jumpR9   t   loopR:   R<   R5   (    (   RK   R   s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pyt   make_reordered_loopú   sœ    	


		c         ` s„  ‡  f d †  } i  } x¬ t  t |  | ƒ ƒ D]• \ } \ } } x€ t  | ƒ D]G \ }	 }
 |
 d k rJ | j |	 d ƒ | |	 c d t ƒ  ˆ  7<PqJ qJ W| j d d ƒ | d c d t ƒ  ˆ  7<q+ Wt | ƒ d k rë | j d d ƒ } nƒ d } xz t t t t t | ƒ d ƒ | t t |  Œ  ƒ ƒ ƒ ƒ D]> \ } \ } } } | | j | d ƒ | | | | | ƒ } q,W| | d 7} d | S(	   s.  
    Make a nested loop over several arrays and associate specific code
    to each level of nesting.

    Parameters
    ----------
    loop_orders : list of N tuples of length M
        Each value of each tuple can be either the index of a dimension to
        loop over or the letter 'x' which means there is no looping to be done
        over that variable at that point (in other words we broadcast
        over that dimension). If an entry is an integer, it will become
        an alias of the entry of that rank.
    loop_tasks : list of M+1 pieces of code
        The ith loop_task is a pair of strings, the first
        string is code to be executed before the ith loop starts, the second
        one contains code to be executed just before going to the next element
        of the ith dimension.
        The last element if loop_tasks is a single string, containing code
        to be executed at the very end.
    sub: dictionary
        Maps 'lv#' to a suitable variable name.
        The 'lvi' variable corresponds to the ith element of loop_orders.

    c   
      ` sy   d | } d } d } xU t  | ƒ D]G \ } } ˆ  d | }	 | d t ƒ  7} | d k r# d t ƒ  } q# q# Wd t ƒ  S(	   Ns   ITER_%iR   R2   s   lv%is-   %(var)s_iter += %(var)s_jump%(index)s_%(i)s;
R   s   %(var)s_n%(index)ssš   
        %(preloop)s
        for (int %(iterv)s = %(suitable_n)s; %(iterv)s; %(iterv)s--) {
            %(code)s
            %(update)s
        }
        (   R   R   (
   R6   R7   R8   R   R9   R:   R;   R   R   R   (   R   (    s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pyR>   í  s    
R   R   s>   %%(lv%(i)s)s_iter = (%(dtype)s*)(PyArray_DATA(%%(lv%(i)s)s));
i    i   iÿÿÿÿs   {%s}(	   R   R   R?   R   R   R@   R   R   R   (   R	   R
   RA   R   R>   RB   R   R   R   R   R   RC   RD   RE   R8   (    (   R   s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pyt   make_loop_careduceÓ  s"    (P-(   t
   __future__R    R   R   t	   six.movesR   R3   R   R$   R1   t   NoneRF   R_   R`   (    (    (    s;   /tmp/pip-build-X4mzal/theano/theano/tensor/elemwise_cgen.pyt   <module>   s   	"	N?FØ