ó
àÆ÷Xc           @` s¶   d  d l  m Z m Z m Z d  d l Z d  d l m Z d  d l Z d  d l	 m
 Z d  d l m Z d  d l m Z d  d l m Z d e j f d	 „  ƒ  YZ e ƒ  Z e d
 „ Z d S(   i    (   t   absolute_importt   print_functiont   divisionN(   t   xrange(   t   basic(   t   strutil(   t   grad_undefined(   t   DisconnectedTypet   ConvTransp3Dc           B` sV   e  Z d  Z d Z d „  Z d	 d „ Z d „  Z d „  Z d „  Z	 d „  Z
 d „  Z RS(
   s›   
    "Transpose" of Conv3D (Conv3D implements multiplication by an implicitly
    defined matrix W. This implements multiplication by its transpose).

    c         C` s   d S(   Ni   (   i   (    (   t   self(    (    s?   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/ConvTransp3D.pyt   c_code_cache_version   s    c         C` s¸   t  j | ƒ } t  j | ƒ } t  j | ƒ } t  j | ƒ }	 | rT t  j | ƒ }
 n t  j d d d g ƒ }
 t j |  d | | | |	 |
 g d t  j |	 j t t t t t f ƒ ƒ  g ƒS(   sû   
        Parameters
        ----------
        W
            Weights, filter
        b
            Bias, shape == (W.shape[0],).
        d
            Strides when moving the filter over the input.
        H
            The output of Conv3D.

        iÿÿÿÿt   inputst   outputs(   t   Tt   as_tensor_variablet   theanot   Applyt
   TensorTypet   dtypet   False(   R	   t   Wt   bt   dt   Ht   RShapet   W_t   b_t   d_t   H_t   RShape_(    (    s?   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/ConvTransp3D.pyt	   make_node   s    c         C` sW   | j  \ } } } } } | \ } }	 }
 } } | d | d | d | d | d f g S(   Ni    i   i   i   (   R   (   R	   t   nodet   input_shapesR   R   R   R   R   t   W_shapet   b_shapet   d_shapet   H_shapet   RShape_shape(    (    s?   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/ConvTransp3D.pyt   infer_shape5   s    c         C` s"   t  g t  g t  g t  g t g g S(   N(   t   TrueR   (   R	   R   (    (    s?   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/ConvTransp3D.pyt   connection_pattern:   s    c         C` sî  | \ } } } } } | \ } t  j j j | | t j | d d d d d  d  … f ƒ | ƒ }	 | j }
 t  j j j | | |
 | ƒ } t j | d d ƒ} t	 |  d | ƒ } t
 ƒ  ƒ  } d t | ƒ k rå | j d  k	 rå | j } n d } d t | ƒ k r| j d  k	 r| j } n d } d t | ƒ k rK| j d  k	 rK| j } n d	 } d t | ƒ k r~| j d  k	 r~| j } n d
 } d | d | d | | _ d | d | d | d | | _ d | d | |	 _ | | | |	 | g S(   Ni    t   axisi   i   i   t   namet	   anon_dCdRt   anon_Ht   anon_Wt   anon_bs   ConvTransp3D_dCdW.H=s   ,dCdR=s   ,W=s   ConvTransp3D_dCdb.H=s   ,b=s   ConvTransp3D_dCdH.H=(   i    i   i   i   (   R   t   tensort   nnett   conv3DR   t
   zeros_liket   shapet
   convGrad3Dt   sumR   R   t   dirR*   t   None(   R	   R   t   output_gradientsR   R   R   R   R   t   dCdRt   dCdHt   WShapet   dCdWt   dCdbt   dCddt	   dCdRShapet	   dCdR_namet   H_namet   W_namet   b_name(    (    s?   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/ConvTransp3D.pyt   grad=   s0    	@	!!!!%c   	      C` s9   | \ } } } } } t  | | | | | ƒ | d d <d  S(   Ni    (   t   computeR(	   R	   R   R   t   output_storageR   R   R   R   R   (    (    s?   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/ConvTransp3D.pyt   performe   s    c         C` sB   | \ } } } }	 }
 | d } | d } d } t  j | t ƒ  ƒ S(   Nt   faili    s“3  
                    ///////////// < code generated by ConvTransp3D >

                    //printf("				ConvTransp3D c code\n");

                    //Check dimensionality of inputs
                    if (PyArray_NDIM(%(H)s) != 5)
                    {
                        PyErr_Format(PyExc_ValueError,
                                     "H must be a 5-D tensor but it is %%i-D",
                                     PyArray_NDIM(%(H)s));
                        %(fail)s
                    }

                    if (PyArray_NDIM(%(W)s) != 5)
                    {
                         PyErr_Format(PyExc_ValueError, "ConvTransp3D: W must be a 5-D tensor");
                %(fail)s
                    }

                    if (PyArray_NDIM(%(b)s) != 1)
                    {
                         PyErr_Format(PyExc_ValueError, "ConvTransp3D: b must be a vector");
                         %(fail)s
                    }

                    if (PyArray_NDIM(%(d)s) != 1)
                    {
                         PyErr_Format(PyExc_ValueError, "ConvTransp3D: d must be a vector");
                         %(fail)s
                    }

                    //Read and check stride arguments
                    if (PyArray_DIMS(%(d)s)[0] != 3)
                    {
                         PyErr_Format(PyExc_ValueError, "ConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", (long)PyArray_DIMS(%(d)s)[0] );
                         %(fail)s
                    }

                    { // for fail 1
                         int dr = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,0);
                         int dc = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,1);
                         int dt = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,2);

                         if (dr <= 0 || dc <= 0 || dt <= 0)
                         {
                             PyErr_Format(PyExc_ValueError, "ConvTransp3D: Strides must all be positive but are %%i, %%i, %%i",dr,dc,dt);
                             %(fail)s
                          }


                         //Read and check sizes of inputs

                        { // for fail 2
                            const int batchSize = PyArray_DIMS(%(H)s)[0];
                            const int outputChannels =  PyArray_DIMS(%(W)s)[0];

                            if (PyArray_DIMS(%(H)s)[4] != outputChannels)
                            {
                                PyErr_Format(PyExc_ValueError, "W produces a %%i channel image but the image has %%li channels. W.shape: (%%li, %%li, %%li, %%li, %%li) H.shape: (%%li, %%li, %%li, %%li, %%li)", outputChannels, (long)PyArray_DIMS(%(H)s)[4], (long)PyArray_DIMS(%(W)s)[0], (long)PyArray_DIMS(%(W)s)[1], (long)PyArray_DIMS(%(W)s)[2], (long)PyArray_DIMS(%(W)s)[3], (long)PyArray_DIMS(%(W)s)[4], (long)PyArray_DIMS(%(H)s)[0], (long)PyArray_DIMS(%(H)s)[1], (long)PyArray_DIMS(%(H)s)[2], (long)PyArray_DIMS(%(H)s)[3], (long)PyArray_DIMS(%(H)s)[4]);
                                %(fail)s
                            }

                            { // for fail 3

                                const int inputChannels = PyArray_DIMS(%(W)s)[4];

                                if (PyArray_DIMS(%(b)s)[0] != inputChannels)
                                {
                                    PyErr_Format(PyExc_ValueError, "ConvTransp3D: b operates on a %%li channel image but the image has %%i channels", (long)PyArray_DIMS(%(b)s)[0], inputChannels );
                                    %(fail)s
                                }

                                { // for fail 4

                                const int filterHeight = PyArray_DIMS(%(W)s)[1];
                                const int filterWidth = PyArray_DIMS(%(W)s)[2];
                                const int filterDur = PyArray_DIMS(%(W)s)[3];
                                const int outputHeight = PyArray_DIMS(%(H)s)[1];
                                const int outputWidth = PyArray_DIMS(%(H)s)[2];
                                const int outputDur = PyArray_DIMS(%(H)s)[3];

                                int videoHeight = (outputHeight-1) * dr + filterHeight;
                                int videoWidth = (outputWidth-1) * dc + filterWidth;
                                int videoDur = (outputDur-1) * dt + filterDur;

                                if (%(RShape)s)
                                {
                                    if (PyArray_NDIM(%(RShape)s) != 1)
                                    {
                                        PyErr_Format(PyExc_ValueError, "ConvTransp3D: RShape must be a vector");
                                        %(fail)s
                                    }

                                    if (PyArray_DIMS(%(RShape)s)[0] != 3)
                                    {
                                        PyErr_Format(PyExc_ValueError, "RShape must specify a 3D shape ( [height,width,duration] )");
                                        %(fail)s
                                    }

                                    dtype_%(RShape)s RShape0 = *(dtype_%(RShape)s*)PyArray_GETPTR1(%(RShape)s,0);
                                    dtype_%(RShape)s RShape1 = *(dtype_%(RShape)s*)PyArray_GETPTR1(%(RShape)s,1);
                                    dtype_%(RShape)s RShape2 = *(dtype_%(RShape)s*)PyArray_GETPTR1(%(RShape)s,2);

                                    if (RShape0 != -1)
                                    {
                                        if (RShape0 < videoHeight || RShape1 < videoWidth || RShape2 < videoDur)
                                        {
                                            PyErr_Format(PyExc_ValueError, "Reconstruction must have physical shape of at least [%%i,%%i,%%i] but RShape argument requests that it be [%%i,%%i,%%i]\n",videoHeight,videoWidth,videoDur,(int) RShape0,(int) RShape1,(int) RShape2);
                                            %(fail)s
                                        }

                                        videoHeight = RShape0;
                                        videoWidth = RShape1;
                                        videoDur = RShape2;
                                   }
                               } //closes if RShape

                               { // for fail 5

                                   //Allocate the reconstruction
                                   npy_intp dims[5];
                                   dims[0] = batchSize;
                                   dims[4] = inputChannels;
                                   dims[1] = videoHeight;
                                   dims[2] = videoWidth;
                                   dims[3] = videoDur;

                                   if(!(%(R)s) || PyArray_DIMS(%(R)s)[0]!=dims[0] ||
                                    PyArray_DIMS(%(R)s)[1]!=dims[1] ||
                                    PyArray_DIMS(%(R)s)[2]!=dims[2] ||
                                    PyArray_DIMS(%(R)s)[3]!=dims[3] ||
                                    PyArray_DIMS(%(R)s)[4]!=dims[4])
                                   {
                                       Py_XDECREF(%(R)s);
                                       %(R)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(H)s)->type_num);
                                       if (!(%(R)s)) {
                                           PyErr_Format(PyExc_MemoryError, "ConvTransp3D: could not allocate R");
                                           %(fail)s
                                       }
                                   }

                                   { // for fail 6

                                       #define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
                                       #define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )



                                       dtype_%(b)s * b = (dtype_%(b)s *) PyArray_DATA(%(b)s);

                                       int rs4 = PyArray_STRIDES(%(R)s)[4];
                                       int ws0 = PyArray_STRIDES(%(W)s)[0];
                                       int ws4 = PyArray_STRIDES(%(W)s)[4];
                                       int hs4 = PyArray_STRIDES(%(H)s)[4];

                                       // Compute R
                                       // R[i,r,c,t,j] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, rk, ck, tk,j] * H[i,rc,cc,tc,k]

                                       for (int i = 0; i < batchSize; i++) {
                                        for (int r = 0; r < videoHeight; r++) {
                                         const int frc = (int)std::max(0.0f, ceilf(float(r-filterHeight+1)/float(dr)));
                                         for (int c = 0; c < videoWidth; c++) {
                                          const int fcc = (int)std::max(0.0f, ceilf(float(c-filterWidth +1)/float(dc)));
                                          for (int t = 0; t < videoDur; t++) {
                                           const int ftc = (int)std::max(0.0f, ceilf(float(t-filterDur +1)  /float(dt)));

                                           long long Rpost = i * PyArray_STRIDES(%(R)s)[0] + r * PyArray_STRIDES(%(R)s)[1] + c * PyArray_STRIDES(%(R)s)[2] + t * PyArray_STRIDES(%(R)s)[3];

                                           long long Rpos = Rpost;
                                           for (int j = 0; j < inputChannels; j++)
                                           {
                                            //ELEM5(%(R)s, i,r,c,t,j) = b[j];
                                            ELEM_AT(%(R)s,Rpos) = b[j];
                                            Rpos += rs4;
                                           }


                                           for (int rc = frc; rc < outputHeight; rc++) {
                                            const int rk = r - rc * dr;
                                            if (rk < 0) break;

                                            for (int cc = fcc; cc < outputWidth; cc++) {
                                             const int ck = c - cc * dc;
                                             if (ck < 0) break;

                                             for (int tc = ftc; tc < outputDur; tc++)
                                             {
                                              const int tk = t - tc * dt;
                                              if (tk < 0) break;

                                              int Wpos = rk * PyArray_STRIDES(%(W)s)[1] +  ck * PyArray_STRIDES(%(W)s)[2] + tk * PyArray_STRIDES(%(W)s)[3];
                                              int Hpostc = i * PyArray_STRIDES(%(H)s)[0] +      rc * PyArray_STRIDES(%(H)s)[1] +  cc * PyArray_STRIDES(%(H)s)[2] + tc * PyArray_STRIDES(%(H)s)[3];
                                              Rpos = Rpost;
                                              for (int j = 0; j < inputChannels; j++)
                                              {
                                               int Wposj = Wpos;
                                               dtype_%(R)s & writePos = ELEM_AT(%(R)s,Rpos);

                                               int Hpos = Hpostc;

                                               for (int k = 0; k < outputChannels; k++) {
                                                //TODO-- it's probably bad in terms of cache that our inner loop is over the largest stride of W.... maybe OK since it's the smallest stride of H
                                                //writePos += ELEM5(%(W)s,k,rk,ck,tk,j) * ELEM5(%(H)s,i,rc,cc,tc,k);
                                                //writePos += ELEM_AT(%(W)s,Wpos) * ELEM_AT(%(H)s,Hpos);

                                                writePos  += ELEM_AT(%(W)s,Wpos) * ELEM_AT(%(H)s,Hpos);

                                                Wpos += ws0;
                                                Hpos += hs4;

                                               } //close the k loop
                                               Rpos += rs4;
                                               Wpos = Wposj +  ws4;
                                              } //close the j loop
                                             } // close the tc loop
                                            } //cc
                                           } //rc
                                          } //t
                                         } //c
                                        } //r
                                       } //i
                                   } //for fail 6
                               } //for fail 5
                           } //for fail 4
                       } //for fail 3
                   } //for fail 2
               } // for fail 1
               ///////////// < /code generated by ConvTransp3D >
                     (   R   t   render_stringt   locals(   R	   R   t   nodenameR   R   t   subR   R   R   R   R   RH   t   Rt
   codeSource(    (    s?   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/ConvTransp3D.pyt   c_codej   s
    

ç(    N(   t   __name__t
   __module__t   __doc__t	   __props__R
   R7   R   R&   R(   RD   RG   RO   (    (    (    s?   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/ConvTransp3D.pyR      s   				(	c   #      C` s[  t  |  j ƒ d k s t ‚ t  | j ƒ d k s6 t ‚ t  | j ƒ d k sQ t ‚ t  | ƒ d k si t ‚ |  j \ } } } } }	 | j \ }
 } } } } | | k s« t ‚ | j d |	 k sÄ t ‚ | \ } } } | d k så t ‚ | d k s÷ t ‚ | d k s	t ‚ | d | | } | d | | } | d | | } | d  k	 rÌ| d d k rÌ| d | k  rŽt | d | f ƒ t sŽt ‚ n  | d | k s¤t ‚ | d | k sºt ‚ | \ } } } n  t j |
 | | | |	 f d | j ƒ} xat	 d |
 ƒ D]P} xGt	 d |	 ƒ D]6} x-t	 d | ƒ D]} xt	 d | ƒ D]} xùt	 d | ƒ D]è} | | | | | | | | f <t
 d t t j t | | d ƒ t | ƒ ƒ ƒ g ƒ } t
 d t t j t | | d ƒ t | ƒ ƒ ƒ g ƒ } t
 d t t j t | | d ƒ t | ƒ ƒ ƒ g ƒ } x| | k  rB| | | } | d k  rVPn  | } xÖ | | k  r4| | | }  |  d k  r‰Pn  | }! x• |! | k  r&| |! | }" |" d k  r¼Pn  | | | | | | f c t j |  d  d  … | |  |" | f | | | | |! d  d  … f ƒ 7<|! d 7}! q’W| d 7} q_W| d 7} q,Wq[WqEWq/WqWqW| S(   Ni   i   i   i    iÿÿÿÿi   R   (   t   lenR3   t   AssertionErrorR7   t   printR   t   Nt   zerosR   R   t   maxt   intt   ceilt   floatt   dot(#   R   R   R   R   t   Rshapet   outputChannelst   filterHeightt   filterWidtht	   filterDurt   inputChannelst	   batchSizet   outputHeightt   outputWidtht	   outputDurt   outputChannelsAgaint   drt   dct   dtt   videoHeightt
   videoWidtht   videoDurRM   t   it   jt   rt   ct   tt   ftct   fcct   rct   rkt   cct   ckt   tct   tk(    (    s?   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/ConvTransp3D.pyRE   _  sz    ***>(   t
   __future__R    R   R   t   numpyRW   t	   six.movesR   R   t   theano.tensorR   R   t   theano.miscR   t   theano.gradientR   R   t   OpR   t   convTransp3DR7   RE   (    (    (    s?   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/ConvTransp3D.pyt   <module>   s   ÿ N	