from __future__ import absolute_import, print_function, division

import numpy

import theano
from theano import Apply, Op
from theano.gof import local_optimizer
from theano.gof.opt import copy_stack_trace
from theano.tensor import as_tensor_variable, TensorType
from theano.tensor import basic as T
from theano.tensor.opt import register_specialize_device
from theano.scalar import Composite, as_common_dtype
from theano.scalar import add, sub, true_div, mul


class BNComposite(Composite):
    init_param = ('dtype',)

    @theano.configparser.change_flags(compute_test_value='off')
    def __init__(self, dtype):
        self.dtype = dtype
        x = theano.scalar.Scalar(dtype=dtype).make_variable()
        mean = theano.scalar.Scalar(dtype=dtype).make_variable()
        std = theano.scalar.Scalar(dtype=dtype).make_variable()
        gamma = theano.scalar.Scalar(dtype=dtype).make_variable()
        beta = theano.scalar.Scalar(dtype=dtype).make_variable()
        o = add(mul(true_div(sub(x, mean), std), gamma), beta)
        inputs = [x, mean, std, gamma, beta]
        outputs = [o]
        super(BNComposite, self).__init__(inputs, outputs)

    def grad(self, inps, grads):
        x, mean, std, gamma, beta = inps
        top, = grads
        top_gamma = top * gamma
        x_mean = x - mean
        dx = top_gamma / std
        dmean = -dx
        dstd = -(top_gamma * x_mean) / (std * std)
        dgamma = top * x_mean / std
        return [dx, dmean, dstd, dgamma, top]


def batch_normalization(inputs, gamma, beta, mean, std,
                        mode='low_mem'):
    """
    This function will build the symbolic graph for applying batch normalization
    to a set of activations.
    Also works on GPUs, but is not optimized using cuDNN.

    .. versionadded:: 0.7.1

    Parameters
    ----------
    inputs : symbolic tensor
        Mini-batch of activations
    gamma: symbolic tensor
        BN scale parameter, must be of same dimensionality as
        inputs and broadcastable against it
    beta: symbolic tensor
        BN shift parameter, must be of same dimensionality as
        inputs and broadcastable against it
    mean: symbolic tensor
        inputs means, must be of same dimensionality as
        inputs and broadcastable against it
    std: symbolic tensor
        inputs standard deviation, must be of same dimensionality as
        inputs and broadcastable against it
    mode: 'low_mem' or 'high_mem'
        Specify which batch_normalization implementation to use.
        Because no intermediate representations are stored for
        back-propagation, the 'low_mem' implementation lowers memory usage;
        however, it is 5-10% slower than the 'high_mem' implementation. Note
        that this 5-10% difference applies to the batch_normalization
        operation only; the relative difference is likely to be smaller over
        a full model's fprop/bprop.
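
    A minimal usage sketch follows; the variable names, shapes and the use
    of per-column statistics are illustrative only, not part of the API:

    .. code-block:: python

        import theano
        import theano.tensor as T
        from theano.tensor.nnet.bn import batch_normalization

        x = T.matrix('x')
        gamma = T.vector('gamma')
        beta = T.vector('beta')
        # per-column statistics; keepdims keeps them broadcastable against x
        mean = x.mean(axis=0, keepdims=True)
        std = x.std(axis=0, keepdims=True) + 1e-6
        y = batch_normalization(x, gamma.dimshuffle('x', 0),
                                beta.dimshuffle('x', 0),
                                mean, std, mode='low_mem')
        f = theano.function([x, gamma, beta], y)
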
    R3   t	   scalar_opR   t   high_mems)   mode must be either "low_mem", "high_mem"(   R   t   tensort   elemwiset   ElemwiseR   R   t
   ValueError(   R"   R   R    R   R   t   modet   elm_bnt   rval(    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt   batch_normalization*   s    "$	c         ` s(  ˆ  d k r d
 ‰  nq ˆ  d k r= d t  t d | ƒ ƒ ‰  nI t ˆ  t  t t j f ƒ rq t  d „  ˆ  Dƒ ƒ ‰  n t d t ˆ  ƒ ƒ ‚ t  t ˆ  ƒ ƒ ‰  t	 ˆ  ƒ d k r¹ t d ƒ ‚ n  t
 ˆ  ƒ d k  sÝ t ˆ  ƒ | k rü t d | t ˆ  ƒ f ƒ ‚ n  t  ‡  f d	 †  t | ƒ Dƒ ƒ } ˆ  | f S(   Ns   per-activationi    t   spatiali   c         s` s   |  ] } t  | ƒ Vq d  S(   N(   t   int(   t   .0t   a(    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pys	   <genexpr>]   s    s   invalid axes: %ss/   there should be at least one normalization axiss1   axes should be less than ndim (<%d), but %s givenc         3` s!   |  ] } | ˆ  k r | Vq d  S(   N(    (   R@   t   i(   t   axes(    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pys	   <genexpr>e   s    (   i    (   i    (   t   tuplet   ranget
   isinstancet   listt   numpyt   ndarrayR9   t   strt   sortedt   lent   mint   max(   RC   t   ndimt   non_bc_axes(    (   RC   s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt!   _prepare_batch_normalization_axesW   s    	$"s   per-activationg-Cëâ6?gš™™™™™¹?c         C` s•  |  j  } t | | ƒ \ } }	 | j  | k r6 | }
 n@ t |	 ƒ }
 d g | } x$ t |	 ƒ D] \ } } | | | <q\ W| j  |
 k s” | j  |
 k r¶ t d | j  | j  |
 f ƒ ‚ n  | d k | d k k rÝ t d ƒ ‚ n  | d k	 r| j  |
 k rt d | j  |
 f ƒ ‚ n  | d k	 rK| j  |
 k rKt d | j  |
 f ƒ ‚ n  t j t j	 j
 | ƒ } | d k  r‰t d t | ƒ ƒ ‚ n  t |  ƒ }  t | ƒ } t | ƒ } |
 | k rÚ| j | ƒ } | j | ƒ } n$ t j | | Œ } t j | | Œ } t d | ƒ } | d k	 r3| d k	 r3t | ƒ } t | ƒ } |
 | k rj| j | ƒ } | j | ƒ } n$ t j | | Œ } t j | | Œ } | |  | | d	 | d
 | d | d | ƒ\ } } } } } | j | j k rñt j | | j ƒ } n  | j | j k rt j | | j ƒ } n  | | | | | f } n | |  | | d	 | ƒ} |
 | k r‹| d g g  | d D] } | j |	 ƒ ^ ql} n  t | ƒ S(   sþ  
    Performs batch normalization of the given inputs, using the mean and
    variance of the inputs.

    Parameters
    ----------
    axes : 'per-activation', 'spatial' or a tuple of ints
        The axes along which the input should be normalized. ``'per-activation'``
        normalizes per activation and is equal to ``axes=(0,)``.
        ``'spatial'`` shares normalization factors across spatial dimensions
        (i.e., all dimensions past the second), which for 4D inputs would be
        equal to ``axes=(0, 2, 3)``.
    gamma : tensor
        Learnable scale factors. The shape must match the shape of `inputs`,
        except for the axes in `axes`. These axes should be set to 1 or be
        skipped altogether (such that `gamma.ndim == inputs.ndim - len(axes)`).
    beta : tensor
        Learnable biases. Must match the tensor layout of `gamma`.
    epsilon : float
        Epsilon value used in the batch normalization formula. Minimum allowed
        value is 1e-5 (imposed by cuDNN).
    running_average_factor : float
        Factor for updating the values of `running_mean` and `running_var`.
        If the factor is close to one, the running averages will update quickly,
        if the factor is close to zero it will update slowly.
    running_mean : tensor or None
        Previous value of the running mean. If this is given, the new value
        ``running_mean * (1 - r_a_factor) + batch mean * r_a_factor``
        will be returned as one of the outputs of this function.
        `running_mean` and `running_var` should either both be given or
        both be None. The shape should match that of `gamma` and `beta`.
    running_var : tensor or None
        Previous value of the running variance. If this is given, the new value
        ``running_var * (1 - r_a_factor) + (m / (m - 1)) * batch var * r_a_factor``
        will be returned as one of the outputs of this function,
        where `m` is the product of lengths of the averaged-over dimensions.
        `running_mean` and `running_var` should either both be given or
        both be None. The shape should match that of `gamma` and `beta`.

    Returns
    -------
    out : tensor
        Batch-normalized inputs.
    mean : tensor
        Means of `inputs` across the normalization axes.
    invstd : tensor
        Inverse standard deviations of `inputs` across the normalization axes.
    new_running_mean : tensor
        New value of the running mean (only if both `running_mean` and
        `running_var` were given).
    new_running_var : tensor
        New value of the running variance (only if both `running_var` and
        `running_mean` were given).

    Notes
    -----
    If per-activation or spatial normalization is selected, this operation
    will use the cuDNN implementation. (This requires cuDNN 5 or newer.)

    The returned values are equivalent to:

    .. code-block:: python

        # for per-activation normalization
        axes = (0,)
        # for spatial normalization
        axes = (0,) + tuple(range(2, inputs.ndim))
        mean = inputs.mean(axes, keepdims=True)
        var = inputs.var(axes, keepdims=True)
        invstd = T.inv(T.sqrt(var + epsilon))
        out = (inputs - mean) * gamma * invstd + beta

        m = T.cast(T.prod(inputs.shape) / T.prod(mean.shape), 'float32')
        running_mean = running_mean * (1 - running_average_factor) + \
                       mean * running_average_factor
        running_var = running_var * (1 - running_average_factor) + \
                      (m / (m - 1)) * var * running_average_factor
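
    A minimal usage sketch for a 2D input (the variable names and shapes
    are illustrative only, not part of the API):

    .. code-block:: python

        import theano.tensor as T
        from theano.tensor.nnet.bn import batch_normalization_train

        x = T.matrix('x')            # (batch, features)
        gamma = T.vector('gamma')    # one scale per feature
        beta = T.vector('beta')      # one shift per feature
        out, mean, invstd = batch_normalization_train(
            x, gamma, beta, axes='per-activation')
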
    R   sœ   gamma and beta dimensionality must match the number of non-normalized axes, or have the same number of dimensions as the inputs; got %d and %d instead of %dsF   running_mean and running_var must either both be given or both be NonesW   running_mean must be of the same dimensionality as gamma and beta; got %d instead of %dsV   running_var must be of the same dimensionality as gamma and beta; got %d instead of %dgñhãˆµøä>s%   epsilon must be at least 1e-5, got %sRC   t   epsilont   running_average_factort   running_meant   running_vari    i   N(   RO   RQ   RL   t	   enumerateR9   t   NoneRH   t   castR   t   configt   floatXRJ   R   t
   dimshufflet   Tt   addbroadcastt   AbstractBatchNormTraint   broadcastablet   patternbroadcastRD   (   R"   R   R    RC   RR   RS   RT   RU   RO   RP   t   params_ndimt   params_dimshuffle_patternRB   t   axist   batchnorm_opt   outR   t   invstdt   new_running_meant   new_running_vart   resultst   r(    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt   batch_normalization_traini   sj    Q		
*c         C` s"  |  j  } t | | ƒ \ } } | j  | k r6 | }	 n@ t | ƒ }	 d g | }
 x$ t | ƒ D] \ } } | |
 | <q\ W| j  |	 k s” | j  |	 k r¶ t d | j  | j  |	 f ƒ ‚ n  | j  |	 k sÔ | j  |	 k rö t d | j  | j  |	 f ƒ ‚ n  t j t j j	 | ƒ } | d k  r4t d t
 | ƒ ƒ ‚ n  t | ƒ } t | ƒ } t | ƒ } t | ƒ } |	 | k r¯| j |
 ƒ } | j |
 ƒ } | j |
 ƒ } | j |
 ƒ } nH t j | | Œ } t j | | Œ } t j | | Œ } t j | | Œ } t d | ƒ } | |  | | | | d | ƒS(   s  
    Performs batch normalization of the given inputs, using the given mean and
    variance.

    Parameters
    ----------
    axes : 'per-activation', 'spatial' or a tuple of ints
        The axes along which the input should be normalized. ``'per-activation'``
        normalizes per activation and is equal to ``axes=(0,)``.
        ``'spatial'`` shares normalization factors across spatial dimensions
        (i.e., all dimensions past the second), which for 4D inputs would be
        equal to ``axes=(0, 2, 3)``.
    gamma : tensor
        Scale factors. The shape must match the shape of `inputs`,
        except for the axes in `axes`. These axes should be set to 1 or be
        skipped altogether (such that `gamma.ndim == inputs.ndim - len(axes)`).
    beta : tensor
        Biases. Must match the tensor layout of `gamma`.
    mean : tensor
        Means. Usually these are running averages computed during training.
        Must match the tensor layout of `gamma`.
    var : tensor
        Variances. Usually these are running averages computed during training.
        Must match the tensor layout of `gamma`.
    epsilon : float
        Epsilon value used in the batch normalization formula. Minimum allowed
        value is 1e-5 (imposed by cuDNN).

    Returns
    -------
    out : tensor
        Batch-normalized inputs.

    Notes
    -----
    If per-activation or spatial normalization is selected, this operation
    will use the cuDNN implementation. (This requires cuDNN 5 or newer.)

    The returned value is equivalent to:

    .. code-block:: python

        # for per-activation normalization
        axes = (0,)
        # for spatial normalization
        axes = (0,) + tuple(range(2, inputs.ndim))
        gamma, beta, mean, var = (T.addbroadcast(t, *axes)
                                  for t in (gamma, beta, mean, var))
        out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta
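
    A minimal usage sketch (the variable names are illustrative only), with
    statistics that would typically be running averages from training:

    .. code-block:: python

        import theano.tensor as T
        from theano.tensor.nnet.bn import batch_normalization_test

        x = T.matrix('x')
        gamma, beta = T.vector('gamma'), T.vector('beta')
        mean, var = T.vector('mean'), T.vector('var')
        out = batch_normalization_test(x, gamma, beta, mean, var,
                                       axes='per-activation', epsilon=1e-4)
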
    R   sœ   gamma and beta dimensionality must match the number of non-normalized axes, or have the same number of dimensions as the inputs; got %d and %d instead of %ds^   mean and var must be of the same dimensionality as gamma and beta; got %d and %d instead of %dgñhãˆµøä>s%   epsilon must be at least 1e-5, got %sRC   RR   (   RO   RQ   RL   RV   R9   RH   RX   R   RY   RZ   RJ   R   R[   R\   R]   t   AbstractBatchNormInference(   R"   R   R    R   t   varRC   RR   RO   RP   Ra   Rb   RB   Rc   Rd   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt   batch_normalization_test  s@    4		R^   c           B` sY   e  Z d  Z d Z d d „ Z d „  Z d d d d d „ Z d „  Z d	 „  Z	 d
 „  Z
 RS(   sÀ  
    Abstract Op for Batch Normalization.

    Parameters
    ----------
    axes : a tuple of ints
        The axes along which the input should be normalized.
    x : tensor
        The input to be normalized along `axes`.
    scale : tensor
        `scale` should have the same number of dimensions as `x`.
        All dimensions listed in `axes` should have length 1.
    bias : tensor
        `bias` should have the same number of dimensions as `x`.
        All dimensions listed in `axes` should have length 1.
    epsilon
        Epsilon value used in the batch normalization formula. Minimum allowed
        value is 1e-5 (imposed by cuDNN).
    running_average_factor : float
        Factor for updating the values of `running_mean` and `running_var`.
        If the factor is close to one, the running averages will update quickly,
        if the factor is close to zero it will update slowly.
    running_mean : tensor or None
        Previous value of the running mean. If this is given, the new value
        ``running_mean * (1 - running_average_factor) + batch mean * running_average_factor``
        will be returned as one of the outputs of this function.
        `running_mean` and `running_var` should either both be given or
        both be None.
    running_var : tensor or None
        Previous value of the running variance. If this is given, the new value
        ``running_var * (1 - running_average_factor) + (m / (m - 1)) * batch var * running_average_factor``
        will be returned as one of the outputs of this function,
        where `m` is the product of lengths of the averaged-over dimensions.
        `running_mean` and `running_var` should either both be given or
        both be None.
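
    This Op is normally built through `batch_normalization_train` rather
    than used directly. As a sketch (the variable names are illustrative,
    and the parameter tensors must already be broadcastable against `x`):

    .. code-block:: python

        op = AbstractBatchNormTrain(axes=(0,))
        out, x_mean, x_invstd = op(x, scale, bias, epsilon=1e-4)
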
    RC   i    c         C` sV   t  | t t f ƒ s t ‚ t | ƒ d k s3 t ‚ t d „  | Dƒ ƒ } | |  _ d  S(   Ni    c         s` s   |  ] } t  | ƒ Vq d  S(   N(   R?   (   R@   RA   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pys	   <genexpr>—  s    (   RF   RD   RG   t   AssertionErrorRL   RC   (   R   RC   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyR   ”  s    c         C` s'   | d g | d g t  | j ƒ d S(   Ni    i   (   RL   R#   (   R   t   nodet   shape(    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt   infer_shapeš  s    g-Cëâ6?gš™™™™™¹?c   
      C` s  t  | ƒ } t  | ƒ } t  | ƒ } t  | ƒ } t  | ƒ } | d  k	 rW t  | ƒ } n  | d  k	 rr t  | ƒ } n  | j | j k o’ | j k n s t ‚ | d  k rµ | d  k sÓ | d  k	 rÍ | d  k	 sÓ t ‚ | d  k s÷ | j | j k s÷ t ‚ | d  k s| j | j k st ‚ | rKt | | | | | ƒ \ } } } } } n t | | | ƒ \ } } } | | | | | g } | j ƒ  | j ƒ  | j ƒ  g }	 | d  k	 r÷| d  k	 r÷| j | ƒ | j | ƒ |	 j | j ƒ  ƒ |	 j | j ƒ  ƒ n  t |  | |	 ƒ S(   N(   R   RW   RO   Ro   R   t   typet   appendR   (
   R   R   t   scalet   biasRR   RS   RT   RU   R"   t   output_types(    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt	   make_node  s4    +$$'!c         C` s¹   | d  \ } } } } } | d }	 | d  \ }
 } } t  j j ƒ  ƒ  t  j j ƒ  ƒ  g } x6 t d t | ƒ ƒ D] } | j t  j j ƒ  ƒ  ƒ qp Wt |  j ƒ | |	 | | | | ƒ | S(   Ni   i    i   (   R   t   gradientt   DisconnectedTypeRE   RL   Rt   t   AbstractBatchNormTrainGradRC   (   R   R"   R#   R%   R   Ru   Rv   RR   RS   t   dyt   _R(   t   x_invstdt   disconnected_outputsRB   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt   L_op¾  s    
c         C` s¿   t  t  t  g t  t  t  g t  t  t  g t t t g t t t g g } xv t d t | j ƒ ƒ D]\ } | d j t  ƒ x | d D] } | j t ƒ q} W| j t g d | d t  g ƒ q[ W| S(   Ni   i    i   i   (   t   Truet   FalseRE   RL   R"   Rt   (   R   Rp   t   patternsRB   t   pattern(    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt   connection_patternË  s    'c         C` sª  | d  \ } } } } } |  j  }	 t |	 ƒ d k  sI t |	 ƒ | j k rk t d | j t |	 ƒ f ƒ ‚ n  | j |	 d t ƒ}
 | j |	 d t ƒ} d t	 j
 | | ƒ } | |
 | | | } | | d d <|
 | d d <| | d d <t | ƒ d k r/| d } | d | |
 | } | | d d <n  t | ƒ d	 k r¦t t	 j | j ƒ t	 j | j ƒ ƒ } | d	 } | d | | | d | | } | | d
 d <n  d  S(   Ni   i    s1   axes should be less than ndim (<%d), but %s givent   keepdimsg      ð?i   i   i   i   i   (   RC   RM   RN   RO   R9   RJ   R   R   Rm   RH   t   sqrtRL   t   floatt   prodRq   (   R   Rp   R"   t   output_storageR   Ru   Rv   RR   RS   RC   R   Rm   Rf   Re   RT   t   mRU   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt   performÛ  s,    	'"
(
(   s   axes(   i    N(   R.   R/   t   __doc__t	   __props__R   Rr   RW   Rx   R€   R…   RŒ   (    (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyR^   l  s   $			Rl   c           B` sP   e  Z d  Z d
 Z d d „ Z d „  Z d d „ Z d „  Z d „  Z d	 „  Z	 RS(   s#  
    Abstract Op for Batch Normalization.

    Parameters
    ----------
    axes : a tuple of ints
        The axes along which the input is normalized.
    epsilon
        Epsilon value used in the batch normalization formula. Minimum allowed
        value is 1e-5 (imposed by cuDNN).
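
    In NumPy terms, the computation this Op performs (see `perform` below)
    is equivalent to:

    .. code-block:: python

        out = (x - est_mean) * scale / numpy.sqrt(est_var + epsilon) + bias
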
    RC   i    c         C` sV   t  | t t f ƒ s t ‚ t | ƒ d k s3 t ‚ t d „  | Dƒ ƒ } | |  _ d  S(   Ni    c         s` s   |  ] } t  | ƒ Vq d  S(   N(   R?   (   R@   RA   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pys	   <genexpr>	  s    (   RF   RD   RG   Ro   RL   RC   (   R   RC   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyR     s    c         C` s   | d g S(   Ni    (    (   R   Rp   Rq   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyRr     s    g-Cëâ6?c         C` sá   t  | ƒ } t  | ƒ } t  | ƒ } t  | ƒ } t  | ƒ } t  | ƒ } t | | | | | ƒ \ } } } } } | j | j k o« | j k o« | j k o« | j k n s¶ t ‚ t |  | | | | | | g | j ƒ  g ƒ S(   N(   R   R   RO   Ro   R   Rs   (   R   R   Ru   Rv   t   estimated_meant   estimated_varianceRR   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyRx     s    $Gc         ` s‚  | \ } } } } } } | d }	 |  j  ‰  t ˆ  ƒ d k  sR t ˆ  ƒ | j k rt t d | j t ˆ  ƒ f ƒ ‚ n  ‡  f d †  | | | | f Dƒ \ } } } } | | }
 t j j |
 ƒ } t j j	 d ƒ } |	 | | } |	 | | j
 ˆ  d t ƒ| } |	 j
 ˆ  d t ƒ} |	 j
 ˆ  d t ƒ| | } |	 | | j
 ˆ  d t ƒ| | |
 | } | | | | | t j j ƒ  ƒ  g S(   Ni    s1   axes should be less than ndim (<%d), but %s givenc         3` s$   |  ] } t  j j | ˆ  Œ Vq d  S(   N(   R   R6   R]   (   R@   t   t(   RC   (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pys	   <genexpr>%  s   g       @R†   (   RC   RM   RN   RO   R9   RJ   R   R6   R‡   t   constantt   sumR   Ry   Rz   (   R   R"   R%   R   Ru   Rv   t   est_meant   est_varRR   R|   t   est_var_epst   est_stdt   twoR)   t   dscalet   dbiasR*   t   dvar(    (   RC   s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyR-     s     
	'""
!.c         C` s(   t  g t  g t  g t  g t  g t g g S(   N(   R   R‚   (   R   Rp   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyR…   5  s    c         C` sM   | \ } } } } } }	 | | | t  j | |	 ƒ | }
 |
 | d d <d  S(   Ni    (   RH   R‡   (   R   Rp   R"   RŠ   R   Ru   Rv   R   R   RR   Re   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyRŒ   9  s    #(   s   axes(   i    (
   R.   R/   R   RŽ   R   Rr   Rx   R-   R…   RŒ   (    (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyRl   ÷  s   			R{   c           B` s8   e  Z d Z d d „ Z d d „ Z d „  Z d „  Z RS(	   RC   i    c         C` sV   t  | t t f ƒ s t ‚ t | ƒ d k s3 t ‚ t d „  | Dƒ ƒ } | |  _ d  S(   Ni    c         s` s   |  ] } t  | ƒ Vq d  S(   N(   R?   (   R@   RA   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pys	   <genexpr>E  s    (   RF   RD   RG   Ro   RL   RC   (   R   RC   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyR   B  s    g-Cëâ6?c         C` só   t  | ƒ } t  | ƒ } t  | ƒ } t  | ƒ } t  | ƒ } t  | ƒ } t | | | | | ƒ \ } } } } } | j | j k o« | j k o« | j k o« | j k n s¶ t ‚ t |  | | | | | | g | j ƒ  | j ƒ  | j ƒ  g ƒ S(   N(   R   R   RO   Ro   R   Rs   (   R   R   R|   Ru   R(   R~   RR   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyRx   H  s    $Gc         C` s   | d | d | d g S(   Ni    i   (    (   R   Rp   Rq   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyRr   X  s    c         C` s<  | \ } } } } } }	 |  j  }
 t |
 ƒ d k  sH t |
 ƒ | j k rj t d | j t |
 ƒ f ƒ ‚ n  | | } t j | | d |
 d t ƒ} | | | | | d } | | t j | d |
 d t ƒ} t j	 | | | d |
 d t ƒ} t j	 | d |
 d t ƒ} | | d d <| | d d <| | d d <d  S(   Ni    s1   axes should be less than ndim (<%d), but %s givenRc   R†   i   i   i   (
   RC   RM   RN   RO   R9   RJ   RH   R   R   R“   (   R   Rp   R"   RŠ   R   R|   Ru   R(   R~   RR   RC   t   x_difft   mean_dy_x_difft   ct   g_wrt_inputst   g_wrt_scalet
   g_wrt_bias(    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyRŒ   [  s    	'"
##(   s   axes(   i    (   R.   R/   RŽ   R   Rx   Rr   RŒ   (    (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyR{   ?  s
   	c         C` s'  t  |  j t ƒ s d  S|  j d  \ } } } } } |  j j } t | ƒ d k  se t | ƒ | j k ri d  St  | j	 t
 ƒ sÈ t  | j	 t
 ƒ sÈ t  | j	 t
 ƒ sÈ t  | j	 t
 ƒ sÈ t  | j	 t
 ƒ rÌ d  St |  j ƒ d k rÿ t  |  j d j	 t
 ƒ rÿ d  St |  j ƒ d k r2t  |  j d j	 t
 ƒ r2d  S| j | d t ƒ} | j | d t ƒ} | j d k rŒ| j d k rŒ| j d ƒ } n  t j t j | | ƒ ƒ }	 | | | |	 | }
 |
 | |	 g } t |  j ƒ d k r|  j d } | d | | | } | j | ƒ n  t |  j ƒ d k rt j t j | j ƒ t j | j ƒ t j j ƒ } |  j d } | d | | | d | | } | j | ƒ n  g  t | |  j ƒ D]! \ } } t j | | j ƒ ^ q°} xI t j j  j! |  j | ƒ D], } | |  j k rót" |  j d | ƒ qóqóW| S(	   Ni   i    i   R†   t   float32t   float64g      ð?i   (#   RF   t   opR^   RW   R"   RC   RM   RN   RO   Rs   R   RL   R   R   Rm   R   t   astypeR\   t   invR‡   Rt   RX   R‰   Rq   R   RY   RZ   t   zipR#   R`   R_   t   goft   grapht	   variablesR   (   Rp   R   Ru   Rv   RR   RS   RC   R   Rm   Rf   Re   Ri   RT   R‹   RU   Rj   t   r_orig(    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt   local_abstract_batch_norm_trainn  sP    '//47"c         C` s  t  |  j t ƒ s d  S|  j \ } } } } } } |  j j } t | ƒ d k  sd t | ƒ | j k rh d  St  | j	 t
 ƒ sÚ t  | j	 t
 ƒ sÚ t  | j	 t
 ƒ sÚ t  | j	 t
 ƒ sÚ t  | j	 t
 ƒ sÚ t  | j	 t
 ƒ rÞ d  S| | } t j | | d | d t ƒ}	 | | | |	 | d }
 | |
 t j |
 d | d t ƒ} t j | | | d | d t ƒ} t j | d | d t ƒ} | | | g } g  t | |  j ƒ D]! \ } } t j | | j ƒ ^ q¤} xI t j j j |  j | ƒ D], } | |  j k rçt |  j d | ƒ qçqçW| S(   Ni    Rc   R†   i   (   RF   R¤   R{   RW   R"   RC   RM   RN   RO   Rs   R   R\   R   R   R“   R§   R#   R`   R_   R   R¨   R©   Rª   R   (   Rp   R   R|   Ru   R(   R~   RR   RC   Rœ   R   Rž   RŸ   R    R¡   Ri   Rj   R«   Rm   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt$   local_abstract_batch_norm_train_grad¡  s4    '
##7"c   	      C` sl  t  |  j t ƒ s d  S|  j \ } } } } } } t  | j t ƒ s£ t  | j t ƒ s£ t  | j t ƒ s£ t  | j t ƒ s£ t  | j t ƒ s£ t  | j t ƒ r§ d  S| j d k r× | j d k r× | j d ƒ } n  | | | t	 j
 | | ƒ | } t	 j | |  j d j ƒ } xL t j j j |  j | g ƒ D], } | |  j k r5t |  j d | ƒ q5q5W| g S(   NR¢   R£   i    (   RF   R¤   Rl   RW   R"   Rs   R   R   R¥   R\   R‡   R`   R#   R_   R   R¨   R©   Rª   R   (	   Rp   R   Ru   Rv   R   R   RR   t   resultRm   (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt#   local_abstract_batch_norm_inferenceÄ  s$    #%t   batchnorm_optst   fast_compilet   fast_runR¬   i   R­   R¯   (,   t
   __future__R    R   R   RH   R   R   R   t
   theano.gofR   t   theano.gof.optR   t   theano.tensorR   R   R	   R\   t   theano.tensor.optR
   t   theano.scalarR   R   R   R   R   R   R   R=   RQ   RW   Rk   Rn   R^   Rl   R{   R¬   R­   R¯   R¨   t   optdbt   LocalGroupDBt   bn_groupoptR.   t   register(    (    (    s5   /tmp/pip-build-X4mzal/theano/theano/tensor/nnet/bn.pyt   <module>   sF   ",	d‹H/3#		
	
	