ó
àÆ÷Xc        
   @` s  d  Z  d d l m Z m Z m Z d d l m Z d d l j j	 Z	 d d l
 Z
 d d l Z d d l Z d d l Z d d l m Z d d l Z d d l m Z d d l m Z m Z d d l m Z d d	 l m Z m Z d d
 l m Z m Z d d l m Z d d l m  Z  m! Z! m" Z" e Z# d Z$ d Z% d Z& d Z' d Z( e
 j) d ƒ Z* e+ a, d Z- d a. d „  Z/ d d „ Z0 d d „ Z1 d e j j2 j3 f d „  ƒ  YZ4 e4 ƒ  Z5 d „  Z6 e+ d d „ Z7 e+ d e8 e+ d d d „ Z9 e+ e+ e: d „ Z; d  „  Z< d! „  Z= d" e> f d# „  ƒ  YZ? d$ e@ f d% „  ƒ  YZA e+ d& „ ZB d' „  ZC d( „  ZD d) eE f d* „  ƒ  YZF d+ „  ZG d, e+ e+ e+ e+ e+ e+ e: e8 d- „	 ZH d. eI f d/ „  ƒ  YZJ eJ eH _K e+ d d0 „ ZL e+ d d1 „ ZM d2 „  ZN d3 e  f d4 „  ƒ  YZO eO ƒ  ZP d5 „  ZQ d6 e  f d7 „  ƒ  YZR eR ƒ  ZS d8 „  ZT d9 e  f d: „  ƒ  YZU eU ƒ  ZV d; „  ZW d< e  f d= „  ƒ  YZX d> „  ZY d? e  f d@ „  ƒ  YZZ dA „  Z[ d S(B   s!   Driver for gradient calculations.i    (   t   absolute_importt   print_functiont   division(   t   OrderedDictN(   t
   itervalues(   t   gof(   t   utilst   Variable(   t   izip(   t   xranget   reduce(   t   NullTypet	   null_type(   t   get_debug_values(   t   ViewOpt   FAST_RUNt	   DebugModes?   James Bergstra, Razvan Pascanu, Arnaud Bergeron, Ian Goodfellows    (c) 2011, Universite de Montreals   3-clause BSD Licenses(   theano-dev <theano-dev@googlegroups.com>s   restructuredtext ens   theano.gradients    op.grad(...) returned a non-listc         C` sË   |  o	 | s t  d ƒ ‚ |  s% | rR t | t t f ƒ rR |  rH | g S| f Snu |  p[ | rš t | t t f ƒ rš t | ƒ d k s’ t  d ƒ ‚ | d S|  s¦ | rÃ |  r¶ t | ƒ St | ƒ Sn | Sd S(   s»  
    Formats the outputs according to the flags `use_list` and `use_tuple`.
    If `use_list` is True, `outputs` is returned as a list (if `outputs`
    is not a list or a tuple then it is converted in a one element list).
    If `use_tuple` is True, `outputs` is returned as a tuple (if `outputs`
    is not a list or a tuple then it is converted into a one element tuple).
    Otherwise (if both flags are false), `outputs` is returned.
    s(   Both flags cannot be simultaneously Truei   s,   Wrong arguments. Expected a one element listi    N(   t   AssertionErrort
   isinstancet   listt   tuplet   len(   t   use_listt	   use_tuplet   outputs(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt	   format_as-   s    		"
"	
t    c         C` s   t  d | | |  | f ƒ ƒ  S(   sä  
    Return an un-computable symbolic variable of type `x.type`.

    If any call to tensor.grad results in an expression containing this
    un-computable variable, an exception (NotImplementedError) will be
    raised indicating that the gradient on the
    `x_pos`'th input of `op` has not been implemented. Likewise if
    any call to theano.function involves this variable.

    Optionally adds a comment to the exception explaining why this
    gradient is not implemented.
    sc   This variable is Null because the grad method for input %s (%s) of the %s op is not implemented. %s(   R   (   t   opt   x_post   xt   comment(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   grad_not_implementedJ   s    c         C` s   t  d | | |  | f ƒ ƒ  S(   sâ  
    Return an un-computable symbolic variable of type `x.type`.

    If any call to tensor.grad results in an expression containing this
    un-computable variable, an exception (GradUndefinedError) will be
    raised indicating that the gradient on the
    `x_pos`'th input of `op` is mathematically undefined. Likewise if
    any call to theano.function involves this variable.

    Optionally adds a comment to the exception explaining why this
    gradient is not defined.
    sl   This variable is Null because the grad method for input %s (%s) of the %s op is mathematically undefined. %s(   R   (   R   R   R   R   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   grad_undefined^   s    t   DisconnectedTypec           B` sD   e  Z d  Z e d d „ Z d „  Z d „  Z e d „ Z	 d „  Z
 RS(   s   A type indicating that a variable is a result
        of taking the gradient of c with respect to x
        when c is not a function of x.
        A symbolic placeholder for 0, but to convey
        the extra information that this gradient is 0
        because it is disconnected.
    c         C` s   t  d ƒ ‚ d  S(   Nsy   If you're assigning to a DisconnectedType you're doing something wrong. It should only be used as a symbolic placeholder.(   R   (   t   selft   datat   strictt   allow_downcast(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   filter}   s    c         C` s   t  d ƒ ‚ d  S(   Nsy   If you're assigning to a DisconnectedType you're doing something wrong. It should only be used as a symbolic placeholder.(   R   (   R"   t   other(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   fiter_variable…   s    c         C` s   t  S(   N(   t   False(   t   at   b(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   may_share_memory   s    c         C` s   t  d ƒ ‚ d  S(   Nsy   If you're assigning to a DisconnectedType you're doing something wrong. It should only be used as a symbolic placeholder.(   R   (   R*   R+   t   force_same_dtype(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   value_eq   s    c         C` s   d S(   NR!   (    (   R"   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   __str__˜   s    N(   t   __name__t
   __module__t   __doc__R)   t   NoneR&   R(   R,   t   TrueR.   R/   (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR!   s   s   		c         ` sŽ  d d l  m ‰ t |  t ƒ } t |  t ƒ } t ˆ t t f ƒ sO ˆ g ‰ n  t ˆ t t f ƒ sp ˆ g ‰ n  t |  t t f ƒ s‘ |  g }  n  t ˆ ƒ t ˆ ƒ k s¯ t ‚ xî t t ˆ ˆ ƒ ƒ D]× } | d } | d \ } } t | t	 j
 ƒ sˆ | ƒ } n  t | t	 j
 ƒ s'ˆ | ƒ } n  ya | j j | j j k r‡t d t | ƒ d d t | j j ƒ d t | j j ƒ ƒ ‚ n  WqÅ t k
 r›qÅ XqÅ Wt ƒ  ‰ ‡  ‡ ‡ ‡ ‡ f d †  ‰  x |  D] }	 ˆ  |	 j ƒ qËWg  }
 x“ |  D]‹ }	 |	 ˆ k r|
 j ˆ ˆ j |	 ƒ ƒ qïˆ |	 j |	 j j j |	 ƒ d
 k rSt d	 ƒ ‚ qï|
 j ˆ |	 j |	 j j j |	 ƒ ƒ qïWt | | |
 ƒ S(   sh  
    Computes the R operation on `f` wrt to `wrt` evaluated at points given
    in `eval_points`. Mathematically this stands for the jacobian of `f` wrt
    to `wrt` right muliplied by the eval points.

    :type f: Variable or list of Variables
             `f` stands for the output of the computational graph to which you
             want to apply the R operator
    :type wrt: Variable or list of `Variables`s
               variables for which you compute the R operator of the expression
               described by `f`
    :type eval_points: Variable or list of Variables
                       evalutation points for each of the variables in `wrt`
    :rtype: :class:`~theano.gof.Variable` or list/tuple of Variables depending on type of f
    :return: symbolic expression such that
        R_op[i] = sum_j ( d f[i] / d wrt[j]) eval_point[j]
        where the indices in that expression are magic multidimensional
        indices that specify both the position within a list and all
        coordinates of the tensor element in the last.
        If `wrt` is a list/tuple, then return a list/tuple with the results.
    i    (   t   as_tensor_variablei   s   Element s#    of wrt/eval_point have mismatched s   dimensionality: s    versus c         ` s6  |  d k r d S|  j } |  j } g  } xî | D]æ } | ˆ k r^ | j ˆ ˆ j | ƒ ƒ q/ | j d k r¨ y | j | j ƒ  ƒ Wqt k
 r¤ | j d ƒ qXq/ | j ˆ k rá | j ˆ | j | j j j | ƒ ƒ q/ ˆ  | j ƒ | j ˆ | j | j j j | ƒ ƒ q/ Wg  } x÷ t	 | | ƒ D]æ \ } } | d k	 rt
 | t j ƒ shˆ | ƒ } n  t
 | t j ƒ s‰ˆ | ƒ } n  y | j j | ƒ } Wn> t k
 rßt j j | | j j ƒ } | j j | ƒ } n X| j | j k søt ‚ | j | ƒ q/| j | ƒ q/W| j |  j | ƒ ˆ |  <d S(   s    TODO: writeme N(   R3   R   t   inputst   appendt   indext   ownert
   zeros_liket	   ExceptionR   t   zipR   R   R   t   typet   filter_variablet	   TypeErrort   theanot   tensort   castt   dtypeR   t   R_op(   t   nodeR   R6   t   local_eval_pointst   inpt   same_type_eval_pointsR   t   y(   t	   _traverseR5   t   eval_pointst
   seen_nodest   wrt(    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRJ   ã   sF    		$%sH   The function is not differentiable with respect to the provided inputs !N(   t   theano.tensorR5   R   R   R   R   R   t	   enumerateR<   R   R   R=   t   ndimt
   ValueErrort   strt   AttributeErrorR   R9   R7   R8   R   R3   R   (   t   fRM   RK   t
   using_listt   using_tuplet   packt   it   wrt_elemt
   eval_pointt   outt   rval(    (   RJ   R5   RK   RL   RM   s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   Rop¤   sH    
)	B&+t   raisec   
      C` s  t  | ƒ t t f k r$ | g } n  t | t ƒ } t | t ƒ } t |  t t f ƒ sc |  g }  n  t |  ƒ }  t | ƒ } t | t t f ƒ sœ | g } n  t |  ƒ t | ƒ k sº t ‚ t t |  | ƒ ƒ } t d d d | d | d | d | ƒ }	 t
 | | |	 ƒ S(   sL  
    Computes the L operation on `f` wrt to `wrt` evaluated at points given
    in `eval_points`. Mathematically this stands for the jacobian of `f` wrt
    to `wrt` left muliplied by the eval points.

    :type f: Variable or list of Variables
        `f` stands for the output of the computational graph to which you
        want to apply the L operator
    :type wrt: Variable or list of `Variables`s
        variables for which you compute the L operator of the expression
        described by `f`
    :type eval_points: Variable or list of Variables
                        evalutation points for each of the variables in `f`

    :rtype: :class:`~theano.gof.Variable` or list/tuple of Variables depending on type of f
    :return: symbolic expression such that
        L_op[i] = sum_i ( d f[i] / d wrt[j]) eval_point[i]
        where the indices in that expression are magic multidimensional
        indices that specify both the position within a list and all
        coordinates of the tensor element in the last
        If `f` is a list/tuple, then return a list/tuple with the results.
    t   costt   known_gradst   consider_constantRM   t   disconnected_inputsN(   R=   R   R   R   R   R   R   R   t   gradR3   R   (
   RT   RM   RK   Ra   Rb   RU   RV   t   gradst   knownt   ret(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   Lop5  s     	t   zeroc         ` sc  t  j  ƒ  } t d k r+ d d l m a n  |  d k rU | d k rU t d ƒ ‚ qU n  |  d k	 rŒ t |  j t ƒ rŒ t d |  j j	 ƒ ‚ n  |  d k	 r¶ |  j
 d k r¶ t d ƒ ‚ n  t | t ƒ rÔ t d ƒ ‚ n  t | t ƒ }	 t | t ƒ }
 |	 r|
 r| g } n  xJ | D]B } t | t ƒ st d t | ƒ d t t | ƒ ƒ ƒ ‚ qqWg  } |  d k	 r{| j |  ƒ n  | d k	 r£| j t | j ƒ  ƒ ƒ n  t | | | ƒ } t ƒ  } | d k rÖt ƒ  } n3 d	 } t | t ƒ s	t | ƒ d
 k s	t | ƒ ‚ |  d k	 r¶|  | k r.| |  } n t |  ƒ } t | j d ƒ ry|  j j t j k ry| j |  j j ƒ } n  t | j d ƒ r©| j j t j k s©t ‚ n  | | |  <n  x | D]• } | | } t | d ƒ sût d t t | ƒ ƒ ƒ ‚ n  t | j t t f ƒ rHd t | j j ƒ k rHt d t | j ƒ ƒ ‚ n  | | | <q½W‡  f d †  } xL | D]D } | | k rl| |  k	 rl| | k rl| | ƒ t ƒ  | | <qlqlWd } | rØ|  d k	 rØ|  j } n  xH | D]@ } | | } t | j d ƒ rß| j j t j k st ‚ qßqßWt  | | | | ƒ } xã t! t | ƒ ƒ D]Ï } t | | j t ƒ r¥| d k rt" d | | j j	 ƒ ‚ q¥| d k s¥t ‚ n  t | | j t ƒ rK| | | ƒ | d k rìt# | | ƒ | | <q| d k rd | | <q| d k st ‚ qKqKW|
 r3t | ƒ } n |	 sE| \ } n  t  j  ƒ  } t$ | | 7a$ | S(   s›	  
    Return symbolic gradients for one or more variables with respect to some
    cost.

    For more information about how automatic differentiation works in Theano,
    see :mod:`gradient`. For information on how to implement the gradient of
    a certain Op, see :func:`grad`.

    Parameters
    ----------
    cost : :class:`~theano.gof.Variable` scalar (0-dimensional) tensor variable or None
        Value with respect to which we are differentiating.  May be
        `None` if known_grads is provided.
    wrt : :class:`~theano.gof.Variable` or list of Variables
        term[s] for which we want gradients
    consider_constant : list of variables
        expressions not to backpropagate through
    disconnected_inputs : {'ignore', 'warn', 'raise'}
        Defines the behaviour if some of the variables in `wrt` are
        not part of the computational graph computing `cost` (or if
        all links are non-differentiable). The possible values are:

        - 'ignore': considers that the gradient on these parameters is zero.
        - 'warn': consider the gradient zero, and print a warning.
        - 'raise': raise DisconnectedInputError.
    add_names : bool
        If True, variables generated by grad will be named
        (d<cost.name>/d<wrt.name>) provided that both cost and wrt
        have names
    known_grads : OrderedDict, optional
        A ordered dictionary mapping variables to their gradients. This is
        useful in the case where you know the gradient on some
        variables but do not know the original cost.
    return_disconnected : {'zero', 'None', 'Disconnected'}
        - 'zero' : If wrt[i] is disconnected, return value i will be
                   wrt[i].zeros_like()
        - 'None' : If wrt[i] is disconnected, return value i will be
                   None
        - 'Disconnected' : returns variables of type DisconnectedType
    null_gradients : {'raise', 'return'}
        Defines the behaviour if some of the variables in `wrt` have a
        null gradient. The possibles values are:

        - 'raise' : raise a NullTypeGradError exception
        - 'return' : return the null gradients

    Returns
    -------
    variable or list/tuple of variables (matches `wrt`)
        symbolic expression of gradient of `cost` with respect to each
        of the `wrt` terms.  If an element of `wrt` is not
        differentiable with respect to the output, then a zero
        variable is returned.

    i    (   RA   s(   cost and known_grads can't both be None.s3   Can't differentiate a NaN cost.cost is NaN because s   cost must be a scalar.so   wrt must not be a set. sets have no defined iteration order, so we can't return gradients in a  matching order.s   Expected Variable, got s	    of type s$   known_grads must be an OrderedDict. i   RC   R=   so   output grads must be theano variables.Ambiguous whether %s should be made into tensor or sparse theano variablet   floatsl   Gradients must always be NullType, DisconnectedType, or continuous, but grad was given a known_grad of type c         ` su   d |  } ˆ  d k r nX ˆ  d k r; t  j | d d ƒn6 ˆ  d k re t j |  ƒ } t | ƒ ‚ n t d ƒ ‚ d  S(   Ns¶   grad method was asked to compute the gradient with respect to a variable that is not part of the computational graph of the cost, or is used only by a non-differentiable operator: %st   ignoret   warnt
   stackleveli   R^   s_   Invalid value for keyword 'disconnected_inputs', valid values are 'ignore', 'warn' and 'raise'.(   t   warningsRk   R   t   get_variable_trace_stringt   DisconnectedInputErrorRQ   (   t   vart   message(   Rb   (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   handle_disconnected  s    R^   s   tensor.grad encountered a NaN. t   returnRh   R3   t   DisconnectedN(%   t   timeRA   R3   R@   R   R   R=   R   RQ   t   why_nullRP   R?   t   setR   R   R   RR   R7   t   extendt   keyst   _populate_var_to_app_to_idxR   R   t   _float_ones_liket   hasattrRC   t   continuous_dtypest   astypeR!   t   disconnected_typet   namet   float_dtypest   _populate_grad_dictR	   t   NullTypeGradErrort   _float_zeros_liket	   grad_time(   R_   RM   Ra   Rb   t	   add_namesR`   t   return_disconnectedt   null_gradientst   t0RU   RV   t   elemR   t   var_to_app_to_idxt	   grad_dictt   mt   g_costRp   t   g_varRr   t	   cost_namet   gR\   RX   t   t1(    (   Rb   s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRc   k  s°    ;.	-


"c         ` sÔ  | d	 k	 s | d	 k	 s t ‚ t | t ƒ s3 t ‚ t |  t ƒ sH t ‚ | d	 k	 rl t | t ƒ sl t ‚ n  t t |  | ƒ ƒ } d	 } d	 } | d	 k	 rÍ t t j d d	 d | d | d | d d ƒ ƒ } n  | d	 k	 rt t j d | d | d | d d ƒ ƒ } n  d	 } | d	 k r!| } nF | } | d	 k	 rgx1 t t	 | ƒ ƒ D] }	 | |	 c | |	 7<qFWn  t
 t | | ƒ ƒ ‰  t ‡  f d †  |  Dƒ ƒ }
 t ‡  f d †  | Dƒ ƒ } | rÊ|
 | | | f S|
 | f S(
   s/  
    With respect to `wrt`, computes gradients of cost and/or from
    existing `start` gradients, up to the `end` variables of a
    symbolic digraph.  In other words, computes gradients for a
    subgraph of the symbolic theano function. Ignores all disconnected
    inputs.

    This can be useful when one needs to perform the gradient descent
    iteratively (e.g. one layer at a time in an MLP), or when a
    particular operation is not differentiable in theano
    (e.g. stochastic sampling from a multinomial). In the latter case,
    the gradient of the non-differentiable process could be
    approximated by user-defined formula, which could be calculated
    using the gradients of a cost with respect to samples (0s and
    1s). These gradients are obtained by performing a subgraph_grad
    from the `cost` or previously known gradients (`start`) up to the
    outputs of the stochastic process (`end`).  A dictionary mapping
    gradients obtained from the user-defined differentiation of the
    process, to variables, could then be fed into another
    subgraph_grad as `start` with any other `cost` (e.g. weight
    decay).

    In an MLP, we could use subgraph_grad to iteratively backpropagate:

    .. code-block:: python

        x, t = theano.tensor.fvector('x'), theano.tensor.fvector('t')
        w1 = theano.shared(np.random.randn(3,4))
        w2 = theano.shared(np.random.randn(4,2))
        a1 = theano.tensor.tanh(theano.tensor.dot(x,w1))
        a2 = theano.tensor.tanh(theano.tensor.dot(a1,w2))
        cost2 = theano.tensor.sqr(a2 - t).sum()
        cost2 += theano.tensor.sqr(w2.sum())
        cost1 = theano.tensor.sqr(w1.sum())

        params = [[w2],[w1]]
        costs = [cost2,cost1]
        grad_ends = [[a1], [x]]

        next_grad = None
        param_grads = []
        for i in xrange(2):
            param_grad, next_grad = theano.subgraph_grad(
                wrt=params[i], end=grad_ends[i],
                start=next_grad, cost=costs[i]
            )
            next_grad = dict(zip(grad_ends[i], next_grad))
            param_grads.extend(param_grad)

    :type wrt: list of variables
    :param wrt:
      Gradients are computed with respect to `wrt`.

    :type end: list of variables
    :param end:
      Theano variables at which to end gradient descent (they are
      considered constant in theano.grad).  For convenience, the
      gradients with respect to these variables are also returned.

    :type start: dictionary of variables
    :param start:
      If not None, a dictionary mapping variables to their
      gradients. This is useful when the gradient on some variables
      are known. These are used to compute the gradients backwards up
      to the variables in `end` (they are used as known_grad in
      theano.grad).

    :type cost: :class:`~theano.gof.Variable` scalar (0-dimensional) variable
    :param cost:
      Additional costs for which to compute the gradients.  For
      example, these could be weight decay, an l1 constraint, MSE,
      NLL, etc. May optionally be None if start is provided.  Warning
      : If the gradients of `cost` with respect to any of the `start`
      variables is already part of the `start` dictionary, then it may
      be counted twice with respect to `wrt` and `end`.

      .. warning::

        If the gradients of `cost` with respect to any of the `start`
        variables is already part of the `start` dictionary, then it
        may be counted twice with respect to `wrt` and `end`.


    :type details: bool
    :param details:
      When True, additionally returns the list of gradients from
      `start` and of `cost`, respectively, with respect to `wrt` (not
      `end`).

    :rtype: Tuple of 2 or 4 Lists of Variables

    :return: Returns lists of gradients with respect to `wrt` and `end`,
            respectively.

    .. versionadded:: 0.7
    R_   RM   R`   Ra   Rb   Rj   c         3` s   |  ] } ˆ  | Vq d  S(   N(    (   t   .0t   k(   t   pgrads(    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pys	   <genexpr>Ï  s    c         3` s   |  ] } ˆ  | Vq d  S(   N(    (   R“   R”   (   R•   (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pys	   <genexpr>Ð  s    N(   R3   R   R   R   t   dictRw   R@   Rc   t   rangeR   R   R   (   RM   t   endt   startR_   t   detailst   paramst   start_gradst
   cost_gradsRd   RX   t	   wrt_gradst	   end_grads(    (   R•   s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   subgraph_gradG  sB    a			c         C` sÚ  t  |  j d ƒ r|  j j |  ƒ } t | t ƒ sa t d d |  j d | t | ƒ f ƒ ‚ n  t | ƒ t |  j ƒ k r¯ t	 d |  j t |  j ƒ f d t | ƒ ƒ ‚ n  x t
 | ƒ D]M \ } } t | t ƒ s¼ t d |  j d | d	 | t | ƒ f ƒ ‚ q¼ q¼ Wn2 g  |  j D]" } g  |  j D] } t ^ q*^ q} t | t ƒ sWt ‚ t | ƒ t |  j ƒ k sxt ‚ x[ t t |  j ƒ ƒ D]D } t | | t ƒ s­t ‚ t | | ƒ t |  j ƒ k sŽt ‚ qŽW| S(
   sÓ    given an apply node, obtain its connection pattern
     this is just a wrapper around Op.connection_pattern
     that does type checking and supplies the default value
     if the method is not implemented
    t   connection_patterns$   Op.connection_pattern should return s#   list of list of bool, but for Op=%ss   got %s with type %s.s$   %s.connection_pattern should have %ds    rows but has %d.s#   %s.connection_pattern should returns     a list of lists, but element %ds   is %s of type %s.(   R|   R   R¡   R   R   R?   R=   R   R6   RQ   RO   R   R4   R   R	   (   RE   R¡   t   iit   output_patternt   iptt   output(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   _node_to_patternØ  s4    /!)c         ` sV  ˆ d k r g  ‰ n€ y t ˆ ƒ Wn- t k
 rR t d t t ˆ ƒ ƒ ƒ ‚ n Xx? ˆ D]7 } t | t j ƒ sZ t d t t | ƒ ƒ ƒ ‚ qZ qZ Wt ƒ  ‰ t	 g  ƒ ‰ ‡  ‡ ‡ ‡ f d †  ‰  x |  D] } ˆ  | ƒ qÉ Wt	 g  ƒ ‰ ‡ ‡ ‡ f d †  ‰ x | D] } ˆ | ƒ qWt
 ˆ j ƒ  ƒ } x$ | D] } | ˆ k r2ˆ | =q2q2Wˆ S(   sñ  
    Helper function for grad function.

    outputs: a list of variables we want to take gradients of

    wrt: a list of variables we want to take the gradient with
        respect to.

    consider_constant: a list of variables not to backpropagate
        through.

    returns:

     var_to_app_to_idx:

      A dictionary mapping a variable to a second dictionary.
      The second dictionary maps apply nodes acting on this
      variable to the variable's index in the apply node's
      input list.

      This dictionary will only contain variables that
      meet two criteria:

       1) The elements of at least one output are a
          function of the elements of the variable

       2) The elements of the variable are a function of the
          elements of at least one member of wrt.

      This set is exactly the set of variables that connect
      the variables in wrt to the cost being differentiated.

      (A variable in consider_constant is not a function of
      anything)

    s6   consider_constant must be an iterable collection, got s9   Elements of consider_constant must be variables, but got c         ` s  |  ˆ k r d  Sˆ j  |  ƒ |  ˆ k r- d  S|  j d  k	 r	|  j } t | ƒ } | j j |  ƒ } x£ t | j ƒ D] \ } } | | | s“ qs n  | ˆ k r¯ t ƒ  ˆ | <n  ˆ | } | | k rÒ g  | | <n  | | } | | k rø | j	 | ƒ n  ˆ  | ƒ qs Wn  d  S(   N(
   t   addR9   R3   R¦   R   R8   RO   R6   R   R7   (   Rp   t   appR¡   t   var_idxRX   R¤   t
   app_to_idxt   idx(   t   account_fort   accounted_forRa   R‹   (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR¬   E  s*    	

c         ` s¨   |  ˆ k r d  S|  ˆ  k r  d  Sˆ j  |  ƒ ˆ  |  } xj | D]b } t | ƒ } xM | | D]A } x8 t | j ƒ D]' \ } } | | | rq ˆ | ƒ qq qq Wq[ Wq> Wd  S(   N(   R§   R¦   RO   R   (   Rp   t   nodesRE   R¡   R«   R¢   R¥   (   R‹   t   visitt   visited(    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR¯   x  s    
N(   R3   t   iterR?   RR   R=   R   R   R   R   Rw   R   Ry   (   R   RM   Ra   RŠ   R¥   t	   orig_varsRp   (    (   R¬   R­   Ra   R‹   R¯   R°   s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRz   ÿ  s2    '			+Rƒ   c           B` s   e  Z d  Z RS(   s1   
    Raised when grad encounters a NullType.
    (   R0   R1   R2   (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRƒ   ’  s   Ro   c           B` s   e  Z d  Z RS(   s‰   
    Raised when grad is asked to compute the gradient
    with respect to a disconnected input and
    disconnected_inputs='raise'.
    (   R0   R1   R2   (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRo   ˜  s   c         ` sV   t  ƒ  ‰ ‡  ‡ f d †  ‰ ‡ ‡ ‡ ‡ f d †  ‰  g  | D] } ˆ  | ƒ ^ q: } | S(   s=  
        Helper function for grad function.

        var_to_app_to_idx: a dictionary mapping a variable to
                a second dictionary.
                the second dictionary maps apply nodes acting on
                this variable to the variable's index in the apply
                node's input list

        grad_dict: A dictionary mapping variables to their gradients.
                   Should be populated by grad function, which should:
                       -Set the gradient with respect to the cost to 1
                       -Load all gradients from known_grads, possibly
                        overriding the cost
                       -Set the gradient for disconnected
                        inputs to a variable with type DisconnectedType()

        wrt: the minimal set of variables that must be included in grad_dict

        cost_name: The name of the cost being differentiated, optional.
                    used to name the grad with respect to x as
                    (d<cost_name>/dx)

        returns: a list of gradients corresponding to wrt

    c   6      ` sÖ  |  ˆ k rÎ|  j  } g  |  j D] } ˆ | ƒ ^ q } g  | D] } t | j t ƒ ^ q> } t |  ƒ } g  | D]: } t g  t | | ƒ D] \ } }	 | ož |	 ^ q‰ k ^ qp }
 g  |  j D]0 } t | j d ƒ oç | j j	 t
 j j k ^ qº } g  | D] } t | j t ƒ ^ q÷ } g  | D]G } t g  t | | | ƒ D]" \ } } } | oW| oW| ^ q8k ^ q} t |
 k r”g  | D] } t ƒ  ^ q|} n_t | k rãg  } xJ|
 D]/ } | rÌ| j t ƒ  ƒ q­| j t ƒ  ƒ q­Wny4 g  t |  j j ƒ D] } |  j  | d ^ qù‰  Wn t k
 r0g  ‰  n X‡  f d †  } g  | D] } | | ƒ ^ qG} g  } x” t |  j | ƒ D]€ \ } } t | j d d ƒ } t | j d d ƒ } | t
 j j k rë| rë| | k rë| j | j | ƒ ƒ qx| j | ƒ qxWx} t |  j | ƒ D]i \ } } t | j d d ƒ } t | j d d ƒ } | d k	 r| t
 j j k r| | k sxt ‚ qqWx5 | D]- } t | j d d ƒ t
 j j k sƒt ‚ qƒWxÁ t t |  j | ƒ ƒ D]§ \ } }  |  \ }! }" t |! d ƒ súqÍn  t |" j t ƒ rqÍn  x_ t |  Œ  D]Q \ }# }$ |# j }% |$ j }& |% |& k rt d t |% ƒ d t |& ƒ ƒ ‚ qqWqÍW|  j j  | |  j | ƒ } | d k r»t! d t |  j ƒ ƒ ‚ n  t" | ƒ t" | ƒ k rót d d	 t |  j ƒ ƒ ‚ n  t# | ƒ } xw t$ t" | ƒ ƒ D]c }' xZ t$ t" | ƒ ƒ D]F }( | |( r+| |' |( r+t | |' j t ƒ r+| |( | |' <q+q+WqWg  | D]G } t g  t | | | ƒ D]" \ } } }) | o»| o»|) ^ qœk ^ q€}* xët | ƒ D]Ý\ }+ }, |, d k rt! d
 d |  j ƒ ‚ n  t |, d ƒ rˆ| |+ }- x` t |- |, ƒ D]L \ }. }/ |. j }0 |/ j }1 |0 |1 k r5t d |  j |1 |+ |0 f ƒ ‚ q5q5Wn  t |, j t t f ƒ sÚ|, j j	 t
 j j% k rät! t |  j ƒ d |+ |, j j	 f ƒ ‚ n  | |+ r	t |, j t ƒ s	t ‚ n  |* |+ r·t& |, ƒ }2 |2 d k s1t ‚ |2 d k red }3 |3 |  j |, t |, ƒ |+ f ;}3 q´|2 d k r´d }3 |3 |  j |, t |, ƒ |+ t
 j' |, ƒ f ;}3 t |3 ƒ ‚ q´q·qÚqÚWxt t | | |
 ƒ ƒ D]é \ }+ \ } }4 } t |4 j t ƒ }5 |5 rL| rLd }3 |3 t |  j ƒ t |4 ƒ t |4 j ƒ |+ f ;}3 t! |3 ƒ ‚ qÔ| rÔ|5 rÔd }3 |3 t |  j ƒ |+ f ;}3 t |  j d ƒ r£|3 d 7}3 t! |3 ƒ ‚ q½|3 d 7}3 t( j) |3 ƒ qÔqÔW| ˆ |  <n  ˆ |  S(   s*    Populates term_dict[node] and returns it RC   i    c         ` s)   |  ˆ  k r% t  |  d ƒ r% |  j ƒ  S|  S(   Nt   copy(   R|   R³   (   Rp   (   t   dinputs(    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   try_to_copy_if_needed	  s    
t   shapes   Got a gradient of shape s    on an output of shape s-   %s.grad returned NoneType, expected iterable.s   %s returned the wrong number ofs    gradient terms.s   %s.grad returned None forsÎ    a gradient term, this is prohibited. Instead of None,return zeros_like(input), disconnected_type(), or a NullType variable such as those made with the grad_undefined or grad_unimplemented helper functions.sL   %s.grad returned object of shape %s as gradient term on input %d of shape %ssP   .grad illegally  returned an integer-valued variable. (Input index %d, dtype %s)t   yest   not   maybes9  %s.grad returned %s of type %s for input %d. This input's only connections to the cost through this op are via integer-valued outputs so it should be NullType, DisconnectedType, or some form of zeros. It is not NullType or DisconnectedType and theano can't simplify it to a constant, so it's not verifiably zeros.sž   %s.grad returned %s of type %s for input %d. Since this input is only connected to integer-valued outputs, it should evaluate to zeros, but it evaluates to%s.s‹   %s.grad returned %s of type %s for input %d. Expected DisconnectedType instance based on  the output of the op's connection_pattern method.s/   %s.grad returned DisconnectedType for input %d.R¡   s3    Its connection_pattern method does not allow this.s>    You may want to implement a connection_pattern method for it.N(   s   yess   nos   maybe(*   R6   R   R   R=   R!   R¦   R4   R<   R|   RC   R@   RA   t   discrete_dtypesR   R   R)   R7   R   R   R   t   destroy_mapRS   t   getattrR3   R~   R   RO   R   R   R¶   RQ   RR   t   L_opR?   R   R   R—   R   t   _is_zerot   get_scalar_constant_valueRm   Rk   (6   RE   R6   Rp   t   output_gradsR‘   t   outputs_connectedR¡   t   input_to_outputst   input_to_outputt   output_to_costt   inputs_connectedR¥   t   output_is_intt   ograd_is_nant
   in_to_outst	   in_to_outt   out_to_costt   out_nant   only_connected_to_nanR¤   t   input_gradst	   connectedR   Rµ   t   new_output_gradst   ot   ogt   o_dtt   og_dtt   ngt   ng_dtR«   t   packedt   orig_outputt   new_output_gradt   orig_output_vt   new_output_grad_vt   o_shapet   g_shapet   inp_idxt   out_idxt   out_intt   only_connected_to_intRX   t   termt   orig_iptt
   orig_ipt_vt   term_vt   i_shapet   t_shapet   is_zerot   msgt   igt   actually_connected(   t   access_grad_cachet	   term_dict(   R´   s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   access_term_cacheÀ  s   	"#D="Q1
					

Q
		#	


"(.

c         ` sâ  |  ˆ k rÚ|  ˆ k rÊg  } g  } ˆ |  } x| D]û } xò | | D]æ } ˆ  | ƒ | } t  | t j ƒ s– t d t | j ƒ t | ƒ f ƒ ‚ n  t  | j t ƒ r» | j | ƒ qF n  t  | j t	 ƒ rÓ qF n  t
 |  d ƒ r| j |  j k rt d t | j ƒ | j |  j f ƒ ‚ n  | j | ƒ qF Wq5 Wt | ƒ d k rW| d ˆ |  <n8 t | ƒ d k r‚t d „  | ƒ ˆ |  <n t ƒ  ˆ |  <ˆ d  k	 r×|  j d  k	 r×d ˆ |  j f ˆ |  _ q×qÚt ƒ  ˆ |  <n  ˆ |  S(   Ns0   %s.grad returned %s, expected Variable instance.RP   s@   %s.grad returned a term with %d dimensions, but %d are required.i    c         S` s   |  | S(   N(    (   R   RI   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   <lambda>  s    s	   (d%s/d%s)(   R   R   R   R?   RR   R   R=   R   R7   R!   R|   RP   RQ   R   R
   R   R3   R€   (   Rp   t
   null_termst   termst   node_to_idxRE   R«   Rá   (   Rí   R   RŒ   R‹   (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRë   î  s@    
!% (   R   (   R‹   RŒ   RM   R   RŠ   R\   (    (   Rë   Rí   R   RŒ   Rì   R‹   s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR‚      s    	ÿ /7c         C` s>   |  j  ƒ  } | j j j d ƒ d k r+ | S| j t j j ƒ S(   sM    Like zeros_like, but forces the object to have a
    a floating point dtype Ri   iÿÿÿÿ(   R:   R=   RC   t   findR~   R@   t   configt   floatX(   R   R\   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR„   *  s    c         C` s:   |  j  j } | t j k r* t j j } n  |  j d | ƒ S(   sJ    Like ones_like, but forces the object to have a
    floating point dtype RC   (   R=   RC   RA   R   R@   Ró   Rô   t	   ones_like(   R   RC   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR{   6  s    t   numeric_gradc           B` s‰   e  Z d  Z i d d 6d d 6d d 6d e j d ƒ 6d e j d ƒ 6d e j d ƒ 6Z d d d „ Z e d „  ƒ Z	 d	 „  Z
 d
 „  Z RS(   s_   
    Compute the numeric derivative of a scalar-valued function at a particular
    point.
    gH¯¼šò×z>t   float64ga2U0*©3?t   float32gš™™™™™¹?t   float16c         ` s¡  d „  ‰  t  } t | t t f ƒ s6 | g } t } n  g  | D] } t j | ƒ ^ q= } g  | D] } | j ^ q_ } g  | D] } t | j	 ƒ ^ q{ }	 t
 j ‡  f d †  | Dƒ ƒ }
 t
 j ‡ f d †  |	 Dƒ ƒ d } t j |
 f d | ƒ} | d
 k	 r&| j d ƒ r&t j |
 f d | ƒ} n t j |
 f d | ƒ} | d
 k rlt
 j ‡ f d †  |	 Dƒ ƒ } n  d } g  ˆ _ x‰ t | ƒ D]{ \ } } ˆ  | j ƒ } | | | | !j | j ƒ | | <ˆ j j | | | | !j | j ƒ ƒ | | | d	 <| | 7} qˆW| g  | D] } | j ƒ  ^ qŒ  } | j ƒ  } xI t |
 ƒ D]; } | | (| | c | 7<| | Œ  } | | | | | <qEW| rˆ j d ˆ _ n  d
 S(   s„  Return the gradient of f at pt.

        :param f: a differentiable function such that f(*pt) is a scalar
        :param pt: an ndarray, a list of ndarrays or tuple of ndarrays
        :param out_type: dtype of output, if complex (i.e. 'complex32' or
        'complex64')
        This function computes the gradient by a one-sided finite
        differences of a fixed step size (eps).

        It is assumed that f(...) will return a scalar.
        It is assumed that all f's inputs are numpy.ndarray objects.

        :param eps: the stepsize for the finite differencing.  None means
          input dtype-dependent. See `type_eps`.
        c         S` s%   d } x |  D] } | | 9} q W| S(   Ni   (    (   R6   R\   RX   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   prodv  s    c         3` s   |  ] } ˆ  | ƒ Vq d  S(   N(    (   R“   t   sh(   Rú   (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pys	   <genexpr>Œ  s    c         3` s"   |  ] } ˆ  j  | | f Vq d  S(   N(   t   type_eps(   R“   t   dt(   R"   (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pys	   <genexpr>  s    i   RC   t   complexc         3` s   |  ] } ˆ  j  | Vq d  S(   N(   Rü   (   R“   Rý   (   R"   (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pys	   <genexpr>š  s    i    .N(   R)   R   R   R   R4   t   numpyt   arrayR¶   RR   RC   t   builtinst   sumt   mint   ndarrayR3   t
   startswitht   maxt   gfRO   t   reshapeR7   R³   R	   (   R"   RT   t   ptt   epst   out_typet	   packed_ptt   pt   aptt   shapest   dtypest
   total_sizet   working_dtypeR   t   gxt   cur_posRX   t   p_sizet   f_xt   x_copyt   f_eps(    (   Rú   R"   s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   __init__e  sD    			"""	!'%c         C` s^   t  |  | ƒ } | t j t  |  ƒ t  | ƒ d ƒ } t j | ƒ } t j | ƒ } | | f S(   s»  Return absolute and relative error between a and b.

        The relative error is a small number when a and b are close, relative
        to how big they are.

        Formulas used:
            abs_err = abs(a - b)
            rel_err = abs_err / max(abs(a) + abs(b), 1e-8)

        The denominator is clipped at 1e-8 to avoid dividing by 0 when a and b
        are both close to 0.

        The tuple (abs_err, rel_err) is returned
        g:Œ0âŽyE>(   t   absRÿ   t   maximumt   asarray(   R*   R+   t   abs_errt   rel_err(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   abs_rel_err½  s
    &c         C` sÂ   t  | ƒ t  |  j ƒ k r3 t d t  | ƒ ƒ ‚ n  g  } x‚ t t | |  j ƒ ƒ D]h \ } \ } } | j | j k r¡ t d | t | j | j f ƒ f ƒ ‚ n  | j t j	 | | ƒ ƒ qR W| S(   s(  Return the abs and rel error of gradient estimate `g_pt`

        `g_pt` must be a list of ndarrays of the same length as self.gf,
        otherwise a ValueError is raised.

        Corresponding ndarrays in `g_pt` and `self.gf` must have the same
        shape or ValueError is raised.

        s%   argument has wrong number of elementss&   argument element %i has wrong shape %s(
   R   R  RQ   RO   R<   R¶   RR   R7   Rö   R  (   R"   t   g_ptt   errsRX   R*   R+   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   abs_rel_errors×  s    
+%c         C` sG  g  } g  } g  } g  } |  j  | ƒ } xì | D]ä \ }	 }
 t j t j |	 ƒ ƒ sj t d t |	 ƒ ƒ ‚ n  t j t j |
 ƒ ƒ sš t d t |
 ƒ ƒ ‚ n  t j |	 | |
 | ƒ } | j ƒ  } | j | ƒ | j | j	 ƒ  | ƒ | j |	 j	 ƒ  | ƒ | j |
 j	 ƒ  | ƒ q. Wt j | ƒ } | | } | | | | | | f S(   s~  Find the biggest error between g_pt and self.gf.

        What is measured is the violation of relative and absolute errors,
        wrt the provided tolerances (abs_tol, rel_tol).
        A value > 1 means both tolerances are exceeded.

        Return the argmax of min(abs_err / abs_tol, rel_err / rel_tol) over
        g_pt, as well as abs_err and rel_err at this point.
        s   abs_err not finites   rel_err not finite(
   R"  Rÿ   t   allt   isfiniteRQ   t   reprt   minimumt   argmaxR7   t   flatten(   R"   R   t   abs_tolt   rel_tolt   posR!  t   abs_errst   rel_errst   abs_rel_errsR  R  t
   scaled_errt   max_it   max_argt   max_pos(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   max_errì  s&    

N(   R0   R1   R2   Rÿ   RC   Rü   R3   R  t   staticmethodR  R"  R3  (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRö   A  s   
X	c         C` s0   t  |  t ƒ r( |  j } t j d | ƒ S|  Sd  S(   Nt	   optimizer(   R   R   R5  R   t   clone(   t   modet   opt(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   mode_not_debug  s    	i   c   #   	   ` s)  d d l  m ‰ m } d d l } d d l m } m } t | t t f ƒ sS t	 ‚ g  | D] } t
 j | ƒ ^ qZ } xE t | ƒ D]7 \ } } | j d! k r‚ t d | | j f ƒ ‚ q‚ q‚ Wt d d	 d d
 d d ƒ ‰  | d k rt j ‡  f d †  | Dƒ ƒ } n  | d k r4t j ‡  f d †  | Dƒ ƒ } n  ˆ d k rOt d ƒ ‚ n  | ‡ f d † } g  t | ƒ D]: \ } } | | | ƒ j | | ƒ j ƒ d d | ƒ ^ qn} |  | Œ  ‰ t ˆ t ƒ rØt d ƒ ‚ n  | | ˆ d d ƒ} | g  | D] } | j ƒ  ^ q÷Œ  ‰ t ˆ t ƒ s0t ˆ t ƒ r?t d ƒ ‚ n  ‡ ‡ ‡ ‡ f d †  } | | ƒ  ƒ } d | _ | j j | ˆ ƒ } |
 ršt | ƒ } n | } | | | d d d | ƒ} t | | d d ƒ} | | | d d ƒ} x=t | ƒ D]/} yï t | g  | D] } | j ƒ  ^ q| | ƒ } | g  | D] } | j ƒ  ^ q3Œ  } t | t ƒ sct	 ‚ | j | | | ƒ \ } } }  }! |  | k r½|! | k r½t j | | |  |! | | ƒ ‚ n  | | d k  ræ| j  | ƒ  d t! ƒn  Wqòt" k
 r }" |" j# d | d | d  | f 7_# ‚  qòXqòWd S("   s3  Test a gradient by Finite Difference Method. Raise error on failure.

    Example:
        >>> verify_grad(theano.tensor.tanh,
        ...             (numpy.asarray([[2,3,4], [-1, 3.3, 9.9]]),),
        ...             rng=numpy.random)

    Raises an Exception if the difference between the analytic gradient and
    numerical gradient (computed through the Finite Difference Method) of a
    random projection of the fun's output to a scalar exceeds the given
    tolerance.

    :param fun: a Python function that takes Theano variables as inputs,
        and returns a Theano variable. For instance, an Op instance with
        a single output.
    :param pt: the list of numpy.ndarrays to use as input values.
        These arrays must be either float16, float32, or float64 arrays.
    :param n_tests: number of times to run the test
    :param rng: random number generator used to sample u, we test gradient
        of sum(u * fun) at pt
    :param eps: stepsize used in the Finite Difference Method (Default
        None is type-dependent)
        Raising the value of eps can raise or lower the absolute and
        relative errors of the verification depending on the
        Op. Raising eps does not lower the verification quality
        for linear operations. It
        is better to raise eps than raising abs_tol or rel_tol.
    :param out_type: dtype of output, if complex (i.e. 'complex32' or
        'complex64')
    :param abs_tol: absolute tolerance used as threshold for gradient
        comparison
    :param rel_tol: relative tolerance used as threshold for gradient
        comparison
    :param cast_to_output_type: if the output is float32 and
        cast_to_output_type is True, cast the random projection to
        float32. Otherwise it is float64. float16 is not handled here.
    :param no_debug_ref: Don't use DebugMode for the numerical
        gradient function.

    :note: This function does not support multiple outputs. In
        tests/test_scan.py there is an experimental verify_grad that
        covers that case as well by using random projections.

    i    (   t   compilet   sharedN(   R5   t
   TensorTypeRù   Rø   R÷   sR   verify_grad can work only with floating point inputs, but input %i has dtype "%s".gš™™™™™©?g{®Gáz„?g-Cëâ6?c         3` s"   |  ] } ˆ  t  | j ƒ Vq d  S(   N(   RR   RC   (   R“   R  (   t	   _type_tol(    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pys	   <genexpr>Z  s    c         3` s"   |  ] } ˆ  t  | j ƒ Vq d  S(   N(   RR   RC   (   R“   R  (   R=  (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pys	   <genexpr>\  s    sŸ   rng should be a valid instance of numpy.random.RandomState. You may want to use theano.tests.unittest_tools.verify_grad instead of theano.gradient.verify_grad.c         ` s4   ˆ  j  |  | d t d t d | d d d | ƒ} | S(   Nt   accept_inplacet   allow_input_downcastR7  t   on_unused_inputRj   R€   (   t   functionR4   (   R6   R¥   R€   R7  RT   (   R:  (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRA  g  s    R€   s   input %is9   cant (yet) autotest gradient of fun with multiple outputss   gradient.py fwds‘   It seems like you are trying to use verify_grad on an op or a function which outputs a list: there should be a single (array-like) output insteadc          ` sB   ˆ j  ˆ j Œ  d }  ˆ  r> ˆ j d k r> t j |  ˆ j ƒ S|  S(   Ng      à?Rø   (   t   randR¶   RC   Rÿ   R   (   t   plain(   t   cast_to_output_typet   o_fn_outt   o_outputt   rng(    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   random_projectionˆ  s    RH  s   gradient.py costR7  Rb   Rj   s   gradient.py symbolic gradi   t   borrows.   
The error happened with the following inputs:s   
The value of eps is:s   
The out_type is:(   s   float16s   float32s   float64($   R@   R:  R;  RN   R5   R<  R   R   R   R   Rÿ   R   RO   RC   R?   R–   R3   R  R  t   broadcastablet   NotImplementedErrorR³   R€   RA   R  R9  Rc   R	   Rö   R3  t   verify_gradt   E_gradt	   set_valueR4   R;   t   args(#   t   funR	  t   n_testsRG  R
  R  R)  R*  R7  RD  t   no_debug_refR;  R@   R5   R<  R  RX   RA  t	   tensor_ptt   o_fnRH  t   t_rR_   t   mode_for_costt   cost_fnt   symbolic_gradt   grad_fnt   test_numt   num_gradt   analytic_gradR1  t   max_err_post   max_abs_errt   max_rel_errt   e(    (   R=  RD  R:  RE  RF  RG  s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRL    s~    1"	""J%				"%t   GradientErrorc           B` s    e  Z d  Z d „  Z d „  Z RS(   sB   This error is raised when a gradient is calculated, but incorrect.c         C` sG   t  j |  ƒ | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ d  S(   N(   R;   R  t   argt   err_posR  R  R)  R*  (   R"   Rb  Rc  R  R  R)  R*  (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR  Â  s    					c         C` sK   d j  d „  |  j Dƒ ƒ } d |  j |  j |  j |  j |  j |  j | f S(   Ns   , c         s` s   |  ] } t  | ƒ Vq d  S(   N(   RR   (   R“   R*   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pys	   <genexpr>Í  s    så   GradientError: numeric gradient and analytic gradient exceed tolerance:
        At position %i of argument %i,
            abs. error = %f,  abs. tolerance = %f
            rel. error = %f,  rel. tolerance = %f
Exception args: %s(   t   joinRO  Rc  Rb  R  R)  R  R*  (   R"   t   args_msg(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR/   Ë  s
    (   R0   R1   R2   R  R/   (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRa  À  s   		c   
   
   ` s/  d d l  m } t |  t ƒ s+ t d ƒ ‚ |  j d k  sF t d ƒ ‚ t | t ƒ } t | t ƒ } t | t t f ƒ rˆ t | ƒ } n	 | g } |  j d k rÅ t | | t	 |  | d ˆ  d ˆ ƒƒ S‡  ‡ f d †  } t
 j | d	 | |  j d ƒ d
 |  g | ƒ\ } }	 |	 st d ƒ ‚ t | | | ƒ S(   s3  
    :type expression: Vector (1-dimensional) Variable
    :type wrt: Variable or list of Variables

    :param consider_constant: a list of expressions not to backpropagate
        through

    :type disconnected_inputs: string
    :param disconnected_inputs: Defines the behaviour if some of the variables
        in ``wrt`` are not part of the computational graph computing ``cost``
        (or if all links are non-differentiable). The possible values are:
        - 'ignore': considers that the gradient on these parameters is zero.
        - 'warn': consider the gradient zero, and print a warning.
        - 'raise': raise an exception.

    :return: either a instance of Variable or list/tuple of Variables
            (depending upon `wrt`) repesenting the jacobian of `expression`
            with respect to (elements of) `wrt`. If an element of `wrt` is not
            differentiable with respect to the output, then a zero
            variable is returned. The return value is of same type
            as `wrt`: a list/tuple or TensorVariable in all cases.
    i    (   t   aranges2   tensor.jacobian expects a Variable as `expression`i   sh   tensor.jacobian expects a 1 dimensional variable as `expression`. If not use flatten to make it a vectorRa   Rb   c          ` s_   |  d } |  d } g  } x> |  d D]2 } t  | | | d ˆ  d ˆ ƒ} | j | ƒ q% W| S(   Ni    i   i   Ra   Rb   (   Rc   R7   (   RO  R«   t   exprt   rvalsRG   R\   (   Ra   Rb   (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   inner_function  s    


	t	   sequencest   non_sequencess‹   Scan has returned a list of updates. This should not happen! Report this to theano-users (also include the script that generated the error)(   RN   Rf  R   R   R   RP   R   R   R   Rc   R@   t   scanR¶   (
   t
   expressionRM   Ra   Rb   Rf  RU   RV   Ri  t   jacobst   updates(    (   Ra   Rb   s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   jacobianÜ  s.    				

	c         ` sf  d d l  m } t |  t ƒ s+ t d ƒ ‚ |  j d k sF t d ƒ ‚ t | t ƒ } t | t ƒ } t | t t f ƒ rˆ t | ƒ } n	 | g } g  } x¼ | D]´ } t | t ƒ s¿ t d ƒ ‚ | j d k sÚ t d ƒ ‚ t |  | d ˆ  d	 | ƒ}	 t	 j
 ‡  f d
 †  d | |	 j d ƒ d |	 | g ƒ\ }
 } | sEt d ƒ ‚ | j |
 ƒ qž Wt | | | ƒ S(   s|  
    :type cost: Scalar (0-dimensional) Variable.
    :type wrt: Vector (1-dimensional tensor) 'Variable' or list of
               vectors (1-dimensional tensors) Variables

    :param consider_constant: a list of expressions not to backpropagate
        through

    :type disconnected_inputs: string
    :param disconnected_inputs: Defines the behaviour if some of the variables
        in ``wrt`` are not part of the computational graph computing ``cost``
        (or if all links are non-differentiable). The possible values are:
        - 'ignore': considers that the gradient on these parameters is zero.
        - 'warn': consider the gradient zero, and print a warning.
        - 'raise': raise an exception.

    :return: either a instance of Variable or list/tuple of Variables
            (depending upon `wrt`) repressenting the Hessian of the `cost`
            with respect to (elements of) `wrt`. If an element of `wrt` is not
            differentiable with respect to the output, then a zero
            variable is returned. The return value is of same type
            as `wrt`: a list/tuple or TensorVariable in all cases.
    i    (   Rf  s+   tensor.hessian expects a Variable as `cost`s9   tensor.hessian expects a 0 dimensional variable as `cost`s4   tensor.hessian expects a (list of) Variable as `wrt`i   sB   tensor.hessian expects a (list of) 1 dimensional variable as `wrt`Ra   Rb   c         ` s   t  | |  | d ˆ  d d ƒS(   NRa   Rb   Rj   (   Rc   (   RX   RI   R   (   Ra   (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRî   Z  s   Rj  Rk  s‹   Scan has returned a list of updates. This should not happen! Report this to theano-users (also include the script that generated the error)(   RN   Rf  R   R   R   RP   R   R   Rc   R@   Rl  R¶   R7   R   (   R_   RM   Ra   Rb   Rf  RU   RV   t   hessianst   inputRg  t   hessRo  (    (   Ra   s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   hessian%  s2    						
	c         C` s¨   t  |  d ƒ s" t j |  d k ƒ St |  j t ƒ r8 d St |  j t ƒ rN d St } y t j	 |  ƒ } t
 } Wn t j j j k
 r‰ n X| r” d S| d k r¤ d Sd S(   sÄ   
    Returns 'yes', 'no', or 'maybe' indicating whether x
    is always 0.
    'maybe' means that x is an expression that is complicated enough
    that we can't tell that it simplifies to 0.
    R=   g        R¸   R·   R¹   (   R|   t   npR#  R   R=   R   R!   R4   R@   R¿   R)   RA   t   basict   NotScalarConstantError(   R   t   no_constant_valuet   constant_value(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR¾   i  s"    
t   ConsiderConstantc           B` s   e  Z d  „  Z RS(   c         C` s    g  | D] } | j  | ƒ ^ q S(   N(   R:   (   R"   RO  t   g_outst   g_out(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRc   ˆ  s    (   R0   R1   Rc   (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRz  ‡  s   c         C` s   t  j d d d ƒt |  ƒ S(   sc  
    DEPRECATED: use zero_grad() or disconnected_grad() instead.

    Consider an expression constant when computing gradients.

    The expression itself is unaffected, but when its gradient is
    computed, or the gradient of another expression that this
    expression is a subexpression of, it will not be backpropagated
    through. In other words, the gradient of the expression is
    truncated to 0.

    :param x: A Theano expression whose gradient should be truncated.

    :return: The expression is returned unmodified, but its gradient
        is now truncated to 0.

    .. versionadded:: 0.7
    sR   consider_constant() is deprecated, use zero_grad() or disconnected_grad() instead.Rl   i   (   Rm   Rk   t   consider_constant_(   R   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRa     s    t   ZeroGradc           B` s   e  Z d  „  Z RS(   c         C` s    g  | D] } | j  | ƒ ^ q S(   N(   R:   (   R"   RO  R{  R|  (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRc   «  s    (   R0   R1   Rc   (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR~  ª  s   c         C` s
   t  |  ƒ S(   s  
    Consider an expression constant when computing gradients.

    The expression itself is unaffected, but when its gradient is
    computed, or the gradient of another expression that this
    expression is a subexpression of, it will be backpropagated
    through with a value of zero. In other words, the gradient of
    the expression is truncated to 0.

    :param x: A Theano expression whose gradient should be truncated.

    :return: The expression is returned unmodified, but its gradient
        is now truncated to 0.
    (   t
   zero_grad_(   R   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt	   zero_grad²  s    t   DisconnectedGradc           B` s#   e  Z d  „  Z d „  Z d „  Z RS(   c         C` s   g  | D] } t  ƒ  ^ q S(   N(   R   (   R"   RO  R{  R|  (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRc   Å  s    c         C` s   d  g S(   N(   R3   (   R"   R6   RK   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRD   È  s    c         C` s
   t  g g S(   N(   R)   (   R"   RE   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR¡   Ë  s    (   R0   R1   Rc   RD   R¡   (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR  Ä  s   		c         C` s
   t  |  ƒ S(   sÿ  
    Consider an expression constant when computing gradients,
    while effectively not backpropagating through it.

    The expression itself is unaffected, but when its gradient is
    computed, or the gradient of another expression that this
    expression is a subexpression of, it will not be backpropagated
    through. This is effectively equivalent to truncating the gradient
    expression to 0, but is executed faster than zero_grad(), which stilll
    has to go through the underlying computational graph related to the
    expression.

    :param x: A Theano expression whose gradient should not be
              backpropagated through.

    :return: The expression is returned unmodified, but its gradient
        is now effectively truncated to 0.
    (   t   disconnected_grad_(   R   (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   disconnected_gradÒ  s    t   GradClipc           B` s    e  Z d Z d  „  Z d „  Z RS(   c         C` s.   | |  _  | |  _ |  j |  j  k s* t ‚ d  S(   N(   t   clip_lower_boundt   clip_upper_boundR   (   R"   R…  R†  (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR  ì  s    		c         C` s/   g  | D]$ } t  j j | |  j |  j ƒ ^ q S(   N(   R@   RA   t   clipR…  R†  (   R"   RO  R{  R|  (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRc   ó  s    (    (   R0   R1   t	   __props__R  Rc   (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR„  è  s   	c         C` s   t  | | ƒ |  ƒ S(   s­  
    This op do a view in the forward, but clip the gradient.

    This is an elemwise operation.

    :param x: the variable we want its gradient inputs clipped
    :param lower_bound: The lower bound of the gradient value
    :param upper_bound: The upper bound of the gradient value.

    :examples:

        x = theano.tensor.scalar()

        z = theano.tensor.grad(grad_clip(x, -1, 1)**2, x)
        z2 = theano.tensor.grad(x**2, x)

        f = theano.function([x], outputs = [z, z2])

        print(f(2.0))  # output (1.0, 4.0)

    :note: We register an opt in tensor/opt.py that remove the GradClip.
       So it have 0 cost in the forward and only do work in the grad.

    (   R„  (   R   t   lower_boundt   upper_bound(    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt	   grad_clipù  s    t	   GradScalec           B` s   e  Z d  „  Z d „  Z RS(   c         C` s   | |  _  d  S(   N(   t
   multiplier(   R"   R  (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyR    s    c         C` s   g  | D] } |  j  | ^ q S(   N(   R  (   R"   RO  R{  R|  (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRc     s    (   R0   R1   R  Rc   (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyRŒ    s   	c         C` s   t  | ƒ |  ƒ S(   s$  
    This op scale or inverse the gradient in the backpropagation.

    :param x: the variable we want its gradient inputs scale
    :param multiplier: scale of the gradient

    :examples:

        x = theano.tensor.fscalar()
        fx = theano.tensor.sin(x)

        fp = theano.tensor.grad(fx, wrt=x)
        fprime = theano.function([x], fp)
        print(fprime(2))#-0.416

        f_inverse=grad_scale(fx,-1.)
        fpp = theano.tensor.grad(f_inverse, wrt=x)
        fpprime = theano.function([x], fpp)
        print(fpprime(2))#0.416
    (   RŒ  (   R   R  (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt
   grad_scale  s    (\   R2   t
   __future__R    R   R   t   collectionsR   t   six.moves.builtinst   movesR  t   loggingRu   Rm   Rÿ   t   sixR   R@   R   t
   theano.gofR   R   t   theano.compatR   t	   six.movesR	   R
   t   theano.gof.null_typeR   R   t   theano.gof.opR   t   theano.compileR   R   R   Ru  t   __authors__t   __copyright__t   __license__t   __contact__t   __docformat__t	   getLoggert   _loggerR3   RA   t   _msg_retTypeR…   R   R   R    R=   t   TypeR!   R   R]   Rg   R4   Rc   R)   R    R¦   Rz   R?   Rƒ   RQ   Ro   R‚   R„   R{   t   objectRö   R9  RL  R;   Ra  RM  Rp  Rt  R¾   Rz  R}  Ra   R~  R  R€  R  R‚  Rƒ  R„  R‹  RŒ  RŽ  (    (    (    s/   /tmp/pip-build-X4mzal/theano/theano/gradient.pyt   <module>   s’   		)		‘5Ù‘	'	“	ÿ Š		Î			¦	HC								