ó
ąĘ÷Xc           @` sf  d  Z  d d l m Z m Z m Z d d l Z d d l Z d d l Z d d l Z d d l	 Z	 d d l
 Z
 d d l Z d d l m Z d d l m Z d d l Z d d l Z d d l m Z d Z d Z d	 Z d
 Z d Z d Z e j d  Z e j   Z d Z d Z d Z  e j! Z! g  a" e# a$ d   Z% d   Z& d e' f d     YZ( d e( f d     YZ) d S(   s8   
ProfileStats object for runtime and memory profiling.

i    (   t   absolute_importt   print_functiont   divisionN(   t   defaultdict(   t	   iteritems(   t   graphs   James Bergstras   Razvan Pascanus    (c) 2011, Universite de Montreals   3-clause BSD Licenses(   theano-dev <theano-dev@googlegroups.com>s   restructuredtext ens   theano.compile.profilingg        c       
   C` s,  t  j rg  }  t  j j d k r- t j } n3 t  j j d k rK t j } n t t  j j d  } x“ t t	 d d   d d d  D] } | j
 d k sæ | j d k sæ t | d	 d
  d k r	| j d | d t  j j d t  j j  t | t  s|  j |  qq t d  q Wt |   d k rt j |  d
  } d t |   } | | _ x|  d D]} xO d d d d d d d d d g	 D], } t | | t | |  t | |   qWxq d d d d d d g D]W } t | |  } x? t t | |   D]( \ } } | | k st  | | | <qüWqŃW| j rŚ| j rŚyf | j d
 j | j d | j d  }	 t |	  t | j d  k st  | j d
 |	 f | _ Wqćt k
 rÖ}
 t d  t |
  d | _ qćXq`d | _ q`W| j d | d t  j j d t  j j  qn  t  j r(t   n  d S(    sQ   
    Print ProfileStat objects in _atexit_print_list to _atexit_print_file.

    t   stderrt   stdoutt   wt   keyc         S` s   |  j  |  j S(   N(   t   compile_timet   fct_call_time(   t   a(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   <lambda>@   s    Ni’’’’i   t	   callcounti    t   filet   n_ops_to_printt   n_apply_to_prints   Skipping empty ProfilesB   Sum of all(%d) printed profiles at exit excluding Scan op profile.R
   R   t   fct_callcountt   vm_call_timet   optimizer_timet   linker_timet   validate_timet   import_timet   linker_node_make_thunkst
   apply_timet   apply_callcountt   apply_cimplt   variable_shapet   variable_stridest   linker_make_thunk_times&   Got an exception while merging profile(   t   configt   profilet	   profilingt   destinationt   sysR   R   t   opent   sortedt   _atexit_print_listR   R
   t   getattrt   summaryt   n_opst   n_applyt
   isinstancet   ScanProfileStatst   appendt   printt   lent   copyt   messaget   setattrR   t   AssertionErrort   optimizer_profilet   merge_profilet	   Exceptiont   Nonet   print_global_stats(   t   to_sumt   destination_filet   pst   cumt   msgt   attrt   cum_attrR	   t   valt   merget   e(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   _atexit_print_fn/   sh    			*		"
%

	c          C` sŖ   t  j j d k r t j }  n3 t  j j d k r< t j }  n t t  j j d  }  t d d d |  t d d t j   t	 t
 t t f d |  t d d d |  d	 S(
   sī   
    Print the following stats:
      -- Time elapsed since Theano was imported
      -- Time spent inside Theano functions
      -- Time spent in compiling Theano functions
           -- on graph optimization
           -- on linker
    R   R   R   t   =i2   R   s   Global stats: s¢   Time elasped since Theano import = %6.3fs, Time spent in Theano functions = %6.3fs, Time spent compiling Theano functions:  optimzation = %6.3fs, linker = %6.3fs N(   R   R!   R"   R#   R   R   R$   R.   t   timet   theano_imported_timet   total_fct_exec_timet   total_graph_opt_timet   total_time_linker(   R:   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR8   w   s    

t   ProfileStatsc           B` sm  e  Z d  Z d   Z d Z d Z d Z d Z e Z	 e Z
 e Z e Z i  Z i  Z d Z d Z d Z d Z d Z i  Z e j j Z d Z e Z e e e d  Z d   Z d   Z d   Z d	   Z  d
   Z! d   Z" d   Z# d   Z$ d   Z% d   Z& e' j( e d  Z) e' j( e d  Z* e' j( e d  Z+ d   Z, d   Z- e d  Z. e' j( d d d  Z/ d   Z0 RS(   sæ  
    Object to store runtime and memory profiling information for all of
    Theano's operations: compilation, optimization, execution.

    Parameters
    ----------
    atexit_print : bool
        True means that this object will be printed to stderr (using .summary())
        at the end of the program.
    **kwargs : misc initializers
        These should (but need not) match the names of the class vars declared
        in this class.

    c         C` s1   d |  _  d |  _ d |  _ i  |  _ i  |  _ d S(   s    Ignore previous function callg        i    N(   R   R   R   R   R   (   t   self(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   reset¦   s
    				g        i    i’’’’c         K` s  t  j r | r t t d  r? t t j d  r? t j j j sZ t t d  r t j j r t	 j
 j d d  d k r t d   q n  t  j rĪ | rĪ t t d  rĪ t j j rĪ t  j j rĪ t j d  n  i  |  _ i  |  _ i  |  _ i  |  _ i  |  _ i  |  _ | d  k r"t  j j |  _ n	 | |  _ |  j j |  | rmt j |   t smt j  t!  t" a qmn  t j  j j |  _ d  S(	   Nt   sandboxt   cudat   gpuarrayt   CUDA_LAUNCH_BLOCKINGt   0t   1s$  You are running the Theano profiler with CUDA enabled. Theano GPU ops execution is asynchronous by default. So by default, the profile is useless. You must set the environment variable CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to synchronize the execution to get a meaningful profile.s   Theano flag profiling.ignore_first_call is False. This cause bad profiling result in the new gpu back-end, we as sometimes we compile at the first call.(#   R   R    t   hasattrt   theanoRM   RN   t   cuda_enabledRO   t   pygpu_activatedt   ost   environt   getR6   R!   t   ignore_first_callt   loggert   warnR   t   output_sizeR   R   R   R   R7   t   time_thunkst   flag_time_thunkst   __dict__t   updateR&   R-   t   _atexit_registeredt   atexitt   registerRC   t   True(   RK   t   atexit_printR_   t
   gpu_checkst   kwargs(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   __init__ž   s>    
							c         C` sY   i  } xL t  |  j  D]; \ } } t | j  } | j | d  | | c | 7<q W| S(   s2   
        dict op -> total time on thunks

        i    (   R   R   t   typet   opt
   setdefault(   RK   t   rvalt   nodet   tt   typ(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt
   class_time+  s    c         C` sY   i  } xL t  |  j  D]; \ } } t | j  } | j | d  | | c | 7<q W| S(   s9   
        dict op -> total number of thunk calls

        i    (   R   R   Rj   Rk   Rl   (   RK   Rm   Rn   t   countRp   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   class_callcount8  s    c         C` sY   i  } xL t  |  j  D]; \ } } t | j  } | j | d  | | c d 7<q W| S(   s3   
        dict op -> total number of nodes

        i    i   (   R   R   Rj   Rk   Rl   (   RK   Rm   Rn   Rr   Rp   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   class_nodesE  s    c         C` s   i  } x |  j  D]z } t | j  } |  j | r; d } n d } | j | |  | | | k r t | |  d k r | | c | 7<q q W| S(   s3   
        dict op -> total number of nodes

        s   C t   Pyi   (   R   Rj   Rk   R   Rl   R/   (   RK   Rm   Rn   Rp   t   impl(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt
   class_implR  s    	&c         C` sP   i  } xC t  |  j  D]2 \ } } | j | j d  | | j c | 7<q W| S(   s2   
        dict op -> total time on thunks

        i    (   R   R   Rl   Rk   (   RK   Rm   Rn   Ro   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   op_timed  s
    c         C` s}   |  j  | } x_ | j   D]Q } | j |  j  k r | j | k rW |  j | j |  n  | | | j 7} q q W| | | <d S(   sQ   
        node -> fill total time icluding its parents (returns nothing)

        N(   R   t   get_parentst   ownert   fill_node_total_time(   RK   Rn   t   total_timest   totalt   parent(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR{   p  s    c         C` s=   i  } x0 |  j  D]% } | | k r |  j | |  q q W| S(   sF   
        dict op -> total time icluding the time for parents

        (   R   R{   (   RK   Rm   Rn   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   compute_total_times~  s
    c         C` sP   i  } xC t  |  j  D]2 \ } } | j | j d  | | j c | 7<q W| S(   s9   
        dict op -> total number of thunk calls

        i    (   R   R   Rl   Rk   (   RK   Rm   Rn   Rr   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   op_callcount  s
    c         C` sP   i  } xC t  |  j  D]2 \ } } | j | j d  | | j c d 7<q W| S(   s3   
        dict op -> total number of nodes

        i    i   (   R   R   Rl   Rk   (   RK   Rm   Rn   Rr   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   op_nodes  s
    c         C` sH   i  } x; |  j  D]0 } |  j | r3 d | | j <q d | | j <q W| S(   sM   
        dict op -> 'C' or 'Py' depending how the op is implemented

        s   C Ru   (   R   R   Rk   (   RK   Rm   Rn   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   op_impl”  s    c         C` s  |  j  r! t |  j  j    } n d } | d k rG t d d | d  S|  j   } |  j   } |  j   } |  j   } | d  k r t	 |  j  } n  g  t
 |  D]P \ } }	 |	 d | |	 | | j | d  | j | d  | j | d  f ^ q¢ }
 |
 j d d   d t  d } t d	 d | t d
 d | g  } g  } | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} t j g  | D] } t	 |  ^ q t	 |  } t |  j | d  } | d g 7} | d g 7} d j |  } d j |  } t | d | x§ |
 |  D] \ } }	 } } } } | d k rŠ|	 d k st  qn  | |	 7} | d | } t |  d d !|  } t | | | |	 |	 | | | | | f d | qWt d t d t	 |
  |  t d   |
 | D  t d    |
 | D  f d | t d! d | d  S("   Ni    sM   ProfileStats.summary_class: total time 0 (did you forget to enable counters?)R   id   s     R	   c         S` s   |  d |  d |  d f S(   Ni   i   i   (    (   Ro   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR   Å  s    t   reverset   Classs   ---s   <% time>s
     %4.1f%% s   <sum %>s	    %5.1f%% s   <apply time>s
      %7.3fs s   <time per call>s        %8.2es s   <type>s      %2s s   <#call>s   %6d  s   <#apply>s    %4d  s   <Class name>s   %st    i   iž’’’sG      ... (remaining %i Classes account for %6.2f%%(%.2fs) of the runtime)c         s` s'   |  ] \ } } } } } } | Vq d  S(   N(    (   t   .0t   fRo   R   t   cit   nb_callt   nb_op(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>ž  s    c         s` s'   |  ] \ } } } } } } | Vq d  S(   N(    (   R   R   Ro   R   R   R   R   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>’  s    t    (   R   t   sumt   valuesR.   Rq   Rs   Rt   Rw   R7   R/   R   RY   t   sortRe   t   npt   maxt
   line_widtht   joinR3   t   str(   RK   R   t   Nt
   local_timeRq   t
   class_callt   class_applyRw   t   clasRo   t   otimest   tott   hst   est   xt   upto_lengtht   maxlent
   header_strt
   format_strR   R   Rv   R   t   nb_applyt   ftott
   class_name(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   summary_classÆ  st    		`2#
c         C` st  |  j  r! t |  j  j    } n d } | d k rG t d d | d  S|  j   } |  j   } |  j   } |  j   } g  t |  D]P \ } }	 |	 d | |	 | | j	 | d  | j	 | d  | j	 | d  f ^ q }
 |
 j
 d d   d t  d } t d	 d | t d
 d | g  } g  } | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} t j g  | D] } t |  ^ qč t |  } t |  j | d  } | d g 7} | d g 7} d j |  } d j |  } t | d | x |
 |  D] \ } }	 } } } } | d k r²|	 d k svt  qvn  | |	 7} | d | } t | | | |	 |	 | | | | t |  |  f d | qvWt d t d t |
  |  t d   |
 | D  t d   |
 | D  f d | t d d | d  S(   Ni    sK   ProfileStats.summary_ops: total time 0 (did you forget to enable counters?)R   id   s     R	   c         S` s   |  d |  d |  d f S(   Ni   i   i   (    (   Ro   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR     s    R   t   Opss   ---s   <% time>s
     %4.1f%% s   <sum %>s	    %5.1f%% s   <apply time>s
      %7.3fs s   <time per call>s        %8.2es s   <type>s      %2s s   <#call>s     %4d  s   <#apply>s	   <Op name>s   %sR   sC      ... (remaining %i Ops account for %6.2f%%(%.2fs) of the runtime)c         s` s'   |  ] \ } } } } } } | Vq d  S(   N(    (   R   R   Ro   R   R   R   R   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>L  s    c         s` s'   |  ] \ } } } } } } | Vq d  S(   N(    (   R   R   Ro   R   R   R   R   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>M  s    R   (   R   R   R   R.   Rx   R   R   R   R   RY   R   Re   R   R/   R   R   R   R3   R   (   RK   R   R   R   Rx   t   op_callt   op_applyR   Rk   Ro   R   R   R   R   R   R   R   R    R”   R   R   Rv   R   R¢   R£   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   summary_ops  sn    		`2#
"c         C` s:  |  j  r! t |  j  j    } n d } | d k rG t d d | d  St d d | t d d | g  } g  } | d g 7} | d g 7} | d g 7} | d	 g 7} | d
 g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d g 7} | d d g 7} |  j r;| d d g 7} n  t j g  | D] } t |  ^ qH t |  } t |  j | d  } | d g 7} | d g 7} d j	 |  }	 d j	 |  }
 t |	 d | i  } g  } x t
 |  j   D]{ \ } } | j | k r!| j j   } | | | j <n | | j } | j | d | | | | j |  |  j | f  qēW~ | j d t d d    d } xC| |  D]7\ } } } } } | | 7} | d | } | d k rŅqn  |  j sźd } d } n t | j d  rz| j j g  | j D] } |  j | ^ qg  | j D] } |  j | ^ q/ } d | d d } d  | d d d | } n d! } d" } t |
 | | | | | | | | | t |  |  f	 d | t j sŅqn  xy t | j  D]h \ } } |  j j | d#  } |  j j | d$  } t | d% d&  } t d' | | | | f d | qāWxy t | j  D]h \ } } |  j j | d#  } |  j j | d$  } t | d% d&  } t d( | | | | f d | q^WqWt d) t d t |  |  t d*   | | D  t d+   | | D  f d | t d d | d  S(,   Ni    sM   ProfileStats.summary_nodes: total time 0 (did you forget to enable counters?)R   t   Applys   ------s   <% time>s
     %4.1f%% s   <sum %>s	    %5.1f%% s   <apply time>s
      %7.3fs s   <time per call>s        %8.2es s   <#call>s    %4d  s   <id>s   %3ds   %ss   <Mflops>s
   <Gflops/s>s   <Apply name>R   id   R   R	   c         S` s   |  d |  d f S(   Ni   i   (    (   Ro   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR     s    R   t   flopss   %8.1fg      @i   s   %10.1fs           s
             s   no shapes
   no stridest   dtypes   no dtypes-       input %d: dtype=%s, shape=%s, strides=%s s.       output %d: dtype=%s, shape=%s, strides=%s sN      ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)c         s` s$   |  ] \ } } } } } | Vq d  S(   N(    (   R   R   Ro   R   t   nd_idR   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>½  s    c         s` s$   |  ] \ } } } } } | Vq d  S(   N(    (   R   R   Ro   R   R­   R   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>¾  s    (   R   R   R   R.   R   R   R/   R   R   R   R   t   fgrapht   toposortR-   t   indexR   R   Re   RS   Rk   R«   t   inputst   outputsR   R   t   profile_memoryt	   enumerateRY   R   R'   (   RK   R   R   R   R   R   R   R   R   R    R”   t   topost   atimesR   Ro   t   topoR   R   R­   R   R£   R«   t   flops_st   vart   flt   idxt   sht   stR¬   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   summary_nodesQ  s²    			2 
		#	"%c         C` s  t  d d | t  d d | t  d |  j d | t  d |  j |  j f d | |  j d k rŻ t  d |  j d |  j |  j f d | t |  j j    } | d k rŻ t  d	 | d | |  j f d | qŻ n  t  d
 |  j d | t  d |  j	 d | t  d |  j
 d | t  d |  j d | t  d |  j d | t  d |  j d | t  d |  j d | x] t |  j j   d t j d  d  d  d  d  D]& \ } } t  d | | f d | q“Wt  d d | |  j
 d k r|  j |  j
 k  st  n  d  S(   Ns   Function profilingR   s   ==================s     Message: %ss,     Time in %i calls to Function.__call__: %esi    s,     Time in Function.fn.__call__: %es (%.3f%%)id   s     Time in thunks: %es (%.3f%%)s     Total compile time: %ess       Number of Apply nodes: %ds       Theano Optimizer time: %ess           Theano validate time: %essH       Theano Linker time (includes C, CUDA code generation/compiling): %ess          Import time %ess          Node make_thunk time %esR	   i   i’’’’i   s              Node %s time %esR   (   R.   R1   R   R   R   R   R   R   R
   t   nb_nodesR   R   R   R   R   R%   R   t   itemst   operatort
   itemgetterR3   (   RK   R   R   Rn   Ro   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   summary_functionĮ  s@    -c         C` sB   t  d t j j d | t j   t } t  d | d | d  S(   Ns%   Time in all call to theano.grad() %esR   s   Time since theano import %.3fs(   R.   RT   t   gradientt	   grad_timeRE   RF   (   RK   R   t
   total_time(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   summary_globalsę  s    c   .      ` sX  i  } i  } i   i  } x|  j  D]} | j | j i   | | j j | g   | j | j i   | | j j | g   d } x§ | j D] } | |  j k rč |  j | }	 t | j d  rß | j j |	  }
 | |
 7} qī d }
 n d }
 |
  | <| | j | j |
  | | j | j |	  q W| | | <q" W~
 d } d d d g d d d g d d g d d d g d d d g d d g d d d g d d d g d d g d d d g d d d g d d g g } d } d } t	  f d  }  f d   } x;t
 |  D]-\ } } t d   t
 |  D  } | j   } t | |  } d   } | j j } xs t | t	 f | t f | t	 f | t f g  D]A \ } \ } } | | | | d | } | | | |  | | <qWt j j r.| j } t j   } | | | |  } | t j   | 7} t | |  } n  ~ ~ qWt |  d k r]t d	 d
   n t d d
   t d d
   t d d
     f d   } t d d
   t d d
   | | d | d  t d d
   | | d | d  | d \ } }  }  }  | d \ }! }  }  }  t d d
   t d t t |! d d   d
   t d t t |! d d   d
   t d t t |! d d   d
   t d d
   | rżt d t t | d   | f d
   t d d
   n  t t d  rt t j d  rt t j j d  rt t j j j j d  rt j j j j }" |" j   \ }  }# t d t t |# d   d
   n  t d d
   t |  d k rĪt d  d
   n  t d! d
   t d d
   t  | j!    }$ |$ j" d" d#   d$ t  xCt |$ |   D]1\ }% \ } }& d% g t | j  }' x3 t
 t# | j$ d& i    D] \ } }( d' |' | <qnWx3 t
 t# | j$ d( i    D] \ } }( d) |' | <q¤Wt% | | j |  }) t& g  | j D] } t | j d  ^ qā r,d* |& }* |& t j j' k  r2|% } Pq2n d7 }* t d- |* |) d. j( |'  | f d
   q*Wt d/   |$ | D  }+ t | j)    }, |, d k r d0 }- n d1 t* |+  |, d2 }- t d3 t d t |  |  |+ |, |- f d
   t d d
   | d k r$t d4 t j j' d
   n  t d5 d
   t d6 d
   t d d
   d  S(8   Ni    t   get_sizec         ` s  d d l  m } d d g } d d g } d d g } d } d }	 | j j }
 t d    } x | j D] } d | | d <qe Wi  } x | j D] } g  | | <q Wi  } xÉ|  D]Į} x | j D] } d | | d <q¾ Wd } | rė d	 } n t	 | j
 d d	  } t	 | j
 d d	  } | | } xV | D]N } | rK| | k rK|	 | 7}	 n | rj| | k rj| | 7} n  | d 7} q&Wd } xl| j D]a} t | j |  r©d } n d } d	 } | r| | k r| | } t |  d k sļt d   | j | d } n  | rc| | k rc| d	 k s't  | | } t |  d k sOt d   | j | d } n  | d	 k	 r·t | t j  st  | j | |  } | | | <| | j |  n( | | c   | 7<| | c   | 7<| d 7} qWt | d | d  | d <t | d | d  | d <xIt | j  D]8} | | k oL| | sVt  t | j |  rqd } n d } |
 | r3| | j k r3| j r3t g  |
 | D] } | | d ^ q§ r3| | k rś| j | g   rś| | c   | 8<qk| | k rk| | } | | j |  | | rh| | j k rht | t j  rh| | c   | 8<qhqkq3q3Wq® W| | | |	 | g S(
   s  
            Calculate memory with specific node order.

            Return a list including the following values
            1.  node_memory_size
                Sum of the size of all variables that actually allocate
                memory (excluding views, and inplace).
            2.  running_memory_size
                The memory allocated after the current apply node.
            3.  running_max_memory_size
                The maximum of running_memory_size during the function.
            4.  node_memory_saved_by_view
                The sum of memory saved by returning view instead of new
                allocation.
            5.  node_memory_saved_by_inplace
                The sum of memory saved by reusing the input instead of
                new allocation.

            i    (   t   CudaNdarrayTypec           S` s   d g S(   Ni    (    (    (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR   5  s    i   t   destroy_mapt   view_maps9   Here we only support the possibility to destroy one inputs6   Here we only support the possibility to view one inputN(   t   theano.sandbox.cudaRÉ   R    t   dependenciesR   R±   t	   variablesR²   R7   R'   Rk   R+   Rj   R/   R3   RT   t   VariableRY   R-   R   t   setRz   t   allt   removet   Constant(   t   orderR®   t	   nodes_memt   ignore_dmapRÉ   t   node_memory_sizet   running_memory_sizet   running_max_memory_sizet   node_memory_saved_by_viewt   node_memory_saved_by_inplaceRĶ   t   compute_mapR¹   t	   viewed_byt   view_ofRn   R»   t   dmapt   vmapR@   t   vt   idx2t   outt   cgt   inst   vidxt   origin(   t   var_mem(    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   count_running_memory  s    	
	




	
	%
c         ` s  t  |   }  d a d a t j a  j j  t g    i   t	 d      x  j
 D] } d   | d <qX WxE |  D]= } x4 | j
 D]) } t | t j  r d   | d <q q Wqw Wt   } xv  j
 D]k } xb | j D]W \ } } | d k rŪ | j
 | j } t   f d   | D  r2| j |  q2qŪ qŪ WqĖ W        f d    i  }	 x  j D] } g  |	 | <qkWi  }
  | |	 |
  t S(   Ni    c           S` s   d g S(   Ni    (    (    (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR   ­  s    i   t   outputc         3` s   |  ] }   | d  Vq d S(   i    N(    (   R   Rį   (   RÜ   (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>»  s    c         ` s/  x(|  D] } |  j    } | j |  t t k r8 q n  g  } t d    } t d    } x | j D] } d   | d <qf Wd }	 d }
 t } t | j d d  } t | j d d  } d } xO| j D]D} d } | r$| | k r$| | } t	 |  d k st
 d   | j | d } n  | r| | k r| d k sHt
  | | } t	 |  d k spt
 d   | j | d } n  | d k	 röt | t j  sØt
  | j | |  } | | | <| j |  | | j |  | | j |  n |	  | 7}	 | d 7} qŹ Wt |	 7a t t t  a x'| j D]} | | k oN| | sXt
   | r5|  j k r5| j r5t g   | D] }   | d ^ q r5| | k rÕ| j | g   rÕ|
  | 7}
 qQ| | k rQ| | } | | j |  | | j |  | | rN|  j k rNt | t j  rN|
  | 7}
 qNqQq5q5Wt |
 8a  j |  t   }  j | t d  t k rHt  | <xv | j D]k } xb | j D]W \ } } | d	 k rø| j | j } t   f d
   | D  r| j |  qqøqøWqØW| s5t t k  rEt a qEqH | | |  n   j |  t |	 8a | a t |
 7a x | j D] } d   | d <qyWx< t |  D]. \ } } x | D] } | | j |  q±WqWx< t |  D]. \ } } x | D] } | | j |  qšWqŻWx | D] } | | =qWq Wd S(   s  
                Generate all valid node order from node_list and compute its
                memory peak.

                Parameters
                ----------
                executable_nodes
                    Set of executable nodes.

                c           S` s   g  S(   N(    (    (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR   Ö  s    c           S` s   g  S(   N(    (    (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR   ×  s    i   i    RŹ   RĖ   s9   Here we only support the possibility to destroy one inputs6   Here we only support the possibility to view one inputRź   c         3` s   |  ] }   | d  Vq d S(   i    N(    (   R   Rį   (   RÜ   (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>6  s    N(   R0   RŅ   t   max_mem_countt	   mem_boundR   R²   R'   Rk   R7   R/   R3   R±   R+   RT   RĻ   RY   R-   t	   mem_countR   Rz   RŃ   RÓ   t   addt	   frozensett   clientst   destroy_dependenciesR   (   t   executable_nodesRŻ   RŽ   Rn   t   new_exec_nodest   viewof_changet   viewedby_addt   viewedby_removeR¹   t   mem_createdt	   mem_freedt   max_storageRß   Rą   R»   Rć   Rå   Rę   Rē   Rį   t
   frozen_sett   ct   _t   depst   k_removet   v_removet   it   k_addt   v_addt   k(   RÜ   RĶ   t	   done_dictt   done_setR®   t   min_memory_generatorRč   (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR  ¾  s“    




	%

	


(   t   listRķ   Rė   R   t   infRģ   R    RĶ   RŠ   R   R±   R+   R   RÓ   Rš   Rń   RŃ   Rī   RĪ   (   t	   node_listR®   RÕ   R¹   R@   Rņ   Rū   Rü   Rż   RŻ   RŽ   (   Rč   (   RÜ   RĶ   R  R  R®   R  s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   count_minimum_peak”  s8    		!c         s` s+   |  ]! \ } } t  d    | D  Vq d S(   c         s` s$   |  ] } t  | t  s | Vq d  S(   N(   R+   R   (   R   Rį   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>e  s    N(   R   (   R   R	   R@   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>e  s   c         S` s  | \ } } } } t  | d t |  d   | d <t  | d t |  d   | d <t  | d |  d d  | d <t  | d |  d d  | d <t  | d |  d d  | d <t  | d |  d d  | d <t  | |  d  } t  | |  d  } | | | | f S(   Ni    i   i   i   i   (   R   R   (   t   running_memoryt   statst   max_node_memory_sizet   max_running_max_memory_sizet   max_node_memory_saved_by_viewt    max_node_memory_saved_by_inplace(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   compute_max_statsq  s"    
!

RÖ   i   s>   Memory Profile (the max between all functions in that profile)R   s   Memory Profiles   (Sparse variables are ignored)s/   (For values in brackets, it's for linker = c|pyc         ` sī   |  \ } } } } | \ } } } } t  d t t | d d   t t | d d   f d   t  d t t | d d   t t | d d   f d   t  d t t | d d   t t | d d   f d   d  S(	   Ns           CPU: %dKB (%dKB)i   g      @R   s           GPU: %dKB (%dKB)i   s           CPU + GPU: %dKB (%dKB)i    (   R.   t   intt   round(   t   stats1t   stats2Rü   R  t   new_max_running_max_memory_size(   R   (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   print_stats­  s    s   ---s(       Max peak memory with current settingi   sT       Max peak memory with current setting and Theano flag optimizer_excluding=inplacei   sF       Max peak memory if allow_gc=False (linker don't make a difference)s           CPU: %dKBg      @s           GPU: %dKBs           CPU + GPU: %dKBsO       Minimum peak from all valid apply node order is %dKB(took %.3fs to compute)RM   RN   t   cuda_ndarrayt   theano_allocateds=       Max Memory allocated on the GPU (for all functions): %dKBR   s6       This list is based on all functions in the profilesY       <Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node>R	   c         S` s   |  d S(   Ni   (    (   R   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR   ē  s    R   Rū   RŹ   R   RĖ   Rį   s   %9dBs   %10st   Unknowns        %s  %s %s %sR   c         s` s   |  ] \ } } | Vq d  S(   N(    (   R   Rü   t   size(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>ü  s    s   0%s   (%.2f%%)id   s[      ... (remaining %i Apply account for %4dB/%dB (%s) of the Apply with dense outputs sizes)s>       All Apply nodes have output sizes that take less than %dB.s>       <created/inplace/view> is taken from the Op's declaration.s¢       Apply nodes marked 'inplace' or 'view' may actually allocate memory, this is not reported here. If you use DebugMode, warnings will be emitted in those cases.s
      Unknown(+   R   Rl   R®   R²   R   RS   Rj   RČ   R-   t   FalseR   R   RÆ   R   R    t   node_executed_orderR“   Re   R   R!   t   min_peak_memoryt   apply_nodesRE   R/   R.   R  R  RT   RM   RN   R  R  R  RĄ   R   R'   Rk   R   RŃ   t   min_memory_sizeR   R   t   float(.   RK   R   R   t
   fct_memoryt
   fct_shapest   node_memRn   t	   sum_denseRć   R¼   Rį   t   max_sum_sizeR  t   min_max_peakt   min_peak_timeRé   R
  R®   RÕ   t   sum_sizeRŌ   R  t	   new_orderR   t   ordRÖ   R  R	  t   tttt   min_peakR  R  Rü   t   new_max_node_memory_sizeR  t   gpu_maxRĄ   R»   t   node_outputs_sizet   codet   inpt   shapesR  t   sum_remainingt   size_sum_denset   p(    (   R   Rč   s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   summary_memoryģ  s   	
!!!'Ā				"	
	
$	!	
	#(((
"			i   c         C` s`  |  j  |  |  j |  t |  j j    } | d k rn |  j | |  |  j | |  |  j | |  n" |  j d k r t	 d d | n  t
 j j rą t g  |  j j   D] } | j ^ qÆ  } t j j | d t n  |  j sņ |  j r|  j | |  n  |  j rOt	 d d | t	 d d | |  j d j | |  j d  n  |  j |  d  S(   Ni    sJ     No execution time accumulated (hint: try config profiling.time_thunks=1)R   t
   print_types   Optimizer Profiles   -----------------i   (   RĆ   RĒ   R   R   R   R„   R©   R¾   R   R.   R   R!   t
   debugprintRŠ   t   keysR®   RT   t   printingRe   R   R   R7  R4   t   print_profilet
   print_tips(   RK   R   R   R   R   t   nt   fcts(    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR(     s*    	
+	c         ` sP  t  d d  d d  l   j j j }  j   j    j  j  j  j	  j
  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  j  g   j!  j"  j#  j$  j%  j&  j'  j(  j)  j*  j+  j,  j-  j.  j/  j0  j1  j2  j3   j4 j5 j6   j4 j5 j7 g    f d      f d          f d   }     f d   } t8 } t9 j: d k r÷t  d	 d  t; } n  t9 j< j= rHt> g  |  j? D] } | | j@  ^ q rHt  d
 d  t; } n  t9 j< j= rÆt> g  |  j? D]+ } | | j@  o| jA d jB d k ^ qb rÆt  d d  t; } n  x |  j? D] } | } tC | j@   jD  r¹tE g  | jA D] } tF | jG jH  d k ^ qē r¹t  d g  | jA D] } | jG ^ qd  t; } q¹q¹Wxh |  j? D]] } | } tC | j@ |  rRt; } t  d d  t9 jI jJ d  r«t  d d  n  PqRqRWx |  j? D]| } | } tC | j@   jD  r½tF tK d   | jA D   d k r½t  d g  | jA D] } | jG ^ qd  t; } q½q½Wd d  lL jM jN }	 d d lO mP }
 d d  lQ j jR jS } d d  lT  x° |  j? D]„ } | } tC | j@ | jU  rä|	 jV jW   rä jX jV jY   rät  d  t; } qän  tC | j@ |
  r|	 jV jW   r/ jX jV jY   r/t  d  t; } q/qqW| sLt  d d  n  d  S(   Nsß   Here are tips to potentially make your code run faster
                 (if you think of new ones, suggest them on the mailing list).
                 Test them first, as they are not guaranteed to always provide a speedup.R   i    c         ` sW   t  |   j j  rL g  } x* |  j j   D] } |   | j  7} q+ W| S|  g Sd  S(   N(   R+   t   scalart	   CompositeR®   RÆ   Rk   (   t   st   lRn   (   t   get_scalar_opsRT   (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyRD  G  s    c         ` s3   t  |  j  j j  r%   |  j  S|  j g Sd  S(   N(   R+   t	   scalar_opR@  RA  (   Rk   (   RD  RT   (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   list_scalar_opP  s    c         ` ss   t  |    j  s t S |   } xF | D]> } | j  k rB t S| j  k r) t d | d  q) q) Wt Sd  S(   Ns8   We don't know if amdlibm will accelerate this scalar op.R   (   R+   t   ElemwiseR  t	   __class__Re   R.   (   Rk   RC  t   s_op(   t   TR   RF  t   scalar_op_amdlibm_no_speed_upt   scalar_op_amdlibm_speed_up(    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   amdlibm_speed_upV  s    c         ` sR   t  |    j  s t S |   } t g  | D] } | j  j g k ^ q,  Sd  S(   N(   R+   RG  R  t   anyRH  t   Exp(   Rk   RC  RI  (   RJ  RF  t   scal(    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   exp_float32_opc  s    t   float64s&     - Try the Theano flag floatX=float32sq     - Try installing amdlibm and set the Theano flag lib.amdlibm=True. This speeds up only some Elemwise operation.t   float32s„     - With the default gcc libm, exp in float32 is slower than in float64! Try Theano flag floatX=float64, or install amdlibm and set the theano flags lib.amdlibm=Truei   sµ     - You have a dot operation that was not optimized to dot22 (which is faster). Make sure the inputs are float32 or float64, and are the same for both inputs. Currently they are: %ssČ     - Replace the default random number generator by 'from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams', as this is is faster. It is still experimental, but seems to work correctly.t   gpusR        - MRG_RandomStreams is the only random number generator supported on the GPU.c         s` s   |  ] } | j  Vq d  S(   N(   R¬   (   R   R   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pys	   <genexpr>  s    i   ss     - You have a dot operation that has different dtype  for inputs (%s). Make sure that the inputs have same  dtype.(   t
   LogSoftmaxsI   Install CuDNN to do pooling fasterthis allows the operation to run on GPUsL   Install CuDNN to do LogSoftmax fasterthis allows the operation to run on GPUs     Sorry, no tip for today.(Z   R.   RT   t   tensort
   raw_randomt   RandomFunctionR@  t   LTt   GTt   LEt   GEt   EQt   NEQt   InRanget   Switcht   ORt   XORt   ANDt   Invertt   Maximumt   Minimumt   Addt   Mult   Subt   TrueDivt   IntDivt   Clipt   Secondt   Identityt   Castt   Sgnt   Negt   Invt   Sqrt   Modt   Powt   Ceilt   Floort   RoundHalfToEvent   RoundHalfAwayFromZerot   Logt   Log2t   Log10t   Log1pRO  t   Sqrtt   Abst   Cost   Sint   Tant   Tanht   Cosht   Sinht   nnett   sigmt   ScalarSigmoidt   ScalarSoftplusR  R   t   floatXRe   t   libt   amdlibmRN  R   Rk   R±   R¬   R+   t   DotRŃ   R/   Rj   t   broadcastablet   devicet
   startswithRŠ   RĢ   RM   RN   t   theano.tensor.nnetRU  t   theano.tensor.signal.poolt   signalt   poolt   theano.gpuarrayt   Poolt   dnnt   dnn_availableRO   t   dnn_present(   RK   R   RX  RM  RQ  t   printed_tipR   Rn   R   RN   RU  R  (    (   RJ  R   RD  RF  RP  RK  RL  RT   s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR=  +  s¤    				%		;		.'		
:'#
#
(1   t   __name__t
   __module__t   __doc__RL   R
   R   R   R   R7   R   R   R   R1   R   R   R   R   R   R   R   R   R   R!   t   output_line_widthR   Ræ   R4   Re   Ri   Rq   Rs   Rt   Rw   Rx   R{   R   R   R   R   R#   R   R„   R©   R¾   RĆ   RĒ   R7  R(   R=  (    (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyRJ      sX   	,										TNp	%	’ ’ )	R,   c           B` s;   e  Z d  Z d  Z d  Z e d d  Z d   Z d   Z	 RS(   g        c         K` s&   t  t |   j | |  | |  _ d  S(   N(   t   superR,   Ri   t   name(   RK   Rf   R   Rh   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyRi   Į  s    c         C` s   d  S(   N(    (   RK   R   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyRĒ   Å  s    c         C` sl  |  j  d k r d  St d d | |  j d  k	 rN t d |  j d d | n t d d | t d d | t d |  j d | t d	 |  j  |  j |  j f d | t d d | d } |  j d k rē |  j d
 |  j } n  t d |  j | f d | d
 } |  j d k r4d |  j d
 |  j } n  t d |  j |  j | f d | t d d | d  S(   Ni    R   R   s   Scan Op profiling (t   )s   Scan Op profilings   ==================s     Message: %ss:     Time in %i calls of the op (for a total of %i steps) %esid   s1     Total time spent in calling the VM %es (%.3f%%)g      Y@s2     Total overhead (computing slices..) %es (%.3f%%)(   R   R.   R   R7   R1   t   nbstepst	   call_timeR   (   RK   R   R@   (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyRĆ   Ź  s,     N(
   R  R  R   R¢  R£  Re   R7   Ri   RĒ   RĆ   (    (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyR,   ¼  s   	(*   R  t
   __future__R    R   R   Rc   R0   t   loggingRĮ   RW   R#   RE   t   collectionsR   t   sixR   t   numpyR   RT   t
   theano.gofR   t   __authors__t   __reviewer__t   __copyright__t   __license__t   __contact__t   __docformat__t	   getLoggerR[   RF   RG   RH   RI   R   R&   R  Rb   RC   R8   t   objectRJ   R,   (    (    (    s8   /tmp/pip-build-X4mzal/theano/theano/compile/profiling.pyt   <module>   sH   		H	’ ’ ’ ’ ’ +