ó
àÆ÷Xc           @` sÙ   d  Z  d d l m Z m Z m Z d d l Z d d l Z d d l Z d d l	 Z	 d d l
 m Z d d l m Z d „  Z e d k rÕ d d l Z e e j ƒ d k r· e d	 ƒ ‚ n  e e j d
 e j d ƒ n  d S(   s™   
This file compare the runtime of two independent dot products on one
and two GPU to measure the speedup.

This should be 2x if the GPUs are equivalent.
i    (   t   absolute_importt   print_functiont   divisionN(   t   init_dev(   t	   gpu_dot22c         C` sÃ  t  |  d ƒ t  | d ƒ d } t j j | | ƒ j d ƒ } t j | d d ƒ} t j | d d ƒ} t j | d d ƒ} t j | d d ƒ} t j | d d ƒ} t j | d d ƒ}	 t j g  t | | ƒ t | | ƒ g ƒ }
 t j g  t | | ƒ t | |	 ƒ g ƒ } t j g  t | | ƒ g ƒ } t j g  t | |	 ƒ g ƒ } t j g  t | | ƒ d j	 d ƒ g ƒ } t j g  t | |	 ƒ d j	 d ƒ g ƒ } |
 j
 ƒ  } | d j ƒ  | d	 j ƒ  f | j
 ƒ  } | d j ƒ  | d	 j ƒ  f | j
 ƒ  } | d j ƒ  | j
 ƒ  } | d j ƒ  | j
 ƒ  } | j
 ƒ  } d  } t j ƒ  } |
 j
 ƒ  } | d j ƒ  | d	 j ƒ  f t j ƒ  } d  } t d
 | | f ƒ t j ƒ  } | j
 ƒ  } | d j ƒ  | d	 j ƒ  f t j ƒ  } d  } t d | | f ƒ t j ƒ  } | j
 ƒ  } | j
 ƒ  } | d j ƒ  | d j ƒ  t j ƒ  } d  } t d | | f ƒ t j ƒ  } | j
 ƒ  } | j
 ƒ  } t j ƒ  } d  } t d | | f ƒ d t j f d „  ƒ  Y} | d | t ƒ } | d | t ƒ } t j ƒ  } | j ƒ  | j ƒ  | j ƒ  | j ƒ  t j ƒ  } t d | | f ƒ | d | t ƒ } | d | t ƒ } t j ƒ  } | j ƒ  | j ƒ  | j ƒ  | j ƒ  t j ƒ  } t d | | f ƒ d  S(   Nt   ctx1t   ctx2i   i   t   float32t   targeti    t   cpui   s   one ctx async %fs   two ctx async %fs   two ctx, 2 fct async %fs   two ctx, 2 fct with transfer %ft   myThreadc           B` s   e  Z d  „  Z d „  Z RS(   c         S` s/   t  j j |  ƒ | |  _ | |  _ | |  _ d  S(   N(   t	   threadingt   Threadt   __init__t   ft   namet   sync(   t   selfR   R   R   (    (    s;   /tmp/pip-build-X4mzal/theano/theano/misc/check_multi_gpu.pyR   Z   s    		c         S` s3   |  j  ƒ  } |  j r& | d j ƒ  n  | |  _ d  S(   Ni    (   R   R   t   r(   R   R   (    (    s;   /tmp/pip-build-X4mzal/theano/theano/misc/check_multi_gpu.pyt   run`   s    	(   t   __name__t
   __module__R   R   (    (    (    s;   /tmp/pip-build-X4mzal/theano/theano/misc/check_multi_gpu.pyR
   Y   s   	s   Thread-3s   Thread-4s"   two ctx, 2 fct async, 2 threads %fs   Thread-5s   Thread-6s*   two ctx, 2 fct with transfer, 2 threads %fi @  (   i    i    (   i    i    (   R   t   npt   randomt   randnt   astypet   theanot   sharedt   functionR   t   transfert   fnR   t   Nonet   timet   printR   R   t   Truet   startt   joint   False(   t   dev1t   dev2t   sizet   datat   val1at   val1bt   val1ct   val1dt   val2at   val2bt   f1t   f2t   f3t   f4t   f5t   f6R   t   tt   t2t   r2R
   t   thread1t   thread2(    (    s;   /tmp/pip-build-X4mzal/theano/theano/misc/check_multi_gpu.pyt   main   s”    ++







t   __main__i   s%   This script require two device names.i   i   (   t   __doc__t
   __future__R    R   R   R   R    t   numpyR   R   t   theano.gpuarrayR   t   theano.gpuarray.blasR   R;   R   t   syst   lent   argvt
   ValueError(    (    (    s;   /tmp/pip-build-X4mzal/theano/theano/misc/check_multi_gpu.pyt   <module>   s   	n