"""Sequence-to-sequence model with an attention mechanism."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import random

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn import seq2seq
from tensorflow.models.rnn.translate import data_utils


class Seq2SeqModel(object):
  """Sequence-to-sequence model with attention and for multiple buckets.

  This class implements a multi-layer recurrent neural network as encoder,
  and an attention-based decoder. This is the same as the model described in
  this paper: http://arxiv.org/abs/1412.7449 - see that paper for details, or
  the seq2seq library for the complete model implementation.
  This class also allows using GRU cells in addition to LSTM cells, and
  sampled softmax to handle a large output vocabulary. A single-layer
  version of this model, but with a bidirectional encoder, was presented in
    http://arxiv.org/abs/1409.0473
  and sampled softmax is described in Section 3 of the following paper.
    http://arxiv.org/pdf/1412.2007v2.pdf
  """

  def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
               num_layers, max_gradient_norm, batch_size, learning_rate,
               learning_rate_decay_factor, use_lstm=False,
               num_samples=512, forward_only=False):
    """Create the model.

    Args:
      source_vocab_size: size of the source vocabulary.
      target_vocab_size: size of the target vocabulary.
      buckets: a list of pairs (I, O), where I specifies maximum input length
        that will be processed in that bucket, and O specifies maximum output
        length. Training instances that have inputs longer than I or outputs
        longer than O will be pushed to the next bucket and padded accordingly.
        We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
      size: number of units in each layer of the model.
      num_layers: number of layers in the model.
      max_gradient_norm: gradients will be clipped to maximally this norm.
      batch_size: the size of the batches used during training;
        the model construction is independent of batch_size, so it can be
        changed after initialization if this is convenient, e.g., for decoding.
      learning_rate: learning rate to start with.
      learning_rate_decay_factor: decay learning rate by this much when needed.
      use_lstm: if true, we use LSTM cells instead of GRU cells.
      num_samples: number of samples for sampled softmax.
      forward_only: if set, we do not construct the backward pass in the model.
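
    Example (illustrative hyper-parameters, assumed rather than prescribed):
      model = Seq2SeqModel(
          source_vocab_size=40000, target_vocab_size=40000,
          buckets=[(5, 10), (10, 15)], size=256, num_layers=2,
          max_gradient_norm=5.0, batch_size=64, learning_rate=0.5,
          learning_rate_decay_factor=0.99)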
    """
    self.source_vocab_size = source_vocab_size
    self.target_vocab_size = target_vocab_size
    self.buckets = buckets
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    # If we use sampled softmax, we need an output projection.
    output_projection = None
    softmax_loss_function = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if num_samples > 0 and num_samples < self.target_vocab_size:
      with tf.device("/cpu:0"):
        w = tf.get_variable("proj_w", [size, self.target_vocab_size])
        w_t = tf.transpose(w)
        b = tf.get_variable("proj_b", [self.target_vocab_size])
      output_projection = (w, b)

      def sampled_loss(inputs, labels):
        with tf.device("/cpu:0"):
          labels = tf.reshape(labels, [-1, 1])
          return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                            num_samples,
                                            self.target_vocab_size)
      softmax_loss_function = sampled_loss

    # Create the internal multi-layer cell for our RNN.
    single_cell = rnn_cell.GRUCell(size)
    if use_lstm:
      single_cell = rnn_cell.BasicLSTMCell(size)
    cell = single_cell
    if num_layers > 1:
      cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)

    # The seq2seq function: we use embedding for the input and attention.
    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
      return seq2seq.embedding_attention_seq2seq(
          encoder_inputs, decoder_inputs, cell, source_vocab_size,
          target_vocab_size, output_projection=output_projection,
          feed_previous=do_decode)

    # Feeds for inputs.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
      self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                name="encoder{0}".format(i)))
    for i in xrange(buckets[-1][1] + 1):
      self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                name="decoder{0}".format(i)))
      self.target_weights.append(tf.placeholder(tf.float32, shape=[None],
                                                name="weight{0}".format(i)))

    # Our targets are decoder inputs shifted by one.
    targets = [self.decoder_inputs[i + 1]
               for i in xrange(len(self.decoder_inputs) - 1)]

    # Training outputs and losses.
    if forward_only:
      self.outputs, self.losses = seq2seq.model_with_buckets(
          self.encoder_inputs, self.decoder_inputs, targets,
          self.target_weights, buckets, self.target_vocab_size,
          lambda x, y: seq2seq_f(x, y, True),
          softmax_loss_function=softmax_loss_function)
      # If we use an output projection, project outputs for decoding.
      if output_projection is not None:
        for b in xrange(len(buckets)):
          self.outputs[b] = [tf.nn.xw_plus_b(output, output_projection[0],
                                             output_projection[1])
                             for output in self.outputs[b]]
    else:
      self.outputs, self.losses = seq2seq.model_with_buckets(
          self.encoder_inputs, self.decoder_inputs, targets,
          self.target_weights, buckets, self.target_vocab_size,
          lambda x, y: seq2seq_f(x, y, False),
          softmax_loss_function=softmax_loss_function)

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
      self.gradient_norms = []
      self.updates = []
      opt = tf.train.GradientDescentOptimizer(self.learning_rate)
      for b in xrange(len(buckets)):
        gradients = tf.gradients(self.losses[b], params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                         max_gradient_norm)
        self.gradient_norms.append(norm)
        self.updates.append(opt.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step))

    self.saver = tf.train.Saver(tf.all_variables())

  def step(self, session, encoder_inputs, decoder_inputs, target_weights,
           bucket_id, forward_only):
    """Run a step of the model feeding the given inputs.

    Args:
      session: tensorflow session to use.
      encoder_inputs: list of numpy int vectors to feed as encoder inputs.
      decoder_inputs: list of numpy int vectors to feed as decoder inputs.
      target_weights: list of numpy float vectors to feed as target weights.
      bucket_id: which bucket of the model to use.
      forward_only: whether to do the backward step or only forward.

    Returns:
      A triple consisting of gradient norm (or None if we did not do backward),
      the average log-perplexity (i.e., the loss), and the outputs.

    Raises:
      ValueError: if length of encoder_inputs, decoder_inputs, or
        target_weights disagrees with bucket size for the specified bucket_id.
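
    Example (a hedged sketch; `sess` and the batch lists are assumed to come
    from get_batch below):
      grad_norm, loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                      target_weights, bucket_id, False)
      _, loss, output_logits = model.step(sess, encoder_inputs,
                                          decoder_inputs, target_weights,
                                          bucket_id, True)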
    """
    # Check if the sizes match.
    encoder_size, decoder_size = self.buckets[bucket_id]
    if len(encoder_inputs) != encoder_size:
      raise ValueError("Encoder length must be equal to the one in bucket,"
                       " %d != %d." % (len(encoder_inputs), encoder_size))
    if len(decoder_inputs) != decoder_size:
      raise ValueError("Decoder length must be equal to the one in bucket,"
                       " %d != %d." % (len(decoder_inputs), decoder_size))
    if len(target_weights) != decoder_size:
      raise ValueError("Weights length must be equal to the one in bucket,"
                       " %d != %d." % (len(target_weights), decoder_size))

    # Input feed: encoder inputs, decoder inputs, target_weights, as provided.
    input_feed = {}
    for l in xrange(encoder_size):
      input_feed[self.encoder_inputs[l].name] = encoder_inputs[l]
    for l in xrange(decoder_size):
      input_feed[self.decoder_inputs[l].name] = decoder_inputs[l]
      input_feed[self.target_weights[l].name] = target_weights[l]

    # Since our targets are decoder inputs shifted by one, we need one more.
    last_target = self.decoder_inputs[decoder_size].name
    input_feed[last_target] = np.zeros([self.batch_size], dtype=np.int32)

    # Output feed: depends on whether we do a backward step or not.
    if not forward_only:
      output_feed = [self.updates[bucket_id],  # Update Op that does SGD.
                     self.gradient_norms[bucket_id],  # Gradient norm.
                     self.losses[bucket_id]]  # Loss for this batch.
    else:
      output_feed = [self.losses[bucket_id]]  # Loss for this batch.
      for l in xrange(decoder_size):  # Output logits.
        output_feed.append(self.outputs[bucket_id][l])

    outputs = session.run(output_feed, input_feed)
    if not forward_only:
      return outputs[1], outputs[2], None  # Gradient norm, loss, no outputs.
    else:
      return None, outputs[0], outputs[1:]  # No gradient norm, loss, outputs.

  def get_batch(self, data, bucket_id):
    """Get a random batch of data from the specified bucket, prepare for step.

    To feed data in step(...) it must be a list of batch-major vectors, while
    the data here contains single length-major cases. So the main logic of
    this function is to re-index the data cases into the proper format for
    feeding.

    Args:
      data: a tuple of size len(self.buckets) in which each element contains
        lists of pairs of input and output data that we use to create a batch.
      bucket_id: integer, which bucket to get the batch for.

    Returns:
      The triple (encoder_inputs, decoder_inputs, target_weights) for
      the constructed batch that has the proper format to call step(...) later.
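
    Example (illustrative shapes only): with bucket (2, 4) and batch_size 32,
    the returned encoder_inputs is a list of 2 int32 arrays of shape [32]
    (time-major), decoder_inputs a list of 4 such arrays starting with GO_ID,
    and target_weights a list of 4 float32 arrays that are 0.0 wherever the
    corresponding target is padding.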
    """
    encoder_size, decoder_size = self.buckets[bucket_id]
    encoder_inputs, decoder_inputs = [], []

    # Get a random batch of encoder and decoder inputs from data,
    # pad them if needed, reverse encoder inputs and add GO to decoder.
    for _ in xrange(self.batch_size):
      encoder_input, decoder_input = random.choice(data[bucket_id])

      # Encoder inputs are padded and then reversed.
      encoder_pad = [data_utils.PAD_ID] * (encoder_size - len(encoder_input))
      encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))

      # Decoder inputs get an extra "GO" symbol, and are padded then.
      decoder_pad_size = decoder_size - len(decoder_input) - 1
      decoder_inputs.append([data_utils.GO_ID] + decoder_input +
                            [data_utils.PAD_ID] * decoder_pad_size)

    # Now we create batch-major vectors from the data selected above.
    batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []

    # Batch encoder inputs are just re-indexed encoder_inputs.
    for length_idx in xrange(encoder_size):
      batch_encoder_inputs.append(
          np.array([encoder_inputs[batch_idx][length_idx]
                    for batch_idx in xrange(self.batch_size)],
                   dtype=np.int32))

    # Batch decoder inputs are re-indexed decoder_inputs, we create weights.
    for length_idx in xrange(decoder_size):
      batch_decoder_inputs.append(
          np.array([decoder_inputs[batch_idx][length_idx]
                    for batch_idx in xrange(self.batch_size)],
                   dtype=np.int32))

      # Create target_weights to be 0 for targets that are padding.
      batch_weight = np.ones(self.batch_size, dtype=np.float32)
      for batch_idx in xrange(self.batch_size):
        # We set weight to 0 if the corresponding target is a PAD symbol.
        # The corresponding target is decoder_input shifted by 1 forward.
        if length_idx < decoder_size - 1:
          target = decoder_inputs[batch_idx][length_idx + 1]
        if length_idx == decoder_size - 1 or target == data_utils.PAD_ID:
          batch_weight[batch_idx] = 0.0
      batch_weights.append(batch_weight)
    return batch_encoder_inputs, batch_decoder_inputs, batch_weights
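
# A minimal training-step sketch (not part of the original module, so it is
# kept in comments). It assumes `model` was constructed as in the __init__
# docstring example above, and that `train_set[bucket_id]` is a list of
# (source_token_ids, target_token_ids) pairs like the one built by
# translate.py.
#
#   with tf.Session() as sess:
#     sess.run(tf.initialize_all_variables())
#     encoder_inputs, decoder_inputs, target_weights = model.get_batch(
#         train_set, bucket_id=0)
#     grad_norm, loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
#                                     target_weights, bucket_id=0,
#                                     forward_only=False)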