σ
ΙΘχXc           @` sή   d  Z  d d l m Z d d l m Z d d l m Z d d l Z d d l Z d d l Z d d l Z d d l	 Z
 d d l Z d d l m Z d d l
 Z d d l	 m Z d   Z d	   Z d
   Z d d  Z d   Z d S(   s%   Utilities for parsing PTB text files.i    (   t   absolute_import(   t   division(   t   print_functionN(   t   xrange(   t   gfilec         C` s;   t  j |  d  # } | j   j d d  j   SWd  QXd  S(   Nt   rs   
s   <eos>(   R   t   GFilet   readt   replacet   split(   t   filenamet   f(    (    sb   /tmp/pip-build-UG86a1/tensorflow/tensorflow-0.6.0.data/purelib/tensorflow/models/rnn/ptb/reader.pyt   _read_words%   s    c         C` ss   t  |   } t j |  } t | j   d d   } t t |    \ } } t t | t t	 |     } | S(   Nt   keyc         S` s	   |  d S(   Ni   (    (   t   x(    (    sb   /tmp/pip-build-UG86a1/tensorflow/tensorflow-0.6.0.data/purelib/tensorflow/models/rnn/ptb/reader.pyt   <lambda>.   s    (
   R   t   collectionst   Countert   sortedt   itemst   listt   zipt   dictt   ranget   len(   R
   t   datat   countert   count_pairst   wordst   _t
   word_to_id(    (    sb   /tmp/pip-build-UG86a1/tensorflow/tensorflow-0.6.0.data/purelib/tensorflow/models/rnn/ptb/reader.pyt   _build_vocab*   s    !c         C` s'   t  |   } g  | D] } | | ^ q S(   N(   R   (   R
   R   R   t   word(    (    sb   /tmp/pip-build-UG86a1/tensorflow/tensorflow-0.6.0.data/purelib/tensorflow/models/rnn/ptb/reader.pyt   _file_to_word_ids6   s    c   	      C` s   t  j j |  d  } t  j j |  d  } t  j j |  d  } t |  } t | |  } t | |  } t | |  } t |  } | | | | f S(   s  Load PTB raw data from data directory "data_path".

  Reads PTB text files, converts strings to integer ids,
  and performs mini-batching of the inputs.

  The PTB dataset comes from Tomas Mikolov's webpage:

  http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

  Args:
    data_path: string path to the directory where simple-examples.tgz has
      been extracted.

  Returns:
    tuple (train_data, valid_data, test_data, vocabulary)
    where each of the data objects can be passed to PTBIterator.
  s   ptb.train.txts   ptb.valid.txts   ptb.test.txt(   t   ost   patht   joinR   R!   R   (	   t	   data_patht
   train_patht
   valid_patht	   test_pathR   t
   train_datat
   valid_datat	   test_datat
   vocabulary(    (    sb   /tmp/pip-build-UG86a1/tensorflow/tensorflow-0.6.0.data/purelib/tensorflow/models/rnn/ptb/reader.pyt   ptb_raw_data;   s    c   
      c` s'  t  j |  d t  j }  t |   } | | } t  j | | g d t  j } x1 t |  D]# } |  | | | | d !| | <qY W| d | } | d k r© t d   n  xw t |  D]i } | d d  | | | d |  f } | d d  | | d | d | d  f }	 | |	 f VqΆ Wd S(   s"  Iterate on the raw PTB data.

  This generates batch_size pointers into the raw PTB data, and allows
  minibatch iteration along these pointers.

  Args:
    raw_data: one of the raw data outputs from ptb_raw_data.
    batch_size: int, the batch size.
    num_steps: int, the number of unrolls.

  Yields:
    Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
    The second element of the tuple is the same data time-shifted to the
    right by one.

  Raises:
    ValueError: if batch_size or num_steps are too high.
  t   dtypei   i    s1   epoch_size == 0, decrease batch_size or num_stepsN(   t   npt   arrayt   int32R   t   zerosR   t
   ValueError(
   t   raw_datat
   batch_sizet	   num_stepst   data_lent	   batch_lenR   t   it
   epoch_sizeR   t   y(    (    sb   /tmp/pip-build-UG86a1/tensorflow/tensorflow-0.6.0.data/purelib/tensorflow/models/rnn/ptb/reader.pyt   ptb_iteratorZ   s    
!(0(   t   __doc__t
   __future__R    R   R   R   R"   t   syst   timet   tensorflow.python.platformt
   tensorflowt   numpyR/   t	   six.movesR   t   tfR   R   R   R!   t   NoneR-   R<   (    (    (    sb   /tmp/pip-build-UG86a1/tensorflow/tensorflow-0.6.0.data/purelib/tensorflow/models/rnn/ptb/reader.pyt   <module>   s"   			