σ
ίΖχXc        
   @@  s   d  d l  m Z d d l m Z d  d l m Z d  d l Z d  d l Z d  d l	 Z	 d d d  d d d d	 d d
 d 	 Z d d  Z d S(   i    (   t   absolute_importi   (   t   get_file(   t   zipNs   reuters.npzgΙ?iq   i   i   c	         K@  sZ  d |	 k r+ t  j d  |	 j d  } n  |	 rJ t d t |	    n  t |  d d }  t j |   }
 |
 d } |
 d } |
 j   t j	 j
 |  t j	 j |  t j	 j
 |  t j	 j |  | d	 k	 rg  | D]* } | g g  | D] } | | ^ qο ^ qά } n9 | rHg  | D]# } g  | D] } | | ^ q)^ q} n  | rΈg  } g  } xL t | |  D]; \ } } t |  | k  rj| j |  | j |  qjqjW| } | } n  | sζt g  | D] } t |  ^ qΘ } n  | d	 k	 r?g  | D]= } g  | D]* } | | k s$| | k  r*| n | ^ q^ qω} ni g  } xZ | D]R } g  } x6 | D]. } | | k s}| | k  r_| j |  q_q_W| j |  qLW| } t j | t t |  d |    } t j | t t |  d |    } t j | t t |  d |   } t j | t t |  d |   } | | f | | f f S(
   s7  Loads the Reuters newswire classification dataset.

    # Arguments
        path: where to cache the data (relative to `~/.keras/dataset`).
        num_words: max number of words to include. Words are ranked
            by how often they occur (in the training set) and only
            the most frequent words are kept
        skip_top: skip the top N most frequently occuring words
            (which may not be informative).
        maxlen: truncate sequences after this length.
        test_split: Fraction of the dataset to be used as test data.
        seed: random seed for sample shuffling.
        start_char: The start of a sequence will be marked with this character.
            Set to 1 because 0 is usually the padding character.
        oov_char: words that were cut out because of the `num_words`
            or `skip_top` limit will be replaced with this character.
        index_from: index actual words with this index and higher.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

    Note that the 'out of vocabulary' character is only used for
    words that were present in the training set but are not included
    because they're not making the `num_words` cut here.
    Words that were not seen in the training set but are in the test set
    have simply been skipped.
    t   nb_wordssD   The `nb_words` argument in `load_data` has been renamed `num_words`.s    Unrecognized keyword arguments: t   origins2   https://s3.amazonaws.com/text-datasets/reuters.npzt   xt   yi   N(   t   warningst   warnt   popt	   TypeErrort   strR   t   npt   loadt   closet   randomt   seedt   shufflet   NoneR   t   lent   appendt   maxt   arrayt   int(   t   patht	   num_wordst   skip_topt   maxlent
   test_splitR   t
   start_chart   oov_chart
   index_fromt   kwargst   npzfilet   xst   labelsR   t   wt   new_xst
   new_labelsR   t   nxt   x_traint   y_traint   x_testt   y_test(    (    s5   /tmp/pip-build-X4mzal/keras/keras/datasets/reuters.pyt	   load_data
   sX    


:3	(M''''s   reuters_word_index.jsonc         C@  s;   t  |  d d }  t |   } t j |  } | j   | S(   sΜ   Retrieves the dictionary mapping word indices back to words.

    # Arguments
        path: where to cache the data (relative to `~/.keras/dataset`).

    # Returns
        The word index dictionary.
    R   s>   https://s3.amazonaws.com/text-datasets/reuters_word_index.json(   R   t   opent   jsonR   R   (   R   t   ft   data(    (    s5   /tmp/pip-build-X4mzal/keras/keras/datasets/reuters.pyt   get_word_indexe   s
    	
(   t
   __future__R    t   utils.data_utilsR   t	   six.movesR   t   numpyR   R.   R   R   R,   R1   (    (    (    s5   /tmp/pip-build-X4mzal/keras/keras/datasets/reuters.pyt   <module>   s   		Y