ó
Ë½÷Xc           @` s:  d  d l  m Z m Z m Z d  d l Z d  d l Z d  d l Z d  d l m Z d  d l	 Z
 d  d l m Z d d d d g Z e j d	 ƒ Z e j d
 ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z e j d ƒ Z d e f d „  ƒ  YZ d e f d „  ƒ  YZ d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z  d „  Z! d „  Z" d „  Z# d „  Z$ d „  Z% d „  Z& d  „  Z' d! „  Z( d" „  Z) d e* f d# „  ƒ  YZ+ d$ „  Z, d% „  Z- d& „  Z. d' „  Z/ d( „  Z0 e1 e0 _2 e3 d) k r6d  d l4 Z4 e4 j5 d* Z6 e0 e6 ƒ n  d S(+   i    (   t   divisiont   print_functiont   absolute_importN(   t   partial(   t   nextt   MetaDatat   loadarfft	   ArffErrort   ParseArffErrors   ^\s*@s   ^%s   ^\s+$s   ^@\S*s   ^@[Dd][Aa][Tt][Aa]s*   ^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)s/   ^@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)s   '(..+)'\s+(..+$)s   (\S+)\s+(..+$)c           B` s   e  Z RS(    (   t   __name__t
   __module__(    (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyR   7   s   c           B` s   e  Z RS(    (   R	   R
   (    (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyR   ;   s   c         C` sÖ   |  j  ƒ  j ƒ  } | d d k r& d S| t d ƒ  d k r@ d S| t d ƒ  d k rZ d S| t d ƒ  d k rt d S| t d ƒ  d k rŽ d S| t d ƒ  d k r¨ d S| t d	 ƒ  d	 k rÂ d	 St d
 | ƒ ‚ d S(   s`   Given an arff attribute value (meta data), returns its type.

    Expect the value to be a name.i    t   {t   nominalt   realt   numerict   integert   stringt
   relationalt   dates   unknown attribute %sN(   t   lowert   stript   lenR   (   t   attrtypet
   uattribute(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt
   parse_typeE   s     c         C` s   |  j  d ƒ S(   s5   If attribute is nominal, returns a list of the valuest   ,(   t   split(   t	   attribute(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   get_nominal\   s    c         C` s\   t  |  ƒ g } | d j ƒ  d d k r8 t d ƒ ‚ n  | j g  |  D] } | ^ qE ƒ | S(   s4   Read each line of the iterable and put it in a list.i    R   s0   This looks like a sparse ARFF: not supported yet(   R   R   t
   ValueErrort   extend(   t   ofilet   datat   i(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   read_data_lista   s
     c         C` s]   t  |  ƒ g } d } | d j ƒ  d d k r> t d ƒ ‚ n  x |  D] } | d 7} qE W| S(   s5   Read the whole file to get number of data attributes.i   i    R   s0   This looks like a sparse ARFF: not supported yet(   R   R   R   (   R   R    t   locR!   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt	   get_ndataj   s    c         C` s    t  |  ƒ } t d „  | Dƒ ƒ S(   s
  Given a string containing a nominal type definition, returns the
    string len of the biggest component.

    A nominal type is defined as seomthing framed between brace ({}).

    Parameters
    ----------
    atrv : str
       Nominal type definition

    Returns
    -------
    slen : int
       length of longest component

    Examples
    --------
    maxnomlen("{floup, bouga, fl, ratata}") returns 6 (the size of
    ratata, the longest nominal value).

    >>> maxnomlen("{floup, bouga, fl, ratata}")
    6
    c         s` s   |  ] } t  | ƒ Vq d  S(   N(   R   (   t   .0R!   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pys	   <genexpr>Ž   s    (   t   get_nom_valt   max(   t   atrvt   nomtp(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt	   maxnomlenu   s    c         C` sZ   t  j d ƒ } | j |  ƒ } | rJ t d „  | j d ƒ j d ƒ Dƒ ƒ St d ƒ ‚ d S(   s©  Given a string containing a nominal type, returns a tuple of the
    possible values.

    A nominal type is defined as something framed between braces ({}).

    Parameters
    ----------
    atrv : str
       Nominal type definition

    Returns
    -------
    poss_vals : tuple
       possible values

    Examples
    --------
    >>> get_nom_val("{floup, bouga, fl, ratata}")
    ('floup', 'bouga', 'fl', 'ratata')
    s   {(.+)}c         s` s   |  ] } | j  ƒ  Vq d  S(   N(   R   (   R%   R!   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pys	   <genexpr>©   s    i   R   s(   This does not look like a nominal stringN(   t   ret   compilet   matcht   tuplet   groupR   R   (   R(   t	   r_nominalt   m(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyR&   ‘   s
    &c         C` s  t  j d ƒ } | j |  ƒ } | r| j d ƒ j ƒ  } d  } d | k rf | j d d ƒ } d } n | j d d ƒ } d } d | k r¥ | j d d	 ƒ } d
 } n  d | k rÌ | j d d ƒ } d } n  d | k ró | j d d ƒ } d } n  d | k r| j d d ƒ } d } n  d | k rA| j d d ƒ } d } n  d | k sYd | k rht d ƒ ‚ n  | d  k rƒt d ƒ ‚ n  | | f St d ƒ ‚ d  S(   Ns%   [Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$i   t   yyyys   %Yt   Yt   yys   %yt   MMs   %mt   Mt   dds   %dt   Dt   HHs   %Ht   ht   mms   %MR1   t   sss   %St   st   zt   Zs6   Date type attributes with time zone not supported, yets"   Invalid or unsupported date formats   Invalid or no date format(   R+   R,   R-   R/   R   t   Nonet   replaceR   (   R(   t   r_dateR1   t   patternt   datetime_unit(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   get_date_format®   s>    						
c         C` s   t  j d „  |  ƒ S(   sW   Skip header.

    the first next() call of the returned iterator will be the @data linec         S` s   t  j |  ƒ S(   N(   t
   r_datametaR-   (   t   x(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   <lambda>Ú   s    (   t	   itertoolst	   dropwhile(   R   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   go_dataÖ   s    c         C` s×   | j  ƒ  } t j | ƒ } | rŸ | j d ƒ } t j | ƒ r` t | ƒ \ } } t |  ƒ } q¯ t j | ƒ r t | ƒ \ } } t |  ƒ } q¯ t	 d ƒ ‚ n t	 d | ƒ ‚ | d k rÊ t	 d ƒ ‚ n  | | | f S(   sh  Parse a raw string in header (eg starts by @attribute).

    Given a raw string attribute, try to get the name and type of the
    attribute. Constraints:

    * The first line must start with @attribute (case insensitive, and
      space like characters before @attribute are allowed)
    * Works also if the attribute is spread on multilines.
    * Works if empty lines or comments are in between

    Parameters
    ----------
    attribute : str
       the attribute string.

    Returns
    -------
    name : str
       name of the attribute
    value : str
       value of the attribute
    next : str
       next line to be parsed

    Examples
    --------
    If attribute is a string defined in python as r"floupi real", will
    return floupi as name, and real as value.

    >>> iterable = iter([0] * 10) # dummy iterator
    >>> tokenize_attribute(iterable, r"@attribute floupi real")
    ('floupi', 'real', 0)

    If attribute is r"'floupi 2' real", will return 'floupi 2' as name,
    and real as value.

    >>> tokenize_attribute(iterable, r"  @attribute 'floupi 2' real   ")
    ('floupi 2', 'real', 0)

    i   s   multi line not supported yets   First line unparsable: %sR   s'   relational attributes not supported yet(
   R   t   r_attributeR-   R/   t   r_comattrvalt   tokenize_single_commaR   t   r_wcomattrvalt   tokenize_single_wcommaR   (   t   iterableR   t   sattrt   mattrR(   t   namet   typet	   next_item(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   tokenize_attributeà   s    )c         C` s€   t  j |  ƒ } | rf y. | j d ƒ j ƒ  } | j d ƒ j ƒ  } Wqv t k
 rb t d ƒ ‚ qv Xn t d |  ƒ ‚ | | f S(   Ni   i   s    Error while tokenizing attributes    Error while tokenizing single %s(   RM   R-   R/   R   t
   IndexErrorR   (   t   valR1   RT   RU   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyRN   !  s    c         C` s€   t  j |  ƒ } | rf y. | j d ƒ j ƒ  } | j d ƒ j ƒ  } Wqv t k
 rb t d ƒ ‚ qv Xn t d |  ƒ ‚ | | f S(   Ni   i   s    Error while tokenizing attributes    Error while tokenizing single %s(   RO   R-   R/   R   RX   R   (   RY   R1   RT   RU   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyRP   0  s    c   	      C` s  t  |  ƒ } x t j | ƒ r- t  |  ƒ } q Wd } g  } x½ t j | ƒ sù t j | ƒ } | rê t j | ƒ } | r¤ t |  | ƒ \ } } } | j | | f ƒ qö t	 j | ƒ } | rË | j
 d ƒ } n t d | ƒ ‚ t  |  ƒ } q= t  |  ƒ } q= W| | f S(   s&   Read the header of the iterable ofile.i   s   Error parsing line %sN(   R   t	   r_commentR-   R@   RF   t   r_headerlineRL   RW   t   appendt
   r_relationR/   R   (	   R   R!   t   relationt
   attributesR1   t   isattrRT   RU   t   isrel(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   read_header?  s&    c         C` s!   d |  k r t  j St |  ƒ Sd S(   sv  given a string x, convert it to a float. If the stripped string is a ?,
    return a Nan (missing value).

    Parameters
    ----------
    x : str
       string to convert

    Returns
    -------
    f : float
       where float can be nan

    Examples
    --------
    >>> safe_float('1')
    1.0
    >>> safe_float('1\n')
    1.0
    >>> safe_float('?\n')
    nan
    t   ?N(   t   npt   nant   float(   RG   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt
   safe_floata  s    c         C` sR   |  j  ƒ  } | | k r | S| d k r, | St d t | ƒ t | ƒ f ƒ ‚ d  S(   NRc   s   %s value not in %s(   R   R   t   str(   t   valuet   pvaluet   svalue(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   safe_nominal~  s    c         C` sm   |  j  ƒ  j  d ƒ j  d ƒ } | d k r: t j d | ƒ St j j | | ƒ } t j | ƒ j d | ƒ Sd  S(   Nt   't   "Rc   t   NaTs   datetime64[%s](   R   Rd   t
   datetime64t   datetimet   strptimet   astype(   Ri   t   date_formatRD   t   date_strt   dt(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt	   safe_dateˆ  s
    c           B` sD   e  Z d  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   sy  Small container to keep useful informations on a ARFF dataset.

    Knows about attributes names and types.

    Examples
    --------
    ::

        data, meta = loadarff('iris.arff')
        # This will print the attributes names of the iris.arff dataset
        for i in meta:
            print i
        # This works too
        meta.names()
        # Getting attribute type
        types = meta.types()

    Notes
    -----
    Also maintains the list of attributes in order, i.e. doing for i in
    meta, where meta is an instance of MetaData, will return the
    different attribute names in the order they were defined.
    c         C` s¹   | |  _  i  |  _ g  |  _ x— | D] \ } } t | ƒ } |  j j | ƒ | d k rr | t | ƒ f |  j | <q" | d k rž | t | ƒ d f |  j | <q" | d  f |  j | <q" Wd  S(   NR   R   i    (   RT   t   _attributest
   _attrnamesR   R\   R&   RE   R@   (   t   selft   relt   attrRT   Ri   t   tp(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   __init__©  s    			 c         C` s‹   d } | d |  j  7} xm |  j D]b } | d | |  j | d f 7} |  j | d ry | d t |  j | d ƒ 7} n  | d 7} q! W| S(   Nt    s   Dataset: %s
s   	%s's type is %si    i   s   , range is %ss   
(   RT   Ry   Rx   Rh   (   Rz   t   msgR!   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   __repr__¹  s    "c         C` s   t  |  j ƒ S(   N(   t   iterRy   (   Rz   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   __iter__Ã  s    c         C` s   |  j  | S(   N(   Rx   (   Rz   t   key(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   __getitem__Æ  s    c         C` s   |  j  S(   s#   Return the list of attribute names.(   Ry   (   Rz   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   namesÉ  s    c         C` s+   g  |  j  D] } |  j | d ^ q
 } | S(   s#   Return the list of attribute types.i    (   Ry   Rx   (   Rz   RT   t
   attr_types(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   typesÍ  s    '(	   R	   R
   t   __doc__R~   R   Rƒ   R…   R†   Rˆ   (    (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyR   ‘  s   		
			c         C` sV   t  |  d ƒ r |  } n t |  d ƒ } z t | ƒ SWd | |  k	 rQ | j ƒ  n  Xd S(   s'  
    Read an arff file.

    The data is returned as a record array, which can be accessed much like
    a dictionary of numpy arrays.  For example, if one of the attributes is
    called 'pressure', then its first 10 data points can be accessed from the
    ``data`` record array like so: ``data['pressure'][0:10]``


    Parameters
    ----------
    f : file-like or str
       File-like object to read from, or filename to open.

    Returns
    -------
    data : record array
       The data of the arff file, accessible by attribute names.
    meta : `MetaData`
       Contains information about the arff file such as name and
       type of attributes, the relation (name of the dataset), etc...

    Raises
    ------
    ParseArffError
        This is raised if the given file is not ARFF-formatted.
    NotImplementedError
        The ARFF file has an attribute which is not supported yet.

    Notes
    -----

    This function should be able to read most arff files. Not
    implemented functionality include:

    * date type attributes
    * string type attributes

    It can read files with numeric and nominal attributes.  It cannot read
    files with sparse data ({} in the file).  However, this function can
    read files with missing data (? in the file), representing the data
    points as NaNs.

    Examples
    --------
    >>> from scipy.io import arff
    >>> from cStringIO import StringIO
    >>> content = """
    ... @relation foo
    ... @attribute width  numeric
    ... @attribute height numeric
    ... @attribute color  {red,green,blue,yellow,black}
    ... @data
    ... 5.0,3.25,blue
    ... 4.5,3.75,green
    ... 3.0,4.00,red
    ... """
    >>> f = StringIO(content)
    >>> data, meta = arff.loadarff(f)
    >>> data
    array([(5.0, 3.25, 'blue'), (4.5, 3.75, 'green'), (3.0, 4.0, 'red')],
          dtype=[('width', '<f8'), ('height', '<f8'), ('color', '|S6')])
    >>> meta
    Dataset: foo
    	width's type is numeric
    	height's type is numeric
    	color's type is nominal, range is ('red', 'green', 'blue', 'yellow', 'black')

    t   readt   rtN(   t   hasattrt   opent	   _loadarfft   close(   t   fR   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyR   Ó  s    F	c         ` s*  y t  |  ƒ \ } } Wn/ t k
 rG } d t | ƒ } t | ƒ ‚ n Xt } x5 | D]- \ } } t | ƒ } | d k rU t } qU qU Wt | | ƒ }	 i t d 6t d 6t d 6}
 i t	 d 6t	 d 6t	 d 6} g  } g  ‰  | sÕx| D]ê \ } } t | ƒ } | d k rSt
 | ƒ \ } } | j | d | f ƒ ˆ  j t t d | d	 | ƒƒ qä | d
 k rªt | ƒ } | j | d | f ƒ t | ƒ } ˆ  j t t d | ƒƒ qä | j | |
 | f ƒ ˆ  j t	 ƒ qä Wn t d ƒ ‚ t ˆ  ƒ ‰ d ‡  ‡ f d † } | |  ƒ } t j | | ƒ } | |	 f S(   Ns'   Error while parsing header, error was: R   R   R   R   R   s   datetime64[%s]Rt   RD   R   s   S%dRj   s*   String attributes not supported yet, sorryR   c         3` s†   t  t ˆ ƒ ƒ } xm |  D]e } t j | ƒ s t j | ƒ rC q n  | j | ƒ } t g  | D] } ˆ  | | | ƒ ^ q\ ƒ Vq Wd  S(   N(   t   listt   rangeRZ   R-   t   r_emptyR   R.   (   t   row_itert   delimt   elemst   rawt   rowR!   (   t
   convertorst   ni(    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt	   generator[  s    (   Rb   R   Rh   R   t   FalseR   t   TrueR   Rf   Rg   RE   R\   R   Rw   R*   R&   Rl   t   NotImplementedErrorR   Rd   t   fromiter(   R   R{   R|   t   eR€   t   hasstrRT   Ri   RU   t   metat
   acls2dtypet	   acls2convt   descrRt   RD   t   nRj   R›   t   aR    (    (   R™   Rš   s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyRŽ   $  sL    	

c         C` sP   |  j  d |  j  d } t j |  ƒ t j |  ƒ t j |  ƒ t j |  ƒ | f S(   Ng      ð?i   (   t   sizeRd   t   nanmint   nanmaxt   meant   std(   R    t   nbfac(    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   basic_stats}  s    c   
   
   C` sØ   | d } | d k s. | d k s. | d k ri t  | ƒ \ } } } } t d |  | | | | | f ƒ nk |  d } x8 t t | d ƒ d ƒ D] }	 | | d |	 d 7} qŽ W| | d d	 7} | d
 7} t | ƒ d  S(   Ni    R   R   R   s   %s,%s,%f,%f,%f,%fs   ,{i   R   iÿÿÿÿt   }(   R®   t   printR’   R   (
   RT   R}   R    RU   t   minR'   R«   R¬   R€   R!   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   print_attribute‚  s    
$#
!
c         C` s_   t  |  ƒ \ } } t t | j ƒ ƒ t | j ƒ x& | D] } t | | | | | ƒ q9 Wd  S(   N(   R   R°   R   t   dtypeR¨   R²   (   t   filenameR    R¢   R!   (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt	   test_weka  s
    t   __main__i   (7   t
   __future__R    R   R   R+   RI   Rq   t	   functoolsR   t   numpyRd   t   scipy._lib.sixR   t   __all__R,   t   r_metaRZ   R“   R[   RF   R]   RL   RM   RO   t   IOErrorR   R   R   R   R"   R$   R*   R&   RE   RK   RW   RN   RP   Rb   Rg   Rl   Rw   t   objectR   R   RŽ   R®   R²   Rµ   Rœ   t   __test__R	   t   syst   argvR´   (    (    (    s5   /tmp/pip-build-7oUkmx/scipy/scipy/io/arff/arffread.pyt   <module>   sX   
								(	
	A			"		
		B	Q	Y				