U
    NZfKA                     @   s>  d Z ddlmZ ddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ejdZejdZedd	 Zed
d Zeejdddgddgddgdgdgdgdggejdddgdd Zedd Zeejdddggdd Zdd Zedd Zeejddge	ejgd gd!gd"e
dgd#d$d%fdd&ge	ejgd!gd'ejd(gd#d)gd*d%fd#d)ge	ejgd!gd'ejd(gd#d)gd*d%fgd+d, Zeejd-i e	d#d.ejd/d0ejd1gd d&d2d3d!d4d5gd6d7d8ejd9ejd:gd;fg g d<d=d>e	d#d.d?d/d0d@d1gd d&d2d3d!d4d5gd6d7d8d@d9d?d:gd;fd#gd=d>e	ejd.d?d/d0d@d1gd d&d2d3d!d4d5gd6d7d8d@d9d?d:gd;fdg g d<ie	d#d.ejd/d0ejd1gd d&d2d3d!d4d5gd6d7d8ejd9ejd:gd;fgdAdB ZedCdD ZedEdF Z edGdH Z!eejdIdJdKgdLdM Z"eejdNdOd dPgejejgd2dQggfd=dRdPgd@dSgdTdQggfgdUdV Z#edWdX Z$eejdYd ejdZgdZejggfd&d d[d\dZgejejggfgd]d^ Z%ed_d` Z&edadb Z'eejdce(ddde e(df dddgie	e(dde(dfgfe(dddg dh i e	e(ddd gd?d&ggfe(dddi dddgie	ejd gfgdjdk Z)dldm Z*eejdnd=d?dogfdOejdpgfgdqdr Z+dsdt Z,eejdudOd=gdvdw Z-eejdxdydzd{d|d}d~gfd}d#d~ifgdd Z.edd Z/edd Z0edd Z1dd Z2dd Z3dS )zg
Tests that NA values are properly handled during
parsing for all of the parsers defined in parsers.py
    )StringION)STR_NA_VALUES)	DataFrameIndex
MultiIndexZpyarrow_skipZpyarrow_xfailc                 C   sT   | }d}| t|}tdddgdtjdgtjddggd	d
dgd}t|| d S )NzA,B,C
a,b,c
d,,f
,g,h
abcdfghABCcolumnsread_csvr   r   npnantmassert_frame_equalall_parsersparserdataresultexpected r   I/tmp/pip-unpacked-wheel-nbcvw55c/pandas/tests/io/parser/test_na_values.pytest_string_nas   s    r!   c                 C   sN   | }d}t ddgtjdgtjtjggddgd}|t|}t|| d S )NzA,B
foo,bar
NA,baz
NaN,nan
foobarbazr   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r    test_detect_string_na'   s     r&   	na_valuesz-999.0z-999ig     8r   zA,B
-999,1.2
2,-999
3,4.5
z"A,B
-999,1.200
2,-999.000
3,4.500
c                 C   sL   | }t tjdgdtjgddggddgd}|jt||d}t|| d S )	Ng333333?       @g      @g      @r   r   r   r'   r%   )r   r   r'   r   r   r   r   r   r    test_non_string_na_values6   s    &r*   c                    s   ddddddddd	d
dddddddddh}|t ks6t| }t|fdd td fddt|D }ttjt	t	d}|j
|d d}t|| d S )Nz-1.#INDz1.#QNANz1.#INDz-1.#QNANz#N/AzN/Azn/aZNAz<NA>z#NAZNULLnullNaNr   z-NaNz-nanz#N/A N/A Nonec                    sf   | dkrd}n| dkr&d dg|  }| | }|  d k rbd dg |  d  }| | }|S )Nr   r-   ,   )join)ivbufZjoined)nvr   r    r   x   s    z!test_default_na_values.<locals>.f
c                    s   g | ]\}} ||qS r   r   ).0r2   r3   )r   r   r    
<listcomp>   s     z*test_default_na_values.<locals>.<listcomp>)r   index)header)r   AssertionErrorlenr   r1   	enumerater   r   r   ranger   r   r   )r   Z
_NA_VALUESr   r   r   r   r   )r   r5   r    test_default_na_values\   s8     r?   r$   c                 C   s`   | }d}t dtjdgtjdtjgddtjggddd	gd
}|jt||dgd}t|| d S )Nz3A,B,C
ignore,this,row
1,NA,3
-1.#IND,5,baz
7,8,NaN
      ?            r   r   r   r   r0   )r'   Zskiprowsr%   )r   r'   r   r   r   r   r   r   r    test_custom_na_values   s    " rE   c                 C   s`   d}| }| t|}ttjdtjdgtdtjddtjgtddddgd}t|| d S )Nz1A,B,C
True,False,True
NA,True,False
False,NA,TrueTFdtyper   r   r   )	r   r   r   r   arrayr   objectr   r   r   r   r   r   r   r   r   r    test_bool_na_values   s    rL   c                 C   sh   d}| }|j t|dgdgdd}ttjdtjdgtjdtjdgtjdtjdgd}t|| d S )Nz3A,B,C
foo,bar,NA
bar,foo,foo
foo,bar,NA
bar,foo,foor"   r#   )r   r   r)   rH   r   r   r   r   Zdfr   r   r   r    test_na_value_dict   s    rN   zindex_col,expectedr0   rB   )r   r	   r
   r   namer9      )r   r
   )r   r0   r	   )namesc                 C   s.   d}| }|j t|t |d}t|| d S )Nza,b,c,d
0,NA,1,5
)r'   	index_col)r   r   setr   r   )r   rT   r   r   r   r   r   r   r    test_na_value_dict_multi_index   s    rV   zkwargs,expectedr   r
   er   rA         rC   ZonetwoZthreefivesevenrH   )r   r   Fr'   keep_default_nar-   r   c                 C   s*   d}| }|j t|f|}t|| d S )NzAA,B,C
a,1,one
b,2,two
,3,three
d,4,nan
e,5,five
nan,6,
g,7,seven
r   r   r   r   )r   kwargsr   r   r   r   r   r   r    test_na_values_keep_default   s    /
ra   c              
   C   sd   d}| }|j t|dd}tdddddd	d
gdddddddgdddd	dddgd}t|| d S )NzAA,B,C
a,1,None
b,2,two
,3,None
d,4,nan
e,5,five
nan,6,
g,7,seven
F)r^   r   r   r-   r
   rW   r   r   r0   rR   rA   rX   rB   rY   rC   r.   rZ   r[   r\   rH   r   r   r   r   r   rK   r   r   r    !test_no_na_values_no_keep_default"  s    
rc   c                 C   sF   d}| }|j t|ddgidd}tdgtjgd}t|| d S )Nza,b
,2r   2Fr]   r-   r   r   r   rK   r   r   r    &test_no_keep_default_na_dict_na_values=  s      rf   c                 C   sD   d}| }|j t|ddidd}tdgtjgd}t|| d S )Nza,b
1,2r   rR   Fr]   r0   re   r   rM   r   r   r    -test_no_keep_default_na_dict_na_scalar_valuesI  s
    rg   col_zero_na_valuesi Z113125c              	   C   st   d}| }t tjdgtjdgdtjgddgddgd	d
gtjdgd}|jt|d dd
dd|dd}t|| d S )Nz_113125,"blah","/blaha",kjsdkj,412.166,225.874,214.008
729639,"qwer","",asdfkj,466.681,,252.373
g    ND&AZqwerz/blahaZkjsdkjZasdfkjg-y@g7A`*}@z225.874r-   g-o@)r   r0   rR   rA   rX   rB   rY   Fz214.008Zblah)rR   rY   r0   r   )r:   r^   r'   r%   )r   rh   r   r   r   r   r   r   r    1test_no_keep_default_na_dict_na_values_diff_reprsU  s&    ri   zna_filter,row_dataTr   r   1r   3c                 C   s>   d}| }|j t|dg|d}t|ddgd}t|| d S )NzA,B
1,A
nan,B
3,C
r   )r'   	na_filterr   r   rb   )r   rl   row_datar   r   r   r   r   r   r    !test_na_values_na_filter_overrides  s
    	rn   c              
   C   sr   | }d}| t|}tdddddtjtjtjgdddd	d
tjtjtjggddddddddgd}t|| d S )NzlDate,Currency,Symbol,Type,Units,UnitPrice,Cost,Tax
2012-03-14,USD,AAPL,BUY,1000
2012-05-12,USD,SBUX,SELL,500z
2012-03-14ZUSDZAAPLZBUYi  z
2012-05-12ZSBUXZSELLi  DateZCurrencyZSymbolTypeZUnitsZ	UnitPriceZCostZTaxr   r   r   r   r   r    test_na_trailing_columns  s$    rq   zna_values,row_datar(   re   r@   c                 C   s@   | }ddg}d}|j t|||d}t||d}t|| d S )Nr   r   1,2
2,1rS   r'   r   rb   )r   r'   rm   r   rS   r   r   r   r   r   r    test_na_values_scalar  s    
rt   c                 C   sn   | }ddd}|  }ddg}d}tddgtjtjgg|d	}|jt|||d
}t|| t|| d S )NrR   r0   re   r   r   rr   r@   r(   r   rs   )	copyr   r   r   r   r   r   r   Zassert_dict_equal)r   r   r'   Zna_values_copyrS   r   r   r   r   r   r    test_na_values_dict_aliasing  s    
rv   c                 C   sD   d}| }ddi}|j t||d}tdtjdgi}t|| d S )Nza
foo
1r   r"   r)   r   r0   r   )r   r   r   r'   r   r   r   r   r    test_na_values_dict_col_index  s    rw   zdata,kwargs,expectedl            r6   l           z,1z
,2z
1c                 C   s.   | }|j t|fdd i|}t|| d S )Nr:   r_   )r   r   r`   r   r   r   r   r   r    test_na_values_uint64  s    rx   c                 C   sH   d}| }t ddgitdgddd}|jt|dd	d
}t|| d S )Nza,1
b,2rj   rR   r   r   rO   rQ   r   F)rT   r^   r   r   r   r   r   r   )r   r   r   r   r   r   r   r    *test_empty_na_values_no_default_with_index  s
    rz   zna_filter,index_data5g      @c                 C   sP   | }d}t ddgddgdt|ddd	}|jt|dg|d
}t|| d S )Na,b,c
1,,3
4,5,6r0   rX   rA   rY   )r   r	   r   rO   rQ   )rT   rl   ry   )r   rl   Z
index_datar   r   r   r   r   r   r    test_no_na_filter_on_index  s
    "r}   c                 C   s\   | }d}|j t|dgddgd}tdtjgdtjgdtd	d
gddd}t|| d S )Nzidx,col1,col2
1,3,4
2,inf,-infr   infz-inf)rT   r'   rA   rX   )col1col2r0   rR   idxrO   rQ   )r   r   r   r   r   r   r   r   )r   r   r   outr   r   r   r    !test_inf_na_values_with_int_index  s     r   rl   c                 C   sV   | }d}|rt jnd}tddg|dgddgd}|jt||td	}t|| d S )
Nr|   r-   rj   4r{   rk   6)r   r   r	   )rl   rG   )r   r   r   r   r   strr   r   )r   rl   r   r   emptyr   r   r   r   r    +test_na_values_with_dtype_str_and_na_filter  s    r   zdata, na_values)zfalse,1
,1
trueN)zfalse,1
null,1
trueN)zfalse,1
nan,1
trueN)false,1
foo,1
truer"   r   r"   c              	   C   sF   | }d}t jt|d& |jt|d ddgddi|d W 5 Q R X d S )Nz(Bool column has NA values in column [0a])|(cannot safely convert passed user dtype of bool for object dtyped data in column 0)matchr   r   bool)r:   rS   rG   r'   pytestZraises
ValueErrorr   r   )r   r   r'   r   msgr   r   r    !test_cast_NA_to_bool_raises_error  s    r   c                 C   sd   | }d}|j t|d dddgtttdd }tddgd	d
gddgdddgd}t|| d S )NzDFile: small.csv,,
10010010233,0123,654
foo,,bar
01001000155,4530,898r   r   col3)r   r   r   )r:   rS   rG   Z10010010233Z01001000155Z0123Z4530Z654Z898r0   rA   rQ   )r   r   r   Zdropnar   r   r   r   r   r   r    test_str_nan_dropped<  s     

	r   c                 C   sP   | }d}|j t|ttdddid}tdgdgtjgd}t|| d S )NzA,B,B
X,Y,Z
1,2,infrR   r   Zr~   )r:   r'   r0   ))r   X)r   Yr   )	r   r   listr>   r   r   r   r   r   r   r   r   r    test_nan_multi_indexY  s     
 r   c              	   C   s8   | }d}t jtdd |jt|dd W 5 Q R X d S )N0
NaN
True
False
z	NA valuesr   r   rF   r   r   r   r   r   r   r    test_bool_and_nan_to_booln  s    r   c              	   C   s8   | }d}t jtdd |jt|dd W 5 Q R X d S )Nr   zconvert|NoneTyper   intrF   r   r   r   r   r    test_bool_and_nan_to_int{  s    r   c                 C   s@   | }d}|j t|dd}tdtjddgi}t|| d S )Nr   floatrF   0r@   g        )r   r   r   	from_dictr   r   r   r   r   r   r   r    test_bool_and_nan_to_float  s
    r   )4__doc__ior   Znumpyr   r   Zpandas._libs.parsersr   Zpandasr   r   r   Zpandas._testingZ_testingr   markZusefixturesZskip_pyarrowZxfail_pyarrowr!   r&   Zparametrizer*   r?   rE   rL   rN   r   from_tuplesrV   ra   rc   rf   rg   ri   rn   rq   rt   rv   rw   r   rx   rz   r}   r   r   r   r   r   r   r   r   r   r   r   r    <module>   sX  



0
$

-





( 
 



