U
    NZfM                  
   @   s  d Z ddlZddlZddlm  mZ ddlm	Z	 ddl
ZddlmZ ddlmZ ddlmZ ejdd Zejdd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zej j!ddd d! Z"ej j!ddd"d# Z#d$d% Z$d&d' Z%d(d) Z&d*d+ Z'd,d- Z(ej )d.ej*e+d/e,d/dej-gd0d1 Z.ej )d2d3d4gd5d6 Z/d7d8 Z0d9d: Z1d;d< Z2ej )d=d3d4gej j!d>dd?d@ Z3ej )d=d3d4gej j!d>ddAdB Z4ej )dCdDdEgej )d=d3d4gdFdG Z5ej )dCdDdEgej )dHej6ej7gdIdJ Z8dKdL Z9dMdN Z:e;dOdPdQ Z<e;dOdRdS Z=dTdU Z>dVdW Z?ej )dXdYdZd[ge7d4d4d4gfdYdZdge7d4d4d3gfgd\d] Z@d^d_ ZAej )d`ejBejCej+gdadb ZDdcdd ZEdedf ZFdgdh ZGdidj ZHdkdl ZIdmdn ZJdS )oz
This module tests the functionality of StringArray and ArrowStringArray.
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
    N)is_dtype_equal)ArrowStringArray)Versionc                 C   s   t j| dS )z=Fixture giving StringDtype from parametrized 'string_storage')storage)pdStringDtype)string_storage r	   K/tmp/pip-unpacked-wheel-nbcvw55c/pandas/tests/arrays/string_/test_string.pydtype   s    r   c                 C   s   |   S )z3Fixture giving array type from parametrized 'dtype')Zconstruct_array_typer   r	   r	   r
   cls   s    r   c                 C   s   t dt jdt jdg| di}d}t||ks4td}t|j|ksJt| jdkrXdnd	}d
| d}t|jj|ks|td S )NAabr   z      A
0     a
1  <NA>
2     bz40       a
1    <NA>
2       b
Name: A, dtype: stringpyarrowr   StringArray<z+>
['a', <NA>, 'b']
Length: 3, dtype: string)r   	DataFramearrayNAreprAssertionErrorr   r   )r   dfexpectedZarr_namer	   r	   r
   	test_repr   s     r   c                 C   s6   |  dd dg}|d d k	s t|d tjks2td S )Nr   r      )_from_sequencer   r   r   )r   r   r	   r	   r
   test_none_to_nan+   s    r   c              	   C   s   |  ddg}| tjjkr d}nd}tjt|d d|d< W 5 Q R X | tjjkrXd}nd}tjt|d td	d
g|d d < W 5 Q R X d S )Nr   r   z4Cannot set non-string value '10' into a StringArray.Scalar must be NA or strmatch
   r   zMust provide strings.r      )	r   r   arraysr   pytestraises	TypeErrornpr   )r   arrmsgr	   r	   r
   test_setitem_validates1   s    r+   c                 C   s<   t jddg| d}d|d< t jddg| d}t|| d S )Nr   cr   dr   r   r   tmassert_extension_array_equal)r   r)   r   r	   r	   r
   test_setitem_with_scalar_stringC   s    r1   c                 C   sh   t jdddg| d}tdd g}| }||ddg< t jdt jdg| d}t|| t|| d S )Nr   r   r,   r   r   r   r   )r   r   r(   copyr   r/   r0   assert_numpy_array_equal)r   r)   valueZ
value_origr   r	   r	   r
   $test_setitem_with_array_with_missingL   s    r5   c                 C   sP   t t jddd}d |d< || }t|j| s6t|d}t|| d S )N2000   )Zperiodsr   zdatetime64[ns])	r   SeriesZ
date_rangeastyper   r   r   r/   assert_series_equal)r   serZcastedresultr	   r	   r
   test_astype_roundtripY   s    

r=   c                 C   s   t jdddd d g| d}t jddd dd g| d}|| }t jdd	d d d g| d}t|| ||}t|| ||}t jd
dd d d g| d}t|| |j|dd}t jdd	ddd g| d}t|| d S )Nr   r   r,   r   xyzaxbyxayb-)
fill_valuezc-z-z)r   r8   r/   r:   addZradd)r   r   r   r<   r   r	   r	   r
   test_addd   s    

rH   c              	   C   s   | j dkr*d}tjjd |d}|j| tjdddg| d}tjdddggt	d}tj
tdd	 ||  W 5 Q R X t|}tj
tdd	 ||  W 5 Q R X d S )
Nr   z*Failed: DID NOT RAISE <class 'ValueError'>r&   reasonr   r   r,   r   z3 != 1r    )r   r%   markxfailnode
add_markerr   r   r(   objectr&   
ValueErrorr8   )r   requestrJ   rK   r   r   sr	   r	   r
   test_add_2dx   s    

rS   c                 C   sz   t jddd d g| d}dd dd g}|| }t jdd d d g| d}t|| || }t jdd d d g| d}t|| d S Nr   r   r   r>   r?   rA   rC   r.   )r   r   otherr<   r   r	   r	   r
   test_add_sequence   s    rV   c                 C   s~   | j dkr*d}tjjt|d}|j| tjddd g| d}|d }tjdd	d g| d}t	
|| d| }t	
|| d S )
Nr   z?unsupported operand type(s) for *: 'ArrowStringArray' and 'int'rI   r   r   r   r#   ZaaZbb)r   r%   rK   rL   NotImplementedErrorrM   rN   r   r   r/   r0   )r   rQ   rJ   rK   r   r<   r   r	   r	   r
   test_mul   s    
rX   zGH-28527)rJ   c                 C   s   t jddddg| d}t dddd	gg}||tks<t|| }t d
dddgg| }t|| || }t ddddgg| }t|| d S )Nr   r   r,   r-   r   tr?   vwatrB   ZcvZdwtarD   Zvcwd)	r   r   r   __add__NotImplementedr   r9   r/   assert_frame_equalr   r)   r   r<   r   r	   r	   r
   test_add_strings   s    rc   c                 C   s   t jddtjtjg| d}t dtjdtjgg}||tksDt|| }t dtjtjtjgg| }t	
|| || }t dtjtjtjgg| }t	
|| d S rT   )r   r   r(   nanr   r_   r`   r   r9   r/   ra   rb   r	   r	   r
   test_add_frame   s      re   c                    s   d| j  d tjdd dg|d}dt| }|jdkrBdnd}tj fdd	|D td}tj||d}t|| d S )
N__r   r,   r   r   boolean[pyarrow]booleanc                    s   g | ]}t | qS r	   )getattr).0itemop_namerU   r	   r
   
<listcomp>   s     z2test_comparison_methods_scalar.<locals>.<listcomp>)	__name__r   r   ri   r   r(   rO   r/   r0   )comparison_opr   r   r<   expected_dtyper   r	   rl   r
   test_comparison_methods_scalar   s    rr   c                 C   sh   d| j  d}tjdd dg|d}t||tj}|jdkr@dnd}tjd d d g|d}t|| d S )Nrf   r   r,   r   r   rg   rh   )ro   r   r   ri   r   r   r/   r0   )rp   r   rm   r   r<   rq   r   r	   r	   r
   $test_comparison_methods_scalar_pd_na   s    rs   c           	   	   C   s   d| j  d}tjdd dg|d}d}|dkrZtjtdd t||| W 5 Q R X d S t|||}d	d d	gd
d d
gd| }|jdkrdnd}tj||d}t	|| d S )Nrf   r   r,   r   *   )__eq____ne__znot supported betweenr    FTr   rg   rh   )
ro   r   r   r%   r&   r'   ri   r   r/   r0   )	rp   r   rm   r   rU   r<   Zexpected_datarq   r   r	   r	   r
   )test_comparison_methods_scalar_not_string   s    rw   c                 C   s   d| j  d}tjdd dg|d}d d dg}t|||}|jdkrHdnd}tjt|d dd	}t|d
 ||d
 |d
< tj||d}t	|| t||tj
}tjd d d g|d}t	|| d S )Nrf   r   r,   r   r   rg   rh   rO   )rF   r   )ro   r   r   ri   r   r(   fulllenr/   r0   r   )rp   r   rm   r   rU   r<   rq   r   r	   r	   r
   test_comparison_methods_array   s    
r{   c              	   C   s  | t jjkrd}nd}tjt|d | tjddgdd W 5 Q R X tjt|d | tg  W 5 Q R X | t jjkr| tjdtjgt	d | tjdd gt	d nbtjt|d | tjdtjgt	d W 5 Q R X tjt|d | tjdd gt	d W 5 Q R X tjt|d | tjdt j
gt	d W 5 Q R X tjt|d$ | tjdtdd	gt	d W 5 Q R X tjt|d$ | tjdtdd	gt	d W 5 Q R X d S )
Nz7StringArray requires a sequence of strings or pandas.NAzBUnsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArrayr    r   r   ZS1r   NaTns)r   r$   r   r%   r&   rP   r(   r   rd   rO   r|   Z
datetime64Ztimedelta64)r   r*   r	   r	   r
   test_constructor_raises   s(     " "(r~   nard   c                 C   s>   t jtdt jg}tt jtjd| gdd| d S )Nr   rO   r   )r   r$   r   r(   r   r   r/   r0   )r   r   r	   r	   r
   test_constructor_nan_like  s
     r   r2   TFc           	      C   s   t jdt jgtd}| }t jdtjgtd}|j|| d}|tkrhdd l	}||j||
 dd}n||}t|| t|| d S )Nr   r   )r2   r   TtypeZfrom_pandas)r(   r   rd   rO   r2   r   r   r   r   r   stringr/   r0   r3   )	r2   r   rQ   Znan_arrZexpected_inputZna_arrr<   par   r	   r	   r
   test_from_sequence_no_mutate&  s    r   c              	   C   s   t jdddg| d}|d}tjdddgdd}t|| t jdt jdg| d}d	}tjt	|d
 |d W 5 Q R X d S )N123r   Zint64r   r#      zJint\(\) argument must be a string, a bytes-like object or a( real)? numberr    )
r   r   r9   r(   r/   r3   r   r%   r&   r'   )r   r)   r<   r   r*   r	   r	   r
   test_astype_int9  s    
r   c                 C   sF   t jdt jdg| d}|d}t jdt jdgdd}t|| d S )Nr   r   r   Int64r   r   )r   r   r   r9   r/   r0   r   r)   r<   r   r	   r	   r
   test_astype_nullable_intE  s    
r   c                 C   sF   t jdt jdg| d}||}t jdtjdg|d}t|| d S )Nz1.1z3.3r   g?gffffff
@)r   r8   r   r9   r(   rd   r/   r:   )r   Zany_float_dtyper;   r<   r   r	   r	   r
   test_astype_floatM  s    
r   skipnazNot implemented StringArray.sumc                 C   s0   t jdddg|d}|j| d}|dks,td S Nr   r   r,   r   r   abc)r   r8   sumr   r   r   r)   r<   r	   r	   r
   test_reduceU  s    r   c                 C   sJ   t jd dd ddd g|d}|j| d}| r8|dksFtnt |sFtd S r   )r   r8   r   r   isnar   r	   r	   r
   test_reduce_missing]  s
    r   methodminmaxc                 C   sZ   t jdddd g|d}t|| |d}|rH| dkr6dnd}||ksVtn|t jksVtd S )Nr   r   r,   r   r   r   )r   r8   ri   r   r   )r   r   r   rQ   r)   r<   r   r	   r	   r
   test_min_maxh  s    r   boxc           	      C   s   |j dkrD|tjkrD|tjkr$d}nd}tjjt|d}|j| |dddd g|d}t	t
| |}| d	krrdnd}||kstd S )
Nr   z<'<=' not supported between instances of 'str' and 'NoneType'z0'ArrowStringArray' object has no attribute 'max'rI   r   r   r,   r   r   )r   r   r   r%   rK   rL   r'   rM   rN   ri   r(   r   )	r   r   r   rQ   rJ   rK   r)   r<   r   r	   r	   r
   test_min_max_numpyt  s    
r   c              	   C   s   t jdt jg| d}|jdd}t jddg| d}t|| |jtdd}t jddg| d}t|| | jdkr~d}nd}t	j
t|d |jd	d W 5 Q R X d S )
Nr   r   r   )r4   r   z"Invalid value '1' for dtype stringz3Cannot set non-string value '1' into a StringArray.r    r   )r   r   r   Zfillnar/   r0   r(   str_r   r%   r&   r'   )r   rQ   r)   resr   r*   r	   r	   r
   test_fillna_args  s    
r   c                 C   sx   t d}tjdddg| d}||}|jt|| dd}| jdkrft|jtdkrf|	|}|
|sttd S )	Nr   r   r   r,   r   Tr   z11.0.0)r%   Zimportorskipr   r   listr   r   r   __version__chunked_arrayequalsr   )r   r   datar)   r   r	   r	   r
   test_arrow_array  s    


r   r   c              	   C   s   dd l }tjddd g| d}td|i}||}|djdksHttd| |	 }W 5 Q R X t
|d jtjs~t|d| d}t|| |jd	 tjkstd S )
Nr   r   r   r   r   r   string[])r#   r   )r   r   r   r   tablefieldr   r   option_context	to_pandas
isinstancer   r   r9   r/   ra   locr   r   Zstring_storage2r   r   r   r   r<   r   r	   r	   r
   test_arrow_roundtrip  s    
r   c              	   C   s   dd l }tjg | d}td|i}||}|djdksBt|j|jg |	 dg|j
d}td| | }W 5 Q R X t|d jtjst|d| d	}t|| d S )
Nr   r   r   r   )r   )schemar   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r9   r/   ra   r   r	   r	   r
    test_arrow_load_from_zero_chunks  s    
 r   c                 C   s   t | dddkrd}nd}tjdddtjg| d}|jd	d
}tjdddg|dddg |dd}t|| |jdd
}tjddg|d d |dd}t|| d S )Nr    r   zint64[pyarrow]r   r   r   r   F)Zdropnar#   r   r   r   countindexr   nameT)ri   r   r   r   value_countsr8   r/   r:   )r   	exp_dtyper)   r<   r   r	   r	   r
   test_value_counts_na  s    "r   c                 C   sp   t | dddkrd}nd}tjdddtjg| d}|jd	d
}tjddg|d d |ddd }t|| d S )Nr   r   r   zdouble[pyarrow]ZFloat64r   r   r   T)	normalizer#   r   Z
proportionr   r   )ri   r   r8   r   r   r/   r:   )r   r   r;   r<   r   r	   r	   r
    test_value_counts_with_normalize  s    "r   zvalues, expectedr   r   r,   c              	   C   s   t j| |d} t ddb |  }t|| t |  }t |}t|| t |  }t |}t	|| W 5 Q R X d S )Nr   zmode.use_inf_as_naT)
r   r   r   r   r/   r3   r8   r:   r   ra   )valuesr   r   r<   r	   r	   r
   test_use_inf_as_na  s    	

r   c                 C   sf   | j dkrtd| j   tjdddg| d}d|j  k r\|   kr\|jdd	k sbn td S )
Nr   znot applicable for r   r   r,   r   r   T)deep)r   r%   skipr   r8   nbytesZmemory_usager   )r   Zseriesr	   r	   r
   test_memory_usage  s    
r   float_dtypec                 C   s:   t jdg| d}||}t jdg|d}t|| d S )Ng?r   z0.1)r   r8   r9   r/   r:   )r   r   r;   r<   r   r	   r	   r
   test_astype_from_float_dtype  s    
r   c                 C   sF   t jdt jdg| d}t|}tjdt jdgtd}t|| d S )Nr   r   r   )r   r   r   r(   rO   r/   r3   r   r	   r	   r
   "test_to_numpy_returns_pdna_default  s    
r   c                 C   sJ   |}t jdt jdg| d}|j|d}tjd|dgtd}t|| d S )Nr   r   r   )na_value)r   r   r   Zto_numpyr(   rO   r/   r3   )r   Znulls_fixturer   r)   r<   r   r	   r	   r
   test_to_numpy_na_value  s
    r   c                 C   s   t jddd g| d}|ddg}t dddg}t|| |dt jg}t dddg}t|| |g }t dddg}t|| |d|g}t dddg}t|| d S )Nr   r   r   r,   TF)r   r8   isinr/   r:   r   )r   Zfixed_now_tsrR   r<   r   r	   r	   r
   	test_isin  s    
r   c              	   C   s   t jdddg| d}tdddg}d ||< |jd t jks@tt jdddg| d}t|jt jjkrld}nd	}t	j
t|d
 d||< W 5 Q R X d S )Nr   r   r,   r   FTr   zCannot set non-string valuer   r    )r   r8   r(   r   r   r   r   r$   r   r%   r&   r'   )r   r;   maskr*   r	   r	   r
   (test_setitem_scalar_with_mask_validation3  s    r   c                 C   sF   dddg}t j|t jd}tj|| d}tj|| d}t|| d S Nr   r   r,   r   )r(   r   r   r   r/   r0   r   valsr)   r<   r   r	   r	   r
   test_from_numpy_strG  s
    
r   c                 C   s4   dddg}t j|| d}| }|}t|| d S r   )r   r   tolistr/   Zassert_equalr   r	   r	   r
   test_tolistO  s
    
r   )K__doc__Znumpyr(   r%   Zpandas.util._test_decoratorsutilZ_test_decoratorstdZpandas.core.dtypes.commonr   Zpandasr   Zpandas._testingZ_testingr/   Zpandas.core.arrays.string_arrowr   Zpandas.util.versionr   Zfixturer   r   r   r   r+   r1   r5   r=   rH   rS   rV   rX   rK   rL   rc   re   rr   rs   rw   r{   r~   Zparametrizerd   Zfloat64floatr   r   r   r   r   r   r   r   r   r8   r   r   r   r   Z
skip_if_nor   r   r   r   r   r   Zfloat16Zfloat32r   r   r   r   r   r   r   r	   r	   r	   r
   <module>   s   

	

	!$

	




