U
    MZf5.                     @  s  d Z ddlmZ ddlmZmZ ddlmZmZm	Z	m
Z
mZmZ ddlZddlmZ ddlmZmZmZ ddlmZ dd	lmZmZmZmZ dd
lmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z% erddl&m'Z'm(Z( ddddddddZ)G dd deZ*G dd de*Z+G dd de*Z,ddddd Z-d!d"d!d#d$d%Z.d!d"d!d&d'd(Z/d!d"d!d&d)d*Z0d!d"d!d+d,d-Z1d!d.d/d0d1Z2dd2d3d4d5Z3dS )6z
Module responsible for execution of NDFrame.describe() method.

Method NDFrame.describe() delegates actual execution to function describe_ndframe().
    )annotations)ABCabstractmethod)TYPE_CHECKINGAnyCallableHashableSequencecastN)	Timestamp)DtypeObjNDFrameTnpt)validate_percentile)is_bool_dtypeis_complex_dtypeis_extension_array_dtypeis_numeric_dtype)DatetimeTZDtype)
ArrowDtype)Float64Dtype)concat)format_percentiles)	DataFrameSeriesr   str | Sequence[str] | Nonez#Sequence[float] | np.ndarray | None)objincludeexcludepercentilesreturnc                 C  sN   t |}| jdkr$ttd| d}nttd| ||d}|j|d}tt|S )a   Describe series or dataframe.

    Called from pandas.core.generic.NDFrame.describe()

    Parameters
    ----------
    obj: DataFrame or Series
        Either dataframe or series to be described.
    include : 'all', list-like of dtypes or None (default), optional
        A white list of data types to include in the result. Ignored for ``Series``.
    exclude : list-like of dtypes or None (default), optional,
        A black list of data types to omit from the result. Ignored for ``Series``.
    percentiles : list-like of numbers, optional
        The percentiles to include in the output. All should fall between 0 and 1.
        The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and
        75th percentiles.

    Returns
    -------
    Dataframe or series description.
       r   r   r   )r   r   r   )r   )refine_percentilesndimSeriesDescriberr
   DataFrameDescriberdescriber   )r   r   r   r   Z	describerresult r)   @/tmp/pip-unpacked-wheel-nbcvw55c/pandas/core/methods/describe.pydescribe_ndframe4   s    
r+   c                   @  s4   e Zd ZdZdddddZedddd	d
ZdS )NDFrameDescriberAbstractzAbstract class for describing dataframe or series.

    Parameters
    ----------
    obj : Series or DataFrame
        Object to be described.
    zDataFrame | SeriesNone)r   r    c                 C  s
   || _ d S Nr"   )selfr   r)   r)   r*   __init__k   s    z!NDFrameDescriberAbstract.__init__Sequence[float] | np.ndarrayr   r    c                 C  s   dS )zDo describe either series or dataframe.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.
        Nr)   )r/   r   r)   r)   r*   r'   n   s    z!NDFrameDescriberAbstract.describeN)__name__
__module____qualname____doc__r0   r   r'   r)   r)   r)   r*   r,   b   s   r,   c                   @  s*   e Zd ZU dZded< dddddZdS )	r%   z2Class responsible for creating series description.r   r   r1   r2   c                 C  s   t | j}|| j|S r.   )select_describe_funcr   )r/   r   describe_funcr)   r)   r*   r'   ~   s    zSeriesDescriber.describeN)r3   r4   r5   r6   __annotations__r'   r)   r)   r)   r*   r%   y   s   
r%   c                      sD   e Zd ZdZddddd fddZddd	d
dZdd Z  ZS )r&   ab  Class responsible for creating dataobj description.

    Parameters
    ----------
    obj : DataFrame
        DataFrame to be described.
    include : 'all', list-like of dtypes or None
        A white list of data types to include in the result.
    exclude : list-like of dtypes or None
        A black list of data types to omit from the result.
    r   r   r-   )r   r   r   r    c                  s:   || _ || _|jdkr*|jjdkr*tdt | d S )N   r   z+Cannot describe a DataFrame without columns)r   r   r$   columnssize
ValueErrorsuperr0   )r/   r   r   r   	__class__r)   r*   r0      s
    zDataFrameDescriber.__init__r1   r2   c                   sj   |   }g }| D ] \}}t|}|||| qt| t fdd|D ddd}|j |_|S )Nc                   s   g | ]}|j  d dqS )F)copy)Zreindex.0xZ	col_namesr)   r*   
<listcomp>   s     z/DataFrameDescriber.describe.<locals>.<listcomp>r!   F)Zaxissort)_select_dataitemsr7   appendreorder_columnsr   r;   rA   )r/   r   dataldesc_seriesr8   dr)   rE   r*   r'      s    zDataFrameDescriber.describec                 C  s   | j dkrB| jdkrBtjdg}| jj|d}t|jdkr~| j}n<| j dkrj| jdk	rbd}t|| j}n| jj| j | jd}|S )zSelect columns to be described.Ndatetime)r   r   allz*exclude must be None when include is 'all')r   r   )	r   r   npnumberr   Zselect_dtypeslenr;   r=   )r/   Zdefault_includerL   msgr)   r)   r*   rH      s    


zDataFrameDescriber._select_data)r3   r4   r5   r6   r0   r'   rH   __classcell__r)   r)   r?   r*   r&      s   r&   zSequence[Series]zlist[Hashable])rM   r    c                 C  sD   g }t dd | D td}|D ] }|D ]}||kr&|| q&q|S )z,Set a convenient order for rows for display.c                 s  s   | ]}|j V  qd S r.   )indexrB   r)   r)   r*   	<genexpr>   s     z"reorder_columns.<locals>.<genexpr>)key)sortedrU   rJ   )rM   namesZldesc_indexesZidxnamesnamer)   r)   r*   rK      s    rK   r   zSequence[float])rO   r   r    c                 C  s   ddl m} t|}ddddg| dg }|  |  |  |  g| |  | 	 g }t
| rt| jtr| jjdkrd	}qdd	l}t| }qt }n t| rt| std
}nd	}|||| j|dS )zDescribe series containing numerical data.

    Parameters
    ----------
    series : Series
        Series to be described.
    percentiles : list-like of numbers
        The percentiles to include in the output.
    r   r   countmeanstdminmaxmNfloatrX   r]   dtype)pandasr   r   r_   r`   ra   rb   quantiletolistrc   r   
isinstancerg   r   kindZpyarrowZfloat64r   r   r   rS   r]   )rO   r   r   formatted_percentiles
stat_indexrP   rg   par)   r)   r*   describe_numeric_1d   s(    
rp   )rL   percentiles_ignoredr    c           
      C  s   ddddg}|   }t||dk }|dkrH|jd |jd  }}d}ntjtj }}d}|  |||g}ddlm}	 |	||| j	|d	S )
zDescribe series containing categorical data.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Ignored, but in place to unify interface.
    r_   uniquetopfreqr   Nobjectr^   rf   )
value_countsrU   rX   ilocrS   nanr_   rh   r   r]   )
rL   rq   r\   	objcountscount_uniquers   rt   rg   r(   r   r)   r)   r*   describe_categorical_1d   s    r{   c                 C  s  ddg}|   }t||dk }|  |g}d}|dkr|jd |jd  }}| jj}	|  j	d}
t
|}|jdk	r|	dk	r||	}n
||	}|dddd	g7 }|||t
|
 |	d
t
|
 |	d
g7 }n |ddg7 }|tjtjg7 }d}ddlm} |||| j|dS )zDescribe series containing timestamp data treated as categorical.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Ignored, but in place to unify interface.
    r_   rr   r   Ni8rs   rt   firstlast)tzru   r^   rf   )rv   rU   r_   rX   rw   dtr   Zdropnavaluesviewr   tzinfoZ
tz_convertZtz_localizerb   rc   rS   rx   rh   r   r]   )rL   rq   r\   ry   rz   r(   rg   rs   rt   r   Zasintr   r)   r)   r*   $describe_timestamp_as_categorical_1d  s2    

r   )rL   r   r    c                 C  sf   ddl m} t|}dddg| dg }|  |  |  g| |  |  g }|||| j	dS )zDescribe series containing datetime64 dtype.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles : list-like of numbers
        The percentiles to include in the output.
    r   r^   r_   r`   rb   rc   )rX   r]   )
rh   r   r   r_   r`   rb   ri   rj   rc   r]   )rL   r   r   rm   rn   rP   r)   r)   r*   describe_timestamp_1dJ  s    r   r   )rL   r    c                 C  sN   t | jrtS t| rtS | jjdks2t| jtr6tS | jjdkrFtS tS dS )zSelect proper function for describing series based on data type.

    Parameters
    ----------
    data : Series
        Series to be described.
    Mrd   N)	r   rg   r{   r   rp   rl   rk   r   r   )rL   r)   r)   r*   r7   b  s    

r7   z%np.ndarray[Any, np.dtype[np.float64]]r2   c                 C  sv   | dkrt dddgS t| } t|  d| kr:| d t | } t | }| dk	sZtt|t| k rrt	d|S )z
    Ensure that percentiles are unique and sorted.

    Parameters
    ----------
    percentiles : list-like of numbers, optional
        The percentiles to include in the output.
    Ng      ?g      ?g      ?z%percentiles cannot contain duplicates)
rS   arraylistr   rJ   Zasarrayrr   AssertionErrorrU   r=   )r   Zunique_pctsr)   r)   r*   r#   x  s    


r#   )4r6   
__future__r   abcr   r   typingr   r   r   r   r	   r
   ZnumpyrS   Zpandas._libs.tslibsr   Zpandas._typingr   r   r   Zpandas.util._validatorsr   Zpandas.core.dtypes.commonr   r   r   r   Zpandas.core.dtypes.dtypesr   Zpandas.core.arrays.arrow.dtyper   Zpandas.core.arrays.floatingr   Zpandas.core.reshape.concatr   Zpandas.io.formats.formatr   rh   r   r   r+   r,   r%   r&   rK   rp   r{   r   r   r7   r#   r)   r)   r)   r*   <module>   s4    	.B( 0