U
    MZfn                     @  s@  d dl mZ d dlmZmZmZmZmZ d dlZ	d dl
mZ d dlmZmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZmZ d d	lmZmZ d dlm   m!Z" d d
l#m$Z$ d dl%m&Z& d dl'm(Z(m)Z)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 erd dl1m2Z2 edee$d dddGdddddddddddZ3dddddddddd d!Z4dHd"ddd#d$d%Z5dIddd&d'd(Z6dJdd)d*d+Z7dKddd,d-d.Z8d/d0 Z9edee$d1 ddej:ej:d2dd3d4d4dd5d6d1Z;dLdddddd7d8d9Z<dMddddd:d;d<Z=dNd>d?d@dAZ>dBdBdCdDdEdFZ?dS )O    )annotations)TYPE_CHECKINGCallableHashableSequencecastN)lib)AggFuncTypeAggFuncTypeBaseAggFuncTypeDict
IndexLabel)AppenderSubstitution)maybe_downcast_to_dtype)is_extension_array_dtypeis_integer_dtypeis_list_likeis_nested_list_like	is_scalar)ABCDataFrame	ABCSeries)_shared_docs)Grouper)Index
MultiIndexget_objs_combined_axis)concat)cartesian_product)Series	DataFramez
data : DataFramepivot_table   )indentsmeanFTAllr    r	   boolr   )dataaggfuncmarginsdropnamargins_nameobservedsortreturnc                 C  s   t |}t |}t|trg }g }|D ]>}t| |||||||||	|
d}|| |t|d| q&t||dd}|j| ddS t| |||||||||	|
}|j| ddS )N)
valuesindexcolumns
fill_valuer(   r)   r*   r+   r,   r-   __name__r"   )keysaxisr!   )method)_convert_by
isinstancelist__internal_pivot_tableappendgetattrr   Z__finalize__)r'   r/   r0   r1   r(   r2   r)   r*   r+   r,   r-   piecesr4   funcZ_tabletable r@   =/tmp/pip-unpacked-wheel-nbcvw55c/pandas/core/reshape/pivot.pyr!   8   sJ    

z!AggFuncTypeBase | AggFuncTypeDictc                 C  s  || }|dk	}|rt |r*d}t|}n
d}|g}|D ]}|| kr8t|q8g }|| D ]@}t|trn|j}z|| kr|| W qZ tk
r   Y qZX qZt|t| j	k r| | } nB| j	}|D ].}z|
|}W q tttfk
r   Y qX qt|}| j||	|
d}||}|rt|trt|j	r|jdd}|D ]r}|| krBt| | rB||krBt|| sBt|| tsBt| | jtjrBt|| | | j||< qB|}|jjdkr@|r@|jjdt| }g }tt|t|D ]<}|jj| }|dks||kr(|| n
|| q||}|st|jtr|tjt|jj|jjd}|j|d	d
}t|j	trtjt|j	j|j	jd}|j|dd
}|
dkrt|tr|jdd
}|dk	r|j|dd}|r&|r| |   j!dd
 } t"|| |||||||d	}|rN|sN|j	jdkrN|j#d	dd
}t|d	krpt|d	krp|j$}t|tr|r|jddd}|S )zL
    Helper of :func:`pandas.pivot_table` for any non-list ``aggfunc``.
    NTF)r,   r-   all)howr"   namesr   r5   Zinfer)Zdowncast)rowscolsr(   r,   r+   r2   )rC   r5   )%r   r9   KeyErrorr8   r   keyr;   	TypeErrorlenr1   Zdrop
ValueErrorgroupbyaggr   r*   r   dtypenpr   r0   nlevelsrE   rangeunstackr   from_arraysr   levelsreindexZ
sort_indexfillnaZnotnarB   _add_marginsZ	droplevelT)r'   r/   r0   r1   r(   r2   r)   r*   r+   r,   r-   r4   Zvalues_passedZvalues_multiiZ	to_filterxrJ   ZgroupedZaggedvr?   Zindex_namesZ
to_unstacknamemr@   r@   rA   r:   q   s    






 

 
 
r:   zDataFrame | Series)r?   r'   r+   c	              	   C  s  t |tstdd| d}	| jjD ]}
|| j|
kr&t|	q&t||||}| jdkr| jjdd  D ]}
|| j|
krlt|	qlt	|dkr|fdt	|d   }n|}|st | t
r| t||| iS |rt| |||||||}t |ts|S |\}}}n>t | tstt| ||||||}t |tsB|S |\}}}|j|j|d}|D ]0}t |tr~|| ||< n||d  ||< q`dd	lm} ||t|gd
j}|jj}t|jD ]:}t|r֐q||gj}|| jt|fd||< q||}||j_|S )Nz&margins_name argument must be a stringzConflicting name "z" in margins   r"    )r2   r   r   )r1   )args)r8   strrM   r0   rE   get_level_values_compute_grand_marginndimr1   rL   r   _appendr   _generate_marginal_resultstupler   AssertionError)_generate_marginal_results_without_valuesrW   pandasr    r   rZ   setZdtypesr   Zselect_dtypesapplyr   )r?   r'   r/   rG   rH   r(   r,   r+   r2   msglevelgrand_marginrJ   Zmarginal_result_setresultmargin_keys
row_marginkr    Zmargin_dummyZ	row_namesrP   r@   r@   rA   rY     s|    



             

 
rY   )r'   r+   c              	   C  s   |ri }| |   D ]\}}zlt|tr:t|| ||< nLt|trzt|| trht|||  ||< q|| |||< n||||< W q tk
r   Y qX q|S ||| jiS d S N)itemsr8   rd   r<   dictrK   r0   )r'   r/   r(   r+   rr   rv   r]   r@   r@   rA   rf   V  s    

rf   )r+   c                   s  t  dkrxg }g }	 fdd}
t |dkr|||  j||d|}d}| jd||dD ]8\}}|
|}| }|| ||< || |	| q`nddlm} d}| jd||dD ]\}}t  dkr|
|}n}|| |||j}t	|j
tr$tj|g|j
jd g d|_
nt|g|j
jd	|_
|| |	| q|sX| S t||d
}t |dkr|S n
| }| j}	t  dkr| |  j |d|}| }t  gttt   }|j
||_
nttj|jd}||	|fS )Nr   c                   s   | fdt  d   S )Nra   r"   rL   )rJ   rH   r+   r@   rA   _all_keyu  s    z,_generate_marginal_results.<locals>._all_keyr,   r"   rq   r5   r,   r   rD   r^   rF   r0   )rL   rN   rO   copyr;   rm   r    ro   rZ   r8   r0   r   from_tuplesrE   r   r^   r   r1   stackr9   rS   Zreorder_levelsr   rQ   nan)r?   r'   r/   rG   rH   r(   r,   r+   Ztable_piecesrt   r|   marginZcat_axisrJ   Zpieceall_keyr    Ztransformed_piecers   ru   Z	new_orderr@   r{   rA   ri   m  sV    


 

ri   )r?   r+   c                   s   t  dkrg } fdd}t |dkr`|| j||d|}	| }
|	| |
< | }||
 q|jdd|d|}	| }
|	| |
< | }||
 |S n
| }| j}t  r|  j |d|}nttj|jd}|||fS )Nr   c                     s&   t  dkrS fdt  d   S )Nr"   ra   rz   r@   r{   r@   rA   r|     s    z;_generate_marginal_results_without_values.<locals>._all_keyr}   r~   r   )rL   rN   ro   r;   r1   r   rQ   r   )r?   r'   rG   rH   r(   r,   r+   rt   r|   r   r   rs   ru   r@   r{   rA   rl     s*    	
rl   c                 C  sF   | d krg } n4t | s2t| tjtttfs2t| r:| g} nt| } | S rw   )	r   r8   rQ   Zndarrayr   r   r   callabler9   )Zbyr@   r@   rA   r7     s    r7   pivot)r0   r/   r   zIndexLabel | lib.NoDefault)r'   r1   r0   r/   r.   c                  st  t |} jdd  j  _dd  jjD  j_|tjkrz|tjk	rXt |}ng }|tjk} j|| |d}n|tjkrt jt	r fddt
 jjD }qt j jjdg}n fddt |D } fd	d|D }	||	 t	|}
t|r<t|ts<ttt |} j | j|
|d
}n j | j|
d}||}dd |jjD |j_|S )NF)deepc                 S  s   g | ]}|d k	r|nt jqS rw   r   	NoDefault.0r^   r@   r@   rA   
<listcomp>  s    zpivot.<locals>.<listcomp>)r;   c                   s   g | ]} j |qS r@   )r0   re   )r   r[   r'   r@   rA   r     s    r   c                   s   g | ]} | qS r@   r@   )r   idxr   r@   rA   r     s     c                   s   g | ]} | qS r@   r@   )r   colr   r@   rA   r     s     )r0   r1   r   c                 S  s   g | ]}|t jk	r|nd qS rw   r   r   r@   r@   rA   r   .  s    )comZconvert_to_list_liker   r0   rE   r   r   Z	set_indexr8   r   rS   rR   r   r^   extendrU   r   rj   r   r   r   Z_constructorZ_valuesZ_constructor_slicedrT   )r'   r1   r0   r/   Zcolumns_listlikerH   r;   ZindexedZ
index_listZdata_columnsZ
multiindexrs   r@   r   rA   r     sL    	




 




  

)r)   r+   r*   	normalizer.   c
                 C  sX  |dkr|dk	rt d|dk	r0|dkr0t dt| s>| g} t|sL|g}d}
dd | | D }|rtt|ddd}
t| |d	d
}t||dd
}t||\}}}}ddlm} tt|| tt||}|||
d}|dkrd|d< t	dd}n||d< d|i}|j
d|||||d|}|	dk	r8t||	||d}|j|dd}|j|dd}|S )a  
    Compute a simple cross tabulation of two (or more) factors.

    By default, computes a frequency table of the factors unless an
    array of values and an aggregation function are passed.

    Parameters
    ----------
    index : array-like, Series, or list of arrays/Series
        Values to group by in the rows.
    columns : array-like, Series, or list of arrays/Series
        Values to group by in the columns.
    values : array-like, optional
        Array of values to aggregate according to the factors.
        Requires `aggfunc` be specified.
    rownames : sequence, default None
        If passed, must match number of row arrays passed.
    colnames : sequence, default None
        If passed, must match number of column arrays passed.
    aggfunc : function, optional
        If specified, requires `values` be specified as well.
    margins : bool, default False
        Add row/column margins (subtotals).
    margins_name : str, default 'All'
        Name of the row/column that will contain the totals
        when margins is True.
    dropna : bool, default True
        Do not include columns whose entries are all NaN.
    normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False
        Normalize by dividing all values by the sum of values.

        - If passed 'all' or `True`, will normalize over all values.
        - If passed 'index' will normalize over each row.
        - If passed 'columns' will normalize over each column.
        - If margins is `True`, will also normalize margin values.

    Returns
    -------
    DataFrame
        Cross tabulation of the data.

    See Also
    --------
    DataFrame.pivot : Reshape data based on column values.
    pivot_table : Create a pivot table as a DataFrame.

    Notes
    -----
    Any Series passed will have their name attributes used unless row or column
    names for the cross-tabulation are specified.

    Any input passed containing Categorical data will have **all** of its
    categories included in the cross-tabulation, even if the actual data does
    not contain any instances of a particular category.

    In the event that there aren't overlapping indexes an empty DataFrame will
    be returned.

    Reference :ref:`the user guide <reshaping.crosstabulations>` for more examples.

    Examples
    --------
    >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar",
    ...               "bar", "bar", "foo", "foo", "foo"], dtype=object)
    >>> b = np.array(["one", "one", "one", "two", "one", "one",
    ...               "one", "two", "two", "two", "one"], dtype=object)
    >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny",
    ...               "shiny", "dull", "shiny", "shiny", "shiny"],
    ...              dtype=object)
    >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
    b   one        two
    c   dull shiny dull shiny
    a
    bar    1     2    1     0
    foo    2     2    1     2

    Here 'c' and 'f' are not represented in the data and will not be
    shown in the output because dropna is True by default. Set
    dropna=False to preserve categories with no data.

    >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
    >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
    >>> pd.crosstab(foo, bar)
    col_0  d  e
    row_0
    a      1  0
    b      0  1
    >>> pd.crosstab(foo, bar, dropna=False)
    col_0  d  e  f
    row_0
    a      1  0  0
    b      0  1  0
    c      0  0  0
    Nz&aggfunc cannot be used without values.z)values cannot be used without an aggfunc.c                 S  s   g | ]}t |ttfr|qS r@   )r8   r   r   )r   r\   r@   r@   rA   r     s      zcrosstab.<locals>.<listcomp>TF)Z	intersectr-   rowprefixr   r   r   r   	__dummy__)r(   r2   r(   )r0   r1   r)   r+   r*   )r   r)   r+   )r0   r5   r"   )r1   r5   )r   )rM   r   r   
_get_names_build_names_mapperrm   r    ry   ziprL   r!   
_normalizeZrename_axis)r0   r1   r/   rownamescolnamesr(   r)   r+   r*   r   Z
common_idxZ	pass_objsrownames_mapperunique_rownamescolnames_mapperunique_colnamesr    r'   Zdfkwargsr?   r@   r@   rA   crosstab5  sf    j 
   r   )r?   r)   r+   r.   c              
   C  sH  t |ttfsRddd}z|| }W n, tk
rP } ztd|W 5 d }~X Y nX |dkrdd dd d	d d
}|d |d< z|| }W n, tk
r } ztd|W 5 d }~X Y nX || } | d} nv|dkr<| j}| j}	| jdd d f j	}
||
k||
k@ rt| d| jd ddf }| jdd df }| jd dd df } t
| |dd} |dkr||  }t| |gdd} | d} |	| _n|dkr||  }| |} | d} || _np|dks|dkr2||  }||  }d|j|< t| |gdd} | |} | d} || _|	| _ntdntd| S )Nr0   r1   )r   r"   zNot a valid normalize argumentFc                 S  s   | | j ddj dd S Nr"   rF   r   sumr\   r@   r@   rA   <lambda>      z_normalize.<locals>.<lambda>c                 S  s   | |    S rw   r   r   r@   r@   rA   r     r   c                 S  s   | j | jddddS r   )divr   r   r@   r@   rA   r     r   )rB   r1   r0   rB   Tr   z not in pivoted DataFrame)r   r)   r"   rF   zNot a valid margins argument)r8   r&   rd   rI   rM   rX   r0   r1   Zilocr^   r   r   r   rh   loc)r?   r   r)   r+   Z	axis_subserrZnormalizersfZtable_indexZtable_columnsZlast_ind_or_colZcolumn_marginZindex_marginr@   r@   rA   r     sd    










r   r   rd   r   c                 C  s   |d krVg }t | D ]>\}}t|tr>|jd k	r>||j q|| d|  qn*t|t| krntdt|tst|}|S )N_z*arrays and names must have the same length)	enumerater8   r   r^   r;   rL   rk   r9   )ZarrsrE   r   r[   Zarrr@   r@   rA   r   1  s    
r   z	list[str]z;tuple[dict[str, str], list[str], dict[str, str], list[str]])r   r   r.   c                   s   dd }t | t |}|| ||B |B   fddt| D } fddt| D } fddt|D } fddt|D }||||fS )	a  
    Given the names of a DataFrame's rows and columns, returns a set of unique row
    and column names and mappers that convert to original names.

    A row or column name is replaced if it is duplicate among the rows of the inputs,
    among the columns of the inputs or between the rows and the columns.

    Parameters
    ----------
    rownames: list[str]
    colnames: list[str]

    Returns
    -------
    Tuple(Dict[str, str], List[str], Dict[str, str], List[str])

    rownames_mapper: dict[str, str]
        a dictionary with new row names as keys and original rownames as values
    unique_rownames: list[str]
        a list of rownames with duplicate names replaced by dummy names
    colnames_mapper: dict[str, str]
        a dictionary with new column names as keys and original column names as values
    unique_colnames: list[str]
        a list of column names with duplicate names replaced by dummy names

    c                   s   t    fdd| D S )Nc                   s   h | ]}| kr|qS r@   r@   r   seenr@   rA   	<setcomp>b  s      z>_build_names_mapper.<locals>.get_duplicates.<locals>.<setcomp>)rn   rD   r@   r   rA   get_duplicates`  s    z+_build_names_mapper.<locals>.get_duplicatesc                   s$   i | ]\}}| krd | |qS Zrow_r@   r   r[   r^   Z	dup_namesr@   rA   
<dictcomp>g  s      z'_build_names_mapper.<locals>.<dictcomp>c                   s&   g | ]\}}| krd | n|qS r   r@   r   r   r@   rA   r   j  s    z'_build_names_mapper.<locals>.<listcomp>c                   s$   i | ]\}}| krd | |qS Zcol_r@   r   r   r@   rA   r   n  s      c                   s&   g | ]\}}| krd | n|qS r   r@   r   r   r@   rA   r   q  s    )rn   intersectionr   )r   r   r   Zshared_namesr   r   r   r   r@   r   rA   r   B  s     



r   )
NNNr$   NFTr%   FT)Nr%   N)r%   )r%   )r%   )NNNNFr%   TF)r%   )r   )@
__future__r   typingr   r   r   r   r   ZnumpyrQ   Zpandas._libsr   Zpandas._typingr	   r
   r   r   Zpandas.util._decoratorsr   r   Zpandas.core.dtypes.castr   Zpandas.core.dtypes.commonr   r   r   r   r   Zpandas.core.dtypes.genericr   r   Zpandas.core.commoncorecommonr   Zpandas.core.framer   Zpandas.core.groupbyr   Zpandas.core.indexes.apir   r   r   Zpandas.core.reshape.concatr   Zpandas.core.reshape.utilr   Zpandas.core.seriesr   rm   r    r!   r:   rY   rf   ri   rl   r7   r   r   r   r   r   r   r@   r@   r@   rA   <module>   s|             "7    U  Q ,E         . P