U
    MZfg                     @  sP  d dl mZ d dlZd dlZd dlmZmZmZ d dl	Z
d dlmZmZ d dlmZ d dlmZmZmZmZmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lm Z m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z'm(Z( d dl)m*Z*m+Z+m,Z, d dl-m.  m/Z0 d dl1m2Z2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9m:Z: d dl;m<Z<m=Z= d dl>m?Z? erhd dl@mAZA d dl;mBZB dddddddZCdddd d!ZDdddddd"d#ZEddd$d%d&d'ZFdd(d(d)d*d+ZGd$d,d-d.d/ZHG d0d1 d1ZId2ddd3d4d5ZJd6dd7d8d9ZKd:d6d;d<d=ZLd2dd;d>d?ZMdd@dAdBZNd1dCd1dDdEdFZOdGdH ZPdS )I    )annotationsN)TYPE_CHECKINGSequencecast)NaT	internals)NA)	ArrayLikeAxisIntDtypeObjManagerShape)cache_readonly)astype_array)ensure_dtype_can_hold_nafind_common_typenp_find_common_type)is_1d_only_ea_dtypeis_dtype_equal	is_scalarneeds_i8_conversion)concat_compat)DatetimeTZDtypeExtensionDtype)is_valid_na_for_dtypeisnaisna_all)DatetimeArrayExtensionArray)SparseDtype)ensure_wrapped_if_datetimelike)ArrayManagerNullArrayProxy)ensure_block_shapenew_block_2d)BlockManager)Index)Blockzlist[Index]r
   boolr   )axesconcat_axiscopyreturnc              	     s   g  | D ]l\}}d}|  D ]4\}}|j|| ||ddd}|dkr|dk	rd}q|rj|dkrj|sj| } | q|dkr fddtt d jD }	n&|dksttt	j
d	d  D }	t|	|d |d gdd
}
|
S )z
    Concatenate array managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (ArrayManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    ArrayManager
    FT)axis
allow_dupsuse_na_proxy   Nr   c                   s,   g | ]$ t  fd dttD qS )c                   s   g | ]}| j   qS  arrays.0i)jmgrsr1   @/tmp/pip-unpacked-wheel-nbcvw55c/pandas/core/internals/concat.py
<listcomp>l   s     z:_concatenate_array_managers.<locals>.<listcomp>.<listcomp>)concat_arraysrangelen)r5   r8   )r7   r9   r:   k   s   z/_concatenate_array_managers.<locals>.<listcomp>c                 S  s   g | ]
}|j qS r1   r2   )r5   mgrr1   r1   r9   r:   r   s     )Zverify_integrity)itemsreindex_indexerr+   appendr<   r=   r3   AssertionErrorlist	itertoolschainfrom_iterabler!   )mgrs_indexersr)   r*   r+   r?   indexersZaxis1_made_copyaxindexerr3   Znew_mgrr1   r>   r9   _concatenate_array_managersH   s0        
rL   rD   r	   )	to_concatr,   c                   s   dd | D }dd |D }t |dk}|r8|d j n.tdd |D rTt|  ntd	d |D   fd
d| D } t| d trt| d }|| S t	
| }t |dkrdd |D }t |dkrd|kr|t}|S )a  
    Alternative for concat_compat but specialized for use in the ArrayManager.

    Differences: only deals with 1D arrays (no axis keyword), assumes
    ensure_wrapped_if_datetimelike and does not skip empty arrays to determine
    the dtype.
    In addition ensures that all NullArrayProxies get replaced with actual
    arrays.

    Parameters
    ----------
    to_concat : list of arrays

    Returns
    -------
    np.ndarray or ExtensionArray
    c                 S  s   g | ]}t |ts|qS r1   )
isinstancer"   r5   xr1   r1   r9   r:      s     
 z!concat_arrays.<locals>.<listcomp>c                 S  s   h | ]
}|j qS r1   dtyperO   r1   r1   r9   	<setcomp>   s     z concat_arrays.<locals>.<setcomp>r0   r   c                 s  s$   | ]}|j d kot|tjV  qdS ))r6   ubN)kindrN   nprR   rO   r1   r1   r9   	<genexpr>   s     z concat_arrays.<locals>.<genexpr>c                 S  s   g | ]
}|j qS r1   rQ   r5   Zarrr1   r1   r9   r:      s     c                   s.   g | ]&}t |tr| nt| d dqS )Fr+   )rN   r"   Zto_arrayr   rY   Ztarget_dtyper1   r9   r:      s   c                 S  s   h | ]}|j jqS r1   )rR   rV   )r5   objr1   r1   r9   rS      s     rU   )r=   rR   allr   r   rN   r   typeZ_concat_same_typerW   concatenateastypeobject)rM   Zto_concat_no_proxydtypesZsingle_dtypeclsresultkindsr1   r[   r9   r;   x   s*    




r;   c                 C  sT  t | d d tr t| |||S |dkr4t| ||S t|| } dd | D }t|}g }|D ]\}}|d }	|	j}
t|dkr|d js|
j	}|r|
 }n| }d}nnt|rdd |D }|
jstj|dd}nt|dd}t|dd	}t|}|
j	j|jk}nt||d
}d}|r.|
j||d}nt||d}|| q\tt||S )z
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    r   c                 S  s   g | ]\}}t ||qS r1   )_get_mgr_concatenation_plan)r5   r?   rI   r1   r1   r9   r:      s    z(concatenate_managers.<locals>.<listcomp>r0   Tc                 S  s   g | ]}|j jqS r1   )blockvaluesr5   Zjur1   r1   r9   r:      s     r-      )ndimrZ   F)	placement)rN   r!   rL   _concat_managers_axis0_maybe_reindex_columns_na_proxy_combine_concat_plansrg   r=   rI   rh   r+   view_is_uniform_join_unitsis_extensionrW   r_   r   r#   r    rR   _concatenate_join_unitsZmake_block_same_classr$   rB   r%   tuple)rH   r)   r*   r+   Zconcat_plansZconcat_planblocksrm   
join_unitsunitblkrh   ZfastpathvalsrU   r1   r1   r9   concatenate_managers   sD    


r{   r%   )r)   r+   r,   c                   s    fddt t D }t|  dd  D }d}g }t|D ]p\}}|| }	|jD ]J}
|	rn|
jdd}n|r||
 }n|
td}|j	||_|
| qX|t|j7 }qBtt||}|S )	z
    concat_managers specialized to concat_axis=0, with reindexing already
    having been done in _maybe_reindex_columns_na_proxy.
    c                   s"   i | ]}|t  | d  dkqS )r0   r   )r=   r4   rH   r1   r9   
<dictcomp>  s     z*_concat_managers_axis0.<locals>.<dictcomp>c                 S  s   g | ]}|d  qS )r   r1   rO   r1   r1   r9   r:     s     z*_concat_managers_axis0.<locals>.<listcomp>r   F)deepN)r<   r=   ro   	enumeraterv   r+   getitem_blocksliceZ	_mgr_locsaddrB   r@   r%   ru   )rH   r)   r+   Zhad_reindexersr8   offsetrv   r6   r?   Z	made_copyry   nbrd   r1   r|   r9   rn      s(    




rn   z0list[tuple[BlockManager, dict[int, np.ndarray]]])r)   rH   r,   c                 C  sV   g }|D ]H\}}|  D ](\}}|j| | || |ddddd}q||i f q|S )z
    Reindex along columns so that all of the BlockManagers being concatenated
    have matching columns.

    Columns added in this reindexing have dtype=np.void, indicating they
    should be ignored when choosing a column's final dtype.
    FT)r-   r+   Z
only_slicer.   r/   )r@   rA   rB   )r)   rH   Znew_mgrs_indexersr?   rI   r6   rK   r1   r1   r9   ro   %  s    
	ro   zdict[int, np.ndarray])r?   rI   c                 C  s`  t |dkstt| j}| D ]\}}t |||< q"t|}d|ksLt| jrp| jd }|jt	|||fgS | j
}| j}g }	tj|ddD ]\}
}|jst|
dkst| }t|}t ||d< t|}| j|
 }||j }t |t |jko"|jjr|jjjdkp"t|dk }|r8|dd n||d< t	|||}|	||f q|	S )z
    Construct concatenation plan for given block manager and indexers.

    Parameters
    ----------
    mgr : BlockManager
    indexers : dict of {axis: indexer}

    Returns
    -------
    plan : list of (BlockPlacement, JoinUnit) tuples

    r   F)groupr0   N)r=   rC   rD   shaper@   ru   Zis_single_blockrv   Zmgr_locsJoinUnitblknosblklocslibinternalsZget_blkno_placementsZis_slice_liker+   rK   Zas_slicesteprW   Zdiffr]   poprB   )r?   rI   Zmgr_shape_listrJ   rK   Z	mgr_shapery   r   r   ZplanZblkno
placementsZjoin_unit_indexersZ
shape_listr   Zax0_blk_indexerZunit_no_ax0_reindexingrx   r1   r1   r9   rf   B  s>    




rf   c                   @  s   e Zd ZdddddddZdd	d
dZedd	ddZedd	ddZdddddZedd	ddZ	dddddZ
dS )r   Nr'   r   None)rg   r   r,   c                 C  s"   |d kri }|| _ || _|| _d S Nrg   rI   r   )selfrg   r   rI   r1   r1   r9   __init__  s
    zJoinUnit.__init__strr,   c                 C  s$   t | j dt| j d| j dS )N(z, ))r^   __name__reprrg   rI   )r   r1   r1   r9   __repr__  s    zJoinUnit.__repr__r(   c                 C  s&   | j  D ]}|dk r
 dS q
dS )Nr   TF)rI   rh   any)r   rK   r1   r1   r9   needs_filling  s    zJoinUnit.needs_fillingr   c                 C  s2   | j }|jjjdkrtd| js(|jS t|jS )NVzBlock is None, no dtype)rg   rh   rR   rV   rC   r   r   )r   ry   r1   r1   r9   rR     s    zJoinUnit.dtype)rR   r,   c                   s   | j s
dS | jjjdkrdS | jtkrL| jj}t fdd|jddD S | jj}|t	krlt
| j sldS |tkrt rdS t| S )z
        Check that we are all-NA of a type/dtype that is compatible with this dtype.
        Augments `self.is_na` with an additional check of the type of NA values.
        Fr   Tc                 3  s   | ]}t | V  qd S r   )r   rO   rQ   r1   r9   rX     s     z,JoinUnit._is_valid_na_for.<locals>.<genexpr>Korder)is_narg   rR   rV   ra   rh   r]   ravel
fill_valuer   r   r   r   r   )r   rR   rh   na_valuer1   rQ   r9   _is_valid_na_for  s    
zJoinUnit._is_valid_na_forc                 C  s   | j }|jjdkrdS |js dS |j}|jdkr4dS t|jtrDdS |jdkrr|d }t	|rft
|sjdS t|S |d d }t	|rt
|sdS tdd |D S d S )Nr   TFr   r0   c                 s  s   | ]}t |V  qd S r   )r   )r5   rowr1   r1   r9   rX     s     z!JoinUnit.is_na.<locals>.<genexpr>)rg   rR   rV   Z_can_hold_narh   sizerN   r   rl   r   r   r   r]   )r   ry   rh   valr1   r1   r9   r     s&    

zJoinUnit.is_nar	   )empty_dtyper,   c                 C  s  |d kr*| j jjdkr*| j j}| j j}n|}| |rn| j j}|tdkrx| j jjdd}t|rx|d d krxd }t	|t
rt| j|j}t||dS t|rt||r| jrn`tt|}| }|jg |d}| j\}	}
|	dkst|	dtj|
ftjd }|j|d	|d
S nRt	|trP| }|j| j|d}||d d < |S tj| j|d}|| |S | js| j js| j jS | j jr| j tdj}n| j j}| js|  }n&| j! D ]\}}t"j#|||d}q|S )Nr   ra   r   r   r   rQ   r0   r   T)Z
allow_fillr   )r   rR   rj   )$rg   rR   rV   r   rh   r   rW   r   r=   rN   r   fullr   _valuer   r   r   rI   r   r   Zconstruct_array_typeZ_from_sequencerC   ZonesZintpZtake_emptyemptyfillZ_can_consolidateZis_boolr`   rq   r@   algosZtake_nd)r   r   upcasted_nar   rh   Z	blk_dtypeZi8valuesrc   Zmissing_arrZncolsZnrowsZ	empty_arrrJ   rK   r1   r1   r9   get_reindexed_values  sZ    



  


zJoinUnit.get_reindexed_values)N)r   
__module____qualname__r   r   r   r   rR   r   r   r   r1   r1   r1   r9   r     s   		r   zlist[JoinUnit])rw   r+   r,   c                   s   t |  tdd | D }t | fdd| D }t|dkrz|d }|rt|tjrp|jdk	rx| }q| }nFtdd |D rd	d |D }t	|dd
d}t
|d}nt	|dd}|S )zB
    Concatenate values from several join units along axis=1.
    c                 s  s   | ]}|j jjd kV  qdS r   Nrg   rR   rV   r5   rx   r1   r1   r9   rX   B  s     z*_concatenate_join_units.<locals>.<genexpr>c                   s   g | ]}|j  d qS )r   r   )r   ri   r   r1   r9   r:   E  s   z+_concatenate_join_units.<locals>.<listcomp>r0   r   Nc                 s  s   | ]}t |jV  qd S r   r   rR   r5   tr1   r1   r9   rX   V  s     c                 S  s*   g | ]"}t |jr|n|d ddf qS )r   Nr   r   r1   r1   r9   r:   [  s   T)r-   Zea_compat_axisrk   rj   )_get_empty_dtyper   _dtype_to_na_valuer=   rN   rW   Zndarraybaser+   r   r#   )rw   r+   has_none_blocksrM   Zconcat_valuesr1   r   r9   rt   <  s*    



rt   r   rR   r   c                 C  sv   t | tr| jS | jdkr$| dS | jdkr8| dS | jdkrFdS | jdkr^|sXdS tjS | jdkrntjS tdS )	z2
    Find the NA value to go with this dtype.
    )mMr   )fcNaNrU   N)r6   rT   O)rN   r   r   rV   r^   rW   nanNotImplementedErrorr   r1   r1   r9   r   j  s    







r   zSequence[JoinUnit])rw   r,   c                 C  s   t | dkr| d j}|jS t| r4| d jj}|S tdd | D }dd | D }t |sjdd | D }t|}|r~t|}|S )z
    Return dtype and N/A values to use when concatenating specified units.

    Returned N/A value may be None which means there was no casting involved.

    Returns
    -------
    dtype
    r0   r   c                 s  s   | ]}|j jjd kV  qdS r   r   r   r1   r1   r9   rX     s     z#_get_empty_dtype.<locals>.<genexpr>c                 S  s   g | ]}|j s|jqS r1   )r   rR   r   r1   r1   r9   r:     s      z$_get_empty_dtype.<locals>.<listcomp>c                 S  s    g | ]}|j jjd kr|jqS )r   r   r   r1   r1   r9   r:     s      )r=   rg   rR   _is_uniform_reindexr   r   r   )rw   ry   r   r   rb   rR   r1   r1   r9   r     s    

r   c                   sv   | d j   jjdkrdS t fdd| D ott fdd| D ottdd | D ottdd | D ott| d	kS )
z
    Check if the join units consist of blocks of uniform type that can
    be concatenated using Block.concat_same_type instead of the generic
    _concatenate_join_units (which uses `concat_compat`).

    r   r   Fc                 3  s    | ]}t |jt  kV  qd S r   )r^   rg   ri   firstr1   r9   rX     s     z)_is_uniform_join_units.<locals>.<genexpr>c                 3  s,   | ]$}t |jj jp"|jjjd kV  qdS ))rU   r6   rT   N)r   rg   rR   rV   ri   r   r1   r9   rX     s   c                 s  s   | ]}|j  p|jjV  qd S r   )r   rg   rs   ri   r1   r1   r9   rX     s     c                 s  s   | ]}|j  V  qd S r   )rI   ri   r1   r1   r9   rX     s     r0   )rg   rR   rV   r]   r=   rw   r1   r   r9   rr     s    

rr   r   c                 C  s(   t dd | D o&tdd | D dkS )Nc                 s  s   | ]}|j jV  qd S r   )rg   rs   ri   r1   r1   r9   rX     s     z&_is_uniform_reindex.<locals>.<genexpr>c                 S  s   h | ]}|j jjqS r1   )rg   rR   nameri   r1   r1   r9   rS     s     z&_is_uniform_reindex.<locals>.<setcomp>r0   )r]   r=   r   r1   r1   r9   r     s    r   int)	join_unitlengthr,   c                 C  s   d| j krF| j }| jdkr d}q| jt|d}| jt|| _n>| j}t| j }|d |d |d< | j d d| | j d< | jd | f| jdd  }|f| jdd  | _t|||dS )z
    Reduce join_unit's shape along item axis to length.

    Extra items that didn't fit are returned as a separate block.
    r   Nr0   r   )rI   rg   r   r   cpr+   r   r   )r   r   Zextra_indexersZextra_blockZextra_shaper1   r1   r9   _trim_join_unit  s    

r   c                 #  sZ  t | dkr2| d D ]}|d |d gfV  qn$dg  fdd}ttt| } tt|| } d t |krV d dkrtdt| \}}ttt |}t|t| }}||kr|d |fV  t|| |dd< q`d}	dgt | }
t|D ]R\}\}}||
|< t ||kr4||d t	||f||< q|}	|| | ||< q|	|
fV  q`dS )z
    Combine multiple concatenation plans into one.

    existing_plan is updated in-place.

    We only get here with concat_axis == 1.
    r0   r   c                   s&   t | d }|d kr" d  d7  < |S )Nr   r0   )next)seqretvalZ	num_endedr1   r9   _next_or_none  s    
z,_combine_concat_plans.<locals>._next_or_nonezPlan shapes are not alignedN)
r=   rD   mapiter
ValueErrorzipminmaxr   r   )Zplanspr   Z
next_itemsr   ZunitslengthsZmin_lenmax_lenZyielded_placementZyielded_unitsr6   Zplcrx   r1   r   r9   rp     s2    rp   )Q
__future__r   r+   r   rE   typingr   r   r   ZnumpyrW   Zpandas._libsr   r   r   Zpandas._libs.missingr   Zpandas._typingr	   r
   r   r   r   Zpandas.util._decoratorsr   Zpandas.core.dtypes.astyper   Zpandas.core.dtypes.castr   r   r   Zpandas.core.dtypes.commonr   r   r   r   Zpandas.core.dtypes.concatr   Zpandas.core.dtypes.dtypesr   r   Zpandas.core.dtypes.missingr   r   r   Zpandas.core.algorithmscoreZ
algorithmsr   Zpandas.core.arraysr   r   Zpandas.core.arrays.sparser   Zpandas.core.constructionr    Z#pandas.core.internals.array_managerr!   r"   Zpandas.core.internals.blocksr#   r$   Zpandas.core.internals.managersr%   Zpandasr&   r'   rL   r;   r{   rn   ro   rf   r   rt   r   r   rr   r   r   rp   r1   r1   r1   r9   <module>   sN   08O&L /.#