
    3;ji-                         d dl mZ d dlmZ d dlmZ d dlZd dlm	Z	m
Z
mZmZ d dlmZ dddd	Zdd
Z G d de          Zd Z G d de          Zd Zd ZdddZddZ G d de          Zd ZdS )    )Counter)suppress)
NamedTupleN)_isindeviceget_namespacexpxis_scalar_nanFreturn_inversereturn_countsc                j    | j         t          k    rt          | ||          S t          | ||          S )a  Helper function to find unique values with support for python objects.

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.

    Parameters
    ----------
    values : ndarray
        Values to check for unknowns.

    return_inverse : bool, default=False
        If True, also return the indices of the unique values.

    return_counts : bool, default=False
        If True, also return the number of times each unique item appears in
        values.

    Returns
    -------
    unique : ndarray
        The sorted unique values.

    unique_inverse : ndarray
        The indices to reconstruct the original array from the unique array.
        Only provided if `return_inverse` is True.

    unique_counts : ndarray
        The number of times each of the unique values comes up in the original
        array. Only provided if `return_counts` is True.
    r   )dtypeobject_unique_python
_unique_np)valuesr   r   s      O/root/voice-cloning/.venv/lib/python3.11/site-packages/sklearn/utils/_encode.py_uniquer      sO    > |v>
 
 
 	
 ~]       c                 `   t          |           \  }}d\  }}|r|r|                    |           \  }}}}nK|r|                    |           \  }}n0|r|                    |           \  }}n|                    |           }|j        rwt          |d                   rb|                    ||j                  }|d|dz            }|r	||||k    <   |r-|	                    ||d                   ||<   |d|dz            }|f}	|r|	|fz  }	|r|	|fz  }	t          |	          dk    r|	d         n|	S )zHelper function to find unique values for numpy arrays that correctly
    accounts for nans. See `_unique` documentation for details.)NNN   r   )r   
unique_allunique_inverseunique_countsunique_valuessizer   searchsortednansumlen)
r   r   r   xp_inversecountsuniquesnan_idxrets
             r   r   r   7   s    &!!EB OGV +- +&(mmF&;&;#GVV	 +,,V44	 +**622""6** | +gbk22 +//'2622-GaK-( 	1)0GGg%& 	+ ffVGHH%566F7OMgkM*F*C z yXX]]3q66+r   c                   .    e Zd ZU dZeed<   eed<   d ZdS )MissingValuesz'Data class for missing data informationr!   nonec                     g }| j         r|                    d           | j        r|                    t          j                   |S )z3Convert tuple to a list where None is always first.N)r-   appendr!   np)selfoutputs     r   to_listzMissingValues.to_listd   sF    9 	 MM$8 	"MM"&!!!r   N)__name__
__module____qualname____doc__bool__annotations__r3    r   r   r,   r,   ^   s=         11	III
JJJ    r   r,   c                     d | D             }|s| t          dd          fS d|v r7t          |          dk    rt          dd          }n#t          dd          }nt          dd          }| |z
  }||fS )a.  Extract missing values from `values`.

    Parameters
    ----------
    values: set
        Set of values to extract missing from.

    Returns
    -------
    output: set
        Set with missing values extracted.

    missing_values: MissingValues
        Object with missing value information.
    c                 4    h | ]}|t          |          |S Nr
   ).0values     r   	<setcomp>z#_extract_missing.<locals>.<setcomp>~   s,       U]mE6J6J]]]]r   F)r!   r-   Nr   T)r,   r#   )r   missing_values_setoutput_missing_valuesr2   s       r   _extract_missingrC   n   s      !    <}U;;;;;!!!!""a''$1e$$G$G$G!! %2d$F$F$F!! -$U C C C ((F(((r   c                   (     e Zd ZdZ fdZd Z xZS )_nandictz!Dictionary with support for nans.c                     t                                          |           |                                D ]\  }}t          |          r
|| _         d S d S r=   )super__init__itemsr   	nan_value)r1   mappingkeyr?   	__class__s       r   rH   z_nandict.__init__   sc    !!!!--// 	 	JCS!! !&	 	r   c                 l    t          | d          rt          |          r| j        S t          |          )NrJ   )hasattrr   rJ   KeyErrorr1   rL   s     r   __missing__z_nandict.__missing__   7    4%% 	"-*<*< 	">!smmr   )r4   r5   r6   r7   rH   rR   __classcell__rM   s   @r   rE   rE      sM        ++          r   rE   c                     t          | |          \  }}t          d t          |          D                       |                    fd| D             t	          |                     S )z,Map values based on its position in uniques.c                     i | ]\  }}||	S r:   r:   )r>   ivals      r   
<dictcomp>z#_map_to_integer.<locals>.<dictcomp>   s    >>>Cc1>>>r   c                      g | ]
}|         S r:   r:   )r>   vtables     r   
<listcomp>z#_map_to_integer.<locals>.<listcomp>   s    000AuQx000r   )r   )r   rE   	enumerateasarrayr   )r   r(   r$   r%   r]   s       @r   _map_to_integerra      sk    &'**EB>>9W+=+=>>>??E::0000000:HHHr   c                "   	 t          |           }t          |          \  }}t          |          }|                    |                                           t          j        || j                  }nP# t          $ rC t          d t          d | D                       D                       }t          d|           w xY w|f}|r|t          | |          fz  }|r|t          | |          fz  }t          |          dk    r|d         n|S )Nr   c              3   $   K   | ]}|j         V  d S r=   )r6   )r>   ts     r   	<genexpr>z!_unique_python.<locals>.<genexpr>   s$      LL!q~LLLLLLr   c              3   4   K   | ]}t          |          V  d S r=   )type)r>   r\   s     r   rf   z!_unique_python.<locals>.<genexpr>   s(      2K2Kq4772K2K2K2K2K2Kr   zPEncoders require their input argument must be uniformly strings or numbers. Got r   r   )setrC   sortedextendr3   r0   arrayr   	TypeErrorra   _get_countsr#   )r   r   r   uniques_setmissing_valuesr(   typesr*   s           r   r   r      s<   
&kk&6{&C&C#^%%~--//000(7&,777 
 
 
LLs2K2KF2K2K2K/K/KLLLLL/',/ /
 
 	

 *C 30022 /FG,,..XX]]3q66+s   A2A5 5ACT)check_unknownc                @   t          | |          \  }}|                    | j        d          s5	 t          | |          S # t          $ r}t          d|           d}~ww xY w|r$t          | |          }|rt          d|           |                    ||           S )a  Helper function to encode values into [0, n_uniques - 1].

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.
    The numpy method has the limitation that the `uniques` need to
    be sorted. Importantly, this is not checked but assumed to already be
    the case. The calling method needs to ensure this for all non-object
    values.

    Parameters
    ----------
    values : ndarray
        Values to encode.
    uniques : ndarray
        The unique values in `values`. If the dtype is not object, then
        `uniques` needs to be sorted.
    check_unknown : bool, default=True
        If True, check for values in `values` that are not in `unique`
        and raise an error. This is ignored for object dtype, and treated as
        True in this case. This parameter is useful for
        _BaseEncoder._transform() to avoid calling _check_unknown()
        twice.

    Returns
    -------
    encoded : ndarray
        Encoded values
    numericz%y contains previously unseen labels: N)r   isdtyper   ra   rP   
ValueError_check_unknownr    )r   r(   rr   r$   r%   ediffs          r   _encoderz      s    : &'**EB::flI.. 
0	J"67333 	J 	J 	JHQHHIII	J  	Q!&'22D Q !O!O!OPPPw///s   A   
A!
AA!c                    t          | |          \  }}d}|                    | j        d          st          |           }t	          |          \  }}t          |          t	                    \  |z
  }|j        oj         }	|j        oj         }
fd|rQ|s|	s|
r"|                    fd| D                       }n)|                    t          |           |j
                  }t          |          }|
r|                    d           |	r|                    t          j                   n|                    |           }t          j        ||d|          }|rB|j        rt%          | ||          }n)|                    t          |           |j
                  }|                    |                    |                    rV|                    |          }|                    |          r,|j        r|r|                    |           }d||<   ||          }t          |          }|r||fS |S )	a  
    Helper function to check for unknowns in values to be encoded.

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.

    Parameters
    ----------
    values : array
        Values to check for unknowns.
    known_values : array
        Known values. Must be unique.
    return_mask : bool, default=False
        If True, return a mask of the same shape as `values` indicating
        the valid values.

    Returns
    -------
    diff : list
        The unique values present in `values` and not in `know_values`.
    valid_mask : boolean array
        Additionally returned if ``return_mask=True``.

    Nrt   c                 N    | v p j         o| d u pj        ot          |           S r=   )r-   r!   r   )r?   missing_in_uniquesro   s    r   is_validz _check_unknown.<locals>.is_valid  s>    $ E&+=E&*C}U/C/Cr   c                 &    g | ]} |          S r:   r:   )r>   r?   r~   s     r   r^   z"_check_unknown.<locals>.<listcomp>   s!    &K&K&K5xx&K&K&Kr   rc   T)assume_uniquer$   r   )r   ru   r   ri   rC   r!   r-   rl   onesr#   r8   listr/   r0   r   r	   	setdiff1dr   r   anyisnan)r   known_valuesreturn_maskr$   r%   
valid_mask
values_setmissing_in_valuesry   nan_in_diffnone_in_diffr   diff_is_nanis_nanr~   r}   ro   s                 @@@r   rw   rw      s   2 &,//EBJ::flI.. 1[[
(8(D(D%
%,''*:;*G*G''K''+J4F4J0J(-M6H6M2M	 	 	 	 	 	  	A A{ Al AXX&K&K&K&KF&K&K&KLL

WWS[[W@@
Dzz 	KK 	 KK((00}]LQSTTT 	Ay A"6<<<

WWS[[W@@
 66"((<(()) 		*((4..Kvvk"" *9 + +XXf--F)*Jv& [L)Dzz  ZKr   c                   .     e Zd ZdZ fdZd Zd Z xZS )_NaNCounterz$Counter with support for nan values.c                 p    t                                          |                     |                     d S r=   )rG   rH   _generate_items)r1   rI   rM   s     r   rH   z_NaNCounter.__init__G  s/    --e4455555r   c              #      K   |D ]=}t          |          s|V  t          | d          sd| _        | xj        dz  c_        >dS )z>Generate items without nans. Stores the nan counts separately.	nan_countr   r   N)r   rO   r   )r1   rI   items      r   r   z_NaNCounter._generate_itemsJ  sf       	  	 D && 


4-- #!"NNaNNN	  	 r   c                 l    t          | d          rt          |          r| j        S t          |          )Nr   )rO   r   r   rP   rQ   s     r   rR   z_NaNCounter.__missing__T  rS   r   )r4   r5   r6   r7   rH   r   rR   rT   rU   s   @r   r   r   D  s\        ..6 6 6 6 6           r   r   c                    | j         j        dv rt          |           }t          j        t          |          t          j                  }t          |          D ]<\  }}t          t                    5  ||         ||<   ddd           n# 1 swxY w Y   =|S t          | d          \  }}t          j        ||d          }t          j        |d                   rt          j        |d                   rd|d<   t          j        |||                   }	t          j        |t          j                  }||	         ||<   |S )zGet the count of each of the `uniques` in `values`.

    The counts will use the order passed in by `uniques`. For non-object dtypes,
    `uniques` is assumed to be sorted and `np.nan` is at the end.
    OUrc   NT)r   )r   r   )r   kindr   r0   zerosr#   int64r_   r   rP   r   isinr   r    
zeros_like)
r   r(   counterr2   rX   r   r   r'   uniques_in_valuesunique_valid_indicess
             r   rn   rn   Z  sy    |D  f%%#g,,bh777 )) 	* 	*GAt(## * *#DMq	* * * * * * * * * * * * * * *&vTBBBM6 dKKK	xb!"" %rx'<'< % $"?='BS:TUU]7"(333F &'; <FMs   3BB	B	)FF)F)collectionsr   
contextlibr   typingr   numpyr0   sklearn.utils._array_apir   r   r   r	   sklearn.utils._missingr   r   r   r,   rC   dictrE   ra   r   rz   rw   r   rn   r:   r   r   <module>r      s                          F F F F F F F F F F F F 0 0 0 0 0 0 ',5 & & & & &R$, $, $, $,N    J    #) #) #)L    t    I I I, , ,4 /3 (0 (0 (0 (0 (0VQ Q Q Qh    '   ,    r   