
    2;ji>                         d dl m Z  d dlmZmZ d dlmZ d dlmZ d dlZ	d dl
mZ d dlmZmZmZ d dlmZmZ d dlmZ  G d	 d
ee          ZdS )    )array)IterableMapping)Number)
itemgetterN)BaseEstimatorTransformerMixin_fit_context)check_arraymetadata_routing)check_is_fittedc                        e Zd ZU dZdej        iZdegdgdgdZe	e
d<   ej        ddddd	Zdd
ddddZ ed          dd            Zd Z ed          dd            Ze	fdZd ZddZddZ fdZ xZS )DictVectorizera  Transforms lists of feature-value mappings to vectors.

    This transformer turns lists of mappings (dict-like objects) of feature
    names to feature values into Numpy arrays or scipy.sparse matrices for use
    with scikit-learn estimators.

    When feature values are strings, this transformer will do a binary one-hot
    (aka one-of-K) coding: one boolean-valued feature is constructed for each
    of the possible string values that the feature can take on. For instance,
    a feature "f" that can take on the values "ham" and "spam" will become two
    features in the output, one signifying "f=ham", the other "f=spam".

    If a feature value is a sequence or set of strings, this transformer
    will iterate over the values and will count the occurrences of each string
    value.

    However, note that this transformer will only do a binary one-hot encoding
    when feature values are of type string. If categorical features are
    represented as numeric values such as int or iterables of strings, the
    DictVectorizer can be followed by
    :class:`~sklearn.preprocessing.OneHotEncoder` to complete
    binary one-hot encoding.

    Features that do not occur in a sample (mapping) will have a zero value
    in the resulting array/matrix.

    For an efficiency comparison of the different feature extractors, see
    :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`.

    Read more in the :ref:`User Guide <dict_feature_extraction>`.

    Parameters
    ----------
    dtype : dtype, default=np.float64
        The type of feature values. Passed to Numpy array/scipy.sparse matrix
        constructors as the dtype argument.
    separator : str, default="="
        Separator string used when constructing new features for one-hot
        coding.
    sparse : bool, default=True
        Whether transform should produce scipy.sparse matrices.
    sort : bool, default=True
        Whether ``feature_names_`` and ``vocabulary_`` should be
        sorted when fitting.

    Attributes
    ----------
    vocabulary_ : dict
        A dictionary mapping feature names to feature indices.

    feature_names_ : list
        A list of length n_features containing the feature names (e.g., "f=ham"
        and "f=spam").

    See Also
    --------
    FeatureHasher : Performs vectorization using only a hash function.
    sklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical
        features encoded as columns of arbitrary data types.

    Examples
    --------
    >>> from sklearn.feature_extraction import DictVectorizer
    >>> v = DictVectorizer(sparse=False)
    >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
    >>> X = v.fit_transform(D)
    >>> X
    array([[2., 0., 1.],
           [0., 1., 3.]])
    >>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},
    ...                            {'baz': 1.0, 'foo': 3.0}]
    True
    >>> v.transform({'foo': 4, 'unseen_feature': 3})
    array([[0., 0., 4.]])
    	dict_typeno_validationbooleandtype	separatorsparsesort_parameter_constraints=Tc                >    || _         || _        || _        || _        d S Nr   )selfr   r   r   r   s        e/root/voice-cloning/.venv/lib/python3.11/site-packages/sklearn/feature_extraction/_dict_vectorizer.py__init__zDictVectorizer.__init__h   s"    
"			    FNfittingtransformingindicesvaluesc                   |D ]}	t          |	t                    r|| j        |	}
d}	n t          dt	          |	           d          |r+|
|vr't          |          ||
<   |                    |
           |rG|
|v rC|                    ||
                    |                    |                     |	                     dS )z)Add feature names for iterable of strings   zUnsupported type z; in iterable value. Only iterables of string are supported.N)
isinstancestrr   	TypeErrortypelenappendr   )r   fvfeature_namesvocabr!   r"   r#   r$   vvfeature_names              r   _add_iterable_elementz$DictVectorizer._add_iterable_elementn   s      	. 	.B"c"" +,1dnnbbA!R ! ! !  
  3<u44&)-&8&8l#$$\222 . 5 5u\2333djjnn---!	. 	.r   )prefer_skip_nested_validationc                 x   g }i }|D ]}|                                 D ]\  }}t          |t                    r|| j        |}nt          |t                    s||}njt          |t
                    r&t          dt          |           d| d| d          t          |t                    rd}| 	                    ||||           |+||vr't          |          ||<   |                    |           ڌ| j        r-|                                 d t          |          D             }|| _        || _        | S )a)  Learn a list of feature name -> indices mappings.

        Parameters
        ----------
        X : Mapping or iterable over Mappings
            Dict(s) or Mapping(s) from feature names (arbitrary Python
            objects) to feature values (strings or convertible to dtype).

            .. versionchanged:: 0.24
               Accepts multiple string values for one categorical feature.

        y : (ignored)
            Ignored parameter.

        Returns
        -------
        self : object
            DictVectorizer class instance.
        NzUnsupported value type  for : z$.
Mapping objects are not supported.c                     i | ]\  }}||	S  r9   ).0ir-   s      r   
<dictcomp>z&DictVectorizer.fit.<locals>.<dictcomp>   s    ???daQ???r   )itemsr'   r(   r   r   r   r)   r*   r   r3   r+   r,   r   	enumeratefeature_names_vocabulary_)	r   Xyr/   r0   xr-   r.   r2   s	            r   fitzDictVectorizer.fit   s   *  	; 	;A		 ; ;1a%% K/0q$..!!#DLL6** 
Kqy#$LL7++ K#=$q'' = = = =$%= = =  
  8,, K#'L..q!]EJJJ+#500.1-.@.@l+%,,\:::%;( 9 	@   ??i&>&>???E+ r   c                 F   t          d          j        dk    s
J d            | j        }|rg }i }n| j        }| j        }d}t          |t                    r|gn|}t          d          }dg}g }	|D ]v}
|
                                D ];\  }}t          |t                    r|| j	        |}d}nt          |t                    s||}nt          |t                    s5t          |t                    r d }|                     ||||||||	           n6t          dt          |           d	| d
| dt          |           d	          |t|r+||vr't          |          ||<   |                    |           ||v rC|                    ||                    |	                    |                     |                     =|                    t          |                     xt          |          dk    rt#          d          t%          j        |t$          j                  }t          |          dz
  t          |          f}t+          j        |	||f||          }|ry| j        rr|                                 t%          j        t          |          t$          j                  }t5          |          D ]\  }}||         ||<   |||<   |d d |f         }| j        r|                                 n|                                }|r|| _        || _        |S )Nr;      zsizeof(int) != 4 on your platform; please report this at https://github.com/scikit-learn/scikit-learn/issues and include the output from platform.platform() in your bug reportTr   r&   r    zUnsupported value Type r6   r7   z.
z objects are not supported.zSample sequence X is empty.r   )shaper   )r   itemsizer   r?   r@   r'   r   r=   r(   r   r   r   r3   r)   r*   r+   r,   
ValueErrornp
frombufferintcsp
csr_matrixr   emptyint32r>   r   sort_indicestoarray)r   rA   r!   r   r/   r0   r"   r#   indptrr$   rC   r-   r.   r2   rH   result_matrix	map_indexnew_vals                     r   
_transformzDictVectorizer._transform   s   
 Szz"a'''N ('' 
 	%MEE /M$E a))0QCCq**   #	( #	(A		  5  51a%% /0q$..!!#DLAA6** qy#$LL#Aw// Jq(4K4K #'L..% '%1 '% / 	 	 	 	 $@$q'' @ @ @ @$%@ @77@ @ @    + ;<u#<#<.1-.@.@l+%,,\:::#u,,u\':;;;djjmm444MM#g,,''''v;;!:;;;-rw777Vq#e**-Wf%U%
 
 

  	8ty 	8   ]!3!328DDDI'66 # #
%*1X	'""a)!!!Y,7M; 	4&&(((()1133M 	%"/D$Dr   c                 0    |                      |d          S )a  Learn a list of feature name -> indices mappings and transform X.

        Like fit(X) followed by transform(X), but does not require
        materializing X in memory.

        Parameters
        ----------
        X : Mapping or iterable over Mappings
            Dict(s) or Mapping(s) from feature names (arbitrary Python
            objects) to feature values (strings or convertible to dtype).

            .. versionchanged:: 0.24
               Accepts multiple string values for one categorical feature.

        y : (ignored)
            Ignored parameter.

        Returns
        -------
        Xa : {array, sparse matrix}
            Feature vectors; always 2-d.
        Tr!   )rX   )r   rA   rB   s      r   fit_transformzDictVectorizer.fit_transform&  s    0 q$///r   c                    t          | d           t          |ddg          }|j        d         }| j        }fdt	          |          D             }t          j        |          r;t          |                                 D ]\  }}|||f         ||         ||         <   nMt          |          D ]=\  }}t          ||ddf                   D ]\  }}	|	dk    r|||f         |||         <   >|S )aW  Transform array or sparse matrix X back to feature mappings.

        X must have been produced by this DictVectorizer's transform or
        fit_transform method; it may only have passed through transformers
        that preserve the number of features and their order.

        In the case of one-hot/one-of-K coding, the constructed feature
        names and values are returned rather than the original ones.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Sample matrix.
        dict_type : type, default=dict
            Constructor for feature mappings. Must conform to the
            collections.Mapping API.

        Returns
        -------
        X_original : list of dict_type objects of shape (n_samples,)
            Feature mappings for the samples in X.
        r?   csrcsc)accept_sparser   c                 $    g | ]}             S r9   r9   )r:   _r   s     r   
<listcomp>z4DictVectorizer.inverse_transform.<locals>.<listcomp>^  s    777777r   N)
r   r   rH   r?   rangerN   issparsezipnonzeror>   )
r   rA   r   	n_samplesnamesdictsr;   jdr.   s
     `       r   inverse_transformz DictVectorizer.inverse_transform@  s#   . 	./// %888GAJ	#7777eI&6&6777;q>> 	.QYY[[) - -1%&q!tWaq""- "%(( . .1%a111g.. . .DAqAvv&'1g%(. r   c                 T    t          | ddg           |                     |d          S )a  Transform feature->value dicts to array or sparse matrix.

        Named features not encountered during fit or fit_transform will be
        silently ignored.

        Parameters
        ----------
        X : Mapping or iterable over Mappings of shape (n_samples,)
            Dict(s) or Mapping(s) from feature names (arbitrary Python
            objects) to feature values (strings or convertible to dtype).

        Returns
        -------
        Xa : {array, sparse matrix}
            Feature vectors; always 2-d.
        r?   r@   FrZ   )r   rX   )r   rA   s     r   	transformzDictVectorizer.transformk  s0    " 	/?@@@q%000r   c                     t          | d           t          d | j        D                       rd | j        D             }n| j        }t          j        |t
                    S )a^  Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Not used, present here for API consistency by convention.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        r?   c              3   B   K   | ]}t          |t                     V  d S r   )r'   r(   r:   names     r   	<genexpr>z7DictVectorizer.get_feature_names_out.<locals>.<genexpr>  s/      IIT:dC(((IIIIIIr   c                 ,    g | ]}t          |          S r9   )r(   rq   s     r   rb   z8DictVectorizer.get_feature_names_out.<locals>.<listcomp>  s    GGG4SYYGGGr   rG   )r   anyr?   rK   asarrayobject)r   input_featuresr/   s      r   get_feature_names_outz$DictVectorizer.get_feature_names_out  sl     	.///IIT5HIIIII 	0GG43FGGGMM /Mz-v6666r   c                 6   t          | d           |st          j        |          d         }| j        }i }|D ]}t	          |          |||         <   || _        d t          |                                t          d                    D             | _        | S )a=  Restrict the features to those in support using feature selection.

        This function modifies the estimator in-place.

        Parameters
        ----------
        support : array-like
            Boolean mask or list of indices (as returned by the get_support
            member of feature selectors).
        indices : bool, default=False
            Whether support is a list of indices.

        Returns
        -------
        self : object
            DictVectorizer class instance.

        Examples
        --------
        >>> from sklearn.feature_extraction import DictVectorizer
        >>> from sklearn.feature_selection import SelectKBest, chi2
        >>> v = DictVectorizer()
        >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
        >>> X = v.fit_transform(D)
        >>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])
        >>> v.get_feature_names_out()
        array(['bar', 'baz', 'foo'], ...)
        >>> v.restrict(support.get_support())
        DictVectorizer()
        >>> v.get_feature_names_out()
        array(['bar', 'foo'], ...)
        r?   r   c                     g | ]\  }}|S r9   r9   )r:   r-   r;   s      r   rb   z+DictVectorizer.restrict.<locals>.<listcomp>  s)     
 
 
!QA
 
 
r   r&   )key)	r   rK   wherer?   r+   r@   sortedr=   r   )r   supportr#   rh   	new_vocabr;   s         r   restrictzDictVectorizer.restrict  s    B 	./// 	+hw''*G#	 	1 	1A"%i..IeAh$
 
 !2!2
1FFF
 
 
 r   c                 x    t                                                      }d|j        _        d|j        _        |S )NTF)super__sklearn_tags__
input_tagsdicttwo_d_array)r   tags	__class__s     r   r   zDictVectorizer.__sklearn_tags__  s1    ww''))#&+#r   r   )F)__name__
__module____qualname____doc__r   UNUSED4_DictVectorizer__metadata_request__inverse_transformr(   r   r   __annotations__rK   float64r   r3   r
   rD   rX   r[   rl   rn   ry   r   r   __classcell__)r   s   @r   r   r      s        J JZ .9:J:Q,R) !U+	$ $D    !#
c$T      . . . . .> \5553 3 3 653ja a aF \5550 0 0 6502 .2 ) ) ) )V1 1 1(7 7 7 7(0 0 0 0d        r   r   )r   collections.abcr   r   numbersr   operatorr   numpyrK   scipy.sparser   rN   sklearn.baser   r	   r
   sklearn.utilsr   r   sklearn.utils.validationr   r   r9   r   r   <module>r      s          - - - - - - - -                       F F F F F F F F F F 7 7 7 7 7 7 7 7 4 4 4 4 4 4x x x x x%} x x x x xr   