
    ~Vji
                         d dl mZmZ d dlmZmZ d dlZd dlmZ	 d dlm
Z
 d dlmZ  G d de          Z G d d	e          Zdd
e
dee         defdZ G d de          Zd Zd Z G d de          ZdS )    )ABCabstractmethod)DictListN)Tensor)	TokenSpanc                   R    e Zd Zedee         deee                  fd            ZdS )
ITokenizer
transcriptreturnc                     dS )a  Tokenize the given transcript (list of word)

        .. note::

           The toranscript must be normalized.

        Args:
            transcript (list of str): Transcript (list of word).

        Returns:
            (list of int): List of token sequences
        N selfr   s     `/root/voice-cloning/.venv/lib/python3.11/site-packages/torchaudio/pipelines/_wav2vec2/aligner.py__call__zITokenizer.__call__             N)__name__
__module____qualname__r   r   strr   r   r   r   r
   r
   
   sL        49 d3i    ^  r   r
   c                   ^    e Zd Zdeeef         fdZdee         deee                  fdZdS )	Tokenizer
dictionaryc                     || _         d S Nr   )r   r   s     r   __init__zTokenizer.__init__   s    $r   r   r   c                        fd|D             S )Nc                 ,    g | ]}fd |D             S )c                 *    g | ]}j         |         S r   r   ).0cr   s     r   
<listcomp>z1Tokenizer.__call__.<locals>.<listcomp>.<listcomp>    s     222#222r   r   )r#   wordr   s     r   r%   z&Tokenizer.__call__.<locals>.<listcomp>    s.    JJJt2222T222JJJr   r   r   s   ` r   r   zTokenizer.__call__   s    JJJJzJJJJr   N)	r   r   r   r   r   intr   r   r   r   r   r   r   r      sm        %4S> % % % %K49 Kd3i K K K K K Kr   r   emissiontokensblankc                    | j         }|                     d          } t          j        |gt          j        |          }t          j        | ||          \  }}|                                }|d         |d         }}||fS )Nr   )dtypedevicer*   )r-   	unsqueezetorchtensorint32Fforced_alignexp)r(   r)   r*   r-   targetsaligned_tokensscoress          r   _align_emission_and_tokensr9   #   s{    _F!!!$$HlF85;vFFFG^HgUKKKNFZZ\\F+A.q	FN6!!r   c            	       b    e Zd Zededeee                  deee                  fd            ZdS )IAlignerr(   r)   r   c                     dS )a  Generate list of time-stamped token sequences

        Args:
            emission (Tensor): Sequence of token probability distributions in log-domain.
                Shape: `(time, tokens)`.
            tokens (list of integer sequence): Tokenized transcript.
                Output from :py:class:`torchaudio.pipelines.Wav2Vec2FABundle.Tokenizer`.

        Returns:
            (list of TokenSpan sequence): Tokens with time stamps and scores.
        Nr   )r   r(   r)   s      r   r   zIAligner.__call__0   r   r   N)	r   r   r   r   r   r   r'   r   r   r   r   r   r;   r;   /   sX         d3i T$y/EZ    ^  r   r;   c                     t          |           t          |          k    sJ d}g }|D ]'}|                    | |||z                       ||z  }(|S )Nr   )lensumappend)list_lengthsiretls        r   
_unflattenrF   ?   sg    u::W%%%%	A
C  

5QU#$$$	QJr   c                     d | D             S )Nc                     g | ]	}|D ]}|
S r   r   )r#   rA   items      r   r%   z_flatten.<locals>.<listcomp>J   s%    <<<Ue<<dD<<<<r   r   )nested_lists    r   _flattenrK   I   s    <<k<<<<r   c                   X    e Zd Zd Zdedeee                  deee                  fdZdS )Alignerc                     || _         d S r   r.   )r   r*   s     r   r   zAligner.__init__N   s    


r   r(   r)   r   c                     |j         dk    rt          d|j                   t          |t	          |          | j                  \  }}t          j        ||          }t          |d |D                       S )N   z&The input emission must be 2D. Found: c                 ,    g | ]}t          |          S r   )r>   )r#   tss     r   r%   z$Aligner.__call__.<locals>.<listcomp>W   s    !;!;!;b#b''!;!;!;r   )	ndim
ValueErrorshaper9   rK   r*   r3   merge_tokensrF   )r   r(   r)   r7   r8   spanss         r   r   zAligner.__call__Q   sz    =AVhnVVWWW!;HhvFVFVX\Xb!c!c~v66%!;!;F!;!;!;<<<r   N)	r   r   r   r   r   r   r'   r   r   r   r   r   rM   rM   M   s]          = =d3i =T$y/EZ = = = = = =r   rM   )r   )abcr   r   typingr   r   r0   torchaudio.functional
functionalr3   r   r   r
   r   r'   r9   r;   rF   rK   rM   r   r   r   <module>r\      s~   # # # # # # # #          ! ! ! ! ! !       + + + + + +       "K K K K K
 K K K	" 	" 	"c 	"3 	" 	" 	" 	"    s      = = =
= 
= 
= 
= 
=h 
= 
= 
= 
= 
=r   