
    ~Vji                     B    d dl mZmZ dgZ G d dej                  ZdS )    )nnTensor
Wav2Letterc            	       F     e Zd ZdZddedededd	f fd
ZdedefdZ xZ	S )r   au  Wav2Letter model architecture from *Wav2Letter: an End-to-End ConvNet-based Speech
    Recognition System* :cite:`collobert2016wav2letter`.

    See Also:
        * `Training example <https://github.com/pytorch/audio/tree/release/0.12/examples/pipeline_wav2letter>`__

    Args:
        num_classes (int, optional): Number of classes to be classified. (Default: ``40``)
        input_type (str, optional): Wav2Letter can use as input: ``waveform``, ``power_spectrum``
         or ``mfcc`` (Default: ``waveform``).
        num_features (int, optional): Number of input features that the network will receive (Default: ``1``).
    (   waveform   num_classes
input_typenum_featuresreturnNc                 "   t                                                       |dk    rdn|}t          j        t          j        |dddd          t          j        d          t          j        ddd	d
d          t          j        d          t          j        ddd	d
d          t          j        d          t          j        ddd	d
d          t          j        d          t          j        ddd	d
d          t          j        d          t          j        ddd	d
d          t          j        d          t          j        ddd	d
d          t          j        d          t          j        ddd	d
d          t          j        d          t          j        dddd
d          t          j        d          t          j        ddd
d
d          t          j        d          t          j        d|d
d
d          t          j        d                    }|dk    rYt          j        t          j        |dddd          t          j        d                    }t          j        ||          | _        |dv r	|| _        d S d S )Nr      0         )in_channelsout_channelskernel_sizestridepaddingT)inplace   r	      i         r      -   )power_spectrummfcc)super__init__r   
SequentialConv1dReLUacoustic_model)selfr
   r   r   acoustic_num_featuresr&   waveform_model	__class__s          V/root/voice-cloning/.venv/lib/python3.11/site-packages/torchaudio/models/wav2letter.pyr"   zWav2Letter.__init__   s   '1Z'?'?\I"7cWYbcmopppGD!!!I#CQqZ[\\\GD!!!I#CQqZ[\\\GD!!!I#CQqZ[\\\GD!!!I#CQqZ[\\\GD!!!I#CQqZ[\\\GD!!!I#CQqZ[\\\GD!!!I#CQqZ[\\\GD!!!I#DbQR\^___GD!!!I$TqQR\]^^^GD!!!I$[aXYcdeeeGD!!!-
 
2 ##]	lRU^akmnnn%%% N #%-"O"OD333"0D 43    xc                 r    |                      |          }t          j                            |d          }|S )z
        Args:
            x (torch.Tensor): Tensor of dimension (batch_size, num_features, input_length).

        Returns:
            Tensor: Predictor tensor of dimension (batch_size, number_of_classes, input_length).
        r	   )dim)r&   r   
functionallog_softmax)r'   r-   s     r+   forwardzWav2Letter.forward=   s6     ""M%%aQ%//r,   )r   r   r	   )
__name__
__module____qualname____doc__intstrr"   r   r2   __classcell__)r*   s   @r+   r   r      s         %1 %1C %1# %1Z] %1fj %1 %1 %1 %1 %1 %1N F        r,   N)torchr   r   __all__Moduler    r,   r+   <module>r>      si            
@ @ @ @ @ @ @ @ @ @r,   