
    ~Vji/                        U d dl Z d dlmZmZmZ d dlZd dlmZ d dlmc m	Z
 dedefdZd ed          fZeeef         ed<    G d	 d
ej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Zdedededej        j        fdZ	 d"dededededededededee         defd Zdefd!ZdS )#    N)ListOptionalTuplexreturnc                 H    dddt          j        d| z  dz             z   z  z   S )a  The metric defined by ITU-T P.862 is often called 'PESQ score', which is defined
    for narrow-band signals and has a value range of [-0.5, 4.5] exactly. Here, we use the metric
    defined by ITU-T P.862.2, commonly known as 'wide-band PESQ' and will be referred to as "PESQ score".

    Args:
        x (float): Narrow-band PESQ score.

    Returns:
        (float): Wide-band PESQ score.
    g+?g@   g;pΈgׁsF@)mathexp)r   s    [/root/voice-cloning/.venv/lib/python3.11/site-packages/torchaudio/models/squim/objective.pytransform_wb_pesq_ranger   	   s,     Ma$(7Q;3G*H*H&HIII          ?g      @	PESQRangec                   ^     e Zd Zddeeef         ddf fdZdej        dej        fdZ xZ	S )	RangeSigmoid        r   	val_ranger   Nc                     t          t          |                                            t          |t                    rt          |          dk    sJ || _        t          j                    | _	        d S )N   )
superr   __init__
isinstancetuplelenr   nnSigmoidsigmoid)selfr   	__class__s     r   r   zRangeSigmoid.__init__    s]    lD!!**,,,)U++CI!0C0C0CC.7*,*,,r   r   c                     |                      |          | j        d         | j        d         z
  z  | j        d         z   }|S )Nr	   r   )r   r   r    r   outs      r   forwardzRangeSigmoid.forward&   s;    ll1oo!2T^A5F!FG$.YZJ[[
r   )r   )
__name__
__module____qualname__r   floatr   torchTensorr%   __classcell__r!   s   @r   r   r      s        7 7%u"5 7t 7 7 7 7 7 7 %,        r   r   c                   V     e Zd ZdZddededdf fdZd	ej        dej        fd
Z xZ	S )EncoderzEncoder module that transform 1D waveform to 2D representations.

    Args:
        feat_dim (int, optional): The feature dimension after Encoder module. (Default: 512)
        win_len (int, optional): kernel size in the Conv1D layer. (Default: 32)
           feat_dimwin_lenr   Nc                     t          t          |                                            t          j        d|||dz  d          | _        d S )Nr	   r   F)stridebias)r   r/   r   r   Conv1dconv1d)r    r2   r3   r!   s      r   r   zEncoder.__init__3   sD    gt%%'''i8WW\PUVVVr   r   c                     |                     d          }t          j        |                     |                    }|S )a  Apply waveforms to convolutional layer and ReLU layer.

        Args:
            x (torch.Tensor): Input waveforms. Tensor with dimensions `(batch, time)`.

        Returns:
            (torch,Tensor): Feature Tensor with dimensions `(batch, channel, frame)`.
        r	   dim)	unsqueezeFrelur8   r#   s      r   r%   zEncoder.forward8   s7     kkak  fT[[%%&&
r   )r0   r1   )
r&   r'   r(   __doc__intr   r*   r+   r%   r,   r-   s   @r   r/   r/   +   s         W W WS W$ W W W W W W
 %,        r   r/   c                   Z     e Zd Zddededededdf
 fdZd	ej        dej        fd
Z	 xZ
S )	SingleRNNr   rnn_type
input_sizehidden_sizedropoutr   Nc                    t          t          |                                            || _        || _        || _         t          t          |          ||d|dd          | _        t          j	        |dz  |          | _
        d S )Nr	   T)rF   batch_firstbidirectionalr   )r   rB   r   rC   rD   rE   getattrr   rnnLinearproj)r    rC   rD   rE   rF   r!   s        r   r   zSingleRNN.__init__G   s    i''))) $&&;gb(&;&;'
 '
 '
 IkAoz::			r   r   c                 `    |                      |          \  }}|                     |          }|S N)rK   rM   )r    r   r$   _s       r   r%   zSingleRNN.forwardY   s)    !Qiinn
r   )r   )r&   r'   r(   strr@   r)   r   r*   r+   r%   r,   r-   s   @r   rB   rB   F   s        ; ; ;# ;C ;RW ;bf ; ; ; ; ; ;$ %,        r   rB   c                       e Zd ZdZ	 	 	 	 	 	 	 dd	ed
ededededededdf fdZdej        de	ej        ef         fdZ
dej        de	ej        ef         fdZdej        dedej        fdZdej        dej        fdZ xZS )DPRNNa  *Dual-path recurrent neural networks (DPRNN)* :cite:`luo2020dual`.

    Args:
        feat_dim (int, optional): The feature dimension after Encoder module. (Default: 64)
        hidden_dim (int, optional): Hidden dimension in the RNN layer of DPRNN. (Default: 128)
        num_blocks (int, optional): Number of DPRNN layers. (Default: 6)
        rnn_type (str, optional): Type of RNN in DPRNN. Valid options are ["RNN", "LSTM", "GRU"]. (Default: "LSTM")
        d_model (int, optional): The number of expected features in the input. (Default: 256)
        chunk_size (int, optional): Chunk size of input for DPRNN. (Default: 100)
        chunk_stride (int, optional): Stride of chunk input for DPRNN. (Default: 50)
    @         LSTM   d   2   r2   
hidden_dim
num_blocksrC   d_model
chunk_sizechunk_strider   Nc                 J   t          t          |                                            || _        t	          j        g           | _        t	          j        g           | _        t	          j        g           | _        t	          j        g           | _	        t          |          D ]}| j                            t          |||                     | j                            t          |||                     | j                            t	          j        d|d                     | j	                            t	          j        d|d                     t	          j        t	          j        ||d          t	          j                              | _        || _        || _        d S )Nr	   g:0yE>)eps)r   rS   r   r\   r   
ModuleListrow_rnncol_rnnrow_normcol_normrangeappendrB   	GroupNorm
SequentialConv2dPReLUconvr^   r_   )
r    r2   r[   r\   rC   r]   r^   r_   rP   r!   s
            r   r   zDPRNN.__init__m   s^    	eT##%%%$}R((}R((b))b))z"" 	F 	FAL	(Hj I IJJJL	(Hj I IJJJM  at!D!D!DEEEM  at!D!D!DEEEEMIh++HJJ
 
	 %(r   r   c                     |j         d         }| j        | j        || j        z  z   | j        z  z
  }t          j        || j        || j        z   g          }||fS )N)shaper^   r_   r=   pad)r    r   seq_lenrestr$   s        r   	pad_chunkzDPRNN.pad_chunk   s\    '"+$"3g6O"OSWSb!bbeA)4$2C+CDEEDyr   c                    |                      |          \  }}|j        \  }}}|d d d d d | j         f                                                             ||d| j                  }|d d d d | j        d f                                                             ||d| j                  }t          j        ||gd          }|                    ||d| j                                      dd                                          }||fS )Nro      r:   r   )	rt   rp   r_   
contiguousviewr^   r*   cat	transpose)	r    r   r$   rs   
batch_sizer2   rr   	segments1	segments2s	            r   chunkingzDPRNN.chunking   s   NN1%%	T(+	%
Hg1112!2 2223>>@@EEjRZ\^`d`opp	111d/1112==??DDZQY[]_c_noo	iI.A666hhz8RAAKKAqQQ\\^^Dyr   rs   c                 ^   |j         \  }}}}|                    dd                                                              ||d| j        dz            }|d d d d d d d | j        f                                                             ||d          d d d d | j        d f         }|d d d d d d | j        d f                                                             ||d          d d d d d | j         f         }||z   }|dk    r|d d d d d | f         }|                                }|S )Nr   rv   ro   r   )rp   rz   rw   rx   r^   r_   )	r    r   rs   r{   r;   rP   r$   out1out2s	            r   mergingzDPRNN.merging   sh    !
CAkk!Q**,,11*c2tYZGZ[[111aaa-do--.99;;@@SRTUUVWVWVWYZYZYZ\`\m\o\oVop111aaaDO---.99;;@@SRTUUVWVWVWYZYZYZ\p_c_p^p\pVpqTk!88aaaFdUFl#Cnn
r   c                    |                      |          \  }}|j        \  }}}}|}t          | j        | j        | j        | j                  D ]f\  }}	}
}|                    dddd                                          	                    ||z  |d                                          } ||          }|	                    |||d                              dddd                                          } |	|          }||z   }|                    dddd                                          	                    ||z  |d                                          } |
|          }|	                    |||d                              dddd                                          } ||          }||z   }h| 
                    |          }|                     ||          }|                    dd                                          }|S )Nr   rv   r   r	   ro   )r~   rp   ziprc   re   rd   rf   permuterw   rx   rm   r   rz   )r    r   rs   r{   rP   dim1dim2r$   rc   re   rd   rf   row_inrow_outcol_incol_outs                   r   r%   zDPRNN.forward   s   --""4$%G!
AtT47dmUYUacgcp4q4q 	  	 0GXw[[Aq!,,7799>>zD?PRVXZ[[ffhhFgfooGll:tT2>>FFq!QPQRR]]__Ghw''G-C[[Aq!,,7799>>zD?PRVXZ[[ffhhFgfooGll:tT2>>FFq!QPQRR]]__Ghw''G-CCiinnll3%%mmAq!!,,..
r   )rT   rU   rV   rW   rX   rY   rZ   )r&   r'   r(   r?   r@   rQ   r   r*   r+   r   rt   r~   r   r%   r,   r-   s   @r   rS   rS   `   sd       
 
 ) )) ) 	)
 ) ) ) ) 
) ) ) ) ) )<5< E%,2C,D    	%, 	5s1B+C 	 	 	 		 	S 	U\ 	 	 	 	 %,        r   rS   c                   N     e Zd Zddeddf fdZdej        dej        fdZ xZS )	AutoPoolr	   pool_dimr   Nc                    t          t          |                                            || _        t	          j        |          | _        |                     dt	          j        t          j
        d                               d S )Nr:   alphar	   )r   r   r   r   r   Softmaxsoftmaxregister_parameter	Parameterr*   ones)r    r   r!   s     r   r   zAutoPool.__init__   sh    h&&(((%*,**B*B*Bejmm)D)DEEEEEr   r   c                     |                      t          j        || j                            }t          j        t          j        ||          | j                  }|S )Nr:   )r   r*   mulr   sumr   )r    r   weightr$   s       r   r%   zAutoPool.forward   sH    ei4:6677i	!V,,$-@@@
r   )r	   )	r&   r'   r(   r@   r   r*   r+   r%   r,   r-   s   @r   r   r      s        F F FT F F F F F F %,        r   r   c                   ~     e Zd ZdZdej        dej        dej        f fdZdej	        de
ej	                 fdZ xZS )	SquimObjectivea  Speech Quality and Intelligibility Measures (SQUIM) model that predicts **objective** metric scores
    for speech enhancement (e.g., STOI, PESQ, and SI-SDR).

    Args:
        encoder (torch.nn.Module): Encoder module to transform 1D waveform to 2D feature representation.
        dprnn (torch.nn.Module): DPRNN module to model sequential feature.
        branches (torch.nn.ModuleList): Transformer branches in which each branch estimate one objective metirc score.
    encoderdprnnbranchesc                     t          t          |                                            || _        || _        || _        d S rO   )r   r   r   r   r   r   )r    r   r   r   r!   s       r   r   zSquimObjective.__init__   s:     	nd##,,...
 r   r   r   c                 b   |j         dk    rt          d|j          d          |t          j        |dz  dd          dz  dz  z  }|                     |          }|                     |          }g }| j        D ]4}|                     ||                              d	                     5|S )
z
        Args:
            x (torch.Tensor): Input waveforms. Tensor with dimensions `(batch, time)`.

        Returns:
            List(torch.Tensor): List of score Tenosrs. Each Tensor is with dimension `(batch,)`.
        r   z/The input must be a 2D Tensor. Found dimension .r	   T)r;   keepdimg      ?   r:   )	ndim
ValueErrorr*   meanr   r   r   rh   squeeze)r    r   r$   scoresbranchs        r   r%   zSquimObjective.forward   s     6Q;;XqvXXXYYYAqDa666#=BCll1oojjoom 	6 	6FMM&&++--!-445555r   )r&   r'   r(   r?   r   Modulerb   r   r*   r+   r   r%   r,   r-   s   @r   r   r      s         	!	! y	! -		! 	! 	! 	! 	! 	! $u|*<        r   r   r]   nheadmetricc                    t          j        | || dz  dd          }t                      }|dk    r[t          j        t          j        | |           t          j                    t          j        | d          t                                }n|dk    rbt          j        t          j        | |           t          j                    t          j        | d          t          t                              }nMt          j        t          j        | |           t          j                    t          j        | d                    }t          j        |||          S )	al  Create branch module after DPRNN model for predicting metric score.

    Args:
        d_model (int): The number of expected features in the input.
        nhead (int): Number of heads in the multi-head attention model.
        metric (str): The metric name to predict.

    Returns:
        (nn.Module): Returned module to predict corresponding metric score.
       r   T)rF   rH   stoir	   pesq)r   )r   TransformerEncoderLayerr   rj   rL   rl   r   r   )r]   r   r   layer1layer2layer3s         r   _create_branchr      s    '!S^bcccFZZFIgw''HJJIgq!!NN	
 
 
6		Igw''HJJIgq!!9---	
 
 %'M")GW2M2Mrxzz[][delno[p[p$q$q=000r   r2   r3   r[   r\   rC   r^   r_   c	           	         ||dz  }t          | |          }	t          | ||||||          }
t          j        t	          ||d          t	          ||d          t	          ||d          g          }t          |	|
|          S )a  Build a custome :class:`torchaudio.models.squim.SquimObjective` model.

    Args:
        feat_dim (int, optional): The feature dimension after Encoder module.
        win_len (int): Kernel size in the Encoder module.
        d_model (int): The number of expected features in the input.
        nhead (int): Number of heads in the multi-head attention model.
        hidden_dim (int): Hidden dimension in the RNN layer of DPRNN.
        num_blocks (int): Number of DPRNN layers.
        rnn_type (str): Type of RNN in DPRNN. Valid options are ["RNN", "LSTM", "GRU"].
        chunk_size (int): Chunk size of input for DPRNN.
        chunk_stride (int or None, optional): Stride of chunk input for DPRNN.
    Nr   r   r   sisdr)r/   rS   r   rb   r   r   )r2   r3   r]   r   r[   r\   rC   r^   r_   r   r   r   s               r   squim_objective_modelr     s    0 !Qh((G(J
HgzS_``E}7E6227E6227E733	
 H '5(333r   c            
      0    t          dddddddd          S )zSBuild :class:`torchaudio.models.squim.SquimObjective` model with default arguments.rX   rT   r   r   rW   G   )r2   r3   r]   r   r[   r\   rC   r^   )r    r   r   squim_objective_baser   ;  s1     	 	 	 	r   rO   )r
   typingr   r   r   r*   torch.nnr   torch.nn.functional
functionalr=   r)   r   r   __annotations__r   r   r/   rB   rS   r   r   r@   rQ   modulesr   r   r   r   r   r   <module>r      s    ( ( ( ( ( ( ( ( ( (                Ju J J J J J  C  	"	5   	 	 	 	 	29 	 	 	    bi   6    	   4] ] ] ] ]BI ] ] ]@
 
 
 
 
ry 
 
 
% % % % %RY % % %P1C 1 1S 1RZ=N 1 1 1 1R #'#4 #4#4#4 #4 	#4
 #4 #4 #4 #4 3-#4 #4 #4 #4 #4Ln      r   