
    0;jis3                     t   d dl Z d dlmZ d dlmc mZ d dlmZmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZ ddlmZ ddlmZ de j        d	e j        d
e j        fdZ G d de j        j                  Z G d de          Z G d de          Z G d de j        j                  Z G d dej                  Z dS )    N)pack	rearrangerepeat   )add_optional_chunk_mask)SinusoidalPosEmbBlock1DResnetBlock1DDownsample1DTimestepEmbedding
Upsample1DBasicTransformerBlock)get_intmeanflow_time_mixermaskdtypereturnc                     | j         t          j        k    sJ |t          j        t          j        t          j        fv sJ |                     |          } d| z
  dz  } | S )Ng      ?g    _)r   torchboolfloat32bfloat16float16to)r   r   s     Y/root/voice-cloning/.venv/lib/python3.11/site-packages/chatterbox/models/s3gen/decoder.pymask_to_biasr      sY    :####U]ENEMBBBBB775>>D $J("DK    c                   >     e Zd Zdedef fdZdej        fdZ xZS )	Transposedim0dim1c                 d    t                                                       || _        || _        d S N)super__init__r    r!   )selfr    r!   	__class__s      r   r%   zTranspose.__init__'   s+    				r   xc                 F    t          j        || j        | j                  }|S r#   )r   	transposer    r!   )r&   r(   s     r   forwardzTranspose.forward,   s    OAty$)44r   	__name__
__module____qualname__intr%   r   Tensorr+   __classcell__r'   s   @r   r   r   &   sh        S       
        r   r   c                   L     e Zd Zdedef fdZdej        dej        fdZ xZS )CausalBlock1Ddimdim_outc           	      F   t          t          |                               ||           t          j                            t          ||d          t          dd          t	          j        |          t          dd          t	          j	                              | _
        d S )N   r      )r$   r5   r%   r   nn
SequentialCausalConv1dr   	LayerNormMishblock)r&   r6   r7   r'   s      r   r%   zCausalBlock1D.__init__2   s{    mT""++C999X((gq))aOOL!!aOOGII
 



r   r(   r   c                 <    |                      ||z            }||z  S r#   )r@   )r&   r(   r   outputs       r   r+   zCausalBlock1D.forward<   s!    AH%%}r   r,   r3   s   @r   r5   r5   1   sq        
C 
# 
 
 
 
 
 
 U\        r   r5   c            	       2     e Zd Zddedededef fdZ xZS )CausalResnetBlock1D   r6   r7   time_emb_dimgroupsc                     t          t          |                               ||||           t          ||          | _        t          ||          | _        d S r#   )r$   rD   r%   r5   block1block2)r&   r6   r7   rF   rG   r'   s        r   r%   zCausalResnetBlock1D.__init__B   sN    !4((11#wfUUU#C11#GW55r   )rE   )r-   r.   r/   r0   r%   r2   r3   s   @r   rD   rD   A   s^        6 6C 6# 6S 6# 6 6 6 6 6 6 6 6 6 6r   rD   c                   n     e Zd Z	 	 	 	 	 	 	 ddedededed	ed
edededdf fdZdej        f fdZ	 xZ
S )r=   r   TzerosNin_channelsout_channelskernel_sizestridedilationrG   biaspadding_moder   c                     t          t          |                               ||||d|||||	|
           |dk    sJ |dz
  df| _        d S )Nr   )paddingrQ   rG   rR   rS   devicer   r   )r$   r=   r%   causal_padding)r&   rM   rN   rO   rP   rQ   rG   rR   rS   rV   r   r'   s              r   r%   zCausalConv1d.__init__I   sp     	lD!!**;+634x28t8D28 	+ 	G 	G 	G {{{{*Q2r   r(   c                     t          j        || j                  }t          t          |                               |          }|S r#   )FpadrW   r$   r=   r+   )r&   r(   r'   s     r   r+   zCausalConv1d.forward_   s9    E!T()),%%--a00r   )r   r   r   TrL   NN)r-   r.   r/   r0   r   strr%   r   r1   r+   r2   r3   s   @r   r=   r=   H   s         #3 33 3 	3
 3 3 3 3 3 
3 3 3 3 3 3,          r   r=   c                   \     e Zd Zddddgddddd	d
df fd	Zed             Zd ZddZ xZS )ConditionalDecoderi@  P   T   g        @         rE   geluFc                   	
 t                                                       t          |          }|| _        || _        || _        || _        t          |          | _        |d         dz  }t          ||d          | _
        t          j        g           | _        t          j        g           | _        t          j        g           | _        d| _        |t#          t%          |                    D ]}}||         |t%          |          dz
  k    }| j        rt'          ||          nt)          ||          }t          j        
	fdt#          |          D                       }|st+                    n/| j        rt-          d          nt          j        dd	          }| j                            t          j        |||g                     t#          |          D ]}|d
         }|d
         }| j        rt'          ||          nt)          ||          }t          j        
	fdt#          |          D                       }| j                            t          j        ||g                     |ddd
         |d         fz   }t#          t%          |          dz
            D ]}||         dz  }||dz            |t%          |          dz
  k    }| j        rt'          ||          nt)          ||          }t          j        
	fdt#          |          D                       }|st3          d          n/| j        rt-          d          nt          j        dd	          }| j                            t          j        |||g                     | j        rt5          |d
         |d
                   nt7          |d
         |d
                   | _        t          j        |d
         | j        d          | _        |                                  d| _        | j        rtA          |          | _        dS dS )z
        This decoder requires an input with the same shape of the target. So, if your text content
        is shorter or longer than the outputs, please re-sampling it before feeding to the decoder.
        r   ra   silu)rM   time_embed_dimact_fnr   )r6   r7   rF   c           	      8    g | ]}t                     S )r6   num_attention_headsattention_head_dimdropoutactivation_fnr   .0_rg   rk   rl   	num_headsoutput_channels     r   
<listcomp>z/ConditionalDecoder.__init__.<locals>.<listcomp>   I     	 	 	  **,5+= '&,  	 	 	r   r9   )rU   c           	      8    g | ]}t                     S ri   r   rn   s     r   rs   z/ConditionalDecoder.__init__.<locals>.<listcomp>   rt   r   Nr:   c           	      8    g | ]}t                     S ri   r   rn   s     r   rs   z/ConditionalDecoder.__init__.<locals>.<listcomp>   rt   r   T)use_conv_transpose)!r$   r%   tuplemeanflowrM   rN   causalr   time_embeddingsr   time_mlpr;   
ModuleListdown_blocks
mid_blocks	up_blocksstatic_chunk_sizerangelenrD   r
   r   r=   Conv1dappendr   r5   r	   final_block
final_projinitialize_weightstime_embed_mixerr   )r&   rM   rN   r{   channelsrl   rk   n_blocksnum_mid_blocksrq   rg   rz   rf   iinput_channelis_lastresnettransformer_blocks
downsamplerp   upsamplerr   r'   s        ``  ``          @r   r%   zConditionalDecoder.__init__d   sc   $ 	?? &(/<<!!q)#)
 
 
 =,,-++r** "#$s8}}%% 	] 	]A*M%a[N3x==1,,Gtxt f(]Naopppp-Vdeee !#	 	 	 	 	 	 	 	 #8__	 	 	" " 5< L^,,,CG;  L^^Q???TVT]^ln|~  JK  UL  UL  UL  ##BM6;Mz2Z$[$[\\\\~&& 	P 	PA$RLM#B<Ltxt f(]Naopppp-Vdeee  "$	 	 	 	 	 	 	 	 #8__	 	 	" " O""2=&:L1M#N#NOOOODDbD>Xa[N2s8}}q()) 	Y 	YA$QK!OM%a!e_N3x==1,,G
 	(!&+    #0!&+# # #	  "$	 	 	 	 	 	 	 	 #8__	 	 	" " Q
>dCCCCHL  Q\..!DDDY[Ybcq  tB  DE  OP  ZQ  ZQ  ZQ 
 N!!"-9KX0V"W"WXXXXHL|="x|DDDY`aijlamowxzo{Y|Y|)HRL$2CQGG!!! $= 	O$>~$N$ND!!!	O 	Or   c                 $    | j         j        j        S r#   )r   weightr   )r&   s    r   r   zConditionalDecoder.dtype   s    %++r   c                    |                                  D ]A}t          |t          j                  rSt          j                            |j        d           |j        %t          j                            |j        d           pt          |t          j	                  rKt          j                            |j        d           t          j                            |j        d           t          |t          j
                  rRt          j                            |j        d           |j        %t          j                            |j        d           Cd S )Nrelu)nonlinearityr   r   )modules
isinstancer;   r   initkaiming_normal_r   rR   	constant_	GroupNormLinear)r&   ms     r   r   z%ConditionalDecoder.initialize_weights   s    	1 	1A!RY'' 
1''v'FFF6%G%%afa000Ar|,, 1!!!(A...!!!&!,,,,Ary)) 1''v'FFF6%G%%afa000	1 	1r   Nc           
         |                      |                              |j                  }|                     |          }| j        ro|                      |                              |j                  }|                     |          }t          j        ||gd          }|                     |          }t          ||gd          d         }|5t          |d|j
        d                   }t          ||gd          d         }|t          ||gd          d         }g }	|g}
| j        D ] \  }}}|
d         } ||||          }t          |d	                                          }t          ||                                d
d
d| j        d          }t#          |dk    |j                  }|D ]} ||||          }t          |d                                          }|	                    |            |||z            }|
                    |dddddddf                    |
dd         }
|
d         }| j        D ]\  }} ||||          }t          |d	                                          }t          ||                                d
d
d| j        d          }t#          |dk    |j                  }|D ]} ||||          }t          |d                                          }| j        D ]\  }}}|
                                }|	                                }t          |ddddd|j
        d         f         |gd          d         } ||||          }t          |d	                                          }t          ||                                d
d
d| j        d          }t#          |dk    |j                  }|D ]} ||||          }t          |d                                          } |||z            }|                     ||          }|                     ||z            }||z  S )a  Forward pass of the UNet1DConditional model.

        Args:
            x: (B, 80, T)
            mask (_type_)
            t (_type_): shape (batch_size)
            spks (_type_, optional) Defaults to None.
            cond (_type_, optional)
            r: end time for meanflow mode (shape (1,) tensor)

        Raises:
            ValueError: _description_
            ValueError: _description_

        Returns:
            _type_: _description_
        r   )r6   zb * tr   Nzb c -> b c tru   )tzb c t -> b t cF)hidden_statesattention_masktimestepzb t c -> b c tr:   )r|   r   r   r}   rz   r   catr   r   r   shaper   r   
contiguousr   r   r   r   r   r   r   popr   r   )r&   r(   r   mur   spkscondrconcat_embedhiddensmasksr   r   r   	mask_down	attn_masktransformer_blockmask_midr   mask_upskiprB   s                         r   r+   zConditionalDecoder.forward   s   $   ##&&qw//MM!= 	4$$Q''**1733Aa  A 9aV333L%%l33A!R'""1%$!'"+>>>DaY((+AaY((+A6:6F 	/ 	/2F&
b	Iq)Q''A!-..99;;A/9>>3C3CUESTVZVlnpqqI$Y!^QW==I%7  !%%"##,  
 !-..99;;ANN1
1y=))ALL111aaa19-....crc
9*./ 	< 	<&F&q(A&&A!-..99;;A/8==??E5RSUYUkmoppI$Y!^QW==I%7  !%%"##,  
 !-..99;;AA48N 	& 	&0F&iikkG;;==Da111otz"~o-.5w??BAq'1%%A!-..99;;A/7<<>>5%QRTXTjlnooI$Y!^QW==I%7  !%%"##,  
 !-..99;;AW%%AAQ((W--}r   )NNN)	r-   r.   r/   r%   propertyr   r   r+   r2   r3   s   @r   r]   r]   c   s         zO zO zO zO zO zOz , , X,1 1 1Z Z Z Z Z Z Z Zr   r]   )!r   torch.nnr;   torch.nn.functional
functionalrY   einopsr   r   r   
utils.maskr   matcha.decoderr   r	   r
   r   r   r   matcha.transformerr   utils.intmeanflowr   r1   r   r   Moduler   r5   rD   r   r=   r]    r   r   <module>r      s                    * * * * * * * * * * / / / / / /" " " " " " " " " " " " " " " " 5 5 5 5 5 5 9 9 9 9 9 9u| EK EL               G    6 6 6 6 6- 6 6 6    58?   6j j j j j j j j j jr   