
    0;ji                         d dl Z d dlZd dlmZmZ  e j        e          Zd dlZd dl	m
Z
 d dl	mZ ddlmZ ddlmZ d dlmZ  e j        e          Zd Z G d	 d
ej
        j                  ZdS )    N)DictOptional)
functional   )make_pad_mask)
CFM_PARAMS)
DictConfigc                     | r| j         |k     r| d         } | j         |k     |dk    r.|                     d          dk    r | j        |gdg|dz
  z  R  } | j         |k    sJ d|d| j                     | S )z)repeat batch dimension if it's equal to 1Nr   r   zExpected ndim=z, got tnsr.ndim=)ndimsizerepeat)tnsrBr   s      V/root/voice-cloning/.venv/lib/python3.11/site-packages/chatterbox/models/s3gen/flow.py_repeat_batch_dimr      s    i$:D i$ q55TYYq\\Q&&4;q6QC4!8$4666DyD   "Gd"G"G49"G"G   K    c                   f    e Zd Zddddddddd	d
d
dddd eddddddd          ddgddddddddddddddd d!fd"ed#ed$ed%ed&ed'ed(ed)ed*ed+ej        j	        d,ej        j	        d-e
d.e
f fd/Zd0ed1ej        d2e
eeej                 f         fd3Z ej                    	 	 	 d7d6            Z xZS )8CausalMaskedDiffWithXveci   P      meli     T      N   r   gư>eulercosineg?gffffff?l1)	sigma_minsolvert_schedulertraining_cfg_rateinference_cfg_ratereg_loss_type   g        @            gelu)channelsdropoutattention_head_dimn_blocksnum_mid_blocks	num_headsact_fn)in_channelsout_channelspk_emb_dimn_spks
cfm_paramsdecoder_paramsi   i"V  r   i@  )n_fftnum_melssampling_ratehop_sizewin_sizefminfmax
input_sizeoutput_sizespk_embed_dimoutput_type
vocab_sizeinput_frame_rateonly_mask_losstoken_mel_ratiopre_lookahead_lenencoderdecoderdecoder_confmel_feat_confc                    t                                                       || _        || _        || _        || _        || _        || _        || _        t          j
        d| j                    t          j        ||          | _        t          j                            ||          | _        |
| _        t          j                            | j                                        |          | _        || _        || _        || _        |	| _        d S )Nzinput frame rate=)super__init__r?   r@   rJ   rK   rC   rB   rD   logginginfonn	Embeddinginput_embeddingtorchLinearspk_embed_affine_layerrH   encoder_projrI   rE   rF   rG   )selfr?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   	__class__s                 r   rN   z!CausalMaskedDiffWithXvec.__init__,   s    . 	$&(*$& 0@)>@@AAA!|J
CC&+hoom[&Q&Q#!HOODL,D,D,F,FTT,.!2r   batchdevicereturnc                 8   |d                              |          }|d                              |          }|d                              |          }|d                              |          }|d                              |          }t          j        |d          }|                     |          }t	          |                                                               d                               |          }|                     t          j	        |d	
                    |z  }| 
                    ||          \  }	}
|                     |	          }	t          j        |j        |j                  }t          |          D ][\  }}t!          j                    dk     rt!          j        d	t%          d|z                      }||d d d |f         ||d d d |f<   \t	          |
                    d                              d                                          |	          }| j                            |                                |                    d          |	                    dd                                          ||          \  }}d|iS )Nspeech_tokenspeech_token_lenspeech_featspeech_feat_len	embeddingr   dimr   )minr[   g      ?g333333?r   )condloss)toF	normalizerV   r   float	unsqueezerS   rT   clamprH   rW   zerosshaper[   	enumeraterandomrandintintsumsqueezerI   compute_loss
contiguous	transpose)rX   rZ   r[   token	token_lenfeatfeat_lenrb   maskh	h_lengthscondsijindexri   _s                    r   rx   z%CausalMaskedDiffWithXvec.compute_lossV   sn   
 n%((00,-0088	]#&&v..*+..v66+&))&11	 K	q111	//	::	 y)))0022<<R@@CCFKK$$U[A%>%>%>??$F ||E9559a   DJu|<<<h'' 	5 	5DAq}$$N1c#'ll33E"&q!!!VeV|"4E!QQQ,y}}}44<<<CCDDDHHKK,++OONN1KK1((** , 
 
a ~r   
   Fc           	         |                     d          }t          j        |          }t          j        |d          }|                     |          }t          ||d          }t          ||d          }t          ||d          }t          ||d          }t          ||d          }t          j        ||gd          ||z   }}t          |           	                    d          
                    |          }|| j        k                                    r7t                              |                                 d| j         d	           |                     |                                          |z  }|                     ||          \  }}|d
u r|d d d | j         | j        z  f         }|                    d                              d          }|j        d         |j        d         |j        d         z
  }}|                     |          }t          j        |||z   | j        g|j                  
                    |j                  }||d d d |f<   |                    dd          }t          |           	                    d          
                    |          }|j        d         |k    r|                    |dd          }|                     |                    dd                                           ||||	|
|          \  }}|d d d d |d f         }|j        d         |k    sJ |d fS )Nr   r   rc   r   )r   r   re   >z8
 out-of-range special tokens found in flow, fix inputs!Frg   )mur   spksrh   n_timestepsnoised_melsmeanflow)!r   rT   
atleast_2drk   rl   rV   r   concatr   rn   rj   rC   anyloggererrormaxrS   longrH   rG   rF   rv   rw   rq   rW   rp   r@   r[   dtyperz   r   rI   ry   )rX   r{   r|   prompt_tokenprompt_token_lenprompt_featprompt_feat_lenrb   finalizer   r   r   r   r   r   h_masksr   mel_len1mel_len2r   r}   r   s                         r   	inferencez"CausalMaskedDiffWithXvec.inference   sT    JJqMM $Y//	K	q111	//	::	 )qqAAA,-=qqIII'QQ???+OQQGGG%i;;;	 !<u(=1EEEGWZcGcyy)))44R88;;IFFT_$))++ 	vLLEIIKKtt$/tttuuu$$UZZ\\22T9 \\%33
7u!!!Dt--0DDDDEAKKBK''//B/77	(.q1171:@QRS@T3T(a   Q8 3T5EFu|\\\__`a`ghh)aaa(l1%%y)))44Q77::1==:a=A;;q!Q''D,,{{1a  ++--##  
 
a AAAqqq())O$z!}((((Tzr   )r   NF)__name__
__module____qualname__r	   ru   strboolrT   rQ   Moduler   rN   dictr[   r   Tensorrx   inference_moder   __classcell__)rY   s   @r   r   r   +   s       #&$&&)$)#')+(,()*+,0,069"]_kl5?Z9>'bjAD\_=A,C ,C6D 6D HKCj]`PRFG[]lmDJ:L :L'M 'M 262X]47TST^b(d (d+'3 '3 '3!'3 !$'3 "	'3
 !'3 $''3 "&'3 #&'3 %('3  (/'3  (/'3  $'3* !%+'3 '3 '3 '3 '3 '3T++ L+ 
c8EL))	*	+ + + +Z U !" B B B B B B B Br   r   )rO   rs   typingr   r   	getLoggerr   r   rT   torch.nnrQ   r   rk   
utils.maskr   configsr   	omegaconfr	   r   r   r    r   r   <module>r      s     ! ! ! ! ! ! ! !		8	$	$        $ $ $ $ $ $ % % % % % %                   
	8	$	$
 
 
[ [ [ [ [ux [ [ [ [ [r   