
    0;jib%                        d dl mZ d dlmZ d dlZd dlZd dlZd dlZd dlm	c m
Z d dlmZ d dlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlm Z  dZ!i dddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7Z"d8e#d9e#fd:Z$e G d; d<                      Z% G d= d>          Z&dS )?    )	dataclass)PathN)	load_file)snapshot_download   )T3)T3Config)S3_SRdrop_invalid_tokens)S3GEN_SRS3Gen)MTLTokenizer)VoiceEncoder)T3CondzResembleAI/chatterboxarArabicdaDanishdeGermanelGreekenEnglishesSpanishfiFinnishfrFrenchheHebrewhiHindiitItalianjaJapanesekoKoreanmsMalaynlDutchno	NorwegianplPolish
PortugueseRussianSwedishSwahiliTurkishChinese)ptrusvswtrzhtextreturnc                     t                     dk    rdS  d                                         r% d                                          dd         z    d                                                                g d}|D ]\  }}                     ||                                d           h d}t           fd|D                       s d	z    S )
zt
        Quick cleanup func for punctuation from LLMs or
        containing chars not seen often in the dataset
    r   z)You need to add some text for me to talk.r   N ))z..., )u   …rC   ):,)z - rC   );rC   )u   —-)u   –rG   )z ,rE   )u   “")u   ”rH   )u   ‘')u   ’rI   >
      、   。   ！   ，   ？rG   !rE   .?c              3   B   K   | ]}                     |          V  d S N)endswith).0pr?   s     L/root/voice-cloning/.venv/lib/python3.11/site-packages/chatterbox/mtl_tts.py	<genexpr>zpunc_norm.<locals>.<genexpr>W   s/      99At}}Q999999    rP   )lenislowerupperjoinsplitreplacerstripany)r?   punc_to_replaceold_char_sequencenew_charsentence_enderss   `    rW   	punc_normrf   3   s    
 4yyA~~:: Aw *Aw}}abb) 88DJJLL!!D  O (7 9 9#8||-x88 ;;sDMMMO999999999 KrY   c                   R    e Zd ZU dZeed<   eed<   d ZdefdZ	e
d
d            Zd	S )ConditionalsaR  
    Conditionals for T3 and S3Gen
    - T3 conditionals:
        - speaker_emb
        - clap_emb
        - cond_prompt_speech_tokens
        - cond_prompt_speech_emb
        - emotion_adv
    - S3Gen conditionals:
        - prompt_token
        - prompt_token_len
        - prompt_feat
        - prompt_feat_len
        - embedding
    t3genc                     | j                             |          | _         | j                                        D ]7\  }}t	          j        |          r|                    |          | j        |<   8| S )Ndevice)ri   torj   itemstorch	is_tensor)selfrm   kvs       rW   rn   zConditionals.toq   si    '**F*++HNN$$ 	2 	2DAqq!! 2dd&d11rY   fpathc                 p    t          | j        j        | j                  }t	          j        ||           d S )N)ri   rj   )dictri   __dict__rj   rp   save)rr   ru   arg_dicts      rW   ry   zConditionals.savex   s=    w
 
 
 	
8U#####rY   cpuc                 t    t          j        ||d          } | t          di |d         |d                   S )NT)map_locationweights_onlyri   rj    )rp   loadr   )clsru   r}   kwargss       rW   r   zConditionals.load   s@    E4PPPs6))F4L))6%=999rY   N)r{   )__name__
__module____qualname____doc__r   __annotations__rw   rn   r   ry   classmethodr   r   rY   rW   rh   rh   ]   s~           	JJJ	III  $$ $ $ $ $ : : : [: : :rY   rh   c                       e Zd Zdez  Zdez  Z	 ddedede	de
ded	efd
Zed             Zedd            Zedej        dd fd            ZddZ	 	 	 	 	 	 	 ddZdS )ChatterboxMultilingualTTS   
   Nri   s3genve	tokenizerrm   condsc                     t           | _        || _        || _        || _        || _        || _        || _        t          j	                    | _
        d S rS   )r   srri   r   r   r   rm   r   perthPerthImplicitWatermarkerwatermarker)rr   ri   r   r   r   rm   r   s          rW   __init__z"ChatterboxMultilingualTTS.__init__   sL     
"
 9;;rY   c                 4    t                                           S )z8Return dictionary of supported language codes and names.)SUPPORTED_LANGUAGEScopy)r   s    rW   get_supported_languagesz1ChatterboxMultilingualTTS.get_supported_languages   s     #'')))rY   r@   c                    t          |          }t                      }|                    t          j        |dz  d                     |                    |                                           t          t          j	                              }t          |dz            }d|                                v r|d         d         }|                    |           |                    |                                           t                      }|                    t          j        |dz  d                     |                    |                                           t          t          |dz                      }d }|d	z  x}	                                r-t                               |	                              |          } | ||||||
          S )Nve.ptT)r~   t3_mtl23ls_v2.safetensorsmodelr   s3gen.pt$grapheme_mtl_merged_expanded_v1.jsonconds.pt)r   )r   r   load_state_dictrp   r   rn   evalr   r	   multilingualload_safetensorskeysr   r   strexistsrh   )
r   ckpt_dirrm   r   ri   t3_stater   r   r   builtin_voices
             rW   
from_localz$ChatterboxMultilingualTTS.from_local   s   >>^^
Jx')===	
 	
 	
 	f%''((#H/J$JKKhmmoo%%(+H
8$$$
fJx*,4@@@	
 	
 	
 	 AABB
 
	 %
22M::<< 	@ %%m4477??Es2ub)V5AAAArY   c                     t          t          t          ddg dt          j        d                              }|                     ||          S )Nr   main)r   r   r   r   r   zCangjie5_TC.jsonHF_TOKEN)repo_id	repo_typerevisionallow_patternstoken)r   r   REPO_IDosgetenvr   )r   rm   r   s      rW   from_pretrainedz)ChatterboxMultilingualTTS.from_pretrained   sa    !  Z   Z   Zi
++  
 
 ~~h///rY         ?c           
      N   t          j        |t                    \  }}t          j        |t          t                    }|d | j                 }| j                            |t          | j                  }d }| j	        j
        j        x}r`| j        j        }	|	                    |d | j                 g|          \  }}
t          j        |                              | j                  }t          j        | j                            |gt                              }|                    dd                              | j                  }t-          |||t          j        d	d	d	          z  
                              | j                  }t1          ||          | _        d S )N)r   )orig_sr	target_srrl   )max_lensample_rater   T)axiskeepdimr   speaker_embcond_prompt_speech_tokensemotion_adv)librosar   r   resampler
   DEC_COND_LENr   	embed_refrm   ri   hpspeech_cond_prompt_lenr   forwardENC_COND_LENrp   
atleast_2drn   
from_numpyr   embeds_from_wavsmeanr   onesrh   r   )rr   	wav_fpathexaggerations3gen_ref_wav_srref_16k_wavs3gen_ref_dictt3_cond_prompt_tokensplens3_tokzr_ve_embedt3_conds                rW   prepare_conditionalsz.ChatterboxMultilingualTTS.prepare_conditionals   s   $\)AAAs&}hRWXXX%&8t'8&89--mXdk-ZZ !%7:444 	\z+H'/'7'7EWdFWEW9X8Ycg'7'h'h$!1$)$45J$K$K$N$Nt{$[$[! #DG$<$<k]X]$<$^$^__==a=6699$+FF &;$uz!Q':'::
 
 
 "DK"
 
 	 	
 "'>::


rY   皙?       @皙?      ?c
                 p   |r\|                                 t          vrAd                    t                                                    }
t	          d| d|
           |r|                     ||           n| j        
J d            t          |          t          | j        j        j	        d         
                                          k    rb| j        j        }t          |j        |j        |t          j        ddd          z                                | j        	          | j        _        t%          |          }| j                            ||r|                                 nd 
                              | j                  }t          j        ||gd          }| j        j        j        }| j        j        j        }t3          j        |d|          }t3          j        |d|          }t          j                    5  | j                            | j        j        |d|||||	          }|d         }t;          |          }|                    | j                  }| j                            || j        j                  \  }}|                     d          !                                "                                #                                }| j$        %                    || j&                  }d d d            n# 1 swxY w Y   t          j'        |          (                    d          S )NrC   zUnsupported language_id 'z'. Supported languages: )r   zBPlease `prepare_conditionals` first or specify `audio_prompt_path`)r   r   r   r   r   rl   )language_idr   )dim)r   r   )value)r   r   i  )r   text_tokensmax_new_tokenstemperature
cfg_weightrepetition_penaltymin_ptop_p)speech_tokensref_dictr   ))lowerr   r]   r   
ValueErrorr   r   floatri   r   itemr   r   r   rp   r   rn   rm   rf   r   text_to_tokenscatr   start_text_tokenstop_text_tokenFpadinference_mode	inferencer   r   rj   squeezedetachr{   numpyr   apply_watermarkr   r   	unsqueeze)rr   r?   r   audio_prompt_pathr   r   r   r   r   r   supported_langs_condr   soteotr   wavr   watermarked_wavs                      rW   generatez"ChatterboxMultilingualTTS.generate   sr     	;,,..6III"ii(;(@(@(B(BCCO:K : :(7: :  
  	p%%&7l%SSSS:))+o))) %
(A'(J(O(O(Q(Q"R"RRR JME"!-*/*I(5:aA+>+>>   bb$$	 JM n33D]hFrkFWFWFYFYFYnr3ssvvw{  xC  D  Dik :BBBgj)gj(eKs;;;eKs;;;!## 	Y 	Y G--
'#'%#5 . 	 	M *!,M 0>>M),,T[99MZ))+ *  FC ++a..''))--//5577C".>>sPTPW>XXO/	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y0 00::1===s   C3LL
L
rS   )r@   r   )r   )Nr   r   r   r   r   r   )r   r   r   r
   r   r   r   r   r   r   r   r   rh   r   r   r   r   rp   rm   r   r   r	  r   rY   rW   r   r      s4       u9L=L #< << < 	<
  < < < < < <$ * * [* B B B [B@ 
0U\ 
06Q 
0 
0 
0 [
0; ; ; ;> D> D> D> D> D> D>rY   r   )'dataclassesr   pathlibr   r   r   rp   r   torch.nn.functionalnn
functionalr   safetensors.torchr   r   huggingface_hubr   	models.t3r   models.t3.modules.t3_configr	   models.s3tokenizerr
   r   models.s3genr   r   models.tokenizersr   models.voice_encoderr   models.t3.modules.cond_encr   r   r   r   rf   rh   r   r   rY   rW   <module>r     s   ! ! ! ! ! !       				             ; ; ; ; ; ; - - - - - -       1 1 1 1 1 1 : : : : : : : : ) ) ) ) ) ) ) ) + + + + + + . . . . . . . . . . . . "  	
 	 	 	    	 
     !" #$ 	/   6'C 'C ' ' ' 'T $: $: $: $: $: $: $: $:Nh> h> h> h> h> h> h> h> h> h>rY   