
    0;jiD                        d Z ddlmZmZmZ ddlZddlmZ ddl	Z	ddl
mc mZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
l	mZmZmZ ddlmZ  G d dej                  ZddZddZ	  G d de	j        j                  Z G d de	j        j                  Z G d de	j        j                  Z  G d dej                  Z!dS )zHIFI-GAN    )DictOptionalListN)
get_window)Conv1d)ConvTranspose1d)remove_weight_norm)weight_norm)Uniform)nnsinpow)	Parameterc                   *     e Zd ZdZd fd	Zd Z xZS )Snakea  
    Implementation of a sine-based periodic activation function
    Shape:
        - Input: (B, C, T)
        - Output: (B, C, T), same shape as the input
    Parameters:
        - alpha - trainable parameter
    References:
        - This activation function is from this paper by Liu Ziyin, Tilman Hartwig, Masahito Ueda:
        https://arxiv.org/abs/2006.08195
    Examples:
        >>> a1 = snake(256)
        >>> x = torch.randn(256)
        >>> x = a1(x)
          ?TFc                 L   t          t          |                                            || _        || _        | j        r*t          t          j        |          |z            | _        n)t          t          j	        |          |z            | _        || j        _
        d| _        dS )a   
        Initialization.
        INPUT:
            - in_features: shape of the input
            - alpha: trainable parameter
            alpha is initialized to 1 by default, higher values = higher-frequency.
            alpha will be trained along with the rest of your model.
        g&.>N)superr   __init__in_featuresalpha_logscaler   torchzerosalphaonesrequires_gradno_div_by_zero)selfr   r   alpha_trainabler   	__class__s        Y/root/voice-cloning/.venv/lib/python3.11/site-packages/chatterbox/models/s3gen/hifigan.pyr   zSnake.__init__2   s     	eT##%%%& - 	D"5;{#;#;e#CDDDJJ"5:k#:#:U#BCCDJ#2
 )    c                     | j                             d                              d          }| j        rt          j        |          }|d|| j        z   z  t          t          ||z            d          z  z   }|S )u   
        Forward pass of the function.
        Applies the function to the input elementwise.
        Snake ∶= x + 1/a * sin^2 (xa)
        r   r      )r   	unsqueezer   r   expr   r   r   )r   xr   s      r!   forwardzSnake.forwardI   sv     
$$Q''11"55 	%Ie$$E 334CE	NNA8N8NNNr"   )r   TF__name__
__module____qualname____doc__r   r)   __classcell__r    s   @r!   r   r   "   sV         * * * * * *.      r"   r      c                 2    t          | |z  |z
  dz            S )Nr%   )int)kernel_sizedilations     r!   get_paddingr6   X   s    h&1Q6777r"           {Gz?c                     | j         j        }|                    d          dk    r"| j        j                            ||           d S d S )NConvr$   )r    r+   findweightdatanormal_)mmeanstd	classnames       r!   init_weightsrC   [   sK    $I~~f##	dC((((( $#r"   c                   r     e Zd ZdZddg dfdededee         f fdZd	ej        d
ej        fdZ	d Z
 xZS )ResBlockz)Residual block module in HiFiGAN/BigVGAN.      r1   rG      channelsr4   	dilationsc                 n   t          t          |                                            t          j                    | _        t          j                    | _        |D ]}| j                            t          t          |d|t          ||                                         | j                            t          t          |ddt          |d                                         | j                            t                     | j                            t                     t          j        fdt          t          | j                            D                       | _        t          j        fdt          t          | j                            D                       | _        d S )Nr1   )r5   paddingc                 2    g | ]}t          d           S F)r   r   .0_rJ   s     r!   
<listcomp>z%ResBlock.__init__.<locals>.<listcomp>   6     +
 +
 +
 (5111+
 +
 +
r"   c                 2    g | ]}t          d           S rO   rP   rQ   s     r!   rT   z%ResBlock.__init__.<locals>.<listcomp>   rU   r"   )r   rE   r   r   
ModuleListconvs1convs2appendr
   r   r6   applyrC   rangelenactivations1activations2)r   rJ   r4   rK   r5   r    s    `   r!   r   zResBlock.__init__l   s    	h&&(((moomoo! 	 	HK  #!) +K B B  	 	   K  #!" +K ; ;  	 	    	,''','''M +
 +
 +
 +
3t{++,,+
 +
 +
   M +
 +
 +
 +
3t{++,,+
 +
 +
  r"   r(   returnc                    t          t          | j                            D ]_} | j        |         |          } | j        |         |          } | j        |         |          } | j        |         |          }||z   }`|S N)r\   r]   rX   r^   r_   rY   )r   r(   idxxts       r!   r)   zResBlock.forward   s    T[))** 	 	C'"3'**B!S!"%%B'"3'++B!S!"%%BQAAr"   c                     t          t          | j                            D ]6}t          | j        |                    t          | j        |                    7d S rb   )r\   r]   rX   r	   rY   )r   rc   s     r!   r	   zResBlock.remove_weight_norm   sZ    T[))** 	1 	1Ct{3/000t{3/0000	1 	1r"   )r+   r,   r-   r.   r3   r   r   r   Tensorr)   r	   r/   r0   s   @r!   rE   rE   j   s        33 (yy	, ,, , 9	, , , , , ,\ %,    1 1 1 1 1 1 1r"   rE   c                   `     e Zd ZdZ	 	 	 d fd	Zd Z ej                    d             Z xZ	S )	SineGena   Definition of sine generator
    SineGen(samp_rate, harmonic_num = 0,
            sine_amp = 0.1, noise_std = 0.003,
            voiced_threshold = 0,
            flag_for_pulse=False)
    samp_rate: sampling rate in Hz
    harmonic_num: number of harmonic overtones (default 0)
    sine_amp: amplitude of sine-wavefrom (default 0.1)
    noise_std: std of Gaussian noise (default 0.003)
    voiced_thoreshold: F0 threshold for U/V classification (default 0)
    flag_for_pulse: this SinGen is used inside PulseGen (default False)
    Note: when flag_for_pulse is True, the first time step of a voiced
        segment is always sin(np.pi) or cos(0)
    r   皙?~jth?c                     t          t          |                                            || _        || _        || _        || _        || _        d S rb   )r   rh   r   sine_amp	noise_stdharmonic_numsampling_ratevoiced_threshold)r   	samp_ratern   rl   rm   rp   r    s         r!   r   zSineGen.__init__   sM     	gt%%''' "(& 0r"   c                 V    || j         k                        t          j                  }|S rb   )rp   typer   float32)r   f0uvs      r!   _f02uvzSineGen._f02uv   s$    4((..u}==	r"   c                 b   t          j        |                    d          | j        dz   |                    d          f                              |j                  }t          | j        dz             D ]!}||dz   z  | j        z  |dd||dz   ddf<   "dt          j	        z  t          j
        |d          dz  z  }t          t          j	         t          j	                  }|                    |                    d          | j        dz   df                              |j                  }d|dddddf<   | j        t          j        ||z             z  }|                     |          }|| j        z  d|z
  | j        z  d	z  z   }	|	t          j        |          z  }
||z  |
z   }|||
fS )
zW
        :param f0: [B, 1, sample_len], Hz
        :return: [B, 1, sample_len]
        r   r1   r$   Nr%   dim)lowhigh)sample_shaperG   )r   r   sizern   todevicer\   ro   nppicumsumr   samplerl   r   rw   rm   
randn_like)r   ru   F_mati	theta_matu_dist	phase_vec
sine_wavesrv   	noise_ampnoises              r!   r)   zSineGen.forward   s    RWWQZZ):Q)>LMMPPQSQZ[[t(1,-- 	F 	FA$&!a%L43E$EE!!!QAXqqq.!!Ie!<!<!<q!@A	beV"%000MM

D<MPQ<QST/UMVVYYZ_Zfgg		!!!Q' ]UYy9/D%E%EE
 [[__
 '1r6T]*BQ*FF	E,Z888  "_u,
2u$$r"   r   ri   rj   r   )
r+   r,   r-   r.   r   rw   r   no_gradr)   r/   r0   s   @r!   rh   rh      s          01)."#1 1 1 1 1 1  
 U]__% % _% % % % %r"   rh   c                   .     e Zd ZdZ	 	 d fd	Zd Z xZS )SourceModuleHnNSFa   SourceModule for hn-nsf
    SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1,
                 add_noise_std=0.003, voiced_threshod=0)
    sampling_rate: sampling_rate in Hz
    harmonic_num: number of harmonic above F0 (default: 0)
    sine_amp: amplitude of sine source signal (default: 0.1)
    add_noise_std: std of additive Gaussian noise (default: 0.003)
        note that amplitude of noise in unvoiced is decided
        by sine_amp
    voiced_threshold: threhold to set U/V given F0 (default: 0)
    Sine_source, noise_source = SourceModuleHnNSF(F0_sampled)
    F0_sampled (batchsize, length, 1)
    Sine_source (batchsize, length, 1)
    noise_source (batchsize, length 1)
    uv (batchsize, length, 1)
    r   ri   rj   c                 8   t          t          |                                            || _        || _        t          |||||          | _        t          j        	                    |dz   d          | _
        t          j                                        | _        d S )Nr1   )r   r   r   rl   rm   rh   	l_sin_genr   r   Linearl_linearTanhl_tanh)r   ro   upsample_scalern   rl   add_noise_stdvoiced_threshodr    s          r!   r   zSourceModuleHnNSF.__init__   s    &&//111 & !!)=/K K q(8!<<hmmoor"   c                    t          j                    5  |                     |                    dd                    \  }}}|                    dd          }|                    dd          }ddd           n# 1 swxY w Y   |                     |                     |                    }t          j        |          | j        z  dz  }|||fS )z
        Sine_source, noise_source = SourceModuleHnNSF(F0_sampled)
        F0_sampled (batchsize, length, 1)
        Sine_source (batchsize, length, 1)
        noise_source (batchsize, length 1)
        r1   r%   NrG   )r   r   r   	transposer   r   r   rl   )r   r(   	sine_wavsrv   rS   
sine_merger   s          r!   r)   zSourceModuleHnNSF.forward  s     ]__ 	$ 	$#~~akk!Q.?.?@@Ir1!++Aq11Ia##B	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ [[y!9!9::
  $$t}4q85"$$s   AA::A>A>r   r*   r0   s   @r!   r   r      s`         " PS67& & & & & &% % % % % % %r"   r   c            #       p    e Zd ZdZdddddddddgd	d	gd	d
dg dg dg dg dgddgg dg dgdddfdedededededededee         dee         deeef         dee         deee                  dee         deee                  d ed!ed"e	j
        j        f" fd#Zd$ Zd% Zd& Z e	j        d'd'd(          fd)e	j        d*e	j        d+e	j        fd,Zd-ed.e	j        d+eeee	j                 f         fd/Z e	j                     e	j        d'd'd(          fd0e	j        d1e	j        d+e	j        fd2            Z xZS )3HiFTGeneratorza
    HiFTNet Generator: Neural Source Filter + ISTFTNet
    https://arxiv.org/abs/2309.09493
    P   rF      i"V  ri   rj   
         )n_ffthop_len)rG         rH   r   r   gGz?Nin_channelsbase_channelsnb_harmonicsro   	nsf_alpha	nsf_sigmansf_voiced_thresholdupsample_ratesupsample_kernel_sizesistft_paramsresblock_kernel_sizesresblock_dilation_sizessource_resblock_kernel_sizessource_resblock_dilation_sizeslrelu_slopeaudio_limitf0_predictorc                    t          t          |                                            d| _        || _        || _        |
| _        || _        || _        t          |          | _
        t          |          | _        t          |t          j        |          |
d         z  ||||          | _        t           j                            t          j        |          |
d         z            | _        t)          t+          ||ddd                    | _        t#          j                    | _        t3          t5          ||	                    D ]V\  }\  }}| j                            t)          t9          |d|z  z  |d|dz   z  z  ||||z
  dz                                 Wt#          j                    | _        t#          j                    | _        dg|d d d	         d d	         z   }t          j        |          }t3          t5          |d d d	         ||                    D ]\  }\  }}}|dk    r=| j                            t+          |
d
         dz   |d|dz   z  z  dd                     nD| j                            t+          |
d
         dz   |d|dz   z  z  |dz  ||dz                       | j                            tA          |d|dz   z  z  ||                     t#          j                    | _!        tE          t          | j                            D ]\}|d|dz   z  z  }t3          t5          ||                    D ]1\  }\  }}| j!                            tA          |||                     2]t)          t+          ||
d
         dz   ddd                    | _#        | j        $                    tJ                     | j#        $                    tJ                     t#          j&        d          | _'        t!          j(        tS          d|
d
         d          *                    t          j+                            | _,        || _-        d S )Nr1   r   )ro   r   rn   rl   r   r   )scale_factorr   rG   )rM   r%   r$   r   )r1   r   hannT)fftbins).r   r   r   out_channelsr   ro   r   r   r   r]   num_kernelsnum_upsamplesr   r   prodm_sourcer   r   Upsample	f0_upsampr
   r   conv_prerW   ups	enumerateziprZ   r   source_downssource_resblockscumprodrE   	resblocksr\   	conv_postr[   rC   ReflectionPad1dreflection_pad
from_numpyr   astypert   stft_windowr   )r   r   r   r   ro   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ukdownsample_ratesdownsample_cum_ratesdchrS   r    s                             r!   r   zHiFTGenerator.__init__#  sy   ( 	mT""++---(*(&&455 00)'7>22\)5LL%#02 2 2 **8O8OR^_hRi8i*jj#;q!Q???
 

 =??"3~7L#M#MNN 	 	IAv1HOO#%!Q$/%!a!e*5!"Q1   
 
 
 
 MOO "3"!5crc!::!z*:;;%c*>ttt*DFb  eC  'D  'D  E  E 	 	LAy1aAvv!((<014maRSe6UWXZ[\\    !((<014maRSe6UWX[\W\^_jkopjprrr   !((1Q<8!Q??    s48}}%% 	: 	:A1q1u:.B&s+@BY'Z'Z[[ : :	6Aq%%hr1a&8&89999: %VBW0E0I1aYZ%[%[%[\\|$$$\*** 088 +Jv|G?T^b,c,c,c,j,jkmku,v,vww(r"   c                    t          d           | j        D ]}t          |           | j        D ]}|                                 t          | j                   t          | j                   | j                                         | j        D ]}t          |           | j        D ]}|                                 d S )NzRemoving weight norm...)	printr   r	   r   r   r   r   r   r   )r   ls     r!   r	   z HiFTGenerator.remove_weight_norm~  s    '((( 	" 	"Aq!!!! 	# 	#A  """"4=)))4>***((***" 	" 	"Aq!!!!& 	# 	#A  """"	# 	#r"   c           	          t          j        || j        d         | j        d         | j        d         | j                            |j                  d          }t          j        |          }|d         |d         fS )Nr   r   T)windowreturn_complex).r   ).r1   )r   stftr   r   r   r   view_as_real)r   r(   specs      r!   _stftzHiFTGenerator._stft  s    zg&(9)(DdFWX_F`imiyi|i|}~  ~F  jG  jG! ! ! !$''F|T&\))r"   c           	      b   t          j        |d          }|t          j        |          z  }|t          j        |          z  }t          j        t          j        ||          | j        d         | j        d         | j        d         | j                            |j	                            }|S )Ng      Y@)maxr   r   )r   )
r   clipcosr   istftcomplexr   r   r   r   )r   	magnitudephaserealimginverse_transforms         r!   _istftzHiFTGenerator._istft  s    Jyc222	59U+++%)E***!KdC(@(@$BST[B\^b^opy^z(,(9'(B4K[K^K^_h_oKpKpr r r  r"   r1   r   r(   sr`   c                 R   |                      |                    d                    \  }}t          j        ||gd          }|                     |          }t          | j                  D ]}t          j        || j	                  } | j
        |         |          }|| j        dz
  k    r|                     |          } | j        |         |          } | j        |         |          }||z   }d }t          | j                  D ]J}	|" | j        || j        z  |	z            |          }&| | j        || j        z  |	z            |          z  }K|| j        z  }t          j        |          }|                     |          }t          j        |d d d | j        d         dz  dz   d d f                   }
t          j        |d d | j        d         dz  dz   d d d f                   }|                     |
|          }t          j        || j         | j                  }|S )Nr1   ry   r   r%   )r   squeezer   catr   r\   r   F
leaky_relur   r   r   r   r   r   r   r   r'   r   r   r   clampr   )r   r(   r   s_stft_reals_stft_imags_stftr   sixsjr   r   s               r!   decodezHiFTGenerator.decode  s9   #'::aiill#;#; [K51===MM!t)** 	& 	&AQ 011AAAD&***''** &"1%f--B)&q)"--BBAB4+,, F F:AD,<(<q(@A!DDBBB$.T-=)=)AB1EEEBBT%%AALOONN1Ia#GD$5g$>!$Ca$G#G JKLL		!AAAt09Q>BCCQQQFGHHKK	5))KD,,d.>??r"   batchr   c                    |d                              dd                              |          }|                     |          }|                     |d d d f                                        dd          }|                     |          \  }}}|                     dd          }|                     ||          }||fS )Nspeech_featr1   r%   r(   r   )r   r   r   r   r   r   )r   r   r   r   ru   r   rS   generated_speechs           r!   r)   zHiFTGenerator.forward  s    
 M*44Q::==fEE{++NN2aaag;''11!Q77--""1aKK1;;;::##r"   r   cache_sourcec                 z   |                      |          }|                     |d d d f                                       dd          }|                     |          \  }}}|                    dd          }|j        d         dk    r||d d d d d |j        d         f<   |                     ||          }||fS )Nr1   r%   r   r   )r   r   r   r   shaper   )r   r   r  ru   r   rS   r  s          r!   	inferencezHiFTGenerator.inference  s     {++NN2aaag;''11!Q77--""1aKK1a A%%.:Aaaa*\'***+;;;::""r"   )r+   r,   r-   r.   r3   floatr   r   strr   r   Moduler   r	   r   r   r   rf   r   dictr   r   r)   inference_moder  r/   r0   s   @r!   r   r     s          "!$ !!&"$*,)*A02Bx57A+F+F/9zz8A		999iii7X78"g?Hyy)))>T!$!%,0%Y) Y)Y) Y) 	Y)
 Y) Y) Y) #(Y) !IY) $(9Y) sCx.Y) $(9Y) &*$s)_Y) +/s)Y) -1cOY)  !Y)" #Y)$  (/%Y) Y) Y) Y) Y) Y)v# # #* * *! ! ! 9DAq!8L8L        QVQ]        D$$ L$ 
c8EL))	*	$ $ $ $  UP[PUP[\]_`bcPdPd # #U\ # #iniu # # # # # # # #r"   r   )r1   )r7   r8   )"r.   typingr   r   r   numpyr   scipy.signalr   r   torch.nn.functionalr   
functionalr   torch.nnr   r   torch.nn.utilsr	   torch.nn.utils.parametrizationsr
   torch.distributions.uniformr   r   r   r   r  r   r6   rC   rE   rh   r   r    r"   r!   <module>r     s  $  ' ' ' ' ' ' ' ' ' '     # # # # # #                 $ $ $ $ $ $ - - - - - - 7 7 7 7 7 7 / / / / / /                2 2 2 2 2BI 2 2 2l8 8 8 8) ) ) )<1 <1 <1 <1 <1ux <1 <1 <1~>% >% >% >% >%eho >% >% >%B1% 1% 1% 1% 1% 1% 1% 1%h|# |# |# |# |#BI |# |# |# |# |#r"   