from typing import Any, Dict, Optional

import torch
import torch.nn as nn
from diffusers.models.attention import GEGLU, GELU, AdaLayerNorm, AdaLayerNormZero, ApproximateGELU
from diffusers.models.attention_processor import Attention
from diffusers.models.lora import LoRACompatibleLinear
from diffusers.utils.torch_utils import maybe_allow_in_graph


class SnakeBeta(nn.Module):
    """
    A modified Snake function which uses separate parameters for the magnitude of the periodic components
    Shape:
        - Input: (B, C, T)
        - Output: (B, C, T), same shape as the input
    Parameters:
        - alpha - trainable parameter that controls frequency
        - beta - trainable parameter that controls magnitude
    References:
        - This activation function is a modified version based on this paper by Liu Ziyin, Tilman Hartwig, Masahito Ueda:
        https://arxiv.org/abs/2006.08195
    Examples:
        >>> a1 = SnakeBeta(256, 256)
        >>> x = torch.randn(256)
        >>> x = a1(x)
    """

    def __init__(self, in_features, out_features, alpha=1.0, alpha_trainable=True, alpha_logscale=True):
        """
        Initialization.
        INPUT:
            - in_features: shape of the input
            - alpha - trainable parameter that controls frequency
            - beta - trainable parameter that controls magnitude
            alpha is initialized to 1 by default, higher values = higher-frequency.
            beta is initialized to 1 by default, higher values = higher-magnitude.
            alpha will be trained along with the rest of your model.
        """
        super().__init__()
        self.in_features = out_features if isinstance(out_features, list) else [out_features]
        self.proj = LoRACompatibleLinear(in_features, out_features)

        # Initialize alpha and beta: in log scale they start at zeros (exp(0) = 1),
        # in linear scale they start at ones.
        self.alpha_logscale = alpha_logscale
        if self.alpha_logscale:
            self.alpha = nn.Parameter(torch.zeros(self.in_features) * alpha)
            self.beta = nn.Parameter(torch.zeros(self.in_features) * alpha)
        else:
            self.alpha = nn.Parameter(torch.ones(self.in_features) * alpha)
            self.beta = nn.Parameter(torch.ones(self.in_features) * alpha)

        self.alpha.requires_grad = alpha_trainable
        self.beta.requires_grad = alpha_trainable

        self.no_div_by_zero = 0.000000001

    def forward(self, x):
        """
        Forward pass of the function.
        Applies the function to the input elementwise.
        SnakeBeta := x + 1/b * sin^2(x * a)
        """
        x = self.proj(x)
        if self.alpha_logscale:
            alpha = torch.exp(self.alpha)
            beta = torch.exp(self.beta)
        else:
            alpha = self.alpha
            beta = self.beta

        x = x + (1.0 / (beta + self.no_div_by_zero)) * torch.pow(torch.sin(x * alpha), 2)

        return x


class FeedForward(nn.Module):
    r"""
    A feed-forward layer.

    Parameters:
        dim (`int`): The number of channels in the input.
        dim_out (`int`, *optional*): The number of channels in the output. If not given, defaults to `dim`.
        mult (`int`, *optional*, defaults to 4): The multiplier to use for the hidden dimension.
        dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
        activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
        final_dropout (`bool` *optional*, defaults to False): Apply a final dropout.
    """

    def __init__(
        self,
        dim: int,
        dim_out: Optional[int] = None,
        mult: int = 4,
        dropout: float = 0.0,
        activation_fn: str = "geglu",
        final_dropout: bool = False,
    ):
        super().__init__()
        inner_dim = int(dim * mult)
        dim_out = dim_out if dim_out is not None else dim

        if activation_fn == "gelu":
            act_fn = GELU(dim, inner_dim)
        elif activation_fn == "gelu-approximate":
            act_fn = GELU(dim, inner_dim, approximate="tanh")
        elif activation_fn == "geglu":
            act_fn = GEGLU(dim, inner_dim)
        elif activation_fn == "geglu-approximate":
            act_fn = ApproximateGELU(dim, inner_dim)
        elif activation_fn == "snakebeta":
            act_fn = SnakeBeta(dim, inner_dim)

        self.net = nn.ModuleList([])
        # project in
        self.net.append(act_fn)
        self.net.append(nn.Dropout(dropout))
        # project out
        self.net.append(LoRACompatibleLinear(inner_dim, dim_out))
        # FF as used in Vision Transformer, MLP-Mixer, etc. has a final dropout
        if final_dropout:
            self.net.append(nn.Dropout(dropout))

    def forward(self, hidden_states):
        for module in self.net:
            hidden_states = module(hidden_states)
        return hidden_states


@maybe_allow_in_graph
class BasicTransformerBlock(nn.Module):
    r"""
    A basic Transformer block.

    Parameters:
        dim (`int`): The number of channels in the input and output.
        num_attention_heads (`int`): The number of heads to use for multi-head attention.
        attention_head_dim (`int`): The number of channels in each head.
        dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
        cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
        only_cross_attention (`bool`, *optional*):
            Whether to use only cross-attention layers. In this case two cross attention layers are used.
        double_self_attention (`bool`, *optional*):
            Whether to use two self-attention layers. In this case no cross attention layers are used.
        activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
        num_embeds_ada_norm (`int`, *optional*):
            The number of diffusion steps used during training. See `Transformer2DModel`.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Configure if the attentions should contain a bias parameter.
    """

    def __init__(
        self,
        dim: int,
        num_attention_heads: int,
        attention_head_dim: int,
        dropout: float = 0.0,
        cross_attention_dim: Optional[int] = None,
        activation_fn: str = "geglu",
        num_embeds_ada_norm: Optional[int] = None,
        attention_bias: bool = False,
        only_cross_attention: bool = False,
        double_self_attention: bool = False,
        upcast_attention: bool = False,
        norm_elementwise_affine: bool = True,
        norm_type: str = "layer_norm",
        final_dropout: bool = False,
    ):
        super().__init__()
        self.only_cross_attention = only_cross_attention

        self.use_ada_layer_norm_zero = (num_embeds_ada_norm is not None) and norm_type == "ada_norm_zero"
        self.use_ada_layer_norm = (num_embeds_ada_norm is not None) and norm_type == "ada_norm"

        if norm_type in ("ada_norm", "ada_norm_zero") and num_embeds_ada_norm is None:
            raise ValueError(
                f"`norm_type` is set to {norm_type}, but `num_embeds_ada_norm` is not defined. Please make sure to"
                f" define `num_embeds_ada_norm` if setting `norm_type` to {norm_type}."
            )

        # Define 3 blocks. Each block has its own normalization layer.
        # 1. Self-Attn
        if self.use_ada_layer_norm:
            self.norm1 = AdaLayerNorm(dim, num_embeds_ada_norm)
        elif self.use_ada_layer_norm_zero:
            self.norm1 = AdaLayerNormZero(dim, num_embeds_ada_norm)
        else:
            self.norm1 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine)
        self.attn1 = Attention(
            query_dim=dim,
            heads=num_attention_heads,
            dim_head=attention_head_dim,
            dropout=dropout,
            bias=attention_bias,
            cross_attention_dim=cross_attention_dim if only_cross_attention else None,
            upcast_attention=upcast_attention,
        )

        # 2. Cross-Attn
        if cross_attention_dim is not None or double_self_attention:
            # AdaLayerNormZero is only used for self-attention, so the second
            # attention block falls back to AdaLayerNorm or plain LayerNorm.
            self.norm2 = (
                AdaLayerNorm(dim, num_embeds_ada_norm)
                if self.use_ada_layer_norm
                else nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine)
            )
            self.attn2 = Attention(
                query_dim=dim,
                cross_attention_dim=cross_attention_dim if not double_self_attention else None,
                heads=num_attention_heads,
                dim_head=attention_head_dim,
                dropout=dropout,
                bias=attention_bias,
                upcast_attention=upcast_attention,
            )  # is self-attn if encoder_hidden_states is none
        else:
            self.norm2 = None
            self.attn2 = None

        # 3. Feed-forward
        self.norm3 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine)
        self.ff = FeedForward(dim, dropout=dropout, activation_fn=activation_fn, final_dropout=final_dropout)

        # let chunk size default to None
        self._chunk_size = None
        self._chunk_dim = 0

    def set_chunk_feed_forward(self, chunk_size: Optional[int], dim: int):
        # Sets chunked feed-forward
        self._chunk_size = chunk_size
        self._chunk_dim = dim

    def forward(
        self,
        hidden_states: torch.FloatTensor,
        attention_mask: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        timestep: Optional[torch.LongTensor] = None,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
        class_labels: Optional[torch.LongTensor] = None,
    ):
        # Normalization is always applied before the real computation in the following blocks.
        # 1. Self-Attention
        if self.use_ada_layer_norm:
            norm_hidden_states = self.norm1(hidden_states, timestep)
        elif self.use_ada_layer_norm_zero:
            norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
                hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype
            )
        else:
            norm_hidden_states = self.norm1(hidden_states)

        cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}

        attn_output = self.attn1(
            norm_hidden_states,
            encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
            attention_mask=encoder_attention_mask if self.only_cross_attention else attention_mask,
            **cross_attention_kwargs,
        )
        if self.use_ada_layer_norm_zero:
            attn_output = gate_msa.unsqueeze(1) * attn_output
        hidden_states = attn_output + hidden_states

        # 2. Cross-Attention
        if self.attn2 is not None:
            norm_hidden_states = (
                self.norm2(hidden_states, timestep) if self.use_ada_layer_norm else self.norm2(hidden_states)
            )

            attn_output = self.attn2(
                norm_hidden_states,
                encoder_hidden_states=encoder_hidden_states,
                attention_mask=encoder_attention_mask,
                **cross_attention_kwargs,
            )
            hidden_states = attn_output + hidden_states

        # 3. Feed-forward
        norm_hidden_states = self.norm3(hidden_states)

        if self.use_ada_layer_norm_zero:
            norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]

        if self._chunk_size is not None:
            # Chunked feed-forward can be used to save memory.
            if norm_hidden_states.shape[self._chunk_dim] % self._chunk_size != 0:
                raise ValueError(
                    f"`hidden_states` dimension to be chunked: {norm_hidden_states.shape[self._chunk_dim]} has to be"
                    f" divisible by chunk size: {self._chunk_size}. Make sure to set an appropriate `chunk_size` when"
                    f" calling `unet.enable_forward_chunking`."
                )

            num_chunks = norm_hidden_states.shape[self._chunk_dim] // self._chunk_size
            ff_output = torch.cat(
                [self.ff(hid_slice) for hid_slice in norm_hidden_states.chunk(num_chunks, dim=self._chunk_dim)],
                dim=self._chunk_dim,
            )
        else:
            ff_output = self.ff(norm_hidden_states)

        if self.use_ada_layer_norm_zero:
            ff_output = gate_mlp.unsqueeze(1) * ff_output

        hidden_states = ff_output + hidden_states

        return hidden_states