
from typing import Any, Dict, Optional

import torch
import torch.nn.functional as F
from torch import nn

from ..utils import deprecate, logging
from ..utils.torch_utils import maybe_allow_in_graph
from .activations import GEGLU, GELU, ApproximateGELU
from .attention_processor import Attention, JointAttnProcessor2_0
from .embeddings import SinusoidalPositionalEmbedding
from .normalization import AdaLayerNorm, AdaLayerNormContinuous, AdaLayerNormZero, RMSNorm


logger = logging.get_logger(__name__)


def _chunked_feed_forward(ff: nn.Module, hidden_states: torch.Tensor, chunk_dim: int, chunk_size: int):
    # "feed_forward_chunk_size" can be used to save memory
    if hidden_states.shape[chunk_dim] % chunk_size != 0:
        raise ValueError(
            f"`hidden_states` dimension to be chunked: {hidden_states.shape[chunk_dim]} has to be divisible by chunk size: {chunk_size}."
            " Make sure to set an appropriate `chunk_size` when calling `unet.enable_forward_chunking`."
        )

    num_chunks = hidden_states.shape[chunk_dim] // chunk_size
    ff_output = torch.cat(
        [ff(hid_slice) for hid_slice in hidden_states.chunk(num_chunks, dim=chunk_dim)],
        dim=chunk_dim,
    )
    return ff_output


@maybe_allow_in_graph
class GatedSelfAttentionDense(nn.Module):
    r"""
    A gated self-attention dense layer that combines visual features and object features.

    Parameters:
        query_dim (`int`): The number of channels in the query.
        context_dim (`int`): The number of channels in the context.
        n_heads (`int`): The number of heads to use for attention.
        d_head (`int`): The number of channels in each head.
    """

    def __init__(self, query_dim: int, context_dim: int, n_heads: int, d_head: int):
        super().__init__()

        # we need a linear projection since we concatenate visual features and object features
        self.linear = nn.Linear(context_dim, query_dim)

        self.attn = Attention(query_dim=query_dim, heads=n_heads, dim_head=d_head)
        self.ff = FeedForward(query_dim, activation_fn="geglu")

        self.norm1 = nn.LayerNorm(query_dim)
        self.norm2 = nn.LayerNorm(query_dim)

        self.register_parameter("alpha_attn", nn.Parameter(torch.tensor(0.0)))
        self.register_parameter("alpha_dense", nn.Parameter(torch.tensor(0.0)))

        self.enabled = True

    def forward(self, x: torch.Tensor, objs: torch.Tensor) -> torch.Tensor:
        if not self.enabled:
            return x

        n_visual = x.shape[1]
        objs = self.linear(objs)

        x = x + self.alpha_attn.tanh() * self.attn(self.norm1(torch.cat([x, objs], dim=1)))[:, :n_visual, :]
        x = x + self.alpha_dense.tanh() * self.ff(self.norm2(x))

        return x


@maybe_allow_in_graph
class JointTransformerBlock(nn.Module):
    r"""
    A Transformer block following the MMDiT architecture, introduced in Stable Diffusion 3.

    Reference: https://arxiv.org/abs/2403.03206

    Parameters:
        dim (`int`): The number of channels in the input and output.
        num_attention_heads (`int`): The number of heads to use for multi-head attention.
        attention_head_dim (`int`): The number of channels in each head.
        context_pre_only (`bool`): Boolean to determine if we should add some blocks associated with the
            processing of `context` conditions.
    """

    def __init__(self, dim: int, num_attention_heads: int, attention_head_dim: int, context_pre_only: bool = False):
        super().__init__()

        self.context_pre_only = context_pre_only
        context_norm_type = "ada_norm_continous" if context_pre_only else "ada_norm_zero"

        self.norm1 = AdaLayerNormZero(dim)

        if context_norm_type == "ada_norm_continous":
            self.norm1_context = AdaLayerNormContinuous(
                dim, dim, elementwise_affine=False, eps=1e-6, bias=True, norm_type="layer_norm"
            )
        elif context_norm_type == "ada_norm_zero":
            self.norm1_context = AdaLayerNormZero(dim)
        else:
            raise ValueError(
                f"Unknown context_norm_type: {context_norm_type}, currently only support `ada_norm_continous`, `ada_norm_zero`"
            )

        if hasattr(F, "scaled_dot_product_attention"):
            processor = JointAttnProcessor2_0()
        else:
            raise ValueError(
                "The current PyTorch version does not support the `scaled_dot_product_attention` function."
            )
        self.attn = Attention(
            query_dim=dim,
            cross_attention_dim=None,
            added_kv_proj_dim=dim,
            dim_head=attention_head_dim // num_attention_heads,
            heads=num_attention_heads,
            out_dim=attention_head_dim,
            context_pre_only=context_pre_only,
            bias=True,
            processor=processor,
        )

        self.norm2 = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
        self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")

        if not context_pre_only:
            self.norm2_context = nn.LayerNorm(dim, elementwise_affine=False, eps=1e-6)
            self.ff_context = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")
        else:
            self.norm2_context = None
            self.ff_context = None

        # let chunk size default to None
        self._chunk_size = None
        self._chunk_dim = 0

    def set_chunk_feed_forward(self, chunk_size: Optional[int], dim: int = 0):
        # Sets chunk feed-forward
        self._chunk_size = chunk_size
        self._chunk_dim = dim

    def forward(
        self, hidden_states: torch.FloatTensor, encoder_hidden_states: torch.FloatTensor, temb: torch.FloatTensor
    ):
        norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb)

        if self.context_pre_only:
            norm_encoder_hidden_states = self.norm1_context(encoder_hidden_states, temb)
        else:
            norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = self.norm1_context(
                encoder_hidden_states, emb=temb
            )

        # Attention.
        attn_output, context_attn_output = self.attn(
            hidden_states=norm_hidden_states, encoder_hidden_states=norm_encoder_hidden_states
        )

        # Process attention outputs for the `hidden_states`.
        attn_output = gate_msa.unsqueeze(1) * attn_output
        hidden_states = hidden_states + attn_output

        norm_hidden_states = self.norm2(hidden_states)
        norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
        if self._chunk_size is not None:
            # "feed_forward_chunk_size" can be used to save memory
            ff_output = _chunked_feed_forward(self.ff, norm_hidden_states, self._chunk_dim, self._chunk_size)
        else:
            ff_output = self.ff(norm_hidden_states)
        ff_output = gate_mlp.unsqueeze(1) * ff_output

        hidden_states = hidden_states + ff_output

        # Process attention outputs for the `encoder_hidden_states`.
        if self.context_pre_only:
            encoder_hidden_states = None
        else:
            context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output
            encoder_hidden_states = encoder_hidden_states + context_attn_output

            norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states)
            norm_encoder_hidden_states = norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None]
            if self._chunk_size is not None:
                # "feed_forward_chunk_size" can be used to save memory
                context_ff_output = _chunked_feed_forward(
                    self.ff_context, norm_encoder_hidden_states, self._chunk_dim, self._chunk_size
                )
            else:
                context_ff_output = self.ff_context(norm_encoder_hidden_states)
            encoder_hidden_states = encoder_hidden_states + c_gate_mlp.unsqueeze(1) * context_ff_output

        return encoder_hidden_states, hidden_states
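

# Illustrative sketch (not part of the library): how an SD3-style parent model might drive
# `JointTransformerBlock`. The sizes are assumptions for the example; in practice the parent
# transformer (e.g. `SD3Transformer2DModel`) constructs these blocks and supplies the inputs.
#
#   block = JointTransformerBlock(dim=1536, num_attention_heads=24, attention_head_dim=1536)
#   hidden_states = torch.randn(2, 4096, 1536)           # latent/image tokens
#   encoder_hidden_states = torch.randn(2, 154, 1536)     # text tokens
#   temb = torch.randn(2, 1536)                            # pooled conditioning embedding
#   encoder_hidden_states, hidden_states = block(hidden_states, encoder_hidden_states, temb)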


@maybe_allow_in_graph
class BasicTransformerBlock(nn.Module):
    r"""
    A basic Transformer block.

    Parameters:
        dim (`int`): The number of channels in the input and output.
        num_attention_heads (`int`): The number of heads to use for multi-head attention.
        attention_head_dim (`int`): The number of channels in each head.
        dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
        cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
        activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
        num_embeds_ada_norm (:
            obj: `int`, *optional*): The number of diffusion steps used during training. See `Transformer2DModel`.
        attention_bias (:
            obj: `bool`, *optional*, defaults to `False`): Configure if the attentions should contain a bias parameter.
        only_cross_attention (`bool`, *optional*):
            Whether to use only cross-attention layers. In this case two cross attention layers are used.
        double_self_attention (`bool`, *optional*):
            Whether to use two self-attention layers. In this case no cross attention layers are used.
        upcast_attention (`bool`, *optional*):
            Whether to upcast the attention computation to float32. This is useful for mixed precision training.
        norm_elementwise_affine (`bool`, *optional*, defaults to `True`):
            Whether to use learnable elementwise affine parameters for normalization.
        norm_type (`str`, *optional*, defaults to `"layer_norm"`):
            The normalization layer to use. Can be `"layer_norm"`, `"ada_norm"` or `"ada_norm_zero"`.
        final_dropout (`bool` *optional*, defaults to False):
            Whether to apply a final dropout after the last feed-forward layer.
        attention_type (`str`, *optional*, defaults to `"default"`):
            The type of attention to use. Can be `"default"` or `"gated"` or `"gated-text-image"`.
        positional_embeddings (`str`, *optional*, defaults to `None`):
            The type of positional embeddings to apply to.
        num_positional_embeddings (`int`, *optional*, defaults to `None`):
            The maximum number of positional embeddings to apply.
    """

    def __init__(
        self,
        dim: int,
        num_attention_heads: int,
        attention_head_dim: int,
        dropout=0.0,
        cross_attention_dim: Optional[int] = None,
        activation_fn: str = "geglu",
        num_embeds_ada_norm: Optional[int] = None,
        attention_bias: bool = False,
        only_cross_attention: bool = False,
        double_self_attention: bool = False,
        upcast_attention: bool = False,
        norm_elementwise_affine: bool = True,
        norm_type: str = "layer_norm",  # 'layer_norm', 'ada_norm', 'ada_norm_zero', 'ada_norm_single', 'ada_norm_continuous', 'layer_norm_i2vgen'
        norm_eps: float = 1e-5,
        final_dropout: bool = False,
        attention_type: str = "default",
        positional_embeddings: Optional[str] = None,
        num_positional_embeddings: Optional[int] = None,
        ada_norm_continous_conditioning_embedding_dim: Optional[int] = None,
        ada_norm_bias: Optional[int] = None,
        ff_inner_dim: Optional[int] = None,
        ff_bias: bool = True,
        attention_out_bias: bool = True,
    ):
        super().__init__()
        self.only_cross_attention = only_cross_attention

        # These boolean flags are kept for backward compatibility.
        self.use_ada_layer_norm_zero = (num_embeds_ada_norm is not None) and norm_type == "ada_norm_zero"
        self.use_ada_layer_norm = (num_embeds_ada_norm is not None) and norm_type == "ada_norm"
        self.use_ada_layer_norm_single = norm_type == "ada_norm_single"
        self.use_layer_norm = norm_type == "layer_norm"
        self.use_ada_layer_norm_continuous = norm_type == "ada_norm_continuous"

        if norm_type in ("ada_norm", "ada_norm_zero") and num_embeds_ada_norm is None:
            raise ValueError(
                f"`norm_type` is set to {norm_type}, but `num_embeds_ada_norm` is not defined. Please make sure to"
                f" define `num_embeds_ada_norm` if setting `norm_type` to {norm_type}."
            )

        self.norm_type = norm_type
        self.num_embeds_ada_norm = num_embeds_ada_norm

        if positional_embeddings and (num_positional_embeddings is None):
            raise ValueError(
                "If `positional_embedding` type is defined, `num_positition_embeddings` must also be defined."
            )

        if positional_embeddings == "sinusoidal":
            self.pos_embed = SinusoidalPositionalEmbedding(dim, max_seq_length=num_positional_embeddings)
        else:
            self.pos_embed = None

        # Define 3 blocks. Each block has its own normalization layer.
        # 1. Self-Attn
        if norm_type == "ada_norm":
            self.norm1 = AdaLayerNorm(dim, num_embeds_ada_norm)
        elif norm_type == "ada_norm_zero":
            self.norm1 = AdaLayerNormZero(dim, num_embeds_ada_norm)
        elif norm_type == "ada_norm_continuous":
            self.norm1 = AdaLayerNormContinuous(
                dim,
                ada_norm_continous_conditioning_embedding_dim,
                norm_elementwise_affine,
                norm_eps,
                ada_norm_bias,
                "rms_norm",
            )
        else:
            self.norm1 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps)

        self.attn1 = Attention(
            query_dim=dim,
            heads=num_attention_heads,
            dim_head=attention_head_dim,
            dropout=dropout,
            bias=attention_bias,
            cross_attention_dim=cross_attention_dim if only_cross_attention else None,
            upcast_attention=upcast_attention,
            out_bias=attention_out_bias,
        )

        # 2. Cross-Attn
        if cross_attention_dim is not None or double_self_attention:
            # We currently only use AdaLayerNormZero for self attention where there will only be one attention block.
            # I.e. the number of returned modulation chunks from AdaLayerNormZero would not make sense if returned
            # during the second cross attention block.
            if norm_type == "ada_norm":
                self.norm2 = AdaLayerNorm(dim, num_embeds_ada_norm)
            elif norm_type == "ada_norm_continuous":
                self.norm2 = AdaLayerNormContinuous(
                    dim,
                    ada_norm_continous_conditioning_embedding_dim,
                    norm_elementwise_affine,
                    norm_eps,
                    ada_norm_bias,
                    "rms_norm",
                )
            else:
                self.norm2 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine)

            self.attn2 = Attention(
                query_dim=dim,
                cross_attention_dim=cross_attention_dim if not double_self_attention else None,
                heads=num_attention_heads,
                dim_head=attention_head_dim,
                dropout=dropout,
                bias=attention_bias,
                upcast_attention=upcast_attention,
                out_bias=attention_out_bias,
            )  # is self-attn if encoder_hidden_states is none
        else:
            self.norm2 = None
            self.attn2 = None

        # 3. Feed-forward
        if norm_type == "ada_norm_continuous":
            self.norm3 = AdaLayerNormContinuous(
                dim,
                ada_norm_continous_conditioning_embedding_dim,
                norm_elementwise_affine,
                norm_eps,
                ada_norm_bias,
                "layer_norm",
            )
        elif norm_type in ["ada_norm_zero", "ada_norm", "layer_norm"]:
            self.norm3 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine)
        elif norm_type == "layer_norm_i2vgen":
            self.norm3 = None

        self.ff = FeedForward(
            dim,
            dropout=dropout,
            activation_fn=activation_fn,
            final_dropout=final_dropout,
            inner_dim=ff_inner_dim,
            bias=ff_bias,
        )

        # 4. Fuser
        if attention_type == "gated" or attention_type == "gated-text-image":
            self.fuser = GatedSelfAttentionDense(dim, cross_attention_dim, num_attention_heads, attention_head_dim)

        # 5. Scale/shift table for PixArt-Alpha style conditioning.
        if norm_type == "ada_norm_single":
            self.scale_shift_table = nn.Parameter(torch.randn(6, dim) / dim**0.5)

        # let chunk size default to None
        self._chunk_size = None
        self._chunk_dim = 0

    def set_chunk_feed_forward(self, chunk_size: Optional[int], dim: int = 0):
        # Sets chunk feed-forward
        self._chunk_size = chunk_size
        self._chunk_dim = dim

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.Tensor] = None,
        timestep: Optional[torch.LongTensor] = None,
        cross_attention_kwargs: Dict[str, Any] = None,
        class_labels: Optional[torch.LongTensor] = None,
        added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
    ) -> torch.Tensor:
        if cross_attention_kwargs is not None:
            if cross_attention_kwargs.get("scale", None) is not None:
                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

        # Notice that normalization is always applied before the real computation in the following blocks.
        # 0. Self-Attention
        batch_size = hidden_states.shape[0]

        if self.norm_type == "ada_norm":
            norm_hidden_states = self.norm1(hidden_states, timestep)
        elif self.norm_type == "ada_norm_zero":
            norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
                hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype
            )
        elif self.norm_type in ["layer_norm", "layer_norm_i2vgen"]:
            norm_hidden_states = self.norm1(hidden_states)
        elif self.norm_type == "ada_norm_continuous":
            norm_hidden_states = self.norm1(hidden_states, added_cond_kwargs["pooled_text_emb"])
        elif self.norm_type == "ada_norm_single":
            shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (
                self.scale_shift_table[None] + timestep.reshape(batch_size, 6, -1)
            ).chunk(6, dim=1)
            norm_hidden_states = self.norm1(hidden_states)
            norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa
            norm_hidden_states = norm_hidden_states.squeeze(1)
        else:
            raise ValueError("Incorrect norm used")

        if self.pos_embed is not None:
            norm_hidden_states = self.pos_embed(norm_hidden_states)

        # 1. Prepare GLIGEN inputs
        cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
        gligen_kwargs = cross_attention_kwargs.pop("gligen", None)

        attn_output = self.attn1(
            norm_hidden_states,
            encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
            attention_mask=attention_mask,
            **cross_attention_kwargs,
        )
        if self.norm_type == "ada_norm_zero":
            attn_output = gate_msa.unsqueeze(1) * attn_output
        elif self.norm_type == "ada_norm_single":
            attn_output = gate_msa * attn_output

        hidden_states = attn_output + hidden_states
        if hidden_states.ndim == 4:
            hidden_states = hidden_states.squeeze(1)

        # 1.2 GLIGEN Control
        if gligen_kwargs is not None:
            hidden_states = self.fuser(hidden_states, gligen_kwargs["objs"])

        # 3. Cross-Attention
        if self.attn2 is not None:
            if self.norm_type == "ada_norm":
                norm_hidden_states = self.norm2(hidden_states, timestep)
            elif self.norm_type in ["ada_norm_zero", "layer_norm", "layer_norm_i2vgen"]:
                norm_hidden_states = self.norm2(hidden_states)
            elif self.norm_type == "ada_norm_single":
                # For PixArt, norm2 is not applied here
                norm_hidden_states = hidden_states
            elif self.norm_type == "ada_norm_continuous":
                norm_hidden_states = self.norm2(hidden_states, added_cond_kwargs["pooled_text_emb"])
            else:
                raise ValueError("Incorrect norm")

            if self.pos_embed is not None and self.norm_type != "ada_norm_single":
                norm_hidden_states = self.pos_embed(norm_hidden_states)

            attn_output = self.attn2(
                norm_hidden_states,
                encoder_hidden_states=encoder_hidden_states,
                attention_mask=encoder_attention_mask,
                **cross_attention_kwargs,
            )
            hidden_states = attn_output + hidden_states

        # 4. Feed-forward
        if self.norm_type == "ada_norm_continuous":
            norm_hidden_states = self.norm3(hidden_states, added_cond_kwargs["pooled_text_emb"])
        elif not self.norm_type == "ada_norm_single":
            norm_hidden_states = self.norm3(hidden_states)

        if self.norm_type == "ada_norm_zero":
            norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]

        if self.norm_type == "ada_norm_single":
            norm_hidden_states = self.norm2(hidden_states)
            norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp

        if self._chunk_size is not None:
            # "feed_forward_chunk_size" can be used to save memory
            ff_output = _chunked_feed_forward(self.ff, norm_hidden_states, self._chunk_dim, self._chunk_size)
        else:
            ff_output = self.ff(norm_hidden_states)

        if self.norm_type == "ada_norm_zero":
            ff_output = gate_mlp.unsqueeze(1) * ff_output
        elif self.norm_type == "ada_norm_single":
            ff_output = gate_mlp * ff_output

        hidden_states = ff_output + hidden_states
        if hidden_states.ndim == 4:
            hidden_states = hidden_states.squeeze(1)

        return hidden_states


@maybe_allow_in_graph
class TemporalBasicTransformerBlock(nn.Module):
    r"""
    A basic Transformer block for video like data.

    Parameters:
        dim (`int`): The number of channels in the input and output.
        time_mix_inner_dim (`int`): The number of channels for temporal attention.
        num_attention_heads (`int`): The number of heads to use for multi-head attention.
        attention_head_dim (`int`): The number of channels in each head.
        cross_attention_dim (`int`, *optional*): The size of the encoder_hidden_states vector for cross attention.
    """

    def __init__(
        self,
        dim: int,
        time_mix_inner_dim: int,
        num_attention_heads: int,
        attention_head_dim: int,
        cross_attention_dim: Optional[int] = None,
    ):
        super().__init__()
        self.is_res = dim == time_mix_inner_dim

        self.norm_in = nn.LayerNorm(dim)

        # Define 3 blocks. Each block has its own normalization layer.
        # 1. Self-Attn
        self.ff_in = FeedForward(
            dim,
            dim_out=time_mix_inner_dim,
            activation_fn="geglu",
        )

        self.norm1 = nn.LayerNorm(time_mix_inner_dim)
        self.attn1 = Attention(
            query_dim=time_mix_inner_dim,
            heads=num_attention_heads,
            dim_head=attention_head_dim,
            cross_attention_dim=None,
        )

        # 2. Cross-Attn
        if cross_attention_dim is not None:
            self.norm2 = nn.LayerNorm(time_mix_inner_dim)
            self.attn2 = Attention(
                query_dim=time_mix_inner_dim,
                cross_attention_dim=cross_attention_dim,
                heads=num_attention_heads,
                dim_head=attention_head_dim,
            )  # is self-attn if encoder_hidden_states is none
        else:
            self.norm2 = None
            self.attn2 = None

        # 3. Feed-forward
        self.norm3 = nn.LayerNorm(time_mix_inner_dim)
        self.ff = FeedForward(time_mix_inner_dim, activation_fn="geglu")

        # let chunk size default to None
        self._chunk_size = None
        self._chunk_dim = None

    def set_chunk_feed_forward(self, chunk_size: Optional[int], **kwargs):
        # Sets chunk feed-forward
        self._chunk_size = chunk_size
        # chunk dim should be hardcoded to 1 to have better speed vs. memory trade-off
        self._chunk_dim = 1

    def forward(
        self,
        hidden_states: torch.Tensor,
        num_frames: int,
        encoder_hidden_states: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        # Notice that normalization is always applied before the real computation in the following blocks.
        # 0. Self-Attention
        batch_frames, seq_length, channels = hidden_states.shape
        batch_size = batch_frames // num_frames

        hidden_states = hidden_states[None, :].reshape(batch_size, num_frames, seq_length, channels)
        hidden_states = hidden_states.permute(0, 2, 1, 3)
        hidden_states = hidden_states.reshape(batch_size * seq_length, num_frames, channels)

        residual = hidden_states
        hidden_states = self.norm_in(hidden_states)

        if self._chunk_size is not None:
            hidden_states = _chunked_feed_forward(self.ff_in, hidden_states, self._chunk_dim, self._chunk_size)
        else:
            hidden_states = self.ff_in(hidden_states)

        if self.is_res:
            hidden_states = hidden_states + residual

        norm_hidden_states = self.norm1(hidden_states)
        attn_output = self.attn1(norm_hidden_states, encoder_hidden_states=None)
        hidden_states = attn_output + hidden_states

        # 3. Cross-Attention
        if self.attn2 is not None:
            norm_hidden_states = self.norm2(hidden_states)
            attn_output = self.attn2(norm_hidden_states, encoder_hidden_states=encoder_hidden_states)
            hidden_states = attn_output + hidden_states

        # 4. Feed-forward
        norm_hidden_states = self.norm3(hidden_states)

        if self._chunk_size is not None:
            ff_output = _chunked_feed_forward(self.ff, norm_hidden_states, self._chunk_dim, self._chunk_size)
        else:
            ff_output = self.ff(norm_hidden_states)

        if self.is_res:
            hidden_states = ff_output + hidden_states
        else:
            hidden_states = ff_output

        hidden_states = hidden_states[None, :].reshape(batch_size, seq_length, num_frames, channels)
        hidden_states = hidden_states.permute(0, 2, 1, 3)
        hidden_states = hidden_states.reshape(batch_size * num_frames, seq_length, channels)

        return hidden_states


class SkipFFTransformerBlock(nn.Module):
    def __init__(
        self,
        dim: int,
        num_attention_heads: int,
        attention_head_dim: int,
        kv_input_dim: int,
        kv_input_dim_proj_use_bias: bool,
        dropout=0.0,
        cross_attention_dim: Optional[int] = None,
        attention_bias: bool = False,
        attention_out_bias: bool = True,
    ):
        super().__init__()
        if kv_input_dim != dim:
            self.kv_mapper = nn.Linear(kv_input_dim, dim, kv_input_dim_proj_use_bias)
        else:
            self.kv_mapper = None

        self.norm1 = RMSNorm(dim, 1e-06)

        self.attn1 = Attention(
            query_dim=dim,
            heads=num_attention_heads,
            dim_head=attention_head_dim,
            dropout=dropout,
            bias=attention_bias,
            cross_attention_dim=cross_attention_dim,
            out_bias=attention_out_bias,
        )

        self.norm2 = RMSNorm(dim, 1e-06)

        self.attn2 = Attention(
            query_dim=dim,
            cross_attention_dim=cross_attention_dim,
            heads=num_attention_heads,
            dim_head=attention_head_dim,
            dropout=dropout,
            bias=attention_bias,
            out_bias=attention_out_bias,
        )

    def forward(self, hidden_states, encoder_hidden_states, cross_attention_kwargs):
        cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}

        if self.kv_mapper is not None:
            encoder_hidden_states = self.kv_mapper(F.silu(encoder_hidden_states))

        norm_hidden_states = self.norm1(hidden_states)

        attn_output = self.attn1(
            norm_hidden_states,
            encoder_hidden_states=encoder_hidden_states,
            **cross_attention_kwargs,
        )

        hidden_states = attn_output + hidden_states

        norm_hidden_states = self.norm2(hidden_states)

        attn_output = self.attn2(
            norm_hidden_states,
            encoder_hidden_states=encoder_hidden_states,
            **cross_attention_kwargs,
        )

        hidden_states = attn_output + hidden_states

        return hidden_states
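

# Illustrative sketch (not part of the library): `TemporalBasicTransformerBlock` defined above
# mixes information across frames. It expects the frame axis folded into the batch axis, i.e.
# an input of shape (batch * num_frames, seq_len, dim); the values below are assumptions for
# the example.
#
#   block = TemporalBasicTransformerBlock(
#       dim=320, time_mix_inner_dim=320, num_attention_heads=8, attention_head_dim=40
#   )
#   hidden_states = torch.randn(2 * 14, 1024, 320)   # 2 videos x 14 frames
#   out = block(hidden_states, num_frames=14)         # same shape as the input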


class FeedForward(nn.Module):
    r"""
    A feed-forward layer.

    Parameters:
        dim (`int`): The number of channels in the input.
        dim_out (`int`, *optional*): The number of channels in the output. If not given, defaults to `dim`.
        mult (`int`, *optional*, defaults to 4): The multiplier to use for the hidden dimension.
        dropout (`float`, *optional*, defaults to 0.0): The dropout probability to use.
        activation_fn (`str`, *optional*, defaults to `"geglu"`): Activation function to be used in feed-forward.
        final_dropout (`bool` *optional*, defaults to False): Apply a final dropout.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    """

    def __init__(
        self,
        dim: int,
        dim_out: Optional[int] = None,
        mult: int = 4,
        dropout: float = 0.0,
        activation_fn: str = "geglu",
        final_dropout: bool = False,
        inner_dim=None,
        bias: bool = True,
    ):
        super().__init__()
        if inner_dim is None:
            inner_dim = int(dim * mult)
        dim_out = dim_out if dim_out is not None else dim

        if activation_fn == "gelu":
            act_fn = GELU(dim, inner_dim, bias=bias)
        elif activation_fn == "gelu-approximate":
            act_fn = GELU(dim, inner_dim, approximate="tanh", bias=bias)
        elif activation_fn == "geglu":
            act_fn = GEGLU(dim, inner_dim, bias=bias)
        elif activation_fn == "geglu-approximate":
            act_fn = ApproximateGELU(dim, inner_dim, bias=bias)

        self.net = nn.ModuleList([])
        # project in
        self.net.append(act_fn)
        # project dropout
        self.net.append(nn.Dropout(dropout))
        # project out
        self.net.append(nn.Linear(inner_dim, dim_out, bias=bias))
        # FF as used in Vision Transformer, MLP-Mixer and related networks has a final dropout
        if final_dropout:
            self.net.append(nn.Dropout(dropout))

    def forward(self, hidden_states: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        if len(args) > 0 or kwargs.get("scale", None) is not None:
            deprecation_message = (
                "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise"
                " an error in the future. `scale` should directly be passed while calling the underlying pipeline"
                " component i.e., via `cross_attention_kwargs`."
            )
            deprecate("scale", "1.0.0", deprecation_message)
        for module in self.net:
            hidden_states = module(hidden_states)
        return hidden_states
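

# Illustrative sketch (not part of the library): `FeedForward` is the MLP used by the blocks in
# this module; the activation is selected by name. Sizes are assumptions for the example.
#
#   ff = FeedForward(dim=320, mult=4, activation_fn="geglu")
#   out = ff(torch.randn(2, 4096, 320))   # trailing dim stays 320 since `dim_out` defaults to `dim`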