
    /;ji                    x   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z	d dl
mc mZ d dl	mZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZ d
dlmZ  ej        e          Z e            rd dlZ e            r	d dlZd dlZndZe G d dej                               Z! G d d          Z" G d dej                   Z# G d d          Z$ G d d          Z% G d d          Z& G d d          Z' G d d          Z( G d d          Z) G d d          Z* G d  d!          Z+ G d" d#          Z, G d$ d%          Z- G d& d'ej                   Z. G d( d)ej                   Z/ G d* d+          Z0 G d, d-          Z1 G d. d/ej                   Z2 G d0 d1ej                   Z3 G d2 d3ej                   Z4 G d4 d5ej                   Z5 G d6 d7ej                   Z6 G d8 d9ej                   Z7 G d: d;e	j        j                   Z8e3e4e5e6fZ9e$e1e%e(e6fZ:e"e+e)e0e3e4e5e7e8f	Z;ee"e+e-e)e0e$e1e%e(e#e.e/e3e4e5e6f         Z<dS )<    N)import_module)CallableListOptionalUnion)nn   )IPAdapterMaskProcessor)	deprecatelogging)is_torch_npu_availableis_xformers_available)maybe_allow_in_graph   )LoRALinearLayerc            /           e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d@dedee         dededededededee         dedee         dee         dee         dee         dededededededed ed!         d"ef. fd#Z	d$ed%dfd&Z
	 dAd'ed(ee         d%dfd)Zd*ed%dfd+ZdBd,ZdCd-ed%d.fd/Z	 	 dDd0ej        d1eej                 d2eej                 d%ej        fd3Zd4ej        d%ej        fd5ZdEd4ej        d"ed%ej        fd7Z	 dAd8ej        d9ej        d2ej        d%ej        fd:Z	 dEd2ej        d;ed<ed"ed%ej        f
d=Zd1ej        d%ej        fd>Z ej                    dFd?            Z xZS )G	Attentionaq  
    A cross attention layer.

    Parameters:
        query_dim (`int`):
            The number of channels in the query.
        cross_attention_dim (`int`, *optional*):
            The number of channels in the encoder_hidden_states. If not given, defaults to `query_dim`.
        heads (`int`,  *optional*, defaults to 8):
            The number of heads to use for multi-head attention.
        dim_head (`int`,  *optional*, defaults to 64):
            The number of channels in each head.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
        bias (`bool`, *optional*, defaults to False):
            Set to `True` for the query, key, and value linear layers to contain a bias parameter.
        upcast_attention (`bool`, *optional*, defaults to False):
            Set to `True` to upcast the attention computation to `float32`.
        upcast_softmax (`bool`, *optional*, defaults to False):
            Set to `True` to upcast the softmax computation to `float32`.
        cross_attention_norm (`str`, *optional*, defaults to `None`):
            The type of normalization to use for the cross attention. Can be `None`, `layer_norm`, or `group_norm`.
        cross_attention_norm_num_groups (`int`, *optional*, defaults to 32):
            The number of groups to use for the group norm in the cross attention.
        added_kv_proj_dim (`int`, *optional*, defaults to `None`):
            The number of channels to use for the added key and value projections. If `None`, no projection is used.
        norm_num_groups (`int`, *optional*, defaults to `None`):
            The number of groups to use for the group norm in the attention.
        spatial_norm_dim (`int`, *optional*, defaults to `None`):
            The number of channels to use for the spatial normalization.
        out_bias (`bool`, *optional*, defaults to `True`):
            Set to `True` to use a bias in the output linear layer.
        scale_qk (`bool`, *optional*, defaults to `True`):
            Set to `True` to scale the query and key by `1 / sqrt(dim_head)`.
        only_cross_attention (`bool`, *optional*, defaults to `False`):
            Set to `True` to only use cross attention and not added_kv_proj_dim. Can only be set to `True` if
            `added_kv_proj_dim` is not `None`.
        eps (`float`, *optional*, defaults to 1e-5):
            An additional value added to the denominator in group normalization that is used for numerical stability.
        rescale_output_factor (`float`, *optional*, defaults to 1.0):
            A factor to rescale the output by dividing it with this value.
        residual_connection (`bool`, *optional*, defaults to `False`):
            Set to `True` to add the residual connection to the output.
        _from_deprecated_attn_block (`bool`, *optional*, defaults to `False`):
            Set to `True` if the attention block is loaded from a deprecated state dict.
        processor (`AttnProcessor`, *optional*, defaults to `None`):
            The attention processor to use. If `None`, defaults to `AttnProcessor2_0` if `torch 2.x` is used and
            `AttnProcessor` otherwise.
    N   @           F    Th㈵>      ?	query_dimcross_attention_dimheadsdim_headdropoutbiasupcast_attentionupcast_softmaxcross_attention_normcross_attention_norm_num_groupsqk_normadded_kv_proj_dimnorm_num_groupsspatial_norm_dimout_biasscale_qkonly_cross_attentionepsrescale_output_factorresidual_connection_from_deprecated_attn_block	processorAttnProcessorout_dimc                    t                                                       ||n||z  | _        || _        || _        |d u| _        ||n|| _        || _        || _        || _	        || _
        || _        d| _        ||n|| _        || _        || _        || _        | j        r|dz  nd| _        |||z  n|| _        || _        || _        || _        | j        | j        rt-          d          |t/          j        |||d          | _        nd | _        |t5          ||          | _        nd | _        |d | _        d | _        nP|dk    r7t/          j        ||	          | _        t/          j        ||	          | _        nt-          d
| d          |	d | _        nm|	dk    rt/          j        | j                  | _        nH|	dk    r/| j        |}n| j        }t/          j        ||
dd          | _        nt-          d|	 d          t/          j         || j        |          | _!        | j        sMt/          j         | j        | j        |          | _"        t/          j         | j        | j        |          | _#        nd | _"        d | _#        | j        dt/          j         || j                  | _$        t/          j         || j                  | _%        | j        t/          j         || j                  | _&        t/          j'        g           | _(        | j(        )                    t/          j         | j        | j        |                     | j(        )                    t/          j*        |                     | j        -| j        s&t/          j         | j        | j        |          | _+        |8tY          tZ          d          r| j        rt]                      nt_                      }| 0                    |           d S )NFg      r   z`only_cross_attention` can only be set to True if `added_kv_proj_dim` is not None. Make sure to set either `only_cross_attention=False` or define `added_kv_proj_dim`.Tnum_channels
num_groupsr+   affine)
f_channelszq_channels
layer_norm)r+   zunknown qk_norm: z . Should be None or 'layer_norm'
group_normr   zunknown cross_attention_norm: z.. Should be None, 'layer_norm' or 'group_norm'r   scaled_dot_product_attention)1super__init__	inner_dimr   use_biasis_cross_attentionr   r    r!   r,   r-   r   fused_projectionsr1   context_pre_onlyr.   r)   scaler   sliceable_head_dimr%   r*   
ValueErrorr   	GroupNormr:   SpatialNormspatial_normnorm_qnorm_k	LayerNorm
norm_crossLinearto_qto_kto_v
add_k_proj
add_v_proj
add_q_proj
ModuleListto_outappendDropout
to_add_outhasattrFAttnProcessor2_0r0   set_processor)selfr   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r1   rC   norm_cross_num_channels	__class__s                             ^/root/voice-cloning/.venv/lib/python3.11/site-packages/diffusers/models/attention_processor.pyr>   zAttention.__init__^   s,   6 	$+$7X=M""5T"A:M:Y#6#6_h  0,%:"#6 !&")"5ww9 0 ,G( '+}=Xt^^#
,3,?W((U
 #(!2$8!!)d.G) y   & l	ocfostttDOO"DO' +yN^ _ _ _D $D?DKDKK$$,xS999DK,xS999DKKZZZZ[[['"DOO!\11 l4+CDDDOO!\11%1 +<''*.*B' l4A`fjsw  DOO u1Euuu   IidCCC	( 		$":DNQUVVVDI	$":DNQUVVVDIIDIDI!- i(94>JJDO i(94>JJDO$0"$),=t~"N"NmB''29T^T\QQQRRR2:g../// ,T5J, i8TTTDO &-a1O&P&PwUYUbw """huhwhw  	9%%%%%    use_npu_flash_attentionreturnc                     |rt                      }n8t          t          d          r| j        rt	                      nt                      }|                     |           dS )zR
        Set whether to use npu flash attention from `torch_npu` or not.

        r<   N)AttnProcessorNPUrZ   r[   r)   r\   r0   r]   )r^   rc   r/   s      ra   set_use_npu_flash_attentionz%Attention.set_use_npu_flash_attention   sk    
 # 		(**II '.a1O&P&PwUYUbw """huhwhw  	9%%%%%rb   'use_memory_efficient_attention_xformersattention_opc                 ~	   t          | d          ot          | j        t                    }t          | d          o&t          | j        t          t
          t          f          }t          | d          o2t          | j        t          t          t          t          t          f          }|rU|r|s|rt          d| j                   t                      st          dd          t          j                                        st%          d          	 t&          j                            t          j        dd	          t          j        dd	          t          j        dd	                    }n# t.          $ r}|d
}~ww xY w|rt1          | j        j        | j        j        | j        j        |          }|                    | j                                                   |                    | j        j        j         j!        j"                   n|rt          | j        j#        | j        j$        | j        j        | j        j        |          }|                    | j                                                   t          | j        d          r)|                    | j        j%        j!        j"                   n|r,tL          '                    d           t          |          }ntQ          |          }n|rt          tR          d          rtT          ntV          }	 |	| j        j        | j        j        | j        j                  }|                    | j                                                   |                    | j        j        j         j!        j"                   n|rt          tR          d          rt          nt          }	 |	| j        j#        | j        j$        | j        j        | j        j                  }|                    | j                                                   t          | j        d          r)|                    | j        j%        j!        j"                   n8t          tR          d          r| j,        rt[                      nt]                      }| /                    |           d
S )a  
        Set whether to use memory efficient attention from `xformers` or not.

        Args:
            use_memory_efficient_attention_xformers (`bool`):
                Whether to use memory efficient attention from `xformers` or not.
            attention_op (`Callable`, *optional*):
                The attention operation to use. Defaults to `None` which uses the default attention operation from
                `xformers`.
        r/   zpMemory efficient attention is currently not supported for LoRA or custom diffusion for attention processor type zeRefer to https://github.com/facebookresearch/xformers for more information on how to install xformersxformers)namezvtorch.cuda.is_available() should be True but is False. xformers' memory efficient attention is only available for GPU )r   r	   (   cuda)deviceN)hidden_sizer   rankri   )train_kvtrain_q_outrp   r   ri   to_k_custom_diffusionzMemory efficient attention with `xformers` might currently not work correctly if an attention mask is required for the attention operation.ri   r<   )rp   r   rq   )rr   rs   rp   r   )0rZ   
isinstancer/   LORA_ATTENTION_PROCESSORSCustomDiffusionAttnProcessor$CustomDiffusionXFormersAttnProcessorCustomDiffusionAttnProcessor2_0AttnAddedKVProcessorAttnAddedKVProcessor2_0SlicedAttnAddedKVProcessorXFormersAttnAddedKVProcessorLoRAAttnAddedKVProcessorNotImplementedErrorr   ModuleNotFoundErrortorchrn   is_availablerF   rk   opsmemory_efficient_attentionrandn	ExceptionLoRAXFormersAttnProcessorrp   r   rq   load_state_dict
state_dictto	to_q_loraupweightro   rr   rs   rt   loggerinfoXFormersAttnProcessorr[   LoRAAttnProcessor2_0LoRAAttnProcessorr)   r\   r0   r]   )
r^   rh   ri   is_lorais_custom_diffusionis_added_kv_processor_er/   attn_processor_classs
             ra   +set_use_memory_efficient_attention_xformersz5Attention.set_use_memory_efficient_attention_xformers   s    $,, 
N%2
 2
 &dK88 
ZN)+OQpq>
 >
 !(k : : 	!
zN$'*,(	@
 	@
 3 b	$ ' 5H ) X  HL  HV  X  X   )** )$ $    Z,,..  /  
 ??Jv>>>Jv>>>Jv>>> AA
 !   G   M 6 $ :(,(J,!-	  	 ))$.*C*C*E*EFFFT^58?FGGGG$ M@!^4 $ : $ :(,(J!-  	 ))$.*C*C*E*EFFF4>+BCC ULL!E!L!STTT& 
M
  b   9lSSS		1|LLL		 #,3A7U,V,Vm((\m % 10 $ :(,(J,  	
 ))$.*C*C*E*EFFFT^58?FGGGG$  q"@AA6335 %
 10!^4 $ : $ :(,(J	  	 ))$.*C*C*E*EFFF4>+BCC ULL!E!L!STTT q"@AA)FJm)$&&&&  	9%%%%%s   AE0 0
F:E<<F
slice_sizec                 r   |&|| j         k    rt          d| d| j          d          || j        t          |          }n`|t	          |          }nN| j        t                      }n8t          t          d          r| j        rt                      nt                      }|                     |           dS )z
        Set the slice size for attention computation.

        Args:
            slice_size (`int`):
                The slice size for attention computation.
        Nzslice_size z has to be smaller or equal to .r<   )rE   rF   r%   r}   SlicedAttnProcessorr{   rZ   r[   r)   r\   r0   r]   )r^   r   r/   s      ra   set_attention_slicezAttention.set_attention_slice  s     !j43J&J&Jp:ppVZVmpppqqq!d&<&H2:>>II#+J77II#/,..II '.a1O&P&PwUYUbw """huhwhw  	9%%%%%rb   c                 8   t          | d          rt          | j        t          j        j                  r^t          |t          j        j                  s?t                              d| j         d|            | j        	                    d           || _        dS )z
        Set the attention processor to use.

        Args:
            processor (`AttnProcessor`):
                The attention processor to use.
        r/   z-You are removing possibly trained weights of z with N)
rZ   rv   r/   r   r   Moduler   r   _modulespop)r^   r/   s     ra   r]   zAttention.set_processor  s     D+&&	+4>58?;;	+ y%(/::	+
 KKiii^giijjjMk***"rb   return_deprecated_loraAttentionProcessorc                 	   |s| j         S d |                                 D             }t          |                                          s| j         S |                    dd           |                    dd           t          |                                          st          d|           | j         j        j        }t          t          t                    d|z             }| j        }|t          t          t          fv r| j        | j        j        j        | j        j        j        | j        j        j        | j        j        j        | j        j        j        | j        j        j        | j        j        j        | j        j        j        | j        d         j        j        | j        d         j        j        d}t1          | j         d	          r| j         j        |d	<    ||fi |}|j                            | j        j                                                   |j                            | j        j                                                   |j                            | j        j                                                   |j                            | j        d         j                                                   n|t@          k    r ||| j!        j"        j#        d         | j        j        j        | j        j        j        
          }|j                            | j        j                                                   |j                            | j        j                                                   |j                            | j        j                                                   |j                            | j        d         j                                                   | j!        j        m|j$                            | j!        j                                                   |j%                            | j&        j                                                   n!d|_$        d|_%        nt          | d          |S )a7  
        Get the attention processor in use.

        Args:
            return_deprecated_lora (`bool`, *optional*, defaults to `False`):
                Set to `True` to return the deprecated LoRA attention processor.

        Returns:
            "AttentionProcessor": The attention processor in use.
        c                 H    i | ]\  }}t          |d           ||j        du S )
lora_layerN)rZ   r   ).0rl   modules      ra   
<dictcomp>z+Attention.get_processor.<locals>.<dictcomp>  sF     
 
 
fv|,,
&#4/
 
 
rb   rR   NrS   zLMake sure that either all layers or no layers have LoRA activated, but have LoRAr   )r   rq   network_alphaq_rankq_hidden_sizek_rankk_hidden_sizev_rankv_hidden_sizeout_rankout_hidden_sizeri   )r   rq   r   z does not exist.)'r/   named_modulesanyvaluesr   allrF   r`   __name__getattrr   r?   r   r   r   r   rO   r   rq   r   out_featuresrP   rQ   rV   rZ   ri   r   r   r   	to_k_lora	to_v_lorato_out_lorar   rR   r   shapeadd_k_proj_loraadd_v_proj_lorarS   )r^   r   is_lora_activatednon_lora_processor_cls_namelora_processor_clsrp   kwargslora_processors           ra   get_processorzAttention.get_processor  s    & 	">!

 
 $ 2 2 4 4
 
 
 $++--.. 	">! 	lD111lD111$++--.. 	r_prr  
 '+n&>&G#$]8%<%<fGb>bccn "35IKd!eee'+'?	,1!%!5!C).3!%!5!B).3!%!5!B).3!%!5!B KN5:#';q>#<#I F t~~66 E)-)D~&//FFvFFN$44TY5I5T5T5V5VWWW$44TY5I5T5T5V5VWWW$44TY5I5T5T5V5VWWW&66t{1~7P7[7[7]7]^^^^#;;;//$(O$:$@$CY)."i2@	  N $44TY5I5T5T5V5VWWW$44TY5I5T5T5V5VWWW$44TY5I5T5T5V5VWWW&66t{1~7P7[7[7]7]^^^ )5.>>t?Y?d?d?f?fggg.>>t?Y?d?d?f?fgggg15.15.. 2DDDEEErb   hidden_statesencoder_hidden_statesattention_maskc                    t          t          j        | j        j                  j                                                  dhfd|                                D             }t          |          dk    r0t          
                    d| d| j        j        j         d           fd|                                D             } | j        | |f||d|S )	ah  
        The forward method of the `Attention` class.

        Args:
            hidden_states (`torch.Tensor`):
                The hidden states of the query.
            encoder_hidden_states (`torch.Tensor`, *optional*):
                The hidden states of the encoder.
            attention_mask (`torch.Tensor`, *optional*):
                The attention mask to use. If `None`, no mask is applied.
            **cross_attention_kwargs:
                Additional keyword arguments to pass along to the cross attention.

        Returns:
            `torch.Tensor`: The output of the attention layer.
        ip_adapter_masksc                 *    g | ]\  }}|v	|v|S  r   )r   kr   attn_parametersquiet_attn_parameterss      ra   
<listcomp>z%Attention.forward.<locals>.<listcomp>&  s:     
 
 
!QA_<T<TYZbwYwYwAYwYwYwrb   r   zcross_attention_kwargs z are not expected by z and will be ignored.c                 $    i | ]\  }}|v 	||S r   r   )r   r   wr   s      ra   r   z%Attention.forward.<locals>.<dictcomp>-  s+    !j!j!j41aUVZiUiUi!QUiUiUirb   )r   r   )setinspect	signaturer/   __call__
parameterskeysitemslenr   warningr`   r   )r^   r   r   r   cross_attention_kwargsunused_kwargsr   r   s         @@ra   forwardzAttention.forward	  s1   6 g/0GHHSXXZZ[[!3 4
 
 
 
 
06688
 
 
 }!!NN G-  G  GdnNfNo  G  G  G   "k!j!j!j3I3O3O3Q3Q!j!j!jt~
 #8)	
 

 %
 
 	
rb   tensorc                     | j         }|j        \  }}}|                    ||z  |||          }|                    dddd                              ||z  |||z            }|S )ac  
        Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size // heads, seq_len, dim * heads]`. `heads`
        is the number of heads initialized while constructing the `Attention` class.

        Args:
            tensor (`torch.Tensor`): The tensor to reshape.

        Returns:
            `torch.Tensor`: The reshaped tensor.
        r   r	   r      )r   r   reshapepermute)r^   r   	head_size
batch_sizeseq_lendims         ra   batch_to_head_dimzAttention.batch_to_head_dim7  so     J	#)< 
GS
i 7GSQQ1a++33J)4KWVY\eVeffrb   r   c                    | j         }|j        dk    r|j        \  }}}d}n|j        \  }}}}|                    |||z  |||z            }|                    dddd          }|dk    r |                    ||z  ||z  ||z            }|S )a   
        Reshape the tensor from `[batch_size, seq_len, dim]` to `[batch_size, seq_len, heads, dim // heads]` `heads` is
        the number of heads initialized while constructing the `Attention` class.

        Args:
            tensor (`torch.Tensor`): The tensor to reshape.
            out_dim (`int`, *optional*, defaults to `3`): The output dimension of the tensor. If `3`, the tensor is
                reshaped to `[batch_size * heads, seq_len, dim // heads]`.

        Returns:
            `torch.Tensor`: The reshaped tensor.
        r   r   r   r	   )r   ndimr   r   r   )r^   r   r1   r   r   r   r   	extra_dims           ra   head_to_batch_dimzAttention.head_to_batch_dimH  s     J	;!'-|$JII28,/J	7C
Gi,?CS\L\]]1a++a<<^^J$:Gi<OQTXaQabbFrb   querykeyc                     |j         }| j        r(|                                }|                                }|Gt          j        |j        d         |j        d         |j        d         |j         |j                  }d}n|}d}t          j        |||                    dd          || j	                  }~| j
        r|                                }|                    d          }~|                    |          }|S )	ak  
        Compute the attention scores.

        Args:
            query (`torch.Tensor`): The query tensor.
            key (`torch.Tensor`): The key tensor.
            attention_mask (`torch.Tensor`, *optional*): The attention mask to use. If `None`, no mask is applied.

        Returns:
            `torch.Tensor`: The attention probabilities/scores.
        Nr   r   dtypero   )betaalphar   )r   r    floatr   emptyr   ro   baddbmm	transposerD   r!   softmaxr   )	r^   r   r   r   r   baddbmm_inputr   attention_scoresattention_probss	            ra   get_attention_scoreszAttention.get_attention_scoresc  s      	KKMME))++C!!KAA	!EKX]Xd  M DD*MD =MM"b!!*
 
 
  	8/5577*22r2::),,U33rb   target_lengthr   c                    | j         }||S |j        d         }||k    r~|j        j        dk    rU|j        d         |j        d         |f}t	          j        ||j        |j                  }t	          j        ||gd          }nt          j	        |d|fd	
          }|dk    r,|j        d         ||z  k     r|
                    |d          }n2|dk    r,|                    d          }|
                    |d          }|S )a  
        Prepare the attention mask for the attention computation.

        Args:
            attention_mask (`torch.Tensor`):
                The attention mask to prepare.
            target_length (`int`):
                The target length of the attention mask. This is the length of the attention mask after padding.
            batch_size (`int`):
                The batch size, which is used to repeat the attention mask.
            out_dim (`int`, *optional*, defaults to `3`):
                The output dimension of the attention mask. Can be either `3` or `4`.

        Returns:
            `torch.Tensor`: The prepared attention mask.
        Nr   mpsr   r   r   r	   r   r   )valuer      )r   r   ro   typer   zerosr   catr[   padrepeat_interleave	unsqueeze)	r^   r   r  r   r1   r   current_lengthpadding_shapepaddings	            ra   prepare_attention_maskz Attention.prepare_attention_mask  s*   & J	!!!,226]**$)U22 "0!5a!8.:Nq:QS` a+m>;OXfXmnnn!&NG+D!!L!L!L "#~=7IQT!U!U!Ua<<#A&i)???!/!A!A)QR!A!S!S\\+55a88N+==iQ=OONrb   c                 X   | j         
J d            t          | j         t          j                  r|                      |          }nct          | j         t          j                  rB|                    dd          }|                      |          }|                    dd          }nJ |S )aG  
        Normalize the encoder hidden states. Requires `self.norm_cross` to be specified when constructing the
        `Attention` class.

        Args:
            encoder_hidden_states (`torch.Tensor`): Hidden states of the encoder.

        Returns:
            `torch.Tensor`: The normalized encoder hidden states.
        NzGself.norm_cross must be defined to call self.norm_encoder_hidden_statesr   r	   )rM   rv   r   rL   rG   r   )r^   r   s     ra   norm_encoder_hidden_statesz$Attention.norm_encoder_hidden_states  s     **,u***dor|44 	$(OO4I$J$J!!66 
	 %:$C$CAq$I$I!$(OO4I$J$J!$9$C$CAq$I$I!!5$$rb   c                 f   | j         j        j        j        }| j         j        j        j        }| j        st          j        | j         j        j        | j        j        j        | j	        j        j        g          }|j
        d         }|j
        d         }t          j        ||| j        ||          | _        | j        j                            |           | j        rct          j        | j         j        j        | j        j        j        | j	        j        j        g          }| j        j                            |           nt          j        | j        j        j        | j	        j        j        g          }|j
        d         }|j
        d         }t          j        ||| j        ||          | _        | j        j                            |           | j        rSt          j        | j        j        j        | j	        j        j        g          }| j        j                            |           || _        d S )Nr   r   )r   ro   r   )rO   r   dataro   r   rA   r   r
  rP   rQ   r   r   rN   r@   to_qkvcopy_r   to_kvrB   )r^   fusero   r   concatenated_weightsin_featuresr   concatenated_biass           ra   fuse_projectionszAttention.fuse_projections  s   !&-	 %+& 	9#(9di.>.CTYEUEZ\`\e\l\q-r#s#s .4Q7K/5a8L )KDMZ`hmnnnDKK$$%9:::} :$)Ity~/BDINDWY]YbYgYl.m$n$n! &&'8999 $)9di.>.CTYEUEZ-[#\#\ .4Q7K/5a8L;4=Y_glmmmDJJ##$8999} 9$)Ity~/BDINDW.X$Y$Y!
%%&7888!%rb   )Nr   r   r   FFFNr   NNNNTTFr   r   FFNNNN)r/   r0   rd   N)FNN)r   )T)r   
__module____qualname____doc__intr   r   boolstrr>   rg   r   r   r   r]   r   r   Tensorr   r   r   r  r  r  no_gradr  __classcell__r`   s   @ra   r   r   *   s       0 0j .2!&$.2/1!%+/)-*.%*'*$),1/33H& H&H& &c]H& 	H&
 H& H& H& H& H& 'smH& *-H& #H& $C=H& "#H& #3-H&  !H&" #H&$ #%H&& 'H&(  %)H&* "+H&, &*-H&. O,/H&0 1H& H& H& H& H& H&T&4 &D & & & &$ aeD& D&7;D&KST\K]D&	D& D& D& D&L&c &d & & & &8# # # #(X XD XEY X X X Xz 9=15	,
 ,
|,
  (5,
 !.	,
 
,
 ,
 ,
 ,
\     "  s 5<    8 VZ- -\-(--FKl-	- - - -` ab- -#l-;>-LO-Z]-	- - - -^% %QVQ] % % % %: U]__& & & _& & & & &rb   r   c                       e Zd ZdZ	 	 	 d
dedej        deej                 deej                 deej                 dej        fd	ZdS )r0   zJ
    Default processor for performing attention-related computations.
    Nattnr   r   r   tembrd   c                    t          |          dk    s|                    dd           d}t          dd|           |}	|j        |                    ||          }|j        }
|
dk    r:|j        \  }}}}|                    ||||z                                dd          }||j        n|j        \  }}}|                    |||          }|j	        =|	                    |                    dd                                        dd          }|
                    |          }||}n|j        r|                    |          }|                    |          }|                    |          }|                    |          }|                    |          }|                    |          }|                    |||          }t#          j        ||          }|                    |          } |j        d         |          } |j        d         |          }|
dk    r,|                    dd	                              ||||          }|j        r||	z   }||j        z  }|S )
Nr   rD   The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`.1.0.0r  r   r	   r   r   )r   getr   rI   r   r   viewr   r  r:   rO   rM   r  rP   rQ   r   r  r   bmmr   rV   r   r-   r,   )r^   r+  r   r   r   r,  argsr   deprecation_messageresidual
input_ndimr   channelheightwidthsequence_lengthr   r   r   r  r  s                        ra   r   zAttnProcessor.__call__  s    t99q==FJJw55A #Ugw(;<<< ( --mTBBM"'
??1>1D.J)..z7FUNSS]]^_abccM $9#@MF[Fa 	'
OQ 44^_V`aa?& OOM,C,CAq,I,IJJTTUVXYZZM		-(( ($1!!_ 	[$($C$CDY$Z$Z!ii-..		/00&&u--$$S))&&u--33E3OO	/599..}== 'A}55&A}55??)33B;;CCJPWY_afggM# 	5)H4M%(BBrb   NNN	r   r   r!  r"  r   r   r&  r   r   r   rb   ra   r0   r0     s          9=15'+@ @@ |@  (5	@
 !.@ u|$@ 
@ @ @ @ @ @rb   r0   c                        e Zd ZdZ	 	 	 	 	 	 ddededee         dee         d	ed
ef fdZ	 	 dde	de
j        dee
j                 dee
j                 de
j        f
dZ xZS )rx   aK  
    Processor for implementing attention for the Custom Diffusion method.

    Args:
        train_kv (`bool`, defaults to `True`):
            Whether to newly train the key and value matrices corresponding to the text features.
        train_q_out (`bool`, defaults to `True`):
            Whether to newly train query matrices corresponding to the latent image features.
        hidden_size (`int`, *optional*, defaults to `None`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`, *optional*, defaults to `None`):
            The number of channels in the `encoder_hidden_states`.
        out_bias (`bool`, defaults to `True`):
            Whether to include the bias parameter in `train_q_out`.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
    TNr   rr   rs   rp   r   r(   r   c                 8   t                                                       || _        || _        || _        || _        | j        r<t          j        |p||d          | _        t          j        |p||d          | _	        | j        rt          j        ||d          | _
        t          j        g           | _        | j                            t          j        |||                     | j                            t          j        |                     d S d S NFr;   r=   r>   rr   rs   rp   r   r   rN   rt   to_v_custom_diffusionto_q_custom_diffusionrU   to_out_custom_diffusionrW   rX   r^   rr   rs   rp   r   r(   r   r`   s          ra   r>   z%CustomDiffusionAttnProcessor.__init__Y      	 &&#6  = 	p)+3F3U+Wbin)o)o)oD&)+3F3U+Wbin)o)o)oD& 	E)+;RW)X)X)XD&+-=+<+<D((//	+{Ya0b0b0bccc(//
70C0CDDDDD		E 	Erb   r+  r   r   r   rd   c                    |j         \  }}}|                    |||          }| j        r8|                     |                              |j        j        j                  }n7|                    |                    |j        j        j                            }|d}	|}nd}	|j        r|	                    |          }| j
        r|                     |                    | j        j        j                            }
|                     |                    | j        j        j                            }|
                    |j        j        j                  }
|                    |j        j        j                  }n*|                    |          }
|                    |          }|	rtt          j        |
          }|d d d dd d f         dz  |d d d dd d f<   ||
z  d|z
  |
                                z  z   }
||z  d|z
  |                                z  z   }|                    |          }|                    |
          }
|                    |          }|                    ||
|          }t          j        ||          }|                    |          }| j        r- | j        d         |          } | j        d         |          }n, |j        d         |          } |j        d         |          }|S )NFTr   r   r   )r   r  rs   rB  r   rO   r   r   rM   r  rr   rt   rA  rP   rQ   r   	ones_likedetachr   r  r2  r   rC  rV   )r^   r+  r   r   r   r   r:  r   r   	crossattnr   r  rH  r  s                 ra   r   z%CustomDiffusionAttnProcessor.__call__s  s    *7)<&
OQ44^_V`aa 	H..}==@@AQAWXXEEIIm..ty/?/EFFGGE (I$1!!I _(,(G(GH](^(^%= 	5,,-B-E-EdF`FgFm-n-nooC../D/G/GHbHiHo/p/pqqE&&)/00CHHTY-344EE))122CII344E 	C_S))F%aaa!QQQh/#5F111bqb!!!83,!f*

!<<CUNa&jELLNN%BBE&&u--$$S))&&u--33E3OO	/599..}== 		:;D8;MJJM;D8;MJJMM +DKN=99M*DKN=99Mrb   TTNNTr   r  r   r   r!  r"  r$  r   r#  r   r>   r   r   r&  r   r(  r)  s   @ra   rx   rx   F  s        (  %)-1E EE E c]	E
 &c]E E E E E E E E< 9=158 88 |8  (5	8
 !.8 
8 8 8 8 8 8 8 8rb   rx   c                   t    e Zd ZdZ	 	 d	dedej        deej                 deej                 dej        f
dZdS )
r{   z
    Processor for performing attention-related computations with extra learnable key and value matrices for the text
    encoder.
    Nr+  r   r   r   rd   c                 `   t          |          dk    s|                    dd           d}t          dd|           |}|                    |j        d         |j        d         d                              dd          }|j        \  }	}
}|                    ||
|	          }||}n|j        r|                    |          }|	                    |                    dd                                        dd          }|
                    |          }|                    |          }|                    |          }|                    |          }|                    |          }|                    |          }|j        s|                    |          }|                    |          }|                    |          }|                    |          }t#          j        ||gd          }t#          j        ||gd          }n|}|}|                    |||          }t#          j        ||          }|                    |          } |j        d         |          } |j        d         |          }|                    dd	                              |j                  }||z   }|S )
Nr   rD   r.  r/  r   r   r	   r   r   )r   r0  r   r1  r   r   r  rM   r  r:   rO   r   rR   rS   r*   rP   rQ   r   r
  r  r2  r   rV   r   )r^   r+  r   r   r   r3  r   r4  r5  r   r:  r   r   encoder_hidden_states_key_proj encoder_hidden_states_value_projr   r  r  s                     ra   r   zAttnAddedKVProcessor.__call__  s    t99q==FJJw55A #Ugw(;<<< %**=+>q+A=CVWXCY[]^^hhijlmnn)6)<&
OQ44^_V`aa ($1!!_ 	[$($C$CDY$Z$Z!(?(?1(E(EFFPPQRTUVV		-((&&u--)-9N)O)O&+/??;P+Q+Q()-)?)?@^)_)_&+/+A+ABb+c+c(( 		5))M**CIIm,,E((--C**511E);SAqIIICI?GQOOOEE0C4E33E3OO	/599..}== 'A}55&A}55%//B77??OO%0rb   r  r<  r   rb   ra   r{   r{     s          9=15: :: |:  (5	:
 !.: 
: : : : : :rb   r{   c                   z    e Zd ZdZd Z	 	 d
dedej        deej                 deej                 dej        f
d	Z	dS )r|   z
    Processor for performing scaled dot-product attention (enabled by default if you're using PyTorch 2.0), with extra
    learnable key and value matrices for the text encoder.
    c                 N    t          t          d          st          d          d S )Nr<   zWAttnAddedKVProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rZ   r[   ImportErrorr^   s    ra   r>   z AttnAddedKVProcessor2_0.__init__  s4    q899 	i  	 	rb   Nr+  r   r   r   rd   c                    t          |          dk    s|                    dd           d}t          dd|           |}|                    |j        d         |j        d         d                              dd          }|j        \  }	}
}|                    ||
|	d	          }||}n|j        r|                    |          }|	                    |                    dd                                        dd          }|
                    |          }|                    |d	          }|                    |          }|                    |          }|                    |d	          }|                    |d	          }|j        s|                    |          }|                    |          }|                    |d	          }|                    |d	          }t#          j        ||gd
          }t#          j        ||gd
          }n|}|}t'          j        ||||dd          }|                    dd                              |	d|j        d                   } |j        d         |          } |j        d         |          }|                    dd                              |j                  }||z   }|S )Nr   rD   r.  r/  r   r   r	   r  )r1   r   r   F	attn_mask	dropout_p	is_causalr   )r   r0  r   r1  r   r   r  rM   r  r:   rO   r   rR   rS   r*   rP   rQ   r   r
  r[   r<   r   rV   )r^   r+  r   r   r   r3  r   r4  r5  r   r:  r   r   rN  rO  r   r  s                    ra   r   z AttnAddedKVProcessor2_0.__call__  s    t99q==FJJw55A #Ugw(;<<< %**=+>q+A=CVWXCY[]^^hhijlmnn)6)<&
OQ44^_V`jk4ll ($1!!_ 	[$($C$CDY$Z$Z!(?(?1(E(EFFPPQRTUVV		-((&&ua&88)-9N)O)O&+/??;P+Q+Q()-)?)?@^hi)?)j)j&+/+A+ABblm+A+n+n(( 		5))M**CIIm,,E((a(88C**5!*<<E);SAqIIICI?GQOOOEE0C4E 633RW
 
 
 &//155==j"hn]^N_`` 'A}55&A}55%//B77??OO%0rb   r  
r   r   r!  r"  r>   r   r   r&  r   r   r   rb   ra   r|   r|     s         
   9=15= == |=  (5	=
 !.= 
= = = = = =rb   r|   c                   n    e Zd ZdZd Z	 	 d
dedej        dej        deej                 dej        f
d	Z	dS )JointAttnProcessor2_0YAttention processor used typically in processing the SD3-like self-attention projections.c                 N    t          t          d          st          d          d S Nr<   PAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rR  rT  s    ra   r>   zJointAttnProcessor2_0.__init__@  1    q899 	rpqqq	r 	rrb   Nr+  r   r   r   rd   c                    |}|j         }|dk    r:|j        \  }	}
}}|                    |	|
||z                                dd          }|j         }|dk    r:|j        \  }	}
}}|                    |	|
||z                                dd          }|j        d         }	|                    |          }|                    |          }|                    |          }|                    |          }|                    |          }|	                    |          }t          j        ||gd          }t          j        ||gd          }t          j        ||gd          }|j        d         }||j        z  }|                    |	d|j        |                              dd          }|                    |	d|j        |                              dd          }|                    |	d|j        |                              dd          }t          j        |||dd	          x}}|                    dd                              |	d|j        |z            }|                    |j                  }|d d d |j        d         f         |d d |j        d         d f         }} |j        d         |          } |j        d         |          }|j        s|                    |          }|dk    r,|                    dd
                              |	|
||          }|dk    r,|                    dd
                              |	|
||          }||fS )Nr  r   r	   r   r   r   r   FrX  rY  r   )r   r   r1  r   rO   rP   rQ   rT   rR   rS   r   r
  r   r[   r<   r   r   r   rV   rC   rY   )r^   r+  r   r   r   r3  r   r5  r6  r   r7  r8  r9  context_input_ndimr   r   r   encoder_hidden_states_query_projrN  rO  r?   head_dims                         ra   r   zJointAttnProcessor2_0.__call__D  s    !"'
??1>1D.J)..z7FUNSS]]^_abccM27""1F1L.J$9$>$>z7TZ]bTb$c$c$m$mnoqr$s$s!*03
 		-((ii&&		-(( ,0??;P+Q+Q()-9N)O)O&+/??;P+Q+Q( 	5"BCKKKi<=1EEE	5"BCKKKIbM	
*

:r4:x@@JJ1aPPhhz2tz8<<FFq!LL

:r4:x@@JJ1aPP()(F3)
 )
 )
 	
 &//155==j"dj[cNcdd%((55 !!!0x~a0001!!!X^A.0001 - 'A}55&A}55$ 	K$(OO4I$J$J!??)33B;;CCJPWY_afggM""$9$C$CB$K$K$S$ST^`gioqv$w$w!333rb   r  
r   r   r!  r"  r>   r   r   FloatTensorr   r   r   rb   ra   r\  r\  =  s        ccr r r 486:C4 C4C4 (C4  %0	C4
 !!23C4 
	C4 C4 C4 C4 C4 C4rb   r\  c                   n    e Zd ZdZd Z	 	 d
dedej        dej        deej                 dej        f
d	Z	dS )FusedJointAttnProcessor2_0r]  c                 N    t          t          d          st          d          d S r_  rR  rT  s    ra   r>   z#FusedJointAttnProcessor2_0.__init__  ra  rb   Nr+  r   r   r   rd   c                    |}|j         }|dk    r:|j        \  }	}
}}|                    |	|
||z                                dd          }|j         }|dk    r:|j        \  }	}
}}|                    |	|
||z                                dd          }|j        d         }	|                    |          }|j        d         dz  }t          j        ||d          \  }}}|                    |          }|j        d         dz  }t          j        ||d          \  }}}t          j        ||gd          }t          j        ||gd          }t          j        ||gd          }|j        d         }||j	        z  }|                    |	d|j	        |                              dd          }|                    |	d|j	        |                              dd          }|                    |	d|j	        |                              dd          }t          j        |||dd	
          x}}|                    dd                              |	d|j	        |z            }|                    |j                  }|d d d |j        d         f         |d d |j        d         d f         }} |j        d         |          } |j        d         |          }|j        s|                    |          }|dk    r,|                    dd                              |	|
||          }|dk    r,|                    dd                              |	|
||          }||fS )Nr  r   r	   r   r   r   r   r   Frc  r   )r   r   r1  r   r  r   splitto_added_qkvr
  r   r[   r<   r   r   r   rV   rC   rY   )r^   r+  r   r   r   r3  r   r5  r6  r   r7  r8  r9  rd  qkv
split_sizer   r   r  encoder_qkvre  rN  rO  r?   rf  s                            ra   r   z#FusedJointAttnProcessor2_0.__call__  s    !"'
??1>1D.J)..z7FUNSS]]^_abccM27""1F1L.J$9$>$>z7TZ]bTb$c$c$m$mnoqr$s$s!*03
 kk-((Yr]a'
!KZR@@@sE ''(=>> &r*a/

 KZR888		
,*, 	5"BCKKKi<=1EEE	5"BCKKKIbM	
*

:r4:x@@JJ1aPPhhz2tz8<<FFq!LL

:r4:x@@JJ1aPP()(F3)
 )
 )
 	
 &//155==j"dj[cNcdd%((55 !!!0x~a0001!!!X^A.0001 - 'A}55&A}55$ 	K$(OO4I$J$J!??)33B;;CCJPWY_afggM""$9$C$CB$K$K$S$ST^`gioqv$w$w!333rb   r  rg  r   rb   ra   rj  rj    s        ccr r r 486:G4 G4G4 (G4  %0	G4
 !!23G4 
	G4 G4 G4 G4 G4 G4rb   rj  c                       e Zd ZdZddee         fdZ	 	 ddedej	        deej	                 deej	                 d	ej	        f
d
Z
dS )r~     
    Processor for implementing memory efficient attention using xFormers.

    Args:
        attention_op (`Callable`, *optional*, defaults to `None`):
            The base
            [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to
            use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best
            operator.
    Nri   c                     || _         d S r  ru   r^   ri   s     ra   r>   z%XFormersAttnAddedKVProcessor.__init__      (rb   r+  r   r   r   rd   c                 "   |}|                     |j        d         |j        d         d                              dd          }|j        \  }}}|                    |||          }||}n|j        r|                    |          }|                    |                    dd                                        dd          }|                    |          }	|                    |	          }	|	                    |          }
|
                    |          }|                    |
          }
|                    |          }|j        s|                    |          }|                    |          }|                    |          }|                    |          }t          j        |
|gd          }t          j        ||gd          }n|
}|}t           j                            |	|||| j        |j                  }|                    |	j                  }|                    |          } |j        d         |          } |j        d         |          }|                    dd                              |j                  }||z   }|S )Nr   r   r   r	   r   	attn_biasoprD   r   )r1  r   r   r  rM   r  r:   rO   r   rR   rS   r*   rP   rQ   r   r
  rk   r   r   ri   rD   r   r   r   rV   r   )r^   r+  r   r   r   r5  r   r:  r   r   rN  rO  r   r  s                 ra   r   z%XFormersAttnAddedKVProcessor.__call__  s    !%**=+>q+A=CVWXCY[]^^hhijlmnn)6)<&
OQ44^_V`aa ($1!!_ 	[$($C$CDY$Z$Z!(?(?1(E(EFFPPQRTUVV		-((&&u--)-9N)O)O&+/??;P+Q+Q()-)?)?@^)_)_&+/+A+ABb+c+c(( 		5))M**CIIm,,E((--C**511E);SAqIIICI?GQOOOEE0C4E ??3D<MUYU_ @ 
 
 &((55..}== 'A}55&A}55%//B77??OO%0rb   r  r  r   r   r!  r"  r   r   r>   r   r   r&  r   r   rb   ra   r~   r~     s        	 	) )Xh%7 ) ) ) ) 9=155 55 |5  (5	5
 !.5 
5 5 5 5 5 5rb   r~   c                       e Zd ZdZddee         fdZ	 	 	 ddedej	        deej	                 deej	                 d	eej	                 d
ej	        fdZ
dS )r   rs  Nri   c                     || _         d S r  ru   ru  s     ra   r>   zXFormersAttnProcessor.__init__.  rv  rb   r+  r   r   r   r,  rd   c                    t          |          dk    s|                    dd           d}t          dd|           |}	|j        |                    ||          }|j        }
|
dk    r:|j        \  }}}}|                    ||||z                                dd          }||j        n|j        \  }}}|                    |||          }|"|j        \  }}}|	                    d|d          }|j
        =|
                    |                    dd                                        dd          }|                    |          }||}n|j        r|                    |          }|                    |          }|                    |          }|                    |                                          }|                    |                                          }|                    |                                          }t$          j                            ||||| j        |j        	          }|                    |j                  }|                    |          } |j        d         |          } |j        d         |          }|
dk    r,|                    dd
                              ||||          }|j        r||	z   }||j        z  }|S )Nr   rD   r.  r/  r  r   r	   r   rx  r   )r   r0  r   rI   r   r   r1  r   r  expandr:   rO   rM   r  rP   rQ   r   
contiguousrk   r   r   ri   rD   r   r   r   rV   r   r-   r,   )r^   r+  r   r   r   r,  r3  r   r4  r5  r6  r   r7  r8  r9  
key_tokensr   query_tokensr   r   r  s                        ra   r   zXFormersAttnProcessor.__call__1  s    t99q==FJJw55A #Ugw(;<<< ( --mTBBM"'
??1>1D.J)..z7FUNSS]]^_abccM $9#@MF[Fa 	"
J 44^ZQ[\\% "/!4A|Q+222|RHHN?& OOM,C,CAq,I,IJJTTUVXYZZM		-(( ($1!!_ 	[$($C$CDY$Z$Z!ii-..		/00&&u--88::$$S))4466&&u--88:: ??3D<MUYU_ @ 
 
 &((55..}== 'A}55&A}55??)33B;;CCJPWY_afggM# 	5)H4M%(BBrb   r  r;  r{  r   rb   ra   r   r   "  s        	 	) )Xh%7 ) ) ) ) 9=15'+L LL |L  (5	L
 !.L u|$L 
L L L L L Lrb   r   c                       e Zd ZdZd Z	 	 	 ddedej        deej                 deej                 deej                 d	ej        fd
Z	dS )rf   a  
    Processor for implementing flash attention using torch_npu. Torch_npu supports only fp16 and bf16 data types. If
    fp32 is used, F.scaled_dot_product_attention will be used for computation, but the acceleration effect on NPU is
    not significant.

    c                 @    t                      st          d          d S )NzTAttnProcessorNPU requires torch_npu extensions and is supported only on npu devices.)r   rS  rT  s    ra   r>   zAttnProcessorNPU.__init__  s,    %'' 	vtuuu	v 	vrb   Nr+  r   r   r   r,  rd   c                    t          |          dk    s|                    dd           d}t          dd|           |}	|j        |                    ||          }|j        }
|
dk    r:|j        \  }}}}|                    ||||z                                dd          }||j        n|j        \  }}}|?|                    |||          }|                    ||j	        d|j        d                   }|j
        =|
                    |                    dd                                        dd          }|                    |          }||}n|j        r|                    |          }|                    |          }|                    |          }|j        d         }||j	        z  }|                    |d|j	        |                              dd          }|                    |d|j	        |                              dd          }|                    |d|j	        |                              dd          }|j        t"          j        t"          j        fv rMt)          j        ||||j	        d	d |d
t-          j        |j        d                   z  ddd
dd          d         }nt1          j        ||||dd          }|                    dd                              |d|j	        |z            }|                    |j                  } |j        d         |          } |j        d         |          }|
dk    r,|                    dd                              ||||          }|j        r||	z   }||j        z  }|S )Nr   rD   r.  r/  r  r   r	   r   BNSDr   i   F)	input_layoutpse
atten_maskrD   pre_tockensnext_tockens	keep_probsyncinner_preciser   rV  r   )r   r0  r   rI   r   r   r1  r   r  r   r:   rO   rM   r  rP   rQ   r   r   float16bfloat16	torch_npunpu_fusion_attentionmathsqrtr[   r<   r   r   rV   r-   r,   r^   r+  r   r   r   r,  r3  r   r4  r5  r6  r   r7  r8  r9  r:  r   r   r   r  r?   rf  s                         ra   r   zAttnProcessorNPU.__call__  s    t99q==FJJw55A #Ugw(;<<< ( --mTBBM"'
??1>1D.J)..z7FUNSS]]^_abccM $9#@MF[Fa 	'
OQ %!88ZdeeN ,00TZ^MabdMeffN?& OOM,C,CAq,I,IJJTTUVXYZZM		-(( ($1!!_ 	[$($C$CDY$Z$Z!ii-..		/00IbM	
*

:r4:x@@JJ1aPPhhz2tz8<<FFq!LL

:r4:x@@JJ1aPP ;5=%.999%:
#)DIek"o666!"   MM" :sE^sV[  M &//155==j"dj[cNcdd%((55 'A}55&A}55??)33B;;CCJPWY_afggM# 	5)H4M%(BBrb   r;  rZ  r   rb   ra   rf   rf     s         v v v 9=15'+^ ^^ |^  (5	^
 !.^ u|$^ 
^ ^ ^ ^ ^ ^rb   rf   c                       e Zd ZdZd Z	 	 	 ddedej        deej                 deej                 deej                 d	ej        fd
Z	dS )r\   zs
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0).
    c                 N    t          t          d          st          d          d S r_  rR  rT  s    ra   r>   zAttnProcessor2_0.__init__  ra  rb   Nr+  r   r   r   r,  rd   c                 F   t          |          dk    s|                    dd           d}t          dd|           |}	|j        |                    ||          }|j        }
|
dk    r:|j        \  }}}}|                    ||||z                                dd          }||j        n|j        \  }}}|?|                    |||          }|                    ||j	        d|j        d                   }|j
        =|
                    |                    dd                                        dd          }|                    |          }||}n|j        r|                    |          }|                    |          }|                    |          }|j        d         }||j	        z  }|                    |d|j	        |                              dd          }|                    |d|j	        |                              dd          }|                    |d|j	        |                              dd          }t!          j        ||||d	d
          }|                    dd                              |d|j	        |z            }|                    |j                  } |j        d         |          } |j        d         |          }|
dk    r,|                    dd                              ||||          }|j        r||	z   }||j        z  }|S )Nr   rD   r.  r/  r  r   r	   r   r   FrV  r   )r   r0  r   rI   r   r   r1  r   r  r   r:   rO   rM   r  rP   rQ   r[   r<   r   r   r   rV   r-   r,   r  s                         ra   r   zAttnProcessor2_0.__call__  sB    t99q==FJJw55A #Ugw(;<<< ( --mTBBM"'
??1>1D.J)..z7FUNSS]]^_abccM $9#@MF[Fa 	'
OQ %!88ZdeeN ,00TZ^MabdMeffN?& OOM,C,CAq,I,IJJTTUVXYZZM		-(( ($1!!_ 	[$($C$CDY$Z$Z!ii-..		/00IbM	
*

:r4:x@@JJ1aPPhhz2tz8<<FFq!LL

:r4:x@@JJ1aPP 633RW
 
 
 &//155==j"dj[cNcdd%((55 'A}55&A}55??)33B;;CCJPWY_afggM# 	5)H4M%(BBrb   r;  rZ  r   rb   ra   r\   r\     s         r r r 9=15'+M MM |M  (5	M
 !.M u|$M 
M M M M M Mrb   r\   c                       e Zd ZdZd Z	 	 	 	 ddedej        deej                 deej                 deej                 d	eej                 d
ej        fdZ	dS )HunyuanAttnProcessor2_0z
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). This is
    used in the HunyuanDiT model. It applies a s normalization layer and rotary embedding on query and key vector.
    c                 N    t          t          d          st          d          d S r_  rR  rT  s    ra   r>   z HunyuanAttnProcessor2_0.__init__M  ra  rb   Nr+  r   r   r   r,  image_rotary_embrd   c                    ddl m} |}|j        |                    ||          }|j        }	|	dk    r:|j        \  }
}}}|                    |
|||z                                dd          }||j        n|j        \  }
}}|?|                    |||
          }|                    |
|j        d|j        d                   }|j	        =|	                    |                    dd                                        dd          }|
                    |          }||}n|j        r|                    |          }|                    |          }|                    |          }|j        d         }||j        z  }|                    |
d|j        |                              dd          }|                    |
d|j        |                              dd          }|                    |
d|j        |                              dd          }|j        |                    |          }|j        |                    |          }| |||          }|j        s |||          }t%          j        ||||dd          }|                    dd                              |
d|j        |z            }|                    |j                  } |j        d	         |          } |j        d         |          }|	dk    r,|                    dd
                              |
|||          }|j        r||z   }||j        z  }|S )Nr   )apply_rotary_embr  r	   r   r   FrV  r   r   )
embeddingsr  rI   r   r   r1  r   r  r   r:   rO   rM   r  rP   rQ   rJ   rK   rA   r[   r<   r   r   r   rV   r-   r,   )r^   r+  r   r   r   r,  r  r  r5  r6  r   r7  r8  r9  r:  r   r   r   r  r?   rf  s                        ra   r   z HunyuanAttnProcessor2_0.__call__Q  s    	100000 ( --mTBBM"'
??1>1D.J)..z7FUNSS]]^_abccM $9#@MF[Fa 	'
OQ %!88ZdeeN ,00TZ^MabdMeffN?& OOM,C,CAq,I,IJJTTUVXYZZM		-(( ($1!!_ 	[$($C$CDY$Z$Z!ii-..		/00IbM	
*

:r4:x@@JJ1aPPhhz2tz8<<FFq!LL

:r4:x@@JJ1aPP;"KK&&E;"++c""C '$$U,<==E* >&&s,<== 633RW
 
 
 &//155==j"dj[cNcdd%((55 'A}55&A}55??)33B;;CCJPWY_afggM# 	5)H4M%(BBrb   )NNNNrZ  r   rb   ra   r  r  G  s         
r r r 9=15'+37U UU |U  (5	U
 !.U u|$U #5<0U 
U U U U U Urb   r  c                       e Zd ZdZd Z	 	 	 ddedej        deej                 deej                 deej                 d	ej        fd
Z	dS )FusedAttnProcessor2_0u  
    Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). It uses
    fused projection layers. For self-attention modules, all projection matrices (i.e., query, key, value) are fused.
    For cross-attention modules, key and value projection matrices are fused.

    <Tip warning={true}>

    This API is currently 🧪 experimental in nature and can change in future.

    </Tip>
    c                 N    t          t          d          st          d          d S )Nr<   z`FusedAttnProcessor2_0 requires at least PyTorch 2.0, to use it. Please upgrade PyTorch to > 2.0.rR  rT  s    ra   r>   zFusedAttnProcessor2_0.__init__  s4    q899 	r  	 	rb   Nr+  r   r   r   r,  rd   c                    t          |          dk    s|                    dd           d}t          dd|           |}	|j        |                    ||          }|j        }
|
dk    r:|j        \  }}}}|                    ||||z                                dd          }||j        n|j        \  }}}|?|                    |||          }|                    ||j	        d|j        d                   }|j
        =|
                    |                    dd                                        dd          }|A|                    |          }|j        d         d	z  }t          j        ||d
          \  }}}np|j        r|                    |          }|                    |          }|                    |          }|j        d         dz  }t          j        ||d
          \  }}|j        d         }||j	        z  }|                    |d|j	        |                              dd          }|                    |d|j	        |                              dd          }|                    |d|j	        |                              dd          }t%          j        ||||dd          }|                    dd                              |d|j	        |z            }|                    |j                  } |j        d         |          } |j        d         |          }|
dk    r,|                    dd                              ||||          }|j        r||	z   }||j        z  }|S )Nr   rD   r.  r/  r  r   r	   r   r   r   r   FrV  r   )r   r0  r   rI   r   r   r1  r   r  r   r:   r  r   rm  rM   r  rO   r  r[   r<   r   r   r   rV   r-   r,   )r^   r+  r   r   r   r,  r3  r   r4  r5  r6  r   r7  r8  r9  r:  r   ro  rp  r   r   r  kvr?   rf  s                            ra   r   zFusedAttnProcessor2_0.__call__  s    t99q==FJJw55A #Ugw(;<<< ( --mTBBM"'
??1>1D.J)..z7FUNSS]]^_abccM $9#@MF[Fa 	'
OQ %!88ZdeeN ,00TZ^MabdMeffN?& OOM,C,CAq,I,IJJTTUVXYZZM (++m,,C2!+J %C D D DE3 _(,(G(GH](^(^%IIm,,E122B"*JR<<<JCIbM	
*

:r4:x@@JJ1aPPhhz2tz8<<FFq!LL

:r4:x@@JJ1aPP 633RW
 
 
 &//155==j"dj[cNcdd%((55 'A}55&A}55??)33B;;CCJPWY_afggM# 	5)H4M%(BBrb   r;  rZ  r   rb   ra   r  r    s        
 
   9=15'+O OO |O  (5	O
 !.O u|$O 
O O O O O Orb   r  c                        e Zd ZdZ	 	 	 	 	 	 	 ddededee         d	ee         d
ededee         f fdZ		 	 dde
dej        deej                 deej                 dej        f
dZ xZS )ry   az  
    Processor for implementing memory efficient attention using xFormers for the Custom Diffusion method.

    Args:
    train_kv (`bool`, defaults to `True`):
        Whether to newly train the key and value matrices corresponding to the text features.
    train_q_out (`bool`, defaults to `True`):
        Whether to newly train query matrices corresponding to the latent image features.
    hidden_size (`int`, *optional*, defaults to `None`):
        The hidden size of the attention layer.
    cross_attention_dim (`int`, *optional*, defaults to `None`):
        The number of channels in the `encoder_hidden_states`.
    out_bias (`bool`, defaults to `True`):
        Whether to include the bias parameter in `train_q_out`.
    dropout (`float`, *optional*, defaults to 0.0):
        The dropout probability to use.
    attention_op (`Callable`, *optional*, defaults to `None`):
        The base
        [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to use
        as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best operator.
    TFNr   rr   rs   rp   r   r(   r   ri   c                 F   t                                                       || _        || _        || _        || _        || _        | j        r<t          j        |p||d          | _	        t          j        |p||d          | _
        | j        rt          j        ||d          | _        t          j        g           | _        | j                            t          j        |||                     | j                            t          j        |                     d S d S r?  )r=   r>   rr   rs   rp   r   ri   r   rN   rt   rA  rB  rU   rC  rW   rX   )	r^   rr   rs   rp   r   r(   r   ri   r`   s	           ra   r>   z-CustomDiffusionXFormersAttnProcessor.__init__%  s%    	 &&#6 ( = 	p)+3F3U+Wbin)o)o)oD&)+3F3U+Wbin)o)o)oD& 	E)+;RW)X)X)XD&+-=+<+<D((//	+{Ya0b0b0bccc(//
70C0CDDDDD		E 	Erb   r+  r   r   r   rd   c                    ||j         n|j         \  }}}|                    |||          }| j        r8|                     |                              |j        j        j                  }n7|                    |                    |j        j        j                            }|d}	|}nd}	|j        r|	                    |          }| j
        r|                     |                    | j        j        j                            }
|                     |                    | j        j        j                            }|
                    |j        j        j                  }
|                    |j        j        j                  }n*|                    |          }
|                    |          }|	rtt          j        |
          }|d d d dd d f         dz  |d d d dd d f<   ||
z  d|z
  |
                                z  z   }
||z  d|z
  |                                z  z   }|                    |                                          }|                    |
                                          }
|                    |                                          }t(          j                            ||
||| j        |j                  }|                    |j                  }|                    |          }| j        r- | j        d         |          } | j        d         |          }n, |j        d         |          } |j        d         |          }|S )NFTr   r   rx  r   )r   r  rs   rB  r   rO   r   r   rM   r  rr   rt   rA  rP   rQ   r   rG  rH  r   r  rk   r   r   ri   rD   r   rC  rV   )r^   r+  r   r   r   r   r:  r   r   rI  r   r  rH  s                ra   r   z-CustomDiffusionXFormersAttnProcessor.__call__A  s?    $9#@MF[Fa 	'
OQ 44^_V`aa 	H..}==@@AQAWXXEEIIm..ty/?/EFFGGE (I$1!!I _(,(G(GH](^(^%= 	5,,-B-E-EdF`FgFm-n-nooC../D/G/GHbHiHo/p/pqqE&&)/00CHHTY-344EE))122CII344E 	C_S))F%aaa!QQQh/#5F111bqb!!!83,!f*

!<<CUNa&jELLNN%BBE&&u--88::$$S))4466&&u--88:: ??3D<MUYU_ @ 
 
 &((55..}== 		:;D8;MJJM;D8;MJJMM +DKN=99M*DKN=99Mrb   )TFNNTr   Nr  )r   r   r!  r"  r$  r   r#  r   r   r>   r   r   r&  r   r(  r)  s   @ra   ry   ry     s&        0 !%)-1+/E EE E c]	E
 &c]E E E x(E E E E E E@ 9=15> >> |>  (5	>
 !.> 
> > > > > > > >rb   ry   c                        e Zd ZdZ	 	 	 	 	 	 ddededee         dee         d	ed
ef fdZ	 	 dde	de
j        dee
j                 dee
j                 de
j        f
dZ xZS )rz   u  
    Processor for implementing attention for the Custom Diffusion method using PyTorch 2.0’s memory-efficient scaled
    dot-product attention.

    Args:
        train_kv (`bool`, defaults to `True`):
            Whether to newly train the key and value matrices corresponding to the text features.
        train_q_out (`bool`, defaults to `True`):
            Whether to newly train query matrices corresponding to the latent image features.
        hidden_size (`int`, *optional*, defaults to `None`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`, *optional*, defaults to `None`):
            The number of channels in the `encoder_hidden_states`.
        out_bias (`bool`, defaults to `True`):
            Whether to include the bias parameter in `train_q_out`.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability to use.
    TNr   rr   rs   rp   r   r(   r   c                 8   t                                                       || _        || _        || _        || _        | j        r<t          j        |p||d          | _        t          j        |p||d          | _	        | j        rt          j        ||d          | _
        t          j        g           | _        | j                            t          j        |||                     | j                            t          j        |                     d S d S r?  r@  rD  s          ra   r>   z(CustomDiffusionAttnProcessor2_0.__init__  rE  rb   r+  r   r   r   rd   c                    |j         \  }}}|                    |||          }| j        r|                     |          }n|                    |          }|d}	|}nd}	|j        r|                    |          }| j        r|                     |	                    | j        j
        j                            }
|                     |	                    | j        j
        j                            }|
	                    |j        j
        j                  }
|	                    |j        j
        j                  }n*|                    |          }
|                    |          }|	rtt          j        |
          }|d d d dd d f         dz  |d d d dd d f<   ||
z  d|z
  |
                                z  z   }
||z  d|z
  |                                z  z   }|j         d         }||j        z  }|                    |d|j        |                              dd          }|
                    |d|j        |                              dd          }
|                    |d|j        |                              dd          }t+          j        ||
||dd          }|                    dd                              |d|j        |z            }|	                    |j                  }| j        r- | j        d         |          } | j        d         |          }n, |j        d         |          } |j        d         |          }|S )	NFTr   r   r   r	   rV  r   )r   r  rs   rB  rO   rM   r  rr   rt   r   r   r   rA  rP   rQ   r   rG  rH  r   r1  r   r[   r<   r   rC  rV   )r^   r+  r   r   r   r   r:  r   r   rI  r   r  rH  r?   rf  s                  ra   r   z(CustomDiffusionAttnProcessor2_0.__call__  sK    *7)<&
OQ44^_V`aa 	-..}==EEIIm,,E (I$1!!I _(,(G(GH](^(^%= 	5,,-B-E-EdF`FgFm-n-nooC../D/G/GHbHiHo/p/pqqE&&)/00CHHTY-344EE ))122CII344E 	C_S))F%aaa!QQQh/#5F111bqb!!!83,!f*

!<<CUNa&jELLNN%BBE!'+	
*

:r4:x@@JJ1aPPhhz2tz8<<FFq!LL

:r4:x@@JJ1aPP 633RW
 
 
 &//155==j"dj[cNcdd%((55 		:;D8;MJJM;D8;MJJMM +DKN=99M*DKN=99Mrb   rJ  r  rK  r)  s   @ra   rz   rz     s         *  %)-1E EE E c]	E
 &c]E E E E E E E E< 9=15A AA |A  (5	A
 !.A 
A A A A A A A Arb   rz   c                       e Zd ZdZdefdZ	 	 ddedej        de	ej                 de	ej                 d	ej        f
d
Z
dS )r   a'  
    Processor for implementing sliced attention.

    Args:
        slice_size (`int`, *optional*):
            The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and
            `attention_head_dim` must be a multiple of the `slice_size`.
    r   c                     || _         d S r  r   r^   r   s     ra   r>   zSlicedAttnProcessor.__init__      $rb   Nr+  r   r   r   rd   c                 j   |}|j         }|dk    r:|j        \  }}}	}
|                    |||	|
z                                dd          }||j        n|j        \  }}}|                    |||          }|j        =|                    |                    dd                                        dd          }|                    |          }|j        d         }|                    |          }||}n|j        r|	                    |          }|
                    |          }|                    |          }|                    |          }|                    |          }|j        \  }}}t          j        ||||j        z  f|j        |j                  }t#          || j        z            D ]v}|| j        z  }|dz   | j        z  }|||         }|||         }|
|||         nd }|                    |||          }t          j        ||||                   }||||<   w|                    |          } |j        d         |          } |j        d         |          }|dk    r,|                    dd                              |||	|
          }|j        r||z   }||j        z  }|S )Nr  r   r	   r   ro   r   r   r   )r   r   r1  r   r  r:   rO   r   rM   r  rP   rQ   r   r	  r   ro   r   ranger   r  r2  r   rV   r   r-   r,   )r^   r+  r   r   r   r5  r6  r   r7  r8  r9  r:  r   r   r   r   r  batch_size_attentionr  i	start_idxend_idxquery_slice	key_sliceattn_mask_slice
attn_slices                             ra   r   zSlicedAttnProcessor.__call__  s    !"'
??1>1D.J)..z7FUNSS]]^_abccM $9#@MF[Fa 	'
OQ 44^_V`aa?& OOM,C,CAq,I,IJJTTUVXYZZM		-((k"o&&u-- ($1!!_ 	[$($C$CDY$Z$Z!ii-..		/00$$S))&&u--05-lA!<
1BCEL`e`k
 
 
 +t>?? 	: 	:ADO+I1u/G	' 12KIg-.ICQC]nYw->??cgO22;	?[[J:uYw5F/GHHJ/9M)G+,,..}== 'A}55&A}55??)33B;;CCJPWY_afggM# 	5)H4M%(BBrb   r  )r   r   r!  r"  r#  r>   r   r   r&  r   r   r   rb   ra   r   r     s         %3 % % % % 9=15G GG |G  (5	G
 !.G 
G G G G G Grb   r   c                       e Zd ZdZd Z	 	 	 ddddej        deej                 deej                 d	eej                 d
ej        fdZdS )r}   ah  
    Processor for implementing sliced attention with extra learnable key and value matrices for the text encoder.

    Args:
        slice_size (`int`, *optional*):
            The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and
            `attention_head_dim` must be a multiple of the `slice_size`.
    c                     || _         d S r  r  r  s     ra   r>   z#SlicedAttnAddedKVProcessor.__init__U  r  rb   Nr+  r   r   r   r   r,  rd   c                 n   |}|j         |                     ||          }|                    |j        d         |j        d         d                              dd          }|j        \  }}}	|                    |||          }||}n|j        r|                    |          }|                    |                    dd                                        dd          }|                    |          }
|
j        d         }|	                    |
          }
|
                    |          }|                    |          }|	                    |          }|	                    |          }|j        s|                    |          }|                    |          }|	                    |          }|	                    |          }t          j        ||gd          }t          j        ||gd          }n|}|}|
j        \  }}}	t          j        ||||j        z  f|
j        |
j                  }t+          || j        z            D ]v}|| j        z  }|dz   | j        z  }|
||         }|||         }|
|||         nd }|                    |||          }t          j        ||||                   }||||<   w|                    |          } |j        d         |          } |j        d         |          }|                    dd                              |j                  }||z   }|S )Nr   r   r   r	   r   r  r   )rI   r1  r   r   r  rM   r  r:   rO   r   rR   rS   r*   rP   rQ   r   r
  r	  r   ro   r   r  r   r  r2  r   rV   r   )r^   r+  r   r   r   r,  r5  r   r:  r   r   r   rN  rO  r   r  r  r  r  r  r  r  r  r  r  s                            ra   r   z#SlicedAttnAddedKVProcessor.__call__X  sj    !( --mTBBM%**=+>q+A=CVWXCY[]^^hhijlmnn)6)<&
OQ44^_V`aa ($1!!_ 	[$($C$CDY$Z$Z!(?(?1(E(EFFPPQRTUVV		-((k"o&&u--)-9N)O)O&+/??;P+Q+Q()-)?)?@^)_)_&+/+A+ABb+c+c(( 		5))M**CIIm,,E((--C**511E);SAqIIICI?GQOOOEE0C4E05-lA!<
1BCEL`e`k
 
 
 +t>?? 	: 	:ADO+I1u/G	' 12KIg-.ICQC]nYw->??cgO22;	?[[J:uYw5F/GHHJ/9M)G+,,..}== 'A}55&A}55%//B77??OO%0rb   r;  )	r   r   r!  r"  r>   r   r&  r   r   r   rb   ra   r}   r}   K  s         % % % 9=15'+L LL |L  (5	L
 !.L u|$L 
L L L L L Lrb   r}   c                   ^     e Zd ZdZdedef fdZdej        dej        dej        fdZ xZ	S )	rH   ai  
    Spatially conditioned normalization as defined in https://arxiv.org/abs/2209.09002.

    Args:
        f_channels (`int`):
            The number of channels for input to group normalization layer, and output of the spatial norm layer.
        zq_channels (`int`):
            The number of channels for the quantized vector as described in the paper.
    r7   r8   c                     t                                                       t          j        |ddd          | _        t          j        ||ddd          | _        t          j        ||ddd          | _        d S )Nr   gư>Tr3   r   r   )kernel_sizestrider  )r=   r>   r   rG   
norm_layerConv2dconv_yconv_b)r^   r7   r8   r`   s      ra   r>   zSpatialNorm.__init__  su    
 	,J2SW`deeeiZQqZ[\\\iZQqZ[\\\rb   fzqrd   c                     |j         dd          }t          j        ||d          }|                     |          }||                     |          z  |                     |          z   }|S )Nr   nearest)sizemode)r   r[   interpolater  r  r  )r^   r  r  f_sizenorm_fnew_fs         ra   r   zSpatialNorm.forward  s`    ]2F;;;##R(4;;r??:rb   )
r   r   r!  r"  r#  r>   r   r&  r   r(  r)  s   @ra   rH   rH     s         ]] ] ] ] ] ] ] 5< EL        rb   rH   c            
       x     e Zd Z	 	 	 ddedee         dedee         f fdZded	ej        d
ej        fdZ	 xZ
S )r   Nr  rp   r   rq   r   c                    d}t          dd|d           t                                                       || _        || _        || _        |                    dd           }|                    dd           }||n|}||n|}|                    dd           }	|                    d	d           }
|	|	n|}	|
|
n|}
|                    d
d           }|                    dd           }||n|}||n|}t          ||||          | _        t          |p||||          | _	        t          |p|
|
|	|          | _
        t          ||||          | _        d S )NUsing LoRAAttnProcessor is deprecated. Please use the PEFT backend for all things LoRA. You can install PEFT by running `pip install peft`.r   0.30.0Fstandard_warnr   r   r   r   r   r   )r   r=   r>   rp   r   rq   r   r   r   r   r   r   r^   rp   r   rq   r   r   r4  r   r   r   r   r   r   r`   s                ra   r>   zLoRAAttnProcessor.__init__  s    l%x1DTYZZZZ&#6 	Hd++

?D99!-4)6)BHd++

?D99!-4)6)B::j$// **%6=='388-<-H//k(v}]]()<)K[Z^`mnn()<)M}^dfstt*?OXWdeerb   r+  r   rd   c                     | j         j        }t          |dd|dd           d           | j                            |j                  |j        _        | j                            |j                  |j	        _        | j
                            |j                  |j        _        | j                            |j                  |j        d         _        |j                            d           t!                      |_         |j        ||fi |S N0.26.0Make sure use r  z instead by settingLoRA layers to `self.{to_q,to_k,to_v,to_out[0]}.lora_layer` respectively. This will be done automatically when using `LoraLoaderMixin.load_lora_weights`r   r/   )r`   r   r   r   r   ro   rO   r   r   rP   r   rQ   r   rV   r   r   r0   r/   r^   r+  r   r   self_cls_names        ra   r   zLoRAAttnProcessor.__call__  s    /7qrr!2 7 7 7		
 	
 	
  $~001EFF	#~001EFF	#~001EFF	$($4$7$78L$M$MA!+&&&&t~dM<<V<<<rb   Nr  Nr   r   r!  r#  r   r>   r   r   r&  r   r(  r)  s   @ra   r   r     s         .2'+#f #f#f &c]#f 	#f
  }#f #f #f #f #f #fJ=Y =u| =RWR^ = = = = = = = =rb   r   c            
       x     e Zd Z	 	 	 ddedee         dedee         f fdZded	ej        d
ej        fdZ	 xZ
S )r   Nr  rp   r   rq   r   c                    d}t          dd|d           t                                                       t          t          d          st          d          || _        || _        || _        |	                    dd           }|	                    d	d           }||n|}||n|}|	                    d
d           }	|	                    dd           }
|	|	n|}	|
|
n|}
|	                    dd           }|	                    dd           }||n|}||n|}t          ||||          | _        t          |p||||          | _        t          |p|
|
|	|          | _        t          ||||          | _        d S )Nr  r   r  Fr  r<   r`  r   r   r   r   r   r   )r   r=   r>   rZ   r[   rS  rp   r   rq   r   r   r   r   r   r   r  s                ra   r>   zLoRAAttnProcessor2_0.__init__ 	  s    l((4GW\]]]]q899 	rpqqq&#6 	Hd++

?D99!-4)6)BHd++

?D99!-4)6)B::j$// **%6=='388-<-H//k(v}]]()<)K[Z^`mnn()<)M}^dfstt*?OXWdeerb   r+  r   rd   c                     | j         j        }t          |dd|dd           d           | j                            |j                  |j        _        | j                            |j                  |j	        _        | j
                            |j                  |j        _        | j                            |j                  |j        d         _        |j                            d           t!                      |_         |j        ||fi |S r  )r`   r   r   r   r   ro   rO   r   r   rP   r   rQ   r   rV   r   r   r\   r/   r  s        ra   r   zLoRAAttnProcessor2_0.__call__'	  s    /7qrr!2 7 7 7		
 	
 	
  $~001EFF	#~001EFF	#~001EFF	$($4$7$78L$M$MA!+&&&)++t~dM<<V<<<rb   r  r  r)  s   @ra   r   r     s         .2'+%f %f%f &c]%f 	%f
  }%f %f %f %f %f %fN=Y =u| =RWR^ = = = = = = = =rb   r   c                        e Zd ZdZ	 	 	 ddedededee         dee         f
 fd	Zd
ede	j
        de	j
        fdZ xZS )r   a  
    Processor for implementing the LoRA attention mechanism with memory efficient attention using xFormers.

    Args:
        hidden_size (`int`, *optional*):
            The hidden size of the attention layer.
        cross_attention_dim (`int`, *optional*):
            The number of channels in the `encoder_hidden_states`.
        rank (`int`, defaults to 4):
            The dimension of the LoRA update matrices.
        attention_op (`Callable`, *optional*, defaults to `None`):
            The base
            [operator](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.AttentionOpBase) to
            use as the attention operator. It is recommended to set to `None`, and allow xFormers to choose the best
            operator.
        network_alpha (`int`, *optional*):
            Equivalent to `alpha` but it's usage is specific to Kohya (A1111) style LoRAs.
        kwargs (`dict`):
            Additional keyword arguments to pass to the `LoRALinearLayer` layers.
    r  Nrp   r   rq   ri   r   c                    t                                                       || _        || _        || _        || _        |                    dd           }|                    dd           }||n|}||n|}|                    dd           }	|                    dd           }
|	|	n|}	|
|
n|}
|                    dd           }|                    dd           }||n|}||n|}t          ||||          | _        t          |p||||          | _	        t          |p|
|
|	|          | _
        t          ||||          | _        d S )Nr   r   r   r   r   r   )r=   r>   rp   r   rq   ri   r   r   r   r   r   r   )r^   rp   r   rq   ri   r   r   r   r   r   r   r   r   r`   s                ra   r>   z"LoRAXFormersAttnProcessor.__init__R	  sm    	&#6 	(Hd++

?D99!-4)6)BHd++

?D99!-4)6)B::j$// **%6=='388-<-H//k(v}]]()<)K[Z^`mnn()<)M}^dfstt*?OXWdeerb   r+  r   rd   c                     | j         j        }t          |dd|dd           d           | j                            |j                  |j        _        | j                            |j                  |j	        _        | j
                            |j                  |j        _        | j                            |j                  |j        d         _        |j                            d           t!                      |_         |j        ||fi |S Nr  r  r  z instead by settingLoRA layers to `self.{to_q,to_k,to_v,add_k_proj,add_v_proj,to_out[0]}.lora_layer` respectively. This will be done automatically when using `LoraLoaderMixin.load_lora_weights`r   r/   )r`   r   r   r   r   ro   rO   r   r   rP   r   rQ   r   rV   r   r   r   r/   r  s        ra   r   z"LoRAXFormersAttnProcessor.__call__v	  s    /7qrr!2 7 7 7		
 	
 	
  $~001EFF	#~001EFF	#~001EFF	$($4$7$78L$M$MA!+&&&.00t~dM<<V<<<rb   )r  NN)r   r   r!  r"  r#  r   r   r>   r   r   r&  r   r(  r)  s   @ra   r   r   <	  s         2 +/'+"f "f"f !"f 	"f
 x("f  }"f "f "f "f "f "fH=Y =u| =RWR^ = = = = = = = =rb   r   c            
       |     e Zd ZdZ	 	 	 ddedee         dedee         f fdZd	ed
ej	        dej	        fdZ
 xZS )r   a  
    Processor for implementing the LoRA attention mechanism with extra learnable key and value matrices for the text
    encoder.

    Args:
        hidden_size (`int`, *optional*):
            The hidden size of the attention layer.
        cross_attention_dim (`int`, *optional*, defaults to `None`):
            The number of channels in the `encoder_hidden_states`.
        rank (`int`, defaults to 4):
            The dimension of the LoRA update matrices.
        network_alpha (`int`, *optional*):
            Equivalent to `alpha` but it's usage is specific to Kohya (A1111) style LoRAs.
        kwargs (`dict`):
            Additional keyword arguments to pass to the `LoRALinearLayer` layers.
    Nr  rp   r   rq   r   c                    t                                                       || _        || _        || _        t          ||||          | _        t          |p||||          | _        t          |p||||          | _        t          ||||          | _	        t          ||||          | _
        t          ||||          | _        d S r  )r=   r>   rp   r   rq   r   r   r   r   r   r   r   )r^   rp   r   rq   r   r`   s        ra   r>   z!LoRAAttnAddedKVProcessor.__init__	  s     	&#6 	(k4WW./B/QkS^`dfstt./B/QkS^`dfstt(k4WW(k4WW*;T=YYrb   r+  r   rd   c                     | j         j        }t          |dd|dd           d           | j                            |j                  |j        _        | j                            |j                  |j	        _        | j
                            |j                  |j        _        | j                            |j                  |j        d         _        |j                            d           t!                      |_         |j        ||fi |S r  )r`   r   r   r   r   ro   rO   r   r   rP   r   rQ   r   rV   r   r   r{   r/   r  s        ra   r   z!LoRAAttnAddedKVProcessor.__call__	  s    /7qrr!2 7 7 7		
 	
 	
  $~001EFF	#~001EFF	#~001EFF	$($4$7$78L$M$MA!+&&&-//t~dM<<V<<<rb   r  )r   r   r!  r"  r#  r   r>   r   r   r&  r   r(  r)  s   @ra   r   r   	  s         ( .2'+Z ZZ &c]Z 	Z
  }Z Z Z Z Z Z(=Y =u| =RWR^ = = = = = = = =rb   r   c                        e Zd ZdZd fd	Z	 	 	 	 	 ddedej        deej                 d	eej                 d
eej                 de	deej                 fdZ
 xZS )IPAdapterAttnProcessora  
    Attention processor for Multiple IP-Adapters.

    Args:
        hidden_size (`int`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`):
            The number of channels in the `encoder_hidden_states`.
        num_tokens (`int`, `Tuple[int]` or `List[int]`, defaults to `(4,)`):
            The context length of the image features.
        scale (`float` or List[`float`], defaults to 1.0):
            the weight scale of image prompt.
    Nr  r   c                 p   t                                                       | _        | _        t	          |t
          t          f          s|g}|| _        t	          |t                    s|gt          |          z  }t          |          t          |          k    rt          d          || _
        t          j        fdt          t          |                    D                       | _        t          j        fdt          t          |                    D                       | _        d S )NJ`scale` should be a list of integers with the same length as `num_tokens`.c                 >    g | ]}t          j        d           S Fr;   r   rN   r   r   r   rp   s     ra   r   z3IPAdapterAttnProcessor.__init__.<locals>.<listcomp>	  +    eeeRY*KeDDDeeerb   c                 >    g | ]}t          j        d           S r  r  r  s     ra   r   z3IPAdapterAttnProcessor.__init__.<locals>.<listcomp>	  r  rb   )r=   r>   rp   r   rv   tuplelist
num_tokensr   rF   rD   r   rU   r  to_k_ipto_v_ipr^   rp   r   r  rD   r`   s    ``  ra   r>   zIPAdapterAttnProcessor.__init__	  s$   &#6 *udm44 	&$J$%&& 	.Gc*oo-Eu::Z((ijjj
}eeeeeeTWXbTcTcNdNdeee
 
 }eeeeeeTWXbTcTcNdNdeee
 
rb   r+  r   r   r   r,  rD   r   c           
         |}|nt          |t                    r|\  }}	nSd}
t          dd|
d           |j        d         | j        d         z
  }|d d d |d d f         |d d |d d d f         g}	}|j        |                    ||          }|j        }|dk    r:|j        \  }}}}|                    ||||z                                dd	          }||j        n|j        \  }}}|	                    |||          }|j
        =|
                    |                    dd	                                        dd	          }|                    |          }||}n|j        r|                    |          }|                    |          }|                    |          }|                    |          }|                    |          }|                    |          }|                    |||          }t%          j        ||          }|                    |          }|t          |t*                    s"t-          |                    d                    }t1          |          t1          | j                  cxk    rt1          |	          k    sGn t5          d
t1          |           dt1          | j                   dt1          |	           d          t7          t9          || j        |	                    D ]\  }\  }}}t          |t$          j                  r|j        dk    rt5          d          |j        d         |j        d         k    r.t5          d|j        d          d|j        d          d|           t          |t,                    rNt1          |          |j        d         k    s0t5          d|j        d          dt1          |           d|           nd gt1          | j                  z  }t9          |	| j        | j        | j        |          D ],\  }}}}}d}t          |t,                    rtA          d |D                       rd}n|dk    rd}|s|Yt          |t,                    s|g|j        d         z  }|j        d         }tC          |          D ]} ||d d |d d d d f                   }  ||d d |d d d d f                   }!|                    |           } |                    |!          }!|                    || d           }"t%          j        |"|!          }#|                    |#          }#tE          j#        |d d |d d d d f         ||#j        d         |#j        d	                   }$|$$                    |j%        |j&                  }$|||         |#|$z  z  z   } ||          }  ||          }!|                    |           } |                    |!          }!|                    || d           }"t%          j        |"|!          }|                    |          }|||z  z   }. |j'        d         |          } |j'        d         |          }|dk    r,|                    dd          (                    ||||          }|j)        r||z   }||j*        z  }|S )NYou have passed a tensor as `encoder_hidden_states`. This is deprecated and will be removed in a future release. Please make sure to update your script to pass `encoder_hidden_states` as a tuple to suppress this warning.!encoder_hidden_states not a tupler/  Fr  r   r   r  r	   "Length of ip_adapter_masks array ()) must match length of self.scale array (") and number of ip_hidden_states ()Each element of the ip_adapter_masks array should be a tensor with shape [1, num_images_for_ip_adapter, height, width]. Please use `IPAdapterMaskProcessor` to preprocess your maskNumber of masks (&) does not match number of ip images () at index #) does not match number of scales (c              3   "   K   | ]
}|d k    V  dS r   Nr   r   ss     ra   	<genexpr>z2IPAdapterAttnProcessor.__call__.<locals>.<genexpr>P
  &      --!qAv------rb   Tr   r   r   )+rv   r  r   r   r  rI   r   r1  r   r  r:   rO   rM   r  rP   rQ   r   r  r   r2  r   r   r  r  r   rD   rF   	enumeratezipr&  r  r  r   r  r
   
downsampler   r   ro   rV   r   r-   r,   )%r^   r+  r   r   r   r,  rD   r   r5  ip_hidden_statesr4  end_posr6  r   r7  r8  r9  r:  r   r   r   r  r  indexmaskip_statecurrent_ip_hidden_statesr  r  skipcurrent_num_imagesr  ip_keyip_valueip_attention_probs_current_ip_hidden_statesmask_downsamples%                                        ra   r   zIPAdapterAttnProcessor.__call__	  s    ! !,/77 :O7%'7'7C $ =wH[kpqqqq/5a84?1;MM)!!!XgXqqq.9*111ghh>:; (8%
 ( --mTBBM"'
??1>1D.J)..z7FUNSS]]^_abccM $9#@MF[Fa 	'
OQ 44^_V`aa?& OOM,C,CAq,I,IJJTTUVXYZZM		-(( ($1!!_ 	[$($C$CDY$Z$Z!ii-..		/00&&u--$$S))&&u--33E3OO	/599..}=='.55 G#'(8(B(B1(E(E#F#F ())S__UUUUDT@U@UUUUU 1=M9N9N 1 136tz??1 1,--1 1 1   7@DTVZV`br@s@s6t6t  2E2D%%dEL99 TY!^^([  
 z!}q(999(Z
1 Z Z4<N14EZ ZRWZ Z   "%.. s5zzTZPQ]7R7R(P
1 P P14UP PHMP P  $ !%vDJ7 HKdj$,FVH
 H
 /	U /	UC$eWgt D%&& --u-----  D! &U#%eT22 8!&$*Q- 7)-A&"#566 q q!()A!!!Q111*)M!N!N#*7+CAAAq!!!QQQJ+O#P#P!%!7!7!?!?#'#9#9(#C#C-1-F-FufVZ-[-[*49I>PRZ4[4[1484J4JKd4e4e1*@*K Aqqq!!!,&5;A>5;A>	+ + +:*<*<5;W\Wc*<*d*d(5aD]`oDo8p(p)q, %W%=>>F&w'?@@H!33F;;F#55h??H)-)B)B5&RV)W)W&/4y9KX/V/V,/3/E/EF^/_/_,$1E<T4T$TM 'A}55&A}55??)33B;;CCJPWY_afggM# 	5)H4M%(BBrb   Nr  r   NNNr   Nr   r   r!  r"  r>   r   r   r&  r   r   r   r(  r)  s   @ra   r  r  	  s         
 
 
 
 
 
6 9=15'+37] ]] |]  (5	]
 !.] u|$] ] #5<0] ] ] ] ] ] ] ]rb   r  c                        e Zd ZdZd fd	Z	 	 	 	 	 ddedej        deej                 d	eej                 d
eej                 de	deej                 fdZ
 xZS )IPAdapterAttnProcessor2_0a  
    Attention processor for IP-Adapter for PyTorch 2.0.

    Args:
        hidden_size (`int`):
            The hidden size of the attention layer.
        cross_attention_dim (`int`):
            The number of channels in the `encoder_hidden_states`.
        num_tokens (`int`, `Tuple[int]` or `List[int]`, defaults to `(4,)`):
            The context length of the image features.
        scale (`float` or `List[float]`, defaults to 1.0):
            the weight scale of image prompt.
    Nr  r   c                    t                                                       t          t          d          st	          | j        j         d          | _        | _        t          |t          t          f          s|g}|| _        t          |t                    s|gt          |          z  }t          |          t          |          k    rt          d          || _        t!          j        fdt%          t          |                    D                       | _        t!          j        fdt%          t          |                    D                       | _        d S )Nr<   z@ requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.r  c                 >    g | ]}t          j        d           S r  r  r  s     ra   r   z6IPAdapterAttnProcessor2_0.__init__.<locals>.<listcomp>
  r  rb   c                 >    g | ]}t          j        d           S r  r  r  s     ra   r   z6IPAdapterAttnProcessor2_0.__init__.<locals>.<listcomp>
  r  rb   )r=   r>   rZ   r[   rS  r`   r   rp   r   rv   r  r  r  r   rF   rD   r   rU   r  r  r  r  s    ``  ra   r>   z"IPAdapterAttnProcessor2_0.__init__
  s\   q899 	>*lll   '#6 *udm44 	&$J$%&& 	.Gc*oo-Eu::Z((ijjj
}eeeeeeTWXbTcTcNdNdeee
 
 }eeeeeeTWXbTcTcNdNdeee
 
rb   r+  r   r   r   r,  rD   r   c           
         |}|nt          |t                    r|\  }}	nSd}
t          dd|
d           |j        d         | j        d         z
  }|d d d |d d f         |d d |d d d f         g}	}|j        |                    ||          }|j        }|dk    r:|j        \  }}}}|                    ||||z                                dd	          }||j        n|j        \  }}}|?|	                    |||          }|                    ||j
        d
|j        d
                   }|j        =|                    |                    dd	                                        dd	          }|                    |          }||}n|j        r|                    |          }|                    |          }|                    |          }|j        d
         }||j
        z  }|                    |d
|j
        |                              dd	          }|                    |d
|j
        |                              dd	          }|                    |d
|j
        |                              dd	          }t#          j        ||||dd          }|                    dd	                              |d
|j
        |z            }|                    |j                  }|t          |t,                    s"t/          |                    d                    }t3          |          t3          | j                  cxk    rt3          |	          k    sGn t7          dt3          |           dt3          | j                   dt3          |	           d          t9          t;          || j        |	                    D ]\  }\  }}}t          |t<          j                  r|j        dk    rt7          d          |j        d         |j        d         k    r.t7          d|j        d          d|j        d          d|           t          |t.                    rNt3          |          |j        d         k    s0t7          d|j        d          dt3          |           d|           nd gt3          | j                  z  }t;          |	| j        | j         | j!        |          D ]\  }}}}}d}t          |t.                    rtE          d |D                       rd}n|dk    rd}|s|t          |t.                    s|g|j        d         z  }|j        d         }tG          |          D ]q}  ||d d | d d d d f                   }! ||d d | d d d d f                   }"|!                    |d
|j
        |                              dd	          }!|"                    |d
|j
        |                              dd	          }"t#          j        ||!|"d dd          }#|#                    dd	                              |d
|j
        |z            }#|#                    |j                  }#tI          j%        |d d | d d d d f         ||#j        d         |#j        d	                   }$|$                    |j        |j&                  }$|||          |#|$z  z  z   }s ||          }! ||          }"|!                    |d
|j
        |                              dd	          }!|"                    |d
|j
        |                              dd	          }"t#          j        ||!|"d dd          }|                    dd	                              |d
|j
        |z            }|                    |j                  }|||z  z   } |j'        d         |          } |j'        d         |          }|dk    r,|                    d
d                              ||||          }|j(        r||z   }||j)        z  }|S )Nr  r  r/  Fr  r   r   r  r	   r   r   rV  r  r  r  r   r  r  r  r  r  c              3   "   K   | ]
}|d k    V  dS r  r   r  s     ra   r
  z5IPAdapterAttnProcessor2_0.__call__.<locals>.<genexpr>)  r  rb   Tr   r   )*rv   r  r   r   r  rI   r   r1  r   r  r   r:   rO   rM   r  rP   rQ   r[   r<   r   r   r   r   r  r  r   rD   rF   r  r  r   r&  r  r  r   r  r
   r  ro   rV   r-   r,   )%r^   r+  r   r   r   r,  rD   r   r5  r  r4  r  r6  r   r7  r8  r9  r:  r   r   r   r  r?   rf  r  r  r  r  r  r  r  r  r  r  r  r  r  s%                                        ra   r   z"IPAdapterAttnProcessor2_0.__call__
  s	    ! !,/77 :O7%'7'7C $ =wH[kpqqqq/5a84?1;MM)!!!XgXqqq.9*111ghh>:; (8%
 ( --mTBBM"'
??1>1D.J)..z7FUNSS]]^_abccM $9#@MF[Fa 	'
OQ %!88ZdeeN ,00TZ^MabdMeffN?& OOM,C,CAq,I,IJJTTUVXYZZM		-(( ($1!!_ 	[$($C$CDY$Z$Z!ii-..		/00IbM	
*

:r4:x@@JJ1aPPhhz2tz8<<FFq!LL

:r4:x@@JJ1aPP 633RW
 
 
 &//155==j"dj[cNcdd%((55'.55 G#'(8(B(B1(E(E#F#F ())S__UUUUDT@U@UUUUU 1=M9N9N 1 136tz??1 1,--1 1 1   7@DTVZV`br@s@s6t6t  2E2D%%dEL99 TY!^^([  
 z!}q(999(Z
1 Z Z4<N14EZ ZRWZ Z   "%.. s5zzTZPQ]7R7R(P
1 P P14UP PHMP P  $ !%vDJ7 HKdj$,FVH
 H
 <	U <	UC$eWgt D%&& --u-----  D! 3U#%eT22 8!&$*Q- 7)-A&"#566 q q!()A!!!Q111*)M!N!N#*7+CAAAq!!!QQQJ+O#P#P!'ZTZ!R!R!\!\]^`a!b!b#+==RX#V#V#`#`abde#f#f 564R!68ts^c5 5 51 5N4W4WXY[\4]4]4e4e&DJ,A5 51 5N4P4PQVQ\4]4]1*@*K Aqqq!!!,&5;A>5;A>	+ + +:*<*<5;W\Wc*<*d*d(5aD]`oDo8p(p5q8 %W%=>>F&w'?@@H#[[RXNNXXYZ\]^^F'}}ZTZRR\\]^`abbH 01/Mvx43Z_0 0 0, 0H/Q/QRSUV/W/W/_/_"B
X(=0 0, 0H/J/J5;/W/W,$1E<T4T$TM 'A}55&A}55??)33B;;CCJPWY_afggM# 	5)H4M%(BBrb   r  r  r  r)  s   @ra   r   r   
  s         
 
 
 
 
 
@ 9=15'+37x xx |x  (5	x
 !.x u|$x x #5<0x x x x x x x xrb   r   )=r   r  	importlibr   typingr   r   r   r   r   torch.nn.functionalr   
functionalr[   image_processorr
   utilsr   r   utils.import_utilsr   r   utils.torch_utilsr   lorar   
get_loggerr   r   r  rk   xformers.opsr   r   r0   rx   r{   r|   r\  rj  r~   r   rf   r\   r  r  ry   rz   r   r}   rH   r   r   r   r   r  r   rw   ADDED_KV_ATTENTION_PROCESSORSCROSS_ATTENTION_PROCESSORSr   r   rb   ra   <module>r3     s     # # # # # # 2 2 2 2 2 2 2 2 2 2 2 2                 4 4 4 4 4 4 & & & & & & & & N N N N N N N N 4 4 4 4 4 4 ! ! ! ! ! ! 
	H	%	%  OOOH P& P& P& P& P&	 P& P& P&fE E E E E E E EPe e e e e29 e e eP@ @ @ @ @ @ @ @FI I I I I I I IXJ4 J4 J4 J4 J4 J4 J4 J4ZN4 N4 N4 N4 N4 N4 N4 N4bD D D D D D D DN[ [ [ [ [ [ [ [|k k k k k k k k\V V V V V V V Vr_ _ _ _ _ _ _ _Db b b b b b b bJq q q q q29 q q qho o o o obi o o odT T T T T T T TnY Y Y Y Y Y Y Yx    ")   :8= 8= 8= 8= 8=	 8= 8= 8=v:= := := := :=29 := := :=zL= L= L= L= L=	 L= L= L=^8= 8= 8= 8= 8=ry 8= 8= 8=vC C C C CRY C C CLc c c c c c c cN 	   !  
    (#!   rb   