
    QVji@              #          U d dl Z d dlmZmZmZ d dlmZ d dlZd dlm	Z	  ed          Z
 ed          Zi Zeej        j        ef         ed<    eh d          Zd	ed
edeee
ef         gee
ef         f         fdZ edd          	 d4dd d ddej        dej        dej        deej                 dedededej        fd            Zdee         dedefdZdej        dededej        fdZdej        d ej        d!ed"edee         d#edej        fd$Zd!ed"eddfd%Zdej        d ej        d!ed"edee         dej        fd&Z ed'd          	 	 	 d5dd d d dd(dd)dej        d ej        d*ej        d+eej                 d,eej                 d-eej                 d.ed/ed0ed#edee         d1ed2ee         deej        ej        ej        ej        f         fd3            Z dS )6    N)CallableOptionalTypeVar)	ParamSpec)_dtype_mappings_P_RONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_versionreturnc                 |     dt           t          t          f         dt           t          t          f         f fd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                    d }t          j                            d d| d          |           }| t          t	          t	          t           j        j                  |          <   |                    |            |S )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr
   getattropsonnxregister_fake)r   overloadtorch_opr   r   s      N/root/voice-cloning/.venv/lib/python3.11/site-packages/torch/onnx/ops/_impl.py	decoratorz_onnx_op.<locals>.decorator   s    *=**=**)W))x)) + 
 

   	wwuy~w'G'GRRS
 	t$$$    )r   r   r	   )r   r   r#   s   `` r"   _onnx_opr%      sO    
R( Xb"f-=        r$   RotaryEmbedding   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr(   r)   r*   c                h   | j         t                    }d         d         t          j                                        dk    fd           t          j        j         d         k    fd           t          j        j         d         k    fd           t          j                                        dk    o                                dk    fd	           nGt          j                                        d
k    o                                d
k    fd           |dk    rt          j        | d          } nJ|d
k    rDt          j        |dk    fd           d         }||z  }	||	g}
t          j        | |
          } t          j        t          | j                   dk    d            | j         d
         }	|dk    r|	}| ddddddd|f         }| dddddd|df         }|dz                    nt          j        j         d         k    oj         d         k    fd           t          j        j         d         k    oj         d         k    fd           t          j        j         d         k    fd           t          j        j         d         k    fd           t          j        d          t          j        d          |r+|dddddddddf         }|dddddddddf         }nt          j        |dd          \  }}|z  |z  z
  }|z  |z  z   }|r]t          j        |d          }t          j        |d          }t          j	        ||fd          }t          j        ||j                   }nt          j	        ||fd          }t          j	        ||fd          }|d
k    rt          j        |          S t          j        |d          S )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   N   c                      d j          S )Nz6position_ids must be 2D when provided. Received shape shape)r.   s   r"   <lambda>z%rotary_embedding_23.<locals>.<lambda>F   s    a\M_aa r$   c                  *    d  dj         d          S )Nz6position_ids first dim (batch) must match x.shape[0] (). Received r   r3   )
batch_sizer.   s   r"   r5   z%rotary_embedding_23.<locals>.<lambda>J   s"    |Z||eqewxyez|| r$   r   c                  *    d d j         d          S )Nz;position_ids second dim (sequence) must match x.shape[-2] (r7   r   r3   )r.   sequence_lengths   r"   r5   z%rotary_embedding_23.<locals>.<lambda>N   s>      GRa  G  Go{  pB  CD  pE  G  G r$   c                  (    d j          dj          S )NzWcos_cache/sin_cache must be 2D when position_ids is provided. Received cos_cache shape , sin_cache shape r3   r,   r-   s   r"   r5   z%rotary_embedding_23.<locals>.<lambda>R   -     ](1] ]KT?] ] r$      c                  (    d j          dj          S )Nz[cos_cache/sin_cache must be 3D when position_ids is not provided. Received cos_cache shape r<   r3   r=   s   r"   r5   z%rotary_embedding_23.<locals>.<lambda>X   r>   r$      )r   r1   r   r?   c                      d  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape r   )input_shapes   r"   r5   z%rotary_embedding_23.<locals>.<lambda>e   s    obmoo r$   c                      dS )Nzx should be a 4D tensor by nowr   r   r$   r"   r5   z%rotary_embedding_23.<locals>.<lambda>l   s    ,L r$   c                  &    dj          d  d dS )Nzcos has shape  but expected (batch=, seq=, ...)r3   )r8   cosr:   s   r"   r5   z%rotary_embedding_23.<locals>.<lambda>   &    jjjjjSbjjj r$   c                  &    dj          d  d dS )Nzsin has shape rF   rG   rH   r3   )r8   r:   sins   r"   r5   z%rotary_embedding_23.<locals>.<lambda>   rJ   r$   c                  ,    d j         d          d dS )NzLast dimension of cos cache (rM   ') should match rotary_embedding_dim/2 ().r3   )rI   rotary_embedding_dim_halfs   r"   r5   z%rotary_embedding_23.<locals>.<lambda>   4      D	"  D  Df  D  D  D r$   c                  ,    dj         d          d  dS )NzLast dimension of sin cache (rM   rO   rP   r3   )rQ   rL   s   r"   r5   z%rotary_embedding_23.<locals>.<lambda>   rR   r$   dim)
r4   lenr   _checkrU   permutereshape	unsqueezechunkcat)r+   r,   r-   r.   r(   r)   r*   
input_rankhidden_size	head_size	new_shapex_rotatex_not_rotatex1x2realimagx_rotate_concatoutputr8   rI   rC   rQ   r:   rL   s    ```               @@@@@@r"   rotary_embedding_23ri   /   sv    'K[!!JQJ!"oO !#aaaa	
 	
 	
 	q!Z/|||||	
 	
 	
 	q!_4 G  G  G  G  G	
 	
 	
 	MMOOq 9Y]]__%9] ] ] ] ]	
 	
 	
 	
 	MMOOq 9Y]]__%9] ] ] ] ]	
 	
 	
 Q M!\**	qNoooo	
 	
 	
 "!n9,	)YG	M!Y''	LQW"$L$LMMM
I q  (AAAqqq////0HQQQ11123334L 4 9 
 
 	L	!
"Fsy|'Fjjjjjj   
L	!
"Fsy|'Fjjjjjj   
L	"22 	D  	D  	D  	D  	D   
L	"22 	D  	D  	D  	D  	D   /Q C /Q C
  2aaaAAAqt!tm$aaaAAAqt!tm$Xqb111B 8cBhD8cBhD  3 tR((tR(()T4Lb999=(.AA9dD\r222Y,/R888FQ}V[111 =...r$   scaler_   c                 8    | | ndt          j        |          z  S )z/Get the scale factor for attention computation.Ng      ?)mathsqrt)rj   r_   s     r"   _get_scale_factorrn      s     %55C$)I2F2F,FGr$   tensorr8   c                     | j         d         | j         d         }}||z  }|                     ||||                              dd                                          S )z1Reshape 3D tensor to 4D for multi-head attention.r   r1   )r4   view	transpose
contiguous)ro   r8   r)   r:   r^   r_   s         r"   _reshape_3d_to_4drt      sS     $*<?FLO[Oy(IJIFF	1a	r$   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec           	          |dk    rt          | ||||          S t          j        t          j        | |                    dd                              S )z1Get QK output tensor based on the specified mode.r   r0   rM   )_compute_qk_output_for_mode_0r   
zeros_likematmulrr   )ru   rv   rw   rx   rj   ry   s         r"   _get_qk_output_for_aten_spdar~      s[     !!,q%';U
 
 	

 QB0C0C D DEEEr$   c                 J     t          j         z  dk     fd           dS )z-Validate Group Query Attention configuration.r   c                      d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   )rx   rw   s   r"   r5   z-_validate_gqa_configuration.<locals>.<lambda>   s    y 3yyZnyyy r$   N)r   rW   )rw   rx   s   ``r"   _validate_gqa_configurationr      sA     
L22a7yyyyy    r$   c                    |}||k    r||z  }|                     |d          }t          || j        d                   }t          j        |          }| |z  }	||z  }
t          j        |	|
                    dd                    S )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r   rT   r?   r0   rM   )repeat_interleavern   r4   rl   rm   r   r}   rr   )ru   rv   rw   rx   rj   K_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaleds              r"   r{   r{      s     H222+/CC&&}!&<<$UAGAJ77L<((J:~H*$H<("4"4R"<"<===r$   	Attention        )	is_causalkv_num_headsq_num_headsry   rj   softcapsoftmax_precisionV	attn_maskpast_key
past_valuer   r   r   r   r   c                	   d\  }}}t          | j                  }| j        d         }t          | j                  dk    r`t          j        |dk    o|dk    d            | j        d         }t	          | ||          } t	          |||          }t	          |||          }t          j        t          | j                  dk    o/t          |j                  dk    ot          |j                  dk    d            | j        |         }t          |
|          }
|t          j        ||g|	          n|                                }|t          j        ||g|	          n|                                }||}}| j        |         }|j        |         }| j        |         }|j        |         }|d
k    o"|	dk    o|du o|du p|j        t          j	        k    }t          ||           |rqd}||j        t          j	        k    r| n|}t          j        j                            | |||d
||
t          ||k                        }t          | ||||
|	          }nY||k    r3||z  }|                    ||	          }|                    ||	          }t          j        ||| j        | j                  }|rut          j        |du d            t          j        t          j        ||t          j	        | j                            }|                    | t+          d                    }|?|j        t          j	        k    r%|                    | t+          d                    }n||z   }t          |
| j        d                   } t-          j        |           }!| |!z  }"||!z  }#t          j        |"|#                    dd                    }$|$}|$|z   }%|	dk    r|%}|d
k    r|t          j        |%|z            z  }%|	dk    r|%}|x|t6          v rX|%j        }&|%                    t:          j        |                   }%t          j        |%d	          }'|'                    |&          }'n-t          j        |%d	          }'nt          j        |%d	          }'|	dk    r|'}t          j        |'|          }|dk    r=|                    dd                                           !                    ||d          }||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r   r1   r?   r   r?   c                      dS )Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r$   r"   r5   zattention_23.<locals>.<lambda>  s    Q r$   r   rA   c                      dS )Nz'Q, K, and V should be 4D tensors by nowr   r   r$   r"   r5   zattention_23.<locals>.<lambda>!  s    9 r$   NrT   r   )r   	dropout_pr   rj   
enable_gqa)dtypedevicec                      dS )Nz'Cannot use both is_causal and attn_maskr   r   r$   r"   r5   zattention_23.<locals>.<lambda>v  s    +T r$   z-infr0   rM   r1   )"rV   r4   r   rW   rt   rn   r\   cloner   boolr   nn
functionalscaled_dot_product_attentionr~   r   zerosr   trilonesmasked_fillfloatrl   rm   r}   rr   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ONNX_DTYPE_TO_TORCH_DTYPEsoftmaxrs   rq   )(ru   rv   r   r   r   r   r   r   r   ry   rj   r   r   num_head_dimsequence_dimhead_diminput_shape_lenr8   q_sequence_lengthq_head_sizepresent_keypresent_valuerw   rx   kv_sequence_lengthcan_use_sdpasdpa_attn_maskrh   	qk_outputr   	attn_biascausal_maskr   r   r   r   qk_matmul_outputqk_with_biasoriginal_dtype
qk_softmaxs(                                           r"   attention_23r      s   & ,3(L, !'llOJ 17||q12!2QQ	
 	
 	
 GAJa[99a\::a\::	LAGEc!'lla/ECLLA4E99   '(#Ke[11E
  		8Q-\2222WWYY  ! 		:q/|4444WWYY  qA ',/7<0-. 	3 	A!Q&	A%	A $?)/UZ"?	    35IJJJ r-  +4?ej+H+HiZZiN$AA$#';;  B 
 
 1 !
 
		 "666/3GGM##M|#DDA##M|#DDA K1
 
 
	
  	KLT!#T#T    *
%&*8	   K "--{lE&MMJJI  %*,,%119*eFmmLL		 &	1	 )
;; Y|,,
z>z> !<(2D2DR2L2LMM %	 ()3 A%%$I S=="UZw0F%G%GGL A%%$I ( $QQQ!-!3+#=>OP    #]<R@@@
']]>::

"]<R@@@

|<<<J A%%"I j!,, ! Q""--//44ZARTVWW 	 ;y88r$   )N)NNN)!rl   typingr   r   r   typing_extensionsr   r   torch.onnx.opsr   r   r	   r
   dict_ops
OpOverload__annotations__	frozensetr   strintr%   Tensorr   ri   r   rn   rt   r~   r   r{   tupler   r   r$   r"   <module>r      sX    . . . . . . . . . . ' ' ' ' ' '  * * * * * * Yt__WT]] AC UZ2H<= B B B09	  1 1 -!$xB (2r6"223   * 

R  
 ,0	D/  !D/ D/ D/|D/|D/ |D/ 5<(	D/ D/ D/ D/ \D/ D/ D/ ! D/NHXe_ H H H H H H

L
&)
69

\
 
 
 
F|F|F F 	F
 E?F F \F F F F$47	   >|>|> > 	>
 E?> \> > > >, 
+r
 )-'+)-F9 !"!'+F9 F9 F9|F9|F9 |F9 %	F9
 u|$F9 &F9 F9 F9 F9 F9 E?F9 F9  }F9 5<u|U\ABF9 F9 F9 F9 F9 F9r$   