
import math

import torch
from torch import nn
import torch.nn.functional as F
from einops import rearrange


class RelativePositionBias(nn.Module):
    def __init__(self, scale, causal=False, num_buckets=32, max_distance=128, heads=8):
        super().__init__()
        self.scale = scale
        self.causal = causal
        self.num_buckets = num_buckets
        self.max_distance = max_distance
        self.relative_attention_bias = nn.Embedding(num_buckets, heads)

    @staticmethod
    def _relative_position_bucket(relative_position, causal=True, num_buckets=32, max_distance=128):
        # T5-style bucketing: nearby offsets get their own bucket, distant offsets
        # share logarithmically spaced buckets up to max_distance.
        ret = 0
        n = -relative_position
        if not causal:
            num_buckets //= 2
            ret += (n < 0).long() * num_buckets
            n = torch.abs(n)
        else:
            n = torch.max(n, torch.zeros_like(n))

        max_exact = num_buckets // 2
        is_small = n < max_exact

        val_if_large = max_exact + (
            torch.log(n.float() / max_exact) / math.log(max_distance / max_exact) * (num_buckets - max_exact)
        ).long()
        val_if_large = torch.min(val_if_large, torch.full_like(val_if_large, num_buckets - 1))

        ret += torch.where(is_small, n, val_if_large)
        return ret

    def forward(self, qk_dots):
        i, j, device = *qk_dots.shape[-2:], qk_dots.device
        q_pos = torch.arange(i, dtype=torch.long, device=device)
        k_pos = torch.arange(j, dtype=torch.long, device=device)
        rel_pos = k_pos[None, :] - q_pos[:, None]
        rp_bucket = self._relative_position_bucket(
            rel_pos, causal=self.causal, num_buckets=self.num_buckets, max_distance=self.max_distance
        )
        values = self.relative_attention_bias(rp_bucket)
        bias = rearrange(values, 'i j h -> () h i j')
        return qk_dots + (bias * self.scale)


class AttentionQKV(nn.Module):
    def __init__(self, n_heads, head_dim, dropout_rate=0.1, scale=None, flash=False):
        super().__init__()
        self.n_heads = n_heads
        self.head_dim = head_dim
        self.scale = scale if scale is not None else head_dim ** -0.5
        self.flash = flash
        self.dropout_rate = dropout_rate
        self.dropout = nn.Dropout(dropout_rate)
        self.flash_config = self.setup_flash_config() if flash else None

    def setup_flash_config(self):
        flash_config = {
            'enable_flash': True,
            'enable_math': True,
            'enable_mem_efficient': True,
        }
        return flash_config

    def forward(self, q, k, v, mask=None):
        q, k, v = [self.split_heads(tensor) for tensor in [q, k, v]]
        if self.flash:
            out = self.flash_attention(q, k, v, mask=mask)
        else:
            out = self.scaled_dot_product_attention(q, k, v, mask=mask)
        return self.combine_heads(out)

    def scaled_dot_product_attention(self, q, k, v, mask=None):
        sim = torch.einsum("bhlt,bhls->bhts", q, k) * self.scale
        if mask is not None:
            sim = sim.masked_fill(mask == 0, float('-inf'))
        attn = torch.softmax(sim, dim=-1)
        attn = self.dropout(attn)
        return torch.einsum("bhts,bhls->bhlt", attn, v)

    def flash_attention(self, q, k, v, mask=None):
        config = self.flash_config if self.flash_config else {}
        with torch.backends.cuda.sdp_kernel(**config):
            out = F.scaled_dot_product_attention(
                q, k, v,
                attn_mask=mask,
                dropout_p=self.dropout_rate if self.training else 0.
            )
        return out

    def split_heads(self, x):
        # (batch, length, n_heads * head_dim) -> (batch, n_heads, length, head_dim)
        bs, length, _ = x.shape
        x = x.view(bs, length, self.n_heads, self.head_dim)
        return x.permute(0, 2, 1, 3)

    def combine_heads(self, x):
        # (batch, n_heads, length, head_dim) -> (batch, length, n_heads * head_dim)
        bs, _, length, _ = x.shape
        x = x.permute(0, 2, 1, 3).contiguous()
        return x.view(bs, length, -1)


class AttentionBlock2(nn.Module):
    """
    An attention block that allows spatial positions to attend to each other,
    using AttentionQKV and separate linear transformations for Q, K, and V.
    """

    def __init__(
        self,
        channels,
        num_heads=1,
        num_head_channels=-1,
        relative_pos_embeddings=False,
        flash_attention=True,
        dropout_rate=0.2,
        scale=None,
    ):
        super().__init__()
        self.channels = channels

        if num_head_channels == -1:
            self.num_heads = num_heads
        else:
            assert channels % num_head_channels == 0, \
                f"channels {channels} is not divisible by num_head_channels {num_head_channels}"
            self.num_heads = channels // num_head_channels

        self.norm = nn.LayerNorm(channels)

        # Separate linear projections for Q, K, and V
        self.to_q = nn.Linear(channels, channels)
        self.to_k = nn.Linear(channels, channels)
        self.to_v = nn.Linear(channels, channels)

        self.attention = AttentionQKV(
            self.num_heads, channels // self.num_heads,
            dropout_rate=dropout_rate, flash=flash_attention, scale=scale
        )

        self.proj_out = nn.Linear(channels, channels)

        if relative_pos_embeddings:
            self.relative_pos_embeddings = RelativePositionBias(
                scale=(channels // self.num_heads) ** .5,
                causal=False, heads=num_heads, num_buckets=32, max_distance=64,
            )
        else:
            self.relative_pos_embeddings = None

    def forward(self, x1, x2, mask=None):
        b1, c1, *spatial1 = x1.shape
        b2, c2, *spatial2 = x2.shape

        x1_norm = self.norm(x1)
        x2_norm = self.norm(x2)

        # Queries come from x1; keys and values come from x2 (cross-attention)
        q = self.to_q(x1_norm)
        k = self.to_k(x2_norm)
        v = self.to_v(x2_norm)

        h = self.attention(q, k, v, mask=mask)
        h = self.proj_out(h)

        # Residual connection back onto x1
        return (x1 + h).reshape(b1, c1, *spatial1)


class Perceiver(nn.Module):
    """Inspired by https://arxiv.org/abs/2103.03206"""

    def __init__(self, pre_attention_query_token=32, pre_attention_query_size=1024, embedding_dim=1024, num_attn_heads=4):
        """
        Initialize the perceiver module.

        :param pre_attention_query_token: Number of query tokens for pre-attention
        :param pre_attention_query_size: Size of each query token
        :param embedding_dim: Dimension of the embedding space
        :param num_attn_heads: Number of attention heads
        """
        super().__init__()

        # Learned latent query bank, shared across the batch
        self.pre_attention_query = torch.nn.Parameter(
            torch.empty(1, pre_attention_query_token, pre_attention_query_size)
        )

        # Uniform initialization bounds derived from the query token count
        query_variance = math.sqrt(3.0) * math.sqrt(2.0 / (pre_attention_query_token + pre_attention_query_token))
        self.pre_attention_query.data.uniform_(-query_variance, query_variance)

        self.attn = AttentionBlock2(embedding_dim, num_attn_heads)

    def forward(self, h):
        """
        Forward pass of the perceiver module.
        :param h: Input tensor
        :return: Output after applying attention mechanisms
        """
        # Expand the learned queries to the batch size of the input
        query_ = self.pre_attention_query.expand(h.shape[0], -1, -1)
        # Cross-attend: latent queries attend to the input sequence
        pre_att = self.attn(query_, h)
        # Self-attend over the resampled latents
        attn = self.attn(pre_att, pre_att)
        return attn
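

# Minimal usage sketch, with assumed illustrative shapes (batch=2, 50 input
# frames) and the default hyperparameters above: the Perceiver resamples a
# variable-length sequence down to the fixed bank of 32 query tokens.
if __name__ == "__main__":
    perceiver = Perceiver()
    h = torch.randn(2, 50, 1024)  # (batch, sequence_length, embedding_dim)
    out = perceiver(h)
    print(out.shape)  # expected: torch.Size([2, 32, 1024])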