
    0;ji                         d dl Z d dlZd dlmZ d dlmZ  e j        e          Zg dZ	e G d d                      Z
 G d d          ZdS )	    N)	dataclass)
MethodType))      )      )	      c                   L    e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   dS )AlignmentAnalysisResultfalse_start	long_tail
repetitiondiscontinuitycompletepositionN)__name__
__module____qualname__bool__annotations__int     r/root/voice-cloning/.venv/lib/python3.11/site-packages/chatterbox/models/t3/inference/alignment_stream_analyzer.pyr   r      sQ          OOONNNMMMMMr   r   c                   $    e Zd ZddZd ZddZdS )	AlignmentStreamAnalyzerr	   r   c                 b   |x| _         \  }}|| _        t          j        d||z
            | _        d| _        d| _        d| _        d| _        d| _	        d| _
        g | _        g | _        t          t                    D ]1\  }\  }}	| xj        dgz  c_        |                     ||||	           2dS )a  
        Some transformer TTS models implicitly solve text-speech alignment in one or more of their self-attention
        activation maps. This module exploits this to perform online integrity checks which streaming.
        A hook is injected into the specified attention layer, and heuristics are used to determine alignment
        position, repetition, etc.

        NOTE: currently requires no queues.
        r   FN)text_tokens_sliceeos_idxtorchzeros	alignmentcurr_frame_postext_positionstarted
started_atr   completed_atgenerated_tokenslast_aligned_attns	enumerateLLAMA_ALIGNED_HEADS_add_attention_spy)
selftfmrqueuer   alignment_layer_idxr    ij	layer_idxhead_idxs
             r   __init__z AlignmentStreamAnalyzer.__init__!   s     +<;!QQ!,,  !#
 #%(12E(F(F 	B 	B$A$	8##v-####D!YAAAA	B 	Br   c                       fd}|j         |         j        }|                    |           t          |d          r4t          |j        d          r!|j        j         _        d|j        _        dS dS dS )zW
        Adds a forward hook to a specific attention layer to collect outputs.
        c                     t          |t                    rIt          |          dk    r8|d         2|d                                         }|df         j        <   dS dS dS dS )ah  
            See `LlamaAttention.forward`; the output is a 3-tuple: `attn_output, attn_weights, past_key_value`.
            NOTE:
            - When `output_attentions=True`, `LlamaSdpaAttention.forward` calls `LlamaAttention.forward`.
            - `attn_output` has shape [B, H, T0, T0] for the 0th entry, and [B, H, 1, T0+i] for the rest i-th.
               Nr   )
isinstancetuplelencpur*   )moduleinputoutputstep_attention
buffer_idxr5   r.   s       r   attention_forward_hookzJAlignmentStreamAnalyzer._add_attention_spy.<locals>.attention_forward_hookG   sr     &%(( RS[[1__AV!'6DQ[6Q'
333R R__AVAVr   configoutput_attentionsTN)layers	self_attnregister_forward_hookhasattrrD   rE   original_output_attentions)r.   r/   rB   r4   r5   rC   target_layers   ` ` `  r   r-   z*AlignmentStreamAnalyzer._add_attention_spyC   s    		R 		R 		R 		R 		R 		R 		R {9-7**+ABBB4"" 	1wt{<O'P'P 	1.2k.KD+,0DK)))	1 	1 	1 	1r   Nc                 6   t          j        | j                                      d          }| j        \  }}| j        dk    r3||d||f                                                                         }n2|dd||f                                                                         }d|dd| j        dz   df<   t          j        | j	        |fd          | _	        | j	        }|j
        \  }}	|d                                         }
d|
| j        z
  cxk     odk     nc  }|s|
| _        | j         oG|ddddf                                         d	k    p#|dddd
f                                         dk     }| | _        | j        r| j        || _        | j        p| j        |	dz
  k    | _        | j        r| j        || _        |ddddf                                         }| j        o<|| j        dddf                             d                                          dk    }| j        oA|| j        dddf                             d          j                                        dk    }|t)          |t           j                  rZ|                                dk    r|                                n,|                    d          d                                         }n|}| j                            |           t7          | j                  dk    r| j        dd         | _        t7          | j                  dk    o,t7          t9          | j        dd                             dk    }|r*| j        d         }t:                              d|            |
|	dz
  k     r|	dk    rd|d| j        f<   |s|s|rFt:                              d|d|d|           dt          j         |          z  }d|d| j        f<   | xj        dz  c_        |S )z~
        Emits an AlignmentAnalysisResult into the output queue, and potentially modifies the logits to force an EOS.
        r   )dimNr9      g?   g      ?   r         iu%   🚨 Detected 2x repetition of token i .zforcing EOS token, long_tail=z, alignment_repetition=z, token_repetition=i   )!r!   stackr*   meanr   r$   cloner=   catr#   shapeargmaxr%   r&   maxr'   r   r(   sumvaluesr:   Tensornumelitemviewr)   appendr<   setloggerwarningr    	ones_like)r.   logits
next_tokenaligned_attnr2   r3   A_chunkATScur_text_posnr   r   last_text_token_durationr   alignment_repetitiontoken_idtoken_repetitionrepeated_tokens                      r   stepzAlignmentStreamAnalyzer.stepY   s}   
 {4#:;;@@Q@GG%1!##"122qs7+11337799GG #111ac6*00226688G 014&*+++, DNG#<!DDDNw1  **,,1C!CGGGGaGGGGH 	/!.D
  <'^aRSSkoo.?.?#.E.]111bqb5Z]I]&< 	 DO3DO D);q1u)D= 	"T.6 !D $%RSS"##X;??#4#4  MXq):););RSS)@'A'E'E!'E'L'L'P'P'R'RVW'W	  $}i!D4E4F4F4K2L2P2PUV2P2W2W2^2b2b2d2dgh2h !*el33 &0:0@0@0B0Ba0G0G:??,,,Z__]_M`M`abMcMhMhMjMj%!((222 4())A--(,(=bcc(B%
 %&&!+ 6D)"##.//00A5 	  	U!226NNNS>SSTTT 1q5  QUU(.F3$%  	., 	.0@ 	.NNkkk?SkkXhkklll 7 77F(-F3$%q r   )r	   r   )N)r   r   r   r6   r-   rw   r   r   r   r   r       sV         B  B  B  BD1 1 1,Y Y Y Y Y Yr   r   )loggingr!   dataclassesr   typesr   	getLoggerr   rg   r,   r   r   r   r   r   <module>r|      s      ! ! ! ! ! !       
	8	$	$ 322         R R R R R R R R R Rr   