
    %Vji&7                       d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZmZ d dlmZmZ d dlmZ dd	lmZ e	rd d
lmZmZ ej                            ed          Zej                            ed          Zeee
e ej!        f                           Z"eee         ge"f         Z# ej$        d           G d d                      Z% ej$        d           G d d                      Z& ej$        d           G d d                      Z'dZdZ(d[dZ)d\dZ*d]d Z+d^d#Z,d_d(Z-d`d.Z.dad0Z/dbd3Z0dbd4Z1dcd7Z2ej$         G d8 d9                      Z3dddBZ4dedDZ5 G dE dFe          Z6dfdKZ7dgdQZ8 ej$        d           G dR dS                      Z9 ej$        d           G dT dU                      Z:dhdYZ;dS )i    )annotationsN)Enum)AnyCallableOptionalTYPE_CHECKINGUnion)countersget_metrics_context)GraphPartitionMap	InputType)
OrderedSet   )is_using_cudagraph_partition)SequenceSet
perf_hintscudagraph_static_inputsT)frozenc                      e Zd ZU dZded<   dS )
FunctionIDz9Unique counter of a function wrapped in cudagraphify_implintidN__name__
__module____qualname____doc____annotations__     Y/root/voice-cloning/.venv/lib/python3.11/site-packages/torch/_inductor/cudagraph_utils.pyr   r      s         ??GGGGGr!   r   c                  <    e Zd ZU dZded<   ded<   ded<   ded<   d	S )
PlaceholderInfoz
    A serializable version of torch.fx.Node that contains information
    pertinent to placeholder stack traces. We use these in logging and error messages
    related to cudagraphs, and will cache these results.
    strnameOptional[str]stack_tracelist[PlaceholderInfo]usersmutating_use_stack_traceNr   r    r!   r"   r$   r$   %   sK           III    ++++++r!   r$   c                  P    e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   dS )WrappedFunctionz
    Represents a function that you want to record for CUDA graph replay,
    with a little more metadata so we can identify if we have an applicable
    CUDA graph in our CUDA graph tree for it.
    zCallable[..., Any]modelSequence[int]static_input_idxsr   r   ztuple[torch.Tensor, ...]	constantsSequence[PlaceholderInfo]placeholdersmutated_input_idxsNr   r    r!   r"   r-   r-   4   sc           $$$$NNN''''++++%%%%%%r!   r-   placeholder_nodetorch.fx.Nodereturnr'   c                H   t          | j                  dk    r:t          t          | j                            j                            dd           S | j        D ]G}|j        t          j        j	        j
        j        k    r!|j                            dd           x}r|c S Hd S )Nr   r(   )lenr*   nextitermetagettargettorchopsatencopy_default)r5   user(   s      r"   &get_mutating_use_stack_trace_from_noderE   D   s     !""a''D)/00116::=$OOO% # #:-555!hll=$???{ #""""4r!   placeholder_infoc                    | j         S N)r+   )rF   s    r"   get_mutating_use_stack_tracerI   S   s    44r!   c                    | j         }| j                            dd           }g }d }| j        dk    r d | j        D             }t          |           }t          ||||          S )Nr(   placeholderc                ,    g | ]}t          |          S r    )to_placeholder_info).0is     r"   
<listcomp>z'to_placeholder_info.<locals>.<listcomp>^   s!    HHHA$Q''HHHr!   )r&   r<   r=   opr*   rE   r$   )r5   r&   r(   r*   r+   s        r"   rM   rM   W   s|     D"'++M4@@KE#m++HH1A1GHHH#I$
 $
  4e5MNNNr!   graphtorch.fx.Graphr)   c                $    d | j         D             S )Nc                B    g | ]}|j         d k    t          |          S )rK   )rQ   rM   )rN   nodes     r"   rP   z(get_placeholder_info.<locals>.<listcomp>g   s4       &*TW=U=UD!!=U=U=Ur!   )nodes)rR   s    r"   get_placeholder_inforX   f   s%     .3k   r!   reasonr%   c                    d|  S )Nzskipping cudagraphs due to r    )rY   s    r"   format_default_skip_messager[   l   s    1111r!   r3   r2   mutation_indices&Union[AbstractSet[int], Sequence[int]]c                    d}|D ]}| |         }t          |          x}r nt          dt          |           d          }|r| d| S |S )N zmutated inputs (z instances). Found from : 
 )rI   r[   r9   )r3   r\   r(   idxrK   msgs         r"   get_mutation_stack_tracerc   p   s     "$K  "3'6{CCC; 	E	 &=3/00=== C  766666Jr!   funcinputslist[InputType]is_cuda_graph_recorded_tensorCallable[[torch.Tensor], bool]c                     t           j        j        j        j        r fd j        D             }n j        }t                              d j                   t                              d|           |rt           j
        |          nd S )Nc                H    g | ]}|j         v  |                   |S r    )r0   )rN   ra   rd   re   rg   s     r"   rP   z&check_for_mutation.<locals>.<listcomp>   sJ     +
 +
 +
t---00== .  .--r!   z'check mutation static input indices: %sz#check mutation mutation indices: %s)r?   	_inductorconfigtritoncudagraph_treesr4   static_inputs_logdebugr0   rc   r3   )rd   re   rg   r\   s   ``` r"   check_for_mutationrq      s     $4 3+
 +
 +
 +
 +
 +
.+
 +
 +
  2143I   ACSTTT 	 !24DEEEr!   rV   c                \    | j         D ]#}|j                            dd           x}r|c S $d S )Nr(   )r*   r<   r=   )rV   rD   r(   s      r"   _get_use_stack_tracers      sF    z  (,,}d;;;; 		4r!   device_node_mapping!dict[torch.device, torch.fx.Node]c                   |                      t          j        d          d            t                      r(|                      t          j        d          d            |                     t          j        d                    x}r?d|j         d}t          |          x}rt          | d|           S t          |          S t          |           dk    r9t          t          |                                                     j        dk    rd S d |                                 D             }t          d	d
                    |                     S )Nr<   cpuzcpu device ()r`   r   cudac              3  4   K   | ]}t          |          V  d S rH   )repr)rN   keys     r"   	<genexpr>z:check_multiple_devices_or_any_cpu_nodes.<locals>.<genexpr>   s(      AAscAAAAAAr!   zmultiple devices: z, )popr?   devicer   r=   r&   rs   r[   r9   r:   r;   keystypejoin)rt   cpu_noderb   r(   	keys_reprs        r"   'check_multiple_devices_or_any_cpu_nodesr      sT    EL00$777 $%% ;U 3 3T:::&**5<+>+>???x 0-X]---.x888; 	X.#/V/V/V/VWWW*3/// 	  A%%)..00112276AAtAA&9&>&>&@&@AAAI&'RDIIi<P<P'R'RSSSr!   c                     t          |           S rH   )r   )rt   s    r"    check_lowering_disable_cudagraphr      s     33FGGGr!   rb   Nonec                <   t                               |            t          d         dxx         dz  cc<   t          j        j        j        j        rt          |           t                      }|
                                r|                    d| d           d S d S )Ninductorcudagraph_skipsr   cudagraph_skip_reasonT)	overwrite)perf_hint_logwarningr
   r?   rk   rl   rm   cudagraph_or_errorRuntimeErrorr   in_progressset)rb   metrics_contexts     r"   #log_cudagraph_skip_and_bump_counterr      s    #Z*+++q0+++$7  3)++O""$$ J3SDIIIIIJ Jr!   c                  "    e Zd ZU ded<   ddZdS )	BoxedDeviceIndexOptional[int]value
device_idxr7   r   c                F    |t          |t                    sJ || _        d S rH   )
isinstancer   r   )selfr   s     r"   r   zBoxedDeviceIndex.set   s(    !Z
C%@%@!!@


r!   N)r   r   r7   r   )r   r   r   r   r   r    r!   r"   r   r      s6                    r!   r   gmtorch.fx.GraphModulemutated_inputsOrderedSet[str]r4   OrderedSet[int]r0   r/   c                :   t          d          }t          j        j        j        j        rXt          |          fd|D             }t          |          dk    }|sd S t          | j	                  }t          ||          S t          |          dk    }|sd n|S )Nzmutated inputsc                    g | ]}|v|	S r    r    )rN   ra   unique_idxss     r"   rP   zGcheck_for_mutation_ignore_cuda_graph_managed_tensor.<locals>.<listcomp>   s#    XXXCKAWAWCAWAWAWr!   r   )r[   r?   rk   rl   rm   rn   r   r9   rX   rR   rc   )	r   r   r4   r0   default_msgr\   has_mutationr3   r   s	           @r"   3check_for_mutation_ignore_cuda_graph_managed_tensorr      s     ..>??K $4 9 !233XXXX+=XXX+,,1 	4+BH55'6FGGG >**a/'8tt[8r!   rK   c                V    | j         r| j         S | j        D ]}|j         r	|j         c S dS )zM
    Gets the first non-empty stack trace of a placeholder or its users.
    N)r(   r*   )rK   users     r"   get_placeholder_stack_tracer      sP      '&&! $ $ 	$####	$ 4r!   c                  &    e Zd ZdZdZdZdZd	dZdS )
CheckInvariantStatusr            r7   r%   c                r    | j         dk    rdS | j         dk    rdS | j         dk    rdS | j          d| j         S )NCudagraphManagedIdxMismatchz-cudagraph managed tensor data pointer changedStaticInputIdxMismatchz!static input data pointer changed&ExpectedDeadIndicesBeforeGraphMismatchz+expected dead indices before graph are livez: )r&   r   )r   s    r"   __str__zCheckInvariantStatus.__str__  sV    9555BBY22266YBBB@@i//4:///r!   Nr7   r%   )r   r   r   SUCCESSr   r   r   r   r    r!   r"   r   r     sE        G #$  ./*0 0 0 0 0 0r!   r   recorded_data_ptrSequence[Optional[int]]target_idxsmismatchc                   t                    t                    k    r t                    t          |           k    s
J d            fd|D             }fd|D             }| d}t          t          ||                    D ]\  }\  }	}
t          |	t          j                  sJ ||         }|	                                |
k    r=| |         }| d|j         d|
 d|	                                 dt          |           d	
}|S )
z}
    Logs the mismatch between input data pointers and recorded data pointers.
    This checks only idxs in target_idxs.
    zClength mismatch between inputs, recorded_data_ptr, and placeholdersc                     g | ]
}|         S r    r    )rN   rO   re   s     r"   rP   z)log_data_ptr_mismatch.<locals>.<listcomp>.  s    000q000r!   c                     g | ]
}|         S r    r    )rN   rO   r   s     r"   rP   z)log_data_ptr_mismatch.<locals>.<listcomp>/  s    ===A$Q'===r!   z.
zinput name: z. data pointer changed from z to z. input stack trace: 
)	r9   	enumeratezipr   r?   Tensordata_ptrr&   r   )r3   re   r   r   r   	t_tensorst_data_ptrs	error_msgrO   tensorr   indexrK   s    ``          r"   log_data_ptr_mismatchr     sj    v;;#/0000S[[CDUDU5U5U5UM 6V5UU 1000K000I=======K   I!*3y++F+F!G!G 	 	FH&%,/////A??((&u-K S S+*: S S-5S S;A??;L;LS S&A+&N&NS S S 
 r!   fn_cache)dict[tuple[int, ...], Callable[..., Any]]new_int_keyr   boolc                   t          |                                           dz   dfd}t          j        j        j        j        rCt          j        j        j        j        k    r$t                               |                       dS dS )Nr   r7   r%   c                     d  dS )NzCUDAGraph supports dynamic shapes by recording a new graph for each distinct input size. Recording too many CUDAGraphs may lead to extra overhead. We have observed a0   distinct sizes. Please consider the following options for better performance: a) padding inputs to a few fixed number of shapes; or b) set torch._inductor.config.triton.cudagraph_skip_dynamic_graphs=True. Set torch._inductor.config.triton.cudagraph_dynamic_shape_warn_limit=None to silence this warning.r    )num_cudagraphss   r"   warn_msgz4maybe_warning_due_to_dynamic_shape.<locals>.warn_msgD  s    '0>' ' '		
r!   TFr   )	r9   r   r?   rk   rl   rm   "cudagraph_dynamic_shape_warn_limitr   r   )r   r   r   r   s      @r"   "maybe_warning_due_to_dynamic_shaper   >  s     ))A-N

 

 

 

 

 

 	%H
/
 
'
JK K 	hhjj)))t5r!   c                  2    e Zd ZU dZded<   ded<   ded<   dS )	CudagraphCachedInfoz'
    Info needed to realign inputs
    r2   r3   list[Optional[str]]stack_tracesz	list[str]cudagraph_fail_reasonsNr   r    r!   r"   r   r   [  sB           ,+++%%%%%%%%%%r!   r   c                  F    e Zd ZU dZded<   ded<   ded<   ded<   d	ed
<   dS )CudagraphMetadataz.
    Metadata for recording a CUDA graph.
    r2   r3   r   r0   r4   r   r   zdict[str, torch.Tensor]r1   Nr   r    r!   r"   r   r   f  sZ           ,+++&&&&''''%%%%&&&&&&r!   r   partition_mapr   metadatac                F   g }t                      }t                      }t          | j                  D ]\  }}|j        v r|                    |           |j        v r|                    |           |j        |         }nt          d| j         d| dg d          }|	                    |           g }| j
        D ]:}	|	!|	                    j        |	                    %|	                    d           ;fd| j        D             }
t          |||||
          S )z
    Convert the cudagraph metadata at the graph level to the graph partition level,
    given the graph partition info (i.e., mapping from partition input/output index
    to graph input/output index).
    N
partition__placeholder_)r&   r(   r*   r+   c                ,    i | ]}|j         |         S r    )r1   )rN   r&   r   s     r"   
<dictcomp>z4get_partition_cudagraph_metadata.<locals>.<dictcomp>  s0       +/h &  r!   )r   r   input_index_mappingr0   addr4   r3   r$   r   appendoutput_index_mappingr   constant_namesr   )r   r   partition_placeholderspartition_static_input_idxspartition_mutated_input_idxspartition_input_idxgraph_input_idxrK   partition_stack_tracesgraph_output_idxpartition_constantss    `         r"    get_partition_cudagraph_metadatar   s  s     3=<<4>LL 09)1 1 3 3,_ h888'++,?@@@h999(,,-@AAA&"/@KK *V-"2VVATVV )-	  K 	%%k2222)> 0 0'"))(*?@P*QRRRR"))$////   3@3O   #$  r!   )r5   r6   r7   r'   )rF   r$   r7   r'   )r5   r6   r7   r$   )rR   rS   r7   r)   )rY   r%   r7   r%   )r3   r2   r\   r]   r7   r%   )rd   r-   re   rf   rg   rh   r7   r'   )rV   r6   r7   r'   )rt   ru   r7   r'   )rb   r%   r7   r   )
r   r   r   r   r4   r   r0   r/   r7   r'   )rK   r$   r7   r'   )r3   r2   re   rf   r   r   r   r/   r   r   r7   r%   )r   r   r   r   r7   r   )r   r   r   r   r7   r   )<
__future__r   dataclassesenumr   typingr   r   r   r   r	   r?   torch._dynamo.utilsr
   r   torch._inductor.utilsr   r   torch.utils._ordered_setr   utilsr   collections.abcr   r   AbstractSet_logginggetArtifactLoggerr   r   ro   listr   r   
OutputType	ModelType	dataclassr   r$   r-   rE   rI   rM   rX   r[   rc   rq   rs   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   <module>r      s]   " " " " " "           @ @ @ @ @ @ @ @ @ @ @ @ @ @  = = = = = = = = > > > > > > > > / / / / / / / / / / / /  =<<<<<<<< 00<HHN44'  
 (5el!2345
d9o&
23	 d###       $# d###, , , , , , , $#, d###& & & & & & & $#&   5 5 5 5O O O O   2 2 2 2   (   >   T T T T8H H H H	J 	J 	J 	J                9 9 9 90   0 0 0 0 04 0 0 00   >   : d###& & & & & & & $#& d###	' 	' 	' 	' 	' 	' 	' $#	'3 3 3 3 3 3r!   