
    QVji_9                     |   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ erd dlmZ d d	fd
efdZ e j        ed d          Z e j        ed d	          Ze G d d                      Ze G d d                      Z G d d          Z G d d          ZddZd d dfdZd ZddZdS )     N)deque)	dataclass)TYPE_CHECKINGprofile)
DeviceType)_KinetoEventc                     | j         S N)childrenxs    O/root/voice-cloning/.venv/lib/python3.11/site-packages/torch/profiler/_utils.py<lambda>r      s    1:     Freversec              #      K   |rt           nd }t           ||                     }|r? ||          }|V   | ||                    D ]}|                    |           |=d S d S )Nc                     | S r    r   s    r   r   z_traverse.<locals>.<lambda>   s    q r   )reversedr   append)treenext_fnchildren_fnr   order	remaining
curr_eventchild_events           r   	_traverser      s      0HH[[EeeDkk""I
 *WY''
 5Z!8!899 	* 	*K[))))	  * * * * *r   c                 *    |                                  S r   )popr   s    r   r   r      s    aeegg r   T)r   r   c                 *    |                                  S r   )popleftr   s    r   r   r      s     r   c                   ^    e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   e	d             Z
dS )EventMetricsr   duration_time_nsself_time_nsidle_time_nsqueue_depthc                 :    | j         dk    rdS | j        | j         z  S )Nr   g        )r&   r(   selfs    r   fraction_idle_timezEventMetrics.fraction_idle_time(   s&     A%%3 4#888r   N)__name__
__module____qualname__r&   int__annotations__r'   r(   r)   propertyr-   r   r   r   r%   r%   !   sp         cL#L#K9 9 X9 9 9r   r%   c                   2    e Zd ZU eed<   eed<   dZeed<   dS )Intervalstartendr   r)   N)r.   r/   r0   r1   r2   r)   r   r   r   r5   r5   /   s4         JJJ	HHHKr   r5   c                   F    e Zd Zd	dZd Zd ZdefdZdee	         fdZ
dS )
EventKeyreturnNc                     || _         d S r   event)r,   r=   s     r   __init__zEventKey.__init__7   s    


r   c                 4    t          | j        j                  S r   )hashr=   idr+   s    r   __hash__zEventKey.__hash__:   s    DJM"""r   c                 6    | j         j        |j         j        k    S r   )r=   rA   )r,   others     r   __eq__zEventKey.__eq__=   s    z}..r   c                     | j         j         S r   )r=   namer+   s    r   __repr__zEventKey.__repr__@   s    */##r   	intervalsc                 b   d}t          |d           }|rXt          | j        j        |d         j                  }t          | j        j        |d         j                  }||k     r|||z
  z  }d\  }}|t          |          k     r||         }||         }|dz  }|j        |j        k    r$|j        |j        k    r|dz  }N|j        |_        |}t          | j        j        |j                  }t          | j        j        |j                  }||k     r|||z
  z  }|t          |          k     |S )Nr   c                     | j         S r   r6   r   s    r   r   z,EventKey.intervals_overlap.<locals>.<lambda>E   s    AG r   key)r      rO   )	sortedmaxr=   start_time_nsr6   minend_time_nsr7   len)	r,   rI   overlap_timeoverlap_startoverlap_endijprev_intervalcurr_intervals	            r   intervals_overlapzEventKey.intervals_overlapC   sK   9*;*;<<<	 	<
 8)A,:LMMMdj4il6FGGK{**m ;;1#i..  %aLM%aLMFA =#666 $}'888FA*7*;M'A
 8-:MNNMdj4m6GHHK{**m ;;! #i..  $ r   r:   N)r.   r/   r0   r>   rB   rE   strrH   listr5   r]   r   r   r   r9   r9   6   s           # # #/ / /$# $ $ $ $4>      r   r9   c                   L    e Zd ZdeddfdZddZd ZddZd Zdde	de
fdZdS )BasicEvaluationprofr:   Nc                 N   || _         i | _        |                                  t          d | j                                        D             d           | _        d | j        D             | _        g | _        |                                 | _	        | 
                                 d S )Nc              3      K   | ]}|V  d S r   r   .0es     r   	<genexpr>z+BasicEvaluation.__init__.<locals>.<genexpr>j   s"      ,,1Q,,,,,,r   c                     | j         j        S r   )r=   rR   r   s    r   r   z*BasicEvaluation.__init__.<locals>.<lambda>j   s    AG<Q r   rM   c                     g | ]	}|j         
S r   r<   rf   s     r   
<listcomp>z,BasicEvaluation.__init__.<locals>.<listcomp>l   s    8881qw888r   )r   metricscompute_self_timerP   keys
event_keyseventscuda_eventscompute_queue_depthqueue_depth_listcompute_idle_time)r,   rc   s     r   r>   zBasicEvaluation.__init__e   s    57    ,,))++,,,2Q2Q
 
 
 98888/1 $ 8 8 : :     r   c                    | j         j        J t          | j         j                                                  }|r|                                }|j        }|j        D ]!}||j        z  }|                    |           "t          |          | j	        vsJ d|j
         d|j                     t          |          | j	        t          |          <   |j        | j	        t          |                   _        |dS dS )zM
        Computes event's self time(total time - time in child ops).
        NzDuplicate id: z, )r'   )r   kineto_resultsr   experimental_event_treer!   r&   r   r   r9   rm   rA   rG   r%   )r,   stackr   	self_timer   s        r   rn   z!BasicEvaluation.compute_self_timeq   s!    |*666dl1IIKKLL  	=J"3I)2 * *[99	[))))J''t|;;;CCC*/CC <;; 2>91U1U1UDL*--. ",!< L$$  	= 	= 	= 	= 	=r   c                    | j         j        J | j         j                                        }d d t          fd|D             d           }t          fd|D             d           }t          ||z   d	           | _        i }d
}|D ]"t          |fd|          }||<   ||n|}#d
}d}||z   | j        z   }	d }
g }|	                    |
           |	D ]}t          |d          rW|                                dz  }|                                |	                                z   dz  }||v r||         ||         }t          |d          rR|
                                }|
                                |                                z   }||v r||         ||         }nt          |d          r|j        }|j        }|t          |          k     rT||         
                                |k    r6|dz  }|t          |          k     r||         
                                |k    6||z
  dz   }t          |d
          }t          |d          st          |d          r&|                    t#          |||                     t          |d          r|| j        t'          |                   _        |S )z
        Computes queue_depth at each event. This will calculate the queue depth data for
        All the events in the tree.
        This will return a list of Interval of queue depth data of cuda launch and kernels.
        Nc                 ~    h d}t          t          | d|                     t          fd|D                       S )z+Check if the event is a CUDA launch kernel.>   cudaLaunchKernel__cudaLaunchKernelcudaLaunchKernelExCcudaLaunchCooperativeKernel&cudaLaunchCooperativeKernelMultiDevicerG   c              3   B   K   | ]}                     |          V  d S r   )
startswithrg   patternrG   s     r   ri   zUBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernel.<locals>.<genexpr>   s/      OOGtw//OOOOOOr   )r_   getattrany)rh   launch_patternsrG   s     @r   is_cuda_launch_kernelzBBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernel   sR      O wq&!,,--DOOOOOOOOOOr   c                     |                                  t          j        k    rdS t          t	          | d|                                                     h d}t          fd|D                        S )z,Check if the event is a CUDA runtime kernel.FrG   >   cpymemfreeallocc              3       K   | ]}|v V  	d S r   r   r   s     r   ri   zNBasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernel.<locals>.<genexpr>   s'      KKw7d?KKKKKKr   )device_typer   CUDAr_   r   lowerr   )rh   exclude_patternsrG   s     @r   is_cuda_kernelz;BasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernel   sy     }}*/11uwq&!,,--3355D  ?>>KKKK:JKKKKKKKr   c              3   2   K   | ]} |          |V  d S r   r   )rg   rh   r   s     r   ri   z6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>   s4      DD1+@+@+C+CDQDDDDDDr   c                 *    |                                  S r   start_nsr   s    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>       !**,, r   rM   c              3   2   K   | ]} |          |V  d S r   r   )rg   rh   r   s     r   ri   z6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>   s2      ==1>>!+<+<=Q======r   c                 *    |                                  S r   r   r   s    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   r   r   c                 *    |                                  S r   r   r   s    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   s    1::<< r   r   c                 X    |                                                                   k    S r   )linked_correlation_id)r   cuda_launch_events    r   r   z5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   s'    !1133$::<<= r   rL   c                     t          | d          r|                                 dz  S t          | d          r|                                 S t          | d          r| j        S t	          d          )Nstart_us  r   rR   zUnknown Event Type)hasattrr   r   rR   	Exceptionr<   s    r   new_old_event_comparatorzEBasicEvaluation.compute_queue_depth.<locals>.new_old_event_comparator   su    uj)) /~~''$..uj)) (~~'''uo.. +**0111r   r   r   r   rR   rO   )r   rw   rq   rP   rr   index_of_first_matchsortr   r   duration_usr   duration_nsrR   rT   rU   rQ   r   r5   rm   r9   r)   )r,   cuda_event_listcuda_launch_eventscuda_kernel_eventskernel_mappinglast_mapped_kernelindexcurrent_kernel_indexspawned_kernel_index
all_eventsr   rt   r=   
start_timeend_timecurrent_queue_depthr   r   r   s                   @@@r   rs   z#BasicEvaluation.compute_queue_depth   s    |*666,5<<>>
	P 
	P 
	P	L 	L 	L $DDDDDDD&&
 
 
 $=======&&
 
 

 "!339O9O
 
 
 35!3 	T 	T("= = = =(	  E 16N,-*/*;AS !'*<<t{J
	2 	2 	2 ,.4555 	P 	PEuj)) A"^^--4
!NN,,u/@/@/B/BBdJN**~e/D/P+9%+@(uj)) -"^^--
 >>++e.?.?.A.AAN**~e/D/P+9%+@(00 -"0
 , %s+='>'>>>'(<=FFHHZWW$)$ %s+='>'>>>'(<=FFHHZWW #79M"MPQ"Q"%&91"="=uj)) PWUJ-G-G P ''Z3FGG    00 P<OXe__-9r   c                 p   d}d}g }| j         rj| j        rc|t          | j        d         j        | j         d         j                  t          | j         d         j        | j        d         j                  gz  }| j         D ]O}|j        dk    r|s	|j        }d}|j        dk    r,|r*|                    t          ||j                             d}Pd | j	        
                                D             }|D ]A}t          |                              |          | j	        t          |                   _        BdS )z4
        Computes idle time of the profile.
        Fr   r   Tc                     g | ]	}|j         
S r   r<   rf   s     r   rl   z5BasicEvaluation.compute_idle_time.<locals>.<listcomp>  s    ;;;!ag;;;r   N)rt   rq   r5   rR   r6   r7   rT   r)   r   rm   ro   r9   r]   r(   )r,   idle
idle_startidle_intervals
data_point
event_listr=   s          r   ru   z!BasicEvaluation.compute_idle_time   sU   
 
)+  	T[ 	Q5t7LQ7O7UVV.r26B8STT N
 / 	 	J%**4*'^
%))d)%%hz:;K&L&LMMM;;t|'8'8':':;;;
 	0 	0E9A: :// L%)66	0 	0r   c                 l    ddl }t          t           j                            }d |D             }dd}g d}|t	          |          k     r||         k    r|dz  }%t          |dz   t	          |                    D ]x}t          |fd|          }t          |||          }	|	M||	         |k    rA                    t          ||	         j
        ||         j
                             ||n|} ny|dz  }|t	          |          k     Èfd	 j                                        D             }
|
r|                     fd
|
D             |j                  }|                     fd|
D             |j                  }||                    |          z
  |                    |          z  }||                    |          z
  |                    |          z  }|d|z  z   }d t#          t%          ||
          t'          j        d          d          D             }
|
d|         }
|
S )a  
        Filter and Rank the events based on some heuristics:
        1) Events that are in the falling phase of the queue depth.
        2) Events that have a high idle_time, self_time difference.

        Parameters:
            length: The number of events to return.
        r   Nc                     g | ]	}|j         
S r   )r)   rf   s     r   rl   z/BasicEvaluation.rank_events.<locals>.<listcomp>  s    ===qQ]===r      rO   c                     | k    S r   r   )r   bottom_threasholds    r   r   z-BasicEvaluation.rank_events.<locals>.<lambda>-  s    .?)? r   rL   )r6   r7   c                 >    g | ]}|                               |S r   )r]   )rg   r=   decrease_intervals     r   rl   z/BasicEvaluation.rank_events.<locals>.<listcomp><  s>     
 
 
&&'899

 
 
r   c                 4    g | ]}j         |         j        S r   )rm   r'   rg   r=   r,   s     r   rl   z/BasicEvaluation.rank_events.<locals>.<listcomp>C  s#    JJJee$1JJJr   )dtypec                 4    g | ]}j         |         j        S r   )rm   r-   r   s     r   rl   z/BasicEvaluation.rank_events.<locals>.<listcomp>G  s#    PPPEe$7PPPr   g333333?c                     g | ]\  }}|S r   r   )rg   _r=   s      r   rl   z/BasicEvaluation.rank_events.<locals>.<listcomp>O  s,       Au   r   T)rN   r   )torchr`   r   rt   rU   ranger   argmaxr   r5   r6   rm   ro   tensorfloat32meanstdrP   zipoperator
itemgetter)r,   lengthr   rt   	qd_valuestop_threasholdrY   rZ   next_minimum_idxpeak_idxr   rz   	idle_timenormalized_gainnormalized_selfheuristic_score_listr   r   s   `               @@r   rank_eventszBasicEvaluation.rank_events  s    	)> ? ?@@==,<===	#i..  |///Q1q5#i..11   $8????q$ $ $  ")1:JKKK 'Ih,?>,Q,Q%,, ,X6<>Nq>Q>W   
 -=,H((aAEFA+ #i..  .
 
 
 
**,,
 
 


  	-JJJJzJJJm %  I PPPPZPPPm %  I  )5::i+@+@@EIIiDXDXXO(5::i+@+@@EIIiDXDXXO#2S?5J#J   &,j99 +A.. ! ! !  J $GVG,Jr   rO   Tr   print_enablec                                            |          }|s|S |rdnd}|d                     fd|D                       z  }|rt          |           |S )NzOptimizable events:
zNo events to optimize

c                 |    g | ]8}d  d| dt          |j                   dj        |         j        dz  ddd  	9S )zP--------------------------------------------------------------------------------z
Event:                z
Source code location: z
Percentage idle time: d   z.2fz%
)source_code_locationr=   rm   r-   r   s     r   rl   z:BasicEvaluation.get_optimizable_events.<locals>.<listcomp>a  s            +EK88  |E*=C   
	   r   )r   joinprint)r,   r   r   r   outputs   `    r   get_optimizable_eventsz&BasicEvaluation.get_optimizable_eventsZ  s    %%f--
 	,6U((<U$))    (  	
 	
 		
  	&MMMr   r^   )rO   T)r.   r/   r0   r   r>   rn   rs   ru   r   r1   boolr   r   r   r   rb   rb   d   s        
!W 
! 
! 
! 
! 
!= = = =,l  l  l \0 0 0 08G G GR S D      r   rb   c                     ||t          |           k    rt          |           }t          ||          D ]} || |                   r|c S d S r   )rU   r   )seq	predicater6   r7   rY   s        r   r   r   o  sb    
{cSXXoo#hh5#  9SV 	HHH	4r   c                     | S r   r   r   s    r   r   r   x  s    a r   c                     | ||         } t          |           dk    rd S |                     t          | |                    |z   S )Nr   rM   )rU   r   rQ   )r   rN   r6   r7   s       r   r   r   x  sG    
eCi.C
3xx1}}t99S#&&&''%//r   c                 `    | +t          j        d| j                  }|| j        } &| j        S dS )Nz
\.py\(.*\)zNo source code location found)researchrG   parent)r=   matchs     r   r   r     s8    

	-44=LEz**r   r:   c                  Z    ddl m}   |             5  	 d d d            d S # 1 swxY w Y   d S )Nr   r   )torch.autograd.profilerr   r   s    r   _init_for_cuda_graphsr     s    //////	                   s    $$)r   Nr^   )	functoolsr   r   collectionsr   dataclassesr   typingr   r   r   torch.profilerr   torch.autogradr	   r   r   partialtraverse_dfstraverse_bfsr%   r5   r9   rb   r   r   r   r   r   r   r   <module>r      s(        				       ! ! ! ! ! !             + + + + + + % % % % % %  ,++++++ *>)=u * * * * * * !y 4E4EtTTT y ,,e  
 
9 
9 
9 
9 
9 
9 
9 
9        + + + + + + + +\H H H H H H H HV     Kqd 0 0 0 0+ + +     r   