
    QVjiqF              	          d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZmZmZ d dlZd d	lmZ g d
Z ee          Z G d dee          Z ed           G d d                      Zededee         ded         fd            ZdededdfdZde e         ddfdZ!de e         ddfdZ"dedede e         fdZ#dede e         fdZ$dede e         fdZ%dede e         fdZ&dede e         fdZ' ed          Z( ed          Z)d ee)         d!ee)ge(f         de*e(e e)         f         fd"Z+d#ede e         fd$Z,d#ede e         fd%Z-d&ede e         fd'Z.d&ede e         fd(Z/defd)Z0dedefd*Z1d&ede e         fd+Z2d&edefd,Z3d-edefd.Z4d&edefd/Z5d0ede e         fd1Z6d2ee         defd3Z7de e         fd4Z8d5ede e         fd6Z9de e         fd7Z:dS )8    N)defaultdict)IterableIterator)contextmanager)asdict	dataclass)Enum)	getLogger)CallableOptionalTypeVar)signpost_event)AffinityMode6maybe_temporarily_apply_numa_binding_to_current_threadNumaOptionsc                   "    e Zd ZdZdZdZdZdZdS )r   zW
    See behavior description for each affinity mode
    in torch.distributed.run.
    nodesocket	exclusivezcore-complexN)__name__
__module____qualname____doc__NODESOCKET	EXCLUSIVECORE_COMPLEX     L/root/voice-cloning/.venv/lib/python3.11/site-packages/torch/numa/binding.pyr   r      s.         
 DFI!LLLr   r   T)frozenc                   *    e Zd ZU eed<   	 dZeed<   dS )r   affinity_modeF!should_fall_back_if_binding_failsN)r   r   r   r   __annotations__r$   boolr   r   r    r   r   $   s6          /4%t33333r   r   	gpu_indexnuma_optionsreturnc              #      K   |dV  dS t                      }t          | |           dV  t          |           dS )z
    1. Applies NUMA binding to the current thread, suitable for the thread
    which will be interacting with GPU gpu_index.
    2. Resets to the original CPU affinity before exiting the context manager.
    Nr'   r(   logical_cpu_indices)+_get_allowed_cpu_indices_for_current_thread%_apply_numa_binding_to_current_thread$_bind_current_thread_to_logical_cpus)r'   r(   original_logical_cpu_indicess      r    r   r   2   ss       #N#P#P ),    
EEE(8     r   c           	         | t          |          d}t                              d|           	 t          | |          }t                              dt	          |                     t          |           t                              dt	          |                     t          |           t                              dt	          |                     t          ddi |d	t	          |          i
           d S # t          $ r| t          ddi |dt          j
                    i
           t                              d|           |j        r/t                              dt          j
                               Y d S  w xY w)Nr+   z0Attempting to apply NUMA binding, given input %rz0Computed logical_cpu_indices=%s for NUMA bindingr,   z1Validated logical_cpu_indices=%s for NUMA bindingz=Successfully bound to logical_cpu_indices=%s for NUMA bindingnuma_bindingapply_successr-   )categoryname
parametersapply_exception	tracebackz)Failed to apply NUMA binding for input=%rzHContinuing executing without applying NUMA binding, despite exception %s)r   loggerinfo_get_logical_cpus_to_bind_to_get_ranges_str_from_ints%_raise_if_logical_cpu_indices_invalidr0   r   	Exceptionr9   
format_exc	exceptionr$   warning)r'   r(   kwargsr-   s       r    r/   r/   I   s    |,, F KKBFKKK-:l
 
 
 	>%&9::	
 	
 	

 	.BUVVVV?%&9::	
 	
 	

 	-ATUUUUK%&9::	
 	
 	

 	# %'@AT'U'U 	
 	
 	
 	
 	
 	
    #"Y133 	
 	
 	
 	
 	DfMMM9 	NNZ$&&   44!s   CC? ?BFFr-   c                 (    | st          d          d S )Nz+Must bind to a non-empty set of CPU indices)RuntimeErrorr,   s    r    r>   r>      s&     JHIIIJ Jr   c                 0    t          j        d|            d S Nr   )ossched_setaffinityr,   s    r    r0   r0      s    /00000r   c                 f   |j         t          j        k    rt          |           }n|j         t          j        k    rt          |           }nd|j         t          j        k    rt          |           }n>|j         t          j        k    rt          |           }nt          d|j          d          |S )z
    Args:
        gpu_index: The index of the GPU that will be used by the subprocess.
            Example: 0
        numa_options: See NumaOptions for details.

    Returns:
        Set of logical CPU indices to bind to.
    r'   zAffinity mode z not supported.)r#   r   r   !_node_get_logical_cpus_to_bind_tor   #_socket_get_logical_cpus_to_bind_tor   &_exclusive_get_logical_cpus_to_bind_tor   )_core_complex_get_logical_cpus_to_bind_to
ValueError)r'   r(   logical_cpuss      r    r<   r<      s     !\%66689MMM		#|':	:	::YOOO		#|'=	=	==	RRR		#|'@	@	@@9UUUU,*DUUUVVVr   c                 B    t          |           }t          |          S )z-
    Core logic of 'node' numa strategy.
    rK   numa_node_index)"_get_numa_node_index_for_gpu_index._get_allowed_logical_cpu_indices_for_numa_node)r'   rT   s     r    rL   rL      s.     99MMMO9'   r   c                     t          |           }t          |          }t          |          }t                      }|D ]%}|                    t          |                     &|S )z/
    Core logic of 'socket' numa strategy.
    rK   rS   )socket_index)rU   _get_socket_index_for_numa_node'_get_numa_node_indices_for_socket_indexsetupdaterV   )r'   numa_node_index_of_gpurX   numa_node_indicesrQ   rT   s         r    rM   rM      s     @)TTT2.  L @!   55L, 
 
: /  	
 	
 	
 	
 r   c                    t          |           }t          |          }t          |          }|                    |           }t	          |          }t          |d           }t          t          |                                                    }t          |          t          |          z  }t          |          t          |          z  }|dk     r7t          dt          |           d|ddt          |           d	z             ||z  t          ||          z   }||z   ||k     rdnd
z   }	d t          |                                          ||	         D             }
|
S )z2
    Core logic of 'exclusive' numa strategy.
    rK   rS   c                 <    t          t          |                     S Nlogical_cpu_index)min6_get_logical_cpu_indices_sharing_same_physical_core_asrb   s    r    <lambda>z8_exclusive_get_logical_cpus_to_bind_to.<locals>.<lambda>   s%    #B"3  #
 #
 r      zThere are only z# physical cores on numa_node_index=,z but there are z% GPUs associated with this NUMA node.r   c                     h | ]	}|D ]}|
S r   r   ).0r-   rc   s      r    	<setcomp>z9_exclusive_get_logical_cpus_to_bind_to.<locals>.<setcomp>  sA     , , , "5, ,
 	 	, , , ,r   )rU   _get_gpu_indices_for_numa_nodesortedindexrV   	_group_bydictitemslenrE   rd   listvalues)r'   rT   gpu_indicesoriginal_gpu_relative_indexallowed_logical_cpu_indices,physical_core_to_allowed_logical_cpu_indicesnum_physical_cores_per_gpu(num_gpus_to_give_one_extra_physical_corestartend$logical_cpu_indices_for_original_gpus              r    rN   rN      s    99MMMO0QQQK%%K"-"3"3I">">"P'# # # 4=#	
 	
4 40 48;AACCDD4 40 "%4" "	[		"
 0340 0K0, "A%%wc"NOOwwdswwwWK 0 0WWWX
 
 	
 (*DDs#%MH H E 	
$	% +-UUU A	
 , ,#'8??AA$
 $

)$, , ,( 0/r   c                    t          |           }t          |          }t          |          }|                    |           }t	          |          }t          |d           }t          t          |                                d                     }|t          |          z  }t          |
                                          |         }|S )z
    Core logic of 'core-complex' numa strategy.

    Each GPU is assigned a full core complex (group of cores sharing L3 cache)
    within its affined NUMA node.
    rK   rS   c                 <    t          t          |                     S ra   )rd   1_get_logical_cpus_sharing_same_max_level_cache_asrb   s    r    rf   z;_core_complex_get_logical_cpus_to_bind_to.<locals>.<lambda>%  s%    #="3  #
 #
 r   c                 >    t          | d                    | d         fS )Nrg   r   )rr   )items    r    rf   z;_core_complex_get_logical_cpus_to_bind_to.<locals>.<lambda>1  s    s47||mT!W5 r   )key)rU   rl   rm   rn   rV   ro   rp   rq   rr   rs   rt   )r'   rT   ru   rv   rw   .max_level_cache_to_allowed_logical_cpu_indicescache_index_for_original_gpur}   s           r    rO   rO     s    99MMMO0QQQK%%K"-"3"3I">">"P'# # # 6?#	
 	
6 62 6::@@BB 65		
 	
 	
6 62 $?6B B $  ,06==??, ,",$( 0/r   KVrt   get_keyc                     t          t                    }| D ](} ||          }||                             |           )|S )z2
    Groups elements with same key into sets.
    )r   r[   add)rt   r   key_to_valuesvaluer   s        r    ro   ro   C  sO     -8,<,<M & &genncu%%%%r   rc   c                     d|  d}t          |          5 }t          |                                          cd d d            S # 1 swxY w Y   d S )N/sys/devices/system/cpu/cpuz/topology/thread_siblings_list)open_get_set_of_int_from_ranges_strread)rc   "thread_siblings_list_absolute_pathfs      r    re   re   N  s     	X&7WWW ' 
0	1	1 9Q.qvvxx889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9s   !AAAc                    d|  d}d}t                      }t          j        |          D ]}|                    d          r|dd                                          s5t          j                            ||          }t          j                            |d          }t          |          5 }|                                	                                dvr	 d d d            	 d d d            n# 1 swxY w Y   t          j                            |d          }t          |          5 }	t          |	                                          }
d d d            n# 1 swxY w Y   |
|k    rB|
}t          j                            |d	          }t          |          5 }t          |                                          }d d d            n# 1 swxY w Y   |S )
Nr   z/cachern      type>   DataUnifiedlevelshared_cpu_list)r[   rH   listdir
startswith	isdecimalpathjoinr   r   stripintr   )rc   cpu_cache_dir_absolute_path	max_level$logical_cpus_sharing_max_level_cacheentrycache_index_absolute_pathtype_absolute_path	type_filelevel_absolute_path
level_filer   shared_cpu_list_absolute_pathshare_cpu_list_files                r    r   r   X  s    	@&7???   I+.55(788  (( 	abb	0C0C0E0E 	$&GLL1Le$T$T!  W\\*CVLL$%% 	~~%%''/BBB	 	 	 	 	 	 	B	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 !gll+DgNN%&& 	+*
))**E	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+I	(*%'8)
 )
% /00 	4G3R#((**4 40	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
 0/s6   /*C22C6	9C6	,"EE	!E	"GG	G	rT   c                 H    t          |           }t                      }||z  S NrS   )0_get_cpu_indices_for_numa_node_MAYBE_NOT_ALLOWEDr.   )rT   all_cpu_indicesallowed_cpu_indicess      r    rV   rV   ~  s4    F'  O FGG000r   c                     d|  d}	 t          |          5 }|                                }ddd           n# 1 swxY w Y   n&# t          $ r}t          d| d          |d}~ww xY wt	          |          S )z
    Returns:
        Indices of all CPUs associated with numa_node_index. However, the list
        is not filtered based on whether the thread is allowed to use them.
    z/sys/devices/system/node/nodez/cpulistNz:Could not determine CPUs corresponding to numa_node_index=.)r   r   FileNotFoundErrorrE   r   )rT   cpulist_absolute_pathr   cpu_range_stres        r    r   r     s     VOUUU'(( 	%AFFHHM	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%   LLLL
 
	 +=999s3   A 8A <A < A 
A'A""A'c                  >    t           j                                        S )N)torchcudadevice_countr   r   r    _get_gpu_countr     s    :""$$$r   c                 f   t           j                            |           }|j        }|j        }|j        }|dd|dd|dd}d| d}t          |          5 }t          t          |	                                
                                          d          cd d d            S # 1 swxY w Y   d S )N04x:02xz.0z/sys/bus/pci/devices/z
/numa_noder   )r   r   get_device_propertiespci_domain_id
pci_bus_idpci_device_idr   maxr   r   r   )r'   device_propertiesdomainbusdevicepci_addrpci_numa_node_absolute_pathr   s           r    rU   rU     s   
88CC,F

&C,F 777s7777777H"N("N"N"N	)	*	* -a 3qvvxx~~''((!,,	- - - - - - - - - - - - - - - - - -s   AB&&B*-B*c                 R      fdt          t                                D             S )Nc                 <    h | ]}t          |           k    |S )rK   )rU   )rj   r'   rT   s     r    rk   z1_get_gpu_indices_for_numa_node.<locals>.<setcomp>  s9       -	BBBoUU 	UUUr   )ranger   rS   s   `r    rl   rl     s<       ~//00   r   c                 B    t          |           }t          |          S NrS   )	cpu_index)._get_arbitrary_allowed_cpu_index_for_numa_node_get_socket_index_for_cpu)rT   arbitrary_cpu_indexs     r    rY   rY     s.    H'   %/BCCCCr   r   c                    d|  d}	 t          |          5 }t          |                                                                          cd d d            S # 1 swxY w Y   d S # t          $ r}t          d|           |d }~ww xY w)Nr   z/topology/physical_package_idz)Could not determine socket for cpu_index=)r   r   r   r   r   rE   )r   package_id_absolute_pathr   r   s       r    r   r     s    NiNNN R*++ 	)qqvvxx~~''((	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) R R RIYIIJJPQQRs:   A$ 3A
A$ AA$ AA$ $
B.BBc                 <    t          t          |                     S r   )rd   rV   rS   s    r    r   r     s"    6WWW  r   
ranges_strc                    t                      }|                     d          D ]}|                                }|sd|v r]|                    d          \  }}t          |          t          |          }}|                    t          ||dz                        z|                    t          |                     |S )z
    Util for parsing a string of int ranges, as in a sysfs file.

    Args:
        ranges_str: E.g., "0-2,4,6-7"

    Returns:
        E.g., {0, 1, 2, 4, 6, 7}
    rh   -rg   )r[   splitr   r   r\   r   r   )r   ints	range_str	start_strend_strr{   r|   s          r    r   r     s     UUD%%c** 	% 	%	OO%%	 	)!*!5!5IwYW3EKKeS1W--....HHS^^$$$$Kr   r   c                    | sdS t          |           }g }|d         x}}|dd         D ]I}||dz   k    r|}||k    r|                    |            n|                    | d|            |x}}J||k    r|                    |            n|                    | d|            d                    |          S )z
    Convert a set of integers to a compact string with ranges.

    Args:
        ints: E.g., {0, 1, 2, 4, 6, 7}

    Returns:
        E.g., "0-2,4,6-7"
     r   rg   Nr   rh   )rm   appendr   )r   sorted_intsrangesr{   prevnums         r    r=   r=     s      r,,KFq>!ED122  $(??DD}}j))))////000EDD }}j!!!!''''(((88Fr   c                      t          d          5 } |                                 }d d d            n# 1 swxY w Y   t          |          S )Nz!/sys/devices/system/node/possible)r   r   r   )r   possible_nodes_strs     r    !_get_systemwide_numa_node_indicesr     s    	1	2	2 &aVVXX& & & & & & & & & & & & & & & ++=>>>s   155rX   c                     t                      }t                      }|D ];}t          |          }| t          |          k    r|                    |           <|S r   )r   r[   r   r   r   )rX   systemwide_numa_node_indicesmatching_numa_node_indicesrT   r   s        r    rZ   rZ     st    #D#F#F !$7 < <L+
 
 
 4?RSSSSS&**?;;;%%r   c                  *    t          j        d          S rG   )rH   sched_getaffinityr   r   r    r.   r.   !  s    """r   );rH   r9   collectionsr   collections.abcr   r   
contextlibr   dataclassesr   r   enumr	   loggingr
   typingr   r   r   r   torch._utils_internalr   __all__r   r:   strr   r   r   r   r/   r[   r>   r0   r<   rL   rM   rN   rO   r   r   rp   ro   re   r   rV   r   r   rU   rl   rY   r   r   r   r=   r   rZ   r.   r   r   r    <module>r      s   				     # # # # # # . . . . . . . . % % % % % % ) ) ) ) ) ) ) )             . . . . . . . . . .  0 0 0 0 0 0   
8			" 	" 	" 	" 	"3 	" 	" 	" $
4 
4 
4 
4 
4 
4 
4 
4 %-k%:d^   ,66%06	6 6 6 6rJ#c( Jt J J J J
1S 1d 1 1 1 1
  	X	   8C CH    c c#h    .C0 C0S C0 C0 C0 C0L,0C ,0CH ,0 ,0 ,0 ,0^ GCLLGCLLhqk HaS!V,< aQi    99X9 9 9 9#0#0X#0 #0 #0 #0L1s 1sSVx 1 1 1 1::X: : : :&% % % % %-S -S - - - -$s s3x    D D D D D DRC RC R R R Rs s     C    0!HSM !c ! ! ! !H?3s8 ? ? ? ?&S &SX & & & &#SX # # # # # #r   