
    &Vji                     6   d dl Z d dlZd dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZmZmZ d Z	 dd	ej        d
edededej        deej                 defdZd	ej        d
ededefdZd	edee         dej        fdZdS )    N)Optional)_get_device_module)distributed_c10d)ShardShardedTensorShardedTensorMetadataTensorPropertiesShardMetadata)
DeviceMeshDTensor	Replicater   c                     |                                 dk    rd|  d| S |                                 dk    r*d|  d| dt          |                                           S d|  d| d| |z   S )Ncpuzrank:/hpu:)lowerr   current_device)rankdevice_typenum_devices_per_nodes      ]/root/voice-cloning/.venv/lib/python3.11/site-packages/torch/distributed/fsdp/_shard_utils.py_get_remote_device_strr      s    e##+t++k+++						%	%^t^^k^^,>{,K,K,Z,Z,\,\^^^ItIIkIID3G,GIII    tensorr   
world_sizer   pgdevicereturnc                 j   |                      |d          }t          |          |k    r||                                         }d |                                 D             t	          j        |                                 d         |z            |z  d<   t          j        ||          g}ng }d |D             }	dgt          t          j
        d |	D                                 dd         z   }
dgt          |	d                   dz
  z  fd	|
D             }|t          j                  j        n|j        fd
t          t          |	                    D             }t          |	          t          |          cxk    rt          |          k    sn J d t          ||	|          D             }t!          ||                                 t#          | j        | j        dt(          j        |                                                     }t/          j        ||          S )z
    Shard a tensor to chunks along the first dimension. The local rank will gets its
    corresponding chunk as the local shard to create a ShardedTensor.
    r   )dimc                     g | ]}d S r    .0_s     r   
<listcomp>z0_create_chunk_sharded_tensor.<locals>.<listcomp>-   s    ,,,1,,,r   c                 P    g | ]#}t          |                                          $S r%   )listsize)r'   chunks     r   r)   z0_create_chunk_sharded_tensor.<locals>.<listcomp>4   s(    :::%4

%%:::r   c                     g | ]
}|d          S r$   r%   )r'   
chunk_sizes     r   r)   z0_create_chunk_sharded_tensor.<locals>.<listcomp>6   s    JJJ
jmJJJr   N   c                     g | ]}|gz   	S r%   r%   )r'   d0offsetss     r   r)   z0_create_chunk_sharded_tensor.<locals>.<listcomp>9   s    ;;;bTG^;;;r   c                 X    g | ]&}t          t          j        |                    'S r%   )r   distget_global_rank)r'   rr   r   r   s     r   r)   z0_create_chunk_sharded_tensor.<locals>.<listcomp>?   sK         	 Q'' 	
 	
  r   c                 8    g | ]\  }}}t          |||          S r%   r
   )r'   offsetr,   	placements       r   r)   z0_create_chunk_sharded_tensor.<locals>.<listcomp>H   s:       #FD) 	fdI..  r   F)dtypelayoutrequires_gradmemory_format
pin_memory)shards_metadatar,   tensor_properties)sharded_tensor_metadataprocess_group)r-   lencloner,   mathceilr   from_tensor_and_offsetsr+   	itertools
accumulater   _get_pg_default_devicetyperangezipr   r	   r<   r=   torchcontiguous_format	is_pinnedr   +_init_from_local_shards_and_global_metadata)r   r   r   r   r   r   chunkslocal_shardlocal_shardschunk_sizesdim0_offsetschunk_offsets
placementsshard_metadatarC   r   r4   s      ``          @@r   _create_chunk_sharded_tensorr\      s    \\*!\,,F
6{{TTl((**,,fkkmm,,,Yv{{}}Q/*<==D
5k7DQQR ;:6:::K3JJkJJJKK 	rc L cSQ((1,-G;;;;l;;;M > 	/3388[ 
      s;''((  J {s=11DDDDS__DDDDDD '*=+z'R'R  N 4&[[]]*,=1''))
 
 

 
 
 D.EUW   r   device_meshc                 B   |                                                                  } d t          |j                  D             }d t          |j                  D             }t	          d          |d<   t          j        | ||d                              |          S )z
    Shard a tensor to chunks along the first dimension. The local rank will gets its
    corresponding chunk as the local tensor to create a DTensor.
    c                 *    g | ]}t                      S r%   r   r&   s     r   r)   z)_create_chunk_dtensor.<locals>.<listcomp>j   s    IIIAIKKIIIr   c                 *    g | ]}t                      S r%   r`   r&   s     r   r)   z)_create_chunk_dtensor.<locals>.<listcomp>k   s    EEE	EEEr   r   r0   F)	run_check)rZ   )detachrF   rN   ndimDShardr   
from_localredistribute)r   r   r]   replicate_placementsshard_placementss        r   _create_chunk_dtensorrj   \   s     ]]__""$$F JI{7G1H1HIIIEEU;3C-D-DEEE!!99R1U  l#   r   	root_meshc                     || j         k    s
J d            t          t          j        | j                            }t                      |d<   |                     | j         |          } |                                 S )zT
    All gather a DTensor in its sharded dimension and return the local tensor.
    z2The device mesh of a tensor should be a root mesh.r0   )r]   rZ   )r]   r+   copydeepcopyrZ   r   rg   to_local)r   rk   rZ   s      r   _all_gather_dtensorrp   u   s     ****< +** dmF$56677J [[JrN  & !  F
 ??r   )N) rm   rJ   rG   typingr   rP   torch.distributeddistributedr6   torch._utilsr   r   'torch.distributed._shard.sharded_tensorr   r   r   r	   &torch.distributed._shard.sharding_specr   torch.distributed.tensorr   r   r   re   r   TensorintProcessGroupr   r\   rj   rp   r%   r   r   <module>r{      s                            + + + + + + . . . . . .            A @ @ @ @ @ T T T T T T T T T T T TJ J J &*; ;L;
; ; 	;
 	; U\"; ; ; ; ;|L
  	   2
# \     r   