from collections import defaultdict
from collections.abc import Sequence
from typing import cast, Optional

import torch
import torch.distributed._functional_collectives as funcol
import torch.distributed.tensor._api as dtensor
from torch._prims_common import ShapeType
from torch.distributed.device_mesh import DeviceMesh
from torch.distributed.tensor._dtensor_spec import DTensorSpec
from torch.distributed.tensor.placement_types import (
    _StridedShard,
    Partial,
    Placement,
    Replicate,
    Shard,
)
         t          |          t          |           k    s0t          dt          |           dt          |            d          g }t          t                    }t	                      }t          |          D ]h\  }}t          |t                    r#||j                 	                    ||f           >|	                    ||f           t          |t                    r|j        |v rt          d| d| d| d          |j        |v r|                    |j                   |                    |j                  }| |         }t          |          dk    r|                                \  }	}
|
j        |k    st          d	|
j         d
| d          || |	         z  }|	                    |	t          |j                  f           t          |          dk    j|S )a$  
    Replace Strided Shards with regular shards in an adjusted order.

    Returns a list of (mesh_dim, placement) tuples where the list order is the sharding order.

    ex.
    [Shard(0), _StridedShard(0, split_factor=2), Shard(0)] ->
    [(0, Shard(0)), (2, Shard(0)), (1, Shard(0))]
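
    The same conversion, written as an illustrative doctest (mesh shape and
    placements are the hypothetical values from the example above)::

        >>> ordered = _explicit_order_placements(
        ...     (2, 2, 2), [Shard(0), _StridedShard(0, split_factor=2), Shard(0)]
        ... )
        >>> ordered == [(0, Shard(0)), (2, Shard(0)), (1, Shard(0))]
        True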

    """
    if len(placements) != len(mesh_shape):
        raise RuntimeError(
            "Expected one placement per mesh dim, "
            f"but found {len(placements)} placements and {len(mesh_shape)} mesh dims."
        )
    ordered = []
    # strided shards whose ordering must be deferred, keyed by the tensor dim
    # they shard
    deferred_strided_placements = defaultdict(list)
    strided_part_ended_for_dim = set()
    for mesh_dim, p in enumerate(placements):
        if isinstance(p, _StridedShard):
            deferred_strided_placements[p.dim].append((mesh_dim, p))
        else:
            ordered.append((mesh_dim, p))
            if isinstance(p, Shard):
                if p.dim in strided_part_ended_for_dim:
                    raise NotImplementedError(
                        f"Strided sharding does not allow Shard() to appear after "
                        f"the strided part has ended. {p} at mesh dim {mesh_dim} in "
                        f"{placements} violates this assumption."
                    )

                if p.dim in deferred_strided_placements:
                    strided_part_ended_for_dim.add(p.dim)
                    strided_placements = deferred_strided_placements.pop(p.dim)
                    aggregate_size = mesh_shape[mesh_dim]
                    while len(strided_placements) > 0:
                        strided_mesh_dim, strided = strided_placements.pop()
                        if strided.split_factor != aggregate_size:
                            raise RuntimeError(
                                f"Can only convert _StridedShard to ordered Shard if "
                                f"split_factor({strided.split_factor}) == "
                                f"aggregate mesh size ({aggregate_size})"
                            )
                        aggregate_size *= mesh_shape[strided_mesh_dim]
                        ordered.append((strided_mesh_dim, Shard(strided.dim)))

    return ordered


def compute_local_shape_and_global_offset(
    global_shape: ShapeType, mesh: DeviceMesh, placements: Sequence[Placement]
) -> tuple[tuple[int, ...], tuple[int, ...]]:
    """
    Compute the local tensor shape and the global offsets into the original tensor
    of a DTensor on its current global rank. This is useful for checkpointing purposes.

    Example:
    global_tensor = [[0,  1,  2,  3,  4], sharded on mesh (DP=2, TP=2) with (Shard(1), Shard(1))
                     [10, 11, 12, 13, 14]]

    This table shows the return value of local_shape and global_offset for each rank.
    (`local_tensor` is for illustration only).

    Note how the first coordinate of global_offset is always 0, since tensor dim 0 is not sharded.

    Rank        local_tensor        local_shape     global_offset
    -------------------------------------------------------------
    0           [[0, 1],            (2, 2)          (0, 0)
                 [10, 11]]

    1           [[2],               (2, 1)          (0, 2)
                 [12]]

    2           [[3],               (2, 1)          (0, 3)
                 [13]]

    3           [[4],               (2, 1)          (0, 4)
                 [14]]
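
    Example (illustrative; matches rank 1 in the table above, whose coordinate
    on the hypothetical 2x2 ``mesh`` is (0, 1))::

        >>> compute_local_shape_and_global_offset((2, 5), mesh, [Shard(1), Shard(1)])  # doctest: +SKIP
        ((2, 1), (0, 2))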

    Args:
        global_shape (ShapeType): The global shape of the DTensor.
        mesh (:class:`DeviceMesh`): The device mesh this DTensor is distributed on.
        placements (Sequence[:class:`Placement`]): The placements of the DTensor.

    Return:
        local_shape: the shape of the DTensor's _local_tensor on the current rank.
        global_offset: a tuple of offsets for each dimension of the global tensor shape,
        identifying how this shard fits into the global tensor in each dimension.

    """
    return _compute_local_shape_and_global_offset(
        global_shape, mesh.shape, mesh.get_coordinate(), placements
    )


def _compute_local_shape_and_global_offset(
    global_shape: ShapeType,
    mesh_shape: ShapeType,
    my_coordinate: Optional[list[int]],
    placements: Sequence[Placement],
) -> tuple[tuple[int, ...], tuple[int, ...]]:
    ordered_placements = _explicit_order_placements(mesh_shape, placements)

    if my_coordinate is None:
        # if rank is not in the mesh, return an empty shape and offset
        return ((), ())

    local_shape = list(global_shape)
    global_offset = [0] * len(global_shape)
    for mesh_dim, placement in ordered_placements:
        mesh_dim_size = mesh_shape[mesh_dim]
        if isinstance(placement, Shard):
            shard_dim = placement.dim
            local_offset = [0] * len(global_shape)
            assert shard_dim < len(local_shape), (
                f"Sharding dim {shard_dim} greater than tensor ndim {len(local_shape)}"
            )
            shard_size, shard_offset = placement._local_shard_size_and_offset(
                local_shape[shard_dim],
                mesh_dim_size,
                my_coordinate[mesh_dim],
            )

            local_shape[shard_dim] = shard_size
            local_offset[shard_dim] = shard_offset

            # On a given dimension, if local_offset[shard_dim] is smaller than
            # global_offset[shard_dim], this dimension has already been sharded
            # by an earlier placement, so we accumulate the offset for the
            # given shard_dim instead of overwriting it.
            if global_offset[shard_dim] <= local_offset[shard_dim]:
                global_offset[shard_dim] = local_offset[shard_dim]
            else:
                global_offset[shard_dim] += local_offset[shard_dim]

    return tuple(local_shape), tuple(global_offset)


def compute_global_tensor_info(
    tensor: torch.Tensor, mesh: DeviceMesh, placements: Sequence[Placement]
) -> tuple[list[int], list[int]]:
    """
    Compute the global size and stride of a DTensor from the given local tensor.
    The local size is multiplied by `world_size` per Sharding dim.
    The local stride is multiplied by `world_size` per Sharding dim, as long as the
    dimension is outside sharding dim.

    For example, if we have a local tensor with size (4, 8, 2) and stride (16, 1, 8).
    If the DTensor placements are [Shard(2)] and world_size is 2;
    then the global size is (4, 8, 4) and stride is (16 * 2, 1, 8).
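
    Illustrated as a doctest (``mesh`` is a hypothetical 1-D DeviceMesh over 2
    ranks and ``local_tensor`` the (4, 8, 2) local tensor described above)::

        >>> compute_global_tensor_info(local_tensor, mesh, [Shard(2)])  # doctest: +SKIP
        ([4, 8, 4], [32, 1, 8])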

    Args:
        tensor (:class:`torch.Tensor`):
            Local tensor which DTensor will be constructed from.
        mesh (:class:`DeviceMesh`):
            Object which describes the mesh topology
            of devices for the DTensor.
        placements (Sequence[:class:`Placement`]):
            The attribute of the DTensor that describes its layout
            on the mesh topology.

    Return:
        tensor_shape: A list of ints which specifies the size of the DTensor built
            on top of the local tensor.
        tensor_stride: A list of ints which specifies the stride of the DTensor.
    """
    tensor_shape = list(tensor.size())
    tensor_stride = list(tensor.stride())
    for idx, placement in enumerate(placements):
        mesh_dim_size = mesh.size(idx)
        if placement.is_shard():
            shard_placement = cast(Shard, placement)
            if shard_placement.dim < 0:
                raise AssertionError(
                    "Shard placements should have negative dims normalized in "
                    f"the user-facing APIs: {shard_placement}"
                )
            shard_dim = shard_placement.dim

            assert shard_dim < tensor.ndim, (
                f"Sharding dim {shard_dim} greater than tensor ndim {tensor.ndim} "
                f"for placement number {idx}."
            )

            local_dim_size = tensor_shape[shard_dim]
            tensor_shape[shard_dim] = local_dim_size * mesh_dim_size

            # recover the global stride by rescaling every stride that is
            # larger than or equal to the stride of the sharded dimension
            for i in range(len(tensor_stride)):
                if i != shard_dim and tensor_stride[i] >= tensor_stride[shard_dim]:
                    # rescale the stride by the shard size
                    tensor_stride[i] = tensor_stride[i] * mesh_dim_size
        elif not isinstance(placement, (Replicate, Partial)):
            raise RuntimeError(f"placement type {type(placement)} not supported!")
    return tensor_shape, tensor_stride


def compute_global_tensor_shape(
    shape: torch.Size, mesh: DeviceMesh, placements: Sequence[Placement]
) -> torch.Size:
    """
    Compute the global size of a DTensor from the given local tensor shape,
    the mesh and placements. Different from `compute_global_tensor_info`,
    which assumes sharding is even, this util allgathers local shards' shapes
    from all ranks and thus can support uneven sharding.
    NOTE: Currently this function only supports 1D mesh.
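
    Example (illustrative; assumes a hypothetical 1-D ``mesh`` over 2 ranks
    with uneven shards, where rank 0 holds a (3, 4) local tensor and rank 1
    holds a (2, 4) local tensor under ``[Shard(0)]``)::

        >>> compute_global_tensor_shape(torch.Size([3, 4]), mesh, [Shard(0)])  # doctest: +SKIP
        torch.Size([5, 4])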

    Args:
        shape (:class:`torch.Size`):
            Shape of the local tensor
        mesh (:class:`DeviceMesh`):
            Object which describes the mesh topology
            of devices for the DTensor.
        placements (Sequence[:class:`Placement`]):
            The attribute of the DTensor that describes its layout
            on the mesh topology.

    Return:
        tensor_shape: Shape of the global DTensor.
    """
    if len(placements) != 1:
        raise NotImplementedError(
            "compute_global_tensor_shape only supports 1 placement for now."
        )
    if len(placements) != mesh.ndim:
        raise RuntimeError(
            "Expected one placement per mesh dim, "
            f"but found {len(placements)} placements and {mesh.ndim} mesh dims."
        )

    if isinstance(placements[0], Replicate):
        return shape
    elif isinstance(placements[0], Shard):
        local_shape = torch.tensor(list(shape), device=mesh.device_type)
        gathered_shaped_tensors = [
            torch.empty_like(local_shape, device=local_shape.device)
            for _ in range(mesh.size())
        ]
        funcol.all_gather_inplace(gathered_shaped_tensors, local_shape, mesh)
        sharded_dim_sum = 0
        shard_dim = placements[0].dim
        other_dims = [d for d in range(len(shape)) if d != shard_dim]
        for shape_tensor in gathered_shaped_tensors:
            if not torch.equal(local_shape[other_dims], shape_tensor[other_dims]):
                raise RuntimeError(
                    "Non-sharded dimensions should have identical size across ranks."
                )
            shape_tensor_list = shape_tensor.tolist()
            sharded_dim_sum += shape_tensor_list[shard_dim]
        global_shape = list(shape)
        global_shape[shard_dim] = sharded_dim_sum
        return torch.Size(global_shape)
    else:
        raise NotImplementedError(
            f"Placement type {type(placements[0])} not supported."
        )


def try_find_mesh_from_args(
    op_call: torch._ops.OpOverload, args: Sequence[object]
) -> DeviceMesh:
    """
    Find the device mesh object from args.
    It raises a ValueError if no mesh is found.
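
    Example (illustrative; ``dt`` stands for any DTensor operand and the
    printed mesh is hypothetical)::

        >>> try_find_mesh_from_args(torch.ops.aten.add.Tensor, [dt, 1.0])  # doctest: +SKIP
        DeviceMesh('cuda', [0, 1])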
    NOTE: we can optimize this search if needed
    """
    for arg in args:
        if isinstance(arg, (dtensor.DTensor, DTensorSpec)):
            return arg.device_mesh
        elif (
            isinstance(arg, (list, tuple))
            and len(arg) > 0
            and isinstance(arg[0], (dtensor.DTensor, DTensorSpec))
        ):
            return arg[0].device_mesh

    raise ValueError(f"Cannot find device mesh from args for op : {op_call}.")


def compute_local_stride(
    global_stride: ShapeType, mesh: DeviceMesh, placements: Sequence[Placement]
) -> tuple[int, ...]:
    """
    Compute the stride of a local tensor shard, given the global stride of the DTensor.
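
    Example (illustrative; for a contiguous (4, 8) DTensor sharded as
    ``[Shard(1)]`` on a hypothetical 2-rank 1-D ``mesh``, the column count
    halves, so every stride strictly larger than the sharded dim's stride is
    divided by the mesh size)::

        >>> compute_local_stride((8, 1), mesh, [Shard(1)])  # doctest: +SKIP
        (4, 1)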
    NOTE: Currently this function is assuming the DTensor is evenly shardable.
    """
    stride_divisors = [1] * len(global_stride)
    for mesh_idx, p in enumerate(placements):
        if p.is_shard():
            i = cast(Shard, p).dim
            # tensor dimension i is sharded on mesh dimension mesh_idx, so we
            # need to divide all the strides larger than stride[i] by the
            # submesh size
            for j in range(len(global_stride)):
                if global_stride[j] > global_stride[i]:
                    stride_divisors[j] *= mesh.size(mesh_idx)
    return tuple(
        global_stride[i] // stride_divisors[i] for i in range(len(global_stride))
    )


def normalize_to_torch_size(size) -> torch.Size:
    """
    Unify variable types of size argument to torch.Size
    Acceptable types include:
        int, Sequence[int], Tuple[int], Tuple[Sequence[int]],
        or torch.Size
    """
    if isinstance(size, torch.Size):
        return size

    if isinstance(size, int):
        torch_size = [size]
    elif len(size) == 1 and isinstance(size[0], Sequence):
        torch_size = list(size[0])
    else:
        torch_size = list(size)
    return torch.Size(torch_size)
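

# Illustrative usage sketch (not part of the original module): every accepted
# form normalizes to the same torch.Size.
#
#   normalize_to_torch_size(4)          -> torch.Size([4])
#   normalize_to_torch_size([4, 2])     -> torch.Size([4, 2])
#   normalize_to_torch_size(((4, 2),))  -> torch.Size([4, 2])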