
import warnings
from fnmatch import fnmatch
from typing import Optional, Union

import torch
import torch.nn as nn
from torch.distributed.device_mesh import _mesh_resources, DeviceMesh
from torch.distributed.tensor.parallel.style import ParallelStyle


__all__ = ["parallelize_module"]


def parallelize_module(
    module: nn.Module,
    device_mesh: Optional[DeviceMesh] = None,
    parallelize_plan: Optional[Union[ParallelStyle, dict[str, ParallelStyle]]] = None,
    *,
    src_data_rank: Optional[int] = 0,
) -> nn.Module:
    """
    Apply Tensor Parallelism in PyTorch by parallelizing modules or sub-modules based on a user-specified plan.

    We parallelize the module or sub-modules based on the ``parallelize_plan``. The ``parallelize_plan``
    contains :class:`ParallelStyle` objects, which indicate how the user wants the module or sub-module
    to be parallelized.

    Users can also specify a different parallel style per module fully qualified name (FQN).

    Note that ``parallelize_module`` only accepts a 1-D :class:`DeviceMesh`. If you have a 2-D or N-D
    :class:`DeviceMesh`, slice it to a 1-D sub-DeviceMesh first and then pass that to this API
    (e.g. ``device_mesh["tp"]``).

    Args:
        module (:class:`nn.Module`):
            Module to be parallelized.
        device_mesh (:class:`DeviceMesh`, optional):
            Object which describes the mesh topology of devices for the DTensor.
            If not specified, the call must be under a DeviceMesh context.
        parallelize_plan (Union[:class:`ParallelStyle`, Dict[str, :class:`ParallelStyle`]], optional):
            The plan used to parallelize the module. It can be either a
            :class:`ParallelStyle` object which contains how we prepare
            input/output for Tensor Parallelism or it can be a dict of module
            FQN and its corresponding :class:`ParallelStyle` object. If not
            specified, the call will do nothing at the moment.

    Keyword args:
        src_data_rank (int, optional): the rank of the source data for the logical/global tensor, it is used by
            :meth:`distribute_tensor` to scatter/broadcast the shards/replicas to other ranks. By default,
            we use ``group_rank=0`` on each DeviceMesh dimension as the source data to preserve the single-device
            semantic. If passing ``None`` explicitly, :meth:`parallelize_module` simply uses its local data instead
            of trying to preserve the single-device semantic via scatter/broadcast. Default: 0
    Return:
        The parallelized :class:`nn.Module` object.

    Example::
        >>> # xdoctest: +SKIP("distributed")
        >>> from torch.distributed.tensor.parallel import parallelize_module, ColwiseParallel, RowwiseParallel
        >>> from torch.distributed.device_mesh import init_device_mesh
        >>>
        >>> # Define the module.
        >>> m = Model(...)
        >>> tp_mesh = init_device_mesh("cuda", (8,))
        >>> m = parallelize_module(m, tp_mesh, {"w1": ColwiseParallel(), "w2": RowwiseParallel()})
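        >>> # Illustrative sketch: plan keys are matched per path component with ``fnmatch``,
        >>> # so wildcard FQNs can target repeated submodules. The ``layers.*.w1`` key below
        >>> # assumes the model nests its blocks under a ``layers`` container:
        >>> # parallelize_module(m, tp_mesh, {"layers.*.w1": ColwiseParallel()})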
        >>>

    .. note:: For complex module architectures like Attention and MLP layers, we recommend composing
        different ParallelStyles together (e.g. ``ColwiseParallel`` and ``RowwiseParallel``) and passing
        them as the parallelize_plan to achieve the desired sharding computation.
    """
    torch._C._log_api_usage_once(
        "torch.distributed.tensor.parallel.parallelize_module"
    )

    # Fall back to the DeviceMesh of the enclosing context if none is passed in.
    device_mesh = device_mesh or _mesh_resources.get_current_mesh()

    if parallelize_plan is None:
        warnings.warn(
            "No parallelize_plan is provided and auto-parallel is not supported "
            "at the moment, so this parallelize_module call will do nothing."
        )
        return module

    if isinstance(parallelize_plan, ParallelStyle):
        # A single ParallelStyle applies directly to this module.
        parallelize_plan.src_data_rank = src_data_rank
        return parallelize_plan._apply(module, device_mesh)
    elif isinstance(parallelize_plan, dict):
        for module_path, parallelize_style in parallelize_plan.items():
            path_splits = module_path.split(".")
            # Match the first FQN component against the direct children; fnmatch
            # allows wildcard patterns such as "layers.*".
            token = path_splits[0]
            matched_children = list(
                filter(
                    # t[0] is the child module's name
                    lambda t: fnmatch(t[0], token),
                    module.named_children(),
                )
            )
            if not matched_children:
                warnings.warn(
                    f"Parallelize plan key '{module_path}' could not be resolved: "
                    f"no submodule matching token '{token}' in module {module}, "
                    "skipping this plan entry."
                )
            path_splits.pop(0)
            for _, submodule in matched_children:
                if path_splits:
                    # Not at the leaf yet: recurse with the rest of the FQN.
                    leaf_path = ".".join(path_splits)
                    parallelize_module(
                        submodule,
                        device_mesh,
                        {leaf_path: parallelize_style},
                        src_data_rank=src_data_rank,
                    )
                else:
                    # Reached the leaf: apply the style to the matched submodule.
                    parallelize_module(
                        submodule,
                        device_mesh,
                        parallelize_style,
                        src_data_rank=src_data_rank,
                    )
        return module
    else:
        raise TypeError(
            "Expect Union[ParallelStyle, Dict[str, ParallelStyle]] for "
            f"parallelize_plan, {type(parallelize_plan)} found!"
        )