
    &Vji                     b   d dl Z d dlZd dlZd dlZd dlmZmZmZ d dlm	Z	m
Z
mZmZmZmZ d dlZd dlmZ d dlmc mc mZ d dlmc mc mZ d dlmc mc mZ d dlmZ d dlm Z  d dl!m"Z"m#Z# d dl$m%Z% d dl&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- d dl.m/Z/m0Z0m1Z1m2Z2 d d	l3m4Z4 d d
l5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZC d dlDmEZE erd dlFmGZG dZH	 d dlImJZJmKZK n# eL$ r dZHY nw xY w eMd          ZNdZOePejQ        ejQ        f         ZReeejQ        eRf                  ZSe;jT        e2jT        e;jU        e2jU        e;jV        e2jV        e;jW        e2jW        e;jX        e2jX        iZYe;jW        e;jX        gZZe;jV        e;jX        fZ[e	 d_de(deSde;dee?         dee#         de(fd            Z\ede(deSde#de(fd            Z]ede	de^fd            Z_ede#de^fd            Z`edeMdejQ        fd             Zaed!ejQ        deMdejQ        fd"            Zbd!ejQ        deMdePejQ        ejQ        f         fd#Zce	 d_de(d$ejd        d%eeej        jd                          d&eeeej        je                          eeej        jd                          f         de(f
d'            Zfd&ege	         d(e^ddfd)Zhede(d$ejd        d*eieje                 d+eeeMejj        f                  de(f
d,            Zkede(d$ejd        de(fd-            Zlede(dee;         d.ee:         d/ee7         d0e^d1e^d2eMd3eMde(fd4            Zmede(de(fd5            Znede(d6e6d7e^de(fd8            Zoed_de(de#de(fd9            Zpede(de(fd:            Zqd$ejd        d;egeje                 ddfd<Zrede(d=ejd        d+eeeMejj        f                  d>ee
ejd        gdf                  d?e^de(fd@            Zsede(d;egeje                 d=ejd        fdA            ZtdBejd        dCeeej        jd                          deiejd                 fdDZu	 d_dBej        jd        d%eiej        jd                 dEeeej        je                          deiej        je                 fdFZvdBej        jd        d%eiej        jd                 deiew         fdGZxdBejd        deiew         fdHZyd$ejd        d*eieje                 d+eeeMejj        f                  ddfdIZzd+eeeMejj        f                  dJeMdKe'deejj                 fdLZ{d$ejd        d*eieje                 d%eiejd                 dePe^e^f         fdMZ|dBejd        d>e
ejd        gdf         d%eiejd                 ddfdNZ}dBejd        dOeejj                 d%eiejd                 dKe'fdPZ~dBejd        d%eiejd                 degejd                 fdQZd$ejd        d*eieje                 dReiej                 dOeejj                 ddf
dSZd;egeje                 dTegej                 dOeejj                 ddfdUZdV Zd$ejd        d*eieje                 dOeejj                 dJeMdKe'dejj        fdWZd$ejd        d;egeje                 dejQ        ddfdXZdYegej                 ddfdZZd$ejd        d*eieje                 deeje                 fd[Zd*eieje                 ddfd\Zde;fd]ZdejQ        de j        fd^ZdS )`    N)	GeneratorIterableIterator)AnyCallableno_type_checkOptionalTYPE_CHECKINGUnion)default_hooks)_mesh_resources
DeviceMesh)_get_default_group)_FSDPDeviceHandle
_FSDPState_get_module_fsdp_state_is_fsdp_flattened!_named_parameters_with_duplicatesclean_tensor_nameTrainingState)_FSDP_USE_FULL_PREC_IN_EVALFlatParameterFlatParamHandleHandleShardingStrategy)_FreeEventQueue)BackwardPrefetch
CPUOffloadFullOptimStateDictConfigFullStateDictConfigMixedPrecisionShardingStrategyStateDictConfigStateDictType)_Policy)DTensorExtensions)_sync_params_and_buffers)is_traceable_wrapper_subclass)RemovableHandleT)deferred_initfakeFi  _fsdp_syncedstateprocess_groupsharding_strategypolicydevice_meshreturnc                 8   ||t          d          |t          v }|r+|||t          d| d          t          | ||          } n<|r#|| _        |                    d          | _        n||nt                      | _        | j                                        | _        | j                                        | _	        | j	        }|r|| j
                                        z  }t          j                            |          | _        || j        z  | _        | S )NzcCannot pass both process_group and device_mesh at the same time. Please just pass only one of them.zManual wrapping with zA requires explicit specification of process group or device_mesh.r   mesh_dim)
ValueErrorHYBRID_SHARDING_STRATEGIES*_init_process_group_state_for_hybrid_shard_device_mesh	get_groupr-   r   ranksize
world_size_inter_node_pgr   DefaultState_get_gradient_predivide_factor_gradient_predivide_factor_gradient_postdivide_factor)r,   r-   r.   r/   r0   is_hybrid_strategydata_parallel_world_sizes          \/root/voice-cloning/.venv/lib/python3.11/site-packages/torch/distributed/fsdp/_init_utils.py_init_process_group_staterE   Y   sw     [%<<
 
 	
 +.HH  V^8K S(9 S S S  
 ?}k EE  	!,E"-"7"7"7"C"CE "/!:@R@T@T  $))++EJ*//11E$/ @ E$8$=$=$?$?? "AA$	
 	
 
$ 	!5#CC 
% L    c                    |rdt          |          r>|| _        |                    d          | _        |                    d          | _        nt          d|j                   |Gt                      }t          || j	        
                                          \  }}|| _        || _        n>t          |          r|\  | _        | _        nt          dt          |                     t          | j                  | _        | S )Nr   r3      z,Expected device_mesh to have ndim=2 but got zmExpected process_group to be passed in as either None or Tuple[dist.ProcessGroup, dist.ProcessGroup] but got r-   )"_is_valid_hybrid_shard_device_meshr8   r9   r=   r-   r5   ndimr   !_init_intra_and_inter_node_groups_device_handledevice_count_is_valid_hybrid_shard_pg_typetype_get_default_comm_hook_state_inter_node_state)r,   r-   r0   default_groupintra_node_groupinter_node_groups         rD   r7   r7      sJ     -k:: 		!,E $/#8#8!#8#D#DE "-"7"7"7"C"CEQ{?OQQ   
	*,,-N5/<<>>.
 .
** // *-88 	 9F5E!5!5]GKMGZGZ] ]  
 ;*  E LrF   c                     t          | t                    o+t          |           dk    ot          d | D                       S )N   c              3   J   K   | ]}t          |t          j                  V  d S N)
isinstancedistProcessGroup).0pgs     rD   	<genexpr>z1_is_valid_hybrid_shard_pg_type.<locals>.<genexpr>   s/      JJb
2t011JJJJJJrF   )rZ   tuplelenallrI   s    rD   rO   rO      sL     	=%(( 	K!#	KJJMJJJJJrF   c                 B    t          | t                    o
| j        dk    S )NrW   )rZ   r   rK   )r0   s    rD   rJ   rJ      s    k:..H;3Cq3HHrF   num_devices_per_nodec                 4    t          j        |           \  }}|S )aU  
    Return a process group across the current node.

    For example, given each row is a distinct node:
    0  1  2  3  4  5  6  7
    8  9 10 11 12 13 14 15
    This API would return an intra-node subgroup across
    [0, 1, ..., 7] or [8, 9, ..., 15] depending on the process's rank.
    For example, rank 3 would get [0, 1, ..., 7].
    )r[   new_subgroups)rd   intra_node_subgroup_s      rD   _init_intra_node_process_groupri      s!     "/0DEErF   global_process_groupc                 L  	 d}t          j        |           }t          j        |           }|z  }t          j        |           z  }t	                    D ]<		fdt	          |          D             }t          j        ||          }	|k    r|}=|J | d            |S )a  
    Return an inter-node process group where each contained rank has the same local rank.

    For example, given each row is a distinct node:
    0  1  2  3  4  5  6  7
    8  9 10 11 12 13 14 15
    This API would return inter-node process group [0, 8], [1, 9], [2, 10], and so forth
    depending on the process's rank. For example, rank 1 would get [1, 9], rank 5
    would get [5, 13].
    Nc                      g | ]
}|z  z   S  rm   )r]   i
local_rankrd   s     rD   
<listcomp>z2_init_inter_node_process_group.<locals>.<listcomp>   s1     !
 !
 !
89J!223!
 !
 !
rF   )ranksbackendz. expected to assign inter-node pg, but did not)r[   get_backendget_world_sizeget_rankrange	new_group)
rj   rd   inter_node_pgsharding_backendr<   	num_nodesmy_local_rankranks_for_inter_groupgrpro   s
    `       @rD   _init_inter_node_process_groupr~      s      M'(<==$%9::J22IM"677:NNM011    
!
 !
 !
 !
 !
=B9=M=M!
 !
 !
 n#8BRSSS&&M$$HHH %$$ rF   c                 @    t          |          t          | |          fS )a  
    Initialize intra and inter-node process groups and return the ones corresponding to this process's rank.

    This function can be used to initialize process groups for ``HYBRID_SHARD`` or
    ``_HYBRID_SHARD_ZERO2`` in FSDP.
    This function assumes each node has an equal number of CUDA-enabled devices.
    Returns:
        Tuple[dist.ProcessGroup, dist.ProcessGroup]: Intra and inter-node process group.
    )ri   r~   )rj   rd   s     rD   rL   rL      s)     	'';<<&';=QRR rF   moduleignored_modulesignored_statesc                    ||t          d          d }|d u}|r t          |          }t          |d           n#g }t          |t          |          ng d           t          |          dk    r%t	          |d         t
          j                  r|}n|}t          ||          | _        t          || j        |          | _
        t          || j                  | _        | S )NzfCannot pass both ignored_modules and ignored_states at the same time. Please just pass ignored_states.TFr   )r5   list_check_ignored_statesra   rZ   nn	Parameter_get_ignored_modules_ignored_modules_get_ignored_params_ignored_params_get_ignored_buffer_names_ignored_buffer_names)r,   r   r   r   ignored_parameterspassed_as_ignored_statesignored_states_lists          rD   _init_ignored_module_statesr     s$    "~'A:
 
 	
 -T9 
">22148888 %4%@D!!!b%	
 	
 	
 !##)!,bl;; 	2!41O1&/JJE/ E
 #<# #E LrF   r   c                    t          |           dk    rdS |rlt          d | D                       }t          d | D                       }|s4|s4t          d | D             t                    }t	          d|           dS dS t          d | D                       s2t          d	 | D             t                    }t	          d
|           dS )z
    Check that the ignored states are uniformly parameters or uniformly modules.

    We may remove this check in the future if we permit mixing.
    r   Nc              3   J   K   | ]}t          |t          j                  V  d S rY   )rZ   r   r   r]   r,   s     rD   r_   z(_check_ignored_states.<locals>.<genexpr>I  s.      UUUE2<88UUUUUUrF   c              3   J   K   | ]}t          |t          j                  V  d S rY   rZ   r   Moduler   s     rD   r_   z(_check_ignored_states.<locals>.<genexpr>J  s.      SS5*UBI66SSSSSSrF   c                 ,    h | ]}t          |          S rm   rP   r   s     rD   	<setcomp>z(_check_ignored_states.<locals>.<setcomp>M      "K"K"K54;;"K"K"KrF   )keyzUignored_states expects all nn.Parameter or all nn.Module list elements but got types c              3   J   K   | ]}t          |t          j                  V  d S rY   r   r   s     rD   r_   z(_check_ignored_states.<locals>.<genexpr>S  s.      LLE:eRY//LLLLLLrF   c                 ,    h | ]}t          |          S rm   r   r   s     rD   r   z(_check_ignored_states.<locals>.<setcomp>T  r   rF   z>ignored_modules expects nn.Module list elements but got types )ra   rb   sortedreprr5   )r   r   
all_paramsall_modulessorted_typess        rD   r   r   >  s6    >a UUnUUUUU
SSNSSSSS 	+ 	!"K"KN"K"K"KQUVVVL9*69 9  	 	 	 	 LL^LLLLL 	!"K"KN"K"K"KQUVVVL(%( (  	 	rF   ignored_params	device_idc                    d}|0t          |t          j                  r|nt          j        |          }|t          ||          D ]T}|j        j        dv r||j        }|j        j        |j        k    r$t          d|j         d|j        j                   U|pt          j                                        }|j        dk    rt          d          t          j	        |          | _
        | S )a=  
    Determine device handle used for initializing FSDP.

    If a device is specified by ``device_id``,
    then returns device handle corresponds to that device type. Otherwise, If the
    module is already on a non-CPU device, then the device type is that non-CPU device type.
    If the module is on CPU or meta, then the device type is the current accelerator device.
    See the :ref:`Accelerators<accelerators>` for details.


    This method will be called once ignored parameters was determined, as the device handle maybe needed
    for other initialization.
    N>   cpumetazLFSDP does not support modules with different device types but got params on z and r   zOFSDP needs a non-CPU accelerator device, but no accelerator device is detected.)rZ   torchdevice_get_orig_paramsrP   RuntimeError_C_get_acceleratorr   from_devicerM   )r,   r   r   r   determined_deviceparams         rD   _init_device_handler   [  s1   (  )U\22)IIi(( 	
  %fn== 
	 
	E| O33 ($)L!!<$(9(>>>&^->-C^ ^JO,J[^ ^   ?
 .L1J1J1L1L!U**a   -89JKKELrF   c                     t          |          | _        i }|                                D ]\  }}t          |          }|j        ||<   || _        | S rY   )_get_buffer_names_buffer_namesnamed_buffersr   dtype_buffer_name_to_orig_dtype)r,   r   r   buffer_namebuffers        rD   _init_buffer_stater     sf    
 ,F33E
 :<%3355 ? ?V'4428,";//'AE$LrF   mixed_precisioncpu_offloadlimit_all_gathersuse_orig_paramsbackward_prefetch_limitforward_prefetch_limitc                 ^   | j         dk    rA|t          j        k    r$t          j        d|pt          j         d           t          j        }n,|t          j        k    rt          j        dt          d           |pt          j        | _        |pt                      | _	        |4t          j                            dt          | j	                              t          j                            t"          d          d	k    | _        |pt'                      | _        || _        || _        t.          j        | _        d | _        t7                      | _        t;          j                    | _        tA          j!        | j        ||          | _"        d | _#        i }|| _$        d }	|	| _%        g }
|
| _&        | S )
NrH   z/FSDP is switching to use `NO_SHARD` instead of z since the world size is 1.zoThe `NO_SHARD` sharding strategy is deprecated. If having issues, please use `DistributedDataParallel` instead.   )
stacklevelz'torch.distributed.fsdp.mixed_precision. 1)'r<   r!   NO_SHARDwarningswarn
FULL_SHARDFutureWarningr.   r    r   r   r   _log_api_usage_oncestrosenvirongetr   _use_full_prec_in_evalr   r   r   _use_orig_paramsr   IDLEtraining_state_is_rootr   _free_event_queuer[   get_debug_level_debug_levelexec_order_utils_ExecOrderData_exec_order_data_unshard_event_fully_sharded_module_to_handle_handleparams)r,   r.   r   r   r   r   r   r   r   r   r   s              rD   _init_core_stater     s    1 0 999M'$C(8(C' ' '  
 -5	.7	7	7< 	
 	
 	
 	
 0N3C3NE+?~/?/?E"$$Rc%:O6P6PRR	
 	
 	
 	
2B773> 
  $3z||E/E,E(-EEN-//E-//E-< E
  E IK#,KE) *.GEM"$FELLrF   c                 f    g }|| _         g }|| _        g }|| _        d| _        d | _        d | _        | S )NT)_root_pre_forward_handles_pre_forward_handles_post_forward_handles_sync_gradients
_comm_hook_comm_hook_state)r,   r   r   r   s       rD   _init_runtime_stater     sK     8:&?E#24!5E35"7E EE!ELrF   backward_prefetchforward_prefetchc                 "    || _         || _        | S rY   )r   r   )r,   r   r   s      rD   _init_prefetching_stater     s     0E-E LrF   c                     t          j        |          }|r%|| j        k    rt          | j                  | _        nd | _        | S rY   )r   get_root_meshr8   r%   rM   _fsdp_extension)r,   r0   	root_meshs      rD   _init_extensionr     sP      -k::I  %yE$666 1%2F G G !%LrF   c                     t           j        | _        t                      }t	                      | _        || _        i }|| _        | S rY   )r#   FULL_STATE_DICT_state_dict_typer   r   _optim_state_dict_config_state_dict_config_unshard_params_ctx)r,   state_dict_configunshard_params_ctxs      rD   _init_state_dict_stater     sB    *:E)<)>)>%=%?%?E"0E57 2ELrF   r   c                     |D ]U}t          |j                  dk    r;d}|                                 D ]\  }}||u r|} n|sJ t          d| d          VdS )z
    Verify if the parameters are accepted by FSDP. The only restriction now
    is that the parameter cannot be a scalar tensor (param.shape == []).
    r   r   z/FSDP doesn't support scalar parameters. Change z& to a 1D tensor with numel equal to 1.N)ra   shapenamed_parametersr5   )r   r   r   
param_namenameparam_s         rD   _verify_managed_paramsr    s    
   u{q  J & 7 7 9 9  fF??!%JE # :M$M M M   ! rF   fully_sharded_moduleparam_init_fnsync_module_statesc                     t          | j        |           t          | j         j                  }t          | j         j                  \  }}|s|r|t          || j                   n:|rt          || j         j                   n|rt          j
        | fd           d  j        D             }t          | j        ||           t          | j        | j         j                   _        t          t          | j                            }	t!          ||	           |r:t#          ||	 j                    j        t(          v rt#          ||	 j                   t-           |	|            S )zHInitialize a ``FlatParamHandle`` from a module ``fully_sharded_module``.Nc                 8    t          |           d u o| j        vS rY   )r   r   )	submoduler,   s    rD   <lambda>z0_init_param_handle_from_module.<locals>.<lambda>L  s(    '=i'H'HD'P (8!77 rF   )check_fnc                 @    h | ]}|                                 D ]}|S rm   buffers)r]   ignored_moduler   s      rD   r   z1_init_param_handle_from_module.<locals>.<setcomp>P  sK       $,,..   	   rF   )_check_single_device_moduler   _get_device_from_device_idr:   rM   _need_to_materialize_moduler   _materialize_with_param_init_fn_materialize_meta_moduler)   materialize_module_move_module_to_device_get_compute_devicecompute_devicer   r   r  _sync_module_params_and_buffersr-   r.   r6   r=   _init_param_handle_from_params)
r,   r  r   r  r  device_from_device_idis_meta_moduleis_torchdistX_deferred_initignored_buffersmanaged_paramss
   `         rD   _init_param_handle_from_moduler  -  s      4e6KYWWW65:u3  3Ne3U5K3 3/N/ 	 
5 
=;T' -1G	
 	
 	
 	
 
 
  " 		
 	
 	
 	
 
% 
( 8 8 8 8	
 	
 	
 	
 #4  O 	   /
 E *+?AVWWXXN/@@@ ' .%2E	
 	
 	
 "&@@@+$ne6J   #5.:NOOOLrF   c                 2   t          |          dk    rd S t          ||| j        t          | j                 | j        j        | j        j        | j        j	        | j        j
        | j        | j        | j                  }|                                 | j        rJ | j                            |j                   || _        || j        |j        <   t+          j        d          }| j        j        r'|j        j        |k    r|                    |           d S d S d S )Nr   )fsdp_extensionr   )ra   r   r  SHARDING_STRATEGY_MAPr.   r   offload_paramsr   param_dtypereduce_dtypekeep_low_precision_gradsr-   r   r   shardr   r   append
flat_paramr   _fully_sharded_moduler   r   flat_param_to)r,   r   r  handle
cpu_devices        rD   r  r  r  s    6{{ae56()*6,  F LLNNN}	L)***EMJPE)&*FGe$$J' )F,=,D
,R,RZ((((() ),R,RrF   root_moduler   c                    d}	 |t          |          nt                      }n5# t          $ r(}t          |dt          |           z             |d}~ww xY w|D ]a}t          |t          j        j                  s"t          |dt          |           z             t          |          rt          d          b| 	                                D ]+}t          j        |          s|                    |           ,d |D             }| |v rt          j        d|            | 	                                D ]?}t          |          }|,t          |d          sJ |                    |j                   @|S )	ah  
    Check that ``_ignored_modules`` is an iterable of ``nn.Module`` s without any FSDP instances.

    Return the modules contained in their module
    subtrees as a :class:`set`. Nested FSDP instances are excluded, but their
    already-computed ignored modules are included.

    ``_ignored_modules`` represents the argument passed by the user to FSDP.
    z>`ignored_modules` should be an iterable of `torch.nn.Module`s Nzbut got zbut got an iterable with z1`ignored_modules` should not include FSDP modulesc                 t    h | ]5}|                                 D ]}t          |t          j                  |6S rm   )modulesrZ   	fsdp_fileFullyShardedDataParallel)r]   r   childs      rD   r   z'_get_ignored_modules.<locals>.<setcomp>  s\       ^^%%  %!CDD	   rF   zTrying to ignore the top-level module passed into the FSDP constructor itself will result in all parameters being ignored and is not well-supported: r   )set	TypeErrorrP   rZ   r   r   r   r   r5   r0  traversal_utils_composableaddr   r   hasattrupdater   )	r-  r   
msg_prefixignored_root_moduleser   r   r  optional_fsdp_states	            rD   r   r     s    RJQ%5%AC !!!suu 	  Q Q Q
%H5E0F0F%H%HHIIqPQ& R R&%(/22 	UJ)ST&\\)S)SSTTT!&)) 	R PQQQ	R %%'' - -*622 	- $$V,,,
 *  O o%%;28; ;	
 	
 	
 !((** I I	4Y??*.0BCCCCC""#6#GHHHs   $ 
A#AAr   c                 R   t                      }d |D             }|                    |           |!d |D             }|                    |           |                                 D ]?}t          |          }|,t	          |d          sJ |                    |j                   @|S )z
    Return the parameters of the modules in ``ignored_modules`` and the parameters in ``ignored_parameters``.

    :class:`FlatParameter` s are excluded from the result.
    c                 ^    h | ]*}|                                 D ]}t          |          |+S rm   )
parametersr   )r]   mps      rD   r   z&_get_ignored_params.<locals>.<setcomp>  sS     ! ! !ALLNN! !'(BTUVBWBW!	! ! ! !rF   Nc                 0    h | ]}t          |          |S rm   )r   )r]   rC  s     rD   r   z&_get_ignored_params.<locals>.<setcomp>  s6     (
 (
 (
1CA1F1F(
(
 (
 (
rF   r   )r4  r:  r0  r   r9  r   )r-  r   r   all_ignored_paramsparams_in_ignored_modulesparams_in_ignored_parametersr  r>  s           rD   r   r     s     36%%! !"! ! ! 7888%(
 (
)(
 (
 (
$ 	!!">??? !((** K K	4Y??*.0ABBBBB%%&9&IJJJrF   c                 J   t                      }d |D             |                    fd|                                 D                        |                                 D ]?}t	          |          }|,t          |d          sJ |                    |j                   @|S )z6Return the cleaned buffer FQNs in ``ignored_modules``.c                 @    h | ]}|                                 D ]}|S rm   r  )r]   rB  r   s      rD   r   z,_get_ignored_buffer_names.<locals>.<setcomp>  sA     " " "aiikk" ",2" " " "rF   c                 <    h | ]\  }}|v 	t          |          S rm   r   )r]   r   r   buffers_in_ignored_moduless      rD   r   z,_get_ignored_buffer_names.<locals>.<setcomp>  s;     	
 	
 	
#V333 k**333rF   Nr   )r4  r:  r   r0  r   r9  r   )r-  r   all_ignored_buffer_namesr  r>  rL  s        @rD   r   r     s    
 *-" "'" " " ##	
 	
 	
 	
'2'@'@'B'B	
 	
 	
   !((** W W	4Y??*.0GHHHHH$++,?,UVVV##rF   c                 >    d |                                  D             S )zrReturn the fully prefixed names of all buffers in the module hierarchy rooted at ``root_module`` as a class:`set`.c                 2    h | ]\  }}t          |          S rm   rK  )r]   r   rh   s      rD   r   z$_get_buffer_names.<locals>.<setcomp>  s2       +9;+&&  rF   )r   )r-  s    rD   r   r   	  s/     =H=V=V=X=X   rF   c                     d t          | |          D             }t          |          dk    r)t          j        d          |v r|t	          d          dS t          |          dk    rt	          d|           dS )z
    Raise an error if ``module`` has original parameters on multiple devices, ignoring the parameters in ``ignored_params``.

    Thus, after this method, the
    module must be either fully on the CPU or fully on a non-CPU device.
    c                     h | ]	}|j         
S rm   r   r]   r   s     rD   r   z._check_single_device_module.<locals>.<setcomp>  s    RRRu|RRRrF   rW   r   NzTTo support a module with both CPU and GPU params, please pass in device_id argument.rH   z;FSDP only supports single device modules but got params on )r   ra   r   r   r   )r   r   r   devicess       rD   r  r    s     SR)9&.)Q)QRRRG 7||qU\%00G;;5   
 
W		S'SS
 
 	
 
	rF   r:   device_handlec                 N   | dS t          | t          j                  r| nt          j        |           }|j        dk    re|j        ^t          j        d|  d| d|                                 d|j         d	           t          j        |                                          }|S )z
    Return a ``torch.device`` for the specified ``device_id``.

    Processes ``device_id`` and returns either the corresponding device or
    ``None`` if ``device_id`` is ``None``.
    Nr   z"FSDP got the argument `device_id` z	 on rank zJ, which does not have an explicit index. FSDP will use the current device z6. If this is incorrect, please explicitly call `torch.zk.set_device()` before FSDP initialization or pass in the explicit device index as the `device_id` argument.)rZ   r   r   rP   indexr   r   current_device)r   r:   rU  r   s       rD   r  r  .  s     t	5<88U		el9>U>U  {e 41 1 11 10=0L0L0N0N1 1 DJ;1 1 1	
 	
 	
 m::<<==MrF   c                 :   t          t          | |                    }t          d |D                       }|                                 D ]*}||v r|                    d          D ]}||j        z  }+| ot          ot          d |D                       }||fS )z
    Return if ``module`` has parameters on meta device and if ``module`` is using torchdistX deferred initialization.

    At most of the returned bools can
    be ``True``. If either is ``True``, then ``module`` needs to be
    materialized.
    c              3   $   K   | ]}|j         V  d S rY   )is_metarS  s     rD   r_   z._need_to_materialize_module.<locals>.<genexpr>X  s$      CC5CCCCCCrF   Frecursec              3   >   K   | ]}t          j        |          V  d S rY   )r*   is_fakerS  s     rD   r_   z._need_to_materialize_module.<locals>.<genexpr>d  s,      @@U##@@@@@@rF   )r   r   anyr0  r  r[  _TORCHDISTX_AVAIL)r   r   r   r  r  r  bufr  s           rD   r  r  K  s     *6>BBCCNCCNCCCCCN ^^%% * *	''$$U$33 	* 	*Cck)NN	*  	A	A@@@@@@@  
 666rF   c                     t          |          s"t          d| dt          |                     t          | |          }|D ]} ||           d S )Nz	Expected z to be callable but got )callabler5   rP   _get_modules_to_materialize)r-  r  r   modules_to_materializer   s        rD   r  r  i  s    
 M"" 
TTTtM?R?RTT
 
 	
 9oVV(  f rF   r  c           	         |p%t          j        |                                          }t          | |          }d }	 t          j                    5  |D ]}t          j        |                    d          |                    d                    }t          t          |                    dk    }|r+|                    |d           |                                 	 d d d            d S # 1 swxY w Y   d S # t          $ r<}	t          j        dt!          |	           dt#          |           d           |	d }	~	ww xY w)NFr\  r   )r   r]  zIUnable to call `reset_parameters()` for module on meta device with error z(. Please ensure that your module oftype z* implements a `reset_parameters()` method.)r   r   rX  re  no_grad	itertoolschainrA  r  ra   r   to_emptyreset_parametersBaseExceptionr   r   r   rP   )
r-  r  r   rU  materialization_devicerf  r   module_state_iterhas_module_statesr=  s
             rD   r  r  w  s    3 el$$&&7 7 9oVVF ]__ 
	. 
	.0 	. 	. %.O%%e%44fnnUn6S6S% %! %(->(?(?$@$@1$D!$ .OO+A5OQQQ++---	.
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	. 
	.    M!$QM MLLM M M	
 	
 	

 s<   C: BC- C: -C11C: 4C15C: :
E 7D;;E c                 D   g }t          j        | g          }| h}|r|                                }|                    |           |                                D ]C}||vr=t          |          .||vr*|                    |           |                    |           D||S rY   )collectionsdequepopleftr'  childrenr   r8  )r-  r   rf  queuevisited_modulesr   child_modules          rD   re  re    s    
 /1{m,,E'2mO
 
+%%f---"OO-- 	+ 	+LO33*<88@ 77##L111\***  
+ "!rF   r  c                    t          j        d          |#t          j                    }|                    |            g }g }|r|                                }|                    fd|                    d          D                        |                    fd|                    d          D                        |	                                D ]1}t          |t          j                  s|                    |           2|ƈfd|D             }	fd|D             }
t          |	|
|           dS t          t          |           d          }||j        k    rt!                       dS dS dS )	a  
    Move ``module`` depending on ``device_from_device_id`` and its current device.

    This includes moving ignored modules' parameters.

    - If ``device_from_device_id`` is not ``None``, then this moves
    ``module`` to the device.
    - If ``device_from_device_id`` is ``None``, then this does not move
    ``module`` but warns the user if it is on CPU.

    Precondition: ``_check_single_device_module()``.
    r   Nc              3   2   K   | ]}|j         k    |V  d S rY   rR  )r]   r   r,  s     rD   r_   z)_move_module_to_device.<locals>.<genexpr>  s<        <:-- ---- rF   Fr\  c              3   2   K   | ]}|j         k    |V  d S rY   rR  )r]   r   r,  s     rD   r_   z)_move_module_to_device.<locals>.<genexpr>  s<        =J.. .... rF   c                     g | ]}|v|	S rm   rm   )r]   rC  r   s     rD   rp   z*_move_module_to_device.<locals>.<listcomp>  s#    GGGq/F/F!/F/F/FrF   c                     g | ]}|v|	S rm   rm   )r]   rC  r  s     rD   rp   z*_move_module_to_device.<locals>.<listcomp>  s#    GGGaa.F.F.F.F.FrF   )r   r   rr  rs  r'  rt  extendrA  r  ru  rZ   r1  r2  _move_states_to_devicenextr   _warn_cpu_init)r   r   r  r  rv  r   r  curr_moduler  params_to_movebufs_to_mover   r,  s    ``         @rD   r  r    s   $ e$$J( /:.?.A.AV%'&( 	,--//K
 MM    (33E3BB     
 NN    )11%1@@     
 )1133 , ,	!)Y-OPP ,LL+++%  	,& HGGGVGGGGGGG7GGG~|=RSSS!&.994@@EU\Z77 77rF   r  c                 \   t          |           dk    rt          |          dk    rdS t          |           dk    r| d         j        }n t          |          dk    r|d         j        }t          j        d          }|| D ]r}t          j                    5  |                    |          |_        |j        $|j                            |          |j        _        ddd           n# 1 swxY w Y   s|D ]}|                    |          |_        dS ||k    rt                       dS dS )z
    Move states to the specified device.

    Precondition: ``_check_single_device_module()`` and module's parameters and
    buffers have been materialized if needed.
    r   Nr   )ra   r   r   rh  todatagradr  )r   r  r  rX  r,  r   r   s          rD   r  r    s    6{{aCLLA--
6{{Q)	W		 *e$$J(  	K 	KE K K"XX&;<<
:)&+jmm4I&J&JEJOK K K K K K K K K K K K K K K  	; 	;F ))$9::FKK	; 	;	:	%	% 
&	%s   AC))C-	0C-	c                  .    t          j        d           d S )Nam  The passed-in `module` is on CPU and will thus have FSDP's sharding initialization run on CPU, which may be slower than on GPU. We recommend passing in the `device_id` argument for FSDP to move `module` to GPU for the sharding initialization. `module` must also be on GPU device to work with the `sync_module_states=True` flag since that requires GPU communication.)r   r   rm   rF   rD   r  r    s%    M	1    rF   c                    t          t          | |          d          }||j        j        dk    r|j        }n&t	          j        |                                          }|||k    rt          d| d| d|           |S )a)  
    Determine and return this FSDP instance's compute device.

    If the module is already on a non-CPU device, then the compute device is that non-CPU
    device. If the module is on CPU, then the compute device is the current
    device.

    Since this method should be called after materializing the module, any
    non-CPU device should not be meta device. For now, the compute device is
    always a CUDA or CUDA-like device with its explicit index.

    Precondition: ``_check_single_device_module()`` and
    ``_move_module_to_device()``.
    Nr   z4Inconsistent compute device and `device_id` on rank z: z vs )r  r   r   rP   r   rX  r5   )r   r   r  r:   rU  r   r  s          rD   r  r    s    * !&.994@@EU\.%77m&B&B&D&DEE(^?T-T-T;4 ; ;; ;#8; ;
 
 	
 rF   c                   
 g }|                                  D ]}t          |t          d          st          |t          d           |                                
t          
          r;
                                \  }}
fd|D             }|                    |           |                    
           |D ]u}|                                t                    r;                                \  }}fd|D             }	|                    |	           `|                               vt          |           t          ||t          d           dS )z
    Synchronize module states (i.e. parameters ``params`` and all not-yet-synced buffers) by broadcasting from rank 0 to all ranks.

    Precondition: ``sync_module_states == True`` and ``self.process_group`` has
    been set.
    FTc                 0    g | ]}t          |          S rm   getattr)r]   attrdetached_buffers     rD   rp   z3_sync_module_params_and_buffers.<locals>.<listcomp>I  s#     R R RD$!?!? R R RrF   c                 0    g | ]}t          |          S rm   r  )r]   r  detached_params     rD   rp   z3_sync_module_params_and_buffers.<locals>.<listcomp>R  s#    LLLdGND99LLLrF   r   )srcN)r  r  FSDP_SYNCEDsetattrdetachr'   __tensor_flatten__r~  r'  +_check_module_states_for_sync_module_statesr&   PARAM_BROADCAST_BUCKET_SIZE)r   r   r-   module_statesr   attrsrh   inner_buffersr   inner_paramsr  r  s             @@rD   r  r  4  s    )+M.."" 6 6v{E22 
	6FK...$mmooO,_== 6 +==??q R R R RE R R R$$]3333$$_555 1 1(88 	1%88::HE1LLLLeLLLL  ....  0000/>>>#	     rF   r  c                 ^    | r(t          d | D                       rt          d          d S d S )Nc              3   P   K   | ]!}|j         t          j         d           k    V  "dS )r   N)r   r   )r]   tensors     rD   r_   z>_check_module_states_for_sync_module_states.<locals>.<genexpr>c  sD        17e,,,     rF   zThe module has CPU parameters or buffers when `sync_module_states=True`, which requires them to be on GPU. Please specify the `device_id` argument or move the module to GPU before passing it to FSDP.)r`  r5   )r  s    rD   r  r  `  sg      
  ;H     
 C
 
 	

 
 
 
rF   c              #      K   |                                  }	 	 t          |          }||vrt          |          s|V  '# t          $ r Y dS w xY w)aD  
    Return an iterator over the original parameters in ``module``.

    The iterator does not return
    the parameters in ``ignored_params``, any ``FlatParameter`` s (which may be
    present due to nested FSDP wrapping), or any original parameters already
    flattened (only relevant when ``use_orig_params=True``).
    TN)rA  r  r   StopIteration)r   r   	param_genr   s       rD   r   r   m  sy       !!##I	OOEN**3Ee3L3L*	    s   (A   
AAc           	          t          |           D ]G\  }}||vr>t          |          s/t          d| d|                                 d|j                   HdS )a5  
    Check that original parameters in ``fsdp_module`` have been flattened.

    The flattened parameters are made
    invisible to ``named_parameters()`` for the module hierarchy rooted at
    ``fsdp_module``. This should be called as a sanity check after flattening
    the wrapped module's parameters.
    z Found an unflattened parameter: z;  N)r   r   r   r;   	__class__)fsdp_moduler   r   r   s       rD   _check_orig_params_flattenedr    s     ?{KK  
E&&/A%/H/H&4: 4 4::<<4 4"'/4 4   rF   c                 R    | t           j        k    rt          j        nt          j        S rY   )r!   r   r   allreduce_hookreduce_scatter_hook)r.   s    rD   _get_default_comm_hookr    s)      0 999 	$$.rF   c                 ,    t          j        |           S )NrI   )r   r>   rI   s    rD   rQ   rQ     s     %MBBBBrF   rY   )rr  ri  r   r   collections.abcr   r   r   typingr   r   r   r	   r
   r   r   torch.distributeddistributedr[   (torch.distributed.fsdp._exec_order_utilsfsdp_exec_order_utilsr   'torch.distributed.fsdp._traversal_utils_traversal_utilsr6  2torch.distributed.fsdp.fully_sharded_data_parallelfully_sharded_data_parallelr1  torch.nnr   (torch.distributed.algorithms._comm_hooksr   torch.distributed.device_meshr   r   "torch.distributed.distributed_c10dr   $torch.distributed.fsdp._common_utilsr   r   r   r   r   r   r   "torch.distributed.fsdp._flat_paramr   r   r   r   %torch.distributed.fsdp._limiter_utilsr   torch.distributed.fsdp.apir   r   r   r   r    r!   r"   r#   torch.distributed.fsdp.wrapr$   &torch.distributed.tensor.parallel.fsdpr%   torch.distributed.utilsr&   torch.utils._python_dispatchr'   torch.utils.hooksr(   ra  
torchdistxr)   r*   ImportErrorintr  r  r`   r\   HybridShardProcessGroupTypeProcessGroupTyper   r   SHARD_GRAD_OPHYBRID_SHARD_HYBRID_SHARD_ZERO2r!  r6   #NO_RESHARD_AFTER_FORWARD_STRATEGIESrE   r7   boolrO   rJ   ri   r~   rL   r   r   r   r   r   r4  r   r   r   r   r   r   r   r   r  r  r  r   r   r   r   r   r  r  r  r  r  re  Tensorr  r  r  r  r  r  r   r  r  r>   rQ   rm   rF   rD   <module>r     s           				  9 9 9 9 9 9 9 9 9 9 O O O O O O O O O O O O O O O O              C C C C C C C C C C C C A A A A A A A A A A A A F F F F F F F F F F F F       B B B B B B E E E E E E E E A A A A A A                             B A A A A A	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 0 / / / / / D D D D D D < < < < < < F F F F F F  2111111 .........    "c"344 #D$5t7H$HI E$"35P"PQR 
 5>!7!B"$:$H!#9#F(*@*T  !( 
 "(' #  )-0 00#0 (0 W	0
 *%0 0 0 0 0f ((#( ( 	( ( ( (V # $     IJ I4 I I I I  AR     !+!! 
! ! ! !H+ 4d//0   &  	+ ++I+ hux78+ %(,-.%(/9R0SS	+ + + + +\I9=	   : --I- %- c5</01	-
 - - - -` I    " >> 01> n-> *%	>
 > > !>  > > > > >B      		'	 	 		 	 	 	  : J *     *     29 d2<6H T    & AA)A c5</01A Hbi[$%678	A
 A A A A AH ))) )) ) ) )<66x896 	^6 6 6 6x BF ) !%(*<!=> 			   D$$)$ 	X$ $ $ $:29 S    
I
%
 c5</01
 
	
 
 
 
<c5</01
 % el	   :7I7%7 ^7 4:	7 7 7 7<RYK-. ^ 
	     #EL1  ^  %	       F""-0^"	")_" " " ",3I3%3 &3 $EL1	3
 
3 3 3 3l%, $EL1 
	   @  I% $EL1 	
 % \   F)I)) $) 
	) ) ) )X

%

	

 

 

 

I% bl   ,% 
   (.>    C$CC C C C C Cs   C C! C!