
    &VjiE                     &   d dl Z d dlmZmZ d dlZd dlmZ d dlm	c m
Z d dlm	c mZ d dlmZ d dlmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZ d d	lm Z  d d
l!m"Z" ej#        j$        Z$dgZ%e j&        d             Z'de(e"df         de)de)fdZ*de(e"df         dedefdZ+dej,        j-        de(e.df         de/e0e.f         defdZ1d Z2dej,        j-        de(e.df         de/e0e.f         de.fdZ3dej,        j-        de(e.df         de/e0e.f         de.fdZ4dededee         dee         de)d e)d!ej5        d"e)ded#e)de(eef         fd$Z6dej,        j-        de(e.df         de/e0e.f         de.fd%Z7d&edededee         de)d e)d'ed!ej5        d"e)ded#e)defd(Z8dej,        j-        de(e.df         de/e0e.f         de.fd)Z9e$j2        j:        e3e$j;        j:        e4e$j<        j:        e7e$j=        j:        e7e$j>        j:        e9e$j?        j:        e9iZ@d* ZAd+ ZBdS ),    N)castOptional)Tensor)
DeviceMesh)DTensor	ReplicateShard)DTensorSpec
TensorMeta)_MaskPartial)	_skip_dim	Reductionreplicate_reduction_dims)normalize_dim)	Placementloss_parallelc               #   J   K   t                       dV  t                       dS )a  
    A context manager that enables loss parallelism, where efficient parallelized loss computation
    can be performed when the input is sharded on the class dimension. Currently only the cross-entropy
    loss is supported.

    Within this context manager, one can use :func:`~torch.nn.functional.cross_entropy` or
    :class:`~torch.nn.CrossEntropyLoss` as usual, with the following assumptions on the input parameters.
    The corresponding ``backward()`` call, if any, also needs to happen under this context manager.

    Args:
        input (:class:`DTensor`):
            Input logits. Assumed to be sharded on the class dimension.
        target (Union[:class:`torch.Tensor`, :class:`DTensor`]):
            Must be ground truth class indices (class probabilities currently not supported).
            Assumed to be replicated across the ``DeviceMesh``.
        weight (Union[:class:`torch.Tensor`, :class:`DTensor`], optional):
            If given, assumed to be replicated across the ``DeviceMesh``.
        label_smoothing:
            Currently not supported.

    Returns:
        A replicated :class:`DTensor`.

    Example:
        A sharded DTensor is manually created here to showcase the usage.
        In practice, it is usually the output of a TP module.

        >>> # xdoctest: +SKIP("distributed")
        >>> from torch.distributed.tensor.parallel import loss_parallel
        >>> from torch.distributed.device_mesh import init_device_mesh
        >>> ...
        >>> device_mesh = init_device_mesh("cuda", (8,))
        >>> input = torch.randn(4, 16, device="cuda", requires_grad=True)
        >>> dist_input = distribute_tensor(input, device_mesh, placements=[Shard(1)])
        >>> target = torch.randint(16, (4,), device="cuda")
        >>> with loss_parallel():
        >>>     loss = F.cross_entropy(dist_input, target, reduction="mean")
        >>>     loss.backward()
        >>> ...
    N)_enable_custom_loss_ops_disable_custom_loss_ops     `/root/voice-cloning/.venv/lib/python3.11/site-packages/torch/distributed/tensor/parallel/loss.pyr   r      s0      T 	EEEr   
placements.dimreturnc                     t          |           dk    st          d          | d                             |          st          d| d          dS )N   zLCurrently loss_parallel() only supports input on one-dimensional DeviceMesh.r   zUloss_parallel() should be enabled only when the input tensor is sharded on dimension .)len
ValueErroris_shard)r   r   s     r   _find_all_reduce_mesh_dimr"   Q   sl    z??aZ
 
 	
 a=!!#&& 
jdgjjj
 
 	
 1r   meshc                    t          | t                    r(| j        |k    r| S t          d| d| j         d          t          | t          j                  rt          j        | ||d          S t          dt          |                      )Nz	Expected z	 but got r   F)device_meshr   	run_checkzUnsupported type )	
isinstancer   r   RuntimeErrortorchr   
from_local	TypeErrortype)tensorr   r#   s      r   _cast_to_dtensorr.   ]   s     &'"" 
<
**MT:TT@QTTTUUU	FEL	)	) <!u
 
 
 	
 :DLL::;;;r   op_callargskwargsc                 >   t           j                            | ||          }t           j        j                            |j                  }t          |t                    r|S t          |t                    r|d         S t          dt          |           d          )Nr   zUnexpected tensor meta type: r   )r   _op_dispatcherunwrap_to_op_infosharding_propagatorpropagate_tensor_metaschemar'   r   tupler(   r,   )r/   r0   r1   op_infotensor_metas        r   _propagate_tensor_metar;   m   s    
 $66wfMMG(<RR K +z** Q	K	'	' Q1~O4;L;LOOOPPPr   c                    |r| j         t          j        k    sJ t          j        | t          j        j                  \  }}|                     |t          j                  } | 	                                dk    r| }nIt          j
        | |d          }t          j        |t          j        j        j        ||f          }| |z
  }t          j        t          j        |          |d          }	t          j        |	t          j        j        j        ||f          }	t          j        |	          }
||
z
  }|s|                    |          }|S )N)type_promotion_kind)dtypememory_formatr   T)keepdim)reduceOpgroup)r>   r)   halfutilselementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTtocontiguous_formatnumelamaxfuncol
all_reducec10dReduceOpMAXnamesumexpSUMlog)xr   half_to_floatr#   mesh_dimcomputation_dtyperesult_dtypeshiftedx_maxshifted_sumexpshifted_logsumexpresults               r   _log_softmaxr`      sL    %w%*$$$$&+&>	uDL' ' '#| 	
$E4KLLAwwyyA~~
1c4000!DM-24:J
 
 
 e)Yuy113EEEN&!2!7h?O  N 	.11((F )<((Mr   c                    t          t          |d                   }t          t          |d                   }t          t          |d                   }|j        }t          ||                                          }t          |j        |          }t          | ||          }t          |j        |||j        |          }	t          |j        |j        |          }
t          |	|
|	j                  S )Nr   r      r:   requires_grad)r   r   intbool_specr   r   r"   r   r;   r`   _local_tensorr#   r
   re   )r/   r0   r1   rV   r   rW   specrX   output_tensor_metaresres_specs              r   _log_softmax_handlerrn      s    
 	Wd1gA
sDG

CtAw''M7D
QUUWW
%
%C(#>>H/vFF
q]DIx
P
PC	&  H '   r   c                     t          t          |d                   }t          t          j        |d                   }|                    |          S )Nr      )r   r   r)   r>   rH   )r/   r0   r1   grad_outputinput_dtypes        r   _log_softmax_backward_handlerrs      s>    
 wQ((Ku{DG,,K>>+&&&r   rV   targetweightlocal_weight	reductionignore_indexinput_shapechannel_dimrX   c
                    |                                  ddk     rddt          dt          ffd}
| |
|          }|J  |
|          }| |z  } t          j        ||k    |d          }|                              }t          |          }|                    |||	          }t          j        | |          }|                    |||	          }|	                               }t          j        ||k    |d          }|t          j        j        k    r dk    r|                     dd	          }||fS |t          | j                  }d
|<   |                    |          }t          j        ||          	                              }t          j        ||k    |d          }|                                }n+||k                                                        |           }|t          j        j        k    r|                                }n,|t          j        j        k    r|                                |z  }||fS )Nr   rb   r   ru   r   c                 p    dk    r,dgz  }| j         d         |<   |                     |          }n| }|S )Nr   r   )shapeview)ru   r}   wrz   n_dimss      r   _weight_viewz'_nll_loss_forward.<locals>._weight_view   sM    A::E "(aE+E""AAAr   offset_shape
offset_dimr   g        )r   r   r)   where	unsqueezer   _partition_valuegather_reduce_valuesqueezer   NONEvaluenew_fulllistr}   expandrR   rH   rT   MEAN)rV   rt   ru   rv   rw   rx   ry   rz   r#   rX   r   r   local_wsafe_targetsafe_target_partial_placementsafe_target_partial_result_partialresult_reducedr_   total_weight	new_shapewsumr   s          `               @r   _nll_loss_forwardr      sh    UUWWFKzz	V 	 	 	 	 	 	 	 	 L  ''',|,,K+f4fa@@K((55L %++VVV,==dH  \![2FGGN&44^T8TTN$$[111F[</;;FIN(((VaZZzz"c**|##MM	!#	+HHY|A{L99AA+NN{6\14;;xxzz,.335588;; IM'''	in*	*	*,<r   c                 Z   t          t          |d                   }|d         }|d         }t          t          |d                   }t          t          |d                   }|                                dk    rdnd}|j        }	t          |	j        |          t          t          |	j        |g          |          }
t                      f|	j
        j        z  }t          ||
|	j
                  }d }|~t          |||	j
                  }fdt          |	j
        j                  D             }|                    |	j
        |          j        }|j        d         |j        j        |         k    sJ |t"          j        j        k    r|
}n|}t)          |          }||c|d<   |d<   t+          | t-          |          |          }t/          |j        |j        ||j        nd ||||j        ||	j
        
  
        \  }}t1          |	j
        ||          }t          |||j                  |fS )	Nr   r   rb   rp      c                 V    g | ]%}|k    rt          d           nt                      &S )r   )r	   r   ).0irX   s     r   
<listcomp>z-_nll_loss_forward_handler.<locals>.<listcomp>+  s<     
 
 
;<XE!HHH9;;
 
 
r   rc   rd   )r   r   rf   r   rh   r"   r   r   r   r   r#   ndimr.   rangeredistributeri   r}   r   r   r   r   r;   r8   r   r
   re   )r/   r0   r1   rV   rt   ru   rw   rx   rz   rj   target_placementsall_replicate_placementsrv   sharded_placementsoutput_placementsrk   r_   r   out_specrX   s                      @r   _nll_loss_forward_handlerr     sN   
 	Wd1gA!WF!WFS$q'""IT!W%%Luuww!||!!K7D(+FFH " ;-@@+  !*~	>f&7CCFL!&*BDINN
 
 
 
@Edin@U@U
 
 
 **496HIIW!!$(=k(JJJJJIN(((-4 ::DvDGT!W/tfMM,	 & 2		 FL 49&7EWXXXH 	 .	
 	
 	

 	 r   rq   r   c                    |                                 dk     rdnd}|t          j        j        k    r| |z  } |                    |          }t          j        ||k    |d          }t          j        |          }t          ||          }|	                    |          
                                }|                    ||	|
          }|j        j        J |j        j                            |j                  dz
  }t          j        |j        d         |j                  }|                                 dk    r|||<   n|                                 dk    r||||f<   nn|                    |d          }|j        }|                    d|j        |                   }||||f<   |                    |                              |d          }|                                 |                                  cxk    rdk    rn n|                     |          } |d t-          |                                           D             }|j        d         ||<   |                    |          }t/          |j                  }d||<   |                    |          }t          j        |||          }| |z  } t          j        ||k    | d          } |t          j        |          z   | z  S )	Nrb   r   r   r   g      ?)devicer   c                     g | ]}d S )r   r   )r   _s     r   r   z6_nll_loss_and_log_softmax_backward.<locals>.<listcomp>  s    ///1Q///r   )r   r   r   r   r   r)   r   
zeros_liker   r   flattenr   mask_bufferdatarH   r>   aranger}   r   	transposereshaper~   r   r   r   r   rS   )rq   rV   rt   ru   rw   rx   r   ry   rz   r#   rX   r   
grad_inputr   masked_safe_targetgrad_update	arange_1dgrad_input_tintermidate_shapegrad_input_2dr   r   w_targets                          r   "_nll_loss_and_log_softmax_backwardr   Z  s    uuww{{!!KIN(((!L0k**F+f4fa@@K!!$$J %++VVV%%k22::<<K*;;KxXX(-999#/477
8HIICOK #,>,E  I
 	uuww!||)4
%&&	
A4?
90011!++K<<(.$,,R1EFF7Bi!334"''(9::DD[RTUU
~~+//++////a/////!++K88//aeegg///	!'a	+	** MM	!#	+MM)$$<;77!H,+f4k1EEK 1%44r   c                    t          t          |d                   }t          t          |d                   }|d         }|d         }t          t          |d                   }t          t          |d                   }t          t          |d                   }	|                                dk    rdnd}
|j        }t          |j        |
          }t          t          |j        |
g          |
          }t                      f|j        j        z  }t          |||j                  }|t          |||j                  }t          |          }||c|d<   |d<   t          |	||j                  |d<   t          | t!          |          |          }t#          |j        |j        |j        ||j        nd |||	|j        |
|j        |          }t)          |j        |j        |          }t          |||j        	          S )
Nr   r   rb   rp   r         rc   rd   )r   r   rf   r   r   rh   r"   r   r   r   r   r#   r   r.   r   r;   r8   r   ri   r}   r
   re   )r/   r0   r1   rq   rV   rt   ru   rw   rx   r   rz   rj   rX   r   r   rk   r_   r   s                     r   _nll_loss_backward_handlerr     s   
 wQ((KWd1gA!WF!WFS$q'""IT!W%%LQ((Luuww!||!!K7D(+FFH " ;-@@+  !*~	>f&7CCF!&*BDINN ::DvDGT!W|-EtyQQDG/tfMM/!	 & 2		 F 	&  H *   r   c                  X    t           j        j                            t                     d S N)r   r3   _custom_op_handlersupdatecustomized_loss_opsr   r   r   r   r     s#    .556IJJJJJr   c                  b    t           D ]&} t          j        j                            |            'd S r   )r   r   r3   r   pop)	custom_ops    r   r   r     s<    ( B B	266yAAAAB Br   )C
contextlibtypingr   r   r)   torch._prims_common_prims_commonrD   )torch.distributed._functional_collectivesdistributed_functional_collectivesrL   "torch.distributed.distributed_c10ddistributed_c10drN   r   torch.distributed.device_meshr   torch.distributed.tensorr   r   r	   &torch.distributed.tensor._dtensor_specr
   r   ,torch.distributed.tensor._ops._embedding_opsr   'torch.distributed.tensor._ops._math_opsr   r   r   #torch.distributed.tensor._ops.utilsr   (torch.distributed.tensor.placement_typesr   opsaten__all__contextmanagerr   r8   rf   r"   r.   _ops
OpOverloadobjectdictstrr;   r`   rn   rs   Sizer   r   r   r   default_log_softmax_backward_datanll_loss_forwardnll_loss2d_forwardnll_loss_backwardnll_loss2d_backwardr   r   r   r   r   r   <module>r      sN       ! ! ! ! ! ! ! !  # # # # # # : : : : : : : : : 1 1 1 1 1 1 1 1 1       4 4 4 4 4 4 > > > > > > > > > > J J J J J J J J E E E E E E         
 > = = = = = > > > > > > y~ 
 - - -d	%	3*? 	c 	c 	 	 	 	<in-<5?<< < < < QZ"Q

Q fQ 	Q Q Q Q&  4Z"

 f 	   @'Z"'

' f' 	' ' ' 'F F F  VF  6"	F 
 F  F  F  F  F  F  66>F  F  F  F RAZ"A

A fA 	A A A AVB5B5B5 B5 V	B5
 B5 B5 B5 B5 B5 B5 B5 B5 B5 B5 B5J8Z"8

8 f8 	8 8 8 8x 	3#+-J!#<#%>"$>$&@ K K KB B B B Br   