
# mypy: allow-untyped-defs
from typing import cast, Optional, Union

import torch
from torch import Tensor

from .optimizer import (
    _capturable_doc,
    _default_to_fused_or_foreach,
    _differentiable_doc,
    _disable_dynamo_if_unsupported,
    _foreach_doc,
    _get_capturable_supported_devices,
    _get_scalar_dtype,
    _get_value,
    _maximize_doc,
    _params_doc,
    _to_scalar,
    _use_grad_for_differentiable,
    _view_as_real,
    Optimizer,
    ParamsT,
)

__all__ = ["Adamax", "adamax"]


class Adamax(Optimizer):
    def __init__(
        self,
        params: ParamsT,
        lr: Union[float, Tensor] = 2e-3,
        betas: tuple[float, float] = (0.9, 0.999),
        eps: float = 1e-8,
        weight_decay: float = 0,
        foreach: Optional[bool] = None,
        *,
        maximize: bool = False,
        differentiable: bool = False,
        capturable: bool = False,
    ):
        if isinstance(lr, Tensor) and lr.numel() != 1:
            raise ValueError("Tensor lr must be 1-element")
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 0: {betas[0]}")
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(f"Invalid beta parameter at index 1: {betas[1]}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")

        defaults = dict(
            lr=lr,
            betas=betas,
            eps=eps,
            weight_decay=weight_decay,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
            capturable=capturable,
        )
        super().__init__(params, defaults)

    def __setstate__(self, state):
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
            group.setdefault("capturable", False)
            for p in group["params"]:
                p_state = self.state.get(p, [])
                # Older checkpoints stored `step` as a Python number; convert it
                # to a tensor of the scalar dtype expected by the rest of the code.
                if len(p_state) != 0 and not torch.is_tensor(p_state["step"]):
                    step_val = float(p_state["step"])
                    p_state["step"] = (
                        torch.tensor(
                            step_val, dtype=_get_scalar_dtype(), device=p.device
                        )
                        if group["capturable"]
                        else torch.tensor(step_val, dtype=_get_scalar_dtype())
                    )

    def _init_group(
        self, group, params_with_grad, grads, exp_avgs, exp_infs, state_steps
    ):
        has_complex = False
        for p in group["params"]:
            if p.grad is None:
                continue
            has_complex |= torch.is_complex(p)
            params_with_grad.append(p)
            if p.grad.is_sparse:
                raise RuntimeError("Adamax does not support sparse gradients")
            grads.append(p.grad)

            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state["step"] = (
                    torch.zeros((), dtype=_get_scalar_dtype(), device=p.device)
                    if group["capturable"]
                    else torch.tensor(0.0, dtype=_get_scalar_dtype())
                )
                state["exp_avg"] = torch.zeros_like(
                    p, memory_format=torch.preserve_format
                )
                state["exp_inf"] = torch.zeros_like(
                    p, memory_format=torch.preserve_format
                )

            exp_avgs.append(state["exp_avg"])
            exp_infs.append(state["exp_inf"])
            state_steps.append(state["step"])

        return has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Performs a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        self._cuda_graph_capture_health_check()

        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad: list[Tensor] = []
            grads: list[Tensor] = []
            exp_avgs: list[Tensor] = []
            exp_infs: list[Tensor] = []
            state_steps: list[Tensor] = []

            beta1, beta2 = group["betas"]
            eps = group["eps"]
            lr = group["lr"]
            weight_decay = group["weight_decay"]
            foreach = group["foreach"]
            maximize = group["maximize"]
            differentiable = group["differentiable"]
            capturable = group["capturable"]

            has_complex = self._init_group(
                group, params_with_grad, grads, exp_avgs, exp_infs, state_steps
            )

            adamax(
                params_with_grad,
                grads,
                exp_avgs,
                exp_infs,
                state_steps,
                eps=eps,
                beta1=beta1,
                beta2=beta2,
                lr=lr,
                weight_decay=weight_decay,
                foreach=foreach,
                maximize=maximize,
                differentiable=differentiable,
                capturable=capturable,
                has_complex=has_complex,
            )

        return loss


Adamax.__doc__ = (
    r"""Implements Adamax algorithm (a variant of Adam based on infinity norm).

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \beta_1, \beta_2
                \text{ (betas)},\theta_0 \text{ (params)},f(\theta) \text{ (objective)},
                \: \lambda \text{ (weight decay)},                                                \\
            &\hspace{13mm}    \epsilon \text{ (epsilon)}                                          \\
            &\textbf{initialize} :  m_0 \leftarrow 0 \text{ (first moment)},
                u_0 \leftarrow 0 \text{ (infinity norm)}                                  \\[-1ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm}if \: \lambda \neq 0                                                    \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
            &\hspace{5mm}m_t      \leftarrow   \beta_1 m_{t-1} + (1 - \beta_1) g_t               \\
            &\hspace{5mm}u_t      \leftarrow   \mathrm{max}(\beta_2 u_{t-1}, |g_{t}|+\epsilon)   \\
            &\hspace{5mm}\theta_t \leftarrow \theta_{t-1} - \frac{\gamma m_t}{(1-\beta^t_1) u_t} \\
            &\rule{110mm}{0.4pt}                                                          \\[-1ex]
            &\bf{return} \:  \theta_t                                                     \\[-1ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adam: A Method for Stochastic Optimization`_.
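
    Example (a minimal usage sketch; ``model``, ``input``, ``target`` and
    ``loss_fn`` are placeholders assumed to be defined by the caller):
        >>> # xdoctest: +SKIP
        >>> optimizer = torch.optim.Adamax(model.parameters(), lr=2e-3)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()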
    """
    + rf"""
    Args:
        {_params_doc}
        lr (float, Tensor, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            the running average of the gradient and the exponentially
            weighted infinity norm (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}
        {_capturable_doc}

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980

    """
)


def _single_tensor_adamax(
    params: list[Tensor],
    grads: list[Tensor],
    exp_avgs: list[Tensor],
    exp_infs: list[Tensor],
    state_steps: list[Tensor],
    *,
    eps: float,
    beta1: float,
    beta2: float,
    lr: float,
    weight_decay: float,
    maximize: bool,
    differentiable: bool,
    capturable: bool,
    has_complex: bool,
):
    if not torch.jit.is_scripting():
        lr = _to_scalar(lr)

    for i, param in enumerate(params):
        grad = grads[i]
        grad = grad if not maximize else -grad
        exp_avg = exp_avgs[i]
        exp_inf = exp_infs[i]
        step_t = state_steps[i]

        # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable]
        if not torch.compiler.is_compiling() and capturable:
            capturable_supported_devices = _get_capturable_supported_devices()
            assert (
                param.device.type == step_t.device.type
                and param.device.type in capturable_supported_devices
            ), (
                "If capturable=True, params and state_steps must be on supported devices: "
                f"{capturable_supported_devices}."
            )

        # update step
        step_t += 1

        if weight_decay != 0:
            grad = grad.add(param, alpha=weight_decay)

        if torch.is_complex(param):
            param = torch.view_as_real(param)
            grad = torch.view_as_real(grad)
            exp_avg = torch.view_as_real(exp_avg)
            exp_inf = torch.view_as_real(exp_inf)

        # Update biased first moment estimate.
        exp_avg.lerp_(grad, 1 - beta1)
        # Update the exponentially weighted infinity norm.
        if not differentiable:
            torch.maximum(
                exp_inf.mul_(beta2),
                grad.abs().add_(eps),
                out=exp_inf,
            )
        else:
            # torch.maximum with out= is not differentiable, so build the pair
            # explicitly and reduce with amax instead.
            norm_buf = torch.cat(
                [
                    exp_inf.mul_(beta2).unsqueeze(0),
                    grad.abs().add_(eps).unsqueeze_(0),
                ],
                0,
            )
            exp_inf.copy_(torch.amax(norm_buf, 0, keepdim=False))

        if capturable:
            # The bias correction is negated and divided by lr so the parameter
            # update can be expressed as a single in-place addcdiv, which keeps
            # the computation graph-capturable.
            neg_bias_correction = beta1**step_t - 1
            neg_bias_correction.div_(lr)
            denom = exp_inf * neg_bias_correction
            param.addcdiv_(exp_avg, denom)
        else:
            bias_correction = 1 - beta1 ** _get_value(step_t)
            clr = lr / bias_correction

            param.addcdiv_(exp_avg, exp_inf, value=-clr)


def _multi_tensor_adamax(
    params: list[Tensor],
    grads: list[Tensor],
    exp_avgs: list[Tensor],
    exp_infs: list[Tensor],
    state_steps: list[Tensor],
    *,
    eps: float,
    beta1: float,
    beta2: float,
    lr: float,
    weight_decay: float,
    maximize: bool,
    differentiable: bool,
    capturable: bool,
    has_complex: bool,
):
    assert not differentiable, "_foreach ops don't support autograd"

    if len(params) == 0:
        return

    # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable]
    if not torch.compiler.is_compiling() and capturable:
        capturable_supported_devices = _get_capturable_supported_devices(
            supports_xla=False
        )
        assert all(
            p.device.type == step.device.type
            and p.device.type in capturable_supported_devices
            for p, step in zip(params, state_steps)
        ), (
            "If capturable=True, params and state_steps must be on supported devices: "
            f"{capturable_supported_devices}."
        )

    lr = _to_scalar(lr)

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, exp_avgs, exp_infs, state_steps]  # type: ignore[list-item]
    )
    for (
        grouped_params_,
        grouped_grads_,
        grouped_exp_avgs_,
        grouped_exp_infs_,
        grouped_state_steps_,
    ), _ in grouped_tensors.values():
        grouped_params = cast(list[Tensor], grouped_params_)
        grouped_grads = cast(list[Tensor], grouped_grads_)
        grouped_exp_avgs = cast(list[Tensor], grouped_exp_avgs_)
        grouped_exp_infs = cast(list[Tensor], grouped_exp_infs_)
        grouped_state_steps = cast(list[Tensor], grouped_state_steps_)

        if has_complex:
            _view_as_real(
                grouped_params, grouped_grads, grouped_exp_avgs, grouped_exp_infs
            )

        if maximize:
            grouped_grads = torch._foreach_neg(grouped_grads)  # type: ignore[assignment]

        # Update steps
        # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over
        # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just
        # wrapped it once now. The alpha is required to assure we go to the right overload.
        if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu:
            torch._foreach_add_(
                grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0
            )
        else:
            torch._foreach_add_(grouped_state_steps, 1)

        if weight_decay != 0:
            if maximize:
                # Re-use the intermediate memory (grouped_grads) already allocated for maximize
                torch._foreach_add_(grouped_grads, grouped_params, alpha=weight_decay)
            else:
                grouped_grads = torch._foreach_add(  # type: ignore[assignment]
                    grouped_grads, grouped_params, alpha=weight_decay
                )

        # Update biased first moment estimate.
        torch._foreach_lerp_(grouped_exp_avgs, grouped_grads, 1 - beta1)

        # Update the exponentially weighted infinity norm.
        torch._foreach_mul_(grouped_exp_infs, beta2)

        # If no intermediate copy of the grads was made above, make one now so
        # the in-place abs does not clobber the caller's gradients.
        if not maximize and weight_decay == 0:
            grouped_grads = torch._foreach_abs(grouped_grads)  # type: ignore[assignment]
        else:
            torch._foreach_abs_(grouped_grads)

        torch._foreach_add_(grouped_grads, eps)
        torch._foreach_maximum_(grouped_exp_infs, grouped_grads)

        if capturable:
            bias_corrections = torch._foreach_pow(beta1, grouped_state_steps)
            # foreach_sub doesn't allow a scalar as the first arg
            torch._foreach_sub_(bias_corrections, 1)
            torch._foreach_div_(bias_corrections, lr)

            denom = torch._foreach_mul(grouped_exp_infs, bias_corrections)
            torch._foreach_addcdiv_(grouped_params, grouped_exp_avgs, denom)
        else:
            bias_corrections = [
                1 - beta1 ** _get_value(step) for step in grouped_state_steps
            ]
            step_size = [(_get_value(lr) / bc) * -1 for bc in bias_corrections]
            torch._foreach_addcdiv_(
                grouped_params, grouped_exp_avgs, grouped_exp_infs, step_size
            )
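# Parity sketch (illustrative only; the tensors and hyperparameter values below
# are made up): the foreach path and the single-tensor path are meant to produce
# identical updates, which can be spot-checked roughly like this:
#
#   kwargs = dict(eps=1e-8, beta1=0.9, beta2=0.999, lr=2e-3, weight_decay=0.0,
#                 maximize=False, differentiable=False, capturable=False,
#                 has_complex=False)
#   p1 = torch.randn(4); p2 = p1.clone(); g = torch.randn(4)
#   _single_tensor_adamax([p1], [g], [torch.zeros(4)], [torch.zeros(4)],
#                         [torch.tensor(0.0)], **kwargs)
#   _multi_tensor_adamax([p2], [g.clone()], [torch.zeros(4)], [torch.zeros(4)],
#                        [torch.tensor(0.0)], **kwargs)
#   torch.testing.assert_close(p1, p2)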
|||||||	|           dS )zrFunctional API that performs adamax algorithm computation.

    See :class:`~torch.optim.Adamax` for details.
    c              3   J   K   | ]}t          |t          j                  V  d S rb   )r)   r?   r   )r   ts     r1   r   zadamax.<locals>.<genexpr>  s?       5 5()
1el##5 5 5 5 5 5r2   zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizers)	r#   r\   r]   r!   r$   r   r   rY   r   )
r?   rt   ru   r   rP   r	   rq   rr   r   r   )r    rU   rV   rW   rX   r%   r   r   r   rY   r#   r\   r]   r!   r$   r   funcs                    r1   r   r     s*   4 >&&(( 
 5 5-85 5 5 2 2 
 ^
 
 	
 1Ne
 
 

7  U59))++ USTTT %uy--// %#$D!%     r2   )NFFFF)typingr   r   r   r?   r   	optimizerr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   __all__r   __doc__r   rA   rg   r   r   r   rH   r2   r1   <module>r      s   ( ( ( ( ( ( ( ( ( (                                        & X
R R R R RY R R Rl4	  
  
  
  
  5+ `L9LL9<L9 6lL9 6l	L9
 fL9 
L9 L9 L9 	L9 L9 L9 L9 L9 L9 L9 L9 L9^qLq<q 6lq 6l	q
 fq 
q q q 	q q q q q q q q qh  1FGGG # < <L<<< 6l< 6l	<
 f< d^< < < < < 
< <  !<" 	#<$ %< < < HG< < <r2   