
    YhO              $          d Z ddlmZmZmZ ddlZddlmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddgZ G d	 de          Zd
de de de de de
 dz   e_         dee         dee         dee         dee         dee         dee         dedededededededededef dZdee         dee         dee         dee         dee         dee         dedededededededededef d Z ee!          	 	 	 	 	 d%dee         dee         dee         dee         dee         dee         d#ee         dedededededededededef"d$            ZdS )&z)Implementation for the RMSprop algorithm.    )castOptionalUnionN)Tensor   )_capturable_doc_default_to_fused_or_foreach_differentiable_doc_disable_dynamo_if_unsupported_foreach_doc!_get_capturable_supported_devices_get_scalar_dtype_maximize_doc_params_doc
_to_scalar_use_grad_for_differentiable_view_as_real	OptimizerParamsTRMSproprmspropc                        e Zd Z	 	 	 	 	 	 	 	 	 	 ddedeeef         d	ed
edededededee         dedef fdZ	 fdZ
d Zedd            Z xZS )r   {Gz?Gz?:0yE>r   FNparamslralphaepsweight_decaymomentumcentered
capturableforeachmaximizedifferentiablec                    t          |t                    r'|                                dk    rt          d          d|k    st          d|           d|k    st          d|           d|k    st          d|           d|k    st          d|           d|k    st          d|           t	          ||||||||	|
|	
  
        }t                                          ||           d S )
Nr   zTensor lr must be 1-elementg        zInvalid learning rate: zInvalid epsilon value: zInvalid momentum value: zInvalid weight_decay value: zInvalid alpha value: )
r   r!   r   r   r"   r    r#   r$   r%   r&   )
isinstancer   numel
ValueErrordictsuper__init__)selfr   r   r   r   r    r!   r"   r#   r$   r%   r&   defaults	__class__s                e/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/torch/optim/rmsprop.pyr-   zRMSprop.__init__   s,    b&!! 	<bhhjjAoo:;;;byy;r;;<<<czz<s<<===hBBBCCCl""JLJJKKKe||<U<<===%!)
 
 
 	*****    c                    t                                          |           | j        D ]D}|                    dd           |                    dd           |                    dd            |                    dd           |                    dd           |                    dd           |d	         D ]}| j                            |g           }t          |          dk    rt          j        |d
                   sjt          |d
                   }|d         r(t          j
        |t                      |j                  n!t          j
        |t                                |d
<   Fd S )Nr!   r   r"   Fr$   r%   r&   r#   r   stepdtypedevicer6   )r,   __setstate__param_groups
setdefaultstategetlentorch	is_tensorfloattensorr   r7   )r.   r<   grouppp_statestep_valr0   s         r1   r9   zRMSprop.__setstate__H   s{   U###& 	 	EZ+++Z///Y---Z///-u555\51118_ 
 
*..B//w<<1$$U_WV_-M-M$$WV_55H
 !.O$,=,?,?    #\(:K:M:MNNN FO	
	 	r2   c                    d}|d         D ]}	|	j         |t          j        |	          z  }|                    |	           |	j         j        rt          d          |                    |	j                    | j        |	         }
t          |
          dk    r|d         r(t          j        dt                      |	j
                  n!t          j        dt                                |
d	<   t          j        |	t          j        
          |
d<   |d         dk    r#t          j        |	t          j        
          |
d<   |d         r#t          j        |	t          j        
          |
d<   |                    |
d                    |                    |
d	                    |d         dk    r|                    |
d                    |d         r|                    |
d                    |S )NFr   z)RMSprop does not support sparse gradientsr   r#    r5   r8   r4   )memory_format
square_avgr!   momentum_bufferr"   grad_avg)gradr?   
is_complexappend	is_sparseRuntimeErrorr<   r>   zerosr   r7   
zeros_likepreserve_format)r.   rC   params_with_gradgradssquare_avgsmomentum_buffer_list	grad_avgsstate_stepshas_complexrD   r<   s              r1   _init_groupzRMSprop._init_group]   s    x $	4 $	4Av~5+A...K##A&&&v P"#NOOOLL   JqME 5zzQ \*DEK*;*=*=ahOOOOR/@/B/BCCC f
 ',&6U%:' ' 'l# $q((/4/?)>0 0 0E+, $ (-(8)>) ) )E*% u\2333uV}---Z 1$$$++E2C,DEEEZ  4  z!2333r2   c                    |                                   d}|5t          j                    5   |            }ddd           n# 1 swxY w Y   | j        D ]}g }g }g }g }g }g }	|                     |||||||	          }
t          ||||||	|d         |d         |d         |d         |d         |d         |d         |d	         |d
         |d         |
           |S )zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r    r!   r"   r$   r%   r&   r#   )r   r   r   r    r!   r"   r$   r%   r&   r#   r[   ) _cuda_graph_capture_health_checkr?   enable_gradr:   r\   r   )r.   closurelossrC   rU   rV   rW   rY   rX   rZ   r[   s              r1   r4   zRMSprop.step   sv    	--///"$$ ! !wyy! ! ! ! ! ! ! ! ! ! ! ! ! ! ! & $	 $	E-/"$E(*K&(I13 (*K** $ K  $;Gn%L">2z*z*i(z*$%56 .'#    ( s   AA
A)
r   r   r   r   r   FFNFFN)__name__
__module____qualname__r   r   rA   r   boolr   r-   r9   r\   r   r4   __classcell__)r0   s   @r1   r   r      s&        $( "&$'+ '+'+ %- '+ 	'+
 '+ '+ '+ '+ '+ $'+ '+ '+ '+ '+ '+ '+ '+R    *1 1 1f "4 4 4 "!4 4 4 4 4r2   aj  Implements RMSprop algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \alpha \text{ (alpha)}, \: \gamma \text{ (lr)},
                \: \theta_0 \text{ (params)}, \: f(\theta) \text{ (objective)}                   \\
            &\hspace{13mm}   \lambda \text{ (weight decay)},\: \mu \text{ (momentum)},
                \: centered, \: \epsilon \text{ (epsilon)}                                       \\
            &\textbf{initialize} : v_0 \leftarrow 0 \text{ (square average)}, \:
                \textbf{b}_0 \leftarrow 0 \text{ (buffer)}, \: g^{ave}_0 \leftarrow 0     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm}if \: \lambda \neq 0                                                    \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
            &\hspace{5mm}v_t           \leftarrow   \alpha v_{t-1} + (1 - \alpha) g^2_t
                \hspace{8mm}                                                                     \\
            &\hspace{5mm} \tilde{v_t} \leftarrow v_t                                             \\
            &\hspace{5mm}if \: centered                                                          \\
            &\hspace{10mm} g^{ave}_t \leftarrow g^{ave}_{t-1} \alpha + (1-\alpha) g_t            \\
            &\hspace{10mm} \tilde{v_t} \leftarrow \tilde{v_t} -  \big(g^{ave}_{t} \big)^2        \\
            &\hspace{5mm}if \: \mu > 0                                                           \\
            &\hspace{10mm} \textbf{b}_t\leftarrow \mu \textbf{b}_{t-1} +
                g_t/ \big(\sqrt{\tilde{v_t}} +  \epsilon \big)                                   \\
            &\hspace{10mm} \theta_t \leftarrow \theta_{t-1} - \gamma \textbf{b}_t                \\
            &\hspace{5mm} else                                                                   \\
            &\hspace{10mm}\theta_t      \leftarrow   \theta_{t-1} -
                \gamma  g_t/ \big(\sqrt{\tilde{v_t}} + \epsilon \big)  \hspace{3mm}              \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to
    `lecture notes <https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_ by G. Hinton.
    and centered version `Generating Sequences
    With Recurrent Neural Networks <https://arxiv.org/pdf/1308.0850v5.pdf>`_.
    The implementation here takes the square root of the gradient average before
    adding epsilon (note that TensorFlow interchanges these two operations). The effective
    learning rate is thus :math:`\gamma/(\sqrt{v} + \epsilon)` where :math:`\gamma`
    is the scheduled learning rate and :math:`v` is the weighted moving average
    of the squared gradient.
    z
    Args:
        a0  
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        alpha (float, optional): smoothing constant (default: 0.99)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        momentum (float, optional): momentum factor (default: 0)
        centered (bool, optional) : if ``True``, compute the centered RMSProp,
            the gradient is normalized by an estimation of its variance
        z	
        z

    r   rV   rW   rY   rX   rZ   r   r   r   r    r!   r"   r%   r&   r#   r[   c       
         "   t           j                                        st          |          }t	          |           D ]P\  }}||         }t           j                                        sF|rDt                      }|j        j	        |j        j	        k    r|j        j	        |v sJ d| d            ||         }|s|n| }||         }|dz  }|	dk    r|
                    ||	          }t          j        |          }|r<t          j        |          }t          j        |          }t          j        |          }|                    |                              ||d|z
             |rb||         }|rt          j        |          }|                    |d|z
             |                    ||d                                          }n|                                }|r|
                    |          }n|                    |          }|
dk    ra||         }|rt          j        |          }|                    |
                              ||           |                    ||            7|                    |||            Rd S )NIIf capturable=True, params and state_steps must be on supported devices: .r   r   r   value)r?   jitis_scriptingr   	enumeratecompileris_compilingr   r7   typeaddrN   view_as_realmul_addcmul_lerp_addcmulsqrt_sqrtadd_addcdiv_)r   rV   rW   rY   rX   rZ   r   r   r   r    r!   r"   r%   r&   r#   r[   iparamr4   capturable_supported_devicesrM   rJ   is_complex_paramrL   avgbufs                             r1   _single_tensor_rmspropr   	  s   & 9!!## ^^f%% 31 3151~ ~**,, 	 	+L+N+N(!T[%555L%)EEEE{\x{{{ FEE
 Qx#.tt$ ^
	188E866D +E22 	8&u--E%d++D+J77J''d!e)'DDD 	$ |H 8 -h77NN4U+++$$Xxr$BBHHJJCC//##C 	 ''#,,CC((3--Ca<<&q)C .(--HHX''c222JJs2#J&&&&NN4RCN0000g31 31r2   c       
         	  ! t          |           dk    rd S |r
J d            t          j                                        sG|rEt	                      !t          !fdt          | |          D                       sJ d! d            t          |          }t          j	        | |||||g          }|
                                D ]\  \  }}}}}}}t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }t          t          t                   |          }|r||g}|
dk    r5t          t          t                   |          }|                    |           |r5t          t          t                   |          }|                    |           t          |g|R   |rt          j        |          }t          j                                        s9|d         j        r,t          j        |t          j        dd          d	           nt          j        |d
           |	dk    r1|rt          j        |||		           nt          j        |||		          }t          j        ||           t          j        |||d
|z
             |r{t          t          t                   |          }t          j        ||d
|z
             t          j        |||d          }t          j        |           t          j        ||           n)t          j        |          }t          j        ||           |
dk    rt          t          t                   |          }t          j        ||
           t          j        |||           |rGt9          |t          j                  r-t          j        ||           } t          j        ||            ;t          j        ||| 	           U|rHt9          |t          j                  r.t          j        ||            t          j        |||           t          j        ||||            d S )Nr   z#_foreach ops don't support autogradc              3   n   K   | ]/\  }}|j         j        |j         j        k    o|j         j        v V  0d S rb   )r7   rt   ).0rD   r4   r   s      r1   	<genexpr>z(_multi_tensor_rmsprop.<locals>.<genexpr>p  s]       
 
 4 HMT[-- >!==
 
 
 
 
 
r2   ri   rj   g      ?cpu)r7   rk   r   rl   rn   )r>   r?   rr   rs   r   allzipr   r   "_group_tensors_by_device_and_dtypevaluesr   listr   rO   r   _foreach_negis_cpu_foreach_add_rB   _foreach_add_foreach_mul__foreach_addcmul__foreach_lerp__foreach_addcmul_foreach_sqrt__foreach_sqrt_foreach_addcdiv_r(   _foreach_mul_foreach_div_)"r   rV   rW   rY   rX   rZ   r   r   r   r    r!   r"   r%   r&   r#   r[   grouped_tensorsgrouped_params_grouped_grads_grouped_square_avgs_grouped_grad_avgs_grouped_momentum_buffer_list_grouped_state_steps__grouped_paramsgrouped_gradsgrouped_square_avgsgrouped_state_stepsstate_and_gradsgrouped_momentum_buffer_listgrouped_grad_avgsr   momentum_lrr   s"                                    @r1   _multi_tensor_rmspropr   U  s   & 6{{aDDDDDD >&&(( 
Z 
'H'J'J$ 
 
 
 
 v{33
 
 
 
 
 	
 	

 xXtwww	
 	
 	
 
BBB	Y0DkR O ""$$YW YW 				
 ) d6lO<<T&\>::"4<1EFF"4<1EFF 
	<,.ABO!||/3L"?0 0,  &&'CDDD :$(f7I$J$J!&&'8999.;?;;;; 	>!.}==M ~**,, 	81DQ1G1N 	8#U\#e%D%D%DC      3Q7771 #M>VVVVV % 2!>! ! ! 	/777QY	
 	
 	
 	
  
	* $T&\3E F F !2M1u9MMM(#%68IQS  C  %%%S))))%&9::CS)))a<<+/V;, ,(  <hGGG#$@-QTUUU  jU\:: #01MPRsSS#NK@@@@#"$@      WjU\:: W#C"---'sKKKK'sSURUVVVVVsYW YWr2   )single_tensor_fnFr$   c                   t           j                                        s(t          d |D                       st	          d          |t          | |d          \  }}|r-t           j                                        rt	          d          |r&t           j                                        st          }nt          } || |||||||||||||	||
           dS )ztFunctional API that performs rmsprop algorithm computation.

    See :class:`~torch.optim.RMSProp` for details.
    c              3   J   K   | ]}t          |t          j                  V  d S rb   )r(   r?   r   )r   ts     r1   r   zrmsprop.<locals>.<genexpr>  s?       5 5()
1el##5 5 5 5 5 5r2   zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizers)
r   r   r   r    r!   r"   r%   r#   r&   r[   )
r?   rr   rs   r   rQ   r	   ro   rp   r   r   )r   rV   rW   rY   rX   rZ   r$   r%   r&   r#   r[   r   r   r   r    r!   r"   r   funcs                      r1   r   r     s0   : >&&(( 
 5 5-85 5 5 2 2 
 ^
 
 	
 1Ne
 
 

7  U59))++ USTTT &uy--// &$%D!%!     r2   )NFFFF)__doc__typingr   r   r   r?   r   	optimizerr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   __all__r   r   rA   rf   r   r   r   rH   r2   r1   <module>r      s   0 0 ( ( ( ( ( ( ( ( ( (                                      $ i
 g g g g gi g g gV+X	  
  
  
  
  Y< BI1LI1<I1 fI1 F|	I1
 v,I1 fI1 	I1 I1 
I1 I1 I1 I1 I1 I1  !I1" #I1 I1 I1 I1XAWLAW<AW fAW F|	AW
 v,AW fAW 	AW AW 
AW AW AW AW AW AW  !AW" #AW AW AW AWH  1GHHH # A ALA<A fA F|	A
 v,A fA d^A A A A A 	A  !A" 
#A$ %A& 'A( )A A A IHA A Ar2   