from typing import cast, Optional, Union

import torch
from torch import Tensor

from .optimizer import (
    _default_to_fused_or_foreach,
    _device_dtype_check_for_fused,
    _differentiable_doc,
    _foreach_doc,
    _get_scalar_dtype,
    _get_value,
    _maximize_doc,
    _params_doc,
    _to_scalar,
    _use_grad_for_differentiable,
    _view_as_real,
    Optimizer,
    ParamsT,
)

__all__ = ["Adagrad", "adagrad"]


class Adagrad(Optimizer):
    def __init__(
        self,
        params: ParamsT,
        lr: Union[float, Tensor] = 1e-2,
        lr_decay: float = 0,
        weight_decay: float = 0,
        initial_accumulator_value: float = 0,
        eps: float = 1e-10,
        foreach: Optional[bool] = None,
        *,
        maximize: bool = False,
        differentiable: bool = False,
        fused: Optional[bool] = None,
    ):
        if isinstance(lr, Tensor) and lr.numel() != 1:
            raise ValueError("Tensor lr must be 1-element")
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= lr_decay:
            raise ValueError(f"Invalid lr_decay value: {lr_decay}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= initial_accumulator_value:
            raise ValueError(
                f"Invalid initial_accumulator_value value: {initial_accumulator_value}"
            )
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")

        defaults = dict(
            lr=lr,
            lr_decay=lr_decay,
            eps=eps,
            weight_decay=weight_decay,
            initial_accumulator_value=initial_accumulator_value,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
            fused=fused,
        )
        super().__init__(params, defaults)

        if fused:
            if differentiable:
                raise RuntimeError("`fused` does not support `differentiable`")
            self._need_device_dtype_check_for_fused = True
            if foreach:
                raise RuntimeError("`fused` and `foreach` cannot be `True` together.")

        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["step"] = (
                    torch.zeros(
                        (),
                        dtype=_get_scalar_dtype(is_fused=group["fused"]),
                        device=p.device,
                    )
                    if group["fused"]
                    else torch.tensor(0.0, dtype=_get_scalar_dtype())
                )
                init_value = (
                    complex(initial_accumulator_value, initial_accumulator_value)
                    if torch.is_complex(p)
                    else initial_accumulator_value
                )
                state["sum"] = torch.full_like(
                    p, init_value, memory_format=torch.preserve_format
                )

    def __setstate__(self, state):
        super().__setstate__(state)
        fused = None
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
            fused = group.setdefault("fused", None)

        state_values = list(self.state.values())
        step_is_tensor = (len(state_values) != 0) and torch.is_tensor(
            state_values[0]["step"]
        )
        if not step_is_tensor:
            for s in state_values:
                s["step"] = torch.tensor(
                    float(s["step"]), dtype=_get_scalar_dtype(is_fused=fused)
                )

    def share_memory(self):
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["sum"].share_memory_()

    def _init_group(self, group, params_with_grad, grads, state_sums, state_steps):
        has_sparse_grad, has_complex = False, False
        for p in group["params"]:
            if p.grad is not None:
                if group["fused"] and getattr(
                    self, "_need_device_dtype_check_for_fused", True
                ):
                    _device_dtype_check_for_fused(p, cuda_unsupported=True)
                    self._need_device_dtype_check_for_fused = False
                has_sparse_grad |= p.grad.is_sparse
                has_complex |= torch.is_complex(p)
                params_with_grad.append(p)
                grads.append(p.grad)
                state = self.state[p]
                state_sums.append(state["sum"])
                state_steps.append(state["step"])
        return has_sparse_grad, has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad: list[Tensor] = []
            grads: list[Tensor] = []
            state_sums: list[Tensor] = []
            state_steps: list[Tensor] = []

            has_sparse_grad, has_complex = self._init_group(
                group, params_with_grad, grads, state_sums, state_steps
            )

            adagrad(
                params_with_grad,
                grads,
                state_sums,
                state_steps,
                lr=group["lr"],
                weight_decay=group["weight_decay"],
                lr_decay=group["lr_decay"],
                eps=group["eps"],
                has_sparse_grad=has_sparse_grad,
                foreach=group["foreach"],
                maximize=group["maximize"],
                differentiable=group["differentiable"],
                has_complex=has_complex,
                fused=group["fused"],
                grad_scale=getattr(self, "grad_scale", None),
                found_inf=getattr(self, "found_inf", None),
            )

        return loss


Adagrad.__doc__ = (
    r"""Implements Adagrad algorithm.

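    In words: for every parameter, Adagrad accumulates a running sum of squared
    gradients and scales each coordinate's step by the inverse square root of
    that sum, so coordinates that have seen large or frequent gradients take
    smaller steps over time.
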
    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
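
    Note that the accumulator :math:`state\_sum_t` is monotonically
    non-decreasing, so the effective step size shrinks as training proceeds:
    for a roughly constant gradient magnitude, the update scales like
    :math:`\gamma/\sqrt{t}` after :math:`t` steps.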
    """
    + rf"""
    Args:
        {_params_doc}
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        {_foreach_doc}
        {_maximize_doc}
        {_differentiable_doc}
        fused (bool, optional): whether the fused implementation (CPU only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementation does not
            support sparse or complex gradients.
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html
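
    Example:
        A minimal usage sketch; ``model``, ``loss_fn``, ``input`` and
        ``target`` stand in for user code and are not part of this module.

        >>> # xdoctest: +SKIP
        >>> optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()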
    """
)


def adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    fused: Optional[bool] = None,
    grad_scale: Optional[Tensor] = None,
    found_inf: Optional[Tensor] = None,
    # kwonly args with defaults are not supported by functions compiled with torchscript issue #70627
    # setting this as kwarg for now as functional API is compiled by torch/distributed/optim
    has_sparse_grad: bool = False,
    foreach: Optional[bool] = None,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    maximize: bool,
):
    r"""Functional API that performs Adagrad algorithm computation.

    See :class:`~torch.optim.Adagrad` for details.
    """
    if not all(isinstance(t, torch.Tensor) for t in state_steps):
        raise RuntimeError(
            "API has changed, `state_steps` argument must contain a list of singleton tensors"
        )

    # Respect when the user inputs False/True for foreach or fused. We only
    # want to change the default when neither has been user-specified.
    if fused is None and foreach is None:
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )
    if fused is None:
        fused = False
    if foreach is None:
        foreach = False

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")
    if fused and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with fused optimizers")

    if fused and not torch.jit.is_scripting():
        func = _fused_adagrad
    elif foreach and not torch.jit.is_scripting():
        func = _multi_tensor_adagrad
    else:
        func = _single_tensor_adagrad

    func(
        params,
        grads,
        state_sums,
        state_steps,
        lr=lr,
        weight_decay=weight_decay,
        lr_decay=lr_decay,
        eps=eps,
        has_sparse_grad=has_sparse_grad,
        maximize=maximize,
        differentiable=differentiable,
        has_complex=has_complex,
        grad_scale=grad_scale,
        found_inf=found_inf,
    )


def _make_sparse(grad, grad_indices, values):
    size = grad.size()
    return torch.sparse_coo_tensor(grad_indices, values, size)


def _single_tensor_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    assert grad_scale is None and found_inf is None
    if not torch.jit.is_scripting():
        lr = _to_scalar(lr)

    for param, grad, state_sum, step_t in zip(params, grads, state_sums, state_steps):
        # update step
        step_t += 1
        step = _get_value(step_t)
        grad = grad if not maximize else -grad

        if weight_decay != 0:
            if grad.is_sparse:
                raise RuntimeError(
                    "weight_decay option is not compatible with sparse gradients"
                )
            grad = grad.add(param, alpha=weight_decay)

        clr = lr / (1 + (step - 1) * lr_decay)

        if grad.is_sparse:
            grad = grad.coalesce()  # the update is non-linear so indices must be unique
            grad_indices = grad._indices()
            grad_values = grad._values()

            state_sum.add_(_make_sparse(grad, grad_indices, grad_values.pow(2)))
            std = state_sum.sparse_mask(grad)
            std_values = std._values().sqrt_().add_(eps)
            param.add_(
                _make_sparse(grad, grad_indices, grad_values / std_values), alpha=-clr
            )
        else:
            is_complex = torch.is_complex(param)
            if is_complex:
                grad = torch.view_as_real(grad)
                state_sum = torch.view_as_real(state_sum)
                param = torch.view_as_real(param)
            state_sum.addcmul_(grad, grad, value=1)
            if differentiable:
                std = state_sum.sqrt() + eps
            else:
                std = state_sum.sqrt().add_(eps)
            param.addcdiv_(grad, std, value=-clr)
            if is_complex:
                param = torch.view_as_complex(param)
                state_sum = torch.view_as_complex(state_sum)
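

# A minimal sketch of driving the functional `adagrad` API above directly;
# the tensors below are illustrative placeholders, not part of this module.
# Users normally go through the `Adagrad` class, which maintains the
# `state_sums` and `state_steps` lists for them:
#
#   param, grad = torch.tensor([1.0]), torch.tensor([2.0])
#   state_sum, step = torch.tensor([0.0]), torch.tensor(0.0)
#   adagrad([param], [grad], [state_sum], [step], lr=0.5,
#           weight_decay=0.0, lr_decay=0.0, eps=1e-10, maximize=False)
#   # state_sum -> g^2 = 4.0; param -> 1.0 - 0.5 * 2.0 / (sqrt(4.0) + 1e-10) = 0.5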


def _multi_tensor_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    assert not differentiable, "_foreach ops don't support autograd"
    assert grad_scale is None and found_inf is None

    # Foreach functions will throw errors if given empty lists
    if len(params) == 0:
        return

    lr = _to_scalar(lr)

    grouped_tensorlists = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (
        device_params_,
        device_grads_,
        device_state_sums_,
        device_state_steps_,
    ), _ in grouped_tensorlists.values():
        device_params = cast(list[Tensor], device_params_)
        device_grads = cast(list[Tensor], device_grads_)
        device_state_sums = cast(list[Tensor], device_state_sums_)
        device_state_steps = cast(list[Tensor], device_state_steps_)

        device_has_sparse_grad = has_sparse_grad and any(
            grad.is_sparse for grad in device_grads
        )

        if device_has_sparse_grad:
            _single_tensor_adagrad(
                device_params,
                device_grads,
                device_state_sums,
                device_state_steps,
                lr=lr,
                weight_decay=weight_decay,
                lr_decay=lr_decay,
                eps=eps,
                has_sparse_grad=True,
                maximize=maximize,
                differentiable=differentiable,
                has_complex=has_complex,
                grad_scale=grad_scale,
                found_inf=found_inf,
            )
            continue

        # Handle complex parameters
        if has_complex:
            _view_as_real(device_params, device_grads, device_state_sums)

        if maximize:
            device_grads = torch._foreach_neg(device_grads)

        # Update steps
        # If steps are on CPU, foreach will fall back to the slow path, which is a
        # for-loop calling t.add(1) over and over. 1 will then be wrapped into a
        # Tensor over and over again, which is slower than if we just wrapped it
        # once now. The alpha is required to assure we go to the right overload.
        if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu:
            torch._foreach_add_(
                device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0
            )
        else:
            torch._foreach_add_(device_state_steps, 1)

        if weight_decay != 0:
            # Re-use the intermediate memory (device_grads) already allocated for maximize
            if maximize:
                torch._foreach_add_(device_grads, device_params, alpha=weight_decay)
            else:
                device_grads = torch._foreach_add(
                    device_grads, device_params, alpha=weight_decay
                )

        minus_clr = [
            -lr / (1 + (_get_value(step) - 1) * lr_decay) for step in device_state_steps
        ]

        torch._foreach_addcmul_(device_state_sums, device_grads, device_grads, value=1)

        std = torch._foreach_sqrt(device_state_sums)
        torch._foreach_add_(std, eps)

        if weight_decay != 0 or maximize:
            # Again, re-using intermediate memory (device_grads) in the maximize case
            torch._foreach_mul_(device_grads, minus_clr)
            numerator = device_grads
        else:
            numerator = torch._foreach_mul(device_grads, minus_clr)

        torch._foreach_addcdiv_(device_params, numerator, std)
def _fused_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
) -> None:
    if not params:
        return
    if has_sparse_grad or has_complex:
        raise RuntimeError("`fused` does not support sparse grad or complex param")

    if differentiable:
        raise RuntimeError(
            "adagrad with fused=True does not support differentiable=True"
        )

    lr = _to_scalar(lr)

    grad_scale_dict = (
        {grad_scale.device: grad_scale} if grad_scale is not None else None
    )
    found_inf_dict = {found_inf.device: found_inf} if found_inf is not None else None

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (device, _), (
        (
            device_params_,
            device_grads_,
            device_state_sums_,
            device_state_steps_,
        ),
        _,
    ) in grouped_tensors.items():
        device_params = cast(list[Tensor], device_params_)
        device_grads = cast(list[Tensor], device_grads_)
        device_state_sums = cast(list[Tensor], device_state_sums_)
        device_state_steps = cast(list[Tensor], device_state_steps_)

        device_grad_scale, device_found_inf = None, None
        if grad_scale is not None and grad_scale_dict is not None:
            if device not in grad_scale_dict:
                grad_scale_dict[device] = grad_scale.to(device, non_blocking=True)
            device_grad_scale = grad_scale_dict[device]
        if found_inf is not None and found_inf_dict is not None:
            if device not in found_inf_dict:
                found_inf_dict[device] = found_inf.to(device, non_blocking=True)
            device_found_inf = found_inf_dict[device]

        torch._foreach_add_(device_state_steps, 1)
        torch._fused_adagrad_(
            device_params,
            device_grads,
            device_state_sums,
            device_state_steps,
            lr=lr,
            lr_decay=lr_decay,
            weight_decay=weight_decay,
            eps=eps,
            maximize=maximize,
            grad_scale=device_grad_scale,
            found_inf=device_found_inf,
        )
        if device_found_inf is not None:
            torch._foreach_sub_(
                device_state_steps, [device_found_inf] * len(device_state_steps)
            )