from typing import cast, Optional, Union

import torch
from torch import Tensor

from .optimizer import (
    _default_to_fused_or_foreach,
    _device_dtype_check_for_fused,
    _differentiable_doc,
    _foreach_doc,
    _get_scalar_dtype,
    _get_value,
    _maximize_doc,
    _params_doc,
    _use_grad_for_differentiable,
    _view_as_real,
    Optimizer,
    ParamsT,
)

__all__ = ["Adagrad", "adagrad"]


class Adagrad(Optimizer):
    def __init__(
        self,
        params: ParamsT,
        lr: Union[float, Tensor] = 1e-2,
        lr_decay: float = 0,
        weight_decay: float = 0,
        initial_accumulator_value: float = 0,
        eps: float = 1e-10,
        foreach: Optional[bool] = None,
        *,
        maximize: bool = False,
        differentiable: bool = False,
        fused: Optional[bool] = None,
    ):
        if isinstance(lr, Tensor) and lr.numel() != 1:
            raise ValueError("Tensor lr must be 1-element")
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= lr_decay:
            raise ValueError(f"Invalid lr_decay value: {lr_decay}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= initial_accumulator_value:
            raise ValueError(
                f"Invalid initial_accumulator_value value: {initial_accumulator_value}"
            )
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")

        defaults = dict(
            lr=lr,
            lr_decay=lr_decay,
            eps=eps,
            weight_decay=weight_decay,
            initial_accumulator_value=initial_accumulator_value,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
            fused=fused,
        )
        super().__init__(params, defaults)

        if fused:
            if differentiable:
                raise RuntimeError("`fused` does not support `differentiable`")
            if foreach:
                raise RuntimeError("`fused` and `foreach` cannot be `True` together.")
            self._need_device_dtype_check_for_fused = True

        # Per-parameter state: a scalar step counter and a running sum of squared
        # gradients initialized to `initial_accumulator_value` (complex for complex params).
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["step"] = (
                    torch.zeros(
                        (),
                        dtype=_get_scalar_dtype(is_fused=group["fused"]),
                        device=p.device,
                    )
                    if group["fused"]
                    else torch.tensor(0.0, dtype=_get_scalar_dtype())
                )
                init_value = (
                    complex(initial_accumulator_value, initial_accumulator_value)
                    if torch.is_complex(p)
                    else initial_accumulator_value
                )
                state["sum"] = torch.full_like(
                    p, init_value, memory_format=torch.preserve_format
                )

    def __setstate__(self, state):
        super().__setstate__(state)
        fused = None
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
            fused = group.setdefault("fused", None)

        state_values = list(self.state.values())
        step_is_tensor = (len(state_values) != 0) and torch.is_tensor(
            state_values[0]["step"]
        )
        if not step_is_tensor:
            for s in state_values:
                s["step"] = torch.tensor(
                    float(s["step"]), dtype=_get_scalar_dtype(is_fused=fused)
                )

    def share_memory(self):
        for group in self.param_groups:
            for p in group["params"]:
                state = self.state[p]
                state["sum"].share_memory_()

    def _init_group(self, group, params_with_grad, grads, state_sums, state_steps):
        has_sparse_grad, has_complex = False, False
        for p in group["params"]:
            if p.grad is not None:
                if group["fused"] and getattr(
                    self, "_need_device_dtype_check_for_fused", False
                ):
                    _device_dtype_check_for_fused(p, cuda_unsupported=True)
                    self._need_device_dtype_check_for_fused = False
                has_sparse_grad |= p.grad.is_sparse
                has_complex |= torch.is_complex(p)
                params_with_grad.append(p)
                grads.append(p.grad)
                state = self.state[p]
                state_sums.append(state["sum"])
                state_steps.append(state["step"])
        return has_sparse_grad, has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad: list[Tensor] = []
            grads: list[Tensor] = []
            state_sums: list[Tensor] = []
            state_steps: list[Tensor] = []

            has_sparse_grad, has_complex = self._init_group(
                group, params_with_grad, grads, state_sums, state_steps
            )

            adagrad(
                params_with_grad,
                grads,
                state_sums,
                state_steps,
                lr=group["lr"],
                weight_decay=group["weight_decay"],
                lr_decay=group["lr_decay"],
                eps=group["eps"],
                has_sparse_grad=has_sparse_grad,
                foreach=group["foreach"],
                maximize=group["maximize"],
                differentiable=group["differentiable"],
                has_complex=has_complex,
                fused=group["fused"],
                grad_scale=getattr(self, "grad_scale", None),
                found_inf=getattr(self, "found_inf", None),
            )

        return loss

Fa[  Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    z
    Args:
        a  
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        z	
        a  
        fused (bool, optional): whether the fused implementation (CPU only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementations does not
            support sparse or complex gradients.
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

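# Minimal usage sketch. `_adagrad_usage_example` is a hypothetical helper, not part of the
# module's public API; it only illustrates how the optimizer documented above is driven:
# build it over the parameters, then alternate backward() and step(). The values lr=0.5 and
# the 200-iteration budget are arbitrary choices for the illustration.
def _adagrad_usage_example() -> None:
    torch.manual_seed(0)
    X = torch.randn(64, 3)
    w_true = torch.tensor([1.0, -2.0, 0.5])
    y = X @ w_true
    w = torch.zeros(3, requires_grad=True)

    opt = Adagrad([w], lr=0.5, lr_decay=0.0, weight_decay=0.0, eps=1e-10)
    for _ in range(200):
        opt.zero_grad()
        loss = ((X @ w - y) ** 2).mean()
        loss.backward()
        opt.step()
    # The per-coordinate step sizes shrink as the squared-gradient accumulator grows,
    # so the loss should decrease steadily on this small least-squares problem.
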
def adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    fused: Optional[bool] = None,
    grad_scale: Optional[Tensor] = None,
    found_inf: Optional[Tensor] = None,
    has_sparse_grad: bool = False,
    foreach: Optional[bool] = None,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    maximize: bool,
):
    r"""Functional API that performs Adagrad algorithm computation.

    See :class:`~torch.optim.Adagrad` for details.
    """
    if not all(isinstance(t, torch.Tensor) for t in state_steps):
        raise RuntimeError(
            "API has changed, `state_steps` argument must contain a list of singleton tensors"
        )

    if foreach is None and fused is None:
        # Pick an implementation when the caller did not specify one.
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )
    if fused is None:
        fused = False
    if foreach is None:
        foreach = False

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")
    if fused and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with fused optimizers")

    if fused and not torch.jit.is_scripting():
        func = _fused_adagrad
    elif foreach and not torch.jit.is_scripting():
        func = _multi_tensor_adagrad
    else:
        func = _single_tensor_adagrad

    func(
        params,
        grads,
        state_sums,
        state_steps,
        lr=lr,
        weight_decay=weight_decay,
        lr_decay=lr_decay,
        eps=eps,
        has_sparse_grad=has_sparse_grad,
        maximize=maximize,
        differentiable=differentiable,
        has_complex=has_complex,
        grad_scale=grad_scale,
        found_inf=found_inf,
    )


def _make_sparse(grad, grad_indices, values):
    size = grad.size()
    return torch.sparse_coo_tensor(grad_indices, values, size)


def _single_tensor_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    assert grad_scale is None and found_inf is None
    for param, grad, state_sum, step_t in zip(params, grads, state_sums, state_steps):
        # update step
        step_t += 1
        step = _get_value(step_t)
        grad = grad if not maximize else -grad

        if weight_decay != 0:
            if grad.is_sparse:
                raise RuntimeError(
                    "weight_decay option is not compatible with sparse gradients"
                )
            grad = grad.add(param, alpha=weight_decay)

        clr = lr / (1 + (step - 1) * lr_decay)

        if grad.is_sparse:
            # The update is non-linear, so indices must be unique before accumulation.
            grad = grad.coalesce()
            grad_indices = grad._indices()
            grad_values = grad._values()

            state_sum.add_(_make_sparse(grad, grad_indices, grad_values.pow(2)))
            std = state_sum.sparse_mask(grad)
            std_values = std._values().sqrt_().add_(eps)
            param.add_(
                _make_sparse(grad, grad_indices, grad_values / std_values), alpha=-clr
            )
        else:
            is_complex = torch.is_complex(param)
            if is_complex:
                grad = torch.view_as_real(grad)
                state_sum = torch.view_as_real(state_sum)
                param = torch.view_as_real(param)
            state_sum.addcmul_(grad, grad, value=1)
            if differentiable:
                std = state_sum.sqrt() + eps
            else:
                std = state_sum.sqrt().add_(eps)
            param.addcdiv_(grad, std, value=-clr)
            if is_complex:
                param = torch.view_as_complex(param)
                state_sum = torch.view_as_complex(state_sum)


def _multi_tensor_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
):
    assert not differentiable, "_foreach ops don't support autograd"
    assert grad_scale is None and found_inf is None

    # Foreach functions will throw errors if given empty lists.
    if len(params) == 0:
        return

    grouped_tensorlists = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (
        device_params_,
        device_grads_,
        device_state_sums_,
        device_state_steps_,
    ), _ in grouped_tensorlists.values():
        device_params = cast(list[Tensor], device_params_)
        device_grads = cast(list[Tensor], device_grads_)
        device_state_sums = cast(list[Tensor], device_state_sums_)
        device_state_steps = cast(list[Tensor], device_state_steps_)

        device_has_sparse_grad = has_sparse_grad and any(
            grad.is_sparse for grad in device_grads
        )
        if device_has_sparse_grad:
            # Sparse gradients fall back to the single-tensor implementation.
            _single_tensor_adagrad(
                device_params,
                device_grads,
                device_state_sums,
                device_state_steps,
                lr=lr,
                weight_decay=weight_decay,
                lr_decay=lr_decay,
                eps=eps,
                has_sparse_grad=True,
                maximize=maximize,
                differentiable=differentiable,
                has_complex=has_complex,
                grad_scale=grad_scale,
                found_inf=found_inf,
            )
            continue

        if has_complex:
            _view_as_real(device_params, device_grads, device_state_sums)

        if maximize:
            device_grads = torch._foreach_neg(device_grads)

        # Update steps. When the step tensors live on CPU, wrap the increment in a tensor
        # once so the foreach fallback does not re-wrap the Python scalar on every add.
        if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu:
            torch._foreach_add_(
                device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0
            )
        else:
            torch._foreach_add_(device_state_steps, 1)

        if weight_decay != 0:
            # Reuse the intermediate memory (device_grads) already allocated for maximize.
            if maximize:
                torch._foreach_add_(device_grads, device_params, alpha=weight_decay)
            else:
                device_grads = torch._foreach_add(
                    device_grads, device_params, alpha=weight_decay
                )

        minus_clr = [
            -lr / (1 + (_get_value(step) - 1) * lr_decay) for step in device_state_steps
        ]

        torch._foreach_addcmul_(device_state_sums, device_grads, device_grads, value=1)

        std = torch._foreach_sqrt(device_state_sums)
        torch._foreach_add_(std, eps)

        if weight_decay != 0 or maximize:
            # Reuse the intermediate memory (device_grads) already allocated.
            torch._foreach_mul_(device_grads, minus_clr)
            numerator = device_grads
        else:
            numerator = torch._foreach_mul(device_grads, minus_clr)

        torch._foreach_addcdiv_(device_params, numerator, std)


def _fused_adagrad(
    params: list[Tensor],
    grads: list[Tensor],
    state_sums: list[Tensor],
    state_steps: list[Tensor],
    grad_scale: Optional[Tensor],
    found_inf: Optional[Tensor],
    *,
    lr: float,
    weight_decay: float,
    lr_decay: float,
    eps: float,
    has_sparse_grad: bool,
    maximize: bool,
    differentiable: bool,
    has_complex: bool,
) -> None:
    if not params:
        return
    if has_sparse_grad or has_complex:
        raise RuntimeError("`fused` does not support sparse grad or complex param")
    if differentiable:
        raise RuntimeError(
            "adagrad with fused=True does not support differentiable=True"
        )

    grad_scale_dict = (
        {grad_scale.device: grad_scale} if grad_scale is not None else None
    )
    found_inf_dict = {found_inf.device: found_inf} if found_inf is not None else None

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, state_sums, state_steps]
    )
    for (device, _), (
        (device_params_, device_grads_, device_state_sums_, device_state_steps_),
        _,
    ) in grouped_tensors.items():
        device_params = cast(list[Tensor], device_params_)
        device_grads = cast(list[Tensor], device_grads_)
        device_state_sums = cast(list[Tensor], device_state_sums_)
        device_state_steps = cast(list[Tensor], device_state_steps_)

        device_grad_scale, device_found_inf = None, None
        if grad_scale is not None and grad_scale_dict is not None:
            if device not in grad_scale_dict:
                grad_scale_dict[device] = grad_scale.to(device, non_blocking=True)
            device_grad_scale = grad_scale_dict[device]
        if found_inf is not None and found_inf_dict is not None:
            if device not in found_inf_dict:
                found_inf_dict[device] = found_inf.to(device, non_blocking=True)
            device_found_inf = found_inf_dict[device]

        torch._foreach_add_(device_state_steps, 1)
        torch._fused_adagrad_(
            device_params,
            device_grads,
            device_state_sums,
            device_state_steps,
            lr=lr,
            lr_decay=lr_decay,
            weight_decay=weight_decay,
            eps=eps,
            maximize=maximize,
            grad_scale=device_grad_scale,
            found_inf=device_found_inf,
        )
        if device_found_inf is not None:
            # Roll back the step count where gradients overflowed under AMP.
            torch._foreach_sub_(
                device_state_steps, [device_found_inf] * len(device_state_steps)
            )
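
# Minimal numerical sketch of the functional path. `_adagrad_manual_check` is a hypothetical
# helper that compares one call to adagrad() (single-tensor mode, first step, no weight decay)
# against the documented update theta <- theta - lr * g / (sqrt(state_sum) + eps); the tensors
# passed to adagrad() are updated in place.
def _adagrad_manual_check() -> None:
    torch.manual_seed(0)
    param = torch.randn(4)
    grad = torch.randn(4)
    lr, lr_decay, eps_ = 0.1, 0.0, 1e-10

    # Reference update for the first step (t = 1, accumulator starts at zero).
    state_sum_ref = grad * grad
    param_ref = param - lr * grad / (state_sum_ref.sqrt() + eps_)

    p = param.clone()
    state_sum = torch.zeros_like(p)
    step = torch.tensor(0.0)
    adagrad(
        [p],
        [grad.clone()],
        [state_sum],
        [step],
        foreach=False,
        lr=lr,
        weight_decay=0.0,
        lr_decay=lr_decay,
        eps=eps_,
        maximize=False,
    )

    torch.testing.assert_close(p, param_ref)
    torch.testing.assert_close(state_sum, state_sum_ref)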