import math
from collections import OrderedDict

import torch
from packaging import version
from torch import Tensor, nn

from .utils import logging


logger = logging.get_logger(__name__)


class PytorchGELUTanh(nn.Module):
    """
    A fast C implementation of the tanh approximation of the GeLU activation function. See
    https://arxiv.org/abs/1606.08415.

    This implementation is equivalent to NewGELU and FastGELU but much faster. However, it is not an exact numerical
    match due to rounding errors.
    """

    def __init__(self):
        super().__init__()
        if version.parse(torch.__version__) < version.parse("1.12.0"):
            raise ImportError(
                f"You are using torch=={torch.__version__}, but torch>=1.12.0 is required to use "
                "PytorchGELUTanh. Please upgrade torch."
            )

    def forward(self, input: Tensor) -> Tensor:
        return nn.functional.gelu(input, approximate="tanh")


class NewGELUActivation(nn.Module):
    """
    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    """

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (input + 0.044715 * torch.pow(input, 3.0))))


class GELUActivation(nn.Module):
    """
    Original Implementation of the GELU activation function in Google BERT repo when initially created. For
    information: OpenAI GPT's GELU is slightly different (and gives slightly different results): 0.5 * x * (1 +
    torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))). This is now written in C in nn.functional.
    Also see the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    Fuse_gelu_pythonc                    s(   t    |r| j| _d S tjj| _d S N)r
   r   _gelu_pythonactr   r   r   )r   r0   r   r   r   r   C   s   
zGELUActivation.__init__r   r   c                 C   s    |d dt |td   S )Nr&   r'   r(   )r   erfr*   r+   r   r   r   r   r2   J   s    zGELUActivation._gelu_pythonc                 C   
   |  |S r1   r3   r   r   r   r   r   M      
zGELUActivation.forward)F)
r   r    r!   r"   boolr   r   r2   r   r#   r   r   r   r   r/   ;   s
    r/   c                   @   r$   )FastGELUActivationz}
    Applies GELU approximation that is slower than QuickGELU but more accurate. See: https://github.com/hendrycks/GELUs
    """

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1.0 + torch.tanh(input * 0.7978845608 * (1.0 + 0.044715 * input * input)))


class QuickGELUActivation(nn.Module):
    """
    Applies GELU approximation that is fast but somewhat inaccurate. See: https://github.com/hendrycks/GELUs
    """

    def forward(self, input: Tensor) -> Tensor:
        return input * torch.sigmoid(1.702 * input)


class ClippedGELUActivation(nn.Module):
    """
    Clip the range of possible GeLU outputs between [min, max]. This is especially useful for quantization purposes,
    as it allows mapping negative values in the GeLU spectrum. For more information on this trick, please refer to
    https://arxiv.org/abs/2004.09602.

    Gaussian Error Linear Unit. Original Implementation of the gelu activation function in Google Bert repo when
    initially created.

    For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 0.5 * x * (1 +
    torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))). See https://arxiv.org/abs/1606.08415
    minmaxc                    s8   ||krt d| d| dt   || _|| _d S )Nzmin should be < max (got min: z, max: ))
ValueErrorr
   r   r>   r?   )r   r>   r?   r   r   r   r   p   s
   

zClippedGELUActivation.__init__xr   c                 C   s   t t|| j| jS r1   )r   clipr   r>   r?   )r   rB   r   r   r   r   x      zClippedGELUActivation.forward)	r   r    r!   r"   floatr   r   r   r#   r   r   r   r   r=   c   s    r=   c                       r   )AccurateGELUActivationz
    Applies GELU approximation that is faster than default and more accurate than QuickGELU. See:
    https://github.com/hendrycks/GELUs

    Implemented along with MEGA (Moving Average Equipped Gated Attention)
    """

    def __init__(self):
        super().__init__()
        self.precomputed_constant = math.sqrt(2 / math.pi)

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1 + torch.tanh(self.precomputed_constant * (input + 0.044715 * torch.pow(input, 3))))


class MishActivation(nn.Module):
    """
    See Mish: A Self-Regularized Non-Monotonic Activation Function (Misra, https://arxiv.org/abs/1908.08681). Also
    visit the official repository for the paper: https://github.com/digantamisra98/Mish
    """

    def __init__(self):
        super().__init__()
        if version.parse(torch.__version__) < version.parse("1.9.0"):
            self.act = self._mish_python
        else:
            self.act = nn.functional.mish

    def _mish_python(self, input: Tensor) -> Tensor:
        return input * torch.tanh(nn.functional.softplus(input))

    def forward(self, input: Tensor) -> Tensor:
        return self.act(input)


class LinearActivation(nn.Module):
    """
    Applies the linear activation function, i.e. forwarding input directly to output.
    """

    def forward(self, input: Tensor) -> Tensor:
        return input


class LaplaceActivation(nn.Module):
    """
    Applies elementwise activation based on Laplace function, introduced in MEGA as an attention activation. See
    https://arxiv.org/abs/2209.10655

    Inspired by squared relu, but with bounded range and gradient for better stability
    """

    def forward(self, input, mu=0.707107, sigma=0.282095):
        input = (input - mu).div(sigma * math.sqrt(2.0))
        return 0.5 * (1.0 + torch.erf(input))


class ReLUSquaredActivation(nn.Module):
    """
    Applies the relu^2 activation introduced in https://arxiv.org/abs/2109.08668v2
    """

    def forward(self, input):
        relu_applied = nn.functional.relu(input)
        squared = torch.square(relu_applied)
        return squared


class ClassInstantier(OrderedDict):
    def __getitem__(self, key):
        content = super().__getitem__(key)
        cls, kwargs = content if isinstance(content, tuple) else (content, {})
        return cls(**kwargs)
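

# A minimal sketch (comments only) of how ClassInstantier behaves: it
# instantiates on lookup, expanding ``(cls, kwargs)`` tuples into
# ``cls(**kwargs)``, so every access to the ACT2FN mapping defined below
# returns a fresh module instance:
#
#     ACT2FN["relu"]     # -> nn.ReLU()
#     ACT2FN["gelu_10"]  # -> ClippedGELUActivation(min=-10, max=10)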
   )r>   r?   	gelu_fastgelu_newgelu_pythonr0   Tgelu_pytorch_tanhgelu_accuratelaplace
leaky_relulinearrL   
quick_gelurW   relu2relu6r<   silu)swishr   preluc                 C   s,   | t v rt |  S td|  dtt   )Nz	function z not found in ACT2FN mapping )ACT2FNKeyErrorlistkeys)activation_stringr   r   r   get_activation   s   rx   ),r*   collectionsr   r   	packagingr   r   r   utilsr   
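

# Usage sketch (illustrative): get_activation resolves a config string to a
# ready-to-use module; unknown names raise KeyError listing the supported keys.
#
#     get_activation("silu")   # -> nn.SiLU()
#     get_activation("geglu")  # -> KeyError: function geglu not found in ACT2FN mapping [...]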


gelu_python = get_activation("gelu_python")
gelu_new = get_activation("gelu_new")
gelu = get_activation("gelu")
gelu_fast = get_activation("gelu_fast")
quick_gelu = get_activation("quick_gelu")
silu = get_activation("silu")
mish = get_activation("mish")
linear_act = get_activation("linear")
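

if __name__ == "__main__":
    # Minimal smoke test: an illustrative sketch added here, not part of the
    # library module. Because of the relative import above, run it with
    # ``python -m transformers.activations`` rather than executing the file directly.
    # It compares the exact erf-based GELU with the pure-Python tanh approximation.
    x = torch.linspace(-3.0, 3.0, steps=101)
    exact = get_activation("gelu")       # erf-based GELU (nn.functional.gelu)
    approx = get_activation("gelu_new")  # tanh approximation (NewGELUActivation)
    max_err = torch.max(torch.abs(exact(x) - approx(x))).item()
    # The tanh approximation is accurate to roughly 1e-3 or better on this range.
    print(f"max |gelu - gelu_new| on [-3, 3]: {max_err:.2e}")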