o
    h                     @   sx  d dl Z d dlmZ d dlmZ ddlmZ ddlmZm	Z	 e	
eZe r*d dlZdd Z			d1d	ee d
ed dee dedef fddZ			d1d	ee d
ed dee dedef fddZ			d1d	ee d
ed dee dedef fddZ	d2d	ed
ddee dedef fddZ	d2d	ed
ddee dedef fddZ	d2d	ed
ddee dedef fddZeeeeeedZ		d3dedededee d ee f
d!d"Zd2d	ed ee fd#d$Zd2d	ed ee fd%d&Zd2d	ed ee fd'd(Zd2d	ed ee fd)d*Zd2d	ed ee fd+d,Z d2d	ed ee fd-d.Z!eeeee e!dZ"d2d	ed ee fd/d0Z#dS )4    Nwraps)Optional   )PretrainedConfig)is_torch_availableloggingc                    s,   dd dd  t  fdd}|S )ad  
    Decorator function to update the RoPE parameters in the forward pass, if the model is using a dynamic RoPE
    (i.e. a RoPE implementation that may recompute its frequencies in the forward pass).

    Args:
        rope_forward (Callable):
            The forward pass of the RoPE implementation.

    Returns:
        The decorated forward pass.
    c                 S   s   t |d }t| jdr| jj}n| jj}||kr8t| ds-| j| j||d d\| _}| jd| jdd dS | j	
|| _	| jd| j	dd dS )	zbLongrope uses long factor if sequence is larger than original pretraining length, short otherwise.r    original_max_position_embeddingslong_inv_freqseq_leninv_freqF
persistentN)torchmaxhasattrconfigr	   max_position_embeddingsrope_init_fnr
   register_bufferoriginal_inv_freqto)selfposition_idsdevicer   r	   _ r   t/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/transformers/modeling_rope_utils.pylongrope_frequency_update+   s   

z6dynamic_rope_update.<locals>.longrope_frequency_updatec                 S   s   t |d }|| jkr#| j| j||d\}| _| jd|dd || _|| jk rD| j| jkrF| j	|| _| jd| jdd | j| _dS dS dS )a  
        dynamic RoPE layers should recompute `inv_freq` in the following situations:
        1 - growing beyond the cached sequence length (allow scaling)
        2 - the current sequence length is in the original scale (avoid losing precision with small sequences)
        r   r   r   Fr   N)
r   r   max_seq_len_cachedr   r   attention_scalingr   original_max_seq_lenr   r   )r   r   r   r   r   r   r   r   dynamic_frequency_update>   s   
z5dynamic_rope_update.<locals>.dynamic_frequency_updatec                    sB   d| j v r | ||jd n| j dkr| ||jd | ||S )Ndynamic)r   longrope)	rope_typer   )r   xr   r#   r   rope_forwardr   r   wrapperQ   s
   

z$dynamic_rope_update.<locals>.wrapperr   )r)   r*   r   r(   r   dynamic_rope_update   s
   r+   r   r   ztorch.devicer   returnztorch.Tensorc           
      K   s   | durt |dkrtd| d|  t |dkr#|d }|d }n!| durD| j}t| dr2| jnd}t| d	| j| j }t|| }d}d|t	j
d|d
t	jdj|t	jd|   }	|	|fS )a  
    Computes the inverse frequencies according to the original RoPE implementation
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    Nr   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_default_rope_parameters`, got `rope_kwargs`= and `config`=basedimpartial_rotary_factor      ?head_dim   dtyper   r5   )len
ValueError
rope_thetar   r0   getattrhidden_sizenum_attention_headsintr   arangeint64r   float)
r   r   r   rope_kwargsr.   r/   r0   r2   attention_factorr   r   r   r    _compute_default_rope_parameters\   s&   
,rC   c                 K   sx   | durt |dkrtd| d|  t |dkr|d }n	| dur(| jd }t| ||fi |\}}|| }||fS )a  
    Computes the inverse frequencies with linear scaling. Credits to the Reddit user /u/kaiokendev
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    Nr   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_linear_scaling_rope_parameters`, got `rope_kwargs`=r-   factor)r7   r8   rope_scalingrC   )r   r   r   rA   rD   r   rB   r   r   r   '_compute_linear_scaling_rope_parameters   s   

rF   c                 K   s  | durt |dkrtd| d|  t |dkr+|d }|d }|d }|d }n)| durT| j}t| d	r:| jnd
}t| d| j| j }	t|	| }| j	}| j
d }d
}
|dur`||kr`|n|}||| | |d  ||d    }d
|tjd|dtjdj|tjd|   }||
fS )a4  
    Computes the inverse frequencies with NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length, used to update the dynamic RoPE at inference time.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    Nr   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_dynamic_ntk_parameters`, got `rope_kwargs`=r-   r.   r/   r   rD   r0   r1   r2   r   r3   r4   r6   )r7   r8   r9   r   r0   r:   r;   r<   r=   r   rE   r   r>   r?   r   r@   )r   r   r   rA   r.   r/   r   rD   r0   r2   rB   r   r   r   r   _compute_dynamic_ntk_parameters   s2   

$,rG   c                    s  t |dkrtd| | j}t| dr| jnd}t| d| j| j }t|| }| j	d }| j	
d}	| j	
d}
| j	
d	}d
| j	v rQ| j	d
 }| j| }n| j}ddd}|	du rr|
rn|rnt|||
||| }	n||}	| j	
dpyd}| j	
dpd}dd   fdd}dd }|td|dj|tjd|  }d| }d||  }||||||\}}d||||d j|tjd }|d|  ||  }||	fS )a  
    Computes the inverse frequencies with NTK scaling. Please refer to the
    [original paper](https://arxiv.org/abs/2309.00071)
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r   zYUnexpected arguments: `**rope_kwargs` should be unset in `_compute_yarn_parameters`, got r0   r1   r2   rD   rB   mscalemscale_all_dimr	   r   c                 S   s"   | dkrdS d| t |  d S )Nr   r1   g?)mathlog)scalerH   r   r   r   
get_mscale  s   z,_compute_yarn_parameters.<locals>.get_mscaleN	beta_fast    	beta_slowc                 S   s*   |t || d t j   dt |  S )zPInverse dimension formula to find the dimension based on the number of rotationsr3   )rJ   rK   pi)num_rotationsr/   r.   r   r   r   r   find_correction_dim  s   *z5_compute_yarn_parameters.<locals>.find_correction_dimc                    s@   t  | |||}t  ||||}t|dt||d fS )z.Find dimension range bounds based on rotationsr   r   )rJ   floorceilr   min)low_rothigh_rotr/   r.   r   lowhighrS   r   r   find_correction_range#  s   z7_compute_yarn_parameters.<locals>.find_correction_rangec                 S   s>   | |kr|d7 }t j|t jd|  ||   }t |dd}|S )NgMbP?r4   r   r   )r   r>   float32clamp)rV   r   r/   linear_func	ramp_funcr   r   r   linear_ramp_factor)  s
   z4_compute_yarn_parameters.<locals>.linear_ramp_factorr3   r6   )r   )r7   r8   r9   r   r0   r:   r;   r<   r=   rE   getr   r@   r   r>   r   )r   r   r   rA   r.   r0   r2   r/   rD   rB   rH   rI   r	   rM   rN   rP   r\   ra   	pos_freqsinv_freq_extrapolationinv_freq_interpolationrY   rZ   inv_freq_extrapolation_factorr   r   r[   r   _compute_yarn_parameters   sH   



"
 
rg   c                 K   s@  t |dkrtd| | j}t| dr| jnd}t| d| j| j }t|| }| j	d }| j	d }	| j	
d}
| j	
d	}t| d
rO| j}| j| j }
n| j}|du rl|
dkr]d}ntdt|
t|  }|r|||kr|tj|tj|d}n	tj|	tj|d}tjd|dtj|d | }d|||   }||fS )a  
    Computes the inverse frequencies with LongRoPE scaling. Please refer to the
    [original implementation](https://github.com/microsoft/LongRoPE)
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r   z]Unexpected arguments: `**rope_kwargs` should be unset in `_compute_longrope_parameters`, got r0   r1   r2   long_factorshort_factorrD   rB   r	   Nr   )r5   r   r3   )r7   r8   r9   r   r0   r:   r;   r<   r=   rE   rb   r	   r   rJ   sqrtrK   r   tensorr]   r>   r?   r@   )r   r   r   rA   r.   r0   r2   r/   rh   ri   rD   rB   r	   ext_factorsinv_freq_shaper   r   r   r   _compute_longrope_parametersB  s8   


rn   c                 K   s   t | ||fi |\}}| jd }| jd }| jd }| jd }	|	| }
|	| }dtj | }t||
k|| |}|	| | ||  }d| | | ||  }||k  ||
k  }t|||}||fS )a  
    Computes the inverse frequencies for llama 3.1.

    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    rD   low_freq_factorhigh_freq_factorr	   r3   r   )rC   rE   rJ   rQ   r   where)r   r   r   rA   r   rB   rD   ro   rp   old_context_lenlow_freq_wavelenhigh_freq_wavelenwaveleninv_freq_llamasmooth_factorsmoothed_inv_freqis_medium_freqr   r   r   _compute_llama3_parameters  s   



rz   )defaultlinearr$   yarnr%   llama3r&   received_keysrequired_keysoptional_keysignore_keysc                 C   s   d|v r|dh8 }| d |dur||8 }|| }|r&td|  d| |dur1|| | }n|| }|rDtd|  d|  dS dS )zYCompare the received keys in `config.rope_scaling` against the expected and optional keystyper&   Nz9Missing required keys in `rope_scaling` for 'rope_type'='z': z5Unrecognized keys in `rope_scaling` for 'rope_type'=')addKeyErrorloggerwarning)r&   r   r   r   r   missing_keysunused_keysr   r   r   _check_received_keys  s   	

r   c                 C   s@   | j }|d|dd }dh}t| }t||||d d S )Nr&   r   r   )rE   rb   setkeysr   )r   r   rE   r&   r   r   r   r   r   !_validate_default_rope_parameters  s
   r   c                 C   sx   | j }|d|dd }ddh}t| }t||||d |d }|d u s0t|tr0|dk r:td|  d S d S )Nr&   r   rD   r   r1   8`rope_scaling`'s factor field must be a float >= 1, got 	rE   rb   r   r   r   
isinstancer@   r   r   )r   r   rE   r&   r   r   rD   r   r   r   (_validate_linear_scaling_rope_parameters  s   r   c                 C   s   | j }|d|dd }ddh}dh}t| }t|||||d |d }|d u s4t|tr4|dk r>td|  d S d S )Nr&   r   rD   r	   r   r1   r   r   )r   r   rE   r&   r   r   r   rD   r   r   r   )_validate_dynamic_scaling_rope_parameters  s   r   c                 C   s6  | j }|d|dd }ddh}h d}t| }t|||||d |d }|d u s5t|tr5|dk r=td|  |d}|d urWt|trO|d	k rWtd
|  |d}	|	d urmt|	tsmtd|	  |d}
|
d urt|
tstd|
  |	pd|
pdk rtd|	 d|
 d d S d S )Nr&   r   rD   >   rH   rN   rP   rI   rB   r	   r   r1   r   rB   r   L`rope_scaling`'s attention_factor field must be a float greater than 0, got rN   z6`rope_scaling`'s beta_fast field must be a float, got rP   z6`rope_scaling`'s beta_slow field must be a float, got rO   r   zO`rope_scaling`'s beta_fast field must be greater than beta_slow, got beta_fast=z( (defaults to 32 if None) and beta_slow=z (defaults to 1 if None)r   )r   r   rE   r&   r   r   r   rD   rB   rN   rP   r   r   r   _validate_yarn_parameters  s6   


r   c                 C   s  | j }|d|dd }h d}h d}t| }t|||||d t| dr,| jnd}t| d| j| j	 }t
|| }	|d	}
t|
tsYtd
d |
D rYtd|
  t|
|	d ksptd|	d  dt|
  |d}t|tstdd |D rtd|  t||	d kstd|	d  dt|  t| drtd d S |d}|d u rtd nt|tr|dk rtd|  |d}|d urt|tr|dk rtd|  d S d S d S )Nr&   r   >   r&   rh   ri   >   rD   rB   r	   r   r0   r1   r2   ri   c                 s       | ]
}t |ttfV  qd S Nr   r=   r@   .0r'   r   r   r   	<genexpr>,      z0_validate_longrope_parameters.<locals>.<genexpr>zC`rope_scaling`'s short_factor field must be a list of numbers, got r3   z5`rope_scaling`'s short_factor field must have length z, got rh   c                 s   r   r   r   r   r   r   r   r   2  r   zB`rope_scaling`'s long_factor field must be a list of numbers, got z4`rope_scaling`'s long_factor field must have length r	   aY  This model has set a `original_max_position_embeddings` field, to be used together with `max_position_embeddings` to determine a scaling factor. Please set the `factor` field of `rope_scaling`with this ratio instead -- we recommend the use of this field over `original_max_position_embeddings`, as it is compatible with most model architectures.rD   z1Missing required keys in `rope_scaling`: 'factor'r   rB   g        r   )rE   rb   r   r   r   r   r0   r:   r;   r<   r=   r   listallr   r   r7   warning_oncer@   )r   r   rE   r&   r   r   r   r0   r2   r/   ri   rh   rD   rB   r   r   r   _validate_longrope_parameters  sH   




r   c           
      C   s6  | j }|d|dd }h d}t| }t||||d |d }|d u s0t|tr0|dk r8td|  |d }|d	 }|d u sIt|tsQtd
|  |d u sZt|tsbtd|  ||krqtd| d|  |d }	|	d u s~t|	t	std|	  |	| j
krtd|	 d| j
  d S d S )Nr&   r   >   rD   r&   ro   rp   r	   r   rD   r1   r   ro   rp   z<`rope_scaling`'s low_freq_factor field must be a float, got z=`rope_scaling`'s high_freq_factor field must be a float, got zc`rope_scaling`'s high_freq_factor field must be greater than low_freq_factor, got high_freq_factor=z and low_freq_factor=r	   zP`rope_scaling`'s original_max_position_embeddings field must be an integer, got zg`rope_scaling`'s original_max_position_embeddings field must be less than max_position_embeddings, got z and max_position_embeddings=)rE   rb   r   r   r   r   r@   r   r   r=   r   )
r   r   rE   r&   r   r   rD   ro   rp   r	   r   r   r   _validate_llama3_parametersP  sL   
r   c                 C   sd   t | dd}|du rdS |d|dd}t|}|dur'|| |d dS td| d dS )	zO
    Validate the RoPE config arguments, given a `PretrainedConfig` object
    rE   Nr&   r   r{   r   zTMissing validation function mapping in `ROPE_VALIDATION_FUNCTIONS` for 'rope_type'='')r:   rb   ROPE_VALIDATION_FUNCTIONSr   r   )r   r   rE   r&   validation_fnr   r   r   rope_config_validation  s   

r   )NNNr   )NN)$rJ   	functoolsr   typingr   configuration_utilsr   utilsr   r   
get_logger__name__r   r   r+   r=   tupler@   rC   rF   rG   rg   rn   rz   ROPE_INIT_FUNCTIONSstrr   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s   
?

+

*

4

a

@

/
&2&
