from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from ..utils import is_torch_available
from ..utils.quantization_config import QuantizationConfigMixin, QuantizationMethod
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

if is_torch_available():
    import torch
    from torch.nn import ModuleList
else:
    ModuleList = object


class HfQuantizer(ABC):
    """
    Abstract class of the HuggingFace quantizer. For now, it supports quantizing HF transformers models for
    inference and/or quantization. This class is used only within `transformers.PreTrainedModel.from_pretrained`
    and cannot yet be easily used outside the scope of that method.

    Attributes
        quantization_config (`transformers.utils.quantization_config.QuantizationConfigMixin`):
            The quantization config that defines the quantization parameters of your model that you want to quantize.
        modules_to_not_convert (`List[str]`, *optional*):
            The list of module names to not convert when quantizing the model.
        required_packages (`List[str]`, *optional*):
            The list of required pip packages to install prior to using the quantizer.
        requires_calibration (`bool`):
            Whether the quantization method requires calibrating the model before it can be used.
        requires_parameters_quantization (`bool`):
            Whether the quantization method requires creating a new Parameter. For example, bitsandbytes
            requires creating a new xxxParameter in order to properly quantize the model.
    FNquantization_configc                 K   sH   || _ |dg | _|dd| _| js | jr"td|j dd S d S )Nmodules_to_not_convertpre_quantizedTzThe quantization method z does require the model to be pre-quantized. You explicitly passed `pre_quantized=False` meaning your model weights are not quantized. Make sure to pass `pre_quantized=True` while knowing what you are doing.)r   popr   r   requires_calibration
ValueErrorquant_method)selfr   kwargs r   p/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/transformers/quantizers/base.py__init__8   s   zHfQuantizer.__init__torch_dtypetorch.dtypereturnc                 C      |S )aU  
        Some quantization methods require explicitly setting the dtype of the model to a
        target dtype. You need to override this method if you want to make sure that behavior is
        preserved.

        Args:
            torch_dtype (`torch.dtype`):
                The input dtype that is passed in `from_pretrained`
        """
        return torch_dtype

    def update_device_map(self, device_map: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """
        Override this method if you want to replace the existing device map with a new one. E.g. for
        bitsandbytes, since `accelerate` is a hard requirement, if no device_map is passed, the
        device_map is set to `"auto"`.

        Args:
            device_map (`Union[dict, str]`, *optional*):
                The device_map that is passed through the `from_pretrained` method.
        """
        return device_map

    def adjust_target_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        """
        Override this method if you want to adjust the `target_dtype` variable used in `from_pretrained`
        to compute the device_map in case the device_map is a `str`. E.g. for bitsandbytes we force-set `target_dtype`
        to `torch.int8` and for 4-bit we pass a custom enum `accelerate.CustomDtype.int4`.

        Args:
            torch_dtype (`torch.dtype`, *optional*):
                The torch_dtype that is used to compute the device_map.
        """
        return torch_dtype

    def update_missing_keys(self, model, missing_keys: List[str], prefix: str) -> List[str]:
        """
        Override this method if you want to adjust the `missing_keys`.

        Args:
            missing_keys (`List[str]`, *optional*):
                The list of missing keys in the checkpoint compared to the state dict of the model
        """
        return missing_keys

    def update_unexpected_keys(self, model, unexpected_keys: List[str], prefix: str) -> List[str]:
        """
        Override this method if you want to adjust the `unexpected_keys`.

        Args:
            unexpected_keys (`List[str]`, *optional*):
                The list of unexpected keys in the checkpoint compared to the state dict of the model
        """
        return unexpected_keys

    def update_missing_keys_after_loading(self, model, missing_keys: List[str], prefix: str) -> List[str]:
        """
        Override this method if you want to adjust the `missing_keys` after loading the model params,
        but before the model is post-processed.

        Args:
            missing_keys (`List[str]`, *optional*):
                The list of missing keys in the checkpoint compared to the state dict of the model
        """
        return missing_keys

    def update_expected_keys(self, model, expected_keys: List[str], loaded_keys: List[str]) -> List[str]:
        """
        Override this method if you want to adjust the `expected_keys`.

        Args:
            expected_keys (`List[str]`, *optional*):
                The list of the expected keys in the initialized model.
            loaded_keys (`List[str]`, *optional*):
                The list of the loaded keys in the checkpoint.
        """
        return expected_keys

    def get_special_dtypes_update(self, model, torch_dtype: "torch.dtype") -> Dict[str, "torch.dtype"]:
        """
        Returns dtypes for modules that are not quantized - used for the computation of the device_map in case
        one passes a str as a device_map. The method will use the `modules_to_not_convert` that is modified
        in `_process_model_before_weight_loading`.

        Args:
            model (`~transformers.PreTrainedModel`):
                The model to quantize
            torch_dtype (`torch.dtype`):
                The dtype passed in `from_pretrained` method.
        """
        return {
            name: torch_dtype
            for name, _ in model.named_parameters()
            if any(m in name for m in self.modules_to_not_convert)
        }
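    # Illustrative sketch (assumption, not from the original file): if `modules_to_not_convert`
    # contains "lm_head", the mapping returned above keeps that module in the unquantized dtype
    # when `accelerate` computes an automatic device map, e.g.:
    #
    #     special_dtypes = quantizer.get_special_dtypes_update(model, torch.float16)
    #     # -> {"lm_head.weight": torch.float16, ...}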

    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        """adjust max_memory argument for infer_auto_device_map() if extra memory is needed for quantization"""
        return max_memory

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ) -> bool:
        """
        Checks if a loaded state_dict component is part of a quantized param, plus some validation; only defined
        when `requires_parameters_quantization == True`, i.e. for quantization methods that require creating new
        parameters for quantization.
        """
        return False

    def create_quantized_param(self, *args, **kwargs) -> "torch.nn.Parameter":
        """
        Takes the needed components from the state_dict and creates a quantized param; only applicable if
        `requires_parameters_quantization == True`.
        """
        if not self.requires_parameters_quantization:
            raise AttributeError(
                f"`.create_quantized_param()` method is not supported by quantizer class {self.__class__.__name__}."
            )
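    # Illustrative note (assumption, not from the original file): methods that set
    # `requires_parameters_quantization = True` (e.g. bitsandbytes-style quantizers) typically
    # pair the two hooks during weight loading, roughly:
    #
    #     if quantizer.check_quantized_param(model, tensor, name, state_dict):
    #         quantizer.create_quantized_param(model, tensor, name, target_device, state_dict)
    #     else:
    #         ...  # fall back to the regular parameter assignment path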

    def validate_environment(self, *args, **kwargs):
        """
        This method is used to check for potential conflicts with arguments that are
        passed in `from_pretrained`. You need to define it for all future quantizers that are integrated with transformers.
        If no explicit check is needed, simply return nothing.
        """
        return

    def update_tp_plan(self, config):
        """updates the tp plan for the scales"""
        return config

    def preprocess_model(self, model: "PreTrainedModel", **kwargs):
        """
        Setting model attributes and/or converting model before weights loading. At this point
        the model should be initialized on the meta device so you can freely manipulate the skeleton
        of the model in order to replace modules in-place. Make sure to override the abstract method `_process_model_before_weight_loading`.

        Args:
            model (`~transformers.PreTrainedModel`):
                The model to quantize
            kwargs (`dict`, *optional*):
                The keyword arguments that are passed along `_process_model_before_weight_loading`.
        T)is_quantizedr   r   quantization_methodr   _convert_model_for_quantization$_process_model_before_weight_loadingr   r-   r   r   r   r   preprocess_model   s
   

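    # Illustrative sketch of the overall flow (assumption, not from the original file):
    # `from_pretrained` roughly drives a quantizer as
    #
    #     quantizer.validate_environment(torch_dtype=torch_dtype, device_map=device_map)
    #     quantizer.preprocess_model(model)     # swap in quantized modules on the meta device
    #     ...                                   # load the checkpoint weights
    #     quantizer.postprocess_model(model)    # finalize after the weights are in place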
zHfQuantizer.preprocess_modelc                 K   s   | j |fi |S )a  
        Post-process the model after the weights have been loaded.
        Make sure to override the abstract method `_process_model_after_weight_loading`.

        Args:
            model (`~transformers.PreTrainedModel`):
                The model to quantize
            kwargs (`dict`, *optional*):
                The keyword arguments that are passed along `_process_model_after_weight_loading`.
        """
        return self._process_model_after_weight_loading(model, **kwargs)

    def dequantize(self, model):
        """
        Potentially dequantize the model to retrieve the original model, with some loss in accuracy / performance.
        Note that not all quantization schemes support this.
        F)_dequantizehf_quantizerrV   r   _pre_quantization_dtyperX   r   r-   r   r   r   
dequantize   s   
zHfQuantizer.dequantizec                 C   s   t | jj d)NzH has no implementation of `dequantize`, please raise an issue on GitHub.)NotImplementedErrorr   r   rc   r   r   r   r`      s   zHfQuantizer._dequantizeskip_moduleskeep_in_fp32_modulesc                 C   s<   ddl m} g }|d u r|| }n|}|d ur|| |S )Nr
   )get_keys_to_not_convert)integrationsrh   extend)r-   rf   rg   rh   r   r   r   r   get_modules_to_not_convert  s   

z&HfQuantizer.get_modules_to_not_convertc                 C   rI   )zUFlag indicating whether the quantized model can carry out quantization aware trainingFr   r   r   r   r   is_qat_trainable     zHfQuantizer.is_qat_trainablec                 C   rI   )z;Flag indicating whether the quantized model can be compiledFr   rl   r   r   r   is_compileable  rn   zHfQuantizer.is_compileablec                 K      d S r6   r   r\   r   r   r   r[   !     z0HfQuantizer._process_model_before_weight_loadingc                 K   rp   r6   r   r\   r   r   r   r^   $  rq   z/HfQuantizer._process_model_after_weight_loadingc                 C   rp   r6   r   )r   safe_serializationr   r   r   is_serializable'  rq   zHfQuantizer.is_serializablec                 C   rp   r6   r   rl   r   r   r   is_trainable*  rD   zHfQuantizer.is_trainablec              	   C   s   ddl m} | D ];\}}|jj}|t v rE| jjt	j
krE|  t||\}}t| |j |j|< W d    n1 s@w   Y  q
d S )Nr   )init_empty_weights)
accelerateru   named_modulesrO   rP   !MODULES_TO_PATCH_FOR_QUANTIZATIONkeysr   r   r   COMPRESSED_TENSORSr   rV   get_text_config_modules)r   r-   ru   r:   modulemodule_class_nameparent_moduler   r   r   rZ   .  s   z+HfQuantizer._convert_model_for_quantization)r   r    r!   r    )r!   rK   )r-   r   )NNr6   )-rP   
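    # Minimal subclass sketch (illustration only, not part of the original file): a concrete
    # quantizer has to provide the abstract hooks above, roughly
    #
    #     class MyNoOpQuantizer(HfQuantizer):          # hypothetical name
    #         requires_calibration = False
    #
    #         def validate_environment(self, *args, **kwargs):
    #             return
    #
    #         def _process_model_before_weight_loading(self, model, **kwargs):
    #             return model
    #
    #         def _process_model_after_weight_loading(self, model, **kwargs):
    #             return model
    #
    #         def is_serializable(self, safe_serialization=None):
    #             return True
    #
    #         @property
    #         def is_trainable(self):
    #             return False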


class SequentialLlama4TextExperts(ModuleList):
    """
    A module that implements a compressed version of a list of expert modules.
    This is specifically designed to work with Llama4TextExperts in MoE layers.
    """

    def __init__(self, config):
        from transformers.models.llama4.modeling_llama4 import Llama4TextMLP

        super().__init__([Llama4TextMLP(config) for _ in range(config.num_local_experts)])
        self.num_experts = config.num_local_experts

    def forward(
        self,
        hidden_states: "torch.Tensor",
    ) -> "torch.Tensor":
        hidden_states = hidden_states.reshape(self.num_experts, -1, hidden_states.shape[-1])
        routed_out = torch.zeros_like(hidden_states)
        for expert_idx in range(self.num_experts):
            routed_out[expert_idx] = self[expert_idx](hidden_states[expert_idx])
        return routed_out


MODULES_TO_PATCH_FOR_QUANTIZATION = {
    "Llama4TextExperts": {
        "module_name": SequentialLlama4TextExperts,
        "quantization_methods": [QuantizationMethod.COMPRESSED_TENSORS, QuantizationMethod.BITS_AND_BYTES],
    }
}
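# Illustrative note (assumption, not from the original file): the registry above lets
# `_convert_model_for_quantization` swap a module by its class name before weights are loaded.
# For Llama 4, the fused `Llama4TextExperts` MoE block is replaced with this sequential version
# so that each expert MLP can be quantized independently; the forward above expects
# `hidden_states` already grouped per expert, i.e. a tensor reshapeable to
# (num_experts, tokens_per_expert, hidden_size).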