from typing import TYPE_CHECKING, List, Optional

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_torch_available, is_vptq_available, logging
from ..utils.quantization_config import QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class VptqHfQuantizer(HfQuantizer):
    """
    Quantizer of the VPTQ method. Enables the loading of prequantized models.
    """

    requires_calibration = True
    required_packages = ["vptq"]

    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Using `vptq` quantization requires Accelerate: `pip install accelerate`")

        if not is_vptq_available():
            raise ImportError("Using `vptq` quantization requires VPTQ>=0.0.4: `pip install -U vptq`")

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            if torch.cuda.is_available():
                torch_dtype = torch.float16
                logger.info(
                    "CUDA available. Assuming VPTQ inference on GPU and loading the model in `torch.float16`. "
                    "To overwrite it, set `torch_dtype` manually."
                )
            else:
                import vptq

                device_availability = getattr(vptq, "device_availability", lambda device: False)
                if device_availability("cpu") is True:
                    raise RuntimeError("No GPU found. Please wait for the next release of VPTQ to use CPU inference")
                torch_dtype = torch.float32
                logger.info("No GPU found. Assuming VPTQ inference on CPU and loading the model in `torch.float32`.")
        return torch_dtype

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[List[str]] = None,
        **kwargs,
    ):
        """
        We don't have a parameter like `modules_to_not_convert` to indicate which layers should not be quantized,
        because `quantization_config` includes the layers that should be quantized.
        """
        from ..integrations import replace_with_vptq_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        replace_with_vptq_linear(
            model,
            quantization_config=self.quantization_config,
            modules_to_not_convert=self.modules_to_not_convert,
        )
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    @property
    def is_trainable(self, model: Optional["PreTrainedModel"] = None):
        return False

    def is_serializable(self, safe_serialization=None):
        return True
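
# A minimal usage sketch (illustrative only, not part of this module): loading a
# checkpoint that was prequantized with VPTQ. When the checkpoint's config.json
# carries a `quantization_config` with `quant_method="vptq"`, `from_pretrained`
# routes loading through `VptqHfQuantizer` automatically. The repo id below is a
# placeholder, not a real model name.
#
#     from transformers import AutoModelForCausalLM, AutoTokenizer
#
#     model_id = "some-org/llama-3-vptq-2bit"  # hypothetical VPTQ-quantized repo
#     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
#     tokenizer = AutoTokenizer.from_pretrained(model_id)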