from typing import TYPE_CHECKING, Any, Dict, List, Optional

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_eetq_available, is_torch_available, logging
from .quantizers_utils import get_module_from_name


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class EetqHfQuantizer(HfQuantizer):
    """
    8-bit quantization from EETQ quantization method:
        before loading: converts transformer layers into W8A16Linear during loading: load 16bit weight and pass to the
        layer object after: quantizes individual weights in Linear8bitLt into 8bit at first .cuda() call
    """

    requires_parameters_quantization = True
    requires_calibration = False

    required_packages = ["eetq", "accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_eetq_available():
            raise ImportError(
                "Using `eetq` 8-bit quantization requires eetq. "
                "Please install the latest version of eetq from : https://github.com/NetEase-FuXi/EETQ"
            )

        try:
            import eetq  # noqa: F401
        except ImportError as exc:
            if "shard_checkpoint" in str(exc):
                # EETQ 1.0.0 imports the `shard_checkpoint` helper that was removed from recent
                # transformers releases, which surfaces here as an ImportError.
                raise ImportError(
                    "You are using a version of EETQ that is incompatible with the current transformers version. "
                    "Either downgrade transformers to <= v4.46.3 or, if available, upgrade EETQ to > v1.0.0."
                ) from exc
            raise

        if not is_accelerate_available():
            raise ImportError("Loading an EETQ quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Converting into 8-bit weights from tf/flax weights is currently not supported, please make "
                "sure the weights are in PyTorch format."
            )

        if not torch.cuda.is_available():
            raise RuntimeError("No GPU found. A GPU is needed for quantization.")

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded an EETQ model on CPU and have a CUDA device available, make sure to set "
                "your model on a GPU device in order to run your model."
            )
        elif isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
            raise ValueError(
                "You are attempting to load an EETQ model with a device_map that contains a CPU or disk device. "
                "This is not supported. Please remove the CPU or disk device from the device_map."
            )

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            torch_dtype = torch.float16
            logger.info(
                "Overriding torch_dtype=%s with `torch_dtype=torch.float16` due to requirements of `eetq` to enable "
                "model loading in 8-bit. Pass your own torch_dtype to specify the dtype of the remaining non-linear "
                "layers or pass torch_dtype=torch.float16 to remove this warning.",
                torch_dtype,
            )
        elif torch_dtype != torch.float16:
            logger.info("We suggest you to set `torch_dtype=torch.float16` for better efficiency with EETQ.")
        return torch_dtype

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ):
        from eetq import EetqLinear

        module, tensor_name = get_module_from_name(model, param_name)

        if isinstance(module, EetqLinear):
            if self.pre_quantized or tensor_name == "bias":
                if tensor_name == "weight" and param_value.dtype != torch.int8:
                    raise ValueError("Expect quantized weights but got an unquantized weight")
                return False
            else:
                if tensor_name == "weight_scale":
                    raise ValueError("Expect unquantized weights but got a quantized weight_scale")
                return True
        return False

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        state_dict: Dict[str, Any],
        unexpected_keys: Optional[List[str]] = None,
    ):
        """
        quantizes weights into qweight and weight_scales
        """
        from eetq import quantize_and_preprocess_weights

        module, tensor_name = get_module_from_name(model, param_name)
        new_value, weight_scale = quantize_and_preprocess_weights(param_value)

        module._buffers[tensor_name] = new_value.to(target_device)
        module.register("weight_scales", weight_scale.to(target_device))

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[List[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_eetq_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_eetq_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

        model.config.quantization_config = self.quantization_config

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return True
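
# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module; the checkpoint name below is
# illustrative): this quantizer is not instantiated directly -- it is selected
# by `from_pretrained` when an EETQ quantization config is supplied.
#
#   from transformers import AutoModelForCausalLM, EetqConfig
#
#   quantization_config = EetqConfig("int8")
#   model = AutoModelForCausalLM.from_pretrained(
#       "facebook/opt-350m",                      # illustrative checkpoint
#       device_map="auto",
#       quantization_config=quantization_config,
#   )
# ---------------------------------------------------------------------------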