import importlib.metadata
from typing import TYPE_CHECKING, List, Optional

from packaging import version

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_auto_awq_available, is_torch_available, logging
from ..utils.quantization_config import AWQLinearVersion


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class AwqQuantizer(HfQuantizer):
    """
    4-bit quantization for Activation-aware Weight Quantization (AWQ) (https://arxiv.org/abs/2306.00978)
    """

    # AWQ requires data calibration, so only pre-quantized checkpoints are supported (inference only).
    requires_calibration = True

    required_packages = ["awq", "accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)

    def validate_environment(self, device_map, **kwargs):
        if not is_auto_awq_available():
            raise ImportError("Loading an AWQ quantized model requires auto-awq library (`pip install autoawq`)")

        if not is_accelerate_available():
            raise ImportError("Loading an AWQ quantized model requires accelerate (`pip install accelerate`)")

        # Without CUDA, fall back from the GEMM kernels to the IPEX backend (CPU/XPU).
        if self.quantization_config.version == AWQLinearVersion.GEMM and not torch.cuda.is_available():
            logger.warning_once("No CUDA found, replace GEMM with IPEX version to support non-cuda AWQ model.")
            self.quantization_config.version = AWQLinearVersion.IPEX

        if self.quantization_config.version == AWQLinearVersion.IPEX:
            if version.parse(importlib.metadata.version("autoawq")) < version.parse("0.2.6"):
                raise RuntimeError(
                    "To use IPEX backend, you need autoawq>0.2.6. Please install the latest version or from source."
                )
            if device_map is None:
                logger.warning_once(
                    "You have loaded an AWQ model without setting device_map, please set 'cpu' or 'xpu' or 'auto'"
                )
            elif isinstance(device_map, dict) and "disk" in device_map.values():
                raise ValueError(
                    "You are attempting to load an IPEX version AWQ model with a device_map that contains disk"
                    " device. This is not supported. Please make sure only cpu and xpu in the device_map."
                )
        else:
            if not torch.cuda.is_available():
                raise RuntimeError(
                    "GPU is required to run AWQ quantized model. You can use IPEX version AWQ if you have an Intel CPU"
                )

            if device_map is None:
                logger.warning_once(
                    "You have loaded an AWQ model on CPU and have a CUDA device available, make sure to set your"
                    " model on a GPU device in order to run your model."
                )
            elif isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
                raise ValueError(
                    "You are attempting to load an AWQ model with a device_map that contains a CPU or disk device."
                    " This is not supported. Please remove the CPU or disk device from the device_map."
                )

    def update_torch_dtype(self, torch_dtype):
        if torch_dtype is None:
            torch_dtype = torch.float16
            logger.info("Loading the model in `torch.float16`. To overwrite it, set `torch_dtype` manually.")
        elif torch_dtype != torch.float16:
            logger.warning("We suggest you to set `torch_dtype=torch.float16` for better efficiency with AWQ.")
        return torch_dtype

    def _process_model_before_weight_loading(
        self, model: "PreTrainedModel", keep_in_fp32_modules: Optional[List[str]] = None, **kwargs
    ):
        from ..integrations import replace_quantization_scales, replace_with_awq_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        # Swap eligible nn.Linear layers for AWQ quantized linear layers.
        model, has_been_replaced = replace_with_awq_linear(
            model, quantization_config=self.quantization_config, modules_to_not_convert=self.modules_to_not_convert
        )

        model = replace_quantization_scales(model, model.config.model_type)

        if not has_been_replaced:
            logger.warning(
                "You are loading an AWQ model but no linear modules were found in your model."
                " Please double check your model architecture, or submit an issue on github if you think this is a bug."
            )

    def _process_model_after_weight_loading(self, model, **kwargs):
        if self.quantization_config.do_fuse:
            from ..integrations import fuse_awq_modules

            model = fuse_awq_modules(model, self.quantization_config)
            model._awq_is_fused = True

        if self.quantization_config.version == AWQLinearVersion.EXLLAMA:
            from ..integrations import post_init_awq_exllama_modules

            model = post_init_awq_exllama_modules(model, self.quantization_config.exllama_config)

        if self.quantization_config.version == AWQLinearVersion.IPEX:
            from ..integrations import post_init_awq_ipex_modules

            model = post_init_awq_ipex_modules(model)

    def is_serializable(self, safe_serialization=None):
        # Fused modules and the Exllama backend cannot be saved back to disk.
        if self.quantization_config.do_fuse:
            logger.warning("You cannot save an AWQ model that uses fused modules!")
            return False

        if self.quantization_config.version == AWQLinearVersion.EXLLAMA:
            logger.warning("You cannot save an AWQ model that uses Exllama backend!")
            return False

        return True

    @property
    def is_trainable(self):
        # PEFT fine-tuning on top of AWQ models requires at least this autoawq version.
        MIN_AWQ_VERSION_FOR_PEFT = "0.2.0"
        return version.parse(importlib.metadata.version("autoawq")) >= version.parse(MIN_AWQ_VERSION_FOR_PEFT)
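

# Usage sketch (not part of the original module, kept as comments so importing this file
# stays side-effect free): AwqQuantizer is not instantiated by users directly; it is
# selected by `from_pretrained` when a checkpoint carries an AWQ quantization config.
# The checkpoint id below is a hypothetical placeholder.
#
#   import torch
#   from transformers import AutoModelForCausalLM
#
#   model = AutoModelForCausalLM.from_pretrained(
#       "org/some-model-awq",       # hypothetical AWQ-quantized checkpoint
#       device_map="auto",          # GEMM/Exllama kernels need CUDA; IPEX covers CPU/XPU
#       torch_dtype=torch.float16,  # recommended dtype for AWQ (see update_torch_dtype)
#   )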