import importlib.metadata
from typing import TYPE_CHECKING, Optional

from packaging import version

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import (
    is_auto_gptq_available,
    is_gptqmodel_available,
    is_optimum_available,
    is_torch_available,
    logging,
)
from ..utils.quantization_config import GPTQConfig, QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class GptqHfQuantizer(HfQuantizer):
    """
    Quantizer of the GPTQ method: the quantizer supports calibration of the model through the `auto_gptq` or
    `gptqmodel` package. Quantization is done under the hood for users if they load a non-prequantized model.
    """

    requires_calibration = False
    required_packages = ["optimum", "auto_gptq", "gptqmodel"]
    optimum_quantizer = None

    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        super().__init__(quantization_config, **kwargs)
        if not is_optimum_available():
            raise ImportError("Loading a GPTQ quantized model requires optimum (`pip install optimum`)")
        from optimum.gptq import GPTQQuantizer

        # The heavy lifting (layer conversion, calibration, weight packing) is delegated to optimum's GPTQQuantizer.
        self.optimum_quantizer = GPTQQuantizer.from_dict(self.quantization_config.to_dict_optimum())

    def validate_environment(self, *args, **kwargs):
        if not is_optimum_available():
            raise ImportError("Loading a GPTQ quantized model requires optimum (`pip install optimum`)")
        if is_auto_gptq_available() and is_gptqmodel_available():
            logger.warning("Detected gptqmodel and auto-gptq, will use gptqmodel")

        # CPU execution is supported by gptqmodel and by auto-gptq > 0.4.2; otherwise a GPU is mandatory.
        gptq_supports_cpu = (
            is_auto_gptq_available()
            and version.parse(importlib.metadata.version("auto-gptq")) > version.parse("0.4.2")
        ) or is_gptqmodel_available()
        if not gptq_supports_cpu and not torch.cuda.is_available():
            raise RuntimeError("GPU is required to quantize or run a quantized model.")
        elif not (is_auto_gptq_available() or is_gptqmodel_available()):
            raise ImportError(
                "Loading a GPTQ quantized model requires the gptqmodel (`pip install gptqmodel`) or auto-gptq"
                " (`pip install auto-gptq`) library."
            )
        elif is_auto_gptq_available() and version.parse(importlib.metadata.version("auto_gptq")) < version.parse(
            "0.4.2"
        ):
            raise ImportError(
                "You need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq`,"
                " or use gptqmodel instead: `pip install gptqmodel>=1.4.3`."
            )
        elif is_gptqmodel_available() and (
            version.parse(importlib.metadata.version("gptqmodel")) < version.parse("1.4.3")
            or version.parse(importlib.metadata.version("optimum")) < version.parse("1.23.99")
        ):
            raise ImportError("The gptqmodel version should be >= 1.4.3 and the optimum version should be >= 1.24.0")

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            torch_dtype = torch.float16
            logger.info("Loading the model in `torch.float16`. To overwrite it, set `torch_dtype` manually.")
        elif torch_dtype != torch.float16:
            logger.info("We suggest you set `torch_dtype=torch.float16` for better efficiency with GPTQ.")
        return torch_dtype

    def update_device_map(self, device_map):
        if device_map is None:
            device_map = {"": torch.device("cpu")}
        # Only auto-gptq lacks CPU support; in that case move the model to GPU 0.
        if not is_gptqmodel_available() and device_map in ("cpu", {"": torch.device("cpu")}):
            device_map = {"": 0}
        return device_map

    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if model.__class__.main_input_name != "input_ids":
            raise RuntimeError("We can only quantize pure text models.")

        if self.pre_quantized:
            # Compat: optimum versions after the gptqmodel refactor accept extra kwargs in convert_model.
            if version.parse(importlib.metadata.version("optimum")) <= version.parse("1.23.99"):
                model = self.optimum_quantizer.convert_model(model)
            else:
                model = self.optimum_quantizer.convert_model(model, **kwargs)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if self.pre_quantized:
            model = self.optimum_quantizer.post_init_model(model)
        else:
            if self.quantization_config.tokenizer is None:
                self.quantization_config.tokenizer = model.name_or_path

            # Run GPTQ calibration and pack the weights, then record the resulting config on the model.
            self.optimum_quantizer.quantize_model(model, self.quantization_config.tokenizer)
            model.config.quantization_config = GPTQConfig.from_dict(self.optimum_quantizer.to_dict())

    @property
    def is_trainable(self, model: Optional["PreTrainedModel"] = None):
        return True

    def is_serializable(self, safe_serialization=None):
        return True
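
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of this module): how this quantizer is
# reached in practice. Users never instantiate `GptqHfQuantizer` directly;
# `from_pretrained` dispatches to it when a GPTQ `quantization_config` is
# present on the checkpoint or passed by the caller. The model ids below are
# hypothetical placeholders.
#
#     from transformers import AutoModelForCausalLM, GPTQConfig
#
#     # Path 1: load an already-quantized checkpoint (`pre_quantized=True`).
#     # The quantizer swaps in quantized linear layers via `convert_model`
#     # before the weights load, then finalizes with `post_init_model`.
#     model = AutoModelForCausalLM.from_pretrained("org/model-gptq", device_map="auto")
#
#     # Path 2: quantize a full-precision model at load time. The quantizer
#     # runs `optimum_quantizer.quantize_model` with a calibration dataset
#     # in `_process_model_after_weight_loading`.
#     config = GPTQConfig(bits=4, dataset="c4")
#     model = AutoModelForCausalLM.from_pretrained(
#         "org/model", device_map="auto", quantization_config=config
#     )
# ---------------------------------------------------------------------------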