from typing import TYPE_CHECKING, Dict, List, Optional, Union

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_torch_available, logging


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class BitNetHfQuantizer(HfQuantizer):
    """
    1.58-bit quantization from the BitNet quantization method:
    before weight loading, it converts the model's linear layers into BitLinear layers.

    Check out the paper introducing this method: https://arxiv.org/pdf/2402.17764
    """

    requires_parameters_quantization = False
    requires_calibration = True

    required_packages = ["accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Loading a BitNet quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Loading ternary weights from tf/flax is currently not supported, please make sure the weights are"
                " in PyTorch format."
            )

        if not torch.cuda.is_available():
            logger.warning_once(
                "You don't have a GPU available to load the model, the inference will be slow because of weight"
                " unpacking"
            )
            return
        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model"
                " on a GPU device in order to run your model."
            )
        elif isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
            raise ValueError(
                "You are attempting to load a BitNet model with a device_map that contains a CPU or disk device."
                " This is not supported. Please remove the CPU or disk device from the device_map."
            )
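    # Illustration of the checks above (hypothetical device_map values, not part
    # of the original module): {"": "cuda:0"} or {"": 0} passes validation, while
    # {"": "cpu"}, {"": "disk"}, or any map whose values include "cpu"/"disk"
    # raises the ValueError, since CPU/disk placement is not supported for
    # BitNet weights.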
    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[List[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_bitnet_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_bitnet_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )
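    # Sketch of where these hooks run during `from_pretrained` (simplified view,
    # based only on this module): validate_environment() vets accelerate/CUDA and
    # the device_map; _process_model_before_weight_loading() swaps the linear
    # layers for BitLinear via replace_with_bitnet_linear() so the pre-quantized
    # checkpoint weights can be loaded directly; the after-loading hook is a no-op.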
    def adjust_max_memory(self, max_memory: Dict[str, Union[int, str]]) -> Dict[str, Union[int, str]]:
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory
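    # Worked example (values assumed to be numeric byte counts, as provided
    # upstream by accelerate): {"0": 10_000_000_000} becomes {"0": 9_000_000_000.0};
    # the 10% haircut presumably leaves headroom for unpacking the ternary weights.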
    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        target_dtype = torch.int8
        return target_dtype

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return False
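# Usage sketch (illustrative, not part of this module; the repo id below is a
# placeholder for any BitNet 1.58-bit checkpoint whose config carries a BitNet
# quantization_config, which routes loading through this quantizer):
#
#     from transformers import AutoModelForCausalLM
#
#     model = AutoModelForCausalLM.from_pretrained(
#         "some-org/bitnet-checkpoint",  # hypothetical repo id
#         device_map="cuda",  # "cpu"/"disk" placements are rejected by validate_environment
#     )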