from typing import TYPE_CHECKING, Any, Dict, List

from ..integrations import prepare_for_hqq_linear
from ..utils import is_accelerate_available, is_hqq_available, is_torch_available, logging
from .base import HfQuantizer
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

if is_accelerate_available():
    from accelerate.hooks import remove_hook_from_module

if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


# Finds the parent of a node module named "name"
def find_parent(model, name):
    module_tree = name.split(".")[:-1]
    parent = model
    for m in module_tree:
        parent = parent._modules[m]
    return parent


class HqqHfQuantizer(HfQuantizer):
    """
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    """

    use_keep_in_fp32_modules = False
    requires_parameters_quantization = True
    requires_calibration = False
    required_packages = ["hqq"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.torch_dtype = None
        self.using_multi_gpu = False

    def validate_environment(self, *args, **kwargs):
        if not is_hqq_available():
            raise ImportError(
                "A valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: "
                "`https://github.com/mobiusml/hqq/`."
            )

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Converting weights from tf/flax weights is currently not supported, please make sure the weights "
                "are in PyTorch format."
            )

        if not torch.cuda.is_available():
            raise RuntimeError("No GPU found. A GPU is needed for quantization.")

        if self.torch_dtype is None:
            if "torch_dtype" in kwargs:
                self.torch_dtype = kwargs["torch_dtype"]
            else:
                self.torch_dtype = torch.float32
                logger.info("Setting torch_dtype to torch.float32 as the default value since it was not specified.")

        device_map = kwargs.get("device_map", None)
        if isinstance(device_map, dict):
            if "cpu" in device_map.values() or "disk" in device_map.values():
                raise ValueError(
                    "You are attempting to use an HQQ model with a device_map that contains a CPU or disk device. "
                    "This is not supported. Please remove the CPU or disk device from the device_map."
                )
            else:
                self.using_multi_gpu = len(set(device_map.values())) > 1

    def update_missing_keys(
        self, model: "PreTrainedModel", missing_keys: List[str], prefix: str, **kwargs
    ) -> List[str]:
        if self.pre_quantized:
            return [key for key in missing_keys if ("weight" not in key)]
        else:
            return missing_keys

    # Adds the corresponding state_dict keys of the HQQ-quantized layers
    def update_expected_keys(
        self, model: "PreTrainedModel", expected_keys: List[str], loaded_keys: List[str]
    ) -> List[str]:
        if not self.pre_quantized:
            return expected_keys

        # Collects all quantizable (linear) layers
        def _find_hqq_quantizable_layers(model, layers):
            for name, module in model.named_children():
                if isinstance(module, torch.nn.Linear):
                    layers.add(module.name)
                _find_hqq_quantizable_layers(module, layers)

        new_keys = set(expected_keys)
        if is_hqq_available():
            from hqq.core.quantize import HQQLinear

            # Name modules
            for name, module in model.named_modules():
                module.name = name

            # Valid modules are nn.Linear layers that will carry an HQQLinear state_dict
            _valid_modules = set()
            _find_hqq_quantizable_layers(model, _valid_modules)

            # Remove skipped modules
            _skipped_modules = set()
            for _module in _valid_modules:
                for _skip_module in model.config.quantization_config["skip_modules"]:
                    if _skip_module in _module:
                        _skipped_modules.add(_module)
            _valid_modules -= _skipped_modules

            # Reference keys of an HQQLinear state_dict (bias is handled separately)
            _ref_keys = HQQLinear(
                linear_layer=None, quant_config=None, compute_dtype=torch.float16, device="cpu"
            ).state_dict_keys() - {"bias"}

            # Clean-up: drop the keys belonging to the quantizable layers ...
            _rm_keys = set()
            for key in new_keys:
                if any(_module in key for _module in _valid_modules):
                    _rm_keys.add(key)
            new_keys -= _rm_keys
            # At this point, new_keys contains all the keys of the layers that are NOT HQQLinear or torch.nn.Linear

            # ... and re-populate them as either plain Linear or HQQLinear keys
            for _module in _valid_modules:
                if _module + ".weight" in loaded_keys:
                    new_keys.add(_module + ".weight")
                else:
                    new_keys.update({_module + "." + _ref_key for _ref_key in _ref_keys})
                if _module + ".bias" in loaded_keys:
                    new_keys.add(_module + ".bias")

        return list(new_keys)

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: Dict[str, Any],
        **kwargs,
    ) -> bool:
        if is_hqq_available():
            from hqq.core.quantize import HQQLinear
        module, tensor_name = get_module_from_name(model, param_name)

        if self.pre_quantized:
            return (
                (isinstance(module, torch.nn.Linear) or isinstance(module, HQQLinear))
                and tensor_name != "weight"
                and tensor_name != "bias"
            )
        else:
            return (isinstance(module, torch.nn.Linear) and tensor_name == "weight") or (
                isinstance(module, HQQLinear) and tensor_name == "bias"
            )

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        state_dict: Dict[str, Any],
        unexpected_keys: List[str],
    ):
        """
        Each nn.Linear layer is processed here.
        We first check if the corresponding module state_dict already contains HQQ-quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization.
        """

        if is_hqq_available():
            from hqq.core.quantize import HQQLinear

        module, tensor_name = get_module_from_name(model, param_name)
        layer_name = ".".join(param_name.split(".")[:-1])
        parent_module = find_parent(model, layer_name)
        node = layer_name.split(".")[-1]

        if tensor_name == "bias":
            # this should already be set
            return

        # Set module state_dict
        module_state_dict = {}
        for k, v in state_dict.items():
            if layer_name + "." in k:
                module_state_dict[k.split(".")[-1]] = v
                if unexpected_keys is not None and k in unexpected_keys:
                    unexpected_keys.remove(k)

        if self.pre_quantized:
            if isinstance(module, HQQLinear):
                return
            else:
                hqq_layer = HQQLinear(
                    linear_layer=None,
                    quant_config=None,
                    compute_dtype=self.torch_dtype,
                    device=target_device,
                )

                hqq_layer.load_state_dict(module_state_dict)

                if hqq_layer.bias is not None and isinstance(hqq_layer.bias, torch.Tensor):
                    hqq_layer.bias = torch.nn.Parameter(hqq_layer.bias)

                if self.using_multi_gpu:
                    hqq_layer = self._patch_layer_for_multigpu(hqq_layer)

                setattr(parent_module, node, hqq_layer)

                # Cleanup
                del module.__dict__, module
                torch.cuda.empty_cache()
                return

        # Step 1: populate module with weight/bias from module state dict
        for key in module_state_dict:
            setattr(module, key, torch.nn.Parameter(module_state_dict[key]))

        # Step 2: replace module with either HQQLinear or move it to device. We do this via setattr on the parent
        # as doing it on the module directly doesn't work.
        quant_config = model.config.quantization_config["quant_config"]
        skip_modules = model.config.quantization_config["skip_modules"]

        module_tag = ".".join(module.name.split(".")[-2:])
        module_quant_config = None
        if "weight_quant_params" in quant_config:
            module_quant_config = quant_config
        elif module_tag in quant_config:
            module_quant_config = quant_config[module_tag]

        for skip_module in skip_modules:
            if skip_module in module.name:
                module_quant_config = None
                break

        if module_quant_config is not None:
            hqq_layer = HQQLinear(
                module,
                module_quant_config,
                compute_dtype=self.torch_dtype,
                device=target_device,
                del_orig=True,
            )

            if hqq_layer.bias is not None and isinstance(hqq_layer.bias, torch.Tensor):
                hqq_layer.bias = torch.nn.Parameter(hqq_layer.bias)

            if self.using_multi_gpu:
                hqq_layer = self._patch_layer_for_multigpu(hqq_layer)

            setattr(parent_module, node, hqq_layer)

        else:
            module = module.to(dtype=self.torch_dtype, device=target_device)
            setattr(parent_module, node, module)

        torch.cuda.empty_cache()

    # Removes the accelerate hook and uses a simpler forward pass. Otherwise, this breaks with multi-gpu.
    def _patch_layer_for_multigpu(self, hqq_layer):
        hqq_layer = remove_hook_from_module(hqq_layer)

        def forward_with_device(self, x):
            out = torch.matmul(x.to(self.device), self.dequantize().t())
            if self.bias is not None:
                out += self.bias
            return out

        hqq_layer.forward = lambda x: forward_with_device(hqq_layer, x)
        return hqq_layer

    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
        model = prepare_for_hqq_linear(model, quantization_config=self.quantization_config)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        model.is_hqq_quantized = True
        model.is_hqq_serializable = self.is_serializable()
        return model

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return True
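
# Usage sketch (illustrative, not part of this module): this quantizer is dispatched
# automatically by `from_pretrained` when an `HqqConfig` is passed as `quantization_config`.
# The model id below is a placeholder assumption; the `dynamic_config` keys correspond to
# the `module_tag` (last two segments of the module name) matched in create_quantized_param.
#
#   import torch
#   from transformers import AutoModelForCausalLM, HqqConfig
#
#   # Same config for every nn.Linear layer:
#   quant_config = HqqConfig(nbits=4, group_size=64)
#
#   # Or a per-layer-tag config, e.g. 4-bit attention and 3-bit MLP projections:
#   q4 = {"nbits": 4, "group_size": 64}
#   q3 = {"nbits": 3, "group_size": 32}
#   quant_config = HqqConfig(
#       dynamic_config={
#           "self_attn.q_proj": q4,
#           "self_attn.k_proj": q4,
#           "self_attn.v_proj": q4,
#           "self_attn.o_proj": q4,
#           "mlp.gate_proj": q3,
#           "mlp.up_proj": q3,
#           "mlp.down_proj": q3,
#       }
#   )
#
#   model = AutoModelForCausalLM.from_pretrained(
#       "meta-llama/Llama-3.2-1B",  # placeholder model id
#       torch_dtype=torch.float16,
#       device_map="cuda",
#       quantization_config=quant_config,
#   )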
