import importlib.metadata
import inspect
import warnings
from copy import deepcopy
from inspect import signature

from packaging import version

from ..utils import (
    get_available_devices,
    is_accelerate_available,
    is_bitsandbytes_available,
    is_bitsandbytes_multi_backend_available,
    is_ipex_available,
    is_torch_available,
    logging,
)


if is_bitsandbytes_available():
    import bitsandbytes as bnb
    import torch
    import torch.nn as nn

    from ..pytorch_utils import Conv1D

if is_accelerate_available():
    import accelerate
    from accelerate import init_empty_weights
    from accelerate.hooks import add_hook_to_module, remove_hook_from_module
    from accelerate.utils import find_tied_parameters


logger = logging.get_logger(__name__)


def set_module_quantized_tensor_to_device(module, tensor_name, device, value=None, quantized_stats=None):
    """
    A helper function to set a given tensor (parameter or buffer) of a module on a specific device (note that doing
    `param.to(device)` creates a new tensor not linked to the parameter, which is why we need this function). The
    function is adapted from the `set_module_tensor_to_device` function from accelerate, extended to support the
    class `Int8Params` from `bitsandbytes`.

    Args:
        module (`torch.nn.Module`):
            The module in which the tensor we want to move lives.
        tensor_name (`str`):
            The full name of the parameter/buffer.
        device (`int`, `str` or `torch.device`):
            The device on which to set the tensor.
        value (`torch.Tensor`, *optional*):
            The value of the tensor (useful when going from the meta device to any other device).
        quantized_stats (`dict[str, Any]`, *optional*):
            Dict with items for either 4-bit or 8-bit serialization
    """
    # Recurse through dotted tensor names such as `transformer.h.0.attn.c_attn.weight`.
    if "." in tensor_name:
        splits = tensor_name.split(".")
        for split in splits[:-1]:
            new_module = getattr(module, split)
            if new_module is None:
                raise ValueError(f"{module} has no attribute {split}.")
            module = new_module
        tensor_name = splits[-1]

    if tensor_name not in module._parameters and tensor_name not in module._buffers:
        raise ValueError(f"{module} does not have a parameter or a buffer named {tensor_name}.")
    is_buffer = tensor_name in module._buffers
    old_value = getattr(module, tensor_name)

    if old_value.device == torch.device("meta") and device not in ["meta", torch.device("meta")] and value is None:
        raise ValueError(f"{tensor_name} is on the meta device, we need a `value` to put in on {device}.")

    prequantized_loading = quantized_stats is not None
    if is_buffer or not is_bitsandbytes_available():
        is_8bit = False
        is_4bit = False
    else:
        is_4bit = hasattr(bnb.nn, "Params4bit") and isinstance(module._parameters[tensor_name], bnb.nn.Params4bit)
        is_8bit = isinstance(module._parameters[tensor_name], bnb.nn.Int8Params)

    if is_8bit or is_4bit:
        param = module._parameters[tensor_name]
        if param.device.type != "cuda":
            if value is None:
                new_value = old_value.to(device)
            elif isinstance(value, torch.Tensor):
                new_value = value.to("cpu")
            else:
                new_value = torch.tensor(value, device="cpu")

            # Support models using `Conv1D` in place of `nn.Linear` (e.g. gpt2) by transposing the weight matrix
            # prior to quantization. Since weights are saved in the correct "orientation", we skip transposing when
            # loading prequantized weights.
            if issubclass(module.source_cls, Conv1D) and not prequantized_loading:
                new_value = new_value.T

            kwargs = old_value.__dict__

            if prequantized_loading != (new_value.dtype in (torch.int8, torch.uint8)):
                raise ValueError(
                    f"Value dtype `{new_value.dtype}` is not compatible with parameter quantization status."
                )

            if is_8bit:
                is_8bit_serializable = version.parse(importlib.metadata.version("bitsandbytes")) > version.parse(
                    "0.37.2"
                )
                if new_value.dtype in (torch.int8, torch.uint8) and not is_8bit_serializable:
                    raise ValueError(
                        "Detected int8 weights but the version of bitsandbytes is not compatible with int8 "
                        "serialization. Make sure to download the latest `bitsandbytes` version. "
                        "`pip install --upgrade bitsandbytes`."
                    )
                new_value = bnb.nn.Int8Params(new_value, requires_grad=False, **kwargs).to(device)
                if prequantized_loading:
                    setattr(new_value, "SCB", quantized_stats["SCB"].to(device))
            elif is_4bit:
                if prequantized_loading:
                    is_4bit_serializable = version.parse(importlib.metadata.version("bitsandbytes")) >= version.parse(
                        "0.41.3"
                    )
                    if new_value.dtype in (torch.int8, torch.uint8) and not is_4bit_serializable:
                        raise ValueError(
                            "Detected 4-bit weights but the version of bitsandbytes is not compatible with 4-bit "
                            "serialization. Make sure to download the latest `bitsandbytes` version. "
                            "`pip install --upgrade bitsandbytes`."
                        )
                    new_value = bnb.nn.Params4bit.from_prequantized(
                        data=new_value,
                        quantized_stats=quantized_stats,
                        requires_grad=False,
                        device=device,
                        **kwargs,
                    )
                else:
                    new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
            module._parameters[tensor_name] = new_value
    else:
        if value is None:
            new_value = old_value.to(device)
        elif isinstance(value, torch.Tensor):
            new_value = value.to(device)
        else:
            new_value = torch.tensor(value, device=device)

        if is_buffer:
            module._buffers[tensor_name] = new_value
        else:
            new_value = nn.Parameter(new_value, requires_grad=old_value.requires_grad)
            module._parameters[tensor_name] = new_value


def _replace_with_bnb_linear(
    model,
    modules_to_not_convert=None,
    current_key_name=None,
    quantization_config=None,
    has_been_replaced=False,
):
    """
    Private method that wraps the recursion for module replacement.

    Returns the converted model and a boolean that indicates if the conversion has been successful or not.
    """
    for name, module in model.named_children():
        if current_key_name is None:
            current_key_name = []
        current_key_name.append(name)

        if (isinstance(module, nn.Linear) or isinstance(module, Conv1D)) and name not in modules_to_not_convert:
            # Check that the dotted path of the current module is not excluded via `modules_to_not_convert`.
            current_key_name_str = ".".join(current_key_name)
            if not any(
                (key + "." in current_key_name_str) or (key == current_key_name_str) for key in modules_to_not_convert
            ):
                with init_empty_weights():
                    if isinstance(module, Conv1D):
                        in_features, out_features = module.weight.shape
                    else:
                        in_features = module.in_features
                        out_features = module.out_features

                    if quantization_config.quantization_method() == "llm_int8":
                        model._modules[name] = bnb.nn.Linear8bitLt(
                            in_features,
                            out_features,
                            module.bias is not None,
                            has_fp16_weights=quantization_config.llm_int8_has_fp16_weight,
                            threshold=quantization_config.llm_int8_threshold,
                        )
                        has_been_replaced = True
                    else:
                        if (
                            quantization_config.llm_int8_skip_modules is not None
                            and name in quantization_config.llm_int8_skip_modules
                        ):
                            pass
                        else:
                            extra_kwargs = (
                                {"quant_storage": quantization_config.bnb_4bit_quant_storage}
                                if "quant_storage" in list(signature(bnb.nn.Linear4bit).parameters)
                                else {}
                            )
                            model._modules[name] = bnb.nn.Linear4bit(
                                in_features,
                                out_features,
                                module.bias is not None,
                                quantization_config.bnb_4bit_compute_dtype,
                                compress_statistics=quantization_config.bnb_4bit_use_double_quant,
                                quant_type=quantization_config.bnb_4bit_quant_type,
                                **extra_kwargs,
                            )
                            has_been_replaced = True
                    # Store the module class in case we need to transpose the weight later
                    model._modules[name].source_cls = type(module)
                    # Force requires_grad to False to avoid unexpected errors
                    model._modules[name].requires_grad_(False)
        if len(list(module.children())) > 0:
            _, has_been_replaced = _replace_with_bnb_linear(
                module,
                modules_to_not_convert,
                current_key_name,
                quantization_config,
                has_been_replaced=has_been_replaced,
            )
        # Remove the last key for recursion
        current_key_name.pop(-1)
    return model, has_been_replaced
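

def _demo_skip_module_matching():
    # Illustrative sketch, not part of the original module: mirrors the key matching used in
    # `_replace_with_bnb_linear` above, where an entry of `modules_to_not_convert` excludes a
    # module when it equals the dotted path or is a dotted prefix of it. The example names are
    # made up for demonstration purposes.
    modules_to_not_convert = ["lm_head", "model.decoder.layers.0.self_attn"]
    current_key_name_str = "model.decoder.layers.0.self_attn.q_proj"
    skipped = any(
        (key + "." in current_key_name_str) or (key == current_key_name_str)
        for key in modules_to_not_convert
    )
    return skipped  # True: `q_proj` lives under an excluded prefix, so it stays unquantized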


def replace_with_bnb_linear(model, modules_to_not_convert=None, current_key_name=None, quantization_config=None):
    """
    A helper function to replace all `torch.nn.Linear` modules by `bnb.nn.Linear8bit` modules from the `bitsandbytes`
    library. This will enable running your models using mixed int8 precision as described by the paper `LLM.int8():
    8-bit Matrix Multiplication for Transformers at Scale`. Make sure `bitsandbytes` compiled with the correct CUDA
    version of your hardware is installed before running this function. `pip install -i https://test.pypi.org/simple/
    bitsandbytes`

    The function will be run recursively and replace all `torch.nn.Linear` modules except for the `lm_head` that should
    be kept as a `torch.nn.Linear` module. The replacement is done under `init_empty_weights` context manager so no
    CPU/GPU memory is required to run this function. Int8 mixed-precision matrix decomposition works by separating a
    matrix multiplication into two streams: (1) a systematic feature outlier stream matrix multiplied in fp16
    (0.01%), (2) a regular stream of int8 matrix multiplication (99.9%). With this method, int8 inference with no
    predictive degradation is possible for very large models (>=176B parameters).

    Parameters:
        model (`torch.nn.Module`):
            Input model or `torch.nn.Module` as the function is run recursively.
        modules_to_not_convert (`List[str]`, *optional*, defaults to `["lm_head"]`):
            Names of the modules to not convert in `Linear8bitLt`. In practice we keep the `lm_head` in full precision
            for numerical stability reasons.
        current_key_name (`List[str]`, *optional*):
            An array to track the current key of the recursion. This is used to check whether the current key (part of
            it) is not in the list of modules to not convert (for instance modules that are offloaded to `cpu` or
            `disk`).
        quantization_config (`transformers.utils.quantization_config.BitsAndBytesConfig`):
            To configure and manage settings related to quantization, a technique used to compress neural network
            models by reducing the precision of the weights and activations, thus making models more efficient in
            terms of both storage and computation.
    """
    modules_to_not_convert = ["lm_head"] if modules_to_not_convert is None else modules_to_not_convert
    model, has_been_replaced = _replace_with_bnb_linear(
        model, modules_to_not_convert, current_key_name, quantization_config
    )

    if not has_been_replaced:
        logger.warning(
            "You are loading your model in 8bit or 4bit but no linear modules were found in your model."
            " Please double check your model architecture, or submit an issue on github if you think this is a bug."
        )

    return model


def replace_8bit_linear(*args, **kwargs):
    warnings.warn(
        "`replace_8bit_linear` will be deprecated in a future version, please use `replace_with_bnb_linear` instead",
        FutureWarning,
    )
    return replace_with_bnb_linear(*args, **kwargs)


def set_module_8bit_tensor_to_device(*args, **kwargs):
    warnings.warn(
        "`set_module_8bit_tensor_to_device` will be deprecated in a future version, please use "
        "`set_module_quantized_tensor_to_device` instead",
        FutureWarning,
    )
    return set_module_quantized_tensor_to_device(*args, **kwargs)


def get_keys_to_not_convert(model):
    """
    A utility function to get the key of the module to keep in full precision, if any. For example, for CausalLM
    modules we may want to keep the lm_head in full precision for numerical stability reasons. For other
    architectures, we want to keep the tied weights of the model. The function will return a list of the keys of the
    modules to not convert in int8.

    Parameters:
    model (`torch.nn.Module`):
        Input model
    """
    # Create a copy of the model and tie the weights, then check if it contains tied weights.
    tied_model = deepcopy(model)  # this has 0 cost since it is done inside `init_empty_weights` context manager
    tied_model.tie_weights()

    tied_params = find_tied_parameters(tied_model)
    # For compatibility with Accelerate < 0.18
    if isinstance(tied_params, dict):
        tied_keys = sum(list(tied_params.values()), []) + list(tied_params.keys())
    else:
        tied_keys = sum(tied_params, [])
    has_tied_params = len(tied_keys) > 0

    # If there are no tied weights, we want to keep the lm_head (output embedding) in full precision.
    if not has_tied_params:
        output_emb = model.get_output_embeddings()
        if output_emb is not None:
            list_last_module = [name for name, module in model.named_modules() if id(module) == id(output_emb)]
            return list_last_module

    # Otherwise, there is no output embedding defined: simply keep the last module in full precision.
    list_modules = list(model.named_parameters())
    list_last_module = [list_modules[-1][0]]
    # add the last module together with the tied weights
    intersection = set(list_last_module) - set(tied_keys)
    list_untouched = list(set(tied_keys)) + list(intersection)

    # remove ".weight" and ".bias" from the keys
    names_to_remove = [".weight", ".bias"]
    filtered_module_names = []
    for name in list_untouched:
        for name_to_remove in names_to_remove:
            if name_to_remove in name:
                name = name.replace(name_to_remove, "")
        filtered_module_names.append(name)

    return filtered_module_names


def dequantize_bnb_weight(weight: "torch.nn.Parameter", dtype: "torch.dtype", state=None):
    """
    Helper function to dequantize 4bit or 8bit bnb weights.

    If the weight is not a bnb quantized weight, it will be returned as is.
    """
    if not isinstance(weight, torch.nn.Parameter):
        raise TypeError(f"Input weight should be of type nn.Parameter, got {type(weight)} instead")

    cls_name = weight.__class__.__name__
    if cls_name not in ("Params4bit", "Int8Params"):
        return weight

    if cls_name == "Params4bit":
        output_tensor = bnb.functional.dequantize_4bit(weight.data, weight.quant_state)
        logger.warning_once(
            f"The model is going to be dequantized in {output_tensor.dtype} - if you want to upcast it to another "
            "dtype, make sure to pass the desired dtype when quantizing the model through `bnb_4bit_quant_type` "
            "argument of `BitsAndBytesConfig`"
        )
        return output_tensor.to(dtype)

    if state.SCB is None:
        state.SCB = weight.SCB

    if hasattr(bnb.functional, "int8_vectorwise_dequant"):
        # Use the bitsandbytes API if it is available.
        dequantized = bnb.functional.int8_vectorwise_dequant(weight.data, state.SCB)
    else:
        # Multiply by (scale/127) to dequantize.
        dequantized = weight.data * state.SCB.view(-1, 1) * 7.874015718698502e-3

    return dequantized.to(dtype)
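

def _demo_int8_dequant_roundtrip():
    # Illustrative sketch, not part of the original module: shows the row-wise absmax scheme that
    # the manual int8 fallback in `dequantize_bnb_weight` inverts (`W_int8 * SCB / 127`). Plain
    # tensors stand in for a real `Int8Params`/`SCB` pair, and it relies on the conditional
    # `torch` import at the top of this module.
    rows = torch.randn(4, 8)
    scb = rows.abs().max(dim=1).values  # per-row absmax scales, playing the role of `SCB`
    w_int8 = torch.round(rows / scb.view(-1, 1) * 127).to(torch.int8)
    w_dequant = w_int8.to(torch.float32) * scb.view(-1, 1) / 127
    # Quantization error is bounded by half a quantization step per element.
    torch.testing.assert_close(w_dequant, rows, rtol=0.05, atol=0.05)
    return w_dequant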


def _create_accelerate_new_hook(old_hook):
    """
    Creates a new hook based on the old hook. Use it only if you know what you are doing!
    This method is a copy of: https://github.com/huggingface/peft/blob/748f7968f3a31ec06a1c2b0328993319ad9a150a/src/peft/utils/other.py#L245
    with some changes
    """
    old_hook_cls = getattr(accelerate.hooks, old_hook.__class__.__name__)
    old_hook_attr = old_hook.__dict__
    filtered_old_hook_attr = {}
    old_hook_init_signature = inspect.signature(old_hook_cls.__init__)
    for k in old_hook_attr:
        if k in old_hook_init_signature.parameters:
            filtered_old_hook_attr[k] = old_hook_attr[k]
    new_hook = old_hook_cls(**filtered_old_hook_attr)
    return new_hook


def _dequantize_and_replace(
    model,
    dtype,
    modules_to_not_convert=None,
    current_key_name=None,
    quantization_config=None,
    has_been_replaced=False,
):
    """
    Converts a quantized model into its dequantized original version. The newly converted model will have some
    performance drop compared to the original model before quantization - use it only for specific use cases such as
    merging QLoRA adapters.

    Returns the converted model and a boolean that indicates if the conversion has been successful or not.
    """
    quant_method = quantization_config.quantization_method()

    target_cls = bnb.nn.Linear8bitLt if quant_method == "llm_int8" else bnb.nn.Linear4bit

    for name, module in model.named_children():
        if current_key_name is None:
            current_key_name = []
        current_key_name.append(name)

        if isinstance(module, target_cls) and name not in modules_to_not_convert:
            # Check that the dotted path of the current module is not excluded via `modules_to_not_convert`.
            current_key_name_str = ".".join(current_key_name)

            if not any(
                (key + "." in current_key_name_str) or (key == current_key_name_str) for key in modules_to_not_convert
            ):
                bias = getattr(module, "bias", None)

                device = module.weight.device
                with init_empty_weights():
                    new_module = torch.nn.Linear(module.in_features, module.out_features, bias is not None)

                if quant_method == "llm_int8":
                    state = module.state
                else:
                    state = None

                new_module.weight = torch.nn.Parameter(dequantize_bnb_weight(module.weight, dtype, state))

                if bias is not None:
                    new_module.bias = bias

                # Create a new hook and attach it in case we use accelerate
                if hasattr(module, "_hf_hook"):
                    old_hook = module._hf_hook
                    new_hook = _create_accelerate_new_hook(old_hook)

                    remove_hook_from_module(module)
                    add_hook_to_module(new_module, new_hook)

                new_module.to(device)
                model._modules[name] = new_module
                has_been_replaced = True
        if len(list(module.children())) > 0:
            _, has_been_replaced = _dequantize_and_replace(
                module,
                dtype,
                modules_to_not_convert,
                current_key_name,
                quantization_config,
                has_been_replaced=has_been_replaced,
            )
        # Remove the last key for recursion
        current_key_name.pop(-1)
    return model, has_been_replaced


def dequantize_and_replace(
    model,
    modules_to_not_convert=None,
    quantization_config=None,
):
    model, has_been_replaced = _dequantize_and_replace(
        model,
        model.dtype,
        modules_to_not_convert=modules_to_not_convert,
        quantization_config=quantization_config,
    )

    if not has_been_replaced:
        logger.warning(
            "For some reason the model has not been properly dequantized. You might see unexpected behavior."
        )

    return model


def _validate_bnb_multi_backend_availability(raise_exception):
    import bitsandbytes as bnb

    bnb_supported_devices = getattr(bnb, "supported_torch_devices", set())
    available_devices = get_available_devices()

    if available_devices == {"cpu"} and not is_ipex_available():
        from importlib.util import find_spec

        if find_spec("intel_extension_for_pytorch"):
            logger.warning(
                "You have Intel IPEX installed but if you're intending to use it for CPU, it might not have the right"
                " version. Be sure to double check that your PyTorch and IPEX installs are compatible."
            )

        available_devices = frozenset([device for device in available_devices if device != "cpu"])

    if not available_devices.intersection(bnb_supported_devices):
        if raise_exception:
            bnb_supported_devices_with_info = set(
                '"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch'
                " version)"
                if device == "cpu"
                else device
                for device in bnb_supported_devices
            )
            err_msg = (
                f"None of the available devices `available_devices = {available_devices or None}` are supported by"
                f" the bitsandbytes version you have installed: `bnb_supported_devices = {bnb_supported_devices_with_info}`."
                " Please check the docs to see if the backend you intend to use is available and how to install it:"
                " https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend"
            )

            logger.error(err_msg)
            raise RuntimeError(err_msg)

        logger.warning("No supported devices found for bitsandbytes multi-backend.")
        return False

    logger.debug("Multi-backend validation successful.")
    return True


def _validate_bnb_cuda_backend_availability(raise_exception):
    if not is_torch_available():
        return False

    import torch

    if not torch.cuda.is_available():
        log_msg = (
            "CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform"
            " enabled version of bitsandbytes, which is currently a work in progress. Please check currently"
            " supported platforms and installation instructions at"
            " https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend"
        )
        if raise_exception:
            logger.error(log_msg)
            raise RuntimeError(log_msg)

        logger.warning(log_msg)
        return False

    logger.debug("CUDA backend validation successful.")
    return True


def validate_bnb_backend_availability(raise_exception=False):
    """
    Validates if the available devices are supported by bitsandbytes, optionally raising an exception if not.
    """
    if not is_bitsandbytes_available():
        if importlib.util.find_spec("bitsandbytes") and version.parse(
            importlib.metadata.version("bitsandbytes")
        ) < version.parse("0.43.1"):
            return _validate_bnb_cuda_backend_availability(raise_exception)
        return False

    if is_bitsandbytes_multi_backend_available():
        return _validate_bnb_multi_backend_availability(raise_exception)

    return _validate_bnb_cuda_backend_availability(raise_exception)