o
    hT                     @   s  d Z ddlZddlmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZmZmZ e r7ddlZddlmZ eeZdd	 Ze rMe rMdd
lmZ nddlmZ G dd deZG dd deZdadd Zdd Zdd Zdd Z dd Z!dd Z"d"ddZ#d#d d!Z$dS )$z
Integration with Deepspeed
    N)partialmethod   )dep_version_check)is_accelerate_availableis_torch_availablelogging)nnc                  C   s@   t jdd u} | rztd}W dS  tjy   Y dS w d S )N	deepspeedTF)	importlibutil	find_specimportlib_metadatametadataPackageNotFoundError)package_exists_ r   w/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/transformers/integrations/deepspeed.pyis_deepspeed_available$   s   
r   )HfDeepSpeedConfig)objectc                       s    e Zd ZdZ fddZ  ZS )r   aJ  
    This object contains a DeepSpeed configuration dictionary and can be quickly queried for things like zero stage.

    A `weakref` of this object is stored in the module's globals to be able to access the config from areas where
    things like the Trainer object is not available (e.g. `from_pretrained` and `_get_resized_embeddings`). Therefore
    it's important that this object remains alive while the program is still running.

    [`Trainer`] uses the `HfTrainerDeepSpeedConfig` subclass instead. That subclass has logic to sync the configuration
    with values of [`TrainingArguments`] by replacing special placeholder values: `"auto"`. Without this special logic
    the DeepSpeed configuration is not modified in any way.

    Args:
        config_file_or_dict (`Union[str, Dict]`): path to DeepSpeed config file or dict.

    c                    s(   t |  td td t | d S )N
accelerater	   )set_hf_deepspeed_configr   super__init__selfconfig_file_or_dict	__class__r   r   r   J   s   zHfDeepSpeedConfig.__init__)__name__
__module____qualname____doc__r   __classcell__r   r   r   r   r   9   s    r   c                       sX   e Zd ZdZ fddZdd Zdd Zdd
dZeeddZ	dddZ
dd Z  ZS )HfTrainerDeepSpeedConfigz
    The `HfTrainerDeepSpeedConfig` object is meant to be created during `TrainingArguments` object creation and has the
    same lifespan as the latter.
    c                    s   t  | d | _g | _d S N)r   r   _dtype
mismatchesr   r   r   r   r   X   s   
z!HfTrainerDeepSpeedConfig.__init__c                 C   s   | j d u r	td| j S )Nz8trainer_config_process() wasn't called yet to tell dtype)r'   
ValueErrorr   r   r   r   dtype]   s   
zHfTrainerDeepSpeedConfig.dtypec                 C   s   |  |}|d u rdS |dkS )NFauto)	get_value)r   ds_key_longvalr   r   r   is_autob   s   
z HfTrainerDeepSpeedConfig.is_autoNTc              
   C   s   |  |\}}|du rdS ||dkr|||< dS |sdS ||}|dur?||krA| jd| d| d| d|  dS dS dS )a  
        A utility method that massages the config file and can optionally verify that the values match.

        1. Replace "auto" values with `TrainingArguments` value.

        2. If it wasn't "auto" and `must_match` is true, then check that DS config matches Trainer
        config values and if mismatched add the entry to `self.mismatched` - will assert during
        `trainer_config_finalize` for one or more mismatches.

        Nr,   z- ds =z vs hf )find_config_nodegetr(   append)r   r.   hf_valhf_key
must_matchconfigds_keyds_valr   r   r   
fill_matchi   s   
(z#HfTrainerDeepSpeedConfig.fill_matchF)r7   c                 C   s  |j |j |j }| d|jd|  | d|jd | d|d|  | d|jd | d|jd	 | d
|j|jgd | d|jd | d|j	d | 
dd | d|jd	 |jsc|jrm|jdkrjdnd}nd}|jr| jdi | jd< |j| jd d< | d|js|jo|dkd | d|dkd | d|jd | d|jp|jd | drtj| _dS | drtj| _dS tj| _dS ) z
        Adjust the config with `TrainingArguments` values. This stage is run during `TrainingArguments` object
        creation.
        train_micro_batch_size_per_gpuper_device_train_batch_sizegradient_accumulation_stepstrain_batch_sizeztrain_batch_size (calculated)gradient_clippingmax_grad_normzoptimizer.params.lrlearning_ratezoptimizer.params.betaszadam_beta1+adam_beta2zoptimizer.params.epsadam_epsilonzoptimizer.params.weight_decayweight_decayzscheduler.params.warmup_min_lrr   zscheduler.params.warmup_max_lrapexampN
checkpointuse_node_local_storagezfp16.enabledz%fp16|fp16_full_eval+fp16_backend(amp)zamp.enabledzfp16+fp16_backend(apex)zamp.opt_levelfp16_opt_levelzbf16.enabledzbf16|bf16_full_eval)
world_sizer=   r>   r;   rA   rB   
adam_beta1
adam_beta2rC   rD   	fill_onlyfp16fp16_full_evalfp16_backendsave_on_each_noder8   r3   rI   bf16bf16_full_evalis_truetorchbfloat16r'   is_falsefloat32float16)r   argsauto_find_batch_sizer?   rP   r   r   r   trainer_config_process   s`   


z/HfTrainerDeepSpeedConfig.trainer_config_processc                    sF  g d} fdd|D }t |dkr{t|jdr|jj}n=t|jdr+t|jj}n0t|jdr>t|jjdr>|jjj}nt|jdrSt|jjdrSt|jjj}ntd| d	 d
||   	 r{ dt
d| |   dd|   d|d  d||d t  jdkrd j}td| ddS )z
        This stage is run after we have the model and know num_training_steps.

        Now we can complete the configuration process.
        )$zero_optimization.reduce_bucket_size-zero_optimization.stage3_prefetch_bucket_size4zero_optimization.stage3_param_persistence_thresholdc                    s   g | ]	}  |r|qS r   )r0   ).0xr*   r   r   
<listcomp>       zDHfTrainerDeepSpeedConfig.trainer_config_finalize.<locals>.<listcomp>r   hidden_sizehidden_sizestext_configzThe model's config file has neither `hidden_size` nor `hidden_sizes` entry, therefore it's not possible to automatically fill out the following `auto` entries in the DeepSpeed config file: zb. You can fix that by replacing `auto` values for these keys with an integer value of your choice.r]   r^   g?r_   
   z scheduler.params.total_num_stepsznum_training_steps (calculated)z!scheduler.params.warmup_num_stepswarmup_steps
z]Please correct the following DeepSpeed config values that mismatch TrainingArguments values:
zF
The easiest method is to set these DeepSpeed config values to 'auto'.N)lenhasattrr8   rd   maxre   rf   r)   rM   is_zero3intr;   get_warmup_stepsr(   join)r   rZ   modelnum_training_stepshidden_size_based_keyshidden_size_auto_keysrd   r(   r   r*   r   trainer_config_finalize   sX   	
z0HfTrainerDeepSpeedConfig.trainer_config_finalize)NTF)r    r!   r"   r#   r   r+   r0   r;   r   rM   r\   ru   r$   r   r   r   r   r%   R   s    

Jr%   c                 C   s   t | ad S r&   )weakrefref_hf_deepspeed_config_weak_ref)hf_deepspeed_config_objr   r   r   r     s   r   c                   C   s   d a d S r&   )ry   r   r   r   r   unset_hf_deepspeed_config  s   r{   c                   C   s    t d urt  d urt   S dS )NF)ry   rm   r   r   r   r   is_deepspeed_zero3_enabled$  s   
r|   c                   C   s   t d urt  d urt  jS d S r&   )ry   r8   r   r   r   r   deepspeed_config+  s   r}   c                    sR   t |dd| }dur|_g  d	dtjf fdd| |dd  S )
z
    Loads state dict into a model specifically for Zero3, since DeepSpeed does not support the `transformers`
    tensor parallelism API.

    Nearly identical code to PyTorch's `_load_from_state_dict`
    	_metadataN Fmodulec           
         s  d u ri n	 d d i }||d< ||dg g f}t rstfdd|D dkrsdd l}t| jd d dd  fd	d| D }t|dkrs|jj|dd
 t	j
 dkrd| j|  W d    n1 snw   Y  | j D ]\}}	|	d ur|	|| d | qxd S )Nassign_to_params_buffersTc                    s   g | ]	}|  r|qS r   )
startswith)r`   key)prefixr   r   rb   J  rc   zC_load_state_dict_into_zero3_model.<locals>.load.<locals>.<listcomp>r   F)r   recursec                    s   g | ]
}| v r | qS r   r   )r`   k)named_parametersr   r   rb   P  s    )modifier_rank.)r3   r|   rj   r	   dictr   keyszeroGatheredParametersrU   distributedget_rank_load_from_state_dict_modulesitems)
r   
state_dictr   r   local_metadatarZ   r	   params_to_gathernamechild
error_msgsloadr   )r   r   r   r   C  s$     
z/_load_state_dict_into_zero3_model.<locals>.load)r   )r   F)getattrcopyr~   r   Module)model_to_loadr   r   r   r   !_load_state_dict_into_zero3_model2  s   r   c                    s   ddl m}m} |j}d}d|v r|jrtd||d}n| r'td 	 }d|d	< d}	d
|v r=||}	||	fS t
||rS fdd}
|||
d}	||	fS j |d}	||	fS )zY
    A convenience wrapper that deals with optimizer and lr scheduler configuration.
    r   )
DummyOptimDummySchedulerN	optimizerz|--adafactor was passed, but also found `optimizer` configured in the DeepSpeed config. Only one optimizer can be configured.)paramszDetected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the custom optimizer has both CPU and GPU implementation (except LAMB)Tzero_allow_untested_optimizer	schedulerc                    s"   t  }d |_|j | d}|S )Nrr   r   )r   lr_schedulercreate_scheduler)r   trainer_copyr   rr   trainerr   r   _lr_scheduler_callable  s   
z5deepspeed_optim_sched.<locals>._lr_scheduler_callable)lr_scheduler_callabler   )accelerate.utilsr   r   r8   	adafactorr)   
is_offloadloggerinfocreate_optimizer
isinstancer   )r   hf_deepspeed_configrZ   rr   model_parametersr   r   r8   r   r   r   r   r   r   deepspeed_optim_schedb  s2   

r   Fc                 C   s   ddl m} | j}| j}| jjjj}|||| |	|
  |r>| s*td|d |d d\}}d}	||fS d| _|jdi d	d}
|
d
kr^ddl}|j||
| d}ttdd | }	t| ||||	\}}||fS )a  
    Init DeepSpeed, after updating the DeepSpeed configuration with any relevant Trainer's args.

    If `resume_from_checkpoint` was passed then an attempt to resume from a previously saved checkpoint will be made.

    Args:
        trainer: Trainer object
        num_training_steps: per single gpu
        resume_from_checkpoint: path to a checkpoint if to resume from after normal DeepSpeedEngine load
        inference: launch in inference mode (no optimizer and no lr scheduler)
        auto_find_batch_size: whether to ignore the `train_micro_batch_size_per_gpu` argument as it's being
            set automatically by the auto batch size finder

    Returns: optimizer, lr_scheduler

    We may use `deepspeed_init` more than once during the life of Trainer, when we do - it's a temp hack based on:
    https://github.com/deepspeedai/DeepSpeed/issues/1394#issuecomment-937405374 until Deepspeed fixes a bug where it
    can't resume from a checkpoint after it did some stepping https://github.com/deepspeedai/DeepSpeed/issues/1612

    r   )r   zMZeRO inference only makes sense with ZeRO Stage 3 - please adjust your configr   r   )NNNtensor_parallelautotp_size   )rq   tp_sizer+   c                 S   s   | j S r&   )requires_grad)pr   r   r   <lambda>  s    z deepspeed_init.<locals>.<lambda>)deepspeed.utilsr   rq   rZ   acceleratorstatedeepspeed_pluginhf_ds_configru   setLevelget_process_log_levelrm   r)   del_config_sub_treer   r8   r3   r	   tp_model_initr+   listfilter
parametersr   )r   rr   	inference	ds_loggerrq   rZ   r   r   r   r   r   r	   r   r   r   deepspeed_init  s0   


r   Tc                 C   sv   dd l }t| | d}t|dkr4td|  | j||ddd\}}|d u r2td| d S td| )Nr   z/global_step*zAttempting to resume from T)load_module_strictload_optimizer_statesload_lr_scheduler_statesz-[deepspeed] failed to resume from checkpoint z!Can't find a valid checkpoint at )globsortedrj   r   r   load_checkpointr)   )deepspeed_enginecheckpoint_pathr   r   deepspeed_checkpoint_dirs	load_pathr   r   r   r   deepspeed_load_checkpoint  s   
r   rv   )T)%r#   r   importlib.metadatar   r   importlib.utilr
   rw   	functoolsr   dependency_versions_checkr   utilsr   r   r   rU   r   
get_loggerr    r   r   accelerate.utils.deepspeedr   DeepSpeedConfigbuiltinsr   r%   ry   r   r{   r|   r}   r   r   r   r   r   r   r   r   <module>   s8   
 B0
=>