import warnings
from dataclasses import dataclass, field
from typing import Optional

from .training_args import TrainingArguments
from .utils import cached_property, is_tf_available, logging, requires_backends


logger = logging.get_logger(__name__)

if is_tf_available():
    import tensorflow as tf

    from .modeling_tf_utils import keras


@dataclass
class TFTrainingArguments(TrainingArguments):
    """
    TrainingArguments is the subset of the arguments we use in our example scripts **which relate to the training loop
    itself**.

    Using [`HfArgumentParser`] we can turn this class into
    [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
    command line.
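
    For instance (a minimal sketch; the script name and flag values below are only illustrative):

    ```python
    from transformers import HfArgumentParser, TFTrainingArguments

    parser = HfArgumentParser(TFTrainingArguments)
    # e.g. invoked as: python my_tf_script.py --output_dir ./out --per_device_train_batch_size 16
    (training_args,) = parser.parse_args_into_dataclasses()
    ```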

    Parameters:
        output_dir (`str`):
            The output directory where the model predictions and checkpoints will be written.
        overwrite_output_dir (`bool`, *optional*, defaults to `False`):
            If `True`, overwrite the content of the output directory. Use this to continue training if `output_dir`
            points to a checkpoint directory.
        do_train (`bool`, *optional*, defaults to `False`):
            Whether to run training or not. This argument is not directly used by [`Trainer`], it's intended to be used
            by your training/evaluation scripts instead. See the [example
            scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
        do_eval (`bool`, *optional*):
            Whether to run evaluation on the validation set or not. Will be set to `True` if `eval_strategy` is
            different from `"no"`. This argument is not directly used by [`Trainer`], it's intended to be used by your
            training/evaluation scripts instead. See the [example
            scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
        do_predict (`bool`, *optional*, defaults to `False`):
            Whether to run predictions on the test set or not. This argument is not directly used by [`Trainer`], it's
            intended to be used by your training/evaluation scripts instead. See the [example
            scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
        eval_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`):
            The evaluation strategy to adopt during training. Possible values are:

                - `"no"`: No evaluation is done during training.
                - `"steps"`: Evaluation is done (and logged) every `eval_steps`.
                - `"epoch"`: Evaluation is done at the end of each epoch.

        per_device_train_batch_size (`int`, *optional*, defaults to 8):
            The batch size per GPU/TPU core/CPU for training.
        per_device_eval_batch_size (`int`, *optional*, defaults to 8):
            The batch size per GPU/TPU core/CPU for evaluation.
        gradient_accumulation_steps (`int`, *optional*, defaults to 1):
            Number of update steps to accumulate the gradients for, before performing a backward/update pass.

            <Tip warning={true}>

            When using gradient accumulation, one step is counted as one step with a backward pass. Therefore, logging,
            evaluation and saving will be conducted every `gradient_accumulation_steps * xxx_step` training examples.

            </Tip>

        learning_rate (`float`, *optional*, defaults to 5e-5):
            The initial learning rate for Adam.
        weight_decay (`float`, *optional*, defaults to 0):
            The weight decay to apply (if not zero).
        adam_beta1 (`float`, *optional*, defaults to 0.9):
            The beta1 hyperparameter for the Adam optimizer.
        adam_beta2 (`float`, *optional*, defaults to 0.999):
            The beta2 hyperparameter for the Adam optimizer.
        adam_epsilon (`float`, *optional*, defaults to 1e-8):
            The epsilon hyperparameter for the Adam optimizer.
        max_grad_norm (`float`, *optional*, defaults to 1.0):
            Maximum gradient norm (for gradient clipping).
        num_train_epochs (`float`, *optional*, defaults to 3.0):
            Total number of training epochs to perform.
        max_steps (`int`, *optional*, defaults to -1):
            If set to a positive number, the total number of training steps to perform. Overrides `num_train_epochs`.
            For a finite dataset, training cycles through the dataset again (once all data is exhausted) until
            `max_steps` is reached.
        warmup_ratio (`float`, *optional*, defaults to 0.0):
            Ratio of total training steps used for a linear warmup from 0 to `learning_rate`.
        warmup_steps (`int`, *optional*, defaults to 0):
            Number of steps used for a linear warmup from 0 to `learning_rate`. Overrides any effect of `warmup_ratio`.
        logging_dir (`str`, *optional*):
            [TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to
            *runs/**CURRENT_DATETIME_HOSTNAME***.
        logging_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"steps"`):
            The logging strategy to adopt during training. Possible values are:

                - `"no"`: No logging is done during training.
                - `"epoch"`: Logging is done at the end of each epoch.
                - `"steps"`: Logging is done every `logging_steps`.

        logging_first_step (`bool`, *optional*, defaults to `False`):
            Whether to log and evaluate the first `global_step` or not.
        logging_steps (`int`, *optional*, defaults to 500):
            Number of update steps between two logs if `logging_strategy="steps"`.
        save_strategy (`str` or [`~trainer_utils.SaveStrategy`], *optional*, defaults to `"steps"`):
            The checkpoint save strategy to adopt during training. Possible values are:

                - `"no"`: No save is done during training.
                - `"epoch"`: Save is done at the end of each epoch.
                - `"steps"`: Save is done every `save_steps`.

        save_steps (`int`, *optional*, defaults to 500):
            Number of update steps between two checkpoint saves if `save_strategy="steps"`.
        save_total_limit (`int`, *optional*):
            If a value is passed, will limit the total number of checkpoints. Deletes the older checkpoints in
            `output_dir`.
        no_cuda (`bool`, *optional*, defaults to `False`):
            Whether to avoid using CUDA even when it is available.
        seed (`int`, *optional*, defaults to 42):
            Random seed that will be set at the beginning of training.
        fp16 (`bool`, *optional*, defaults to `False`):
            Whether to use 16-bit (mixed) precision training (through NVIDIA Apex) instead of 32-bit training.
        fp16_opt_level (`str`, *optional*, defaults to 'O1'):
            For `fp16` training, Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. See details on
            the [Apex documentation](https://nvidia.github.io/apex/amp).
        local_rank (`int`, *optional*, defaults to -1):
            During distributed training, the rank of the process.
        tpu_num_cores (`int`, *optional*):
            When training on TPU, the number of TPU cores (automatically passed by launcher script).
        debug (`bool`, *optional*, defaults to `False`):
            Whether to activate the trace to record computation graphs and profiling information or not.
        dataloader_drop_last (`bool`, *optional*, defaults to `False`):
            Whether to drop the last incomplete batch (if the length of the dataset is not divisible by the batch size)
            or not.
        eval_steps (`int`, *optional*, defaults to 1000):
            Number of update steps between two evaluations.
        past_index (`int`, *optional*, defaults to -1):
            Some models like [TransformerXL](../model_doc/transformerxl) or [XLNet](../model_doc/xlnet) can make
            use of the past hidden states for their predictions. If this argument is set to a positive int, the
            `Trainer` will use the corresponding output (usually index 2) as the past state and feed it to the model at
            the next training step under the keyword argument `mems`.
        tpu_name (`str`, *optional*):
            The name of the TPU the process is running on.
        tpu_zone (`str`, *optional*):
            The zone of the TPU the process is running on. If not specified, we will attempt to automatically detect
            from metadata.
        gcp_project (`str`, *optional*):
            Google Cloud Project name for the Cloud TPU-enabled project. If not specified, we will attempt to
            automatically detect from metadata.
        run_name (`str`, *optional*):
            A descriptor for the run. Notably used for wandb, mlflow, comet and swanlab logging.
        xla (`bool`, *optional*):
            Whether to activate the XLA compilation or not.
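
    Example (an illustrative sketch; the output directory and batch size are arbitrary):

    ```python
    from transformers import TFTrainingArguments

    training_args = TFTrainingArguments(output_dir="./tf_out", per_device_train_batch_size=8)
    with training_args.strategy.scope():
        ...  # build the Keras model here so its variables are created under the distribution strategy
    ```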
    tfNhelpzName of TPU)defaultmetadatatpu_namezZone of TPUtpu_zonez!Name of Cloud TPU-enabled projectgcp_projectg      ?z,Power for the Polynomial decay LR scheduler.
poly_powerFz.Whether to activate the XLA compilation or notxlareturntf.distribute.Strategyc                 C   sL  t | dg td tjd}| jrtj	d | j
r&tjjdd}|S z| jr8tjjj| j| j| jd}ntjj }W n tyV   | jrRtd| j d	d }Y nw |rw| jrbtj	d
 tj| tjj| tj|}|S t|dkrtjjdd}|S t|dkrtjjdd}|S t|dkrtj }|S td)Nr   zTensorflow: setting up strategyGPUmixed_float16z/cpu:0)device)zoneprojectzCouldn't connect to TPU !mixed_bfloat16r   r   z/gpu:0zJCannot find the proper strategy, please check your environment properties.)r
   loggerinfor   configlist_physical_devicesfp16r   mixed_precisionset_global_policyno_cuda
distributeOneDeviceStrategyr   cluster_resolverTPUClusterResolverr   r   
ValueErrorRuntimeErrorexperimental_connect_to_clustertpuexperimentalinitialize_tpu_systemTPUStrategylenMirroredStrategy)selfgpusstrategyr.    r7   q/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/transformers/training_args_tf.py_setup_strategy   sJ   
#	
z#TFTrainingArguments._setup_strategyc                 C   s   t | dg | jS )z=
        The strategy used for distributed training.
        r   )r
   r9   r4   r7   r7   r8   r6      s   zTFTrainingArguments.strategyc                 C   s   t | dg | jjS )Y
        The number of replicas (CPUs, GPUs or TPU cores) used in this training.
        r   )r
   r9   num_replicas_in_syncr:   r7   r7   r8   
n_replicas   s   zTFTrainingArguments.n_replicasc                 C   s   dS )zH
        Whether or not the current process should produce log.
        Fr7   r:   r7   r7   r8   
should_log   s   zTFTrainingArguments.should_logc                 C   &   | j rtd | j p| j}|| j S )zz
        The actual batch size for training (may differ from `per_gpu_train_batch_size` in distributed training).
        zUsing deprecated `--per_gpu_train_batch_size` argument which will be removed in a future version. Using `--per_device_train_batch_size` is preferred.)per_gpu_train_batch_sizer   warningper_device_train_batch_sizer=   r4   per_device_batch_sizer7   r7   r8   train_batch_size     
z$TFTrainingArguments.train_batch_sizec                 C   r?   )z{
        The actual batch size for evaluation (may differ from `per_gpu_eval_batch_size` in distributed training).
        zUsing deprecated `--per_gpu_eval_batch_size` argument which will be removed in a future version. Using `--per_device_eval_batch_size` is preferred.)per_gpu_eval_batch_sizer   rA   per_device_eval_batch_sizer=   rC   r7   r7   r8   eval_batch_size  rF   z#TFTrainingArguments.eval_batch_sizec                 C   s    t | dg tdt | jjS )r;   r   zaThe n_gpu argument is deprecated and will be removed in a future version, use n_replicas instead.)r
   warningswarnFutureWarningr9   r<   r:   r7   r7   r8   n_gpu!  s   zTFTrainingArguments.n_gpu)r   r   )__name__
__module____qualname____doc__	frameworkr   r   r   str__annotations__r   r   r   floatr   boolr   tupleintr9   propertyr6   r=   r>   rE   rI   rM   r7   r7   r7   r8   r      sF   
  0
r   )rJ   dataclassesr   r   typingr   training_argsr   utilsr   r   r	   r
   
get_loggerrN   r   
tensorflowr   modeling_tf_utilsr   r   r7   r7   r7   r8   <module>   s   
