o
    hM                    @   s  d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	m
Z
 ddlmZmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZmZmZmZmZmZmZmZm Z  er^dd
l!m"Z" e #e$Z%dZ&i Z'i Z(i Z)g Z*e rddl+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 ddl6m7Z7m8Z8 e1e'd< e5e'd< e4e0e3e-e.e/dZ(e2e,dZ)e9e(: e9e':  ddg Z*G dd deZ;G dd deZ<e	G dd deZ=e	G dd de=Z>e	G dd de=Z?e	G dd  d Z@dS )!z-Generation configuration class and utilities.    N)ABCabstractmethod)	dataclassis_dataclass)TYPE_CHECKINGAnyCallableDictListOptionalUnion   )__version__)PretrainedConfig)	GENERATION_CONFIG_NAMEExplicitEnumPushToHubMixincached_filedownload_urlextract_commit_hashis_remote_urlis_torch_availablelogging)PreTrainedModel)_from_model_config_commit_hash_original_object_hashtransformers_version)
HQQQuantizedCacheHybridCacheHybridChunkedCache
MambaCacheOffloadedStaticCacheQuantizedCacheConfigQuantoQuantizedCacheSlidingWindowCacheStaticCacheStaticCacheConfig   )#SynthIDTextWatermarkLogitsProcessorWatermarkLogitsProcessor	quantizedstatic)r,   offloaded_staticsliding_windowhybridhybrid_chunkedmamba)quantoHQQ	offloadeddynamicc                   @   s4   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdS )GenerationModezg
    Possible generation modes, downstream of the [`~generation.GenerationMixin.generate`] method.
    contrastive_searchgreedy_searchsampleassisted_generationdola_generationbeam_searchbeam_sampleconstrained_beam_searchgroup_beam_searchN)__name__
__module____qualname____doc__CONTRASTIVE_SEARCHGREEDY_SEARCHSAMPLEASSISTED_GENERATIONDOLA_GENERATIONBEAM_SEARCHBEAM_SAMPLECONSTRAINED_BEAM_SEARCHGROUP_BEAM_SEARCH rM   rM   /var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/transformers/generation/configuration_utils.pyr6   R   s    r6   c                   @   s  e Zd ZdZdZdd Zdd Zdd Zd	d
 Zd<de	d de
fddZd=ddZ		d>deeejf de	eeejf  defddZe						d?deeejf de	eeejf  de	eeejf  dedede	eeef  dedd fd d!Zed"eeejf fd#d$Zed%eeef dd fd&d'Zd(eeef ddfd)d*Zdeeef fd+d,Zdeeef fd-d.Zd@d0ed1edefd2d3ZdAd4eeejf d0efd5d6Zed7edd fd8d9Zd:d; Z dS )BGenerationConfiga[Y  
    Class that holds a configuration for a generation task. A `generate` call supports the following generation methods
    for text-decoder, text-to-text, speech-to-text, and vision-to-text models:

        - *greedy decoding* if `num_beams=1` and `do_sample=False`
        - *contrastive search* if `penalty_alpha>0.` and `top_k>1`
        - *multinomial sampling* if `num_beams=1` and `do_sample=True`
        - *beam-search decoding* if `num_beams>1` and `do_sample=False`
        - *beam-search multinomial sampling* if `num_beams>1` and `do_sample=True`
        - *diverse beam-search decoding* if `num_beams>1` and `num_beam_groups>1`
        - *constrained beam-search decoding* if `constraints!=None` or `force_words_ids!=None`
        - *assisted decoding* if `assistant_model` or `prompt_lookup_num_tokens` is passed to `.generate()`
        - *dola decoding* if `dola_layers` is passed to `.generate()`

    To learn more about decoding strategies refer to the [text generation strategies guide](../generation_strategies).

    <Tip>

    A large number of these flags control the logits or the stopping criteria of the generation. Make sure you check
    the [generate-related classes](https://huggingface.co/docs/transformers/internal/generation_utils) for a full
    description of the possible manipulations, as well as examples of their usage.

    </Tip>

    Arg:
        > Parameters that control the length of the output

        max_length (`int`, *optional*, defaults to 20):
            The maximum length the generated tokens can have. Corresponds to the length of the input prompt +
            `max_new_tokens`. Its effect is overridden by `max_new_tokens`, if also set.
        max_new_tokens (`int`, *optional*):
            The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
        min_length (`int`, *optional*, defaults to 0):
            The minimum length of the sequence to be generated. Corresponds to the length of the input prompt +
            `min_new_tokens`. Its effect is overridden by `min_new_tokens`, if also set.
        min_new_tokens (`int`, *optional*):
            The minimum numbers of tokens to generate, ignoring the number of tokens in the prompt.
        early_stopping (`bool` or `str`, *optional*, defaults to `False`):
            Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values:
            `True`, where the generation stops as soon as there are `num_beams` complete candidates; `False`, where an
            heuristic is applied and the generation stops when is it very unlikely to find better candidates;
            `"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical
            beam search algorithm).
        max_time (`float`, *optional*):
            The maximum amount of time you allow the computation to run for in seconds. generation will still finish
            the current pass after allocated time has been passed.
        stop_strings (`str or List[str]`, *optional*):
            A string or a list of strings that should terminate generation if the model outputs them.

        > Parameters that control the generation strategy used

        do_sample (`bool`, *optional*, defaults to `False`):
            Whether or not to use sampling ; use greedy decoding otherwise.
        num_beams (`int`, *optional*, defaults to 1):
            Number of beams for beam search. 1 means no beam search.
        num_beam_groups (`int`, *optional*, defaults to 1):
            Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
            [this paper](https://arxiv.org/pdf/1610.02424.pdf) for more details.
        penalty_alpha (`float`, *optional*):
            The values balance the model confidence and the degeneration penalty in contrastive search decoding.
        dola_layers (`str` or `List[int]`, *optional*):
            The layers to use for DoLa decoding. If `None`, DoLa decoding is not used. If a string, it must
            be one of "low" or "high", which means using the lower part or higher part of the model layers, respectively.
            "low" means the first half of the layers up to the first 20 layers, and "high" means the last half of the
            layers up to the last 20 layers.
            If a list of integers, it must contain the indices of the layers to use for candidate premature layers in DoLa.
            The 0-th layer is the word embedding layer of the model. Set to `'low'` to improve long-answer reasoning tasks,
            `'high'` to improve short-answer tasks. Check the [documentation](https://github.com/huggingface/transformers/blob/main/docs/source/en/generation_strategies.md)
            or [the paper](https://arxiv.org/abs/2309.03883) for more details.

        > Parameters that control the cache

        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should use the past last key/values attentions (if applicable to the model) to
            speed up decoding.
        cache_implementation (`str`, *optional*, default to `None`):
            Name of the cache class that will be instantiated in `generate`, for faster decoding. Possible values are:

            - `"dynamic"`: [`DynamicCache`]
            - `"static"`: [`StaticCache`]
            - `"offloaded_static"`: [`OffloadedStaticCache`]
            - `"sliding_window"`: [`SlidingWindowCache`]
            - `"hybrid"`: [`HybridCache`]
            - `"mamba"`: [`MambaCache`]
            - `"quantized"`: [`QuantizedCache`]

            If none is specified, we will use the default cache for the model (which is often [`DynamicCache`]). See
            our [cache documentation](https://huggingface.co/docs/transformers/en/kv_cache) for further information.
        cache_config (`CacheConfig` or `dict`, *optional*, default to `None`):
            Arguments used in the key-value cache class can be passed in `cache_config`. Can be passed as a `Dict` and
            it will be converted to its repsective `CacheConfig` internally.
            Otherwise can be passed as a `CacheConfig` class matching the indicated `cache_implementation`.
        return_legacy_cache (`bool`, *optional*, default to `True`):
            Whether to return the legacy or new format of the cache when `DynamicCache` is used by default.

        > Parameters for manipulation of the model output logits

        temperature (`float`, *optional*, defaults to 1.0):
            The value used to module the next token probabilities. This value is set in a model's `generation_config.json` file. If it isn't set, the default value is 1.0
        top_k (`int`, *optional*, defaults to 50):
            The number of highest probability vocabulary tokens to keep for top-k-filtering. This value is set in a model's `generation_config.json` file. If it isn't set, the default value is 50.
        top_p (`float`, *optional*, defaults to 1.0):
            If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to
            `top_p` or higher are kept for generation. This value is set in a model's `generation_config.json` file. If it isn't set, the default value is 1.0
        min_p (`float`, *optional*):
            Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
            value between 0 and 1. Typical values are in the 0.01-0.2 range, comparably selective as setting `top_p` in
            the 0.99-0.8 range (use the opposite of normal `top_p` values).
        typical_p (`float`, *optional*, defaults to 1.0):
            Local typicality measures how similar the conditional probability of predicting a target token next is to
            the expected conditional probability of predicting a random token next, given the partial text already
            generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that
            add up to `typical_p` or higher are kept for generation. See [this
            paper](https://arxiv.org/pdf/2202.00666.pdf) for more details.
        epsilon_cutoff (`float`, *optional*, defaults to 0.0):
            If set to float strictly between 0 and 1, only tokens with a conditional probability greater than
            `epsilon_cutoff` will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the
            size of the model. See [Truncation Sampling as Language Model
            Desmoothing](https://arxiv.org/abs/2210.15191) for more details.
        eta_cutoff (`float`, *optional*, defaults to 0.0):
            Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between
            0 and 1, a token is only considered if it is greater than either `eta_cutoff` or `sqrt(eta_cutoff) *
            exp(-entropy(softmax(next_token_logits)))`. The latter term is intuitively the expected next token
            probability, scaled by `sqrt(eta_cutoff)`. In the paper, suggested values range from 3e-4 to 2e-3,
            depending on the size of the model. See [Truncation Sampling as Language Model
            Desmoothing](https://arxiv.org/abs/2210.15191) for more details.
        diversity_penalty (`float`, *optional*, defaults to 0.0):
            This value is subtracted from a beam's score if it generates a token same as any beam from other group at a
            particular time. Note that `diversity_penalty` is only effective if `group beam search` is enabled.
        repetition_penalty (`float`, *optional*, defaults to 1.0):
            The parameter for repetition penalty. 1.0 means no penalty. See [this
            paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
        encoder_repetition_penalty (`float`, *optional*, defaults to 1.0):
            The paramater for encoder_repetition_penalty. An exponential penalty on sequences that are not in the
            original input. 1.0 means no penalty.
        length_penalty (`float`, *optional*, defaults to 1.0):
            Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to
            the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log
            likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while
            `length_penalty` < 0.0 encourages shorter sequences.
        no_repeat_ngram_size (`int`, *optional*, defaults to 0):
            If set to int > 0, all ngrams of that size can only occur once.
        bad_words_ids (`List[List[int]]`, *optional*):
            List of list of token ids that are not allowed to be generated. Check
            [`~generation.NoBadWordsLogitsProcessor`] for further documentation and examples.
        force_words_ids (`List[List[int]]` or `List[List[List[int]]]`, *optional*):
            List of token ids that must be generated. If given a `List[List[int]]`, this is treated as a simple list of
            words that must be included, the opposite to `bad_words_ids`. If given `List[List[List[int]]]`, this
            triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081), where one
            can allow different forms of each word.
        renormalize_logits (`bool`, *optional*, defaults to `False`):
            Whether to renormalize the logits after applying all the logits processors (including the custom
            ones). It's highly recommended to set this flag to `True` as the search algorithms suppose the score logits
            are normalized but some logit processors break the normalization.
        constraints (`List[Constraint]`, *optional*):
            Custom constraints that can be added to the generation to ensure that the output will contain the use of
            certain tokens as defined by `Constraint` objects, in the most sensible way possible.
        forced_bos_token_id (`int`, *optional*, defaults to `model.config.forced_bos_token_id`):
            The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for
            multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target
            language token.
        forced_eos_token_id (`int` or List[int]`, *optional*, defaults to `model.config.forced_eos_token_id`):
            The id of the token to force as the last generated token when `max_length` is reached. Optionally, use a
            list to set multiple *end-of-sequence* tokens.
        remove_invalid_values (`bool`, *optional*, defaults to `model.config.remove_invalid_values`):
            Whether to remove possible *nan* and *inf* outputs of the model to prevent the generation method to crash.
            Note that using `remove_invalid_values` can slow down generation.
        exponential_decay_length_penalty (`tuple(int, float)`, *optional*):
            This Tuple adds an exponentially increasing length penalty, after a certain amount of tokens have been
            generated. The tuple shall consist of: `(start_index, decay_factor)` where `start_index` indicates where
            penalty starts and `decay_factor` represents the factor of exponential decay
        suppress_tokens (`List[int]`, *optional*):
            A list of tokens that will be suppressed at generation. The `SupressTokens` logit processor will set their
            log probs to `-inf` so that they are not sampled.
        begin_suppress_tokens  (`List[int]`, *optional*):
            A list of tokens that will be suppressed at the beginning of the generation. The `SupressBeginTokens` logit
            processor will set their log probs to `-inf` so that they are not sampled.
        forced_decoder_ids (`List[List[int]]`, *optional*):
            A list of pairs of integers which indicates a mapping from generation indices to token indices that will be
            forced before sampling. For example, `[[1, 123]]` means the second generated token will always be a token
            of index 123.
        sequence_bias (`Dict[Tuple[int], float]`, *optional*)):
            Dictionary that maps a sequence of tokens to its bias term. Positive biases increase the odds of the
            sequence being selected, while negative biases do the opposite. Check
            [`~generation.SequenceBiasLogitsProcessor`] for further documentation and examples.
        token_healing (`bool`, *optional*, defaults to `False`):
            Heal tail tokens of prompts by replacing them with their appropriate extensions.
            This enhances the quality of completions for prompts affected by greedy tokenization bias.
        guidance_scale (`float`, *optional*):
            The guidance scale for classifier free guidance (CFG). CFG is enabled by setting `guidance_scale > 1`.
            Higher guidance scale encourages the model to generate samples that are more closely linked to the input
            prompt, usually at the expense of poorer quality.
        low_memory (`bool`, *optional*):
            Switch to sequential beam search and sequential topk for contrastive search to reduce peak memory.
            Used with beam search and contrastive search.
        watermarking_config (`BaseWatermarkingConfig` or `dict`, *optional*):
            Arguments used to watermark the model outputs by adding a small bias to randomly selected set of "green"
            tokens. See the docs of [`SynthIDTextWatermarkingConfig`] and [`WatermarkingConfig`] for more
            details. If passed as `Dict`, it will be converted to a `WatermarkingConfig` internally.

        > Parameters that define the output variables of generate

        num_return_sequences (`int`, *optional*, defaults to 1):
            The number of independently computed returned sequences for each element in the batch.
        output_attentions (`bool`, *optional*, defaults to `False`):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more details.
        output_hidden_states (`bool`, *optional*, defaults to `False`):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more details.
        output_scores (`bool`, *optional*, defaults to `False`):
            Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
        output_logits (`bool`, *optional*):
            Whether or not to return the unprocessed prediction logit scores. See `logits` under returned tensors for
            more details.
        return_dict_in_generate (`bool`, *optional*, defaults to `False`):
            Whether or not to return a [`~utils.ModelOutput`], as opposed to returning exclusively the generated
            sequence. This flag must be set to `True` to return the generation cache (when `use_cache` is `True`)
            or optional outputs (see flags starting with `output_`)

        > Special tokens that can be used at generation time

        pad_token_id (`int`, *optional*):
            The id of the *padding* token.
        bos_token_id (`int`, *optional*):
            The id of the *beginning-of-sequence* token.
        eos_token_id (`Union[int, List[int]]`, *optional*):
            The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.

        > Generation parameters exclusive to encoder-decoder models

        encoder_no_repeat_ngram_size (`int`, *optional*, defaults to 0):
            If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the
            `decoder_input_ids`.
        decoder_start_token_id (`int` or `List[int]`, *optional*):
            If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token or a list of length
            `batch_size`. Indicating a list enables different start ids for each element in the batch
            (e.g. multilingual models with different target languages in one batch)

        > Generation parameters exclusive to assistant generation
        is_assistant (`bool`, *optional*, defaults to `False`):
            Whether the model is an assistant (draft) model.
        num_assistant_tokens (`int`, *optional*, defaults to 20):
            Defines the number of _speculative tokens_ that shall be generated by the assistant model before being
            checked by the target model at each iteration. Higher values for `num_assistant_tokens` make the generation
            more _speculative_ : If the assistant model is performant larger speed-ups can be reached, if the assistant
            model requires lots of corrections, lower speed-ups are reached.
        num_assistant_tokens_schedule (`str`, *optional*, defaults to `"constant"`):
            Defines the schedule at which max assistant tokens shall be changed during inference.
            - `"heuristic"`: When all speculative tokens are correct, increase `num_assistant_tokens` by 2 else
              reduce by 1. `num_assistant_tokens` value is persistent over multiple generation calls with the same assistant model.
            - `"heuristic_transient"`: Same as `"heuristic"` but `num_assistant_tokens` is reset to its initial value after each generation call.
            - `"constant"`: `num_assistant_tokens` stays unchanged during generation
        assistant_confidence_threshold (`float`, *optional*, defaults to 0.4):
            The confidence threshold for the assistant model. If the assistant model's confidence in its prediction for the current token is lower
            than this threshold, the assistant model stops the current token generation iteration, even if the number of _speculative tokens_
            (defined by `num_assistant_tokens`) is not yet reached. The assistant's confidence threshold is adjusted throughout the speculative iterations to reduce the number of unnecessary draft and target forward passes, biased towards avoiding false negatives.
            `assistant_confidence_threshold` value is persistent over multiple generation calls with the same assistant model.
            It is an unsupervised version of the dynamic speculation lookahead
            from Dynamic Speculation Lookahead Accelerates Speculative Decoding of Large Language Models <https://arxiv.org/abs/2405.04304>.
        prompt_lookup_num_tokens (`int`, *optional*):
            The number of tokens to be output as candidate tokens.
        max_matching_ngram_size (`int`, *optional*):
            The maximum ngram size to be considered for matching in the prompt. Default to 2 if not provided.
        assistant_early_exit(`int`, *optional*):
            If set to a positive integer, early exit of the model will be used as an assistant. Can only be used with
            models that support early exit (i.e. models where logits from intermediate layers can be interpreted by the LM head).
        assistant_lookbehind(`int`, *optional*, defaults to 10):
            If set to a positive integer, the re-encodeing process will additionally consider the last `assistant_lookbehind` assistant tokens
            to correctly align tokens. Can only be used with different tokenizers in speculative decoding.
            See this [blog](https://huggingface.co/blog/universal_assisted_generation) for more details.
        target_lookbehind(`int`, *optional*, defaults to 10):
            If set to a positive integer, the re-encodeing process will additionally consider the last `target_lookbehind` target tokens
            to correctly align tokens. Can only be used with different tokenizers in speculative decoding.
            See this [blog](https://huggingface.co/blog/universal_assisted_generation) for more details.

        > Parameters related to performances and compilation

        compile_config (CompileConfig, *optional*):
            If using a static cache, this controls how `generate` will `compile` the forward pass for performance
            gains.

        disable_compile (`bool`, *optional*): Whether to disable the automatic compilation of the forward pass. Automatic compilation happens when specific criteria are met, including using a compileable cache. Please open an issue if you find the need to use this flag.

        > Wild card

        generation_kwargs:
            Additional generation kwargs will be forwarded to the `generate` function of the model. Kwargs that are not
            present in `generate`'s signature will be used in the model forward pass.
    )output_attentionsoutput_hidden_statesoutput_scoresoutput_logitsc                 K   s  | dd| _| dd | _| dd| _| dd | _| dd| _| d	d | _| d
d | _| dd| _| dd| _	| dd| _
| dd | _| dd | _| dd| _| dd | _| dd | _| jd ur| jtv rt| j }t| jtr|| j| _| dd | _| dd | _| dd| _| dd| _| dd| _| dd | _| dd| _| dd| _| d d| _| d!d| _| d"d| _| d#d| _| d$d| _ | d%d| _!| d&d | _"| d'd | _#| d(d| _$| d)d | _%| d*d | _&| d+d | _'| d,d| _(| d-d | _)| d.d | _*| d/d | _+| d0d | _,| d1d | _-| d2d| _.| d3d | _/| d4d | _0| d5d }|d u r_d | _1nt|t2ri|| _1nt3|| _1| d6d| _4| d7d| _5| d8d| _6| d9d| _7| d:d | _8| d;d| _9| d<d | _:| d=d | _;| d>d | _<| d?d| _=| d@d | _>d| _?| dAd| _@| dBdC| _A| dDdE| _B| dFd | _C| dGd | _D| dHd | _E| dIdJ| _F| dKdJ| _G| dLtH | _I| dMd| _J| dNi | _K| dOd| _L| dPd | _M| dQtN| _O| jLsU|P D ]*\}}z	tQ| || W q* tRyT } ztSTdR| dS| dT|   |d }~ww | jUddU d S )VN
max_length   max_new_tokens
min_lengthr   min_new_tokensearly_stoppingFmax_timestop_strings	do_sample	num_beamsr(   num_beam_groupspenalty_alphadola_layers	use_cacheTcache_implementationcache_configreturn_legacy_cacheprefill_chunk_sizetemperature      ?top_k2   top_pmin_p	typical_pepsilon_cutoff        
eta_cutoffdiversity_penaltyrepetition_penaltyencoder_repetition_penaltylength_penaltyno_repeat_ngram_sizebad_words_idsforce_words_idsrenormalize_logitsconstraintsforced_bos_token_idforced_eos_token_idremove_invalid_values exponential_decay_length_penaltysuppress_tokensbegin_suppress_tokensforced_decoder_idssequence_biastoken_healingguidance_scale
low_memorywatermarking_confignum_return_sequencesrP   rQ   rR   rS   return_dict_in_generatepad_token_idbos_token_ideos_token_idencoder_no_repeat_ngram_sizedecoder_start_token_idnum_assistant_tokensnum_assistant_tokens_scheduleconstantassistant_confidence_thresholdg?prompt_lookup_num_tokensmax_matching_ngram_sizeassistant_early_exitassistant_lookbehind
   target_lookbehindcompile_configdisable_compilegeneration_kwargsr   r   r   z
Can't set z with value z for )is_init)VpoprT   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   CACHE_CONFIG_MAPPING
isinstancedict	from_dictrd   re   rf   rh   rj   rk   rl   rm   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   BaseWatermarkingConfigWatermarkingConfigr   rP   rQ   rR   rS   r   r   r   r   r   r   is_assistantr   r   r   r   r   r   r   r   CompileConfigr   r   r   r   r   r   r   itemssetattrAttributeErrorloggererrorvalidate)selfkwargscache_config_classr   keyvalueerrrM   rM   rN   __init__  s   

zGenerationConfig.__init__c                 C   s   t | jddS )NTignore_metadata)hashto_json_stringr   rM   rM   rN   __hash__  s   zGenerationConfig.__hash__c                 C   s2   t |tsdS | jddd}|jddd}||kS )NFT)use_diffr   )r   rO   r   )r   otherself_without_metadataother_without_metadatarM   rM   rN   __eq__  s
   
zGenerationConfig.__eq__c                 C   s   | j j d| jdd S )N Tr   	__class__r@   r   r   rM   rM   rN   __repr__  s   zGenerationConfig.__repr__Nassistant_modelr   returnc                 C   s   | j dus
| jdurtj}n?| jdkr8| jdu r4| jdur0| jdkr0| jdur0| jdkr0tj}ntj	}ntj
}n| jdkrAtj}n| jdu rJtj}ntj}|dus[| jdus[| jdurg|dv rctj}ntd| jdury|dv rutj}|S td|S )	a  
        Returns the generation mode triggered by the [`GenerationConfig`] instance.

        Arg:
            assistant_model (`PreTrainedModel`, *optional*):
                The assistant model to be used for assisted generation. If set, the generation mode will be
                assisted generation.

        Returns:
            `GenerationMode`: The generation mode triggered by the instance.
        Nr(   Fr   T)r8   r9   zYou've set `assistant_model`, which triggers assisted generate. Currently, assisted generate is only supported with Greedy Search and Sample.zYou've set `dola_layers`, which triggers DoLa generate. Currently, DoLa generate is only supported with Greedy Search and Sample.)rx   rv   r6   rK   r]   r\   rh   r_   rD   rE   rF   r^   rL   rJ   rI   r   r   rG   
ValueErrorr`   rH   )r   r   generation_moderM   rM   rN   get_generation_mode  s@   










z$GenerationConfig.get_generation_modeFc                 C   s  | j dvrtd| j  d| jdur!| jdkr!td| j d| jdur5| jdk r5td| j d d	}|r;d
}| jdu rd| }| jdurZ| jdkrZt|jd| jdt	 | j
durp| j
dkrpt|jd| j
dt	 | jdurt|jd| jdt	 | jdur| jdkrt|jd| jdt	 | jdur| jdkr| jdu rt|jd| jdt	 | jdur| jdkrt|jd| jdt	 | jdur| jdkrt|jd| jdt	 | jdu rtdt	 d| _| jdkrcd| }| j durt|jd| j dt	 | jdur | jdkr t|jd| jdt	 | jdur8| jdkr8t|jd| jdt	 | jdurP| jdkrPt|jd| jdt	 | jdurbt|jd| jdt	 n| jduso| jdurd | }| jd!u rt|jd"| jd| jdur| jdkrt|jd| jd| jdks| jdkrd#}| jd!u rt|d$ | j| j dkrt|d% | jdkrt|d& | jdur| jdu s| jd'k rtd(| j d)t	 | jdkr| jdkr| jdu rtd*| j d+n| j| jkrtd,| j d-| j d+| jdur2| jtvr2td.| j d/t | jdur_t| j}|du rLtd0| j d1t| j|sZ| | j| _| j!  | j"du rd2}d3D ]}	t#| |	durt$%|j|	t#| |	d4 qi| j&durt| j&t'st| j&t(std5t) t' | j&| _&| j&!  t| j*t+std6t,| j* d7| j-d!ur| j.D ]}
t#| |
d!u rtd8|
 d9|
 d:t	 qd;}|D ]}t/| |rtd<| d=qdS )>aa  
        Validates the values of the attributes of the [`GenerationConfig`] instance. Raises exceptions in the presence
        of parameterization that can be detected as incorrect from the configuration instance alone.

        Note that some parameters not validated here are best validated at generate runtime, as they may depend on
        other inputs and/or the model, such as parameters related to the generation length.

        Arg:
            is_init (`bool`, *optional*, defaults to `False`):
                Whether the validation is performed during the initialization of the instance.
        >   FTneverz6`early_stopping` must be a boolean or 'never', but is .Nr   z0`max_new_tokens` must be greater than 0, but is z*`pad_token_id` should be positive but got z. This will cause errors when batch generating, if there is padding. Please set `pad_token_id` explicitly as `model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation z This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.Fz`do_sample` is set to `False`. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `{flag_name}`.rg   rf   )	flag_name
flag_valuerj   rk   rl   ri   rh   rn   rm   ro   z-`num_beams` is set to None - defaulting to 1.r(   z`num_beams` is set to 1. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used in beam-based generation modes. You should set `num_beams>1` or unset `{flag_name}`.rY   r^   rp   rs   rx   a  one of `constraints`, `force_words_ids` is not `None`, triggering constrained beam search. However, `{flag_name}` is set to `{flag_value}`, which is incompatible with this generation mode. Set `constraints` and `force_words_ids` to `None` or unset `{flag_name}` to continue.Tr\   zu`diversity_penalty` is not 0.0 or `num_beam_groups` is not 1, triggering group beam search. In this generation mode, z"`do_sample` must be set to `False`z4`num_beams` should be divisible by `num_beam_groups`zZ`diversity_penalty` should be greater than `0.0`, otherwise your groups will be identical.g333333?z]`dola_layers` is set to trigger DoLa decoding, but `repetition_penalty` is set to a value of zo, which could induce unwanted repetition. The recommended value for DoLa decoding is `repetition_penalty>=1.2`.z_Greedy methods without beam search do not support `num_return_sequences` different than 1 (got z).z`num_return_sequences` (z-) has to be smaller or equal to `num_beams` (z Invalid `cache_implementation` (z). Choose one of: zJYou provided a `cache_config` but the cache implementation you are using (zl) does not require any config. Make sure to use the correct cache implementation matching your cache config.zrYou have set `use_cache` to `False`, but {cache_arg} is set to {cache_arg_value}. {cache_arg} will have no effect.)rb   rc   rd   )	cache_argcache_arg_valuez`watermarking_config` as a dict is deprecated. Please construct `watermarking_config` object with `WatermarkingConfig` or `SynthIDTextWatermarkingConfig` class.z0You provided `compile_config` as an instance of z0, but it must be an instance of `CompileConfig`.z5`return_dict_in_generate` is NOT set to `True`, but `z5` is. When `return_dict_in_generate` is not `True`, `z` is ignored.)logits_processorstopping_criteriaprefix_allowed_tokens_fnsynced_gpusr   streamernegative_prompt_idsnegative_prompt_attention_maskz
Argument `zn` is not a valid argument of `GenerationConfig`. It should be passed to `generate()` (or a pipeline) directly.)0rY   r   rV   r   warningswarnr\   rf   formatUserWarningrj   rk   rl   rh   r_   rm   ro   r]   r^   rp   rs   rx   rv   r`   rq   r   rb   ALL_CACHE_IMPLEMENTATIONSrc   r   getr   r   r   ra   getattrr   warning_oncer   r   SynthIDTextWatermarkingConfigFutureWarningr   r   typer   extra_output_flagshasattr)r   r   fix_locationgreedy_wrong_parameter_msgsingle_beam_wrong_parameter_msgconstrained_wrong_parameter_msggroup_error_prefixcache_classno_cache_warningarg_nameextra_output_flaggenerate_argumentsargrM   rM   rN   r   N  s  



$









zGenerationConfig.validatesave_directoryconfig_file_namepush_to_hubc              
   K   s  z-t jdd}|   W d   n1 sw   Y  t|dkr,ttdd |D W n tyA } ztdt| d}~ww |dd}|durbt d	t |	d
ddur^td||d
< |durh|nt
}tj|rxtd| dtj|dd |r|dd}|d|tjjd }	| j|	fi |}	| |}
tj||}| j|dd td|  |r| j||	|
||	d
d dS dS )a)  
        Save a generation configuration object to the directory `save_directory`, so that it can be re-loaded using the
        [`~GenerationConfig.from_pretrained`] class method.

        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the configuration JSON file will be saved (will be created if it does not exist).
            config_file_name (`str` or `os.PathLike`, *optional*, defaults to `"generation_config.json"`):
                Name of the generation configuration JSON file to be saved in `save_directory`.
            push_to_hub (`bool`, *optional*, defaults to `False`):
                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
                namespace).
            kwargs (`Dict[str, Any]`, *optional*):
                Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        T)recordNr   c                 S   s   g | ]}|j qS rM   )message).0wrM   rM   rN   
<listcomp>j  s    z4GenerationConfig.save_pretrained.<locals>.<listcomp>zThe generation config instance is invalid -- `.validate()` throws warnings and/or exceptions. Fix these issues to save the configuration.

Thrown during validation:
use_auth_tokenrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.tokenV`token` and `use_auth_token` are both specified. Please set only the argument `token`.zProvided path (z#) should be a directory, not a file)exist_okcommit_messagerepo_idr   zConfiguration saved in )r   r   )r   catch_warningsr   lenr   strr   r   r   r   r   ospathisfileAssertionErrormakedirssplitsep_create_repo_get_files_timestampsjointo_json_filer   info_upload_modified_files)r   r   r   r   r   caught_warningsexcr   r   r   files_timestampsoutput_config_filerM   rM   rN   save_pretrainedL  s^   


z GenerationConfig.save_pretrainedmainpretrained_model_name	cache_dirforce_downloadlocal_files_onlyr   revisionc                 K   s*  |dur|nt }|dd}	|dd}
|dd}|dd}|dd}|dd	}|d
d}|durFtdt |durDtd|}d|d}|durS||d< tj||}t	|}tj
|}tjtj||rt|}d}nAt|r|}t|}n6|}zt|||||
|	||||||d}t||}W n ty     ty   td| d| d| dw z| |}||d
< W n tjtfy   td| dw |rtd|  ntd| d|  |ddu r| j|fi |\}}t||_||fS | j|fi |}t||_|S )a1  
        Instantiate a [`GenerationConfig`] from a generation configuration file.

        Args:
            pretrained_model_name (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
                  huggingface.co.
                - a path to a *directory* containing a configuration file saved using the
                  [`~GenerationConfig.save_pretrained`] method, e.g., `./my_model_directory/`.
            config_file_name (`str` or `os.PathLike`, *optional*, defaults to `"generation_config.json"`):
                Name of the generation configuration JSON file to be loaded from `pretrained_model_name`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force to (re-)download the configuration files and override the cached versions if
                they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
            token (`str` or `bool`, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, or not specified, will use
                the token generated when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.

                <Tip>

                To test a pull request you made on the Hub, you can pass `revision="refs/pr/<pr_number>"`.

                </Tip>

            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final configuration object.

                If `True`, then this functions returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
                dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
                part of `kwargs` which has not been used to update `config` and is otherwise ignored.
            subfolder (`str`, *optional*, defaults to `""`):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co, you can
                specify the folder name here.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are configuration attributes will be used to override the loaded
                values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
                by the `return_unused_kwargs` keyword parameter.

        Returns:
            [`GenerationConfig`]: The configuration object instantiated from this pretrained model.

        Examples:

        ```python
        >>> from transformers import GenerationConfig

        >>> # Download configuration from huggingface.co and cache.
        >>> generation_config = GenerationConfig.from_pretrained("openai-community/gpt2")

        >>> # E.g. config was saved using *save_pretrained('./test/saved_model/')*
        >>> generation_config.save_pretrained("./test/saved_model/")
        >>> generation_config = GenerationConfig.from_pretrained("./test/saved_model/")

        >>> # You can also specify configuration names to your generation configuration file
        >>> generation_config.save_pretrained("./test/saved_model/", config_file_name="my_configuration.json")
        >>> generation_config = GenerationConfig.from_pretrained("./test/saved_model/", "my_configuration.json")

        >>> # If you'd like to try a minor variation to an existing configuration, you can also pass generation
        >>> # arguments to `.from_pretrained()`. Be mindful that typos and unused arguments will be ignored
        >>> generation_config, unused_kwargs = GenerationConfig.from_pretrained(
        ...     "openai-community/gpt2", top_k=1, foo=False, do_sample=True, return_unused_kwargs=True
        ... )
        >>> generation_config.top_k
        1

        >>> unused_kwargs
        {'foo': False}
        ```Nresume_downloadproxiesr   	subfolderr   _from_pipeline
_from_autoFr   r   r   config)	file_typefrom_auto_classusing_pipelineT)
r  r  r  r  r  r   
user_agentr  r  r   z!Can't load the configuration of 'z'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'z2' is the correct path to a directory containing a z filez"It looks like the config file at 'z' is not a valid JSON file.zloading configuration file z from cache at return_unused_kwargs)r   r   r   r   r   r   r   r   r  r   existsr   r   r   r   r   EnvironmentError	Exception_dict_from_json_filejsonJSONDecodeErrorUnicodeDecodeErrorr   r	  r   r   r   r   )clsr  r   r  r  r  r   r  r   r  r  r   r  from_pipeliner  commit_hashr  config_pathis_localresolved_config_fileconfiguration_fileconfig_dictr  unused_kwargsrM   rM   rN   from_pretrained  s   _

	



z GenerationConfig.from_pretrained	json_filec                 C   s@   t |ddd}| }W d    n1 sw   Y  t|S )Nrutf-8encoding)openreadr%  loads)r(  r2  readertextrM   rM   rN   r$  T  s   

z%GenerationConfig._dict_from_json_filer/  c                 K   s   | dd}| dd | dd d|v r d|v r |d |d< | di i ||}|jdi |}td|  |rA||fS |S )	a  
        Instantiates a [`GenerationConfig`] from a Python dictionary of parameters.

        Args:
            config_dict (`Dict[str, Any]`):
                Dictionary that will be used to instantiate the configuration object.
            kwargs (`Dict[str, Any]`):
                Additional parameters from which to initialize the configuration object.

        Returns:
            [`GenerationConfig`]: The configuration object instantiated from those parameters.
        r   Fr  Nr  r   zGenerate config rM   )r   updater   r	  )r(  r/  r   r   r  r0  rM   rM   rN   r   Z  s   zGenerationConfig.from_dictdc                 C   s^   | dddurt|d tst|d dd |d< | D ]}t|tr,| | q dS )a.  
        Checks whether the passed dictionary and its nested dicts have a *torch_dtype* key and if it's not None,
        converts torch.dtype to a string of just the type. For example, `torch.float32` get converted into *"float32"*
        string, which can then be stored in the json format.
        torch_dtypeNr   r(   )r   r   r   r  valuesr   dict_torch_dtype_to_str)r   r=  r   rM   rM   rN   r@  |  s   

z(GenerationConfig.dict_torch_dtype_to_strc                 C   sZ   |   }t   }i }| D ]\}}||vs!|dks!||| kr%|||< q| | |S )a'  
        Removes all attributes from config which correspond to the default config attributes for better readability and
        serializes to a Python dictionary.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance,
        r   )to_dictrO   r   r@  )r   r/  default_config_dictserializable_config_dictr   r   rM   rM   rN   to_diff_dict  s   

zGenerationConfig.to_diff_dictc                 C   sL   t | j}d|v r|d= d|v r|d= d|v r|d= t|d< | | |S )z
        Serializes this instance to a Python dictionary.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
        r   r   r   r   )copydeepcopy__dict__r   r@  r   outputrM   rM   rN   rA    s   
zGenerationConfig.to_dictTr   r   c                    sp   |du r	|   }n|  }|rtD ]}||d qfdd fdd |} |}tj|dddd	 S )
aG  
        Serializes this instance to a JSON string.

        Args:
            use_diff (`bool`, *optional*, defaults to `True`):
                If set to `True`, only the difference between the config instance and the default `GenerationConfig()`
                is serialized to JSON string.
            ignore_metadata (`bool`, *optional*, defaults to `False`):
                Whether to ignore the metadata fields present in the instance

        Returns:
            `str`: String containing all the attributes that make up this configuration instance in JSON format.
        TNc                    s@   t | tr fdd|  D S t | tr fdd| D S | S )Nc                    s   i | ]\}}t | |qS rM   )r   r   r   r   convert_keys_to_stringrM   rN   
<dictcomp>      zSGenerationConfig.to_json_string.<locals>.convert_keys_to_string.<locals>.<dictcomp>c                    s   g | ]} |qS rM   rM   )r   itemrK  rM   rN   r     s    zSGenerationConfig.to_json_string.<locals>.convert_keys_to_string.<locals>.<listcomp>)r   r   r   listobjrK  rM   rN   rL    s
   

z?GenerationConfig.to_json_string.<locals>.convert_keys_to_stringc                    s4   t | tr fdd|  D S t| r|  S | S )Nc                    s   i | ]	\}}| |qS rM   rM   rJ  convert_dataclass_to_dictrM   rN   rM    s    zVGenerationConfig.to_json_string.<locals>.convert_dataclass_to_dict.<locals>.<dictcomp>)r   r   r   r   rA  rQ  rS  rM   rN   rT    s
   
zBGenerationConfig.to_json_string.<locals>.convert_dataclass_to_dictr   indent	sort_keys
)rD  rA  METADATA_FIELDSr   r%  dumps)r   r   r   r/  metadata_fieldrM   )rT  rL  rN   r     s   
zGenerationConfig.to_json_stringjson_file_pathc                 C   sF   t |ddd}|| j|d W d   dS 1 sw   Y  dS )a  
        Save this instance to a JSON file.

        Args:
            json_file_path (`str` or `os.PathLike`):
                Path to the JSON file in which this configuration instance's parameters will be saved.
            use_diff (`bool`, *optional*, defaults to `True`):
                If set to `True`, only the difference between the config instance and the default `GenerationConfig()`
                is serialized to JSON file.
        r   r4  r5  r   N)r7  writer   )r   r\  r   writerrM   rM   rN   r    s   "zGenerationConfig.to_json_filemodel_configc                    s   |  }|dd dd | D }| j|ddd |jdd}||urMt }|  }    D ]}t |t||k}||v rL|rLt |||  q2 j	du rat
 fd	d
 jD rad _	t  _ S )a  
        Instantiates a [`GenerationConfig`] from a [`PretrainedConfig`]. This function is useful to convert legacy
        [`PretrainedConfig`] objects, which may contain generation parameters, into a stand-alone [`GenerationConfig`].

        Args:
            model_config (`PretrainedConfig`):
                The model config that will be used to instantiate the generation config.

        Returns:
            [`GenerationConfig`]: The configuration object instantiated from those parameters.
        r   Nc                 S   s   i | ]\}}|d ur||qS NrM   rJ  rM   rM   rN   rM    rN  z6GenerationConfig.from_model_config.<locals>.<dictcomp>FT)r   r   )decoderc                 3   s    | ]	}t  |d V  qdS )FN)r   )r   r   generation_configrM   rN   	<genexpr>  s
    

z5GenerationConfig.from_model_config.<locals>.<genexpr>)rA  r   r   r   get_text_configrO   keysr   r   r   anyr   r   r   )r(  r_  r/  decoder_configdefault_generation_configdecoder_config_dictattris_unsetrM   rb  rN   from_model_config  s(   

z"GenerationConfig.from_model_configc                    sX   g  |  D ]\}}t| |rt| ||  | q|    fdd|  D }|S )a  
        Updates attributes of this class instance with attributes from `kwargs` if they match existing attributes,
        returning all the unused kwargs.

        Args:
            kwargs (`Dict[str, Any]`):
                Dictionary of attributes to tentatively update this class.

        Returns:
            `Dict[str, Any]`: Dictionary containing all the key-value pairs that were not used to update the instance.
        c                    s   i | ]\}}| vr||qS rM   rM   rJ  	to_removerM   rN   rM  2  rN  z+GenerationConfig.update.<locals>.<dictcomp>)r   r   r   appendr   )r   r   r   r   r0  rM   rn  rN   r<    s   

zGenerationConfig.updater`  )F)NF)NNFFNr  )TF)T)!r@   rA   rB   rC   r   r   r   r   r   r   r6   r   r   r   r   r   PathLikeboolr  classmethodr1  r$  r	   r   r   r@  rD  rA  r   r  r   rm  r<  rM   rM   rM   rN   rO   d   sv      %w
>  
N
 :!,+rO   c                   @   s   e Zd ZdZedd Zdeeej	f fddZ
deeef fdd	Zd
d Zdd Zdd Zdd Zedd Zedd ZdS )r   zGeneric watermarking configc                 K   s^   | di |}g }|  D ]\}}t||r!t||| || q|D ]}||d q$|S )a  
        Constructs a BaseWatermarkingConfig instance from a dictionary of parameters.

        Args:
            config_dict (Dict[str, Any]): Dictionary containing configuration parameters.
            **kwargs: Additional keyword arguments to override dictionary values.

        Returns:
            BaseWatermarkingConfig: Instance of BaseWatermarkingConfig constructed from the dictionary.
        NrM   )r   r   r   rp  r   )r(  r/  r   r  ro  r   r   rM   rM   rN   r   :  s   

z BaseWatermarkingConfig.from_dictr\  c                 C   sZ   t |ddd}|  }tj|dddd }|| W d   dS 1 s&w   Y  dS )	z
        Save this instance to a JSON file.

        Args:
            json_file_path (Union[str, os.PathLike]): Path to the JSON file in which this configuration instance's parameters will be saved.
        r   r4  r5  r   TrU  rX  N)r7  rA  r%  rZ  r]  )r   r\  r^  r/  json_stringrM   rM   rN   r  P  s
   "z#BaseWatermarkingConfig.to_json_filer   c                 C   s   t | j}|S )z
        Serializes this instance to a Python dictionary.

        Returns:
            Dict[str, Any]: Dictionary of all the attributes that make up this configuration instance.
        )rE  rF  rG  rH  rM   rM   rN   rA  ]  s   zBaseWatermarkingConfig.to_dictc                 c   s*    t | j D ]	\}}||fV  q	d S r`  rE  rF  rG  r   )r   rk  r   rM   rM   rN   __iter__g  s   zBaseWatermarkingConfig.__iter__c                 C   s   | j j d|   S )Nr   r   r   rM   rM   rN   r   k  s   zBaseWatermarkingConfig.__repr__c                 C   s   t j| jddd S )z
        Serializes this instance to a JSON formatted string.

        Returns:
            str: JSON formatted string representing the configuration instance.
        r   )rV  rX  )r%  rZ  rG  r   rM   rM   rN   r   n  s   z%BaseWatermarkingConfig.to_json_stringc                 K   s,   |  D ]\}}t| |rt| || qdS )z
        Update the configuration attributes with new values.

        Args:
            **kwargs: Keyword arguments representing configuration attributes and their new values.
        N)r   r   r   )r   r   r   r   rM   rM   rN   r<  w  s
   
zBaseWatermarkingConfig.updatec                 C      d S r`  rM   r   rM   rM   rN   r        zBaseWatermarkingConfig.validatec                 C   rw  r`  rM   )r   
vocab_sizerM   rM   rN   construct_processor  rx  z*BaseWatermarkingConfig.construct_processorN)r@   rA   rB   rC   rs  r   r   r   r   rq  r  r	   r   rA  rv  r   r   r<  r   r   rz  rM   rM   rM   rN   r   6  s    

	
r   c                   @   sh   e Zd ZdZ					ddee dee d	ee d
ee dee f
ddZdd Z	deddfddZ
dS )r   a  
    Class that holds arguments for watermark generation and should be passed into `GenerationConfig` during `generate`.
    See [this paper](https://arxiv.org/abs/2306.04634) for more details on the arguments.

    Accepts the following keys:
        - greenlist_ratio (`float`):
            Used for watermarking. The ratio of "green" tokens used to the vocabulary size. Defaults to 0.25.
        - bias (`float`):
            Used with watermarking. The bias added to the selected "green" tokens' logits. Defaults to 2.0.
        - hashing_key (`int`):
            Hashing key used for watermarking. Defaults to 15485863 (the millionth prime).
        - seeding_scheme (`str`):
            Algorithm to use for watermarking. Accepts values:
                - "lefthash" (default): "green" tokens selection depend on the last token (Algorithm 2 from the paper)
                - "selfhash": "green" tokens selection depends on the current token itself (Algorithm 3 from the paper)
                    The downside of this scheme is that it considers all possible next tokens and can be slower than "lefthash".
        - context_width(`int`):
            The context length of previous tokens to use in seeding. Higher context length makes watermarking more robust.
          ?       @K lefthashr(   greenlist_ratiobiashashing_keyseeding_schemecontext_widthc                 C   s"   || _ || _|| _|| _|| _d S r`  )r  r  r  r  r  )r   r  r  r  r  r  rM   rM   rN   r     s
   
zWatermarkingConfig.__init__c                 C   sv   d}| j dvrt|jdd| j dd| j  krdks)n t|jdd	| j d| jd
ks9t|jdd| jdd S )N}Some of the keys in `watermarking_config` are defined incorrectly. `{key}` should be {correct_value}` but found {found_value})selfhashr~  r  z[`selfhash`, `lefthash`]r   correct_valuefound_valuern   rg   r  zin range between 0.0 and 1.0r(   r  za positive integer)r  r   r   r  r  r   watermark_missing_arg_msgrM   rM   rN   r     s6   

zWatermarkingConfig.validatery  r   r*   c              	   C   s    t ||| j| j| j| j| jdS )N)ry  devicer  r  r  r  r  )r*   r  r  r  r  r  r   ry  r  rM   rM   rN   rz    s   z&WatermarkingConfig.construct_processorN)r{  r|  r}  r~  r(   )r@   rA   rB   rC   r   floatintr   r   r   rz  rM   rM   rM   rN   r     s(    
r   c                   @   s`   e Zd ZdZ					ddedee ded	ed
ededefddZdd ZdeddfddZ	dS )r   a  
    Class that holds arguments for watermark generation and should be passed into `GenerationConfig` during `generate`.
    See [this paper](https://www.nature.com/articles/s41586-024-08025-4) for more details on the arguments.

    Args:
        ngram_len (`int`):
            Ngram length.
        keys (`List[int]`):
            A sequence of watermarking keys, one for each depth.
        context_history_size (`int`, *optional*, defaults to 1024):
            Size of the tensor to keep track of seen contexts.
        sampling_table_seed (`int`, *optional*, defaults to 0):
            Random seed to generate the sampling table.
        sampling_table_size (`int`, *optional*, defaults to 65536):
            Size of the sampling table.
        skip_first_ngram_calls (`bool`, *optional*, defaults to `False`):
            Whether to skip first ngram calls.
        debug_mode (`bool`, optional, *optional*, defaults to `False`):
            Logits are modified to uniform one got before watermarking modification is applied. This is to test the
            implementation.

    Examples:
    ```python
    >>> from transformers import AutoModelForCausalLM, AutoTokenizer, SynthIDTextWatermarkingConfig

    >>> tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-2b', padding_side="left")
    >>> model = AutoModelForCausalLM.from_pretrained('google/gemma-2-2b')

    >>> # SynthID Text configuration
    >>> watermarking_config = SynthIDTextWatermarkingConfig(
    ...     keys=[654, 400, 836, 123, 340, 443, 597, 160, 57],
    ...     ngram_len=5,
    ... )

    >>> # Generation with watermarking
    >>> tokenized_prompts = tokenizer(["Once upon a time, "], return_tensors="pt", padding=True)
    >>> output_sequences = model.generate(
    ...     **tokenized_prompts, watermarking_config=watermarking_config, do_sample=True, max_new_tokens=10
    ... )
    >>> watermarked_text = tokenizer.batch_decode(output_sequences, skip_special_tokens=True)
    ```
       r      F	ngram_lenrf  context_history_sizesampling_table_seedsampling_table_sizeskip_first_ngram_calls
debug_modec                 C   s.   || _ || _|| _|| _|| _|| _|| _d S r`  )r  rf  r  r  r  r  r  )r   r  rf  r  r  r  r  r  rM   rM   rN   r     s   

z&SynthIDTextWatermarkingConfig.__init__c                 C   s(   d}| j dkrt|jdd| j dd S )Nr  i   r  z< 2**24r  )r  r   r   r  rM   rM   rN   r     s   
z&SynthIDTextWatermarkingConfig.validatery  r   r*   c              
   C   s&   t | j| j| j| j| j|| j| jdS )N)r  rf  r  r  r  r  r  r  )r)   r  rf  r  r  r  r  r  r  rM   rM   rN   rz  $  s   z1SynthIDTextWatermarkingConfig.construct_processorN)r  r   r  FF)
r@   rA   rB   rC   r  r
   rr  r   r   rz  rM   rM   rM   rN   r     s0    /
r   c                   @   sx   e Zd ZU dZdZeed< dZee ed< dZ	e
eef ed< dZeed	< dZee ed
< dZdeeef fddZdS )r   aU  
    Class that holds arguments relative to `torch.compile` behavior, when using automatic compilation in `generate`.
    See [`torch.compile`](https://pytorch.org/docs/stable/generated/torch.compile.html) for more details on the arguments.

    Args:
        fullgraph (`bool`, *optional*, defaults to `True`):
            If `True`, requires that the whole forward be capturable in a single graph.
        dynamic (`bool` or `None`, *optional*):
            Whether to try to use dynamic shape graphs.
        backend (`str` or `Callable`, *optional*, defaults to `"inductor"`):
            Backend to be used.
        mode (`str`, *optional*, defaults to `"reduce-overhead"`):
            Controls balance between performance and overhead.
        options (`dict`, *optional*):
            A dictionary of options to pass to the backend.

    Examples:
    ```python
    >>> from transformers import AutoModelForCausalLM, AutoTokenizer, CompileConfig

    >>> tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-2b')
    >>> model = AutoModelForCausalLM.from_pretrained('google/gemma-2-2b').cuda()

    >>> # Automatic compile configuration, used with static cache
    >>> compile_config = CompileConfig(dynamic=True)

    >>> # Generation with static cache and compile config
    >>> input = tokenizer.encode("Hello there, how", return_tensors="pt").cuda()
    >>> output = model.generate(
    ...     input, do_sample=False, max_new_tokens=300, cache_implementation="static", compile_config=compile_config
    ... )
    >>> output_text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    ```
    T	fullgraphNr5   inductorbackendzreduce-overheadmodeoptionsr   c                 C   s   t dd | j D S )z0Serializes this instance to a Python dictionary.c                 S   s   i | ]\}}|d kr||qS )_compile_all_devicesrM   rJ  rM   rM   rN   rM  `  rN  z)CompileConfig.to_dict.<locals>.<dictcomp>ru  r   rM   rM   rN   rA  ^  s   zCompileConfig.to_dict)r@   rA   rB   rC   r  rr  __annotations__r5   r   r  r   r   r   r  r  r   r  r	   r   rA  rM   rM   rM   rN   r   1  s   
 #r   )ArC   rE  r%  r   r   abcr   r   dataclassesr   r   typingr   r   r   r	   r
   r   r   r   r   configuration_utilsr   utilsr   r   r   r   r   r   r   r   r   modeling_utilsr   
get_loggerr@   r   rY  r    NEED_SETUP_CACHE_CLASSES_MAPPINGQUANT_BACKEND_CLASSES_MAPPINGr   cache_utilsr   r   r    r!   r"   r#   r$   r%   r&   r'   logits_processr)   r*   rP  rf  r6   rO   r   r   r   r   rM   rM   rM   rN   <module>   sj   $,
0
         [RMY