o
    hv                     @   s,  d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZ ddlZddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z# erae raddl$Z$e"%e&Z'ed Z(G d	d
 d
eZ)G dd deZ*ee*j+e*_+e*j+j dure*j+j j,dddde*j+_ dS dS )zH
Feature extraction saving/loading class for common feature extractors.
    N)UserDict)TYPE_CHECKINGAnyOptionalUnion   )custom_object_save)FEATURE_EXTRACTOR_NAMEPushToHubMixin
TensorTypeadd_model_info_to_auto_map"add_model_info_to_custom_pipelinescached_file	copy_funcdownload_urlis_flax_availableis_jax_tensoris_numpy_arrayis_offline_modeis_remote_urlis_tf_availableis_torch_availableis_torch_deviceis_torch_dtypeloggingrequires_backendsSequenceFeatureExtractorc                       s   e Zd ZdZddeeeef  dedee	f f fddZ
dedee fd	d
ZdefddZdd Zdd Zdd Zdd Zdd Zddeeee	f  fddZddeeee	f  fddZdddZ  ZS ) BatchFeatureaU  
    Holds the output of the [`~SequenceFeatureExtractor.pad`] and feature extractor specific `__call__` methods.

    This class is derived from a python dictionary and can be used as a dictionary.

    Args:
        data (`dict`, *optional*):
            Dictionary of lists/arrays/tensors returned by the __call__/pad methods ('input_values', 'attention_mask',
            etc.).
        tensor_type (`Union[None, str, TensorType]`, *optional*):
            You can give a tensor_type here to convert the lists of integers in PyTorch/TensorFlow/Numpy Tensors at
            initialization.
    Ndatatensor_typec                    s   t  | | j|d d S )N)r   )super__init__convert_to_tensors)selfr   r   	__class__ y/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/transformers/feature_extraction_utils.pyr!   L   s   zBatchFeature.__init__itemreturnc                 C   s   t |tr
| j| S td)z
        If the key is a string, returns the value of the dict associated to `key` ('input_values', 'attention_mask',
        etc.).
        zRIndexing with integers is not available when using Python based feature extractors)
isinstancestrr   KeyErrorr#   r(   r&   r&   r'   __getitem__P   s   

zBatchFeature.__getitem__c                 C   s    z| j | W S  ty   tw N)r   r,   AttributeErrorr-   r&   r&   r'   __getattr__Z   s
   zBatchFeature.__getattr__c                 C   s
   d| j iS Nr   r   r#   r&   r&   r'   __getstate__`      
zBatchFeature.__getstate__c                 C   s   d|v r|d | _ d S d S r2   r3   )r#   stater&   r&   r'   __setstate__c   s   zBatchFeature.__setstate__c                 C   
   | j  S r/   )r   keysr4   r&   r&   r'   r:   h   r6   zBatchFeature.keysc                 C   r9   r/   )r   valuesr4   r&   r&   r'   r;   l   r6   zBatchFeature.valuesc                 C   r9   r/   )r   itemsr4   r&   r&   r'   r<   p   r6   zBatchFeature.itemsc                    s   |d u rdS t |tst|}|tjkr)t stddd l}|j |j}| fS |tjkrFt	 s5tddd l
fdd j}| fS |tjkrat sRtddd lm} |j t}| fS d	 fdd	 t}| fS )
NNNzSUnable to convert output to TensorFlow tensors format, TensorFlow is not installed.r   zMUnable to convert output to PyTorch tensors format, PyTorch is not installed.c                    s   t | ttfr;t| dkr;t | d tjrt| } n t | d ttfr;t| d dkr;t | d d tjr;t| } t | tjrF | S  | S )Nr   )	r*   listtuplelennpndarrayarray
from_numpytensor)value)torchr&   r'   	as_tensor   s   


z5BatchFeature._get_is_as_tensor_fns.<locals>.as_tensorzEUnable to convert output to JAX tensors format, JAX is not installed.c                    sn   t | ttfr0t | d tttjfr0dd | D }tt|dkr0|d u r0 dd | D td} tj| |dS )Nr   c                 S   s   g | ]}t |qS r&   )r@   .0valr&   r&   r'   
<listcomp>   s    zIBatchFeature._get_is_as_tensor_fns.<locals>.as_tensor.<locals>.<listcomp>r   c                 S   s   g | ]}t |qS r&   )rA   asarrayrI   r&   r&   r'   rL      s    )dtype)	r*   r>   r?   rA   rB   r@   setobjectrM   )rF   rN   
value_lens)rH   r&   r'   rH      s
   $r/   )r*   r   
TENSORFLOWr   ImportError
tensorflowconstant	is_tensorPYTORCHr   rG   JAXr   	jax.numpynumpyrC   r   r   )r#   r   tfrV   jnpr&   )rH   rG   r'   _get_is_as_tensor_fnss   s<   

(

z"BatchFeature._get_is_as_tensor_fnsc                 C   sj   |du r| S |  |\}}|  D ]!\}}z||s"||}|| |< W q   |dkr/tdtd| S )a5  
        Convert the inner content to tensors.

        Args:
            tensor_type (`str` or [`~utils.TensorType`], *optional*):
                The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
                `None`, no modification is done.
        Noverflowing_valueszKUnable to create tensor returning overflowing values of different lengths. zUnable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.)r]   r<   
ValueError)r#   r   rV   rH   keyrF   rE   r&   r&   r'   r"      s    	zBatchFeature.convert_to_tensorsc           
      O   s  t | dg ddl}i }|d}|dd}|du rEt|dkrE|d }t|r*nt|ts8t|s8t|tr;|}n
t	dt| d| 
 D ]2\}}	t|	|jrc||	rc|	j|i |||< qIt|	|jrw|durw|	j||d	||< qI|	||< qI|| _| S )
a  
        Send all values to device by calling `v.to(*args, **kwargs)` (PyTorch only). This should support casting in
        different `dtypes` and sending the `BatchFeature` to a different `device`.

        Args:
            args (`Tuple`):
                Will be passed to the `to(...)` function of the tensors.
            kwargs (`Dict`, *optional*):
                Will be passed to the `to(...)` function of the tensors.
                To enable asynchronous data transfer, set the `non_blocking` flag in `kwargs` (defaults to `False`).

        Returns:
            [`BatchFeature`]: The same instance after modification.
        rG   r   Ndevicenon_blockingFz*Attempting to cast a BatchFeature to type z. This is not supported.)ra   rb   )r   rG   getr@   r   r*   r+   r   intr_   r<   Tensoris_floating_pointtor   )
r#   argskwargsrG   new_datara   rb   argkvr&   r&   r'   rg      s(   

zBatchFeature.tor=   r/   )r)   r   )__name__
__module____qualname____doc__r   dictr+   r   r   r   r!   r.   r1   r5   r8   r:   r;   r<   r]   r"   rg   __classcell__r&   r&   r$   r'   r   =   s    .
;r   c                   @   sf  e Zd ZdZdZdd ZdefddZe						d+d
e	ee
jf dee	ee
jf  dededee	eef  defddZd,de	ee
jf defddZed
e	ee
jf deeeef eeef f fddZedeeef defddZdeeef fddZede	ee
jf defdd Zdefd!d"Zd#e	ee
jf fd$d%Zd&d' Zed-d)d*ZdS ).FeatureExtractionMixinz
    This is a feature extraction mixin used to provide saving/loading functionality for sequential and image feature
    extractors.
    Nc                 K   sl   | dd| _| D ](\}}zt| || W q ty3 } ztd| d| d|   |d}~ww dS )z'Set elements of `kwargs` as attributes.processor_classNz
Can't set z with value z for )pop_processor_classr<   setattrr0   loggererror)r#   ri   r`   rF   errr&   r&   r'   r!     s   zFeatureExtractionMixin.__init__ru   c                 C   s
   || _ dS )z%Sets processor class as an attribute.N)rw   )r#   ru   r&   r&   r'   _set_processor_class  s   
z+FeatureExtractionMixin._set_processor_classFmainpretrained_model_name_or_path	cache_dirforce_downloadlocal_files_onlytokenrevisionc           
      K   s   ||d< ||d< ||d< ||d< | dd}|dur*tdt |dur(td|}|dur2||d	< | j|fi |\}	}| j|	fi |S )
a  
        Instantiate a type of [`~feature_extraction_utils.FeatureExtractionMixin`] from a feature extractor, *e.g.* a
        derived class of [`SequenceFeatureExtractor`].

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] method, e.g.,
                  `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model feature extractor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force to (re-)download the feature extractor files and override the cached versions
                if they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
            token (`str` or `bool`, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, or not specified, will use
                the token generated when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.


                <Tip>

                To test a pull request you made on the Hub, you can pass `revision="refs/pr/<pr_number>"`.

                </Tip>

            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final feature extractor object. If `True`, then this
                functions returns a `Tuple(feature_extractor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of
                `kwargs` which has not been used to update `feature_extractor` and is otherwise ignored.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are feature extractor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* feature extractor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        Returns:
            A feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`].

        Examples:

        ```python
        # We can't instantiate directly the base class *FeatureExtractionMixin* nor *SequenceFeatureExtractor* so let's show the examples on a
        # derived class: *Wav2Vec2FeatureExtractor*
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h"
        )  # Download feature_extraction_config from huggingface.co and cache.
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
            "./test/saved_model/"
        )  # E.g. feature_extractor (or model) was saved using *save_pretrained('./test/saved_model/')*
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("./test/saved_model/preprocessor_config.json")
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h", return_attention_mask=False, foo=False
        )
        assert feature_extractor.return_attention_mask is False
        feature_extractor, unused_kwargs = Wav2Vec2FeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h", return_attention_mask=False, foo=False, return_unused_kwargs=True
        )
        assert feature_extractor.return_attention_mask is False
        assert unused_kwargs == {"foo": False}
        ```r   r   r   r   use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.V`token` and `use_auth_token` are both specified. Please set only the argument `token`.r   )rv   warningswarnFutureWarningr_   get_feature_extractor_dict	from_dict)
clsr~   r   r   r   r   r   ri   r   feature_extractor_dictr&   r&   r'   from_pretrained  s&   Xz&FeatureExtractionMixin.from_pretrainedsave_directorypush_to_hubc           	      K   s  | dd}|dur tdt |dddurtd||d< tj|r.t	d| dtj
|dd	 |rX| d
d}| d|tjjd }| j|fi |}| |}| jdurdt| || d tj|t}| | td|  |r| j|||||dd |gS )az  
        Save a feature_extractor object to the directory `save_directory`, so that it can be re-loaded using the
        [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] class method.

        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file will be saved (will be created if it does not exist).
            push_to_hub (`bool`, *optional*, defaults to `False`):
                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
                namespace).
            kwargs (`Dict[str, Any]`, *optional*):
                Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        r   Nr   r   r   zProvided path (z#) should be a directory, not a fileT)exist_okcommit_messagerepo_id)configzFeature extractor saved in )r   r   )rv   r   r   r   rc   r_   ospathisfileAssertionErrormakedirssplitsep_create_repo_get_files_timestamps_auto_classr   joinr	   to_json_filery   info_upload_modified_files)	r#   r   r   ri   r   r   r   files_timestampsoutput_feature_extractor_filer&   r&   r'   save_pretrained  sB   


z&FeatureExtractionMixin.save_pretrainedr)   c                 K   sp  | dd}| dd}| dd}| dd}| dd}| dd}| d	d}	| d
d}
| dd}|	durJtdt |durHtd|	}| dd}| dd}d|d}|durc||d< t ro|
sotd d}
t|}t	j
|}t	j
|rt	j
|t}t	j
|r|}d}n;t|r|}t|}n0t}zt|||||||
||||d}W n ty     ty   td| d| dt dw z!t|dd}| }W d   n1 sw   Y  t|}W n tjy   td| dw |r
td|  ntd| d|  |s4d |v r&t|d  ||d < d!|v r4t|d! ||d!< ||fS )"a6  
        From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
        feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`] using `from_dict`.

        Parameters:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.

        Returns:
            `Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the feature extractor object.
        r   Nr   Fresume_downloadproxies	subfolderr   r   r   r   r   r   _from_pipeline
_from_autofeature extractor)	file_typefrom_auto_classusing_pipelinez+Offline mode: forcing local_files_only=TrueT)	r   r   r   r   r   r   r   
user_agentr   z"Can't load feature extractor for 'z'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'z2' is the correct path to a directory containing a z fileutf-8encodingz"It looks like the config file at 'z' is not a valid JSON file.zloading configuration file z from cache at auto_mapcustom_pipelines)rv   r   r   r   r_   r   ry   r   r+   r   r   isdirr   r	   r   r   r   r   OSError	ExceptionopenreadjsonloadsJSONDecodeErrorr   r   )r   r~   ri   r   r   r   r   r   r   r   r   r   from_pipeliner   r   is_localfeature_extractor_fileresolved_feature_extractor_filereadertextr   r&   r&   r'   r     s   




	



z1FeatureExtractionMixin.get_feature_extractor_dictr   c                 K   s   | dd}g }| D ]\}}||v r|||< || q|D ]}| |d q | di |}td|  |r>||fS |S )a	  
        Instantiates a type of [`~feature_extraction_utils.FeatureExtractionMixin`] from a Python dictionary of
        parameters.

        Args:
            feature_extractor_dict (`Dict[str, Any]`):
                Dictionary that will be used to instantiate the feature extractor object. Such a dictionary can be
                retrieved from a pretrained checkpoint by leveraging the
                [`~feature_extraction_utils.FeatureExtractionMixin.to_dict`] method.
            kwargs (`Dict[str, Any]`):
                Additional parameters from which to initialize the feature extractor object.

        Returns:
            [`~feature_extraction_utils.FeatureExtractionMixin`]: The feature extractor object instantiated from those
            parameters.
        return_unused_kwargsFNzFeature extractor r&   )rv   r<   appendry   r   )r   r   ri   r   	to_remover`   rF   feature_extractorr&   r&   r'   r   6  s   
z FeatureExtractionMixin.from_dictc                 C   s8   t | j}| jj|d< d|v r|d= d|v r|d= |S )z
        Serializes this instance to a Python dictionary. Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
        feature_extractor_typemel_filterswindow)copydeepcopy__dict__r%   rn   )r#   outputr&   r&   r'   to_dict[  s   zFeatureExtractionMixin.to_dict	json_filec                 C   sL   t |dd}| }W d   n1 sw   Y  t|}| di |S )a  
        Instantiates a feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`] from the path to
        a JSON file of parameters.

        Args:
            json_file (`str` or `os.PathLike`):
                Path to the JSON file containing the parameters.

        Returns:
            A feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`]: The feature_extractor
            object instantiated from that JSON file.
        r   r   Nr&   )r   r   r   r   )r   r   r   r   r   r&   r&   r'   from_json_fileh  s
   

z%FeatureExtractionMixin.from_json_filec                 C   sb   |   }| D ]\}}t|tjr| ||< q|dd}|dur'||d< tj|dddd S )z
        Serializes this instance to a JSON string.

        Returns:
            `str`: String containing all the attributes that make up this feature_extractor instance in JSON format.
        rw   Nru      T)indent	sort_keys
)	r   r<   r*   rA   rB   tolistrv   r   dumps)r#   
dictionaryr`   rF   rw   r&   r&   r'   to_json_string{  s   z%FeatureExtractionMixin.to_json_stringjson_file_pathc                 C   sB   t |ddd}||   W d   dS 1 sw   Y  dS )z
        Save this instance to a JSON file.

        Args:
            json_file_path (`str` or `os.PathLike`):
                Path to the JSON file in which this feature_extractor instance's parameters will be saved.
        wr   r   N)r   writer   )r#   r   writerr&   r&   r'   r     s   "z#FeatureExtractionMixin.to_json_filec                 C   s   | j j d|   S )N )r%   rn   r   r4   r&   r&   r'   __repr__  s   zFeatureExtractionMixin.__repr__AutoFeatureExtractorc                 C   sD   t |ts|j}ddlm  m} t||st| d|| _dS )a  
        Register this class with a given auto class. This should only be used for custom feature extractors as the ones
        in the library are already mapped with `AutoFeatureExtractor`.

        <Tip warning={true}>

        This API is experimental and may have some slight breaking changes in the next releases.

        </Tip>

        Args:
            auto_class (`str` or `type`, *optional*, defaults to `"AutoFeatureExtractor"`):
                The auto class to register this new feature extractor with.
        r   Nz is not a valid auto class.)	r*   r+   rn   transformers.models.automodelsautohasattrr_   r   )r   
auto_classauto_moduler&   r&   r'   register_for_auto_class  s   


z.FeatureExtractionMixin.register_for_auto_class)NFFNr}   )F)r   )rn   ro   rp   rq   r   r!   r+   r|   classmethodr   r   PathLiker   boolr   r   r?   rr   r   r   PreTrainedFeatureExtractorr   r   r   r   r   r   r   r&   r&   r&   r'   rt      sR    o=t$rt   r   r   zfeature extractor file)rP   object_classobject_files)-rq   r   r   r   r   collectionsr   typingr   r   r   r   rZ   rA   dynamic_module_utilsr   utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rG   
get_loggerrn   ry   r   r   rt   r   formatr&   r&   r&   r'   <module>   s6   T
 @   @