o
    h                     @   s   d dl Z d dlmZ d dlmZmZ d dlZddlmZm	Z	m
Z
mZmZ ddlmZmZ e	 r7d dlZd dlZe
 r@ddlmZ eeZeed	d
G dd deZdd ZdS )    N)BytesIO)ListUnion   )add_end_docstringsis_av_availableis_torch_availableloggingrequires_backends   )Pipelinebuild_pipeline_init_args),MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMEST)has_image_processorc                       sf   e Zd ZdZ fddZdddZddeeee f f fdd	Z	dddZ
dd ZdddZ  ZS )VideoClassificationPipelinea  
    Video classification pipeline using any `AutoModelForVideoClassification`. This pipeline predicts the class of a
    video.

    This video classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"video-classification"`.

    See the list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=video-classification).
    c                    s*   t  j|i | t| d | t d S )Nav)super__init__r
   check_model_typer   )selfargskwargs	__class__ /var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/transformers/pipelines/video_classification.pyr   6   s   
z$VideoClassificationPipeline.__init__Nc                 C   st   i }|d ur
||d< |d ur||d< i }|d ur||d< |d ur1|dvr,t d| d||d< nd|d< |i |fS )	Nframe_sampling_rate
num_framestop_k)softmaxsigmoidnonez'Invalid value for `function_to_apply`: z2. Valid options are ['softmax', 'sigmoid', 'none']function_to_applyr   )
ValueError)r   r   r   r   r"   preprocess_paramspostprocess_paramsr   r   r   _sanitize_parameters;   s    


z0VideoClassificationPipeline._sanitize_parametersinputsc                    sB   d|v rt dt |d}|du rtdt j|fi |S )a	  
        Assign labels to the video(s) passed as inputs.

        Args:
            inputs (`str`, `List[str]`):
                The pipeline handles three types of videos:

                - A string containing a http link pointing to a video
                - A string containing a local path to a video

                The pipeline accepts either a single video or a batch of videos, which must then be passed as a string.
                Videos in a batch must all be in the same format: all as http links or all as local paths.
            top_k (`int`, *optional*, defaults to 5):
                The number of top labels that will be returned by the pipeline. If the provided number is higher than
                the number of labels available in the model configuration, it will default to the number of labels.
            num_frames (`int`, *optional*, defaults to `self.model.config.num_frames`):
                The number of frames sampled from the video to run the classification on. If not provided, will default
                to the number of frames specified in the model configuration.
            frame_sampling_rate (`int`, *optional*, defaults to 1):
                The sampling rate used to select frames from the video. If not provided, will default to 1, i.e. every
                frame will be used.
            function_to_apply(`str`, *optional*, defaults to "softmax"):
                The function to apply to the model output. By default, the pipeline will apply the softmax function to
                the output of the model. Valid options: ["softmax", "sigmoid", "none"]. Note that passing Python's
                built-in `None` will default to "softmax", so you need to pass the string "none" to disable any
                post-processing.

        Return:
            A list of dictionaries or a list of list of dictionaries containing result. If the input is a single video,
            will return a list of `top_k` dictionaries, if the input is a list of several videos, will return a list of list of
            `top_k` dictionaries corresponding to the videos.

            The dictionaries contain the following keys:

            - **label** (`str`) -- The label identified by the model.
            - **score** (`int`) -- The score attributed by the model for that label.
        videoszuThe `videos` argument has been renamed to `inputs`. In version 5 of Transformers, `videos` will no longer be acceptedNzICannot call the video-classification pipeline without an inputs argument!)warningswarnFutureWarningpopr#   r   __call__)r   r'   r   r   r   r   r-   P   s   '
z$VideoClassificationPipeline.__call__r   c           	      C   s   |d u r	| j jj}|ds|drtt|j}t	|}d}|| d }t
j|||t
jd}t||}t|}| j|| jd}| jdkrN|| j}|S )Nzhttp://zhttps://r   r   )numdtype)return_tensorspt)modelconfigr   
startswithr   requestsgetcontentr   opennplinspaceint64read_video_pyavlistimage_processor	frameworktotorch_dtype)	r   videor   r   	container	start_idxend_idxindicesmodel_inputsr   r   r   
preprocess   s   



z&VideoClassificationPipeline.preprocessc                 C   s   | j di |}|S )Nr   )r2   )r   rG   model_outputsr   r   r   _forward   s   z$VideoClassificationPipeline._forward   r   c                    s   | j jjkr j jj} jdkr7|dkr|jd d}n|dkr*|jd  }n|jd }||\}}ntd j |	 }|	 } fddt
||D S )	Nr1   r   r   r    zUnsupported framework: c                    s$   g | ]\}}| j jj| d qS ))scorelabel)r2   r3   id2label).0rM   _idr   r   r   
<listcomp>   s   $ z;VideoClassificationPipeline.postprocess.<locals>.<listcomp>)r2   r3   
num_labelsr?   logitsr   r    topkr#   tolistzip)r   rI   r   r"   probsscoresidsr   rR   r   postprocess   s   


z'VideoClassificationPipeline.postprocess)NNNN)N)Nr   )rK   r   )__name__
__module____qualname____doc__r   r&   r   strr   r-   rH   rJ   r\   __classcell__r   r   r   r   r   )   s    
 
1r   c                 C   sr   g }|  d |d }|d }t| jddD ]\}}||kr! n||kr.||v r.|| qtdd |D S )Nr   rL   )rB   c                 S   s   g | ]}|j d dqS )rgb24)format)
to_ndarray)rP   xr   r   r   rS      s    z#read_video_pyav.<locals>.<listcomp>)seek	enumeratedecodeappendr9   stack)rC   rF   framesstart_index	end_indexiframer   r   r   r<      s   

r<   )r)   ior   typingr   r   r5   utilsr   r   r   r	   r
   baser   r   r   numpyr9   models.auto.modeling_autor   
get_loggerr]   loggerr   r<   r   r   r   r   <module>   s    
 