o
    h                     @   s  d dl mZmZ d dlmZ d dlmZmZ d dlZ	ddl
mZmZmZmZmZ ddlmZmZmZmZmZ ddlmZmZmZmZmZ e rQd dlZdd	l
mZ e rXd dlZe r_d dlZ e rhd dl!mZ" 	dbd
e	j#deee$f deeee$f  de	j#fddZ%de	j&dfd
e	j#de'dee de	j(deee$ef  de	j#fddZ)dd Z*			dcd
ee	j#ddddf dee+ dee$ deee$ef  ddf
ddZ,dbde-e.e.f fd d!Z/	"		ddd#e	j#d$ee.e-e.e.f e0e. e-e. f d%e+d&ee. deee$ef  de-fd'd(Z1				"	ded
e	j#d$e-e.e.f d)d*d+ee. dee d,e+deee$ef  de	j#fd-d.Z2		dfd
e	j#d/ee'ee' f d0ee'ee' f dee deee$ef  de	j#fd1d2Z3		dfd
e	j#d$e-e.e.f deee$ef  deee$ef  de	j#f
d3d4Z4dgd6d7Z5d5e	j#de	j#fd8d9Z6dhd:d;Z7d5edefd<d=Z8did?d@Z9d>e	j#de	j#fdAdBZ:djdCdDZ;d>edefdEdFZ<dGdH Z=dIdJ Z>G dKdL dLeZ?e?j@dMddfd
e	j#dNee.e-e.e.f ee-e.e.f  f dOe?dPee'ee' f deee$ef  deee$ef  de	j#fdQdRZAd
edefdSdTZB		dfd
e	j#dee deee$ef  de	j#fdUdVZCdWdX ZDdYe0d de-eEe-e.e.f e0d f eEe.e-e-e.e.f e.f f f fdZd[ZFd\eEe-e.e.f df d]eEe.e-e.e.f f de0d fd^d_ZGG d`da daZHdS )k    )
CollectionIterable)ceil)OptionalUnionN   )ChannelDimension
ImageInputget_channel_dimension_axisget_image_sizeinfer_channel_dimension_format)ExplicitEnum
TensorTypeis_jax_tensoris_tf_tensoris_torch_tensor)is_flax_availableis_tf_availableis_torch_availableis_vision_availablerequires_backends)PILImageResamplingimagechannel_diminput_channel_dimreturnc                 C   s   t | tjstdt|  |du rt| }t|}||kr!| S |tjkr-| d} | S |tj	kr9| d} | S t
d| )a)  
    Converts `image` to the channel dimension format specified by `channel_dim`.

    Args:
        image (`numpy.ndarray`):
            The image to have its channel dimension set.
        channel_dim (`ChannelDimension`):
            The channel dimension format to use.
        input_channel_dim (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If not provided, it will be inferred from the input image.

    Returns:
        `np.ndarray`: The image with the channel dimension set to `channel_dim`.
    ,Input image must be of type np.ndarray, got N)   r   r   )r   r   r   z&Unsupported channel dimension format: )
isinstancenpndarray	TypeErrortyper   r   FIRST	transposeLAST
ValueError)r   r   r   target_channel_dim r(   q/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/transformers/image_transforms.pyto_channel_dimension_format5   s   



r*   scaledata_formatdtypeinput_data_formatc                 C   sP   t | tjstdt|  | tj| }|dur!t|||}||}|S )a  
    Rescales `image` by `scale`.

    Args:
        image (`np.ndarray`):
            The image to rescale.
        scale (`float`):
            The scale to use for rescaling the image.
        data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the image. If not provided, it will be the same as the input image.
        dtype (`np.dtype`, *optional*, defaults to `np.float32`):
            The dtype of the output image. Defaults to `np.float32`. Used for backwards compatibility with feature
            extractors.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If not provided, it will be inferred from the input image.

    Returns:
        `np.ndarray`: The rescaled image.
    r   N)r   r   r    r!   r"   astypefloat64r*   )r   r+   r,   r-   r.   rescaled_imager(   r(   r)   rescale\   s   
r2   c                 C   s   | j tjkr
d}|S t| | tr4td| kr%t| dkr%d}|S td|   d| 	  dtd| krFt| dkrFd}|S td	|   d| 	  d)
z
    Detects whether or not the image needs to be rescaled before being converted to a PIL image.

    The assumption is that if the image is of type `np.float` and all values are between 0 and 1, it needs to be
    rescaled.
    Fr      zZThe image to be converted to a PIL image contains values outside the range [0, 255], got [z, z%] which cannot be converted to uint8.r   TzXThe image to be converted to a PIL image contains values outside the range [0, 1], got [)
r-   r   uint8allcloser/   intallr&   minmax)r   
do_rescaler(   r(   r)   _rescale_for_pil_conversion   s0   r;   zPIL.Image.Imagetorch.Tensor	tf.Tensorzjnp.ndarrayr:   
image_modec                 C   s   t tdg t| tjjr| S t| st| r|  } nt| r&t	
| } nt| t	js5tdt|  t| tj|} | jd dkrJt	j| ddn| } |du rTt| n|}|r]t| d} | t	j} tjj| |dS )	a  
    Converts `image` to a PIL Image. Optionally rescales it and puts the channel dimension back as the last axis if
    needed.

    Args:
        image (`PIL.Image.Image` or `numpy.ndarray` or `torch.Tensor` or `tf.Tensor`):
            The image to convert to the `PIL.Image` format.
        do_rescale (`bool`, *optional*):
            Whether or not to apply the scaling factor (to make pixel values integers between 0 and 255). Will default
            to `True` if the image type is a floating type and casting to `int` would result in a loss of precision,
            and `False` otherwise.
        image_mode (`str`, *optional*):
            The mode to use for the PIL image. If unset, will use the default mode for the input image type.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If unset, will use the inferred format from the input.

    Returns:
        `PIL.Image.Image`: The converted image.
    visionz Input image type not supported: r   axisNr3   mode)r   to_pil_imager   PILImager   r   numpyr   r   arrayr    r&   r"   r*   r   r%   shapesqueezer;   r2   r/   r4   	fromarray)r   r:   r>   r.   r(   r(   r)   rE      s    
 
rE   c           
      C   s  | \}}d}|dur.t t||f}t t||f}|| | |kr.|| | }tt|}||kr6||ks>||krG||krG||}}	||	fS ||k rm|}	|dura|durat|| | }||	fS t|| | }||	fS |}|dur|durt|| | }	||	fS t|| | }	||	fS )aC  
    Computes the output image size given the input image size and the desired output size.

    Args:
        image_size (`Tuple[int, int]`):
            The input image size.
        size (`int`):
            The desired output size.
        max_size (`int`, *optional*):
            The maximum allowed output size.
    N)floatr8   r9   r6   round)

image_sizesizemax_sizeheightwidthraw_sizemin_original_sizemax_original_sizeohowr(   r(   r)   get_size_with_aspect_ratio   s0    

rY   Tinput_imagerP   default_to_squarerQ   c                 C   s   t |ttfr t|dkrt|S t|dkr|d }ntd|r&||fS t| |\}}||kr5||fn||f\}}|}	|	t|	| | }
}|duri||	krZtd| d| ||krit||
 | |}
}||krq||
fS |
|fS )a  
    Find the target (height, width) dimension of the output image after resizing given the input image and the desired
    size.

    Args:
        input_image (`np.ndarray`):
            The image to resize.
        size (`int` or `Tuple[int, int]` or List[int] or `Tuple[int]`):
            The size to use for resizing the image. If `size` is a sequence like (h, w), output size will be matched to
            this.

            If `size` is an int and `default_to_square` is `True`, then image will be resized to (size, size). If
            `size` is an int and `default_to_square` is `False`, then smaller edge of the image will be matched to this
            number. i.e, if height > width, then image will be rescaled to (size * height / width, size).
        default_to_square (`bool`, *optional*, defaults to `True`):
            How to convert `size` when it is a single int. If set to `True`, the `size` will be converted to a square
            (`size`,`size`). If set to `False`, will replicate
            [`torchvision.transforms.Resize`](https://pytorch.org/vision/stable/transforms.html#torchvision.transforms.Resize)
            with support for resizing only the smallest edge and providing an optional `max_size`.
        max_size (`int`, *optional*):
            The maximum allowed for the longer edge of the resized image: if the longer edge of the image is greater
            than `max_size` after being resized according to `size`, then the image is resized again so that the longer
            edge is equal to `max_size`. As a result, `size` might be overruled, i.e the smaller edge may be shorter
            than `size`. Only used if `default_to_square` is `False`.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If unset, will use the inferred format from the input.

    Returns:
        `tuple`: The target (height, width) dimension of the output image after resizing.
    r   r   r   z7size must have 1 or 2 elements if it is a list or tupleNzmax_size = zN must be strictly greater than the requested size for the smaller edge size = )r   tuplelistlenr&   r   r6   )rZ   rP   r[   rQ   r.   rR   rS   shortlongrequested_new_short	new_shortnew_longr(   r(   r)   get_resize_output_image_size   s,   %
rd   resampler   reducing_gapreturn_numpyc                 C   s   t tdg |dur|ntj}t|dkstd|du r!t| }|du r'|n|}d}t| tj	j	s=t
| }t| ||d} |\}}	| j|	|f||d}
|rqt|
}
|
jdkr^tj|
dd	n|
}
t|
|tjd
}
|rot|
dn|
}
|
S )a  
    Resizes `image` to `(height, width)` specified by `size` using the PIL library.

    Args:
        image (`np.ndarray`):
            The image to resize.
        size (`Tuple[int, int]`):
            The size to use for resizing the image.
        resample (`int`, *optional*, defaults to `PILImageResampling.BILINEAR`):
            The filter to user for resampling.
        reducing_gap (`int`, *optional*):
            Apply optimization by resizing the image in two steps. The bigger `reducing_gap`, the closer the result to
            the fair resampling. See corresponding Pillow documentation for more details.
        data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the output image. If unset, will use the inferred format from the input.
        return_numpy (`bool`, *optional*, defaults to `True`):
            Whether or not to return the resized image as a numpy array. If False a `PIL.Image.Image` object is
            returned.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If unset, will use the inferred format from the input.

    Returns:
        `np.ndarray`: The resized image.
    r?   Nr   zsize must have 2 elementsF)r:   r.   )re   rf   r@   rA   r   gp?)r   resizer   BILINEARr^   r&   r   r   rF   rG   r;   rE   r   rI   ndimexpand_dimsr*   r   r%   r2   )r   rP   re   rf   r,   rg   r.   r:   rR   rS   resized_imager(   r(   r)   ri   >  s*   !
ri   meanstdc                 C   s2  t | tjs
td|du rt| }t| |d}| j| }t| jtj	s+| 
tj} t |trCt||krBtd| dt| n|g| }tj|| jd}t |trht||krgtd| dt| n|g| }tj|| jd}|tjkr| | | } n| j| | j} |durt| ||} | S | } | S )a  
    Normalizes `image` using the mean and standard deviation specified by `mean` and `std`.

    image = (image - mean) / std

    Args:
        image (`np.ndarray`):
            The image to normalize.
        mean (`float` or `Collection[float]`):
            The mean to use for normalization.
        std (`float` or `Collection[float]`):
            The standard deviation to use for normalization.
        data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the output image. If unset, will use the inferred format from the input.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format of the input image. If unset, will use the inferred format from the input.
    zimage must be a numpy arrayN)r.   zmean must have z$ elements if it is an iterable, got r-   zstd must have )r   r   r    r&   r   r
   rJ   
issubdtyper-   floatingr/   float32r   r^   rI   r   r%   Tr*   )r   rn   ro   r,   r.   channel_axisnum_channelsr(   r(   r)   	normalize  s6   





rw   c                 C   s  t tdg t| tjstdt|  t|tr t|dkr$t	d|du r,t
| }|dur2|n|}t| tj|} t| tj\}}|\}}t|t|}}|| d }	|	| }
|| d }|| }|	dkr|
|kr|dkr||kr| d|	|
||f } t| |tj} | S t||}t||}| jdd ||f }tj| |d	}t|| d }|| }t|| d }|| }| |d||||f< |	|7 }	|
|7 }
||7 }||7 }|dtd|	t||
td|t||f }t||tj}|S )
a  
    Crops the `image` to the specified `size` using a center crop. Note that if the image is too small to be cropped to
    the size given, it will be padded (so the returned result will always be of size `size`).

    Args:
        image (`np.ndarray`):
            The image to crop.
        size (`Tuple[int, int]`):
            The target size for the cropped image.
        data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.
        input_data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.
    Returns:
        `np.ndarray`: The cropped image.
    r?   r   r   zOsize must have 2 elements representing the height and width of the output imageNr   .)rJ   )r   center_cropr   r   r    r!   r"   r   r^   r&   r   r*   r   r#   r   r6   r9   rJ   
zeros_liker   r8   )r   rP   r,   r.   output_data_formatorig_height
orig_widthcrop_height
crop_widthtopbottomleftright
new_height	new_width	new_shape	new_imagetop_pad
bottom_padleft_pad	right_padr(   r(   r)   ry     sH    

.ry   bboxes_centerc                 C   sL   |  d\}}}}tj|d|  |d|  |d|  |d|  gdd}|S )Nr@         ?dimunbindtorchstack)r   center_xcenter_yrS   rR   bbox_cornersr(   r(   r)   _center_to_corners_format_torch  s   *r   c                 C   sH   | j \}}}}tj|d|  |d|  |d|  |d|  gdd}|S )Nr   r@   rA   rt   r   r   r   r   r   rS   rR   bboxes_cornersr(   r(   r)   _center_to_corners_format_numpy   s   *r   c                 C   sP   t j| dd\}}}}t j|d|  |d|  |d|  |d|  gdd}|S )Nr@   rA   r   tfunstackr   r   r(   r(   r)   _center_to_corners_format_tf*  s   *r   c                 C   F   t | rt| S t| tjrt| S t| rt| S tdt	|  )a}  
    Converts bounding boxes from center format to corners format.

    center format: contains the coordinate for the center of the box and its width, height dimensions
        (center_x, center_y, width, height)
    corners format: contains the coordinates for the top-left and bottom-right corners of the box
        (top_left_x, top_left_y, bottom_right_x, bottom_right_y)
    Unsupported input type )
r   r   r   r   r    r   r   r   r&   r"   )r   r(   r(   r)   center_to_corners_format5  s   r   r   c                 C   sD   |  d\}}}}|| d || d || || g}tj|ddS )Nr@   r   r   r   )r   
top_left_x
top_left_ybottom_right_xbottom_right_ybr(   r(   r)   _corners_to_center_format_torchJ  s   

r   c                 C   s@   | j \}}}}tj|| d || d || || gdd}|S )Nr   r@   rA   r   r   r   r   r   r   r   r(   r(   r)   _corners_to_center_format_numpyU  s   

	r   c                 C   sH   t j| dd\}}}}t j|| d || d || || gdd}|S )Nr@   rA   r   r   r   r(   r(   r)   _corners_to_center_format_tfc  s   

	r   c                 C   r   )a  
    Converts bounding boxes from corners format to center format.

    corners format: contains the coordinates for the top-left and bottom-right corners of the box
        (top_left_x, top_left_y, bottom_right_x, bottom_right_y)
    center format: contains the coordinate for the center of the box and its the width, height dimensions
        (center_x, center_y, width, height)
    r   )
r   r   r   r   r    r   r   r   r&   r"   )r   r(   r(   r)   corners_to_center_formatq  s   
r   c                 C   s   t | tjr>t| jdkr>| jtjkr| tj} | dddddf d| dddddf   d| dddddf   S t	| d d| d   d| d   S )z*
    Converts RGB color to unique ID.
       Nr      r   i   r   )
r   r   r    r^   rJ   r-   r4   r/   int32r6   )colorr(   r(   r)   	rgb_to_id  s
   J$r   c                 C   s   t | tjr1|  }tt| jdg }tj|tjd}t	dD ]}|d |d|f< |d }q |S g }t	dD ]}|
| d  | d } q7|S )z*
    Converts unique ID to RGB color.
    r   rp   r   .)r   r   r    copyr\   r]   rJ   zerosr4   rangeappend)id_mapid_map_copy	rgb_shapergb_mapir   _r(   r(   r)   	id_to_rgb  s   

r   c                   @   s    e Zd ZdZdZdZdZdZdS )PaddingModezP
    Enum class for the different padding modes to use when padding images.
    constantreflect	replicate	symmetricN)__name__
__module____qualname____doc__CONSTANTREFLECT	REPLICATE	SYMMETRICr(   r(   r(   r)   r     s    r   g        paddingrD   constant_valuesc                    s   du rt   fdd}||}|tjkr&||}tj |d|d n1|tjkr4tj |dd n#|tjkrBtj |dd n|tjkrPtj |d	d ntd
| |durct	 |  S    S )a  
    Pads the `image` with the specified (height, width) `padding` and `mode`.

    Args:
        image (`np.ndarray`):
            The image to pad.
        padding (`int` or `Tuple[int, int]` or `Iterable[Tuple[int, int]]`):
            Padding to apply to the edges of the height, width axes. Can be one of three formats:
            - `((before_height, after_height), (before_width, after_width))` unique pad widths for each axis.
            - `((before, after),)` yields same before and after pad for height and width.
            - `(pad,)` or int is a shortcut for before = after = pad width for all axes.
        mode (`PaddingMode`):
            The padding mode to use. Can be one of:
                - `"constant"`: pads with a constant value.
                - `"reflect"`: pads with the reflection of the vector mirrored on the first and last values of the
                  vector along each axis.
                - `"replicate"`: pads with the replication of the last value on the edge of the array along each axis.
                - `"symmetric"`: pads with the reflection of the vector mirrored along the edge of the array.
        constant_values (`float` or `Iterable[float]`, *optional*):
            The value to use for the padding if `mode` is `"constant"`.
        data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use same as the input image.
        input_data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format for the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.

    Returns:
        `np.ndarray`: The padded image.

    Nc                    s  t | ttfr| | f| | ff} nOt | tr,t| dkr,| d | d f| d | d ff} n3t | trCt| dkrCt | d trC| | f} nt | trXt| dkrXt | d trX| } ntd|  tjkrjdg| R ng | dR }  jdkr~dg| R } | S | } | S )za
        Convert values to be in the format expected by np.pad based on the data format.
        r   r   r   zUnsupported format: )r   r      )	r   r6   rM   r\   r^   r&   r   r#   rk   )valuesr   r.   r(   r)   _expand_for_data_format  s   "$
$$z$pad.<locals>._expand_for_data_formatr   )rD   r   r   rC   edger   zInvalid padding mode: )
r   r   r   r   padr   r   r   r&   r*   )r   r   rD   r   r,   r.   r   r(   r   r)   r     s$   +



r   c                 C   s:   t tdg t| tjjs| S | jdkr| S | d} | S )z
    Converts an image to RGB format. Only converts if the image is of type PIL.Image.Image, otherwise returns the image
    as is.
    Args:
        image (Image):
            The image to convert.
    r?   RGB)r   convert_to_rgbr   rF   rG   rD   convert)r   r(   r(   r)   r     s   

r   c                 C   sx   |du rt | n|}|tjkr| ddddf } n|tjkr(| ddddf } ntd| |dur:t| ||d} | S )a  
    Flips the channel order of the image.

    If the image is in RGB format, it will be converted to BGR and vice versa.

    Args:
        image (`np.ndarray`):
            The image to flip.
        data_format (`ChannelDimension`, *optional*):
            The channel dimension format for the output image. Can be one of:
                - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use same as the input image.
        input_data_format (`ChannelDimension`, *optional*):
            The channel dimension format for the input image. Can be one of:
                - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            If unset, will use the inferred format of the input image.
    N.r@   zUnsupported channel dimension: rh   )r   r   r%   r#   r&   r*   )r   r,   r.   r(   r(   r)   flip_channel_order  s   

r   c                 C   s   |   r| S |  S N)is_floating_pointrM   )xr(   r(   r)   _cast_tensor_to_floatB  s   r   imagesc                 C   sx   i }i }t | D ]&\}}|jdd }||vrg ||< || | |t|| d f||< qdd | D }||fS )a  
    Groups images by shape.
    Returns a dictionary with the shape as key and a list of images with that shape as value,
    and a dictionary with the index of the image in the original list as key and the shape and index in the grouped list as value.
    r   Nc                 S   s    i | ]\}}|t j|d dqS )r   r   )r   r   ).0rJ   r   r(   r(   r)   
<dictcomp>Y  s     z)group_images_by_shape.<locals>.<dictcomp>)	enumeraterJ   r   r^   items)r   grouped_imagesgrouped_images_indexr   r   rJ   r(   r(   r)   group_images_by_shapeH  s   r   processed_imagesr   c                    s    fddt t D S )z>
    Reconstructs a list of images in the original order.
    c                    s(   g | ]} | d    | d  qS )r   r   r(   )r   r   r   r   r(   r)   
<listcomp>c  s    z"reorder_images.<locals>.<listcomp>)r   r^   )r   r   r(   r   r)   reorder_images]  s   
r   c                   @   s    e Zd ZdZdejfddZdS )NumpyToTensorz4
    Convert a numpy array to a PyTorch tensor.
    r   c                 C   s   t |ddd S )Nr   r   r   )r   
from_numpyr$   
contiguous)selfr   r(   r(   r)   __call__n  s   zNumpyToTensor.__call__N)r   r   r   r   r   r    r   r(   r(   r(   r)   r   i  s    r   r   )NNN)TNN)NNNTN)NN)r   r<   r   r<   )r   r=   r   r=   )r   r<   r   r<   )r   r=   r   r=   )Icollections.abcr   r   mathr   typingr   r   rH   r   image_utilsr   r	   r
   r   r   utilsr   r   r   r   r   utils.import_utilsr   r   r   r   r   rF   r   r   
tensorflowr   	jax.numpyjnpr    strr*   rs   rM   r-   r2   r;   boolrE   r\   r6   rY   r]   rd   ri   rw   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   dictr   r   r   r(   r(   r(   r)   <module>   sh  	

*
&
6+
F

K
@


T




 
W
&6

