o
    ohj8                     @  s  d dl mZ ddlmZ ddlmZ ddlmZ ddd	d
ZddddZej	edd Z
ej	eeddd Zej	eeddeddZej	ededdZedd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zej	eejd$d%d&d'dfd*d+Zej	eejd,d-d.dgd/d0Zed1d2 Zed3d4 Zed5d6 Zed7d8 Zej	eejd9d%d&d'dfd:d;Zej	eejd<d-d.dgd=d>Zed?d@ ZdhdCdDZ ej	eejdEdBdFdidjdGdEZ!edHdI Z"ej	eedJdkdKdLZ#ej	ee$dMdldNdMZ%edOdP Z&ej	ee$dQdldRdQZ'edmdTdUZ(edndXdYZ)ej	ed(ej*fdod\d]Z+d^d_ Z,ej	edpd`daZ-edbdc Z.d(S )q    )annotations   )jit   )core)mathicore.constexprc                 C  s4   d}| j }|dkr|dL }|d7 }|dks	t|S )Nr   r   valuer   	constexpr)r   log2n r   l/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/triton/language/standard.py_log2
   s   
r   c                 C  s$   | j }t||d @ dko|dkS )Nr   r   r
   )r   r   r   r   r   _is_power_of_two   s   r   c                 C  s   | | d | S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :type div: Block
    r   r   )xdivr   r   r   cdiv   s   r   sigmoidc                 C  s   ddt |    S )Nr   )r   exp)r   r   r   r   r   +   s   softmaxFc                 C  s0   | t | d }t|}t|d}t|||S )Nr   )maxr   r   sumfdiv)r   ieee_roundingznumdenr   r   r   r   2   s   

c                 C  s   t j| | jg|dS )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    )can_reorder)r   reshapenumel)r   r    r   r   r   ravel<   s   	r#   c                 C  sX   | | | }|| }|| }|| }t || |}|| }|||  }	|| }
|	|
fS )a  
    Transforms the indices of a row-major `size_i * size_j` matrix into
    the indices of a column-major matrix for each group of `size_g` rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r   jsize_isize_jsize_gijsize_gjgroup_idoff_inew_inew_jr   r   r   	swizzle2dH   s   r0   c                 C  s   t | d|S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtyper   r   r   zerosp   s   
r4   c                 C  s   t | j| jS )z
    Returns a tensor of zeros with the same shape and type as a given tensor.

    :param input: input tensor
    :type input: Tensor
    )r4   r2   r3   )inputr   r   r   
zeros_like}   s   r6   c           	      C  sJ   |r| |ko	||k }nd}| |kp|}t || |}t |||}||fS NFr   where)	value1index1value2index2tie_break_lefttiegtv_reti_retr   r   r   _argmax_combine      rC   c                 C     t | |||dS NTrC   r:   r;   r<   r=   r   r   r   _argmax_combine_tie_break_left      rI   c                 C  rE   r7   rG   rH   r   r   r   _argmax_combine_tie_break_fast   rJ   rK   c                 C     t | |S N)r   maximumabr   r   r   _elementwise_max      rR   rN   return_indicesreturn_indices_tie_break_left)return_indices_argtie_break_argNTc                 C  s   t | } |r|rt j| |t|dS t j| |t|dS t | jjt dk rEt | j r6| 	t j
} n| j s?J d| 	t j} t j| |t|dS N	keep_dims    z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesrI   rK   r   r3   primitive_bitwidthis_floatingtofloat32is_intint32reducerR   r5   axisrT   rU   rZ   r   r   r   r      s   
r   zmaximum indexr>   )rW   c                 C     t | |d||d\}}|S NT)rT   rU   rZ   )r   r5   rf   r>   rZ   _retr   r   r   argmax      rl   c           	      C  sJ   |r| |ko	||k }nd}| |k p|}t || |}t |||}||fS r7   r8   )	r:   r;   r<   r=   r>   r?   lt	value_ret	index_retr   r   r   _argmin_combine   rD   rq   c                 C  rE   rF   rq   rH   r   r   r   _argmin_combine_tie_break_left   rJ   rs   c                 C  rE   r7   rr   rH   r   r   r   _argmin_combine_tie_break_fast   rJ   rt   c                 C  rL   rM   r$   rO   r   r   r   _elementwise_min   rS   ru   r%   c                 C  s   t | } |r|rt j| |t|dS t j| |t|dS t | jjdk rBt | j r3| 	t j
} n| j s<J d| 	t j} t j| |t|dS rX   )r   r\   r]   rs   rt   r   r3   r^   r_   r`   ra   rb   rc   rd   ru   re   r   r   r   min   s   
rv   zminimum indexc                 C  rg   rh   )rv   ri   r   r   r   argmin   rm   rw   c                 C  s   | | S rM   r   rO   r   r   r   _sum_combine      rx   in_dtyper3   c                 C  s^   t |}|d ur|S d }|  r| jdk rt j}|S d }|S |  r-| jdk r+t jnd }|S )Nr[   )r   _unwrap_if_constexpris_int_signedint_bitwidthrc   is_int_unsigneduint32)rz   r3   	out_dtyper   r   r   _pick_sum_dtype  s   
r   r   )	dtype_argc                 C  s0   t | j|}|d ur| |} tj| |t|dS )NrY   )r   r3   r`   r   rd   rx   )r5   rf   rZ   r3   r   r   r   r   r     s   
c                 C  s   | |A S rM   r   rO   r   r   r   _xor_combine!  ry   r   zxor sumc                 C  s&   t | jj d t j| |t|dS )Nz#xor_sum only supported for integersrY   )r   static_asserttypescalarrb   rd   r   )r5   rf   rZ   r   r   r   xor_sum)  s   r   cumsumc                 C     t | } t | |t|S rM   )r   r\   associative_scanrx   r5   rf   reverser   r   r   r   4     
c                 C  s   | | S rM   r   rO   r   r   r   _prod_combine@  ry   r   cumprodc                 C  r   rM   )r   r\   r   r   r   r   r   r   r   E  r   n_dimsc                 C  s<  | j |? }|d|  dd|| d  g}t| |}tddd d d d f }tt|d|  dd d d d d f ||j}tt|| dd d d d d f ||j}	t|| j}t|	| j}	tj	| jj
dd}
|j|
dd}|	j|
dd}| j|
dd}|t||	k|k||A t|A }|j| jddS )Nr   r   r   Tbitwidthsignedbitcast)r"   r   r!   arangebroadcast_tor   r`   r3   r2   get_int_dtyper^   r9   r6   )r   flipr   r   n_outerr2   ymaskleftrightidtypeileftirightixrk   r   r   r   _compare_and_swapQ  s   
40"r   stageorderc                 C  s   | j |? }t||k |dkr6|d|d |   dd| g}tttddddddf || j}n|}t|D ]}t| ||||  |} q=| S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   r   N)	r"   r   r   r!   r   r   r2   static_ranger   )r   r   r   r   r   r2   r   r   r   r   r   _bitonic_mergee  s   
.r   dim
descendingc                 C  sv   |du rt | jd n|}t|t | jd kd t| j| }td|d D ]}t| |||k r4dn||} q)| S )a  
    Sorts a tensor along a specified dimension.

    :param x: The input tensor to be sorted.
    :type x: Tensor
    :param dim: The dimension along which to sort the tensor. If None, the tensor is sorted along the last dimension. Currently, only sorting along the last dimension is supported.
    :type dim: int, optional
    :param descending: If set to True, the tensor is sorted in descending order. If set to False, the tensor is sorted in ascending order.
    :type descending: bool, optional
    Nr   z+only minor dimension is currently supportedr   )lenr2   r   r   r   r   r   )r   r   r   _dimr   r   r   r   r   sort~  s   r   c                 C  sJ   t | } t |}| d u rt|d } | t|d ks J dt | S )Nr   z2Currently only support flipping the last dimension)r   r{   r   r   )r   r2   r   r   r   _get_flip_dim  s   


r   c           
      C  s<  t t| jt|| j  t t| j t| j}t| jt| jt|| j  }t j| jj	dd}t 
| j|dddg| }t ||}t dddddf dt dd k}t ||D ],}|}t d|d D ]}	|	|kr|	|d krt ||	}qnt|| |d d|jd}qbt 
|| jj| jdd} | S )	z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along (currently only final dimension supported)
    :type dim: int
    Tr   r   r   r   Nr   )rZ   r3   )r   r   r   r2   r   r"   r   r   r3   r^   r!   r`   expand_dimsr   r   r   )
r   r   stepsstartr   r   r   r   flip2r&   r   r   r   r     s"   
 (r   c                 C  sD   t | |}t|jdkr|S t ||jdd d|jd  g S )a7  
    Interleaves the values of two tensors along their last dimension. The two tensors must have the same shape.
    Equivalent to `tl.join(a, b).reshape(a.shape[:-1] + [2 * a.shape[-1]])`

    :param a: The first input tensor.
    :type a: Tensor
    :param b: The second input tensor.
    :type b: Tensor
    r   Nr   )r   joinr   r2   r!   )rP   rQ   cr   r   r   
interleave  s   &r   )r   r	   )F)NFTF)TF)rz   r	   r3   r	   )NFN)r3   r	   r7   )r   F)r   r	   r   r	   )r   r	   r   r	   r   r	   )r   r	   r   r	   rM   )/
__future__r   runtime.jitr    r   r   r   r   _tensor_member_fnr   _add_math_1arg_docstrr   r   r#   r0   r4   r6   rC   rI   rK   rR   _add_reduction_docstrr   rl   rq   rs   rt   ru   rv   rw   rx   r   r   r   r   _add_scan_docstrr   r   r   r   r   CONSTEXPR_0r   r   r   r   r   r   r   r   <module>   s    

	


'











	
	
		