o
    oh                     @   sT  d dl mZ d dl mZ ddlmZ ddlmZ edZdee	 defd	d
Z
de	dejegef fddZde	dejegef fddZde	dejegef fddZeje
g ddeddIddZeje
ddgdedejdIddZeje
ddgdedejdIddZeje
ddgded ejdId!d"Zeje
ddgded#ejdId$d%Zeje
ddgded&ejdId'd(Zeje
ddgded)ejdId*d+Zeje
ddgded,ejdId-d.Zeje
dgded/ejdId0d1Zeje
ddgded2ejdId3d4Zejejed5dId6d7Zejed8dJd:d;Zeje
dgded<dId=d>Zeje
ddgded?ejdId@dAZeje
ddgdedBejdIdCdBZeje
ddgdedDejdIdEdDZ ejedFdIdGdHZ!dS )K   )core)semantic    wraps)ListTdtypesreturnc                    s    fdd}|S )al  
    We're following libdevice's convention to check accepted data types for math functions.
    It is not a good practice to support all data types as accelerators/GPUs don't support
    many float16 and bfloat16 math operations.
    We should let the users know that they are using and invoke explicit cast to convert
    the data type to the supported one.
    c                    s   t   fdd}|S )Nc                     s^   t | t |  }dd |D D ]}|jjj vr'td  d|jjj q| i |S )Nc                 S   s   g | ]
}t |tjr|qS  )
isinstancer   tensor).0ar   r   h/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/triton/language/math.py
<listcomp>   s    z@_check_dtype.<locals>.wrapper.<locals>.check.<locals>.<listcomp>zExpected dtype z	 but got )listvaluestypescalarname
ValueError)argskwargsall_argsarg)r	   fnr   r   check   s   z,_check_dtype.<locals>.wrapper.<locals>.checkr   )r   r   r	   )r   r   wrapper   s   z_check_dtype.<locals>.wrapperr   )r	   r   r   r   r   _check_dtype	   s   	r    r   c                       dt dt f fdd}|S )Nfuncr
   c                       d}|j  d| _| S )Nzk
    Computes the element-wise {name} of :code:`x`.

    :param x: the input values
    :type x: Block
    r   format__doc__r"   docstrr$   r   r   
_decorator$   s   z)_add_math_1arg_docstr.<locals>._decoratorr   r   r*   r   r$   r   _add_math_1arg_docstr"   s   
r-   c                    r!   )Nr"   r
   c                    r#   )Nz
    Computes the element-wise {name} of :code:`x` and :code:`y`.

    :param x: the input values
    :type x: Block
    :param y: the input values
    :type y: Block
    r$   r%   r(   r$   r   r   r*   3   s   z)_add_math_2arg_docstr.<locals>._decoratorr+   r,   r   r$   r   _add_math_2arg_docstr1   s   r.   c                    r!   )Nr"   r
   c                    r#   )Nz
    Computes the element-wise {name} of :code:`x`, :code:`y`, and :code:`z`.

    :param x: the input values
    :type x: Block
    :param y: the input values
    :type y: Block
    :param z: the input values
    :type z: Block
    r$   r%   r(   r$   r   r   r*   D   s   
z)_add_math_3arg_docstr.<locals>._decoratorr+   r,   r   r$   r   _add_math_3arg_docstrB   s   r/   )int32int64uint32uint64r   z-most significant N bits of the 2N-bit productNc                 C   D   t | |} t ||}t| ||\} }t|| j|j| jS N)r   	to_tensorr   binary_op_type_legalizationr   create_umulhihandler   xy_builderr   r   r   umulhiU      r>   fp32fp64exponentialc                 C   "   t | |} t|| j| jS r5   )r   r6   r   r   
create_expr9   r   r;   r=   r   r   r   exp_      rF   zexponential (base 2)c                 C   rC   r5   )r   r6   r   r   create_exp2r9   r   rE   r   r   r   exp2h   rG   rI   znatural logarithmc                 C   rC   r5   )r   r6   r   r   
create_logr9   r   rE   r   r   r   logq   rG   rK   zlogarithm (base 2)c                 C   rC   r5   )r   r6   r   r   create_log2r9   r   rE   r   r   r   log2z   rG   rM   cosinec                 C   rC   r5   )r   r6   r   r   
create_cosr9   r   rE   r   r   r   cos   rG   rP   sinec                 C   rC   r5   )r   r6   r   r   
create_sinr9   r   rE   r   r   r   sin   rG   rS   zfast square rootc                 C   rC   r5   )r   r6   r   r   create_sqrtr9   r   rE   r   r   r   sqrt   rG   rU   z?precise square root (rounding to nearest wrt the IEEE standard)c                 C   rC   r5   )r   r6   r   r   create_precise_sqrtr9   r   rE   r   r   r   sqrt_rn   rG   rW   zinverse square rootc                 C   rC   r5   )r   r6   r   r   create_rsqrtr9   r   rE   r   r   r   rsqrt   rG   rY   zabsolute valuec                 C   s   t | |} | j}| r%tj| jdtj|d}t|	| j
|j
| jS | r4t|| j
| jS | rCt|| j
| jS | rI| S J d| )N   )r=   FzUnexpected dtype )r   r6   dtypeis_fp8e4b15r   fullshapeint8r   
create_andr9   r   is_floatingcreate_fabsis_int_signedcreate_iabsis_int_unsigned)r;   r=   r[   maskr   r   r   abs   s   rg   zfast divisionFc                 C   s2   t |}t| |} t||}t| |||S r5   )r   _constexpr_to_valuer   r6   fdiv)r;   r<   ieee_roundingr=   r   r   r   ri      s   
ri   z<precise division (rounding to nearest wrt the IEEE standard)c                 C   r4   r5   )r   r6   r   r7   r   create_precise_divfr9   r   r:   r   r   r   div_rn   r?   rl   zerror functionc                 C   rC   r5   )r   r6   r   r   
create_erfr9   r   rE   r   r   r   erf   rG   rn   floorc                 C   rC   r5   )r   r6   r   r   create_floorr9   r   rE   r   r   r   ro      rG   ceilc                 C   rC   r5   )r   r6   r   r   create_ceilr9   r   rE   r   r   r   rq      rG   zfused multiply-addc                 C   sx   t | |} t ||}t ||}t| ||\} }t|| |\}} t|||\}}t|| j|j|j| jS r5   )r   r6   r   r7   r   
create_fmar9   r   )r;   r<   zr=   r   r   r   fma   s   ru   r5   )FN)" r   r   	functoolsr   typingr   TypeVarr   strr    Callabler-   r.   r/   builtinr>   _tensor_member_fnrF   rI   rK   rM   rP   rS   rU   rW   rY   rg   ri   rl   rn   ro   rq   ru   r   r   r   r   <module>   s    


