o
    lh                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlZd dlmZ d dlmZmZ d dlmZmZmZ g dZg dZee Zg dZed	g Zee Zed	g Zd
dgZdge dg e d	g Zdd Zdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&d d! Z'd"d# Z(d7d$ee fd%d&Z)d8d'ej*d(eeej+f fd)d*Z,d'e-d(ej.fd+d,Z/d(e-fd-d.Z0d/d0 Z1d9d2d3Z2d9d4d5Z3ej4j5e2  e3 d6Z6dS ):    N)_path_to_binary)RandomState)OptionalUnion)TensorWrapperreinterprettype_canonicalisation_dict)int8int16int32int64)uint8uint16uint32uint64)float16float32float64bfloat16float8_e4m3fnfloat8_e5m2boolr   c                   C   s   t jdddkS )NTRITON_INTERPRET01)osenvironget r   r   l/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/triton/_internal_testing.pyis_interpreter      r    c                   C   s   t  rd S tjjj S N)r    tritonruntimedriveractiveget_current_targetr   r   r   r   r'      s   r'   c                  C      t  } | d u r	dS | jdkS )NFcudar'   backendtargetr   r   r   is_cuda#      r.   c                   C   s   t  otj d dkS )Nr   	   )r.   torchr)   get_device_capabilityr   r   r   r   	is_hopper(   s   r3   c                  C   r(   )NFhipr*   r,   r   r   r   is_hip,   r/   r5   c                  C   s&   t  } | d u s| jdkrdS | jdkS )Nr4   Fgfx90ar'   r+   archr,   r   r   r   is_hip_mi2001      
r9   c                  C   &   t  } | d u s| jdkrdS | jdv S )Nr4   F)gfx940gfx941gfx942r7   r,   r   r   r   is_hip_mi3008   r:   r?   c                  C   r;   )Nr4   Fgfx950r7   r,   r   r   r   is_hip_mi350?   r:   rA   c                   C   s   t  pt pt S r"   )r9   r?   rA   r   r   r   r   is_hip_cdnaF   r!   rB   c                  C   r(   )NFxpur*   r,   r   r   r   is_xpuJ   r/   rD   c                  C   s   t  } | d u r	dS t| jS )N )r'   strr8   r,   r   r   r   get_archO   r/   rG   rsc                 C   s8  t | tr| f} |du rtdd}|tt v rOttt|}|du r&|jnt	||j}|du r3|j	nt||j	}tt|}|j
||| |d}d||dk< |S |rad|v ra|j
dd	| tjd}|S |tv ro|dd| |S |d
kr|dd| ddtd@ dS |dv r|dd| dkS td| )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   int1bool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr	   float_dtypesnormalastypeviewr   RuntimeError)shape	dtype_strrH   lowhighrW   rK   xr   r   r   numpy_randomT   s,   


*rf   re   returnc                 C   s   | j j}|tv r"|d}| tt|}ttj	||dtt
|S |r5d|v r5ttj	| |dtt
|S |dkrF|dkrFtj	| |d S tj	| |dS )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicerM   r   r   )rK   namerU   lstripr^   rX   rV   r   r1   tensortlr   )re   ri   dst_typetsigned_type_namex_signedr   r   r   	to_tritonr   s   
rr   c                 C   s   t t|  S r"   )rm   	str_to_tyr   re   r   r   r   str_to_triton_dtype   s   ru   c                 C   sL   t | tjjr
| jS t | tjrtdt| }|	dS t
dt|  )Nz^torch\.(\w+)$rL   znot a triton or torch dtype: )rR   r#   languagerK   rj   r1   rematchrF   group	TypeErrortype)rK   mr   r   r   torch_dtype_name   s   
r}   c                 C   sl   t | tr| j  ttt| j	S t | t
jr/| j	t
ju r)|    S |   S td|  )Nz Not a triton-compatible tensor: )rR   r   basecpunumpyr^   rX   rV   r}   rK   r1   Tensorr   float
ValueErrorrt   r   r   r   to_numpy   s   
 r   Fc                 C   sn   t  rdS t s
dS td\}}| rdnd}ttt|d}t|dks*J |tj	
 d d	ko6||kS )
NTFptxas)   r   )r      .   r   r0   )r    r.   r   tuplemaprS   splitlenr1   r)   r2   )
byval_only_cuda_versionmin_cuda_versioncuda_version_tupler   r   r   supports_tma   s   r   c                 C   s   | rdS dS )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r   )r   r   r   r   tma_skip_msg   s   r   )reason)NNNr"   )F)7r   rw   r   rV   r1   r#   triton.languagerv   rm   triton.backends.nvidia.compilerr   pytestnumpy.randomr   typingr   r   triton.runtime.jitr   r   r   rT   rU   integral_dtypesr\   float_dtypes_with_bfloat16dtypesdtypes_with_bfloat16torch_float8_dtypestorch_dtypesr    r'   r.   r3   r5   r9   r?   rA   rB   rD   rG   rf   ndarrayr   rr   rF   rK   ru   r}   r   r   r   markskipifrequires_tmar   r   r   r   <module>   sN    

 

