o
    oh2W                    @  s  d dl mZ d dlZd dlmZmZmZmZmZ d dl	Z	ddl
mZ ddlmZ edZG d	d
 d
eZdPddZdPddZdQddZdRddZdSdTd"d#ZdUd(d)Z	 	*dVdWd/d0ZdXd3d4ZdYd8d9ZdYd:d;ZdYd<d=ZdZd>d?ZdZd@dAZd[dCdDZdZdEdFZ d\dKdLZ!d\dMdNZ"d]dQdRZ#d^dSdTZ$d_dUdVZ%d_dWdXZ&d_dYdZZ'd_d[d\Z(d_d]d^Z)d`d_d`Z*d_dadbZ+d_dcddZ,d_dedfZ-dadgdhZ.dbdidjZ/dcdkdlZ0dddodpZ1d_dqdrZ2d_dsdtZ3d_dudvZ4d_dwdxZ5d_dydzZ6d_d{d|Z7deddZ8dfddZ9dgddZ:dhddZ;diddZ<djddZ=dkddZ>dlddZ?dmddZ@dnddZAdoddZBdpddZCdqddZD	drdsddZEdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKdd ZLdd ZMdd ZNdtddƄZOduddʄZPdd̄ ZQdvddЄZRdwddӄZSdxddՄZTdyddׄZUdzddZVd{ddZWdd ZXdd ZYd|ddZZd}ddZ[d~ddZ\dddZ]dddZ^dddZ_dddZ`dddZadd dZbdddZcdd Zddd
dZedddZfdddZgdddZhdddZidd Zjdd!d"Zkdd$d%Zldd(d)Zmdd+d,Zndd.d/Zodd0d1Zpdd2d3Zqdd4d5Zrdd9d:Zsdd=d>Ztdd?d@ZudAdB ZvdSdCdDZwddFdGZxddHdIZyddNdOZzdS (      )annotationsN)ListOptionalSequenceTupleTypeVar   )ir   )coreTc                      s   e Zd Z fddZ  ZS )IncompatibleTypeErrorImplc                   s@   || _ || _d| j   d | j  | _tt| | j d S )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__ l/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/triton/language/semantic.pyr      s   z"IncompatibleTypeErrorImpl.__init__)__name__
__module____qualname__r   __classcell__r   r   r   r   r      s    r   axisintbuilder
ir.builderreturn	tl.tensorc                 C  *   | dvrt d|  t|| tjS )Nr   r
   r   z+program_id axis must be 0, 1, or 2 but got )
ValueErrortltensorcreate_get_program_idint32r   r    r   r   r   
program_id      r,   c                 C  r$   )Nr%   z-num_programs axis must be 0, 1, or 2 but got )r&   r'   r(   create_get_num_programsr*   r+   r   r   r   num_programs!   r-   r/   a_tytl.dtypeb_tyc                 C  s   | j }|j }| j}|j}||kr||kr| S |S |tjjjkr'||kr%| S |S |tjjjkr6||kr4|S | S td| d| )Nzunexpected signedness r   )int_bitwidthint_signednessr'   dtype
SIGNEDNESSUNSIGNED	TypeError)r0   r2   a_rankb_ranka_snb_snr   r   r   integer_promote_impl,   s   r=   a_is_scalarboolb_is_scalar
div_or_modc                 C  sT  ||kr)|r
| |fn|| f\}}|  j|  jkr)|r'|tjtjfv r'tjS |S |  s1| r4tjS |  s<| r?tjS | 	 sG|	 rO|rLtjS tjS | 
 r_|
 r_|r\tjS tjS | 
 sg|
 rjtjS |  r{| r{| |krx| S tjS |  r| std|  d| |r| j|jkrtd|   d |  d t| |S )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer'   float16bfloat16float32is_fp64float64is_fp32is_fp16is_bf16is_fp8is_intr8   r4   r   r=   )r0   r>   r2   r@   rA   	scalar_ty	tensor_tyr   r   r   computation_type_impl<   s:   
rR   T
check_typec                 C  s  t | trt|| tjS t | trdd|   krdk r$n ntj}n8d|   kr.dk r4n ntj}n(d|   kr>dk rDn ntj	}nd|   krNdk rTn ntj
}ntd|  dtd	| ||d
S t | trd}ddd  }td | }|tdks|dks| | ks||  kr|krn ntj}ntj}td	| ||d
S t | tjrt| j|S t | tjr| S |rtd|  dt|  d| S )N           l                             l            zNonrepresentable integer .r   r5   r    g      8g   ?r      absinf        zcannot convert z	 of type z
 to tensor)
isinstancer?   r'   r(   get_int1int1r   r*   uint32int64uint64r&   fullfloat__builtins__rH   rJ   	constexpr	to_tensorrE   r8   type)xr    rS   r5   min_float32max_float32abs_xr   r   r   rh   o   s>   


rh   r   r   allow_ptr_aNonec                 C  sJ   |   r!|st| ||  r| |krt| || r#t| |d S d S N)is_ptrr   is_floating)r   r   rn   r   r   r   check_ptr_type_impl   s   


rs   Flhstl.tensor | numbers.NumberrhsTuple[tl.tensor, tl.tensor]c                 C  s  t | tj}t |tj}|r| }	t| |} |r|}
t||}| jj}|jj}t||| t||| |r| s| st|||||}|rN|	dk rN|	 sX|r\|
dk r\|	 r\t
d| r|r{| |	  krp| ks{n t
d|	 d| |r| |
  kr| ksn t
d|
 d| |rtd|	||dnt| ||} |rtd|
||dnt|||}t| ||\} }| |fS )Nr   z{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.zScalar z is out of range for type r   rY   )r^   numbersNumberrh   ri   scalarrs   rq   rR   is_int_unsignedr&   rO   get_int_min_valueget_int_max_valuerd   castbroadcast_impl_value)rt   rv   r    allow_lhs_ptrallow_rhs_ptrarithmetic_checkrA   lhs_is_scalarrhs_is_scalar
lhs_scalar
rhs_scalar
lhs_sca_ty
rhs_sca_ty
ret_sca_tyr   r   r   binary_op_type_checking_impl   sL   

""

r   	binary_opcallablec                 C  s   | j jjdks|jjsd S | j j}|j j}||ksJ | s!J t| tj|} t|tj|}|| |d|}|	 }t
||tj}| }t
||tj}tt|||t||||}	d|j d|j }
t|	|
| d S )N@   Fr   z! overflow detected for operation )ri   rz   r3   optionssanitize_overflowrO   r~   r'   rb   r}   r(   	get_int64r|   and_
less_equalgreater_equalr   device_assert)rt   rv   r    r   r   r   ret	max_value	min_valuecondmsgr   r   r    binary_op_sanitize_overflow_impl   s    r   inputotherr   c                 C  sH  t | ||dd\} }| jj}|jj}| r| rtd| r3| s3|| } }| jj}|jj}| rr|j}|j rf|jjdk rf|j	 rXt
t
j|j |}nt
j|}||j|d}t
|| j|| jS | rt
|| j|j| jS | r|rt| ||t t
|| j|j| jS td| )NTzcannot add pointers togetherr   FrB   )r   ri   rz   rq   r8   handler5   r{   r3   is_blockr'   
block_typerb   get_block_shapesto_ircreate_int_castr(   create_addptrrr   create_faddrO   r   add
create_add)r   r   r   r    input_scalar_tyother_scalar_tyother_handlei64_tyr   r   r   r      s0   

r   c                 C  s   t | ||dd\} }| jj}| r"t|| jt||j| jS |	 r3t|
| j|j| jS | rM|r@t| ||t t|| j|j| jS td| )NTFrB   )r   ri   rz   rq   r'   r(   r   r   minusrr   create_fsubrO   r   sub
create_subr8   r   r   r   r    rP   r   r   r   r     s    r   c                 C  s|   t | ||\} }| jj}| rt|| j|j| jS | r7|r*t	| ||t
 t|| j|j| jS td| NrB   )r   ri   rz   rr   r'   r(   create_fmulr   rO   r   mul
create_mulr8   r   r   r   r   r     s   r   c                 C  s   t | ||dddd\} }| jj}|jj}| r#| r#t|||}nI| r2| r2t| ||} n:| rI| rIt| tj|} t|tj|}n#| re| re|j|jkr^t|||}nt| ||} nt	d| t
|| j|j| jS NFTrB   )r   ri   rz   rr   rO   r~   r'   rH   fp_mantissa_widthr8   r(   create_fdivr   )r   r   r    r   r   r   r   r   truediv#  s    r   c                 C  s   t | ||dddd\} }| jj}|jj}| rK| rKt||}t| ||} t|||}| r>t|	| j
|j
| jS t|| j
|j
| jS td| r   )r   ri   rz   rO   r=   r~   is_int_signedr'   r(   create_sdivr   create_udivr8   )r   r   r    r   r   ret_tyr   r   r   floordiv=  s   
r   ieee_roundingc                 C  s^   | j j}|j j}| r| stdt| ||dddd\} }|| j|j}t|| j S )Nz4both operands of fdiv must have floating scalar typeFT)	ri   rz   rr   r8   r   r   r   r'   r(   )r   r   r   r    r   r   r   r   r   r   fdivL  s   r   c                 C  s   t | ||dddd\} }| jj}|jj}| r%t|| j|j| jS | r]|j	|j	kr?t
d|  d |  d | rPt|| j|j| jS t|| j|j| jS t
d| )NFTzCannot mod z by rC   rB   )r   ri   rz   rr   r'   r(   create_fremr   rO   r4   r8   r   r   create_sremcreate_urem)r   r   r    rP   r   r   r   r   modW  s    r   rj   ypropagate_nantl.PropagateNanc                 C     t | ||\} }| j}| r<|tjjkr"t|| j|j| j	S |tjj
kr5t|| j|j| j	S td| | rMt|| j|j| j	S | r^t|| j|j| j	S td| NzUnexpected propagate_nan Unexpected dtype )r   r5   rr   r'   PropagateNanALLr(   create_minimumfr   ri   NONEcreate_minnumfr&   r   create_minsir{   create_minuir8   rj   r   r   r    r5   r   r   r   minimump     r   c                 C  r   r   )r   r5   rr   r'   r   r   r(   create_maximumfr   ri   r   create_maxnumfr&   r   create_maxsir{   create_maxuir8   r   r   r   r   maximum  r   r   minmaxc                 C  sn   t |||\}}t | ||\} }t | ||\} }| j}| r/t|| j|j|j|| jS td| d)Nr   z(. Only floating point clamp is supported)	r   r5   rr   r'   r(   create_clampfr   ri   r8   )rj   r   r   r   r    r5   r   r   r   clamp  s    r   c                 C  st   t | ||\} }| jj}|jj}| r| st||t||}||kr,t| ||} ||kr6t|||}| |fS rp   )r   ri   rz   rO   r   r=   r~   )r   r   r    input_sca_tyother_sca_tyr   r   r   r   bitwise_op_type_checking_impl  s   

r   c                 C  *   t | ||\} }t|| j|j| jS rp   )r   r'   r(   
create_andr   ri   r   r   r    r   r   r   r        r   c                 C  r   rp   )r   r'   r(   	create_orr   ri   r   r   r   r   or_  r   r   c                 C  r   rp   )r   r'   r(   
create_xorr   ri   r   r   r   r   xor_  r   r   c                 C  D   | j  st| td|} |j  st|td|}t| ||S Nr`   )ri   is_int1bitcastr'   r5   r   r   r   r   r   logical_and  
   

r   c                 C  r   r   )ri   r   r   r'   r5   r   r   r   r   r   
logical_or  r   r   c                 C  s&   | j  st| td|} t| |S r   )ri   r   r   r'   r5   invert)r   r    r   r   r   not_  s   

r   c                 C  r   rp   )r   r'   r(   create_lshrr   ri   r   r   r   r   lshr  r   r   c                 C  r   rp   )r   r'   r(   create_ashrr   ri   r   r   r   r   ashr  r   r   c                 C  r   rp   )r   r'   r(   
create_shlr   ri   r   r   r   r   shl  r   r   c                 C  s   | S rp   r   r   r   r   r   plus  s   r   c                 C  sJ   | j j}| rtd|  d t||||}t	|| d|S )Nz$wrong type argument to unary minus ()T)
ri   rz   rq   r&   r   r'   r(   get_null_valuer   r   )r   r    r   _0r   r   r   r     s
   r   c                 C  sP   | j j}| s| rtd|  d t||	||}t
| ||S )Nz%wrong type argument to unary invert (r   )ri   rz   rq   rr   r&   r   r'   r(   get_all_ones_valuer   r   )r   r    r   _1r   r   r   r     s
   r   vtl.block_typec                 C  s&   | j  stjS | j j}ttj|S rp   )ri   r   r'   r`   shaper   )r   r   r   r   r   
_bool_like  s   
r   c                 C     t | ||\} }| jj}| rt|| j|jt| S |	 rB|
 r4t|| j|jt| S t|| j|jt| S td| r   )r   ri   rz   rr   r'   r(   create_fcmpOGTr   r   rO   r   create_icmpSGTcreate_icmpUGTr8   r   r   r    rP   r   r   r   greater_than     r  c                 C  r  r   )r   ri   rz   rr   r'   r(   create_fcmpOGEr   r   rO   r   create_icmpSGEcreate_icmpUGEr8   r  r   r   r   r     r  r   c                 C  r  r   )r   ri   rz   rr   r'   r(   create_fcmpOLTr   r   rO   r   create_icmpSLTcreate_icmpULTr8   r  r   r   r   	less_than)  r  r  c                 C  r  r   )r   ri   rz   rr   r'   r(   create_fcmpOLEr   r   rO   r   create_icmpSLEcreate_icmpULEr8   r  r   r   r   r   8  r  r   c                 C  n   t | ||\} }| jj}| rt|| j|jt| S |	 r0t|
| j|jt| S td| r   )r   ri   rz   rr   r'   r(   create_fcmpOEQr   r   rO   create_icmpEQr8   r  r   r   r   equalG     r  c                 C  r  r   )r   ri   rz   rr   r'   r(   create_fcmpUNEr   r   rO   create_icmpNEr8   r  r   r   r   	not_equalS  r  r  startendc                 C  s   t | tr
t |tstdt| d? }t|d? }|s|r"td|| kr*td||  }||d @ dkr:td|g}ttj|}t|| ||S )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr
   r   z#arange's range must be a power of 2)	r^   r   r&   r?   r'   r   r*   r(   create_make_range)r  r  r    is_start_int64is_end_int64ranger   r   r   r   r   aranged  s   r!  r   	List[int]r5   c                 C  s   t |tjr|jjdksJ dt|||}n(|d u rtd|dkr,|||}nt	|d|j
 }||}t||}t|| |S )Nr
   zonly accepts size-1 tensorz2dtype must be specified when value is not a tensorr   get_)r^   r'   r(   numelrE   r~   r&   r   r   getattrnamesplat)r   rE   r5   r    get_value_fnr   r   r   rd   u  s   rd   rE   c                 C  sF   | j  r	J dt|dkr| S t| j|}t|| j||S )NzCannot splat a block tensorr   )	ri   r   lenr'   r   r5   r(   create_splatr   )rE   r   r    r   r   r   r   r'    s
   r'  	dst_shapecan_reorderc                 C  sR   d}|D ]}||9 }q| j j|krtdt| j j|}t|| j|||S )Nr
   z:reshape() cannot change total number of elements in tensor)	ri   r$  r&   r'   r   rz   r(   create_reshaper   )r   r+  r,  r    r$  sr   r   r   r   reshape  s   
r/  c                 C  sZ   dd | j D }||d | j st| ||dS t| jj|}t|	| j
||S )Nc                 S  s   g | ]}t |qS r   r'   _constexpr_to_value.0rj   r   r   r   
<listcomp>      zexpand_dims.<locals>.<listcomp>r
   )r   r    )r   insertri   r   r'  r'   r   rz   r(   create_expand_dimsr   )r   r   r    r+  r   r   r   r   expand_dims  s   
r8  c                 C  sX   |sJ dt | jdksJ t| jj| jd |jd  g}t|| j|j|S )Nz;current implementation of `cat` always may reorder elementsr
   r   )	r)  r   r'   r   ri   rz   r(   
create_catr   )rt   rv   r,  r    ret_typer   r   r   cat  s   "r;  abc                 C  s   t | ||\} }| jg k}|rt| d|} t|d|}t| jd tjr*td}nd}| j|g }t| jj|}t	|
| j|j|}|rQt|dgd|d}|S )Nr   r   Fr,  r    )r   r   r8  r^   r'   rg   r   ri   rz   r(   create_joinr   r/  )r<  r=  r    
was_rank_1two	new_shaper:  r   r   r   r   join  s   
rD  c                 C  sp   t | jdks	J t| jd dksJ | jd d }t| jj|}|| j\}}t	||t	||fS )Nr   r>  r   )
r)  r   r'   r1  r   ri   rz   create_splitr   r(   )r<  r    rC  r:  outLHSoutRHSr   r   r   split  s   

rH  dims
Tuple[int]c                   s~   t  jt |krtdtdd |D ttt |kr%td| t jj	 fdd|D }t
| j||S )Nz5permute dims must have the same length as input shapec                 s  s    | ]}t |V  qd S rp   r0  r3  dr   r   r   	<genexpr>  s    zpermute.<locals>.<genexpr>z?permute dims must be a permutation of 0, 1, ..., n-1, but were c                   s   g | ]} j | qS r   r   rK  r   r   r   r4    r5  zpermute.<locals>.<listcomp>)r)  r   r&   sortedlistr   r'   r   ri   rz   r(   create_transr   )r   rI  r    r:  r   r   r   permute  s   "rR  c                 C  s   | j  st| j |}t|| j||S | j  }t|t|kr.t	d| d| ||kr4| S t
|D ]#\}}|| |kr[|dkr[t	d||  d| d| d| d| 
q8t| j j|}t|| j||S )Nz!Cannot broadcast, rank mismatch: z, r
   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )ri   r   r'   r   r(   r*  r   r   r)  r&   	enumeraterz   create_broadcast)r   r   r    r   	src_shapeiitemr   r   r   broadcast_impl_shape  s,   

rY  c              	   C  s^  | j }|j }| r'| s't|j|j}t||j|	 |}| |fS | sH| rHt|j|j}t|| j|	 |} | |fS | r+| r+|	 }|	 }t
|t
|k rtt
|t
|D ]}t|| jdt|jdg|j } | j }|	 }qkn/t
|t
|k rtt
|t
|D ]}t||jdt|jdg|j }|j }|	 }qt
|t
|ksJ g }t|D ]3\}	}
||	 }|
dkr|| q|dks||
kr||
 qtdt|	 d t|
 d t| ||krt|j|}t|| j||} ||kr+t|j|}t||j||}| |fS )Nr   r
   z?Cannot make_shape_compatible: incompatible dimensions at index rS  r   )ri   r   r'   r   rz   r   r(   r*  r   r   r)  r   r7  valuesrT  appendr&   strrU  )rt   rv   r    lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaperW  leftrightr   r   r   r   r     sl   +'



r   rounding_modeOptional[str]c                 C  s<   | d u rd S | dkrt jjS | dkrt jjS td|  d)NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r	   ROUNDING_MODERTNERTZr&   )re  r   r   r   _str_to_rounding_mode,  s   rl  dst_tyc                 C  s   | j }| rt|j| j  }||kr| S |j}|j}| s%| r+t| ||S |j}|j}||krCt	dt
| d t
| t|| j|||S )Nz!Cannot bitcast data-type of size z to data-type of size )ri   r   r'   r   rz   r   rq   r~   primitive_bitwidthr&   r\  r(   create_bitcastr   r   )r   rm  r    src_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitsr   r   r   r   6  s    r   fp_downcast_roundingc                 C  s@  | j }| rt|j| j  }||kr| S |j}|j}t|}d}| rC| rC|j|jk rC|d u r:t	j
j}n|t	j
jkrBd}n|d urUtdt| d t| | s]| rt|jdd usiJ d|jd | |||dS | r|| s| r| s|rt|| j||||S | r| r| r| stt| tj|||S | o| o|j|jk}|rt|| j|||S | o| o|j|jk }	|	rt|| j|||S | r8| r8|j|jks|j|jkr8|  o|!  }
|! r)| j"|}t|#|| j"}t$| ||S t|%| j|||
|S |& r~| r~|! r]| j"|}t|#|| j"}t$| ||S |  rpt|'| j|||S t|(| j|||S | r|& r|! s|  st|)| j|||S t|*| j|||S |+ r| r|j}|dkrt|,| j|||S |d	krt$t| tj-|t|.d
tj-|S | r|+ rt|/| j|||S |+ r|+ rt|0| j|||S J d|  d| )NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is convert_custom_typesz0target doesn't provide conversion for this type._builderr   r
   r   zcannot cast z to )1ri   r   r'   r   rz   r   rl  rr   rn  r	   ri  rj  r&   r\  is_fp8e4b15codegen_fnsgetrN   r(   create_fp_to_fpr   r   rL   rK   rM   r~   rH   create_fp_trunccreate_fp_extrO   r3   r4   r   is_boolr5   r   r  r   is_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fprq   create_ptr_to_intrb   r   create_int_to_ptrro  )r   rm  r    ru  rp  rq  rr  use_custom_roundingtruncate_fpext_fpsign_extendtyr   bitwidthr   r   r   r~   I  s   






&r~   c                 C  s\   t jj}| r,| dkrt jj}|S | dkrt jj}|S | dkr$t jj}|S td|  d|S )Nz.ca.cgz.cvCache modifier  not supported)r	   CACHE_MODIFIERr   CACGCVr&   cache_modifiercacher   r   r   _str_to_load_cache_modifier     r  c                 C  sp   t jj}| r6| dkrt jj}|S | dkrt jj}|S | dkr$t jj}|S | dkr.t jj}|S td|  d|S )Nz.wbr  z.csz.wtr  r  )r	   r  r   WBr  CSWTr&   r  r   r   r   _str_to_store_cache_modifier      	r  c                 C  sH   t jj}| r"| dkrt jj}|S | dkrt jj}|S td|  d|S )N
evict_lastevict_firstzEviction policy r  )r	   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr&   )eviction_policyevictionr   r   r   _str_to_eviction_policy  s   r  c                 C  sD   d }| r | dkrt jj}|S | dkrt jj}|S td|  d|S )NzeronanzPadding option r  )r	   PADDING_OPTIONPAD_ZEROPAD_NANr&   )padding_optionpaddingr   r   r   _str_to_padding_option  s   r  c                 C  sp   t jj}| r6| dkrt jj}|S | dkrt jj}|S | dkr$t jj}|S | dkr.t jj}|S td|  d|S )Nacquirereleaseacq_relrelaxedMemory semantic r  )r	   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr&   )
sem_optionsemr   r   r   _str_to_sem  r  r  c                 C  s\   t jj}| r,| dkrt jj}|S | dkrt jj}|S | dkr$t jj}|S td|  d|S )Ngpuctasysr  r  )r	   MEM_SYNC_SCOPEGPUCTASYSTEMr&   )scope_optionscoper   r   r   _str_to_scope  r  r  c                 C  s   | rEt | ds
| g} dd | D } | D ]}t|tr(d|  kr't|k s*J  J qt| dks3J t| tt| ksAJ dt| S dS )N__iter__c                 S  "   g | ]}t |tjr|jn|qS r   r^   r'   rg   rE   r3  elemr   r   r   r4       " z0_canonicalize_boundary_check.<locals>.<listcomp>r   z'Duplicate dimension in `boundary_check`r   )hasattrr^   r   r)  setrO  )boundary_checkblock_shapedimr   r   r   _canonicalize_boundary_check  s   
,r  c	              
   C  s   |d us|d urt d| jjj}	|	tjksJ d|	 r(|tjjkr(t d| jj}
t	||

 }t|| j||||||
S )NK`mask` and `other` arguments cannot be specified for loading block pointers4`tl.int1` should be rewritten in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r&   ri   
element_tyr'   r`   rO   r	   r  r  r  r   r(   create_tensor_pointer_loadr   )ptrmaskr   r  r  r  r  is_volatiler    elt_tyrm  r   r   r   _load_block_pointer  s   
r  c	              
   C  s  | j j std| j   d|d u r|d urtd|s!|r%td| j  s@|r5|j  r5td|r@|j  r@td| j  r_|d urRt|| j  |}|d ur_t|| j  |}| j j}	|	j}
|
t	j
k}|r}t	j}
t	|
|	j}	t| |	|} |d urt||
|}| j  r| j  }t	|
|}n|
}|d u rt	|| j||||}nt	|| j|j|r|jnd ||||}|rt|t	j
|}|S )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)ri   rz   rq   r&   r   r   rY  r   r  r'   r`   int8pointer_typeaddress_spacer~   r   r(   create_loadr   create_masked_load)r  r  r   r  r  r  r  r  r    ptr_tyr  r  r   rm  r   r   r   r   _load_legacy0  sP   




r  r  r  Optional[tl.tensor]r  r   r  r\  r  r  r  c	              
   C  s^   t |}	t|}
t|}| j r#| jj r#t| |||||	|
||	S t| |||||	|
||	S rp   )	r  r  r  ri   rq   r  r   r  r  )r  r  r   r  r  r  r  r  r    r  r  r  r   r   r   loadn  s   r  desc_ptrblock_tyc                 C  s    | | j||}t||S rp   )$create_reinterpret_tensor_descriptorr   r   r'   $_experimental_tensor_descriptor_base)r  r  r    r   r   r   r   reinterpret_tensor_descriptor~  s   r  c                 C  sh   t | dkrd S | d dksJ d| d  d|j d }| d |ks2J | d| d| d  d S )	Nr   r      zAtensor descriptor block shape must have at least 8 rows, but got r  r
   z2 tensor descriptor block shape must have at least  columns, but got )r)  rn  )r   r5   min_colsr   r   r   validate_descriptor_block  s   r  desc&tl._experimental_tensor_desciptor_basec                 C  s   t | tjsJ t| j| j t| j}t||ks&J d| dt| t||dd}|| j	|t
|t|}t|| jS Nz	expected z offsets, but got Frequire_i64)r^   r'   r  r  r  r5   r)  _convert_to_ir_valuescreate_descriptor_loadr   r  r  r(   r   )r  offsetsr  r  r    ndimrj   r   r   r   descriptor_load  s   
$r  'tl._experimental_tensor_descriptor_basec                 C  s   t | tjsJ t| j| j t| j}t||ks&J d| dt| |j| jks.J t||dd}t	|
| j|j|tjS r  )r^   r'   r  r  r  r5   r)  r   r  r(   create_descriptor_storer   void)r  rE   r  r    r  r   r   r   descriptor_store  s   
$r  c           
      C  sB  t | tjsJ |dksJ d|dksJ dt| jdks'J d| j | jd dks6J d| j t|jdksEJ d	|j |jd d
ksTJ d|j | j}d|j d
 }| jd |ksuJ d| d| d| jd  t| j|jd | jd g}t	||fddd }|
| j|j|||}	t|	|S )N z#cache modifier is not supported yetz$eviction policy is not supported yetr   descriptor must be 2D, but got r   r
   *descriptor block must have 1 row, but got x offsets must be 1D, but got r  z5descriptor gather must have at least 8 rows, but got r  zdescriptor gather of  must have at least r  Fr  )r^   r'   r  r)  r  r   r5   rn  r   r  create_descriptor_gatherr   r   r(   )
r  	x_offsetsy_offsetr  r  r    r5   r  ri   rj   r   r   r   descriptor_gather  s(   r  c                 C  s  t | tjsJ t| jdksJ d| j | jd dks&J d| j t|jdks5J d|j |jd dksDJ d|j | j}d	|j d }| jd |kseJ d
| d| d| jd  t	||fddd }|
| j|j|j| td tjS )Nr   r  r   r
   r  r   r  z6descriptor scatter must have at least 8 rows, but got r  zdescriptor scatter of r  r  Fr  )r^   r'   r  r)  r  r   shapaer5   rn  r  create_descriptor_scatterr   r(   r  )r  rE   r  r  r    r5   r  r   r   r   descriptor_scatter  s"   r  global_addressbox_dimList[tl.tensor]
global_dimglobal_strideelement_stride	elem_typeinterleave_layoutswizzle_mode	fill_modec                 C  sj   |r|d j tjksJ t|
| j|jdd |D dd |D dd |D dd |D ||||	
tjS )Nr   c                 S     g | ]}|j qS r   r   r2  r   r   r   r4        z$tensormap_create.<locals>.<listcomp>c                 S  r  r   r  r2  r   r   r   r4    r  c                 S  r  r   r  r2  r   r   r   r4    r  c                 S  r  r   r  r2  r   r   r   r4    r  )r5   r'   rb   r(   create_tensormap_creater   r  )r  r	  r
  r  r  r  r  r  r  r  r    r   r   r   tensormap_create  s    r  c                 C     t || jt jS rp   )r'   r(   #create_tensormap_fenceproxy_acquirer   r  )r  r    r   r   r   tensormap_fenceproxy_acquire     r  c           	   	   C  s   |d urt d| jj }|j st|||}|j s"J d||j ks7J d| d|j  d| jjj|jjksPJ d| jjj d|jj d| jjj}|tjks^J dt||}t	|||}t
|| j|j|||tjS )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r&   ri   r  r   r   rY  r'   r`   r  r~   r(   create_tensor_pointer_storer   r  )	r  valr  r  r  r  r    r  r  r   r   r   _store_block_pointer  s"   
2

r  c           	   	   C  s6  | j j std| j   d|rtd| j  s0|j  r%td|r0|j  r0td| j  rKt|| j  |}|d urKt|| j  |}| j j}|j}|t	j
krgt	j}t	||j}t| ||} t|||}|d u rt	|| j|j||t	jS |j j stdt	|| j|j|j||t	jS )Nr  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  z"Mask must have boolean scalar type)ri   rz   rq   r&   r   r   rY  r   r  r'   r`   r  r  r  r~   r(   create_storer   r  r  create_masked_store)	r  r  r  r  r  r  r    r  r  r   r   r   _store_legacy  s2   



"r!  r  c           	      C  sl   t |}t|}| j s| jj rtd| j r,| jj r,t	| ||||||S t
| ||||||S )N"Cannot store to a constant pointer)r  r  ri   is_constrz   r&   rq   r  r   r  r!  )	r  r  r  r  r  r  r    r  r  r   r   r   storeC  s   r$  cmpr  r  c              	   C  sN   t |}t|}| jjj}|jdvrtdt|	| j
|j
|j
|||jS )N)   r  r   z9atomic_cas only supports elements with width {16, 32, 64})r  r  ri   rz   r  rn  r&   r'   r(   create_atomic_casr   )r  r%  r  r  r  r    r  r   r   r   
atomic_casY  s   

"r(  op&Tuple[tl.tensor, tl.tensor, tl.tensor]c                 C  sF  | j j std| j   | j  s| j j rtd| j jj}|tju r4|dkr4td| d |tj	tj
tjtjfv rLtd| d t| | j  rk|d ur^t|| j  |}|d urkt|| j  |}t|| j jj|}|d u r|d}tj	}| j  r||| j  }ttj	| j  }t||}| ||fS )Nz)Pointer argument of store instruction is r"  r   atomic_z does not support fp16z does not support T)ri   rz   rq   r&   r   r#  r  r'   rF   r`   r  int16rG   r\  r   rY  r   r~   r_   r*  r   r(   )r  r  r  r)  r    r  mask_irmask_tyr   r   r   atom_red_typechecking_implb  s.   




r/  c                 C  s  t | ||d|\} }}t|}t|}|jj}| rG| r3t|	t
jj| j|j|j|||jS t|	t
jj| j|j|j|||jS |tjtjhvrVtd| tg d||}|tjkretjntj}t|||}	t| t|d|}
|tjkrtjntj}t|||}t| t|d|}t|||}t|||}t|	t
jj|
j|	jt|||j|||	j}t|	t
jj|j|jt|||j|||j}t||||}t|||S )Nr   z#atomic_max not supported for dtype r]   r
   )r/  r  r  ri   rz   rO   r   r'   r(   create_atomic_rmwr	   	ATOMIC_OPMAXr   UMAXrH   rJ   r8   rd   r*   rb   r   r  ra   rc   r   r  r   UMINwherer  r  r  r  r  r    sca_tyr  i_typei_vali_ptrui_typeui_valui_ptrposnegpos_retneg_retr   r   r   r   
atomic_max}  J     rB  c                 C  s  t | ||d|\} }}t|}t|}|jj}| rG| r3t|	t
jj| j|j|j|||jS t|	t
jj| j|j|j|||jS |tjtjhvrVtd| tg d||}|tjkretjntj}t|||}	t| t|d|}
|tjkrtjntj}t|||}t| t|d|}t|||}t|||}t|	t
jj|
j|	jt|||j|||	j}t|	t
jj|j|jt|||j|||j}t||||}t|||S )Nr   z#atomic_min not supported for dtype r]   r
   )r/  r  r  ri   rz   rO   r   r'   r(   r0  r	   r1  MINr   r4  rH   rJ   r8   rd   r*   rb   r   r  ra   rc   r   r  r   r3  r5  r6  r   r   r   
atomic_min  rC  rE  c              
   C  sj   t | ||d|\} }}t|}t|}|jj}| rtjjntjj	}t
||| j|j|j|||jS )Nr   )r/  r  r  ri   rz   rr   r	   r1  FADDADDr'   r(   r0  r   )r  r  r  r  r  r    r7  r)  r   r   r   
atomic_add  s   $rH  c              
   C  N   t | ||d|\} }}t|}t|}t|tjj| j	|j	|j	|||j
S )Nand)r/  r  r  r'   r(   r0  r	   r1  ANDr   ri   r  r  r  r  r  r    r   r   r   
atomic_and      rM  c              
   C  rI  )Nor)r/  r  r  r'   r(   r0  r	   r1  ORr   ri   rL  r   r   r   	atomic_or  rN  rQ  c              
   C  rI  )Nxor)r/  r  r  r'   r(   r0  r	   r1  XORr   ri   rL  r   r   r   
atomic_xor  rN  rT  c              
   C  rI  )Nxchg)r/  r  r  r'   r(   r0  r	   r1  XCHGr   ri   rL  r   r   r   atomic_xchg  s    rW  c                 C  sH   |   |jjv sJ d|jj d|  |  } | dkrd} ttj| S )Nzinput_precision must be one of . Got TF32X3TF32x3)lowerr   allowed_dot_input_precisionsupperr%  r	   INPUT_PRECISION)input_precisionr    r   r   r   _str_to_dot_input_precision  s   r`  accr_  max_num_imprecise_acc	out_dtypec              
   C  s  | j  r
|j  sJ | j r|j rn@| jtjtjtjtjtj	fv s.J d| j |jtjtjtjtjtj	fv sEJ d|j | j|jksWJ d| j d|j | j
 sa|j
 rot| tj|} t|tj|}|d u rw|jj}t||}t| j}t|j}||  krdksn ||  krdksn J d| j d|j d	| jd
 j|jd jksJ d| j d|j d| jd
 j d|jd j d		|jdd usJ d|jd | j |j }	| jd j|	d kr| jd
 j|	d kr|jd
 j|	d ksJ d|	d  d|	d  d|	d  | j j r6| j jtjks-J d|d}
tj}n1| r?td| j j sM| j j rV|d}
tj	}n| r`|dn|d}
|}| j jd }|j jd
 }| j jd
 }|dkr| j jd nd }t||r|||gn||g}|d u r||
|r|||gn||g}n|j }|j |ksJ |d u r| j r|j r|jj!}nd}n| j r|j r||krtd| d| d	t"|#| j |j ||||S )NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got r   r      +Both inputs must be either 2D or 3D; (lhs: 	 vs rhs: r   r>  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (min_dot_sizez2target doesn't provide lower shape bounds for dot.r   r
   zInput shapes should have M >= z, N >= z
 and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ()$ri   r   r5   rN   r'   r  uint8rF   rG   rH   ry  r~   r   default_dot_input_precisionr`  r)  r   rE   rz  r{  rz   rO   	get_int32r*   rM   r&   rK   get_fp32rL   get_fp16r   r*  r   max_num_imprecise_acc_defaultr(   
create_dot)rt   rv   ra  r_  rb  rc  r    lhs_rankrhs_rankrh  r   ret_scalar_tyMNKBr   
acc_handler   r   r   dot  s   

$


F0, 


 
"

"rx  float_formatc                 C  s.   t tj|  d }|d u rtd|  d|S )NzInvalid float format: rX   )r%  r	   ScaleDotElemTypeTYr]  r&   )ry  ty_enumr   r   r   _str_to_fp_typeL  s   r|  c                 C  s   t jt jt jt jd|}|du r-|dksJ d| | jt jks+J d| j | S | j|kr4| S t jt jt jt jd| }| j|ksQJ d| d| j t	| ||S )z
    If float_format is subbyte, make sure it's packed as uint8 and return it.
    Otherwise, return a tensor (perhaps bitcasting) of the specified float format.
    )e5m2e4m3bf16fp16Ne2m1z)Internal Error: Unexpected float format: z)e2m1 format must be packed as uint8. Got zUnexpected dtype for rX  )
r'   float8e5
float8e4nvrG   rF   r{  r5   ri  uint16r   )r  ry  r    	triton_tyunsigned_tyr   r   r   _bitcast_to_fp_typeS  s   
 r  	lhs_scale
lhs_format	rhs_scale
rhs_formattl.tensor | None	fast_mathc
                 C  sJ  | j  r
|j  sJ t| j}
t|j}|
|  kr dks9n |
|  kr+dks9n J d| j d|j d|j}|j}t|}t|}h d}||v sVJ d| ||v saJ d| |d u pot|tjoo|jd u }|d u p~t|tjo~|jd u }t	| ||	} t	|||	}| j jd	 }|j jd	d  \}}|d
krdnd}|d
krdnd}|| || j jd  ksJ d| j d|j d|
dkr| j jd nd }t
||r|||gn||g}|	d}|d u r|	||r|||gn||g}n|j}|j |ksJ |rd n|j}|rd n|j}t|	| j|||j|||||S )Nr   rd  re  rf  r   >   r  r  r~  r}  r  zNYI: lhs_format zNYI: rhs_format rg  r  r
   r>  zCReduction dimension should pack the same number of elements; (lhs: r   )ri   r   r)  r   rE   r|  r^   r'   rg   r  r   rl  r*  r   r(   create_dot_scaled)rt   r  r  rv   r  r  ra  r  rc  r    rp  rq  lhs_format_enumrhs_format_enumallowed_formatsrhs_scale_is_nonelhs_scale_is_noners  ru  rt  PACKED_APACKED_Brv  r   r   rw  rhs_scale_handlelhs_scale_handler   r   r   
dot_scalede  sN   

F

 r  	conditionc                 C  s   | j tjkrtd| j   t| tj|} t|||dd\}}| j r6t	| ||\} }t	|||\}}nt	| ||\} }|j}t
|| j|j|j|S )Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)r5   r'   r`   warningswarnr~   r   ri   r   r   r(   create_selectr   )r  rj   r   r    ra  r   r   r   r   r5    s   

r5  c                 C  s"   |r	t ||}n|}t | |S rp   )r'   r   r(   )rj   rP   rb  res_tyr   r   r   wrap_tensor  s   r  inputsSequence[tl.tensor]Tuple[tl.tensor, ...]c                   s    d u rt fddD d d jjt} |k s'J d| d fddtD tfddD sAJ d	d
d D  |   t fddttD S )Nc                 3  s&    | ]}t ||jjgd  dV  qdS )Tr?  N)r/  r$  rE   r3  tr    r   r   rM    s   $ zreduction.<locals>.<genexpr>r   z&reduction axis must be < inputs rank (r   c                   s   g | ]
\}}| kr|qS r   r   )r3  rW  r.  )r   r   r   r4    s    zreduction.<locals>.<listcomp>c                 3  s    | ]	}|j j kV  qd S rp   )ri   r   r  rN  r   r   rM    s    z-all reduction inputs must have the same shapec                 S  r  r   r  r  r   r   r   r4    r  c                 3  *    | ]}t | | jjV  qd S rp   r  
get_resultri   rz   r3  rW  )r  	reduce_oprb  r   r   rM       ( )	tupleri   r   r)  rT  allcreate_reduceverifyr   )r  r   region_builder_fnr    rankr   )r   r    r  r  rb  r   r   	reduction  s   "r  reversec                   s    d j jt}| |  kr|k s!n J d| d| d|dk r)||7 } D ]}|j jks7J dq+|dd  D |||   t fdd	tt D S )
Nr   z
scan axis z must be < inputs rank (r   z(all scan inputs must have the same shapec                 S  r  r   r  r  r   r   r   r4    r  z$associative_scan.<locals>.<listcomp>c                 3  r  rp   r  r  r  scan_opr   r   r   rM    r  z#associative_scan.<locals>.<genexpr>)ri   r   r)  create_scanr  r  r   )r  r   r  r  r    r  r  r   r  r   associative_scan  s   ."r  srcindexc                 C  s   |j  s	J dt| jj}t|jj|ksJ d| |  kr&|k s2n J d| d| d|dk r:||7 }t|D ]}||krEq>|jj| | jj| ksYJ d| dq>|| j|j|}t|| jj	|jjS )	Nzindex must be an integer tensorz0source and index tensors must have the same rankzgather axis z must be < source rank (r   r   z
index dim z( must match the corresponding source dim)
r5   rO   r)  ri   r   r   create_gatherr   r  rz   )r  r  r   r    r  rL  gatherr   r   r   r    s   .*r  num_binsc                 C  sJ   t | jdksJ d| j sJ dt|| j|ttj	|gS )Nr
   z histogram only supports 1D inputz%histogram only supports integer input)
r)  r   r5   rO   r'   r(   create_histogramr   r   r*   )r   r  r    r   r   r   	histogram  s   "r  rZ  c                 C  s@   t dt| jt|krtd| jdt|| j  | S )Nr
   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   r)  r   r&   r   set_attrr	   	make_attrget_contextrj   rZ  r   r   r   multiple_of  s   r  c                 C  :   t | jt |krtd| jdt|| j  | S )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityr)  r   r&   r   r  r	   r  r  r  r   r   r   max_contiguous	     r  c                 C  r  )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  r   r   r   max_constancy  r  r  c                 C  s   t |  t jS rp   )r'   r(   create_barrierr  r  r   r   r   debug_barrier  s   r  prefixargshexc                 C  s   |  ds|r| d7 } |  ds|r| d d d } t| dkr)| ds)d|  } dd |D }dd |D }t|| |||tjS )N rS  r>  r   c                 S  r  r   r  r3  argr   r   r   r4  %  r  z device_print.<locals>.<listcomp>c                 S  s*   g | ]}|j tjtjtjtjtjfv qS r   )r5   r'   r`   r  r,  r*   rb   r  r   r   r   r4  &  s   * )endswithr)  
startswithr'   r(   create_printr  )r  r  r  r    new_args	is_signedr   r   r   device_print  s   r  r   r   c                 C  s$   |j jsd S t|| j|tjS rp   )r   debugr'   r(   create_assertr   r  )r   r   r    r   r   r   r   *  s   r   c                 C  r  rp   )r'   r(   create_assumer   r  )r   r    r   r   r   assume0  r  r  c                 C  s  t |tr
t|}t |tjrH|r-d|j  krdk s'n J d|j d| |jS d|j  kr8dk sBn J d|j d| |jS t |tjr|jjdksXJ d	|j	
 saJ d
|j	tjkrv|rv| |j|  |j	 S |j	tjkr|sJ d|jS J dt| )NrV   rW   z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the rangerT   rU   zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r
   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetsFzzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )r^   r   r'   rg   rE   r   rk  r(   r$  r5   rO   rb   r   r   get_int64_tyr   r*   ri   )r    r  r  r   r   r   _convert_elem_to_ir_value4  s*   



r  c                   s,   t |dr fdd|D S t |gS )Nr  c                   s   g | ]}t  |qS r   )r  r  r    r  r   r   r4  N  s    z)_convert_to_ir_values.<locals>.<listcomp>)r  r  )r    	list_liker  r   r  r   r  L  s   
r  basec              	     s8  t ||}t ||}t ||dd}| j r| jj r td| jjtjkr4t| t	tj
| jj|} t ds< g dd  D  tdd  D sPJ d	t|dsX|g}d
d |D }t|ttt|ksoJ dt fdd||||fD sJ d|| j||| |}t|t	t| jj S )NFr  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c                 S  r  r   r  r  r   r   r   r4  d  r  z"make_block_ptr.<locals>.<listcomp>c                 s  s2    | ]}t |tod |  kodk n  V  qdS )rT   rU   N)r^   r   r  r   r   r   rM  e  s   0 z!make_block_ptr.<locals>.<genexpr>zGExpected a list of constant integers (`int32_t` range) in `block_shape`c                 S  r  r   r  r  r   r   r   r4  k  r  z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc                 3  s     | ]}t  t |kV  qd S rp   )r)  )r3  r  r  r   r   rM  o  s    zBExpected shape/strides/offsets/block_shape to have the same length)r  ri   rq   r  r   r&   r'   r`   r~   r  r  r  r  r  rO  rP  r   r)  create_make_block_ptrr   r(   r   )r  r   stridesr  r  orderr    r   r   r  r   make_block_ptrR  s,   



  r  c                 C  s&   t ||dd}t|| j|| jS )NFr  )r  r'   r(   create_advancer   ri   )r  r  r    r   r   r   advancey  s   r  r  r  List[tl.constexpr]"tl._experimental_tensor_descriptorc                   s,  t |}d|  krdksn td| dt ||kr)td| dt | t ||kr;td| dt | t|d	 |d	< |d	 d
krStd|d	   fdd|D } fdd|D }t|}t| jtjssJ t| jj	|} 
| jdd |D dd |D |}t||||S )Nr      z Expected 2 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got r>  r
   z-Tensor descriptor last dim must be 1 but got c                   s   g | ]}t | qS r   )rh   r2  r  r   r   r4    r5  z*make_tensor_descriptor.<locals>.<listcomp>c                   s"   g | ]}t | jtj d qS )rw  )rh   tor'   rb   r2  r  r   r   r4    r  c                 S  r  r   r  )r3  r.  r   r   r   r4    r  )r)  r&   r'   r1  _unwrap_shaper^   ri   r  r   r  create_make_tensor_descriptorr   _experimental_tensor_descriptor)r  r   r  r  r    r  ri   r   r   r  r   make_tensor_descriptor  s&   
 r  )r   r   r    r!   r"   r#   )r0   r1   r2   r1   r"   r1   )r0   r1   r>   r?   r2   r1   r@   r?   rA   r?   r"   r1   )T)rS   r?   )r   r1   r   r1   rn   r?   r"   ro   )FFTF)rt   ru   rv   ru   r    r!   r"   rw   )rt   r#   rv   r#   r    r!   r   r   )
r   ru   r   ru   r   r?   r    r!   r"   r#   )r   ru   r   ru   r    r!   r"   r#   )
r   ru   r   ru   r   r?   r    r!   r"   r#   )rj   r#   r   r#   r   r   r    r!   )
rj   r#   r   r#   r   r#   r   r   r    r!   )r   r#   r   r#   r    r!   r"   rw   )r   r#   r   r#   r    r!   r"   r#   )r   r#   r    r!   )r   r#   r"   r#   )r   r#   r    r!   r"   r#   )r   r#   r    r#   r"   r#   )r   r#   r"   r   )r  r   r  r   r    r!   r"   r#   )r   r"  r5   r1   r    r!   r"   r#   )rE   r#   r   r"  r    r!   r"   r#   )
r   r#   r+  r"  r,  r?   r    r!   r"   r#   )r   r#   r   r   r    r!   r"   r#   )
rt   r#   rv   r#   r,  r?   r    r!   r"   r#   )r<  r#   r=  r#   r    r!   r"   r#   )r<  r#   r    r!   r"   rw   )r   r#   rI  rJ  r    r!   r"   r#   )r   r#   r   r"  r    r!   r"   r#   )rt   r#   rv   r#   r    r!   r"   r#   )re  rf  )r   r#   rm  r1   r    r!   r"   r#   rp   )
r   r#   rm  r1   r    r!   ru  rf  r"   r#   )r  r#   r  r  r   r  r  r   r  r\  r  r\  r  r\  r  r?   r    r!   r"   r#   )r  r#   r  r   r    r!   )
r  r  r  r\  r  r\  r    r!   r"   r#   )r  r  rE   r#   r    r!   r"   r#   )r  r\  r  r\  r    r!   r"   r#   )rE   r#   r    r!   r"   r#   )r  r#   r	  r#   r
  r  r  r  r  r  r  r  r  r   r  r   r  r   r  r   r    r!   r"   r#   )r  r#   r    r!   r"   r#   )r  r#   r  r#   r  r  r  r\  r  r\  r    r!   r"   r#   )r  r#   r%  r#   r  r#   r  r\  r  r\  r    r!   r"   r#   )r  r#   r  r#   r  r#   r)  r\  r    r!   r"   r*  )r  r#   r  r#   r  r#   r  r\  r  r\  r    r!   r"   r#   )rt   r#   rv   r#   ra  r#   r_  rf  rb  r   rc  r1   r    r!   r"   r#   )ry  r\  )r  r#   ry  r\  r    r!   )rt   r#   r  r#   r  r\  rv   r#   r  r  r  r\  ra  r  r  r?   rc  r1   r    r!   r"   r#   )
r  r#   rj   r#   r   r#   r    r!   r"   r#   )r  r  r   r   r    r!   r"   r  )
r  r  r   r   r  r?   r    r!   r"   r  )
r  r#   r  r#   r   r   r    r!   r"   r#   )r   r#   r  r   r    r!   r"   r#   )rj   r#   rZ  r"  r"   r#   )r    r!   r"   r#   )
r  r\  r  r  r  r?   r    r!   r"   r#   )r   r#   r   r\  r    r!   r"   r#   )r  r#   r    r!   r"   r#   )r  r#   r   r  r  r  r  r  r    r!   r"   r  ){
__future__r   r  typingr   r   r   r   r   rx   _C.libtritonr	   r  r   r'   r   	Exceptionr   r,   r/   r=   rR   rh   rs   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r  r   r  r  r!  rd   r'  r/  r8  r;  rD  rH  rR  rY  r   rl  r   r~   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r$  r(  r/  rB  rE  rH  rM  rQ  rT  rW  r`  rx  r|  r  r  r5  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r   r   r   r   <module>   s    3+&#


:
p>,	''		I0	'