o
    nÇh¿J  ã                   @   sÒ   d dl mZmZ d dlmZmZmZmZ d dlm	Z	 d dl
mZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ defd	d
„Zdd„ Ze	ddG dd„ dƒƒZG dd„ deƒZdS )é    )ÚBaseBackendÚ	GPUTarget)ÚirÚpassesÚllvmÚamd)Ú	dataclass)ÚAnyÚDictÚTuple)Ú
ModuleTypeN)ÚPathÚtargetc                 C   s   dd„ S )Nc                 S   s   dS )N©é   r   r   © )ÚlhsTypeÚrhsTyper   r   úp/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/triton/backends/amd/compiler.pyÚ<lambda>   s    zmin_dot_size.<locals>.<lambda>r   ©r   r   r   r   Úmin_dot_size   s   r   c                 C   s    | dkrdnd}t  d|¡dkS )NÚgfx942Ú1Ú0ÚTRITON_HIP_USE_BLOCK_PINGPONG)ÚosÚgetenv)ÚarchÚdefaultr   r   r   Úis_pingpong_enabled   s   r    T)Úfrozenc                   @   sV  e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	eed	< dZ
eed
< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZee ed< dZee ed< dZeed< dZee ed< dZeed< dZeed< dZeed < dZeed!< dZeed"< dZ eed#< d$Z!eed%< d&Z"eed'< d(d)„ Z#d*d+„ Z$dS ),Ú
HIPOptionsé   Ú	num_warpsr   Úwaves_per_eué   Ú
num_stagesÚnum_ctasr   Únum_buffers_warp_specÚnum_consumer_groupsÚreg_dec_producerÚreg_inc_consumerNÚextern_libsr   Úcluster_dimsFÚdebugTÚsanitize_overflowr   )Úfp8e5Úsupported_fp8_dtypesr   Údeprecated_fp8_dtypesÚieeeÚdefault_dot_input_precision)r4   Úallowed_dot_input_precisionsÚenable_fp_fusionÚlaunch_cooperative_gridÚmatrix_instr_nonkdimÚkpackÚallow_flush_denormÚmax_num_imprecise_acc_defaultÚhipÚbackend_nameÚnoneÚinstruction_sched_variantc                 C   sä   t tƒjd }| jd u ri nt| jƒ}d| jv s"d| jv s"d| jv r$dnd}t | d|¡ | jdkr4d	n| j}t | d
|¡ ddg}|D ]}t	||› d ƒ||< qDt | dt
| ¡ ƒ¡ | jdkrl| j| jd	 @ dkspJ dƒ‚d S )NÚlibÚgfx10Úgfx11Úgfx12é    é@   Ú	warp_sizeÚgfx950r   r:   ÚocmlÚocklz.bcr-   r   znum_warps must be a power of 2)r   Ú__file__Úparentr-   Údictr   ÚobjectÚ__setattr__r:   ÚstrÚtupleÚitemsr$   )ÚselfÚdefault_libdirr-   rG   r:   ÚlibsrA   r   r   r   Ú__post_init__G   s   & ÿzHIPOptions.__post_init__c                 C   s.   d  dd„ | j ¡ D ƒ¡}t | d¡¡ ¡ S )NÚ_c                 S   s   g | ]\}}|› d |› ‘qS )ú-r   )Ú.0ÚnameÚvalr   r   r   Ú
<listcomp>X   s    z#HIPOptions.hash.<locals>.<listcomp>úutf-8)ÚjoinÚ__dict__rR   ÚhashlibÚsha256ÚencodeÚ	hexdigest)rS   Úkeyr   r   r   ÚhashW   s   zHIPOptions.hash)%Ú__name__Ú
__module__Ú__qualname__r$   ÚintÚ__annotations__r%   r'   r(   r)   r*   r+   r,   r-   rM   r.   rQ   r/   Úboolr0   r   rP   r2   r   r3   r5   r6   r7   r8   r9   r:   r;   r<   r>   r@   rV   re   r   r   r   r   r"      s8   
 r"   c                       s  e Zd Zedefdd„ƒZdeddf‡ fdd„Zdefdd	„Zd
d„ Z	dd„ Z
deeef fdd„Zdd„ Zee ¡ dd„ ƒƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZed d!„ ƒZed"d#„ ƒZed$d%„ ƒZd&d'„ Ze ¡ d(d)„ ƒZ‡  ZS )*Ú
HIPBackendr   c                 C   s
   | j dkS )Nr=   )Úbackendr   r   r   r   Úsupports_target^   s   
zHIPBackend.supports_targetÚreturnNc                    s&   t ƒ  |¡ t|jtƒsJ ‚d| _d S )NÚhsaco)ÚsuperÚ__init__Ú
isinstancer   rP   Ú
binary_ext)rS   r   ©Ú	__class__r   r   rr   b   s   
zHIPBackend.__init__c                    sæ   dt  d| jj¡i}| jjdv r#ttjƒ}| dh¡ tt	|ƒƒ|d< dˆ vrOttj
ƒ}| jjdv r:| h d£¡ n| jjdv rG| d	d
h¡ tt	|ƒƒ|d< dˆ vr]t  dd¡dk|d< | ‡ fdd„tj ¡ D ƒ¡ tdi |¤ŽS )Nr   ÚTRITON_OVERRIDE_ARCH)Úgfx940Úgfx941r   Útf32r6   r2   >   Úfp8e4b8Úfp8e4nvÚfp8e5b16rH   r|   r1   r7   ÚTRITON_DEFAULT_FP_FUSIONr   c                    s*   i | ]}|ˆ v rˆ | d ur|ˆ | “qS ©Nr   )rY   Úk©Úoptsr   r   Ú
<dictcomp>z   s   * z,HIPBackend.parse_options.<locals>.<dictcomp>r   )r   r   r   r   Úsetr"   r6   ÚupdaterQ   Úsortedr2   Ú__dataclass_fields__Úkeys)rS   r‚   Úargsr6   r2   r   r   r   Úparse_optionsg   s    

zHIPBackend.parse_optionsc                 C   s(   |j |j|j|jd |jd |jd fS )Nr   r   r&   )r$   r(   Úsharedr.   )rS   Úmetadatar   r   r   Úpack_metadata}   s   úzHIPBackend.pack_metadatac                 C   s   dt | jƒi}|S )Nr   )r   r   )rS   ÚoptionsÚcodegen_fnsr   r   r   Úget_codegen_implementation‡   s   z%HIPBackend.get_codegen_implementationc                 C   s   ddl m} d|iS )Nr   )Ú	libdeviceztriton.language.extra.libdevice)Útriton.language.extra.hipr‘   )rS   r‘   r   r   r   Úget_module_map‹   s   zHIPBackend.get_module_mapc                 C   s   t  |¡ d S r   )r   Úload_dialects)rS   Úctxr   r   r   r”      s   zHIPBackend.load_dialectsc                   C   s   t j dd¡dkS )NÚAMDGCN_USE_BUFFER_OPSr   r   )r   ÚenvironÚgetr   r   r   r   Úuse_buffer_ops“   s   zHIPBackend.use_buffer_opsc                 C   sL   dd l }d}t| dƒr|  ¡ |kS t| |jƒr$t| dƒr$|  ¡  ¡ |kS dS )Nr   iÿÿÿÚ	ptr_rangeÚuntyped_storageF)ÚtorchÚhasattrrš   rs   ÚTensorr›   Úsize)Úargrœ   Ú
MAX_INT_32r   r   r   Úis_within_2gb˜   s   
zHIPBackend.is_within_2gbc                 C   s$   t  | ¡}d| v r|ddgg7 }|S )NÚSztt.pointer_rangerE   )r   Ú
parse_attr)ÚdescÚretr   r   r   r¤   £   s   
zHIPBackend.parse_attrc                 K   s:   t j| |fi |¤Ž}t ¡ r|dkrt | ¡r|d7 }|S )NÚtensorr£   )r   Úget_arg_specializationrl   r™   r¢   )r    ÚtyÚkwargsr¦   r   r   r   r¨   ª   s   z!HIPBackend.get_arg_specializationc                  C   sp   t  d¡} | d urt| ƒ}| ¡ r|S ttƒjd }| ¡ r |S tdƒ}| ¡ r*|S tdƒ}| ¡ r4|S tdƒ‚)NÚTRITON_HIP_LLD_PATHzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r   r   r   Úis_filerK   rL   Ú	Exception)Úlld_env_pathÚlldr   r   r   Úpath_to_rocm_lld³   s   
zHIPBackend.path_to_rocm_lldc                 C   sŽ   t  | j¡}| ¡  tj |¡ tj |¡ tj 	|¡ tj 
|¡ tj |¡ tj |¡ tj |¡ tj |¡ tj |¡ | | ¡ | S r   )r   Úpass_managerÚcontextÚenable_debugr   ÚcommonÚadd_inlinerÚttirÚadd_rewrite_tensor_pointerÚadd_canonicalizerÚadd_combineÚadd_reorder_broadcastÚadd_cseÚadd_licmÚadd_symbol_dceÚadd_loop_unrollÚrun)ÚmodrŒ   rŽ   Úpmr   r   r   Ú	make_ttirÇ   s   
zHIPBackend.make_ttirc                 C   s&  t  | j¡}| ¡  tj |d|j› |j|j	|j
¡ | | ¡ t  | j¡}| ¡  tj |¡ tj |¡ tj |¡ tjj ||j|j|j¡ tj |¡ tjj |¡ tj |d¡ tjj |¡ tt dd¡ƒ}tt dd¡ƒ}|jdkr}d }}t |j¡r|jdksŒJ d	ƒ‚tjj ||j||¡ tj |¡ |j  ¡ d
kr­tjj !||j¡ tj |d¡ tj |¡ tj "|¡ t |j¡ràtjj #|¡ t$|jƒ}|rà|jdkràtjj %|¡ t& '¡ rútjj (|¡ tj |¡ tjj )||j¡ tj |¡ tj *|¡ tj +|¡ | | ¡ | S )Nzhip:TÚTRITON_HIP_GLOBAL_PREFETCHr   ÚTRITON_HIP_LOCAL_PREFETCHzlocal-prefetchr   r   zÕTriton AMD backend pipeliner has been updated. We used to trigger software pipelining with num_stages == 0. Now it will not happen anymore; please update to use num_stages == 2 for equivalent behavior in the past.r?   r&   ),r   r±   r²   r³   r   r¶   Úadd_convert_to_ttgpuirr   r$   rG   r(   r¿   ÚttgpuirÚadd_coalesceÚadd_remove_layout_conversionsÚadd_optimize_thread_localityr   Úadd_accelerate_matmulr9   r:   Úadd_optimize_epilogueÚadd_optimize_dot_operandsÚadd_hoist_layout_conversionsri   r   r   r@   Úhas_matrix_core_featurer'   Úadd_stream_pipeliner´   r¸   ÚlowerÚinsert_instruction_sched_hintsÚadd_reduce_data_duplicationÚadd_reorder_instructionsr    Úadd_block_pingpongrl   r™   Úadd_canonicalize_pointersÚadd_convert_to_buffer_opsr»   r½   )rÀ   rŒ   rŽ   rÁ   Úglobal_prefetchÚlocal_prefetchÚuse_block_pingpongr   r   r   Ú
make_ttgir×   sV   ÿ



zHIPBackend.make_ttgirc                    sH  | }t  |j¡}| ¡  tjj ||j¡ d}tjj 	||j|¡ tj
 |¡ tj
 |¡ tj |¡ d}tjj ||j|¡ tj |¡ tj |¡ tj
 |¡ tj
 |¡ tj |¡ tj |¡ tj |¡ |j ¡ dkr{tjj ||j|j¡ tj dd¡dkrŠtj |¡ tjj ||¡ | |¡ t  !¡  t  ¡ }t  "||¡‰ t #ˆ ¡ d}tj dd¡dkr·d	}t  $ˆ tj%|j|¡ t &ˆ |j¡ t 'ˆ d
¡ t (ˆ dd¡ t (ˆ dd¡ t (ˆ dd¡ t (ˆ d|j)dk¡ dd„ ˆ  *¡ D ƒ}	|	d  +tj,¡ |	d  -dd|j.|j) › ¡ |	d  -d|j/› ¡ |j0rdnd}
|	d  -d|
¡ tj dd¡dkr>|	d  1d	¡ |	d  2¡  t 3|	d ¡ tj dd¡dkrnt4t5ƒj6d }t7|d ƒt7|d ƒt7|d ƒg}t  8ˆ |¡ n|j9r‚‡ fdd„|j9D ƒ}t  8ˆ |¡ t  :ˆ t j;|jdg |j<¡ |  =d¡|d< t >ˆ ¡ t ?ˆ ¡ t7ˆ ƒS ) Nr   Tr?   ÚTRITON_DISABLE_LINE_INFOr   Ú ÚTRITON_ENABLE_ASANr   ú+xnackiô  Ú__oclc_finite_only_optFÚ__oclc_correctly_rounded_sqrt32Ú__oclc_unsafe_math_optÚ__oclc_wavefrontsize64rF   c                 S   s   g | ]}|  ¡ s|‘qS r   )Úis_declaration)rY   Úfnr   r   r   r\   K  s    z(HIPBackend.make_llir.<locals>.<listcomp>zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr4   zdenormal-fp-math-f32rA   z
asanrtl.bczocml.bczockl.bcc                    s    g | ]\}}t  ˆ |¡r|‘qS r   )r   Úneed_extern_lib)rY   rZ   Úpath©Úllvm_modr   r   r\   k  s     z
ttg.sharedr‹   )@r   r±   r²   r³   r   r   rÆ   Ú%add_decompose_unsupported_conversionsr   Úadd_optimize_lds_usageÚconvertÚadd_scf_to_cfÚadd_index_to_llvmirÚadd_allocate_shared_memoryÚadd_to_llvmirr´   r¸   r»   Úadd_cf_to_llvmirÚadd_arith_to_llvmirr½   r@   rÐ   Úlower_instruction_sched_hintsr'   r   r—   r˜   ÚllvmirÚadd_di_scopeÚadd_builtin_func_to_llvmirr¿   r   Úinit_targetsÚ	to_moduleÚattach_target_tripleÚattach_datalayoutÚTARGET_TRIPLEÚset_isa_versionÚset_abi_versionÚset_bool_control_constantrG   Úget_functionsÚset_calling_convÚCALLING_CONV_AMDGPU_KERNELÚadd_fn_attrr$   r%   r;   Úadd_fn_target_featureÚadd_fn_asan_attrÚset_all_fn_arg_inregr   rK   rL   rP   Úlink_extern_libsr-   Úoptimize_moduleÚOPTIMIZE_O3r7   Úget_int_attrÚcleanup_bitcode_metadataÚdisable_print_inline)ÚsrcrŒ   rŽ   rÀ   rÁ   Úcustom_lds_sizeÚ_HIPBackend__HIP_FTZr²   Útarget_featuresÚfnsÚdenormal_moderT   Úpathsr   rç   r   Ú	make_llir  s~   




ý

zHIPBackend.make_llirc              	   C   sj   t  d| ¡}t|ƒdksJ ‚|d |d< t | tj|jdg |jd¡}t	j
 dd¡d	kr3td
ƒ t|ƒ |S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rZ   rÜ   FÚAMDGCN_ENABLE_DUMPr   r   z!// -----// AMDGCN Dump //----- //)ÚreÚfindallÚlenr   Útranslate_to_asmr   rú   r   r7   r   r—   r˜   Úprint)r  rŒ   rŽ   ÚnamesÚamdgcnr   r   r   Úmake_amdgcny  s   zHIPBackend.make_amdgcnc                 C   s  d}t j dd¡dkrd}t | |j|¡}t ¡ }t 	¡ h}t 	¡ 1}t
|jdƒ}| |¡ W d   ƒ n1 s9w   Y  t |ddd	|jd
|jg¡ W d   ƒ n1 sVw   Y  t
|jdƒ}	|	 ¡ }
W d   ƒ n1 spw   Y  W d   ƒ |
S W d   ƒ |
S 1 sˆw   Y  |
S )NrÜ   rÝ   r   r   rÞ   Úwbz-flavorÚgnuz-sharedz-oÚrb)r   r—   r˜   r   Úassemble_amdgcnr   rl   r°   ÚtempfileÚNamedTemporaryFileÚopenrZ   ÚwriteÚ
subprocessÚ
check_callÚread)r  rŒ   rŽ   r  rp   Ú	rocm_pathÚtmp_outÚtmp_inÚfd_inÚfd_outr¦   r   r   r   Ú
make_hsacoˆ  s,   

ÿý
ÿ
ûþ
ûùzHIPBackend.make_hsacoc                    s^   ‡ ‡fdd„|d< ‡ ‡fdd„|d< ‡ ‡fdd„|d< ‡ ‡fdd„|d	< ‡ ‡fd
d„|d< d S )Nc                    ó   ˆ  | |ˆ ¡S r   )rÂ   ©r  rŒ   ©rŽ   rS   r   r   r   š  ó    z'HIPBackend.add_stages.<locals>.<lambda>r¶   c                    r-  r   )rÚ   r.  r/  r   r   r   ›  r0  Úttgirc                    r-  r   )r  r.  r/  r   r   r   œ  r0  Úllirc                    r-  r   )r  r.  r/  r   r   r     r0  r  c                    r-  r   )r,  r.  r/  r   r   r   ž  r0  rp   r   )rS   ÚstagesrŽ   r   r/  r   Ú
add_stages™  s
   zHIPBackend.add_stagesc                 C   s&   t jt ¡ dgdd}|› d| j› S )Nz	--versionr]   )ÚencodingrX   )r$  Úcheck_outputrl   r°   r   )rS   Úversionr   r   r   re      s   zHIPBackend.hash)rf   rg   rh   Ústaticmethodr   rn   rr   r	   rŠ   r   r   r
   rP   r   r“   r”   Ú	functoolsÚ	lru_cacher™   r¢   r¤   r¨   r°   rÂ   rÚ   r  r  r,  r4  re   Ú__classcell__r   r   ru   r   rl   \   sB    







5
k

rl   )Útriton.backends.compilerr   r   Útriton._C.libtritonr   r   r   r   Údataclassesr   Útypingr	   r
   r   Útypesr   r`   r   r   r  r$  r9  Úpathlibr   r   r    r"   rl   r   r   r   r   Ú<module>   s"    B