o
    oh_.                     @   s  d dl mZ d dlmZ d dlmZmZ d dlmZ dd Z	G dd de
ZeG d	d
 d
ZG dd dZdd Zdd Zdedee defddZdedefddZdedefddZdedee defddZdedefddZdededefd d!Zdededefd"d#Zdedefd$d%Zdedefd&d'Zd(Zed)krd d*lmZ eed+Zejd,d-d.d/ ejd0d1ed2d3 ejd4ed5d6d7 e  Z!e Zg Z"e!j#D ]Z$ee$Z%e%& Z'e"(e%j) e*e' qd8d9 ej+, D Z-d:d9 ej+, D Z.e.d  d  Z/ee/Z0ee/Z1e!j23d;4d<&Z5d=Z2e2d>6e-7 Z2e2d>7 Z2e2e07 Z2e2d>7 Z2e2e17 Z2e57e2 W d?   n	1 s>w   Y  d@d9 ej+, D Z8dAd9 ej+9 D Z:ee:e/Z;ee/Z<ee:e/Z=ee/Z>ee/Z?e!j23dB4d<OZ5d5Z2e2d=7 Z2e2dC7 Z2e2dD7 Z2e2d>7 Z2e2d>6e87 Z2e2d>7 Z2e2e;7 Z2e2d>7 Z2e2e>7 Z2e2d>7 Z2e2e<7 Z2e2d>7 Z2e2e=7 Z2e2d>7 Z2e2e?7 Z2e57e2 W d?   d?S 1 sw   Y  d?S d?S )E    )defaultdict)Path)SequenceUnion)	dataclassc                 C   s   | d uS N )xr   r   e/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/triton/tools/link.py_exists   s   r   c                   @   s   e Zd ZdS )LinkerErrorN)__name__
__module____qualname__r   r   r   r
   r      s    r   c                   @   sb   e Zd ZU eed< ee ed< ee ed< eeedf  ed< eed< eed< eed< eed	< dS )
KernelLinkerMetaorig_kernel_name	arg_names
arg_ctypesNsizessig_hashtriton_suffixsuffix	num_specs)r   r   r   str__annotations__r   r   intr   r   r   r
   r      s   
 r   c                   @   sd   e Zd ZdddZdefddZdefd	d
ZdefddZdedefddZdede	fddZ
dS )HeaderParserreturnNc                 C   sF   dd l }|d| _|d| _|d| _|d| _tt| _d S )Nr   z'//[\s]*tt-linker:[\s]*([\w]+):(.+):(.+)z^([\w]+)_([\w]+)_([\w]+)$z[\s]*(\w+)\s(\w+)[,]?z[c,d])	recompilelinker_directiveskernel_namec_sig
arg_suffixr   listkernels)selfr   r   r   r
   __init__   s   zHeaderParser.__init__headerc                 C   s   |  D ]N}|drR| j|}t|rR|d|d|d}}}| |\}}}	| |\}
}| |	|\}}| 	d
||gt|||
|||	|	|d qd S )Nz//         _)r   r   r   r   r   r   r   r   )
splitlines
startswithr    matchr   group_match_name_match_c_sig_match_suffix_add_kerneljoinr   )r&   r(   lnmker_namer"   	algo_infonamer   r   c_typesr   r   r   r   r   r
   extract_linker_meta-   s.   
"z HeaderParser.extract_linker_metar8   c                 C   sN   | j |}t|r |d|d|d}}}|||fS t| d)Nr)   r*   r+   z is not a valid kernel name)r!   r/   r   r0   r   )r&   r8   r7   r:   r   r   r   r   r
   r1   D   s
   "
zHeaderParser._match_namer"   c                 C   sV   | j |}t|r$g g }}|D ]\}}|| || q||fS t| d)Nz" is not a valid argument signature)r"   findalllenappendr   )r&   r"   r7   tysargstyarg_namer   r   r
   r2   K   s   

zHeaderParser._match_c_sigr   c           	      C   s   | d}ddd}d}g }tt|D ]]}|t|}|dkr(t| d|tt|7 }| j||rT|d7 }|d g|t|   |	|||   |d7 }|t|d k rc||d  }q|d gt|t|   q||fS )N,r)      )cdr   z is not a valid kernel suffix)
splitranger>   findr   r   r#   r/   extendr?   )	r&   r   r"   rA   s2ir   r   iposr   r   r
   r3   V   s$   

zHeaderParser._match_suffixr:   kerc              
   C   sv   || j v r1| j | d }t|j|jD ]\}}||kr0td| dd|j dd|j q| j | | d S )NrH   z Mismatched signature for kernel z: 
	existing sig is: rD   z
	current is: )r%   zipr   r   r5   r?   )r&   r:   rP   lastcurnew_r   r   r
   r4   m   s   
$zHeaderParser._add_kernel)r   N)r   r   r   r'   r   r<   r1   r2   r3   r   r4   r   r   r   r
   r      s    
r   c                 C   s   d dd t| j| jD S )N, c                 S      g | ]\}}| d | qS  r   .0rB   argr   r   r
   
<listcomp>{       z0gen_signature_with_full_args.<locals>.<listcomp>)r5   rQ   r   r   r7   r   r   r
   gen_signature_with_full_argsz   s   r_   c                 C   sN   dd t | j| jD }dd t | j| jD }ddd t ||D }|S )Nc                 S      g | ]
\}}|d kr|qS r)   r   )rZ   rB   hintr   r   r
   r\          z!gen_signature.<locals>.<listcomp>c                 S   r`   ra   r   rZ   r[   rb   r   r   r
   r\      rc   rU   c                 S   rV   rW   r   rY   r   r   r
   r\      r]   )rQ   r   r   r   r5   )r7   	arg_typesr   sigr   r   r
   gen_signature~   s   rg   r:   metasr   c              	   C   s&   d|  dt |d  d|  d|  d	S )N

CUresult (CUstream stream, rH   z);
void load_();
void unload_();
    )r_   )r:   rh   r   r   r
   make_algo_decls   s   
rm   metac                 C   s:   d| j  dt|  d| j  dt|  d| j  d| j  dS )Nri   _default(CUstream stream, z);
CUresult rj   z, int algo_id);
void load_rk   rl   )r   r_   )rn   r   r   r
   make_global_decl   s   rp   c                 C   sD   d| j  dt|  d}|d| j  dd| j d7 }|d7 }|S )	N	CUresult ro   z){
	  return 	(stream, rU   z, 0);
}
r   r_   r5   r   rn   srcr   r   r
   make_default_algo_kernel   s    rx   c                    s  d|  d}t |dd dD ]}|d|j d|j d|j dt| d		7 }q|d7 }|d|  dt|d
  d7 }|d7 }t |dd dD ]I}dd  d fddt|j|j	D }|t
|j	rid| dnd7 }dd t|j|j	D }|d|j d|j d|j dd| d		7 }qF|d7 }|d7 }|d7 }dD ]^}|d| d|  d7 }t |dd dD ]}|d| d|j d|j d|j d	7 }q|d| d|  d 7 }|d7 }t |d!d dD ]}|d"| d|j d|j d|j d	7 }q|d7 }q|S )#Nz// launcher for: 
c                 S      | j  S r   r   r^   r   r   r
   <lambda>       z.make_kernel_hints_dispatcher.<locals>.<lambda>)keyrq   r,   rj   );
rH   z){c                 S   rz   r   r{   r^   r   r   r
   r|      r}   c                 S   s8   |dkrd|  d| dS |dkrd|  d| dS d S )NrE   (z % z == 0)r)   z == )r   )valrb   r   r   r
   r|      s
   z && c                    s"   g | ]\}}|d ur ||qS r   r   )rZ   r   rb   cond_fnr   r
   r\      s
    z0make_kernel_hints_dispatcher.<locals>.<listcomp>z  if (z)
zif (1)
c                 S   r`   ra   r   rd   r   r   r
   r\      rc   z    return rs   rU   z#  return CUDA_ERROR_INVALID_VALUE;
rt   loadunloadz
// z for: c                 S   rz   r   r{   r^   r   r   r
   r|      r}   void ();
z() {c                 S   rz   r   r{   r^   r   r   r
   r|      r}     )sortedr   r   r   rg   r_   r5   rQ   r   r   any)r:   rh   rw   rn   condsr   moder   r   r
   make_kernel_hints_dispatcher   s8   .0**
r   c                 C   sV   d| j  dt|  d}|d| j  d7 }|d| j  dd| j d	7 }|d
7 }|S )Nrq   rj   z, int algo_id){
z   assert (algo_id < (int)sizeof(z_kernels));
rr   z_kernels[algo_id](stream, rU   r   rt   ru   rv   r   r   r
   !make_kernel_meta_const_dispatcher   s
    r   namesc                 C   sH   dt | d}|d|j d7 }| D ]
}|d| d7 }q|d7 }|S )Nz3typedef CUresult (*kernel_func_t)(CUstream stream, r   zkernel_func_t z_kernels[] = {
r   z,
z};
)r_   r   )r   rn   rw   r:   r   r   r
   make_func_pointers   s   r   c                 C   sR   d}dD ]"}|d| d|j  d7 }| D ]}|d| d| d7 }q|d7 }q|S )	N r   r   r,   z(void){
r   r   z}

r   )r   rn   rw   r   r:   r   r   r
   make_kernel_load_def   s   
r   c                 C   s   d| j  d}|S )Nint z_get_num_algos(void);r   rv   r   r   r
   make_get_num_algos_decl   s   r   c                 C   s4   d| j  d}|d| j  d| j  d7 }|d7 }|S )Nr   z_get_num_algos(void){
z  return (int)(sizeof(z_kernels) / sizeof(z_kernels[0]));
rt   r   rv   r   r   r
   make_get_num_algos_def   s   r   a0  
Triton ahead-of-time linker:

This program takes in header files generated by compile.py, and generates a
single entry-point responsible for dispatching the user's input to the right
kernel given the specializations that were compiled.

Example usage:
python link.py /path/to/headers/*.h -o kernel_name
__main__)ArgumentParser)descriptionheaders+z_Paths to header files to link. Must include linker directive annotations (autogenerated by ttc))nargshelpz--outz-ozOut filename)typer   z--prefixr   z(String to prefix kernel dispatcher names)r   defaultr   c                 C      g | ]	\}}t ||qS r   )rm   rZ   r:   rn   r   r   r
   r\         r\   c                 C   s   g | ]\}}|qS r   r   r   r   r   r
   r\     s    z.hwz#include <cuda.h>
ry   Nc                 C   r   r   )r   r   r   r   r
   r\   *  r   c                 C   s   g | ]}|qS r   r   )rZ   r:   r   r   r
   r\   +  s    z.cz#include <stdint.h>
z#include <assert.h>
)@collectionsr   pathlibr   typingr   r   dataclassesr   r   	Exceptionr   r   r   r_   rg   r   rm   rp   rx   r   r   r   r   r   r   descr   argparser   parseradd_argument
parse_argsrA   includesr   r(   h_path	read_texth_strr?   r:   r<   r%   items
algo_decls
meta_listsrn   get_num_algos_declglobal_decloutwith_suffixopenfpr5   writedefskeysr   func_pointers_defmeta_const_defload_unload_defget_num_algos_defdefault_algo_kernelr   r   r   r
   <module>   s    ]	
)	






$