o
    ohEG                     @   s  d dl Z d dlmZ d dlZd dlZzd dlZd dlm	Z	 W n e
y)   e
dw d dlZd dlmZmZ dd Zdefd	d
Zdd Zdd Zdd ZedddgZeddddddZeddd ej D ZeddddddZedd d ej D Zed!dd"d#d$Zi d%d ej D Zdd"d#d&Ze d'd e D  ej!D ]'Z"d(e" Z#d)e" d*dd+e" d*d"d,e" d*d#iZ$e d-d e$ D  qd.d/ Z%d0d1 Z&dAd2d3Z'd4d5 Z(dBd8d9Z)dCd:d;Z*d<d= Z+d>d? Z,e-d@kre,  dS dS )D    N)
namedtuple)NegationQueryzPFailed to import hatchet. `pip install llnl-hatchet` to get the correct version.)COMPUTE_METADATA_SCOPE_NAME
TritonHookc                 C   s   g }t | ts
| g} | r>| D ]/}| }|| D ]$}||v r dnd}|dd   }|||fv r<|||   nqqt|dkrLtd| d|S )N (inc) (r   zMetric z< is not found. Use the --list flag to list available metrics)
isinstancelistlowersplitstripappendlenRuntimeError)metricsinclusive_metricsexclusive_metricsretmetric
raw_metricsuffixraw_metric_no_unit r   j/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/triton/profiler/viewer.pymatch_available_metrics   s"   
r   databasec                    s8    fdd g }| D ]} |}|d ur| | q
|S )Nc                    s   d| vr| S | d d t krd S t| d dkr"t| d dkr"d S | dg }g }|D ]} |}|d ur;|| q,t|dksHt|dkrN|| d< | S d S )Nframenamer   r   children)r   r   getr   )noder   new_childrenchild	new_childremove_frame_helperr   r   r&   &   s"    
z*remove_frames.<locals>.remove_frame_helper)r   )r   new_databaser!   new_noder   r%   r   remove_frames    s   
r)   c                    sR   t | }t|}|d}tj|}|   fdd|jj	D }| ||fS )N   c                    s   g | ]}| vr|qS r   r   ).0r   r   r   r   
<listcomp>F       z#get_raw_metrics.<locals>.<listcomp>)
jsonloadr)   popht
GraphFramefrom_literalshow_metric_columns	dataframecolumns)filer   device_infogfr   r   r,   r   get_raw_metrics@   s   

r;   c              	   C   s  t jd| jdgd}|D ]}|| D ]}|| | d }|| | d }|| | d }tjD ]}| d |k}	| |	 }
d| |
jvrDq/d	}|d
kr|dkrUd|d  }nX|dkr`d|d  }nM|dkru|d | d d d |d  }n8|dkr|d |d  d |d  }n%|dkr|dkrd|d  }n|dks|dkrd|d  }ntd| |j|	df  |
d|  d	| 7  < q/qq|S ) N        min_timeindexr7   archnum_sms
clock_rate	device_idflopsr   CUDA80g  2C   89g bB90r   g    x:Ai     mB100i @       @@g    .AHIPgfx90ag  //bBgfx941gfx942g ?y"CzUnsupported device type: )	pd	DataFramer?   r   flops_widthr7   
ValueErrorlocfillna)dfr9   min_time_flopsdevice_typedevice_indexr@   rA   rB   widthidxdevice_frames	max_flopsr   r   r   get_min_time_flopsJ   sB   
",r`   c           
      C   s   t jd| jdgd}|D ]<}|| D ]5}| d |k}| | }|| | d }|| | d }d| | d d	 }	|j|df  |d
 |	 7  < qq|S )Nr<   r=   r>   rC   memory_clock_rate	bus_width   rM   rG   bytes)rR   rS   r?   rV   )
rX   r9   min_time_bytesrZ   r[   r]   r^   ra   rb   peak_bandwidthr   r   r   get_min_time_bytesm   s    rg   
FactorDictr   factortimer*   gMbP?gư>g&.>)ztime/sztime/msztime/usztime/nsavg_timec                 C      i | ]
\}}d | |qS avg_r   r+   keyvaluer   r   r   
<dictcomp>|   s    rr   cpu_time)z
cpu_time/szcpu_time/mszcpu_time/uszcpu_time/nsavg_cpu_timec                 C   rl   rm   r   ro   r   r   r   rr      s    rd   g    eArK   )zbyte/szgbyte/sztbyte/sc                 C   s   i | ]}|t qS r   )bytes_factor_dictr+   rp   r   r   r   rr      s    
)zflop/szgflop/sztflop/sc                 C   s   i | ]}|t d tqS )rD   )rh   default_flop_factor_dictrv   r   r   r   rr      s    rD   flopz/sgfloptflopc                 C   s   i | ]}|t ttqS r   )rh   factor_namefactor_dictrv   r   r   r   rr      r.   c                    s  g } fdd}|D ]G}|dkrJt | j|}t| j|}	|| jdt}
| jd  }|	d |d t|
 | jd< tj| jj	|df< |
d q|tv rt| }|j}|j}t| d }| j| || jdt ||  | j| d< |
| d q|tjv s|tjv s|tjv s|tjv r|tjv p|tjv }|tjv p|tjv }|r|rtntn|rtnt}|rd	nd}|jd
 |d
d  }|| j||}|r|| jd  }||j|  | j| d< |
| d q|d
}|d }t|dkrF|d }|dkrtd| t| d }| j| }d}d|v r,d}| j| jd }n| j|  }|| d | j|| < |
||  qt| d }|
| q|D ]}|drf| j
| qV| j
| qV|S )Nc                    sD   t | d }|jd |dd dd  }| | |j|  S )Nr   /r   r*   ))r   r   r   ri   )rX   r   r|   time_metric_name	time_unitr   r   r   r   get_time_seconds   s   "z(derive_metrics.<locals>.get_time_secondsutilrj   rC   r=   r   r   rs   r}   r*   zcount (inc)%zUnsupported unit r   z(inc)g      Y@)rg   r6   r`   time_factor_dictisnacombinemaxnpnanrV   r   derivable_metricsr   ri   r   cpu_time_factor_dictavg_time_factor_dictavg_cpu_time_factor_dictr   r   rU   ilocsumendswithinc_metricsexc_metrics)r:   r   r   r   r9   derived_metricsr   r   re   rY   time_secinternal_frame_indicesderivable_metricmetric_namemetric_factor_dictmatched_metric_nameis_cpuis_avgr|   metric_time_unit
time_valuemetric_name_and_unitmetric_unitsingle_framer   totalderived_metricr   r   r   derive_metrics   st   





r   c                 C   sr   |dkr| j d dd | j d< | S |dkr&| j d dd | j d< | S |dkr7| j d dd | j d< | S )	Nfile_function_liner   c                 S      |  dd S )Nr}   r   xr   r   r   <lambda>       zformat_frames.<locals>.<lambda>function_linec                 S   r   )N:r   r   r   r   r   r   r      r   file_functionc                 S   s6   |  dd  dd  d|  dd  dd  S )Nr}   r   r   r   @r   r   r   r   r   r      s   6 )r6   apply)r:   formatr   r   r   format_frames   s   

r   c                 C   sl   |rd| d}| j |dd} |r"d| d}t|}| j |dd} |r4d|d| ig}| j |dd} | S )Nz1
MATCH ("*")->(".", p)->("*")
WHERE p."name" =~ "z"
T)squashz*
MATCH (".", p)->("*")
WHERE p."name" =~ "*z>= )filterr   )r:   includeexclude	thresholdr   queryinclusion_queryr   r   r   filter_frames   s   r   c                 C   s:   d|v r| j d j}t|}|dk rtd d S d S d S )Nzbytes (inc)r   zZWarning: Negative byte values detected, this is usually the result of a datatype overflow
)r6   valuesr   nanminprint)r:   r   byte_valuesmin_byte_valuer   r   r   emit_warnings   s   
r   d   Fc                 C   s   t | |} t| j|d|dd |r]td|d   | jj|d gdd}tdt|D ]0}t|j| d d	krF|j| d d d	 d
 n|j| d }td||j| |d   q,t	| | d S )NTF)metric_columnexpand_namedepthrender_headerzSorted kernels by metric r   )by	ascendingr*   r   r   z...z{:105} {:.4})
r   r   treer6   sort_valuesranger   r   r   r   )r:   r   r   r   print_sorted	sorted_dfrowkernel_namer   r   r   
print_tree  s   
 r   c           
      C   s   t |d6}t|\}}}}	t|| dksJ d|  t|| |||	} t||||| d }|| fW  d    S 1 s>w   Y  d S )Nrr   z"No metrics found in the input file)openr;   r   update_inclusive_columnsr   r   )
r   filenamer   r   r   fr:   r   r   r9   r   r   r   parse  s   $r   c                 C   s   t | dS}t|\}}}}td |r+|D ]}|dd   }td|  qtd |rH|D ]}|dd   }td|  q3W d    d S W d    d S 1 s[w   Y  d S )Nr   zAvailable inclusive metrics:r   r   z- zAvailable exclusive metrics:)r   r;   r   r   r   r   )	file_namer   _r   r   r   r   r   r   r   show_metrics  s    	"r   c                  C   s  t jdt jd} | jddddd | jdd	td d
d | jddtd dd | jddtd dd | jddtd dd | jddtddd | jddtg dddd | jddd d!d" | jd#d$td d%d |  \}}t|d&kssJ d'|d( }|j	r|j	
d)nd }|j}|j}|j}|j}|j}	|j}
|j}|r|rtd*|jrt| d S |rt|||||\}}|
rt||
|||\}}||}t||||	| d S d S )+Nz,Performance data viewer for proton profiles.)descriptionformatter_classz-lz--list
store_truea,  List available metrics. Metric names are case insensitive and ignore units.
Derived metrics can be created when source metrics are available.
- time/s, time/ms, time/us, time/ns: time
- avg_time/s, avg_time/ms, avg_time/us, avg_time/ns: time / count
- flop[<8/16/32/64>]/s, gflop[<8/16/32/64>]/s, tflop[<8/16/32/64>]/s: flops / time
- byte/s, gbyte/s, tbyte/s: bytes / time
- util: max(sum(flops<width>) / peak_flops<width>_time, sum(bytes) / peak_bandwidth_time)
- <metric>/%%: frame(metric) / sum(metric). Only availble for inclusive metrics (e.g. time)
)actionhelpz-mz	--metricszAt maximum two metrics can be specified, separated by comma.
There are two modes:
1) Choose the output metric to display. It's case insensitive and ignore units.
2) Derive a new metric from existing metrics.
)typedefaultr   z-iz	--includea  Find frames that match the given regular expression and return all nodes in the paths that pass through the matching frames.
For example, the following command will display all paths that contain frames that contains "test":
```
proton-viewer -i ".*test.*" path/to/file.json
```
z-ez	--excludezExclude frames that match the given regular expression and their children.
For example, the following command will exclude all paths starting from frames that contains "test":
```
proton-viewer -e ".*test.*" path/to/file.json
```
z-tz--thresholdzrExclude frames(kernels) whose metrics are below the given threshold. This filter only applies on the first metric.z-dz--depthr   z The depth of the tree to displayz-fz--format)fullr   r   r   r   a!  Formatting the frame name.
- full: include the path, file name, function name and line number.
- file_function_line: include the file name, function name and line number.
- function_line: include the function name and line number.
- file_function: include the file name and function name.
)r   choicesr   r   z--print-sortedFz6Sort output by metric value instead of chronologically)r   r   r   z--diff-profilez-diffzCompare two profiles. When used as 'proton-viewer -m time -diff file1.log file2.log', computes the difference: file2['time'] - file1['time']r*   zMust specify a file to readr   ,z'Cannot specify both include and exclude)argparseArgumentParserRawTextHelpFormatteradd_argumentstrfloatintparse_known_argsr   r   r   r   r   r   r   r   diff_profiler   rU   r
   r   r   subr   )	argparserargstarget_argsr   r   r   r   r   r   r   diffr   r:   r   gf2r   r   r   r   main,  s   
r   __main__)NNNN)r   NF)NNN).r   collectionsr   r/   pandasrR   hatchetr2   hatchet.queryr   ImportErrornumpyr   triton.profiler.hookr   r   r   r)   r;   r`   rg   rh   r   ri   itemsr   r   r   ru   keysr   rw   updaterT   r\   r{   r|   r   r   r   r   r   r   r   r   __name__r   r   r   r   <module>   sn     
#

(I



q
