o
    hO                     @   sF  d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlZd dlmZ g dZG dd	 d	eZd
d Zdd Zdd Zdd ZG dd dZG dd dZedg dZG dd deZG dd deZG dd deZG dd dZd d! Zd"Zd#Z d$d% Z!d/d'd(Z"			)	*	+	,	&	&	&d0d-d.Z#dS )1    N)defaultdict
namedtuple)
attrgetter)AnyOptional)
deprecated)
DeviceType)	EventListFormattedTimesMixinIntervalKernelFunctionEventFunctionEventAvgStringTableMemRecordsAccc                       s   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	e
dd Z							d%ddZdd Zdd ZdedefddZ		 	d&d!d"Zd#d$ Z  ZS )'r	   z'A list of Events (for pretty printing).c                    sR   | dd }| dd}| dd}t j|i | || _|| _d| _|| _d S )N
use_deviceprofile_memoryF
with_flops)popsuper__init___use_device_profile_memory_tree_built_with_flops)selfargskwargsr   r   r   	__class__ p/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/torch/autograd/profiler_util.pyr      s   
zEventList.__init__c                 C   s"   |    |   |   d| _d S )NT)_populate_cpu_children_remove_dup_nodes_set_backward_stacktracesr   r   r    r    r!   _build_tree'   s   
zEventList._build_treec                 C   s   |   S N)tabler%   r    r    r!   __str__-      zEventList.__str__c                    s   	 t   tt| D ]C}| | jd urM| | jj| | jkrMt| | jjdkrM| | j| | j_| | j| | j_| | jD ]}| | j|_q? | q
t dkrVd S  fddt| D }| 	  | 
| q)NT   r   c                    s   g | ]
\}}| vr|qS r    r    ).0indev	to_deleter    r!   
<listcomp>@       z/EventList._remove_dup_nodes.<locals>.<listcomp>)setrangelen
cpu_parentnamecpu_childrenkernelsadd	enumerateclearextend)r   idxchnew_evtsr    r/   r!   r#   0   s$   

zEventList._remove_dup_nodesc           
      C   s   dd | D }t |tdd}tj|dd d}|D ]U\}}t |dd d}g }|D ]D}t|dkri|d	 }	|jj|	jjksF|jj|	jjkrK|  n|		| |j
d
u s]J d|j ||	 nt|dks2|| q*qd
S )a4  Populate child events into each underlying FunctionEvent object.

        One event is a child of another if [s1, e1) is inside [s2, e2). Where
        s1 and e1 would be start and end of the child event's interval. And
        s2 and e2 start and end of the parent event's interval

        Example: In event list [[0, 10], [1, 3], [3, 4]] would have make [0, 10]
        be a parent of two other intervals.

        If for any reason two intervals intersect only partially, this function
        will not record a parent child relationship between then.
        c                 S   s"   g | ]}|j s|jtjkr|qS r    )is_asyncdevice_typer   CPUr,   evtr    r    r!   r1   T   s    z4EventList._populate_cpu_children.<locals>.<listcomp>thread)keyc                 S   s   | j | jfS r'   )rF   node_ideventr    r    r!   <lambda>a   s    z2EventList._populate_cpu_children.<locals>.<lambda>c                 S   s   | j j| j j gS r'   )
time_rangestartendrI   r    r    r!   rK   s       r   Nz(There is already a CPU parent event for )sortedr   	itertoolsgroupbyr5   rL   rM   rN   r   append_cpu_childr6   rG   set_cpu_parentappend)
r   sync_eventseventsthreads
_thread_idthread_eventsthread_events_current_eventsrJ   parentr    r    r!   r"   D   s@   



z EventList._populate_cpu_childrenc                    s    fdd i }| D ]} |d u r&|j d ur&|j|jf}||vr&|j ||< q
| D ]$} |}|d urM|jd us:J |j|jf}||v rJ|| |_ q)g |_ q)d S )Nc                    s$   | d u rd S | j dkr| S  | jS Nr+   )scoper6   rE   	bw_parentr    r!   rc      s
   

z6EventList._set_backward_stacktraces.<locals>.bw_parent)stacksequence_nrrF   
fwd_thread)r   
fwd_stacksrE   tpr    rb   r!   r$      s$   
z#EventList._set_backward_stacktracesc                 C   s   t dd | D S )Nc                 s       | ]}|j V  qd S r'   )self_cpu_time_totalr,   rJ   r    r    r!   	<genexpr>       z0EventList.self_cpu_time_total.<locals>.<genexpr>)sumr%   r    r    r!   rk      s   zEventList.self_cpu_time_totalNd   K   7   P   Fc                 C   s    t | ||||||| j| j|d
S )a(  Print an EventList as a nicely formatted table.

        Args:
            sort_by (str, optional): Attribute used to sort entries. By default
                they are printed in the same order as they were registered.
                Valid keys include: ``cpu_time``, ``cuda_time``, ``xpu_time``,
                ``cpu_time_total``, ``cuda_time_total``, ``xpu_time_total``,
                ``cpu_memory_usage``, ``cuda_memory_usage``, ``xpu_memory_usage``,
                ``self_cpu_memory_usage``, ``self_cuda_memory_usage``,
                ``self_xpu_memory_usage``, ``count``.
            top_level_events_only(bool, optional): Boolean flag to determine the
                selection of events to display. If true, the profiler will only
                display events at top level like top-level invocation of python
                `lstm`, python `add` or other functions, nested events like low-level
                cpu/cuda/xpu ops events are omitted for profiler result readability.

        Returns:
            A string containing the table.
        )	sort_by	row_limitmax_src_column_widthmax_name_column_widthmax_shapes_column_widthheaderr   r   top_level_events_only)_build_tabler   r   )r   rt   ru   rv   rw   rx   ry   rz   r    r    r!   r(      s   zEventList.tablec                 C   s&  ddl }| js	dn| j}t|dx}d}|d | D ]K}|jdu r#q|d|j|jj|j |j	s6|j
n
d|j d|j
 d	 |jD ]}|d
|j d|jj d|j
 d| d| d |d7 }qFqt| dkr||| d |j |  |d W d   dS 1 sw   Y  dS )zExport an EventList as a Chrome tracing tools file.

        The checkpoint can be later loaded and inspected under ``chrome://tracing`` URL.

        Args:
            path (str): Path where the trace will be written.
        r   Ncudaw[zc{{"name": "{}", "ph": "X", "ts": {}, "dur": {}, "tid": {}, "pid": "CPU functions", "args": {{}}}}, z
" node_id:z, thread_id:z "z
{"name": "z", "ph": "s", "ts": z	, "tid": z , "pid": "CPU functions", "id": z, "cat": "cpu_to_z", "args": {}}, r+      ])osr   openwrite
trace_nameformatrL   rM   
elapsed_us	is_remoterF   rH   r9   r5   seektellSEEK_SETtruncate)r   pathr   device_namefnext_idrE   _r    r    r!   export_chrome_trace   sL   




"zEventList.export_chrome_tracec                 C   s   g dS )N)rk   self_cuda_time_totalself_xpu_time_totalself_privateuse1_time_totalr    r%   r    r    r!   supported_export_stacks_metrics	  r*   z)EventList.supported_export_stacks_metricsr   metricc           	      C   s   ||   vrtdt|    tdd}t|dY}| D ]M}|jrkt|jdkrkt||ddddd	d}t	|dkrkd
}t
|jD ]}|||7 }|d7 }qH|d d d tt	| }||d  qW d    d S 1 sww   Y  d S )Nzmetric should be one of: z ;	
____r}   r   r|   devicexpuprivateuse1 ;rP    
)r   
ValueErrorstr	maketransr   rd   r5   getattrreplaceintreversed	translater   )	r   r   r   translate_tabler   rE   metric_value	stack_strentryr    r    r!   export_stacks  s4   


"zEventList.export_stacksr   c                    s   | j sJ tt}dttdf f fdd}| D ] || |||   qt| | j| j	| j
d}|D ]  jd|  _|sEd _|sJd _q6|S )a  Averages all function events over their keys.

        Args:
            group_by_input_shapes: group entries by
                (event name, input shapes) rather than just event name.
                This is useful to see which input shapes contribute to the runtime
                the most and may help with size-specific optimizations or
                choosing the best candidates for quantization (aka fitting a roof line)

            group_by_stack_n: group by top n stack trace entries

            group_by_overload_name: Differentiate operators by their overload name e.g. aten::add.Tensor
            and aten::add.out will be aggregated separately

        Returns:
            An EventList containing FunctionEventAvg objects.
        return.c                    sr   t | jt | jt | jt | jt | jg}|r| j |r(|t | j |dkr5|| j	d | 7 }t
|S Nr   )r   rG   rH   rB   	is_legacyis_user_annotationrV   overload_nameinput_shapesrd   tuple)rJ   group_by_input_shapesgroup_by_stack_ngroup_by_overload_namerG   ra   r    r!   get_keyC  s   z'EventList.key_averages.<locals>.get_keyr   r   r   Nr   )r   r   r   r   r   r:   r	   valuesr   r   r   rd   r   r   )r   r   r   r   statsr   avg_listr    ra   r!   key_averages)  s4   


zEventList.key_averagesc                 C   s(   t  }| D ]	}||7 }d|_qd|_|S )zVAverages all events.

        Returns:
            A FunctionEventAvg object.
        NTotal)r   rG   )r   
total_statrE   r    r    r!   total_averagej  s   zEventList.total_average)Nrp   rq   rr   rs   NF)Fr   F)__name__
__module____qualname____doc__r   r&   r)   r#   r"   r$   propertyrk   r(   r   r   r   r   r   r   __classcell__r    r    r   r!   r	      s4    
E

*8
Ar	   c                 C   sD   d}d}| |kr| | ddS | |kr| | ddS | ddS )+Define how to format time in FunctionEvent.g    .Ag     @@z.3fsmsusr    )time_usUS_IN_SECONDUS_IN_MSr    r    r!   _format_timex  s   r   c                 C   s6   |dkr| dksJ d|  dS | d | ddS )r   r   zExpected time_us == 0 but got NaNg      Y@.2f%r    )r   total_time_usr    r    r!   _format_time_share  s   r   c                 C   s   d}d| }d| }t | |kr| d | ddS t | |kr*| d | ddS t | |kr:| d | ddS t| d S )z&Return a formatted memory size string.i         ?r   z Gbz Mbz Kbz b)absr   )nbytesKBMBGBr    r    r!   _format_memory  s   r   c                    s   t  fddS )Nc                    s   t t|  S r'   )r   r   r%   r7   r    r!   rK     s    z!_attr_formatter.<locals>.<lambda>)r   r   r    r   r!   _attr_formatter  s   r   c                   @   sp   e Zd ZdZedZedZedZedZedZ	edZ
edd	 Zed
d Zeededdd ZdS )r
   z{Helpers for FunctionEvent and FunctionEventAvg.

    The subclass should define `*_time_total` and `count` attributes.
    cpu_timedevice_timecpu_time_totaldevice_time_totalrk   self_device_time_totalc                 C      | j dkrdS d| j | j  S Nr   g        r   )countr   r%   r    r    r!   r        zFormattedTimesMixin.cpu_timec                 C   r   r   )r   r   r%   r    r    r!   r     r   zFormattedTimesMixin.device_timez<`cuda_time` is deprecated, please use `device_time` instead.categoryc                 C      | j S r'   )r   r%   r    r    r!   	cuda_time     zFormattedTimesMixin.cuda_timeN)r   r   r   r   r   cpu_time_strdevice_time_strcpu_time_total_strdevice_time_total_strself_cpu_time_total_strself_device_time_total_strr   r   r   r   FutureWarningr   r    r    r    r!   r
     s$    

r
   c                   @   s   e Zd Zdd Zdd ZdS )r   c                 C   s   || _ || _d S r'   )rM   rN   )r   rM   rN   r    r    r!   r     s   
zInterval.__init__c                 C   s   | j | j S )z4
        Returns the length of the interval
        )rN   rM   r%   r    r    r!   r     s   zInterval.elapsed_usN)r   r   r   r   r   r    r    r    r!   r     s    r   r   )r7   r   durationc                   @   s  e Zd ZdZddddddddddddejddddddddfddZdd	 Zd
d Zdd Z	e
dd Ze
dd Ze
ededdd Ze
dd Ze
dd Ze
dd Ze
ededdd Ze
dd  Ze
ed!edd"d# Ze
d$d% Zd&d' ZdS )(r   z.Profiling information about a single function.Nr   FrP   c                 C   s   || _ || _|| _|| _|| _t||| _|| _|| _g | _	d| _
g | _d | _|| _|| _|| _|	| _|
| _|| _|| _|| _|| _|| _|| _|| _|| _|d u rT|n|| _|| _|| _|| _d| _d| _d| _ d S )Nr+   rP   )!idrH   r7   r   r   r   rL   rF   rf   r9   r   r8   r6   r   concrete_inputskwinputsrd   r`   r   cpu_memory_usagedevice_memory_usagerA   r   re   rB   device_indexdevice_resource_idr   flopsr   self_cpu_percenttotal_cpu_percenttotal_device_percent)r   r   r7   rF   start_usend_usr   rf   r   rd   r`   r   r   r   rA   r   re   rH   rB   r   r   r   r   r   r   r   r   r    r    r!   r     sB   
zFunctionEvent.__init__c                 C   s(   | j tjksJ | jt||| d S r'   )rB   r   rC   r9   rV   r   )r   r7   r   r   r    r    r!   append_kernel  s   zFunctionEvent.append_kernelc                 C   s>   | j tjksJ t|tsJ |j tjksJ | j| dS )zAppend a CPU child of type FunctionEvent.

        One is supposed to append only direct children to the event to have
        correct self cpu time being reported.
        N)rB   r   rC   
isinstancer   r8   rV   )r   childr    r    r!   rT     s   zFunctionEvent.append_cpu_childc                 C   s8   | j tjksJ t|tsJ |j tjksJ || _dS )a$  Set the immediate CPU parent of type FunctionEvent.

        One profiling FunctionEvent should have only one CPU parent such that
        the child's range interval is completely inside the parent's. We use
        this connection to determine the event is from top-level op or not.
        N)rB   r   rC   r   r   r6   )r   r^   r    r    r!   rU     s   
zFunctionEvent.set_cpu_parentc                 C   0   | j s	| jtjkrdS | jtdd | jD  S )Nr   c                 s   rj   r'   )r   r,   r   r    r    r!   rm   /      
z6FunctionEvent.self_cpu_memory_usage.<locals>.<genexpr>)rA   rB   r   rC   r   ro   r8   r%   r    r    r!   self_cpu_memory_usage+  
   
z#FunctionEvent.self_cpu_memory_usagec                 C   r  )Nr   c                 s   rj   r'   )r   r  r    r    r!   rm   7  r  z9FunctionEvent.self_device_memory_usage.<locals>.<genexpr>)rA   rB   r   rC   r   ro   r8   r%   r    r    r!   self_device_memory_usage3  r  z&FunctionEvent.self_device_memory_usagezO`self_cuda_memory_usage` is deprecated. Use `self_device_memory_usage` instead.r   c                 C   r   r'   r  r%   r    r    r!   self_cuda_memory_usage;  r   z$FunctionEvent.self_cuda_memory_usagec                 C   s   | j tjkr| j S dS r   )rB   r   rC   rL   r   r%   r    r    r!   r   C  s   
zFunctionEvent.cpu_time_totalc                 C   r  )Nr   c                 s   rj   r'   )r   r  r    r    r!   rm   N  r  z4FunctionEvent.self_cpu_time_total.<locals>.<genexpr>)rA   rB   r   rC   r   ro   r8   r%   r    r    r!   rk   J  r  z!FunctionEvent.self_cpu_time_totalc                 C   s   | j s| jsdS | jtjkr/| js%tdd | jD tdd | jD  S tdd | jD S | jtj	tj
tjfv s<J | j S )Nr   c                 s   rj   r'   r   r,   kinfor    r    r!   rm   Y  rn   z2FunctionEvent.device_time_total.<locals>.<genexpr>c                 s   rj   r'   r   )r,   r?   r    r    r!   rm   Y  r  c                 s   rj   r'   r	  r
  r    r    r!   rm   ^  rn   )rA   r   rB   r   rC   r   ro   r9   r8   CUDAPrivateUse1MTIArL   r   r%   r    r    r!   r   R  s   


zFunctionEvent.device_time_totalzA`cuda_time_total` is deprecated. Use `device_time_total` instead.c                 C   r   r'   r  r%   r    r    r!   cuda_time_totalg  r   zFunctionEvent.cuda_time_totalc                 C   sV   | j s| jsdS | jtjkr| jtdd | jD  S | jtjtj	tj
fv s(J | jS )Nr   c                 s   rj   r'   r  r  r    r    r!   rm   t  r  z7FunctionEvent.self_device_time_total.<locals>.<genexpr>)rA   r   rB   r   rC   r   ro   r8   r  r  r  r%   r    r    r!   r   o  s   

z$FunctionEvent.self_device_time_totalzK`self_cuda_time_total` is deprecated. Use `self_device_time_total` instead.c                 C   r   r'   r   r%   r    r    r!   r     r   z"FunctionEvent.self_cuda_time_totalc                 C   r   r'   r   r%   r    r    r!   rG     s   zFunctionEvent.keyc                 C   s*  | j }| j}| j}dg d| j d| j d| j d| j d| j d| j	 d| j
j d	| j
j d
tdd | jD  d| d| d| j d| j dt| j d| j d| d| d| j d| j d| j d| j dS )Nr   z<FunctionEvent id=z name=z overload_name=z device_type=z	 node_id=
 cpu_time=z
 start_us=z end_us=z cpu_children=c                 S   s   g | ]}|j qS r    )r   r  r    r    r!   r1     rO   z*FunctionEvent.__repr__.<locals>.<listcomp>r   _time=z thread= input_shapes= cpu_memory_usage=_memory_usage=z
 is_async=z is_remote=z seq_nr=z is_legacy=>)r   r   r   joinr   r7   r   rB   rH   r   rL   rM   rN   r   r8   rF   r   r   rA   r   re   r   )r   r   r   r   r    r    r!   __repr__  sR   .
zFunctionEvent.__repr__)r   r   r   r   r   rC   r   r   rT   rU   r   r  r  r   r   r  r   rk   r   r  r   r   rG   r  r    r    r    r!   r     sx    	
@






r   c                   @   s2   e Zd ZdZdddZdd Zdd	 Zd
d ZdS )r   z:Used to average stats over multiple FunctionEvent objects.r   Nc                 C   s   d | _ d| _d| _d| _d| _d | _d| _d| _d| _d| _	d | _
d | _d | _d | _d| _d| _d| _d| _d | _d | _tj| _d| _d| _d S )Nr   F)rG   r   rH   rA   r   r   r   r   rk   r   r   r   rd   r`   r   r   r  r  r8   r6   r   rC   rB   r   r   r%   r    r    r!   r     s.   
zFunctionEventAvg.__init__c                 C   s`  | j d u r=|j | _ |j| _|j| _|j| _|j| _|j| _|j| _|j| _|j| _|j	| _	|j
| _
|j| _|j| _|j| _t|ttfsFJ |j | j ksNJ |  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _|  j|j7  _| jd u r|j| _| S |jd ur|  j|j7  _| S r'   )rG   rH   rA   r   r6   r8   r   r   rd   r`   rB   r   r   r   r   r   r   r   r   rk   r   r   r   r  r  r   r   r   otherr    r    r!   r:     s@   


zFunctionEventAvg.addc                 C   s
   |  |S r'   )r:   r  r    r    r!   __iadd__  s   
zFunctionEventAvg.__iadd__c                 C   sx   | j sdn| j }| j}| j}| j}d| j d| j d| j d| d| d| d| dt| j d	| j	 d| d
| dS )Nr|   z<FunctionEventAvg key=z self_cpu_time=r  z  self_r  r   r  r  r  r  )
r   r   r   r   rG   r   r   r   r   r   )r   r   self_device_timer   device_memoryr    r    r!   r    s,   zFunctionEventAvg.__repr__)r   N)r   r   r   r   r   r:   r  r  r    r    r    r!   r     s    
&r   c                   @   s   e Zd Zdd ZdS )r   c                 C   s(   t |dkrtj|n|| |< | | S r_   )r5   torch_C	_demangle)r   rG   r    r    r!   __missing__  s    zStringTable.__missing__N)r   r   r   r"  r    r    r    r!   r     s    r   c                   @   s    e Zd ZdZdd Zdd ZdS )r   z=Acceleration structure for accessing mem_records in interval.c                 C   sL   || _ g | _g | _t|dkr$tdd t|D }t| \| _| _d S d S )Nr   c                 S   s    g | ]\}}|d    |fqS r   )start_ns)r,   irr    r    r!   r1     s     z*MemRecordsAcc.__init__.<locals>.<listcomp>)_mem_records_start_nses_indicesr5   rQ   r;   zip)r   mem_recordstmpr    r    r!   r     s   zMemRecordsAcc.__init__c                 c   sL    t | j|d }t | j|d }t||D ]}| j| j|  V  qdS )z
        Return all records in the given interval
        To maintain backward compatibility, convert us to ns in function
        i  N)bisectbisect_leftr(  bisect_rightr4   r'  r)  )r   r   r   	start_idxend_idxr%  r    r    r!   in_interval  s   zMemRecordsAcc.in_intervalN)r   r   r   r   r   r2  r    r    r    r!   r     s    r   c                    s   g d}t  fdd|D S )N))autograd/__init___make_grads)r3  backward)ztorch/tensorr5  )_internal/common_utilsprof_callable)r6  prof_func_call)r6  prof_meth_callc                 3   s*    | ]}|d   v o|d  v  V  qdS )r   r+   Nr    )r,   r   r   r    r!   rm     s   ( z&_filter_stack_entry.<locals>.<genexpr>)all)r   filtered_entriesr    r:  r!   _filter_stack_entry
  s   r=  z[memory]z[OutOfMemory]c                 C   s   t tddddddg}| |v S )Nz profiler::_record_function_enterz$profiler::_record_function_enter_newzprofiler::_record_function_exitzaten::is_leafzaten::output_nrzaten::_version)MEMORY_EVENT_NAMEOUT_OF_MEMORY_EVENT_NAME)r7   filtered_out_namesr    r    r!   _filter_name  s   
rA  Fc                 C   s$   t  }||  } |r| drd} | S )NzProfilerStep#zProfilerStep*)r   
startswith)r7   with_wildcardstring_tabler    r    r!   _rewrite_name-  s   
rE  rp   rq   rr   rs   c
           .         s  t | dkrdS tdd | D }
tdd | D }| d j}|s'|
r'tdtdd | D }tdd | D }d	urNtt| fd
ddd|||d} tdd | D d }|d	urbt||}tdd | D d }|d	urvt||}d}|}d	}dd | D }t |dk}|rtdd |D d }|d	urt||}dg}|r|d |g d7 }|d	ur|	 nd}|
r|
d| d| d| d| dg |r|
ddg |r|r|
| d d| d g |d! td"d | D }|r|d# d$ dgdg  gd> fd&d'	}d(d) }|| |r'|| |d*| d	 D ]}|| q/|rC|d+ || |rQ|d, ||d-d. |ryd/d | D }t |dkrw|t|\}}|d0|  || nd1}d }d } d }!d	}g fd2d3}"d}#d}$| D ]/}%|#|%j7 }#|%jtjkr|%jr|$|%j7 }$q|%jtjtjtjfv r|%js|$|%j7 }$q|d	ur|"d4|!  |"| |	r|"d4|!  |"d5 |"|  |"|j|  |"|  d6d7 }&d}'| D ]?}%|'|kr n6|	r|%jd	urq|'d*7 }'|%j}(|d	ur-t |(|d8 kr-|(d	|d8  d9 }(t|%j|#|%_|%js>t|%j|#nd|%_|(g})|rg|%j}*|d	urbt |*|d8 krb|*d	|d8  d9 }*|)|*g7 })|)|%j|%j|%j|%j|%j g7 })|
rt|%j|$|%_!|)
|%j"|%j!|%j#|%j$g |r|)
t%|%j&t%|%j'g |r|r|)
t%|%j(t%|%j)g |)|%j* |r|)|%j+ |r|)t,|%j-d	|  |r|%j.dkr|)d: n
|)|%j.| d; |r d}+t |%j/dkr|&|%j/d |}+|)|+ |"|j|)  |r7dgt |d*  },|%j/d*d	 D ]}-|"|j|,|&|-|g   q|,d |"|j|,  q|"|  |"d<t0|#  |
r^|"d|d	urT|	 nd d=t0|$  d1S )?zUPrint a summary of events (which can be a list of FunctionEvent or FunctionEventAvg).r   r   c                 s       | ]}|j d kV  qdS r   Nr  rl   r    r    r!   rm   F      z_build_table.<locals>.<genexpr>c                 s   rF  rG  r  rl   r    r    r!   rm   G  rH  z9use_device is None, but there is device performance data.c                 s   (    | ]}|j d uot|j dkV  qd S r   )r   r5   rl   r    r    r!   rm   O  
    
c                 s   rI  r   )r   r5   rl   r    r    r!   rm   T  rJ  Nc                    s"   t |  ddddddS )Nr|   r   r   r   )r   r   ra   )rt   r    r!   rK   ]  s    
z_build_table.<locals>.<lambda>T)rG   reverser   c                 s   s    | ]}t |jV  qd S r'   )r5   rG   rD   r    r    r!   rm   j  rH     c                 s   s    | ]
}t t|jV  qd S r'   )r5   r   r   rD   r    r    r!   rm   n  s       c                 S   s*   g | ]}|j d urt|j dkr|j qS r   )rd   r5   rD   r    r    r!   r1   v  s    $z _build_table.<locals>.<listcomp>c                 s   s"    | ]}t d d |D V  qdS )c                 s   s    | ]}t |V  qd S r'   r5   )r,   r   r    r    r!   rm   |  s    z)_build_table.<locals>.<genexpr>.<genexpr>N)max)r,   rd   r    r    r!   rm   |  s     NamezOverload Name)z
Self CPU %zSelf CPUzCPU total %z	CPU totalzCPU time avgNonezSelf z %z totalz	 time avgzCPU MemzSelf CPU Memz Memz
# of Callsc                 s   s    | ]}|j d kV  qdS )rP   N)rH   rD   r    r    r!   rm     rH  zNode IDr   r  c                    s\   d  d| t |  d d   7  < d  d|  d   7  < d  |   7  < d S )Nr   z{: }r   -)r   )paddingtext_dir)SPACING_SIZEheader_sep_lstline_length_lstrow_format_lstr    r!   
add_column  s
   z _build_table.<locals>.add_columnc                 S   sr   g d}| dks
J t dtt| d tt|d }|dkr'|t|k s)J tdt|d |t| fS )N)FLOPsKFLOPsMFLOPsGFLOPsTFLOPsPFLOPsr      r+   
   g      )	rO  minmathlog10floatr5   powfloorr   )r   flop_headers	log_flopsr    r    r!   auto_scale_flops  s
   & z&_build_table.<locals>.auto_scale_flopsr+   zInput ShapeszSource Location<)rU  c                 S   s   g | ]
}|j d kr|j qS r#  )r   rD   r    r    r!   r1     r2   zTotal Fc                    s     |    d d S )Nr   )rV   )r   )resultr    r!   rV     s   
z_build_table.<locals>.append=z1This report only display top-level ops statisticsc                 S   sD   t | |kr t | | }| |d  } t | dkr d| dd   } | S )Nra  ...rN  )r   src_column_widthoffsetr    r    r!   	trim_path  s   z_build_table.<locals>.trim_pathra  ro  z--z8.3fzSelf CPU time total: z time total: )r  )2r5   anyr   RuntimeErrorr	   rQ   rO  rc  rV   upperr=   rk   rB   r   rC   r   r   r  r  r  r   r   r6   rG   r   r   rA   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r  r   rH   r   r   r   rd   r   r  ).rX   rt   ry   ru   rv   rw   rx   r   r   rz   has_device_timehas_device_memr   has_input_shapeshas_overload_namesname_column_widthshapes_column_widthDEFAULT_COLUMN_WIDTHflops_column_widthrp  stacks	has_stackheadersr   append_node_idrZ  rk  r   	raw_flopsflops_scaleflops_header
row_format
header_sepline_lengthrV   sum_self_cpu_time_totalsum_self_device_time_totalrE   rr  event_limitr7   
row_valuesr   	src_fieldempty_headersr   r    )rV  rW  rX  rm  rY  rt   r!   r{   6  s  















	




		


r{   )F)	NNrp   rq   rr   rs   FFF)$r-  rR   rd  collectionsr   r   operatorr   typingr   r   typing_extensionsr   r  torch.autogradr   __all__listr	   r   r   r   r   r
   r   r   r   r   r   r   r=  r>  r?  rA  rE  r{   r    r    r    r!   <module>   sP     ` PQ	
