
    |Yh                        d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlZd dlmZ g dZ G d d	e          Zd
 Zd Zd Zd Z G d d          Z G d d          Z edg d          Z G d de          Z G d de          Z G d de          Z G d d          Zd ZdZdZ d Z!d'd!Z"	 	 	 	 	 	 	 	 	 d(d&Z#dS ))    N)defaultdict
namedtuple)
attrgetter)AnyOptional)
deprecated)
DeviceType)	EventListFormattedTimesMixinIntervalKernelFunctionEventFunctionEventAvgStringTableMemRecordsAccc                        e Zd ZdZ fdZd Zd Zd Zd Zd Z	e
d             Z	 	 	 	 	 	 	 ddZd Zd ZdedefdZ	 	 	 ddZd Z xZS )r
   z'A list of Events (for pretty printing).c                     |                     dd           }|                     dd          }|                     dd          } t                      j        |i | || _        || _        d| _        || _        d S )N
use_deviceprofile_memoryF
with_flops)popsuper__init___use_device_profile_memory_tree_built_with_flops)selfargskwargsr   r   r   	__class__s         n/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/torch/autograd/profiler_util.pyr   zEventList.__init__   s    ZZd33
$4e<<ZZe44
$)&)))%- %    c                     |                                   |                                  |                                  d| _        d S )NT)_populate_cpu_children_remove_dup_nodes_set_backward_stacktracesr   r   s    r"   _build_treezEventList._build_tree'   sF    ##%%%   &&(((r#   c                 *    |                                  S N)tabler(   s    r"   __str__zEventList.__str__-   s    zz||r#   c                    	 t                      t          t          |                     D ]}| |         j        | |         j        j        | |         j        k    rt          | |         j        j                  dk    rq| |         j        | |         j        _        | |         j        | |         j        _        | |         j        D ]}| |         j        |_                            |           t                    dk    rd S fdt          |           D             }| 	                                 | 
                    |           J)NT   r   c                 "    g | ]\  }}|v	|S  r1   ).0indev	to_deletes      r"   
<listcomp>z/EventList._remove_dup_nodes.<locals>.<listcomp>@   s'    RRRwsBS	=Q=Q=Q=Q=Qr#   )setrangelen
cpu_parentnamecpu_childrenkernelsadd	enumerateclearextend)r   idxchnew_evtsr5   s       @r"   r&   zEventList._remove_dup_nodes0   s1   	"ISYY'' 
' 
'I(4S	,1T#Y^CCDI0=>>!CC8<S	8NDI(53793DDI(0"3i4 = =(,S	(<MM#&&&9~~""RRRR)D//RRRHJJLLLKK!!!#	"r#   c                 j   d | D             }t          |t          d                    }t          j        |d           }|D ]\  }}t          |d           }g }|D ]}t	          |          dk    r|d         }	|j        j        |	j        j        k    s|j        j        |	j        j        k    r|                                 nD|		                    |           |j
        J d	|j                     |                    |	           nt	          |          dk    |                    |           ӌdS )
a4  Populate child events into each underlying FunctionEvent object.

        One event is a child of another if [s1, e1) is inside [s2, e2). Where
        s1 and e1 would be start and end of the child event's interval. And
        s2 and e2 start and end of the parent event's interval

        Example: In event list [[0, 10], [1, 3], [3, 4]] would have make [0, 10]
        be a parent of two other intervals.

        If for any reason two intervals intersect only partially, this function
        will not record a parent child relationship between then.
        c                 J    g | ] }|j         s|j        t          j        k    |!S r1   )is_asyncdevice_typer	   CPUr2   evts     r"   r6   z4EventList._populate_cpu_children.<locals>.<listcomp>T   s=     
 
 
<
 %(Oz~$E$E $E$E$Er#   thread)keyc                     | j         | j        fS r+   )rL   node_idevents    r"   <lambda>z2EventList._populate_cpu_children.<locals>.<lambda>a   s    u|U]&C r#   c                 4    | j         j        | j         j         gS r+   )
time_rangestartendrP   s    r"   rR   z2EventList._populate_cpu_children.<locals>.<lambda>s   s    5#3#9E<L<P;P"Q r#   r   Nz(There is already a CPU parent event for )sortedr   	itertoolsgroupbyr9   rT   rU   rV   r   append_cpu_childr:   rM   set_cpu_parentappend)
r   sync_eventseventsthreads
_thread_idthread_eventsthread_events_current_eventsrQ   parents
             r"   r%   z EventList._populate_cpu_childrenD   s    
 

 
 

 8$$
 
 
 #CC
 
 
  *1 	- 	-%J#QQ  N 35N' - -.))A--+B/F(.&2C2GGG +/&2C2GGG '**,,,,//666!,44QeiQQ 544,,V444 .))A--  %%e,,,,#-	- 	-r#   c                     fdi }| D ]0} |          #|j         |j        |j        f}||vr
|j         ||<   1| D ]?} |          }|0|j        J |j        |j        f}||v r||         |_         8g |_         @d S )Nc                 F    | d S | j         dk    r| S  | j                  S Nr/   )scoper:   )rK   	bw_parents    r"   rj   z6EventList._set_backward_stacktraces.<locals>.bw_parent   s/    {ta
 y000r#   )stacksequence_nrrL   
fwd_thread)r   
fwd_stacksrK   tprj   s        @r"   r'   z#EventList._set_backward_stacktraces   s    	1 	1 	1 	1 	1 
 	. 	.Cy~~%#)*?_cj1J&&$'IJqM 	# 	#C	#A}|///]AL1
?? *1CII "CI	# 	#r#   c                 4    t          d | D                       S )Nc              3   $   K   | ]}|j         V  d S r+   )self_cpu_time_totalr2   rQ   s     r"   	<genexpr>z0EventList.self_cpu_time_total.<locals>.<genexpr>   s%      ??5,??????r#   )sumr(   s    r"   rs   zEventList.self_cpu_time_total   s    ??$??????r#   Nd   K   7   P   Fc                 H    t          | ||||||| j        | j        |
  
        S )a(  Print an EventList as a nicely formatted table.

        Args:
            sort_by (str, optional): Attribute used to sort entries. By default
                they are printed in the same order as they were registered.
                Valid keys include: ``cpu_time``, ``cuda_time``, ``xpu_time``,
                ``cpu_time_total``, ``cuda_time_total``, ``xpu_time_total``,
                ``cpu_memory_usage``, ``cuda_memory_usage``, ``xpu_memory_usage``,
                ``self_cpu_memory_usage``, ``self_cuda_memory_usage``,
                ``self_xpu_memory_usage``, ``count``.
            top_level_events_only(bool, optional): Boolean flag to determine the
                selection of events to display. If true, the profiler will only
                display events at top level like top-level invocation of python
                `lstm`, python `add` or other functions, nested events like low-level
                cpu/cuda/xpu ops events are omitted for profiler result readability.

        Returns:
            A string containing the table.
        )	sort_by	row_limitmax_src_column_widthmax_name_column_widthmax_shapes_column_widthheaderr   r   top_level_events_only)_build_tabler   r   )r   r|   r}   r~   r   r   r   r   s           r"   r,   zEventList.table   s?    : !5"7$;/'"7
 
 
 	
r#   c                    ddl }| j        sdn| j        }t          |d          5 }d}|                    d           | D ]}|j        
|                    d                    |j        |j        j        |j                                        |j	        s|j
        nd|j         d|j
         d	                     |j        D ]@}|                    d
|j         d|j        j         d|j
         d| d| d           |dz  }At          |           dk    rD|                    |                                dz
  |j                   |                                 |                    d           ddd           dS # 1 swxY w Y   dS )zExport an EventList as a Chrome tracing tools file.

        The checkpoint can be later loaded and inspected under ``chrome://tracing`` URL.

        Args:
            path (str): Path where the trace will be written.
        r   Ncudaw[zc{{"name": "{}", "ph": "X", "ts": {}, "dur": {}, "tid": {}, "pid": "CPU functions", "args": {{}}}}, z
" node_id:z, thread_id:z "z
{"name": "z", "ph": "s", "ts": z	, "tid": z , "pid": "CPU functions", "id": z, "cat": "cpu_to_z", "args": {}}, r/      ])osr   openwrite
trace_nameformatrT   rU   
elapsed_us	is_remoterL   rO   r=   r9   seektellSEEK_SETtruncate)r   pathr   device_namefnext_idrK   _s           r"   export_chrome_tracezEventList.export_chrome_trace   s    				$($4Jff$:J$__ +	G GGCLLL !! !!>)' (.v,1133"}R

Q#+QQ3:QQQ( (     ! !A GG(cn ( (!$!5( ( #&*( (
 ")( ( +6( ( (	 	 	 qLGG! 4yy1}}qvvxx!|R[111

GGCLLLW+	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	 +	s   E	E;;E?E?c                 
    g dS )N)rs   self_cuda_time_totalself_xpu_time_totalself_privateuse1_time_totalr1   r(   s    r"   supported_export_stacks_metricsz)EventList.supported_export_stacks_metrics	  s    
 
 
 	
r#   r   metricc           	         ||                                  vr1t          dt          |                                            z             t                              dd          }t	          |d          5 }| D ]}|j        rt          |j                  dk    rt          ||                    dd                              dd                              d	d                    }t          |          dk    rxd
}t          |j                  D ]}||                    |          z  }|dz  } |d d         dz   t          t          |                    z   }|                    |dz              	 d d d            d S # 1 swxY w Y   d S )Nzmetric should be one of: z ;	
____r   r   r   devicexpuprivateuse1 ;rW    
)r   
ValueErrorstr	maketransr   rk   r9   getattrreplaceintreversed	translater   )	r   r   r   translate_tabler   rK   metric_value	stack_strentrys	            r"   export_stackszEventList.export_stacks  s   ==????+d::<<==>   --&99$__ 	2 2 29 2SY!!3!3#*vx88 11 99	$ $L <((1,,$&	%-ci%8%8 - -E%)I)III%,II$-crcNS$83s<?P?P;Q;Q$Q		D 01112	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2s   3C<E==FFr   c                    | j         sJ t          t                    }dt          t          df         ffd}| D ])| ||||                                                  *t          |                                | j        | j	        | j
                  }|D ](j        d|         _        |sd_        |sd_        )|S )a  Averages all function events over their keys.

        Args:
            group_by_input_shapes: group entries by
                (event name, input shapes) rather than just event name.
                This is useful to see which input shapes contribute to the runtime
                the most and may help with size-specific optimizations or
                choosing the best candidates for quantization (aka fitting a roof line)

            group_by_stack_n: group by top n stack trace entries

            group_by_overload_name: Differentiate operators by their overload name e.g. aten::add.Tensor
            and aten::add.out will be aggregated separately

        Returns:
            An EventList containing FunctionEventAvg objects.
        return.c                    t          | j                  t          | j                  t          | j                  t          | j                  t          | j                  g}|r|                    j                   |r'|                    t          | j                             |dk    r|| j	        d |         z  }t          |          S Nr   )r   rM   rO   rH   	is_legacyis_user_annotationr]   overload_nameinput_shapesrk   tuple)rQ   group_by_input_shapesgroup_by_stack_ngroup_by_overload_namerM   rK   s        r"   get_keyz'EventList.key_averages.<locals>.get_keyC  s     EIEM""E%&&EO$$E,--C & .

3,---$ 4

3u122333!##u{#4$4#455::r#   r   r   r   Nr   )r   r   r   r   r   r>   r
   valuesr   r   r   rk   r   r   )r   r   r   r   statsr   avg_listrK   s          @r"   key_averageszEventList.key_averages)  s   . 9DEU9V9V	38_	 	 	 	 	 	$  	 	C.0@BX  c#hhhhLLNN'/'	
 
 
  	' 	'C	"3#3"34CI( &#% ) '$&!r#   c                 R    t                      }| D ]}||z  }d|_        d|_        |S )zVAverages all events.

        Returns:
            A FunctionEventAvg object.
        NTotal)r   rM   )r   
total_statrK   s      r"   total_averagezEventList.total_averagej  s>     &''
 	" 	"C#J!JNN 
r#   )Nrw   rx   ry   rz   NF)Fr   F)__name__
__module____qualname____doc__r   r)   r-   r&   r%   r'   propertyrs   r,   r   r   r   r   r   r   __classcell__)r!   s   @r"   r
   r
      sG       11& & & & &       " " "(C- C- C-J# # #4 @ @ X@
   "#(
 (
 (
 (
T6 6 6p
 
 
2# 2s 2 2 2 24 $$	? ? ? ?B      r#   r
   c                 R    d}d}| |k    r	| |z  ddS | |k    r	| |z  ddS | ddS )+Define how to format time in FunctionEvent.g    .Ag     @@z.3fsmsusr1   )time_usUS_IN_SECONDUS_IN_MSs      r"   _format_timer   x  sb    "LH,L(/////(H$,,,,,r#   c                 P    |dk    r| dk    sJ d|              dS | dz  |z  ddS )r   r   zExpected time_us == 0 but got NaNg      Y@.2f%r1   )r   total_time_uss     r"   _format_time_sharer     sI    !|||GgGG|||uo-44444r#   c                     d}d|z  }d|z  }t          |           |k    r| dz  |z  ddS t          |           |k    r| dz  |z  ddS t          |           |k    r| dz  |z  ddS t          |           dz   S )z&Return a formatted memory size string.i         ?r   z GBz MBz KBz B)absr   )nbytesKBMBGBs       r"   _format_memoryr     s    	B	B	B
6{{b3,#,,,,,	V		3,#,,,,,	V		3,#,,,,,6{{T!!r#   c                 (     t           fd          S )Nc                 >    t          t          |                     S r+   )r   r   )r   r;   s    r"   rR   z!_attr_formatter.<locals>.<lambda>  s    gdD.A.A!B!B r#   )r   r;   s   `r"   _attr_formatterr     s    BBBBCCCr#   c                       e Zd ZdZ ed          Z ed          Z ed          Z ed          Z ed          Z	 ed          Z
ed             Zed	             Ze ed
e          d                         ZdS )r   z{Helpers for FunctionEvent and FunctionEventAvg.

    The subclass should define `*_time_total` and `count` attributes.
    cpu_timedevice_timecpu_time_totaldevice_time_totalrs   self_device_time_totalc                 @    | j         dk    rdnd| j        z  | j         z  S Nr   g        r   )countr   r(   s    r"   r   zFormattedTimesMixin.cpu_time  s%    jAooss31D+Dtz+QQr#   c                 @    | j         dk    rdnd| j        z  | j         z  S r   )r   r   r(   s    r"   r   zFormattedTimesMixin.device_time  s%    jAooss31G+G$*+TTr#   z<`cuda_time` is deprecated, please use `device_time` instead.categoryc                     | j         S r+   )r   r(   s    r"   	cuda_timezFormattedTimesMixin.cuda_time  s     r#   N)r   r   r   r   r   cpu_time_strdevice_time_strcpu_time_total_strdevice_time_total_strself_cpu_time_total_strself_device_time_total_strr   r   r   r   FutureWarningr   r1   r#   r"   r   r     s         
 #?:..L%om44O()9::+O,?@@-o.CDD!01I!J!JR R XR U U XU ZF     	  X
     r#   r   c                       e Zd Zd Zd ZdS )r   c                 "    || _         || _        d S r+   )rU   rV   )r   rU   rV   s      r"   r   zInterval.__init__  s    
r#   c                      | j         | j        z
  S )z4
        Returns the length of the interval
        )rV   rU   r(   s    r"   r   zInterval.elapsed_us  s     x$*$$r#   N)r   r   r   r   r   r1   r#   r"   r   r     s2          % % % % %r#   r   r   )r;   r   durationc                      e Zd ZdZddddddddddddej        ddddddddfdZd Zd Zd	 Z	e
d
             Ze
d             Ze
 ede          d                         Ze
d             Ze
d             Ze
d             Ze
 ede          d                         Ze
d             Ze
 ede          d                         Ze
d             Zd ZdS )r   z.Profiling information about a single function.Nr   FrW   c                    || _         || _        || _        || _        || _        t          ||          | _        || _        || _        g | _	        d| _
        g | _        d | _        || _        || _        || _        |	| _        |
| _        || _        || _        || _        || _        || _        || _        || _        || _        ||n|| _        || _        || _        || _        d| _        d| _        d| _         d S )Nr/   rW   )!idrO   r;   r   r   r   rT   rL   rm   r=   r   r<   r:   r   concrete_inputskwinputsrk   ri   r   cpu_memory_usagedevice_memory_usagerG   r   rl   rH   device_indexdevice_resource_idr   flopsr   self_cpu_percenttotal_cpu_percenttotal_device_percent)r   r	  r;   rL   start_usend_usr   rm   r   rk   ri   r   r  r  rG   r   rl   rO   rH   r  r  r   r  r   r
  r  r   s                              r"   r   zFunctionEvent.__init__  s   : #	"/)$,Xv$>$>!)3%'
1337-9*9(0 

)3%5(; &( +'2!-(0FF6H 	  )$)
2D "!#$&!!!r#   c                     | j         t          j        k    sJ | j                            t          |||                     d S r+   )rH   r	   rI   r=   r]   r   )r   r;   r   r  s       r"   append_kernelzFunctionEvent.append_kernel  s@    :>1111F4::;;;;;r#   c                     | j         t          j        k    sJ t          |t                    sJ |j         t          j        k    sJ | j                            |           dS )zAppend a CPU child of type FunctionEvent.

        One is supposed to append only direct children to the event to have
        correct self cpu time being reported.
        N)rH   r	   rI   
isinstancer   r<   r]   )r   childs     r"   r[   zFunctionEvent.append_cpu_child  sc     :>1111%///// JN2222  '''''r#   c                     | j         t          j        k    sJ t          |t                    sJ |j         t          j        k    sJ || _        dS )a$  Set the immediate CPU parent of type FunctionEvent.

        One profiling FunctionEvent should have only one CPU parent such that
        the child's range interval is completely inside the parent's. We use
        this connection to determine the event is from top-level op or not.
        N)rH   r	   rI   r  r   r:   )r   re   s     r"   r\   zFunctionEvent.set_cpu_parent  sQ     :>1111&-00000!Z^3333 r#   c                     | j         s| j        t          j        k    rdS | j        t          d | j        D                       z
  S )Nr   c              3   $   K   | ]}|j         V  d S r+   )r  r2   r  s     r"   ru   z6FunctionEvent.self_cpu_memory_usage.<locals>.<genexpr>/  s6       +
 +
',E"+
 +
 +
 +
 +
 +
r#   )rG   rH   r	   rI   r  rv   r<   r(   s    r"   self_cpu_memory_usagez#FunctionEvent.self_cpu_memory_usage+  s[    = 	D,
>>1$s +
 +
040A+
 +
 +
 (
 (
 
 	
r#   c                     | j         s| j        t          j        k    rdS | j        t          d | j        D                       z
  S )Nr   c              3   $   K   | ]}|j         V  d S r+   )r  r  s     r"   ru   z9FunctionEvent.self_device_memory_usage.<locals>.<genexpr>7  s6       .
 .
*/E%.
 .
 .
 .
 .
 .
r#   )rG   rH   r	   rI   r  rv   r<   r(   s    r"   self_device_memory_usagez&FunctionEvent.self_device_memory_usage3  s[    = 	D,
>>1'# .
 .
373D.
 .
 .
 +
 +
 
 	
r#   zO`self_cuda_memory_usage` is deprecated. Use `self_device_memory_usage` instead.r   c                     | j         S r+   r"  r(   s    r"   self_cuda_memory_usagez$FunctionEvent.self_cuda_memory_usage;  s     ,,r#   c                 b    | j         t          j        k    r| j                                        S dS r   )rH   r	   rI   rT   r   r(   s    r"   r   zFunctionEvent.cpu_time_totalC  s+    z~--?--///1r#   c                     | j         s| j        t          j        k    rdS | j        t          d | j        D                       z
  S )Nr   c              3   $   K   | ]}|j         V  d S r+   )r   r  s     r"   ru   z4FunctionEvent.self_cpu_time_total.<locals>.<genexpr>N  s6       )
 )
%*E )
 )
 )
 )
 )
 )
r#   )rG   rH   r	   rI   r   rv   r<   r(   s    r"   rs   z!FunctionEvent.self_cpu_time_totalJ  s[    = 	D,
>>1"S )
 )
.2.?)
 )
 )
 &
 &
 
 	
r#   c                    | j         s| j        sdS | j        t          j        k    rb| j        s=t          d | j        D                       t          d | j        D                       z   S t          d | j        D                       S | j        t          j	        t          j
        t          j        t          j        fv sJ | j                                        S )Nr   c              3   $   K   | ]}|j         V  d S r+   r  r2   kinfos     r"   ru   z2FunctionEvent.device_time_total.<locals>.<genexpr>Y  $      DDe5>DDDDDDr#   c              3   $   K   | ]}|j         V  d S r+   r   )r2   rC   s     r"   ru   z2FunctionEvent.device_time_total.<locals>.<genexpr>Y  s>       K K-/B(K K K K K Kr#   c              3   $   K   | ]}|j         V  d S r+   r+  r,  s     r"   ru   z2FunctionEvent.device_time_total.<locals>.<genexpr>^  r.  r#   )rG   r   rH   r	   rI   r   rv   r=   r<   CUDAPrivateUse1MTIAHPUrT   r   r(   s    r"   r   zFunctionEvent.device_time_totalR  s    = 	 	1z~--> EDDt|DDDDDs K K373DK K K H H  
 DDt|DDDDDD#&	(     ?--///r#   zA`cuda_time_total` is deprecated. Use `device_time_total` instead.c                     | j         S r+   r0  r(   s    r"   cuda_time_totalzFunctionEvent.cuda_time_totalh  s     %%r#   c                    | j         s| j        sdS | j        t          j        k    r&| j        t          d | j        D                       z
  S | j        t          j        t          j	        t          j
        t          j        fv sJ | j        S )Nr   c              3   $   K   | ]}|j         V  d S r+   r0  r  s     r"   ru   z7FunctionEvent.self_device_time_total.<locals>.<genexpr>u  s6       0 0,1'0 0 0 0 0 0r#   )rG   r   rH   r	   rI   r   rv   r<   r2  r3  r4  r5  r(   s    r"   r   z$FunctionEvent.self_device_time_totalp  s    = 	 	1z~--)C 0 0595F0 0 0 - -   #&	(     ))r#   zK`self_cuda_time_total` is deprecated. Use `self_device_time_total` instead.c                     | j         S r+   r   r(   s    r"   r   z"FunctionEvent.self_cuda_time_total  s     **r#   c                     | j         S r+   r   r(   s    r"   rM   zFunctionEvent.key  s
    yr#   c                 2   | j         }| j        }| j        }d                    g d| j         d| j         d| j         d| j         d| j         d| j	         d| j
        j         d	| j
        j         d
t          d | j        D                        d| d| d| j         d| j         dt          | j                   d| j         d| d| d| j         d| j         d| j         d| j         d          S )Nr   z<FunctionEvent id=z name=z overload_name=z device_type=z	 node_id=
 cpu_time=z
 start_us=z end_us=z cpu_children=c                     g | ]	}|j         
S r1   )r	  r  s     r"   r6   z*FunctionEvent.__repr__.<locals>.<listcomp>  s     I I Ie I I Ir#   r   _time=z thread= input_shapes= cpu_memory_usage=_memory_usage=z
 is_async=z is_remote=z seq_nr=z is_legacy=>)r   r   r  joinr	  r;   r   rH   rO   r   rT   rU   rV   r   r<   rL   r   r  rG   r   rl   r   )r   r   r   r  s       r"   __repr__zFunctionEvent.__repr__  s   o*"6y y y y y y y y y y	 y y y y$J\ y y y y+y y y y6:ly y y yNRN_y y y y-y y y y7;7Jy y y y   I It7H I I IJJy y y y NYy y y y aly y y y I	y y y y (,{	y y y y CFdFWBXBX	y y y y
 !% 5y y y y
 9Dy y y y
 Tgy y y y y y y y 37.y y y y KOJZy y y y hlguy y y y y y	
r#   )r   r   r   r   r	   rI   r   r  r[   r\   r   r  r"  r   r  r%  r   rs   r   r7  r   r   rM   rF  r1   r#   r"   r   r     s(       88 N 7>' >' >' >'@< < <	( 	( 	(
! 
! 
! 
 
 X
 
 
 X
 ZY  - -	  X
-   X 
 
 X
 0 0 X0* ZK  & &	  X
& * * X*  ZU  + +	  X
+   X
 
 
 
 
r#   r   c                   ,    e Zd ZdZddZd Zd Zd ZdS )	r   z:Used to average stats over multiple FunctionEvent objects.r   Nc                 \   d | _         d| _        d| _        d| _        d| _        d | _        d| _        d| _        d| _        d| _	        d | _
        d | _        d | _        d | _        d| _        d| _        d| _        d| _        d | _        d | _        t(          j        | _        d| _        d| _        d S )Nr   F)rM   r   rO   rG   r   r   r   r   rs   r   r   r   rk   ri   r  r  r  r"  r<   r:   r	   rI   rH   r   r  r(   s    r"   r   zFunctionEventAvg.__init__  s    "&
#$)-#$&'() +,#7;,0%)
$(
%&() *+"-.%;?37'1~$


r#   c                    | j         |j         | _         |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j	        | _	        |j
        | _
        |j        | _        |j        | _        |j        | _        t          |t          t           f          sJ |j         | j         k    sJ | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | xj        |j        z  c_        | j        |j        | _        n|j        | xj        |j        z  c_        | S r+   )rM   rO   rG   r   r:   r<   r   r   rk   ri   rH   r   r   r   r  r   r   r   r   rs   r   r  r  r  r"  r   r  r   others     r"   r>   zFunctionEventAvg.add  s   8 yDH =DL!NDM"_DN#.DO % 2D!&!4D % 2DDJDJ$0D"_DN#.DO&+&>D#%-1A!BCCCCCyDH$$$$u33%"99  E$==  ##u'CC##!77  E$==  ""e&AA""%%)GG%%

ek!

:DJJ[$JJ%+%JJr#   c                 ,    |                      |          S r+   )r>   rJ  s     r"   __iadd__zFunctionEventAvg.__iadd__  s    xxr#   c                     | j         sdn| j         }| j        }| j        }| j        }d| j         d| j         d| j         d| d| d| d| dt          | j                   d	| j	         d| d
| dS )Nr   z<FunctionEventAvg key=z self_cpu_time=r>  z  self_r@  r   rA  rB  rC  rD  )
r   r  r   r  rM   r   r   r   r   r  )r   r   self_device_timer   device_memorys        r"   rF  zFunctionEventAvg.__repr__  s    $(OHff:*0dTX d dd>Z d dfjfw d d d d(8d d;Fd dNYd dilmqm~iid d $ 5d d8Cd dS`d d d	
r#   )r   N)r   r   r   r   r   r>   rM  rF  r1   r#   r"   r   r     s\        DD   2$ $ $L  	
 	
 	
 	
 	
r#   r   c                       e Zd Zd ZdS )r   c                     t          |          dk    rt          j                            |          n|| |<   | |         S rh   )r9   torch_C	_demangle)r   rM   s     r"   __missing__zStringTable.__missing__  s:     033xx!||EH&&s+++S	Cyr#   N)r   r   r   rV  r1   r#   r"   r   r     s#            r#   r   c                       e Zd ZdZd Zd ZdS )r   z=Acceleration structure for accessing mem_records in interval.c                     || _         g | _        g | _        t          |          dk    r>t	          d t          |          D                       }t          | \  | _        | _        d S d S )Nr   c                 L    g | ]!\  }}|d                                           |f"S r   )start_ns)r2   irs      r"   r6   z*MemRecordsAcc.__init__.<locals>.<listcomp>  s-    RRR41a1Q4==??A.RRRr#   )_mem_records_start_nses_indicesr9   rX   r?   zip)r   mem_recordstmps      r"   r   zMemRecordsAcc.__init__  so    '&(#%{aRR9[;Q;QRRRSSC.13i+Ddmmm  r#   c              #      K   t          j        | j        |dz            }t          j        | j        |dz            }t	          ||          D ]}| j        | j        |                  V  dS )z
        Return all records in the given interval
        To maintain backward compatibility, convert us to ns in function
        i  N)bisectbisect_leftr_  bisect_rightr8   r^  r`  )r   r  r  	start_idxend_idxr\  s         r"   in_intervalzMemRecordsAcc.in_interval  s{      
 &t'7DII	%d&6FFy'** 	6 	6A#DM!$455555	6 	6r#   N)r   r   r   r   r   rj  r1   r#   r"   r   r     s8        GG8 8 86 6 6 6 6r#   r   c                 B     g d}t           fd|D                       S )N))autograd/__init___make_grads)rl  backward)ztorch/tensorrn  )_internal/common_utilsprof_callable)ro  prof_func_call)ro  prof_meth_callc              3   B   K   | ]}|d          v o	|d         v  V  dS )r   r/   Nr1   )r2   r   r   s     r"   ru   z&_filter_stack_entry.<locals>.<genexpr>  s;      OOAaDEM3adem4OOOOOOr#   )all)r   filtered_entriess   ` r"   _filter_stack_entryrv    s;       OOOO>NOOOOOOr#   z[memory]z[OutOfMemory]c                 2    t           t          ddddddg}| |v S )Nz profiler::_record_function_enterz$profiler::_record_function_enter_newzprofiler::_record_function_exitzaten::is_leafzaten::output_nrzaten::_version)MEMORY_EVENT_NAMEOUT_OF_MEMORY_EVENT_NAME)r;   filtered_out_namess     r"   _filter_namer{    s2     	 *.)	 %%%r#   Fc                 d    t                      }||          } |r|                     d          rd} | S )NzProfilerStep#zProfilerStep*)r   
startswith)r;   with_wildcardstring_tables      r"   _rewrite_namer  /  s;    ==LD #???++ 	#"DKr#   rw   rx   ry   rz   c
                   ./012 t          |           dk    rdS t          d | D                       }
t          d | D                       }| d         j        }|s|
rt          d          t          d | D                       }t          d | D                       }&t	          t          | fd	d
          |||          } t          d | D                       dz   }|t          ||          }t          d | D                       dz   }|t          ||          }d}|}d}d | D             }t          |          dk    }|r.t          d |D                       dz   }|t          ||          }dg}|r|                    d           |g dz  }||	                                nd}|
r&|
                    d| d| d| d| dg           |r9|
                    ddg           |r |r|
                    | dd| dg           |                    d           t          d | D                       }|r|                    d            d!.dg2dg/. g0d7./02fd#	}d$ } ||           |r ||           |d%|z   d         D ]} ||           |r |                    d&            ||           |r"|                    d'            ||d()           |r`d* | D             }t          |          dk    r? |t          |                    \  }}|                    d+|             ||           nd,}2d         }/d         } 0d         }!d}g 11fd-}"d}#d}$| D ]n}%|#|%j        z  }#|%j        t          j        k    r|%j        r|$|%j        z  }$3|%j        t          j        t          j        t          j        fv r|%j        s
|$|%j        z  }$o| |"d.|!z              |"|           |	r |"d.|!z              |"d/            |"|             |" |j        |             |"|            d0 }&d}'| D ] }%|'|k    r n|	r|%j        |'d%z  }'|%j        }(|&t          |(          |d1z
  k    r|(d|d1z
           d2z   }(t1          |%j        |#          |%_        |%j        st1          |%j        |#          nd|%_        |(g})|r5|%j        }*|&t          |*          |d1z
  k    r|*d|d1z
           d2z   }*|)|*gz  })|)|%j        |%j        |%j        |%j        |%j         gz  })|
rGt1          |%j        |$          |%_!        |)
                    |%j"        |%j!        |%j#        |%j$        g           |rz|)
                    tK          |%j&                  tK          |%j'                  g           |r=|r;|)
                    tK          |%j(                  tK          |%j)                  g           |)                    |%j*                   |r|)                    |%j+                   |r/|)                    tY          |%j-                  d|                    |r@|%j.        dk    r|)                    d3           n|)                    |%j.        |z  d4           |rFd}+t          |%j/                  dk    r |&|%j/        d         |          }+|)                    |+            |" |j        |)            |rqdgt          |          d%z
  z  },|%j/        d%d         D ]#}- |" |j        |, |&|-|          gz               $|,                    d            |" |j        |,            " |"|             |"d5ta          |#                      |
r4 |"d||	                                nd d6ta          |$                      d1                    1          S )8zUPrint a summary of events (which can be a list of FunctionEvent or FunctionEventAvg).r   r   c              3   ,   K   | ]}|j         d k    V  dS r   Nr;  rt   s     r"   ru   z_build_table.<locals>.<genexpr>H  s*      OOu%6:OOOOOOr#   c              3   ,   K   | ]}|j         d k    V  dS r  r$  rt   s     r"   ru   z_build_table.<locals>.<genexpr>I  s*      PP7!;PPPPPPr#   z9use_device is None, but there is device performance data.c              3   X   K   | ]%}|j         d uot          |j                   dk    V  &d S r   )r   r9   rt   s     r"   ru   z_build_table.<locals>.<genexpr>Q  sS         
	4	'	GC0B,C,Ca,G     r#   c              3   X   K   | ]%}|j         d uot          |j                   dk    V  &d S r   )r   r9   rt   s     r"   ru   z_build_table.<locals>.<genexpr>V  sS         
	D	(	IS1D-E-E-I     r#   Nc                     t          |                     dd                              dd                              dd                    S )Nr   r   r   r   )r   r   )rK   r|   s    r"   rR   z_build_table.<locals>.<lambda>_  sC    OOFH55WUH--W]H55	! ! r#   T)rM   reverser   c              3   >   K   | ]}t          |j                  V  d S r+   )r9   rM   rJ   s     r"   ru   z_build_table.<locals>.<genexpr>l  s*      ;;SCLL;;;;;;r#      c              3   X   K   | ]%}t          t          |j                            V  &d S r+   )r9   r   r   rJ   s     r"   ru   z_build_table.<locals>.<genexpr>p  s5      KKSc#c&6"7"788KKKKKKr#      c                 Z    g | ](}|j         	t          |j                   dk    !|j         )S r   )rk   r9   rJ   s     r"   r6   z _build_table.<locals>.<listcomp>x  s;       sy'<SYRSASAS	ASASASr#   c              3   H   K   | ]}t          d  |D                       V  dS )c              3   4   K   | ]}t          |          V  d S r+   r9   )r2   r   s     r"   ru   z)_build_table.<locals>.<genexpr>.<genexpr>~  s(      225CJJ222222r#   N)max)r2   rk   s     r"   ru   z_build_table.<locals>.<genexpr>~  s9      GGu22E22222GGGGGGr#   NamezOverload Name)z
Self CPU %zSelf CPUzCPU total %z	CPU totalzCPU time avgNonezSelf z %z totalz	 time avgzCPU MemzSelf CPU Memz Memz
# of Callsc              3   ,   K   | ]}|j         d k    V  dS )rW   N)rO   rJ   s     r"   ru   z_build_table.<locals>.<genexpr>  s)      ==s*======r#   zNode IDr   rD  c                     dxx         d|z   t          |           z   dz   dz  z   z  cc<   dxx         d| z  dz  z   z  cc<   dxx         | z   z  cc<   d S )Nr   z{: }r   -)r   )paddingtext_dirSPACING_SIZEheader_sep_lstline_length_lstrow_format_lsts     r"   
add_columnz _build_table.<locals>.add_column  s    qHs7||+c1S<5GH	
 	qS7]cL.@AAg44r#   c                 d   g d}| dk    sJ t          dt          t          j        |           dz  t	          t          |          dz
                                }|dk    r|t          |          k     sJ t          dt          j        |          dz            |t          |                   fS )N)FLOPsKFLOPsMFLOPsGFLOPsTFLOPsPFLOPsr      r/   
   g      )	r  minmathlog10floatr9   powfloorr   )r  flop_headers	log_flopss      r"   auto_scale_flopsz&_build_table.<locals>.auto_scale_flops  s    
 
 
 qyyyy3tz%0014eC<M<MPQ<Q6R6RSSTT	A~~)c,.?.?"?"?"?"?BI..577c)nn9UVVr#   r/   zInput ShapeszSource Location<)r  c                 2    g | ]}|j         d k    |j         S rZ  )r  rJ   s     r"   r6   z _build_table.<locals>.<listcomp>  s!    BBB3CIMMSYMMMr#   zTotal Fc                 \                         |                                 d           d S )Nr   )r]   )r   results    r"   r]   z_build_table.<locals>.append  s-    adr#   =z1This report only display top-level ops statisticsc                     t          |           |k    r<t          |           |z
  }| |d          } t          |           dk    rd| dd          z   } | S )Nr  ...r  )r   src_column_widthoffsets      r"   	trim_pathz_build_table.<locals>.trim_path
  sX    t99'''YY!11F=D4yy1}}tABBx'r#   r  r  z--z8.3fzSelf CPU time total: z time total: )rD  )2r9   anyr   RuntimeErrorr
   rX   r  r  r]   upperrA   rs   rH   r	   rI   r   r   r2  r3  r4  r   r   r:   rM   r   r  rG   r   r  r   r   r   r   r  r  r   r   r   r  r  r  r"  r   rO   r   r   r  rk   r   rE  )3r_   r|   r   r}   r~   r   r   r   r   r   has_device_timehas_device_memr   has_input_shapeshas_overload_namesname_column_widthshapes_column_widthDEFAULT_COLUMN_WIDTHflops_column_widthr  stacks	has_stackheadersr   append_node_idr  r  r   	raw_flopsflops_scaleflops_header
row_format
header_sepline_lengthr]   sum_self_cpu_time_totalsum_self_device_time_totalrK   r  event_limitr;   
row_valuesr   	src_fieldempty_headersr   r  r  r  r  r  s3    `                                            @@@@@r"   r   r   8  s[    6{{arOOOOOOOOPPPPPPPN%J  X/ XVWWW      
       
     	 	 	 ")!
 
 
  ;;F;;;;;a?( 13HIIKKFKKKKKaO*!"57NOO- #  F FaI KGGGGGGG!K 	  +"#35IJJhG ('''    G )3(>*""$$$FK 
%%%''''&&&)))		
 	
 	
  	
 	
 	
  	. 	NN"(((-K---   NN<   ==f=====N "y!!! LTNTN$}oO5 5 5 5 5 5 5 5 5W W W J !!! &
$%%%Q++--. ) )
'(((( (~&&&
&''' 3()))
#c2222 BB&BBB	y>>Q*:*:3y>>*J*J'[,NN2L22333J)****J"J"J!!$KJ F      !" E E3#::?jn,,,&#*DD&&O&  * '#*DD& s[ !!!v Ds[ !!!BCCC
F:
F:g&'''
F:   K b6 b6)##E  	S^%?1Kw ,T>SVW>W1W1W501456>D1#%< 
  

 <s13JKKK 	 V
 	*-M%1&&*?!*CCC -.K1F1J.K Lu T=/)J '!"
 	

  	'9*,F( (C$ 2,-'    	 #3#788"3#<==	    n !! 's'>??&s'CDD	   	I	
 	
 	
  	+ck*** 	Kc#"2334H5H4HIJJJ 	EyA~~!!$''''!!SY%<"C"CDDD 	)I39~~!!%Icil4DEE	i((( z *-... 		6DCLL1$45M122  %J%'99U<L+M+M*NN   
   $$$F$:$m4555
F:
FJ<0G#H#HJJKKK 
F**@J$$&&&f F F'(BCCF F	
 	
 	
 776??r#   )F)	NNrw   rx   ry   rz   FFF)$re  rY   r  collectionsr   r   operatorr   typingr   r   typing_extensionsr   rS  torch.autogradr	   __all__listr
   r   r   r   r   r   r   r   r   r   r   r   rv  rx  ry  r{  r  r   r1   r#   r"   <module>r     s         / / / / / / / /                       ( ( ( ( ( (  % % % % % %	 	 	[ [ [ [ [ [ [ [|
  5 5 5" " "D D D               <	% 	% 	% 	% 	% 	% 	% 	% 
H<<<	=	=N
 N
 N
 N
 N
' N
 N
 N
bN
 N
 N
 N
 N
* N
 N
 N
b    +   6 6 6 6 6 6 6 6,	P 	P 	P  * & & &&    F F F F F Fr#   