
    }YhE                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZ d dlmc mZ d dlmc mZ d dlmZmZ d dlmZ d dlm Z m!Z! g d	Z"d
e#de$e#e#f         fdZ%dee&         dej'        de(e#ef         fdZ)dej'        de(e#ef         dej        j*        fdZ+d,dej        j*        dej        j*        fdZ,dej*        dej*        fdZ-dej*        de.ej'                 de.ej'                 de.ej'                 fdZ/ej0        ej1        ej2        ej3        ej4        ej5        ej6        ej7        ej8        ej9        ej7        ej:        ej;        gZ<ej=        ej>        gZ?ej0        ej@        ej1        ejA        ej2        d iZBde.ej'                 de(e#ej*        f         fdZCde.ej'                 de(e#ej*        f         de(ej*        ej*        f         fdZD G d  d!          ZEd-d$ZFd%eEdeGfd&ZH G d' d(          ZIdejJ        fdej        j*        d)ee(e#ef                  d*e&ejJ                 dej        j*        fd+ZKdS ).    N)defaultdict)Iterable)Enum)AnycastOptional)ArgumentTarget)	ShapeProp)fuse_conv_bn_evalfuse_linear_bn_eval)matches_module_patternreplace_node_modulefuseremove_dropoutextract_subgraphmodules_to_mkldnnreset_modulesMklSubgraphgen_mkl_autotuneruse_mkl_length	UnionFindoptimize_for_inferencetargetreturnc                 P    |                      dd          ^ }}|r|d         nd|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentnames      t/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/torch/fx/experimental/optimization.py_parent_namer$   %   s3    
 MM#q))MVT&6!99B,,    patternnodemodulesc                 j   t          |j                  dk    rdS |j        d         |f}t          | |          D ]x\  }}t          |t          j                  s dS |j        dk    r dS t          |j        t                    s dS |j        |vr dS t          ||j                           |ur dS ydS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r&   r'   r(   nodesexpected_typecurrent_nodes         r#   r   r   /   s     49~~u"&)A,!5E'*7E':': 
 
#|,00 	55?m++55,-s33 	55g--55+,--]BB55 C4r%   
new_modulec                     t          | j        t                    sJ t          | j                  \  }}||| j        <   t	          ||         ||           d S N)r.   r   r2   r$   setattr)r'   r(   r7   parent_namer"   s        r#   r   r   C   sY     dk3'''''$T[11K%GDKGK $
33333r%   Fmodelc                    t           j        t           j        ft           j        t           j        ft           j        t           j        ft           j        t           j        fg}|st          j	        |           } |rt          | t          j        j                  st          j        |           }n| }t          |                                          }t          j	        |j                  }|D ]}|j        D ]}t'          |||          rt)          |j        d         j                  dk    r8||j        d         j                 }	||j                 }
|
j        se|d         t           j        t           j        t           j        fv rt3          |	|
          }nt5          |	|
          }t7          |j        d         ||           |                    |j        d                    |                    |           t          j        ||          S )z
    Fuses convolution/BN and linear/BN layers for inference purposes.
    Will deepcopy your model by default, but can modify the model inplace as well.
    r   r   )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dLinearcopydeepcopyr.   torchr/   GraphModulesymbolic_tracedictnamed_modulesgraphr4   r   r+   r,   usersr   track_running_statsr   r   r   replace_all_uses_with
erase_node)r<   inplaceno_tracepatternsfx_modelr(   	new_graphr&   r'   first_layerbnfused_layers               r#   r   r   L   s    
BN#	BN#	BN#	BN#	H  %e$$ :eUX-ABB $U++8))++,,Ghn--I + +O 	+ 	+D%gtW== +ty|)**Q..%dil&9:T[)- 1:")RY	!BBB"3K"D"DKK"5k2"F"FK#DIaL';GGG**49Q<888$$T***	+  >(I...r%   c                     t          j        |           } G d dt          j         j                  } ||                                          S )z5
    Removes all dropout layers from the module.
    c                   P     e Zd Zdedeedf         deeef         def fdZ	 xZ
S )&remove_dropout.<locals>.DropoutRemoverr   r,   .kwargsr   c                     t          | j        |         t          j                  rt	          |          dk    sJ |d         S t                                          |||          S )Nr   r   )r.   
submodulesr>   Dropoutr+   superr*   )selfr   r,   r]   	__class__s       r#   r*   z2remove_dropout.<locals>.DropoutRemover.call_module{   sY     $/&12:>> A4yyA~~~~Awww**64@@@r%   )__name__
__module____qualname__r
   tupler	   rK   r2   r   r*   __classcell__)rc   s   @r#   DropoutRemoverr\   z   s}        	A 	A(-hm(<	AFJ3PS8n	A	A 	A 	A 	A 	A 	A 	A 	A 	A 	Ar%   ri   )r/   rJ   rH   Transformer	transform)r<   rU   ri   s      r#   r   r   t   si      ''HA A A A A- A A A >(##--///r%   orig_moduler4   inputsoutputsc                 P   t          j                    }i |D ]!}|                    |j                  }||<   "|D ] }|                    |fd          }||<   !|                    fd|D                        |                                 t          j        | |          S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                     |          S r9    )xenvs    r#   <lambda>z"extract_subgraph.<locals>.<lambda>   s    s1v r%   c                      g | ]
}|         S rq   rq   ).0outputrs   s     r#   
<listcomp>z$extract_subgraph.<locals>.<listcomp>   s    888fc&k888r%   )r/   Graphplaceholderr"   	node_copyrw   lintrI   )	rl   r4   rm   rn   rV   inputnew_noder'   rs   s	           @r#   r   r      s     

I"$C  ((44E

  &&t-=-=-=-=>>D		8888888999NN>+y111r%   c                 *    t          j        |           S r9   )	th_mkldnnMkldnnBatchNorm)a_s     r#   rt   rt      s    !:1!=!= r%   c                    i }| D ]}|j         dk    rt          |j        t                    sJ ||j                 }t	          |          t
          v rqt          t	          |                   |t          j                  }t          |t          j	                  sJ t          j        |          ||<   t          |||           |S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r*   )r1   r.   r   r2   r3   
mkldnn_maprH   floatr>   ModulerF   rG   r   )r4   r(   old_modulesr'   
cur_moduler7   s         r#   r   r      s     /1K ? ?7m##dk3///// -JJ:--'Z(8(89*ekRR
!*bi88888*.-
*C*CJ'#D':>>>r%   r   c                     | D ]Q}|j         dk    rDt          |j        t                    sJ ||j                 }||v rt	          ||||                    RdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r*   N)r1   r.   r   r2   r   )r4   r(   r   r'   r   s        r#   r   r      su      L L7m##dk3///// -J[((#D';z3JKKKL Lr%   c                   $    e Zd Zdej        fdZdS )r   fx_graphc                 >    || _         g | _        g | _        g | _        d S r9   )r   r4   start_nodes	end_nodes)rb   r   s     r#   __init__zMklSubgraph.__init__   s#     $&
*,(*r%   N)rd   re   rf   r/   ry   r   rq   r%   r#   r   r      s1        + + + + + + +r%   r   
   r   c                 H     dddt           dt          f fd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    NrM   r   c                    | j         }	:| j        j        	| j        j        t	          	                                         d |D             t          t          t          j	                 d | j
        D                       }t          	| j        ||          
fd} |fd          }t          j        j        t                                                                |fd          }||k     S )Nc                 @    g | ]}t          j        |j                  S rq   )rH   randnshaperv   r'   s     r#   rx   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>   s$    IIITTZ00IIIr%   c                 (    g | ]}|j         d          S )r   )r,   r   s     r#   rx   z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>   s    *T*T*TD49Q<*T*T*Tr%   c                     t                    D ]} |              t          j                    }t                    D ]} |              t          j                    |z
  S r9   )rangetime)fr   beginiterswarmups      r#   	benchmarkz?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmark   sc    6]]  IKKE5\\  9;;&&r%   c                  6    d  d  D              D             S )Nc                 6    g | ]}|                                 S rq   )to_denserv   is     r#   rx   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>  s-       !"

  r%   c                 6    g | ]}|                                 S rq   )	to_mkldnnr   s     r#   rx   zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>  s     1W1W1WA!++--1W1W1Wr%   rq   sample_inputs	submodules   r#   rt   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>  s7      &/i1W1W1W1W1W&X   r%   c                         S r9   rq   r   s   r#   rt   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>  s    		=(A r%   )r   r   owning_moduler   r   	propagater   listr/   r0   r   r   r4   r   rM   rK   rL   )rM   input_nodesoutput_argsr   mkl_timeno_mkl_timer   r   example_inputsrU   r   r   r   s         @@r#   use_mkl_heuristicz,gen_mkl_autotuner.<locals>.use_mkl_heuristic   s8   '~3H.4Kh)).999II[III4=*T*TEO*T*T*TUU$Xu{KUU		' 	' 	' 	' 	' 	' 9    
 
 	O!4	(?(?(A(A#B#BK	
 	
 	
  i A A A A ABB+%%r%   )r   bool)r   r   r   r   rU   r   s   ``` @@r#   r   r      s[     HK& & & & & & & & & & & &> r%   rM   c                 2    t          | j                  dk    S )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r+   r4   )rM   s    r#   r   r     s     u{ar%   c                   @    e Zd Zd ZdefdZdedefdZdedefdZd	S )
r   c                 2    d g|z  | _         dg|z  | _        d S )Nr   r!   size)rb   ns     r#   r   zUnionFind.__init__  s     ,06A: !sQw			r%   vc                 .    || j         |<   d| j        |<   d S )Nr   r   )rb   r   s     r#   make_setzUnionFind.make_set   s    A	!r%   r   c                     | j         |         }||k    r|S |J |                     |          | j         |<   t          t          | j         |                   S r9   )r!   findr   int)rb   r   pars      r#   r   zUnionFind.find$  sO    k!n88H3ACQ(((r%   r   bc                     |                      |          |                      |          }}||k    r|S | j        |         | j        |         k     r||}}|| j        |<   | j        |xx         | j        |         z  cc<   d S r9   )r   r   r!   )rb   r   r   s      r#   joinzUnionFind.join,  sz    yy||TYYq\\166H9Q<$)A,&&aqAA	!	!$r%   N)rd   re   rf   r   r   r   r   r   rq   r%   r#   r   r     s        ' ' '#    )c )c ) ) ) )%c %c % % % % % %r%   r   pass_configtracerc                    dddt           id}|i }|                    |           |d         rt          |           } |d         rt          |           } |d         du r| S t	          |d         t
                    st          d	          d|d         vrt          d
          |d         d         } |            }|                    t          j	        |                     t          j        |j                   t          |                                           } G d dt                    }t          j                  D ]}|j        }	|j        dk    r||j                 }
t)          |
          t*          v rq|j        }	t/          |
                                d          }|F|j        t4          j        k    s
J d            |j        t5          j        d          k    s
J d            n6|j        dk    r+|j        t*          v r|j        }	n|j        t:          v r|j        }	|	|j        k    r|	|j        k    r t?          d |j         D                       s!                    |          5  t          j"        |j         fd          }ddd           n# 1 swxY w Y   tG          tH          t          j%        j&                 |          |_         '                    |          5  (                    dd|f          }|)                    |           |f|_         ddd           n# 1 swxY w Y   tU          t          j                  |          }|_+        j        D ]}|j        dk    r|j        dk    r|j         d         }t          |j,                  }|D ]B}|j        dk    r5|j        dk    r*|)                    |           -                    |           Ct]          |j,                  dk    r-                    |           t]          j                  }t_          |          fdta          j                  D ]
\  }}|j        dk    r(|j        dk    r||_1        2                    |           9|j        dk    r?|j        dk    r4 |j         d                   J  |j         d                   |_3        fd|j4        D             }t]          |          dk    rt?          d |D                       rJ tk          |          }|d         |_6        |dd         D ]}7                    |d         |           tq          fd          }j        D ]}ts          |d          r8|:                    |j6                           j        ;                    |           ts          |d          r8|:                    |j1                           j<        ;                    |           ts          |d          r8|:                    |j3                           j=        ;                    |           |>                                D ]l} ||          s_|j<        |j=        z   D ]9}|j         d         }|)                    |           -                    |           :t          |j        ||           md}j        D ]}|j        dk    s|j        dk    r|dz  }t          jA        t                    C                    d |           D                                 t          j        |           }|S )!a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuser   mkldnn_layout_optimizeNr   r   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                       e Zd ZdZdZdZdS )*optimize_for_inference.<locals>.MklSupportr   r      N)rd   re   rf   NOYESUNKNOWNrq   r%   r#   
MklSupportr   b  s        r%   r   r*   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc              3   ,   K   | ]}|j         d k    V  dS )r   N)r   )rv   args     r#   	<genexpr>z)optimize_for_inference.<locals>.<genexpr>  s)      II3:3IIIIIIr%   c                 2                         d| f          S )Nr   )call_method)r   r   s    r#   rt   z(optimize_for_inference.<locals>.<lambda>  s    )=)=kA4)P)P r%   r   r   r   r   c                     t          | d          r                    | j                  S t          | d          r                    | j                  S d S )Ncolorstart_color)hasattrr   r   r   )r   ufs    r#   	get_colorz)optimize_for_inference.<locals>.get_color  sT    1g 	$7717###1m$$ 	*771=)))tr%   c                 p    g | ]2}t          |t          j                  r |          ' |          3S r9   )r.   r/   r0   )rv   r   r   s     r#   rx   z*optimize_for_inference.<locals>.<listcomp>  sQ       a)) 9Q<<+ 	! ,++r%   c              3      K   | ]}|d u V  	d S r9   rq   r   s     r#   r   z)optimize_for_inference.<locals>.<genexpr>  s&      9919999999r%   r   c                  "    t                     S r9   )r   )r   s   r#   rt   z(optimize_for_inference.<locals>.<lambda>  s    H@U@U r%   r   r   	end_colorzmkldnn conversions: %s)Er   updater   r   r.   rK   RuntimeErrortracerF   rG   r/   rI   rootrL   r   r   r4   r   r1   r   r3   mkldnn_supportedr   next
parametersdtyperH   r   devicemkldnn_supported_unknownr   anyr,   inserting_beforemap_argr   rg   r'   r	   inserting_aftercreate_noderP   r   r   rN   rQ   r+   r   	enumerater   r   r   all_input_nodessortedr   r   r   r   r   appendr   r   valuesr   logging	getLoggerrd   infor|   )r<   r   r   default_pass_configr   
cur_tracerr(   r   r'   supports_mkldnnr   sample_parametermkldnn_argsdense_xr   prv_noderN   user	num_nodescur_idx
cur_colorsother_colormkldnn_graphsrM   prvmkldnn_conversionsresultr   r   r   s                              @@@r#   r   r   6  sX   & #."? 
 {+++>* U+, &u%%34==)*BCTJJ JHIII-.FGGGQRRR+,DEkRJe 4 455HN:?H---$()<)<)>)>$?$?G    T    X^$$ "' "'$-7m## -JJ#333",.#'
(=(=(?(?#F#F #/+1U[@@@C A@@ ,2el56I6IIII; JII W''{...",. 888","4jm++*"444IItyIIIII **400   jIPPPP               
 U27#34kBBDI))$// ' '"..}j4'RR**7333 $w' ' ' ' ' ' ' ' ' ' ' ' ' ' ' $D$8$8'BBK&H  	* 	*7m##z(A(Ay|H$$E . .7m++{0J0J..x888''---4:!####D)))HN##I	9		B    $ #8>22 4 47m##{(B(B&DKK    W%%$+*C*C9TYq\**666&Yty|44DNN   -  J :!##99j99999999
++J#ADJ)!""~ 4 4
1{33334 -88U8U8U8U,V,VM J J4!! 	B"''$*--.4;;DAAA4'' 	N"''$"2334@GGMMM4%% 	J"''$.112<CCDIII %%'' = =  '' 	=)EO; * *il**3///##D))))%+w<<< $ $;+%%
)B)B!#h$$%=?QRRRMMOOO^E8,,FMs$   4JJ"	%J"	-6L//L3	6L3	)FF)r   r   )LrF   r   operatorr   collectionsr   collections.abcr   enumr   typingr   r   r   rH   torch.fxr/   torch.nnr>   torch.nn.functional
functionalFtorch.utils.mkldnnutilsmkldnnr   torch.fx.noder	   r
   torch.fx.passes.shape_propr   torch.nn.utils.fusionr   r   __all__r2   rg   r$   r3   r0   rK   r   r   r   r   r   r   r   rA   rE   rB   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearr   r   r   r   r   r   r   r   Tracerr   rq   r%   r#   <module>r+     s|       # # # # # # $ $ $ $ $ $       & & & & & & & & & &                       & & & & & & & & & * * * * * * * * 0 0 0 0 0 0 H H H H H H H H   - -sCx - - - -d^#%759#s(^   (4
'4 cN48=4 4 4 4%/ %/ %/58? %/ %/ %/ %/P0") 0	 0 0 0 0&22=2 M2 "']	2 2 2 2. IINGLL	J	O	MFL & %L(,7 Iy%Iy%N==
T"'] T#ry.5I    &L=L#ry.!L bi*+L L L L"+ + + + + + + ++ + + +\ +  $        % % % % % % % %: -1 ip p8?p$sCx.)p Op X_	p p p p p pr%   