o
    j9:jH                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlZd dlmZ d dlmZ d dlm  mZ d dlm  mZ d dlmZmZ d dlmZ d dlmZm Z  g d	Z!d
e"de#e"e"f fddZ$dee% dej&de'e"ef fddZ(dej&de'e"ef dejj)fddZ*d8dejj)dejj)fddZ+dej)dej)fddZ,dej)de-ej& de-ej& de-ej& fd d!Z.ej/ej0ej1ej2ej3ej4ej5ej6ej7ej8ej6ej9ej:gZ;ej<ej=gZ>ej/ej?ej0ej@ej1d"d# iZAde-ej& de'e"ej)f fd$d%ZBde-ej& de'e"ej)f d&e'ej)ej)f fd'd(ZCG d)d* d*ZDd9d-d.ZEd/eDdeFfd0d1ZGG d2d3 d3ZHdejIfdejj)d4e'e"ef dB d5e%ejI dejj)fd6d7ZJdS ):    N)defaultdict)Iterable)Enum)Anycast)ArgumentTarget)	ShapeProp)fuse_conv_bn_evalfuse_linear_bn_eval)matches_module_patternreplace_node_modulefuseremove_dropoutextract_subgraphmodules_to_mkldnnreset_modulesMklSubgraphgen_mkl_autotuneruse_mkl_length	UnionFindoptimize_for_inferencetargetreturnc                 C   s*   |  dd^ }}|r|d |fS d|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentname r    i/home/nk/hobo-godmode/plappi-mvp/.venv/lib/python3.10/site-packages/torch/fx/experimental/optimization.py_parent_name%   s   r"   patternnodemodulesc                 C   s   t |jdkr	dS |jd |f}t| |D ]2\}}t|tjs" dS |jdkr* dS t|jts3 dS |j|vr; dS t	||j |urG dS qdS )Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r#   r$   r%   nodesexpected_typecurrent_noder    r    r!   r   /   s    

r   
new_modulec                 C   sL   t | jtstdt| j t| j\}}||| j< t|| || d S )NExpected str target, got )r*   r   r.   AssertionErrorr/   r"   setattr)r$   r%   r3   parent_namer   r    r    r!   r   C   s
   
r   Fmodelc                 C   s4  t jt jft jt jft jt jft jt jfg}|st	| } |r&t
| tjjs,t| }n| }t| }t	|j}|D ]W}|jD ]Q}t|||rt|jd jdkrTqA||jd j }	||j }
|
jseqA|d t jt jt jfv rwt|	|
}nt|	|
}t|jd || ||jd  || qAq<t||S )z
    Fuses convolution/BN and linear/BN layers for inference purposes.
    Will deepcopy your model by default, but can modify the model inplace as well.
    r   r   )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dLinearcopydeepcopyr*   torchr+   GraphModulesymbolic_tracedictnamed_modulesgraphr0   r   r'   r(   usersr   track_running_statsr
   r   r   replace_all_uses_with
erase_node)r8   inplaceno_tracepatternsfx_modelr%   	new_graphr#   r$   first_layerbnfused_layerr    r    r!   r   M   s<   








r   c                 C   s*   t | }G dd dtj j}|| S )z5
    Removes all dropout layers from the module.
    c                       s>   e Zd Zdedeedf deeef def fddZ	  Z
S )z&remove_dropout.<locals>.DropoutRemoverr   r(   .kwargsr   c                    sH   t | j| tjrt|dkrtdt| |d S t |||S )Nr   z Expected 1 arg for Dropout, got r   )r*   
submodulesr9   Dropoutr'   r5   superr&   )selfr   r(   rU   	__class__r    r!   r&   |   s
   z2remove_dropout.<locals>.DropoutRemover.call_module)__name__
__module____qualname__r   tupler   rF   r.   r   r&   __classcell__r    r    rZ   r!   DropoutRemover{   s    

ra   )r+   rE   rC   Transformer	transform)r8   rP   ra   r    r    r!   r   u   s   
r   orig_moduler0   inputsoutputsc                    s|   t  }i  |D ]}||j}| |< q|D ]}|| fdd}| |< q| fdd|D  |  t | |S )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                    s    |  S Nr    )xenvr    r!   <lambda>       z"extract_subgraph.<locals>.<lambda>c                    s   g | ]} | qS r    r    ).0outputri   r    r!   
<listcomp>       z$extract_subgraph.<locals>.<listcomp>)r+   Graphplaceholderr   	node_copyrn   lintrD   )rd   r0   re   rf   rQ   inputnew_noder$   r    ri   r!   r      s   	

r   c                 C   s
   t | S rg   )	th_mkldnnMkldnnBatchNorm)a_r    r    r!   rk      s   
 rk   c                 C   s   i }| D ]H}|j dkrLt|jtstdt|j ||j }t|tv rLtt| |tj}t|t	j
s?tdt| t|||< t||| q|S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r&   r4   zExpected nn.Module, got )r-   r*   r   r.   r5   r/   
mkldnn_maprC   floatr9   ModulerA   rB   r   )r0   r%   old_modulesr$   
cur_moduler3   r    r    r!   r      s   

r   r~   c                 C   sZ   | D ](}|j dkr*t|jtstdt|j ||j }||v r*t||||  qdS )za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r&   r4   N)r-   r*   r   r.   r5   r/   r   )r0   r%   r~   r$   r   r    r    r!   r      s   	

r   c                   @   s   e Zd ZdejfddZdS )r   fx_graphc                 C   s   || _ g | _g | _g | _d S rg   )r   r0   start_nodes	end_nodes)rY   r   r    r    r!   __init__   s   
zMklSubgraph.__init__N)r\   r]   r^   r+   rq   r   r    r    r    r!   r      s    r   
   r   c                    s*   dddt dtf fdd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    NrH   r   c                    s   | j }d u r| jj| jjt dd |D  tttj	 dd | j
D }t| j||fdd}| fdd}tjjt  | fdd}||k S )	Nc                 S   s   g | ]}t |jqS r    )rC   randnshaperm   r$   r    r    r!   ro      s    z@gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<listcomp>c                 S   s   g | ]}|j d  qS )r   )r(   r   r    r    r!   ro      s    c                    s<   t D ]}|   qt }t  D ]}|   qt | S rg   )rangetime)frz   begin)iterswarmupr    r!   	benchmark  s   z?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmarkc                      s   dd dd  D  D S )Nc                 S      g | ]}|  qS r    )to_denserm   ir    r    r!   ro   
  s    zRgen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>.<locals>.<listcomp>c                 S   r   r    )	to_mkldnnr   r    r    r!   ro     rp   r    r    sample_inputs	submoduler    r!   rk   
  s    z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>c                      s     S rg   r    r    r   r    r!   rk     rl   )r   r   owning_moduler~   r	   	propagater   listr+   r,   r   r   r0   r   rH   rF   rG   )rH   input_nodesoutput_argsr   mkl_timeno_mkl_timeexample_inputsrP   r   r~   r   r   r!   use_mkl_heuristic   s&   
z,gen_mkl_autotuner.<locals>.use_mkl_heuristic)r   bool)r   r   r   r   r    r   r!   r      s   	"r   rH   c                 C   s   t | jdkS )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r'   r0   )rH   r    r    r!   r     s   r   c                   @   sF   e Zd Zdd ZdefddZdedefddZd	ed
efddZdS )r   c                 C   s   d g| | _ dg| | _d S )Nr   r   size)rY   nr    r    r!   r   %  s   zUnionFind.__init__vc                 C   s   || j |< d| j|< d S )Nr   r   )rY   r   r    r    r!   make_set)  s   
zUnionFind.make_setr   c                 C   sF   | j | }||kr|S |d u rtd| || j |< tt| j | S )NzParent is None)r   r5   findr   int)rY   r   parr    r    r!   r   -  s   
zUnionFind.findry   bc                 C   sf   |  ||  |}}||kr|S | j| | j| k r ||}}|| j|< | j|  | j| 7  < d S rg   )r   r   r   )rY   ry   r   r    r    r!   join6  s   

zUnionFind.joinN)r\   r]   r^   r   r   r   r   r   r    r    r    r!   r   $  s
    	r   pass_configtracerc              	      sH  dddt id}|du ri }|| |d rt| } |d r#t| } |d du r+| S t|d ts6td	d|d vr@td
|d d }| }|t	|  t
|j  t|  }G dd dt}t jD ]}|j}	|jdkr||j }
t|
tv r|j}	t|
 d}|dur|jtjkrtd|jtdkrtdn|jdkr|jtv r|j}	n|jtv r|j}	|	|jkr"|	|jkrt dd |j!D sqk "| t
#|j! fdd}W d   n1 sw   Y  t$t%t
j&j' ||_! (|  )dd|f}|*| |f|_!W d   n	1 sw   Y  qkt+t j|}| _, jD ]B}|jdkrr|jdkrr|j!d }t|j-}|D ]}|jdkrc|jdkrc|*|  .| qKt/|j-dkrr .| q1t/ j}t0|fddt1 jD ]{\}}|jdkr|jdkr||_23| q|jdkr|jdkrƈ|j!d du rtd|j!d |_4qfdd|j5D }t/|dkrِqt dd |D rtd t6|}|d |_7|d!d D ]}8|d | qqt9 fd"d} jD ]9}t:|d#r#|;|j7 j<| t:|d$r5|;|j2 j=<| t:|d%rG|;|j4 j><| q|? D ](}||st|j=|j> D ]}|j!d }|*|  .| qZt@|j|| qMd} jD ]}|jdks|jdkr|d!7 }q{tABtCDd&|  E  t
|  }|S )'a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuser   mkldnn_layout_optimizeNr   r   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                   @   s   e Zd ZdZdZdZdS )z*optimize_for_inference.<locals>.MklSupportr   r      N)r\   r]   r^   NOYESUNKNOWNr    r    r    r!   
MklSupportl  s    r   r&   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc                 s   s    | ]}|j d kV  qdS )r   N)r   )rm   argr    r    r!   	<genexpr>  s    z)optimize_for_inference.<locals>.<genexpr>c                    s     d| fS )Nr   )call_methodr   r   r    r!   rk     s    z(optimize_for_inference.<locals>.<lambda>r   r   r   r   c                    s0   t | dr | jS t | dr | jS d S )Ncolorstart_color)hasattrr   r   r   r   )ufr    r!   	get_color  s
   

z)optimize_for_inference.<locals>.get_colorz!Expected color for to_dense inputc                    s,   g | ]}t |tjr |d ur |qS rg   )r*   r+   r,   r   )r   r    r!   ro     s    
z*optimize_for_inference.<locals>.<listcomp>c                 s   s    | ]}|d u V  qd S rg   r    r   r    r    r!   r     s    zFound None in cur_colorsr   c                      s   t  S rg   )r   r    r   r    r!   rk     rl   r   r   	end_colorzmkldnn conversions: %s)Fr   updater   r   r*   rF   RuntimeErrortracerA   rB   r+   rD   rootrG   r   r   r0   r   r-   r   r/   mkldnn_supportedr   next
parametersdtyperC   r|   r5   devicemkldnn_supported_unknownr   anyr(   inserting_beforemap_argr   r_   r$   r   inserting_aftercreate_noderK   r   r~   rI   rL   r'   r   	enumerater   r   r   all_input_nodessortedr   r   r   r   r   appendr   r   valuesr   logging	getLoggerr\   infort   )r8   r   r   default_pass_configr   
cur_tracerr%   r   r$   supports_mkldnnr   sample_parametermkldnn_argsdense_xr~   prv_noderI   user	num_nodescur_idx
cur_colorsother_colormkldnn_graphsrH   prvmkldnn_conversionsresultr    )r   r   r   r!   r   @  s   
	





















r   )FF)r   r   )KrA   r   operatorr   collectionsr   collections.abcr   enumr   typingr   r   rC   torch.fxr+   torch.nnr9   torch.nn.functional
functionalFtorch.utils.mkldnnutilsmkldnnrw   torch.fx.noder   r   torch.fx.passes.shape_propr	   torch.nn.utils.fusionr
   r   __all__r.   r_   r"   r/   r,   rF   r   r}   r   r   r   r   r   r<   r@   r=   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearr{   r   r   r   r   r   r   r   Tracerr   r    r    r    r!   <module>   s   





(

"

1	