o
    j9:j                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z	d dlZd dlZd dlZd dlZd dlZd dlmZmZmZ d dlmZmZ d dlmZ d dlZd dlmZmZmZ d dlmZ d dlm Z  d dl!m"Z" d d	l#m$Z$ d d
l%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 ddl8m9Z9m:Z: ddl:m;Z; ddl<m=Z=m>Z>m?Z?m@Z@mAZA ddlBmCZC eDeEZFdaGeHeIeJeKf  dB eLd< daMeNeLd< daOeIePeJdB f dB eLd< e&eEdZQe&eEdZReHe ZSe Tddd gZUg d!ZVejWd"eNfd#d$ZX		dxd%eHe= d&eNd'eJdB d"dfd(d)ZYd*eHe= d"ejZfd+d,Z[		 dyd%eSdB d-eIeJeJf d.eJdB d/ePd"df
d0d1Z\d-eIeJeJf d"eIeJeUf fd2d3Z]d4ejj,d*eSd"dfd5d6Z^ej_d"ed fd7d8Z`i aaeIeJeIeJeHeJ f f eLd9< i abeIeJeHeJ f eLd:< dacePdB eLd;< i adeIeJeJf eLd<< i aeeIeJeHeJ f eLd=< d afePeLd>< dzd?d@Zgej_d"ed fdAdBZhG dCdD dDZiG dEdF dFZjd%eSd"dfdGdHZkd%eSd"dfdIdJZldKeHeJdB  d"dfdLdMZmd%ee= d"dfdNdOZndPeeoeepf  d"dfdQdRZqdzdSdTZrej_d"ed fdUdVZsejtG dWdX dXZuev ZwdYePdB dZeIeJef d"eIeJeIeJeHeJ f f fd[d\Zxd]eIeJef d"eIeJeIeJef f fd^d_Zyd"eIeJef fd`daZzd"eIeJeIeJeHeJ f f fdbdcZ{	d{ddee= e;B deeJdfeNd"ePdB fdgdhZ|diedjed"dfdkdlZ}dmeJd"efdndoZ~ddpdqedreJf dsejjdteIeJef due3dB d"eJf
dvdwZdS )|    N)CallableIteratorSequence)AnyIO)patch)
draw_graphget_aot_graph_nameget_graph_being_compiled)fx)save_graph_repro)get_debug_dir)utils)getArtifactLogger)trace_structured)signpost_event)GraphModule)_extract_tensor_metadataTensorMetadata)legalize_graph)FileLike)
OrderedSet)tree_map   )configir)ExternKernel)BaseSchedulerNodeFusedSchedulerNodeNopKernelSchedulerNode
OutputNodeSchedulerNode)VGRAPH_EXECUTION_ORDERFRECORD_GRAPH_EXECUTIONGRAPH_COMPILE_IDSir_pre_fusionir_post_fusionBufMetanamen_origin)dotz-Gnslimit=2z-Gnslimit1=2z-Gmaxiter=5000returnc                   C   s   t dd uS )Nr+   )shutilwhich r/   r/   \/home/nk/hobo-godmode/plappi-mvp/.venv/lib/python3.10/site-packages/torch/_inductor/debug.pyhas_dot?   s   r1   nodesprint_graphfnamec              	   C   sF  t  s
td dS |du rt }t| }|jD ]j}d|jvr q|jd j}t|t	r=t|d t
r8|d f}n/|d }n*t|trgz|jd j}| d j }|rXt	|nd}W n tyf   d}Y nw d}t|tjrs|jj}t||ddddd}	|	|jd< q|rt| ti |}
t|
 |
j  t|
|dtjjd	 dS )
z$
    Draw a graph in fname.svg.
    z*draw_buffers() requires `graphviz` packageNfusion_metar   r   r/   tensor_metaF)
clear_metadot_graph_shape)r1   logwarningr
   create_fx_from_snodesr2   metagroup
isinstancetupleintstrsnodeget_outputsnodemaybe_get_size	Exceptionr   ComputedBufferdatadtyper   printr   r   graphlintr   r   tracer8   )r2   r3   r4   rK   rD   r=   rB   sizerI   metadatagmr/   r/   r0   draw_buffersD   sF   








rQ   snodesc              
      sb  dt dtdtf fdd}tdg d}i }i }tj }d}g }d}| D ]}	|	 r1d	}
|
}n-|		 r:d
}
|
}n$t
|	trDd}
|
}nt
|	trOd}
|	j}nt
|	trZd}
|	j}ntdtjj|	 d}|
 d| }||}i }t|	drd|	 i}|j|d|d}dttB dtf fdd  |	r|| |	 }||_|||	|
|jd< |||< |	 D ]}||| < q|du r|}q&| D ]Q}	|	 }|	jj}|| }g }|D ]9}|j|v r||j }n!| | |!|j}|||j< W d   n	1 sw   Y  ||krq|| qt"||_#q|$t%|dkr*|d  |S t"| |S )B
    Creates a FX Graph from a list of SchedulerNode objects.
    r)   r,   .c                 S   s   dt dtfdd}| |_|S )Nargsr,   c                  W   s   dS Nr   r/   )rT   r/   r/   r0   func1      z;create_fx_from_snodes.<locals>.get_fake_func.<locals>.func1)r   r@   __name__)r)   rV   r/   r/   r0   get_fake_func   s   z,create_fx_from_snodes.<locals>.get_fake_func
FusionMeta)r=   rB   typeNexterntemplatenopcomputefusedzUnknown node typeoriginal_atenz: 
get_devicedevicer/   rT   kwargsrB   c                    s8   t | trt fdd| jD S tdd |  D S )Nc                 3   s    | ]} |V  qd S Nr/   ).0x	in_outputr/   r0   	<genexpr>   s    z;create_fx_from_snodes.<locals>.in_output.<locals>.<genexpr>c                 s   s(    | ]}|j D ]	}t|jtV  qqd S rf   )usersr>   rD   r    )rg   bufuserr/   r/   r0   rk      s    
)r>   r   anyrR   rC   )rB   ri   r/   r0   rj      s
   
z(create_fx_from_snodes.<locals>.in_outputr5   r   r   )&rA   r   r@   collections
namedtupletorchr   Graph	is_externis_templater>   r   r!   r=   r   RuntimeError	_inductorr   get_fused_kernel_name	get_nodeshasattrrb   call_functionr   boolappendget_namer)   r<   rC   read_writesreadsinserting_beforeplaceholderr?   rT   outputlen)rR   rY   rZ   buf_to_fx_nodenode_to_fx_noderK   
first_nodeoutputsr=   rB   	node_type
fused_name	func_name	node_funcre   fx_noder)   rm   depsnew_argsdepdep_noder/   ri   r0   r;   }   s   




	



r;   node_name_to_buf_nameparent_buf_name	n_originsc           
      C   s   | d u rd S | D ]S}|  }| }|d ur)t|dkr)t|||d u r%|n| qt|dkr5|d |ks7J |j}|d u sC|jd u rDq|jD ]}|j}	|	|vrZ|d u rV|n|||	< qGqd S )Nr   r   )r~   ry   r   $update_orig_fx_node_name_to_buf_namerD   originsr)   )
r2   r   r   r   rD   buf_namechildren_nodesir_nodeorigin	node_namer/   r/   r0   r      s0   
r   c                 C   sp   i }|   D ]\}}||vrt|g||< q|| | qi }|   D ]\}}t|| }t||||< q$|S rf   )itemsr   addr   r(   )r   buf_name_to_n_noder   r   node_name_to_buf_metan_noder/   r/   r0   get_node_name_to_buf_meta  s   r   rP   c                 C   sP   i }t || |du rdS t|}| jjD ]}|j|v r%||j|jd< qdS )rS   Nbuf_meta)r   r   rK   r2   r)   getr<   )rP   rR   r   r   rD   r/   r/   r0   annotate_orig_fx_with_snodes  s   

r   c               	   c   s    t jdddk} dd l}t|jjj}t	
 }| s,z
d V  W |  d S |  w |tdd t jt d}t j|dd tt j|d	t  d
}|tj |td || zd V  W || |  d S || |  w )NTORCH_COMPILE_DEBUG01r   z*functorch.compile.config.debug_partitionerTtorchinductor)exist_okaot_z
_debug.log3[%(filename)s:%(lineno)d %(levelname)s] %(message)s)osenvironr   torch._functorch.aot_autogradlogging	getLogger
_functorchaot_autogradrX   
contextlib	ExitStackcloseenter_contextr   pathjoinr   makedirsFileHandlerr	   setLevelDEBUGsetFormatter	Formatter
addHandlerremoveHandler)compile_debugrr   r9   stackr   fhr/   r/   r0   enable_aot_logging'  s>   




r    _inductor_post_to_pre_grad_nodes._inductor_triton_kernel_to_post_grad_node_info_pre_grad_graph_id#_inductor_pre_grad_node_stack_trace_inductor_kernel_stack_trace(_inductor_kernel_provenance_debug_handlec                   C   s   da d S rU   )r   r/   r/   r/   r0   -reset_inductor_kernel_provenance_debug_handle[  s   r   c                  c   s    t } t }t }t }t }t}da i ai ai ai adazdV  W | a |a|a|a|a|adS | a |a|a|a|a|aw )zzContext manager that resets provenance tracking globals upon entering
    and restores their original values when exiting.r   N)r   r   copyr   r   r   r   )original_pre_grad_graph_idoriginal_post_to_pre_grad_nodes-original_triton_kernel_to_post_grad_node_info+original_inductor_pre_grad_node_stack_trace$original_inductor_kernel_stack_trace0original_inductor_kernel_provenance_debug_handler/   r/   r0   reset_provenance_globals`  sH   r   c                   @   s*  e Zd Ze ZedededB fddZd)ddZ	deddfd	d
Z
	d*dededededee f
ddZej	d*dededededeee  f
ddZdedefddZd)ddZd)ddZdededdfddZdee dB dedB d edB ddfd!d"Zd)d#d$Zd%eded& dB fd'd(ZdS )+DebugContextfolder_namer,   Nc                 C   sV   t jjpt }tjD ]}tj|d|  d| }tj	|s(t
| |  S q
d S )Nr   .)r   rM   	debug_dirr   r   _counterr   r   r   existsr   )r   r   ndirnamer/   r/   r0   create_debug_dir  s   

zDebugContext.create_debug_dirc                 C   s   d | _ d | _t | _d S rf   )_prof_pathr   r   _stack)selfr/   r/   r0   __init__  s   zDebugContext.__init__new_pathc                 C   s   | j sd S |dsJ |ddlm} z.|| d tj|r(t| t	| j | W d    W d S 1 s;w   Y  W d S  t
yT   td| j | Y d S w )Nz.debugr   )FileLockz.lockz(Failed to copy debug files from %s to %s)r   endswithfilelockr   r   r   r   r-   rmtreecopytreeOSErrorr9   r:   )r   r   r   r/   r/   r0   r     s   
&
zDebugContext.copywfilename
write_moderT   re   c                 O   s.   | j sJ ttj| j ||g|R i |S rf   r   openr   r   r   )r   r   r   rT   re   r/   r/   r0   fopen  s   
$zDebugContext.fopenc                 o   s\    | j sJ ttj| j ||g|R i |}|V  W d    d S 1 s'w   Y  d S rf   r   )r   r   r   rT   re   fr/   r/   r0   fopen_context  s
   
&"zDebugContext.fopen_contextsuffixc                 C   s   | j sJ tj| j |S rf   )r   r   r   r   )r   r   r/   r/   r0   r     s   
zDebugContext.filenamec                 C   s   t jjd urJdd l}| jsJ tj| jtj| j d}|	|d}|j
| jtj| jd W d    n1 s=w   Y  t j| d S d S )Nr   z.tar.gzzw:gz)arcname)r   rM   
upload_tartarfiler   r   r   r   basenamer   r   )r   r   tar_filetarr/   r/   r0   r     s   
zDebugContext.upload_tarc                    s   t jr#td  j} tj dtdd f fdd}| j	|| | j
t|  t jjs2d S | t | _t jjrD| dtj t jjrQ| dtj d S d S )Nztorch._dynamolevelr,   c                    s     |  d S rf   )r   )r   r9   r/   r0   reset_log_level  s   z/DebugContext.__enter__.<locals>.reset_log_levelz	debug.logzinfo.log)r   debugr   r   r   r   r   r   r   callbackr   r"   set_debug_handlerrM   enabledr   r	   r   	debug_log_setup_log_captureinfo_logINFO)r   
prev_levelr   r/   r   r0   	__enter__  s   
zDebugContext.__enter__r   c                 C   sp   t d}| j| |}t |}|| |t d |	| |t
|j| | j|j| d S )Nztorch._inductorr   )r   r   r   r   r   StreamHandlerr   r   r   r   minr   r   r   )r   r   r   r9   fdchr/   r/   r0   r    s   



zDebugContext._setup_log_captureexc_typeexc_valexc_tbc                 C   sF   | j r| j   |   | jr|   tdt | j | j	  d S )Nz%s debug trace: %s)
r   disable_save_profile_datar   r   r9   r:   r
   r   r   )r   r
  r  r  r/   r/   r0   __exit__  s   
zDebugContext.__exit__c                 C   s   | j sJ | j | d | d)}tj| j |d}|  |d |d |d |d W d    d S 1 s?w   Y  d S )Nzcompile.profzcompile.stats)streamcumtimed   tottime)	r   
dump_statsr   r   pstatsStats
strip_dirs
sort_statsprint_stats)r   r  statsr/   r/   r0   r    s   



"zDebugContext._save_profile_datar)   ).Nc                 C   sb   t jjr$tt j|r$ztt| |W S  ty#   tjddd Y d S w dtdtdd fdd}|S )	Nz Ignoring exception in debug codeTexc_inforT   re   r,   c                  _   s   d S rf   r/   rd   r/   r/   r0   ignored)  rW   z)DebugContext.__getattr__.<locals>.ignored)	r   rM   r   getattrDebugFormatterrF   r9   r:   r   )r   r)   r  r/   r/   r0   __getattr__   s   zDebugContext.__getattr__r,   N)r   )rX   
__module____qualname__	itertoolscountr   staticmethodrA   r   r   r   r   r   r   r   contextmanagerr   r   r   r   r  r@   r  r[   BaseExceptionr  r  r   r   r/   r/   r/   r0   r     sh    









r   c                   @   s  e Zd ZdeddfddZdejjdeej	 ddfdd	Z
dejjdeej	 ddfd
dZdeddfddZdeddfddZededefddZdeddfddZdejjdeddfddZd%dededdfddZdedeej dedef d ed!ed"edB ddfd#d$ZdS )&r  handlerr,   Nc                 C   s"   |j | _ |j| _|j| _|| _d S rf   )r   r   r   r)  )r   r)  r/   r/   r0   r   0  s   
zDebugFormatter.__init__rP   inputsc              
   C   s   |  dH}d }tjjjjrtjj|}t	j
|j}tjjjj}tjjddd t|||d||d W d    n1 sAw   Y  W d    n1 sPw   Y  |  d}||jdd W d    d S 1 sow   Y  d S )Nzfx_graph_runnable.pyF)ztrace.enabledztrace.save_real_tensorsinductor)save_dirstable_hashzfx_graph_readable.pyprint_output)r   rr   rw   r   rM   save_real_tensors_subclasses
fake_utilstry_convert_fake_to_realr   r   r   r)   r   r   writeprint_readable)r   rP   r*  r  r,  r-  r/   r/   r0   fx_graph6  s.   "zDebugFormatter.fx_graphc                 C   sB   |  d}||jdd W d    d S 1 sw   Y  d S )Nzfx_graph_transformed.pyFr.  )r   r4  r5  )r   rP   r*  r  r/   r/   r0   fx_graph_transformedT  s   "z#DebugFormatter.fx_graph_transformedr2   c                 C   @   |  d}|| | W d    d S 1 sw   Y  d S )Nzir_pre_fusion.txtr   r4  	_write_irr   r2   r  r/   r/   r0   r&   \     "zDebugFormatter.ir_pre_fusionc                 C   r8  )Nzir_post_fusion.txtr9  r;  r/   r/   r0   r'   `  r<  zDebugFormatter.ir_post_fusionc                 C   s2   t  }| D ]}||  |d q| S )Nz


)ioStringIOr4  	debug_strgetvalue)r2   rm   rD   r/   r/   r0   r:  d  s
   zDebugFormatter._write_irc                 C   s   t || dd d S )Nzgraph_diagram.svg)r4   )rQ   r   )r   r2   r/   r/   r0   graph_diagraml  s   zDebugFormatter.graph_diagramc                 C   s,   t || t|| ddtdtjjd d S )Nzorig_fx_graph_diagram.svgFT)r4   r7   progparse_stack_tracer8   )r   r   r   GRAPHVIZ_COMMAND_SCALABLEr   rM   r8   )r   rP   r2   r/   r/   r0   draw_orig_fx_grapho  s   

z!DebugFormatter.draw_orig_fx_graphpyr   	extensionc                 C   s   t || d|  d S )Nzoutput_code.)r-   r   r   )r   r   rG  r/   r/   r0   output_code~  s   zDebugFormatter.output_coder)   input_nodestimingsChoiceCallerelapseprecompile_elapseprescreening_elapsec                    s   ddl m  dt jdtttf f fdd|tj tj fdd|D |||d	}| j	d
ddd,}|
 D ]\}	}
t|	 }|| |
|d< t|| |d q;W d    d S 1 sew   Y  d S )Nr   )FixedLayoutrD   r,   c              	      s  t | dr	| j}nd}|t| jd}z7|  }t| rC |j|jtj	j
|jtj	j
|jtj	j
j|jddd}t||d< nt||d< W n	 tyS   Y nw z
t|  |d< W n	 tyg   Y nw z
t|  |d	< W n	 ty{   Y nw zttj	j
|  |d
< W n	 ty   Y nw zttj	j
|  |d< W n	 ty   Y nw zttj	j
|  |d< W n	 ty   Y nw t | drt| jtjrڈ| j|d< |S )Nr)    )r)   r[   r   )fallback)rI   rN   strideoffsetlayoutrI   rc   rR  rN   numelrH   )rz   r)   r[   rX   get_output_specr>   rc   rI   r"   rK   sizevarsoptimization_hintsrN   rR  optimization_hintrS  rA   rF   	get_dtyperb   
get_strideget_size	get_numelrH   r   IRNode)rD   r   	node_inforT  static_layoutrO  build_node_infor/   r0   rb    st   

	z>DebugFormatter.log_autotuning_results.<locals>.build_node_infoc                    s   g | ]} |qS r/   r/   rg   rD   )rb  r/   r0   
<listcomp>      z9DebugFormatter.log_autotuning_results.<locals>.<listcomp>)op_namecuda_device_namecuda_device_countrI  autotuning_timeprecompile_timeprescreening_timezautotuning_result_json_list.txtatzutf-8)encodingbenchmark_result
)r   rO  r^  dictrA   rr   cudaget_device_namedevice_countr   r   	info_dictupdatejsondumpr4  )r   r)   rI  rJ  rL  rM  rN  general_propertiesr  callertimert  r/   ra  r0   log_autotuning_results  s,   	"9	
"z%DebugFormatter.log_autotuning_results)rF  )rX   r"  r#  r   r   rr   r   r   listTensorr6  r7  SchedulerNodeListr&   r'   r&  rA   r:  rA  rE  rH  r   r^  rp  floatr{  r/   r/   r/   r0   r  /  sX    



r  c                 C   .   t tjrt dt|  tj	|  d S )NzBEFORE FUSION
%s)
ir_pre_fusion_logisEnabledForr   r  infor  r:  r"   r   r&   r2   r/   r/   r0   log_ir_pre_fusion     r  c                 C   r  )NzAFTER FUSION
%s)
ir_post_fusion_logr  r   r  r  r  r:  r"   r   r'   r  r/   r/   r0   log_ir_post_fusion  r  r  schedulec                    sD   zt ddd  fddd W d S  ty!   tjddd Y d S w )	Nartifactc                   S   
   dddS )Ninductor_collective_schedulerv  r)   rm  r/   r/   r/   r/   r0   <lambda>     z+_dump_collective_schedule.<locals>.<lambda>c                      s    S rf   r/   r/   r  r/   r0   r    s    metadata_fn
payload_fnzAFailed to log inductor_collective_schedule via structured loggingTr  )r   rF   r9   r   r  r/   r  r0   _dump_collective_schedule  s   
r  c                    s&    fdd| D }|rt | d S d S )Nc                    s0   g | ]}t t|d d  tjrt ddqS )rD   Npython_kernel_name)r>   r  r   _CollectiveKernelrc  opr/   r0   rd    s    
z+log_collective_schedule.<locals>.<listcomp>)r  )r2   r  r/   r  r0   log_collective_schedule  s   
r  node_runtimesc              	      sJ  zt jjjdtt dB dtt ffdd}dtdtdB fdd}g  | D ]]\}}t|j	d	|
 }t|j	r;d
nd}g }z0| D ])}|j	}	|	 }
t|	jtjrX|	 nd}|	 }|||
||||d qDW n	 tyx   Y nw  ||||d q&tddd  fddd W dS  ty   tjddd Y dS w )zDLog per-op runtime estimates and output tensor metadata for TLParse.rh   Nr,   c                    s   | d ur
t  | S g S rf   )r|  rh   )to_optimization_hintsr/   r0   to_list	  s   z,log_runtime_and_tensor_meta.<locals>.to_listrI   c                 S   s"   | d u rd S t | }|d}|S )Nztorch.)rA   removeprefix)rI   sr/   r/   r0   dtype_to_str  s
   
z1log_runtime_and_tensor_meta.<locals>.dtype_to_strr  
collectiver_   )shaperR  rI   )r)   r[   estimated_runtime_nsr   r  c                   S   r  )N inductor_runtime_and_tensor_metarv  r  r/   r/   r/   r/   r0   r  9  r  z-log_runtime_and_tensor_meta.<locals>.<lambda>c                      s   d iS )Nopsr/   r/   )r  r/   r0   r  =      r  z.Failed to log inductor_runtime_and_tensor_metaTr  )r"   rK   rW  rX  r   r   r|  rA   r  rD   r~   r   is_collectiverC   rE   r>   rT  r   Layoutr[  maybe_get_dtyper}   rF   r   r9   r   )r  r  r  r  
runtime_nsr)   op_typer   rm   irnoder  rR  rI   r/   )r  r  r0   log_runtime_and_tensor_meta  sV   
"
	
r  c                   C   sH   t sdS ztddd dd d W dS  ty#   tjddd	 Y dS w )
z:Emit a structured artifact with the graph execution order.Nr  c                   S   r  )Ngraph_executionrv  r  r/   r/   r/   r/   r0   r  J  r  z%log_graph_execution.<locals>.<lambda>c                   S   s   dt iS )Ngraph_execution_order)r#   r/   r/   r/   r0   r  N  r  r  zFailed to log graph_executionTr  )r#   r   rF   r9   r   r/   r/   r/   r0   log_graph_executionC  s   r  c                   c   sB    g a i adazdV  W t  dada dadS t  dada daw )z5Record graph execution order and log it once on exit.TNF)r#   r%   r$   r  r/   r/   r/   r0   $record_and_log_graph_execution_orderT  s   r  c                   @   s    e Zd ZU eed< ejed< dS )TensorMetadataHoldertensor_metadatarc   N)rX   r"  r#  r   __annotations__rr   rc   r/   r/   r/   r0   r  d  s   
 r  pre_grad_graph_idpost_to_pre_grad_nodes_jsonc              
      s  i i d}t |tstd |S t | ts|S tt}tt}zdttt	f dt
fdd}| D ]\ }t |tsGtd |  W S |D ]q}||sV|    W S |d| kro||d	    |  |d	   fd
d|dg D }|r| \}	||	s|    W S |	d| kr||	d	   | |	d	  |fdd|	dg D  |s~qIq4dttt	f ddfdd}
|
| |
| ||dW S  ty } z"tdddt|t d td| td|  |W  Y d}~S d}~ww )zx
    Create bidirectional mappings between pre_grad graph nodes
    and post_grad graph code nodes, and vice versa.
    )	preToPost	postToPrezCProvenance tacking error: post_to_pre_grad_nodes_json is not a dictrD   r,   c                 S   sB   t | tstd dS d| vsd| vsd| vrtd dS dS )NzVProvenance tacking error: node provenance in post_to_pre_grad_nodes_json is not a dictFgraph_idr)   	from_nodezYProvenance tacking error: node provenance in post_to_pre_grad_nodes_json has wrong formatT)r>   rp  r9   error)rD   r/   r/   r0   check_format  s   
z8create_mapping_pre_post_grad_nodes.<locals>.check_formatzIProvenance tacking error: post_to_pre_grad_nodes_json value is not a listr  r)   c                    s   g | ]}| fqS r/   r/   rg   r   )	outer_keyr/   r0   rd    re  z6create_mapping_pre_post_grad_nodes.<locals>.<listcomp>r  c                 3   s    | ]}| fV  qd S rf   r/   r  )
parent_keyr/   r0   rk     s    
z5create_mapping_pre_post_grad_nodes.<locals>.<genexpr>dNc                 S   &   | D ]
}t | | | |< qt| } d S rf   r|  rp  r  keyr/   r/   r0   convert_sets_to_lists     zAcreate_mapping_pre_post_grad_nodes.<locals>.convert_sets_to_listsr+  provenance_tracking_error"create_mapping_pre_post_grad_nodesfunction	error_msgstack_tracez post_to_pre_grad_nodes_json:  %szpre_grad_graph_id:  %s)r>   rp  r9   r  r@   rp   defaultdictr   rA   r   r|   r   r|  r   r   popextendrF   r   	traceback
format_exc)r  r  empty_returnpre_to_postpost_to_prer  
node_arrayrD   r   current_noder  er/   )r  r  r0   r  m  st   








	r  triton_kernel_to_post_grad_jsonc              
   C   s   i i d}t | tstd |S tt}z;|  D ]\}}t |ts.td |  W S |D ]	}|| 	| q0qdtt
tf ddfdd}|| | |dW S  tyy } ztd	d
dt
|t d td|  |W  Y d}~S d}~ww )zqCreate bidirectional mappings between triton kernel name and post_grad
    graph code nodes, and vice versa.
    )cppCodeToPostpostToCppCodezGProvenance tacking error: triton_kernel_to_post_grad_json is not a dictzMProvenance tacking error: triton_kernel_to_post_grad_json value is not a listr  r,   Nc                 S   r  rf   r  r  r/   r/   r0   r    r  zFcreate_node_mapping_kernel_to_post_grad.<locals>.convert_sets_to_listsr+  r  "create_mapping_kernel_to_post_gradr  z$triton_kernel_to_post_grad_json:  %s)r>   rp  r9   r  rp   r  r   r   r|  r   rA   r   rF   r   r  r  )r  r  post_to_cpp_coder  r  	curr_noder  r  r/   r/   r0   'create_node_mapping_kernel_to_post_grad  sN   	



	r  c               
   C   s   z6i } t r0tt}i t|} tjjr0tj	dd}t
| | W d    n1 s+w   Y  d| d< | W S  tyX } ztdddt|t d i W  Y d }~S d }~ww )	Nz/inductor_provenance_tracking_node_mappings.jsonr   g       @versionr+  r  dump_inductor_provenance_infor  )r   r  r   r   r   rM   r   r"   r   r   rv  rw  rF   r   rA   r  r  )node_mappingnode_mapping_kernelr  r  r/   r/   r0   r  
  s@   	r  c               
   C   s   zAt di } tt tt B }i }|D ]'}t|g }t }|D ]}|| |g  q$t|g |t|d||< q|W S  tyc } zt	dddt
|t d i W  Y d}~S d}~ww )zCreate kernel information JSONr  )stack_tracespost_grad_nodespre_grad_nodesr+  r  create_kernel_information_jsonr  N)r   r   r   r   keysr   ru  r|  rF   r   rA   r  r  )r  all_kernelsresultkernel_namer  r  	post_noder  r/   r/   r0   r  0  s<   
	r  node_scheduler  rt   c           
   
      sr  t jjdkrdS zddlm}m} td7 ag }| dt }|rRt| ts'J t	
|g  | jr>| jj}| vr= | n  fdd| jD  t|  }n:t| tsYJ t }| D ])}|||fvr|jdurt	
|g  ||j    fdd|jjD  q^t|}t
|g | tW S  ty }	 ztd	d
dt|	t d W Y d}	~	dS d}	~	ww )z
    Set the mapping between `kernel_name` and the post_grad nodes in `node_schedule`.

    Returns a unique int debug handler for each call to this function.
    r   Nr   )DisableReductionEnableReduction:c                 3        | ]}|j  vr|j V  qd S rf   r)   rg   r   curr_node_infor/   r0   rk     s    
z:set_kernel_post_grad_provenance_tracing.<locals>.<genexpr>c                 3   r  rf   r  r  r  r/   r0   rk     s    
r+  r  'set_kernel_post_grad_provenance_tracingr  )r   rM   provenance_tracking_levelcodegen.simd_kernel_featuresr  r  r   r>   r   r   
setdefaultorigin_noder)   r}   r  r   r|  get_stack_tracesr   rD   ru  r   rF   r   rA   r  r  )
r  r  rt   r  r  r  origin_node_namestack_traces_setrB   r  r/   r  r0   r  Z  sd   

	r  rT   re   c            
      O   s   t jt d}t j|st | dtdtfdd}t|| |f\}}d}| d| dt	t
 d	}t|d
}t||f| W d   n1 sMw   Y  ttjrgd| d|d}	t|	 dS dS )z
    This function is used to save arguments for a compile_fx_inner function call
    to the file system.  Later on one can replay the compile_fx_inner call
    with the saved arguments using load_args_and_run_compile_fx_inner.
    inductor_saved_argsrh   r,   c                 S   s    t | tjrtt| | jS | S )z
        Pickle FakeTensor will result in error:
        AttributeError: Can't pickle local object 'WeakValueDictionary.__init__.<locals>.remove'

        Convert all Tensor to metadata. This may also makes pickle faster.
        )r>   rr   r}  r  r   rc   r  r/   r/   r0   handle_tensor  s   z5save_args_for_compile_fx_inner.<locals>.handle_tensorcompile_fx_inner/_z.pklwbNz3
Arguments for a compile_fx_inner call is saved to z. To replay the call,
run the following:

from torch._inductor.debug import load_args_and_run_compile_fx_inner
load_args_and_run_compile_fx_inner(z
)
        )r   r   r   tempfile
gettempdirr   mkdirr   r   nextsave_args_cntr   picklerw  r9   r  r   r   rJ   )
rT   re   folderr  args_to_savekwargs_to_savefn_namer   r   messager/   r/   r0   save_args_for_compile_fx_inner  s$   
r  r   c              	   C   s   ddl m} t| d}t|\}}W d    n1 sw   Y  dtdtfdd}tjjdd	}|6 t	
d
d t|||f\}}||i |W  d    W  d    S 1 s]w   Y  W d    d S 1 smw   Y  d S )Nr   )r  rbrh   r,   c                 S   s0   t | trtjj| jj| jj| jj	| j
S | S rf   )r>   r  rr   _dynamotestingrand_stridedr  r  rR  rI   rc   r  r/   r/   r0   r    s   
z9load_args_and_run_compile_fx_inner.<locals>.handle_tensorT)allow_non_fake_inputs	save_argsF)torch._inductor.compile_fxr  r   r  loadr   rr   r1  FakeTensorModer   r   r   )r   r  r   rT   re   r  	fake_moder/   r/   r0   "load_args_and_run_compile_fx_inner  s   Rr  )package_pathfunc.exported_programinductor_configsr  c             
   C   s  ddl m} ddlm} ddlm} ddlm} |jj	}|j
dd}	t|	tjjs+J |j\}
}z[|r@|jjdkr@||d	|d
 |r|jjdkrt|	}t|j}t|}|||d |d |d
\}}t|}tjj||dd}| |j
dd|||ddd | |	|
||||dW S  |y } z||dd|d td |d }~w ty } z|rd}|jjdkrd}||d	||d |d }~ww )Nr   )AccuracyError)dump_to_minify)r   )_aoti_flatten_inputsF)check_guards   aot_inductor)options   r   )strictTaccuracy)r  r  load_and_runcheck_accuracy)r  r  r(  aot_inductor_accuracyminify)commandr$  zAccuracy failedrun)torch._dynamo.debug_utilsr  torch._dynamo.repro.aotir  torch._inductorr   r  r   r#  dump_aoti_minifiermoduler>   rr   r   r   example_inputsrepro_levelr   deepcopyr?   exportr9   r:   rF   )r  r  r  r  r  r  r   r   use_minifierrP   rT   re   gm_copyexample_inputs_copyconfig_copyflat_example_inputstuple_inputsflattened_epr  r,  r/   r/   r0   aot_inductor_minifier_wrapper  s   




	
r>  )FNrU   r!  )F)rp   r   r   dataclasses	functoolsr=  r$  rv  r   r   os.pathr  r  r-   r  r  collections.abcr   r   r   typingr   r   unittest.mockr   rr   functorch.compiler   r	   r
   r   torch._dynamo.repro.after_aotr   torch._dynamo.utilsr   r0  r   torch._loggingr   torch._logging._internalr   torch._utils_internalr   torch.fx.graph_moduler   torch.fx.passes.shape_propr   r   torch.fx.passes.tools_commonr   torch.typesr   torch.utils._ordered_setr   torch.utils._pytreer   rP  r   r   r   	schedulerr   r   r   r    r!   virtualizedr"   r   rX   r9   r#   r|  rp  rA   objectr  r$   r|   r%   r@   r  r  r~  rq   r(   rD  cacher1   rQ   rs   r;   r   r   r   r'  r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r?   r  r  r  r  	dataclassr  r%  r  r  r  r  r  r  r  r  r6  ExportedProgramr>  r/   r/   r/   r0   <module>   s,  
 



9f

$



 +
2  *
@

a

<"&-

P.

