o
    j9:j                 	   @   s  U d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z	d dl
Z	d dlmZ d dlmZmZmZmZ erCd dlmZ d dlmZ e jdd	d	kZd
ZdedB fddZdedB fddZdedB fddZdedB fddZdedB fddZdedB fddZ defddZ!defddZ"e jdd	d	kZ#d
Z$dZ%d
Z&e rdndZ'e(e jdd Z)e(e*d!< ed"d#d$dd%Z+ee*d&< d
Z,ee*d'< e Z-edB e*d(< e Z.edB e*d)< ed*d+dd,Z/ee*d-< dZ0ee*d.< e Z1edB e*d/< e Z2edB e*d0< ed1d2Z3ee*d3< d
Z4ee*d4< i Z5e6e7e7f e*d5< dZ8e(dB e*d6< d7Z9ed8 e*d9< e jd:d;d	kZ:ee*d<< e jd=d;d	kZ;ee*d>< e jd?d;d	kZ<ee*d@< e jdAe rVd;nd	d	kZ=ee*dB< e jdCd	d	kZ>e jdDd	d	kZ?d
Z@dZAe jdEd	d	kZBe jdFd	kZCe jdGd	kZDe jdHd	d	kZEe jdIe rd;nd	d	kZFdZGdZHdZIe jdJd;d	kZJe jdKd	kZKe jdLdMZLedN e*dO< dZMdZNe" ZOd
ZPd
ZQdZRd
ZSdZTe	jUjVjWe*dP< dZXe	jUjVjWe*dQ< dZYe	jUjVjZe*dR< dZ[e	jUjVjWe*dS< dZ\e	jUjVjWe*dT< dZ]e	jUjVjWe*dU< dZ^ee_dV ge_dV f dB e*dW< dZ`ee_dV ge_dV f dB e*dX< dZad
Zbd
Zcd
ZddZei Zfe6e7e6e7ef f e*dY< i Zge6e7e6e7ef f e*dZ< dZhe jd[d	d	kZid
ZjdZkdZld
d\d]dd^Zme6e7ef e*d_< d`Zneda e*db< d
Zog Zpe_e7ee_dV ge_dV f B  e*dc< dZqe(dB e*dd< dZrd
Zsd Zte(e*de< dfZuedg e*dh< dZvee(ge(f dB e*di< djZwedk e*dl< dfZxedm e*dn< dZyee(ge(f dB e*do< djZzedk e*dp< dfZ{edm e*dq< dZ|ee(ge(f dB e*dr< djZ}d
Z~ee*ds< d ZdtZduZeddvdwdxZee*dy< e jdzd	kZe jd{d	kZe jd|d	kZe jd}d	kZe jd~d	kZe(e ddtZe(e ddZde(dB fddZe Ze(dB e*d< e jdd	d	kZe jdd;d	kZe jdd;d	kZe jde s}d	nd;d	kZee*d< dZde*d< g Ze_e7 e*d< g Ze_e7 e*d< d
Zee*d< eddd
d,Zee*d< g Ze_e( e*d< e jdd Zde(dB fddZe Ze(dB e*d< e jdd Ze jdd Zed e*d< e jdd Zed e*d< d
Zee*d< d
Zee*d< dZee*d< dZee*d< djZed e*d< dZee*d< dZee*d< e jdd;d	kZee*d< d
ZdZd
Ze jdd	kZe jdd	kZdZdZdZe jdd	kZe(e jddZee jddZe jdd	kZe jdd	kZe(e jdd	Zdd Zdd ZG dd dZG dd dZe jdd;d	kZde7defddZde7defddĄZde7defddƄZe jddZe	jjsd	nd;Ze jded	kZe jdd;d	kZe jdd;d	kZe jdd	d	kZe jd̡d	kZdZdZdZdZe(dB e*d< edddҍZee*d< d
Zd
Zd
ZdZe jdԡd	kZdZeg df dB e*d< d
Ze jdסd	kZee*d< e jd١d	kZee*d< e jddܡZe jde rTd;nd	d	kZee*d< dZee*d< dZe jdd	d	kZd]ZdZdZdZdZe(dB e*d< dZd
ZdZd
Zd
Ze dd	d	kZe dd	kZe(e jdd Ze jdd;d	kZdZd
ZdZd
Zd
Zd
Zd
Zd]Zd]ZdZdZd
Zd
ZdZd
Zde	jv pde	jv Ze peZe jdd	d	kZdZe7dB e*d< dZe7dB e*d< edddҍZe7e*d< de7fddZe Ze7e*d< dZe(e*d< edddd,Z ee*d< edd
dҍZee*d< d
ZdtZddgZe_ed e7B  e*d< d
Zee*d< e jd d;d	kZdZe(e*d< dZe(e*d< dZ	ee*d< G dd dZ
G d	d
 d
ZdefddZde(fddZe rdne Ze(dB e*d< edddd,Zee*d< eddZe(e*d< e! Zee*d< edd2Zee*d< eddd
d,Zee*d< e jdd;d	kZee*d< edd2Zee*d< e7dB e*d < e rPz3d d!lmZ er4ee jed"e jd#Zn	ed#ZW n ee fyO   dZY nw dZdZ!e jd$d	d	kZ"e jd%d	d	kZ#d
Z$d
Z%dZ&d
Z'dZ(d&Z)d
Z*dZ+d
Z,ee*d'< e jd(d;d	kZ-d
Z.d
Z/d
Z0d
Z1e  Z2ee*d)< e jd*dܡZ3e3dkZ4e3d	krdne3Z5e jd+dZ6e7dB e*d,< e jd-d	kZ7d
Z8e jd.d;d	kZ9ee*d/< d
Z:ee*d0< d
Z;ee*d1< d
Z<ee*d2< d
Z=ee*d3< d
Z>ee*d4< d
Z?ee*d5< d
Z@ee*d6< d
ZAee*d7< e jd8d;d	kZBe jd9d;d	kZCee*d:< dZDee*d;< i ZEe6e7e6e7ef f e*d<< e(e jd=d>ZFe(e*d?< d
ZGee*d@< de7dB fdAdBZHedCddҍZIe7e*dD< G dEdF dFZJG dGdH dHZKG dIdJ dJZLG dKdL dLZMG dMdN dNZNG dOdP dPZOeeOG dQdR dReOZPeeOG dSdT dTeOZQG dUdV dVZRdHZSedW e*dX< dJZTedY e*dZ< d[ZUed[ e*d\< dJZVedJ e*d]< G d^d_ d_ZWG d`da daZXG dbdc dcZYg ddZZe_e7 e*de< g dfZ[e_e7 e*dg< dgZ\e_e7 e*dh< g Z]e_ee	j^e	j^e	j^gdf  e*di< e djd	d	kZ_G dkdl dlZ`G dmdn dnZae	rd dolbT G dpdq dqZce jdrd;d	kZdee*ds< eejeef  dS (t      N)Callable)AnycastLiteralTYPE_CHECKING)	is_fbcode)Configget_tristate_envinherit_fields_frominstall_config_module)InductorChoices)CUDAGraphPolicyTORCHINDUCTOR_INPLACE_PADDING1Freturnc                   C      t dS )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr	    r   r   ]/home/nk/hobo-godmode/plappi-mvp/.venv/lib/python3.10/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_default      r   c                   C   s,   t jddkr
dS t jddkrdS d S )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   r   vec_isa_ok_default   s
   r   c                   C   r   )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   autotune_remote_cache_default%   r   r   c                   C   r   )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   %bundled_autotune_remote_cache_default)   r   r!   c                   C   s   t dt sdS d S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r	   r   r   r   r   r   )bundle_triton_into_fx_graph_cache_default-   s   r#   c                   C   r   )N&TORCHINDUCTOR_AUTOTUNE_AT_COMPILE_TIMEr   r   r   r   r    autotune_at_compile_time_default4   r   r%   c                  C   s<   d} dt jv rt jddkS t rtjd}|| kS dS )N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r   r   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversionr   r   r   static_cuda_launcher_default8   s   
r.   c                  C   s@   d} dt jv rt jddkS t rd}tj|}|| kS dS )Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr(   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer-   r   r   r   prologue_fusion_enabledG   s   
r2   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2T-/logs/dedicated_log_torch_compile_worker_rank,TORCHINDUCTOR_PRECOMPILATION_TIMEOUT_SECONDSi,  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cache$TORCHINDUCTOR_FX_GRAPH_CACHE_DEFAULTTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_defaultenv_name_forcedefaultfx_graph_cacheremote_gemm_autotune_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITE)r9   r;   r<   non_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez*torch.compiler.config.force_disable_caches)aliasforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rK   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separateTORCHINDUCTOR_FX_WRAPPER
fx_wrapper*TORCHINDUCTOR_CPP_CACHE_PRECOMPILE_HEADERScpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAX$TORCHINDUCTOR_APPLY_GUMBEL_MAX_TRICKTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTS(TORCHINDUCTOR_RUNTIME_TRITON_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)noner`   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passcustom_partitioner_fnjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)r<   tritonatenrv   mixed_mm_choice'reorder_for_compute_comm_overlap_passesreorder_prefetch_limit&size_threshold_for_succ_based_strategyra   )ra   all	only_fsdpbucket_all_gathers_fx.bucket_all_gathers_fx_bucket_size_determinatorr<   )r<   
custom_opscustom_ops_multidtypebucket_all_gathers_bucket_modera   r}   bucket_reduce_scatters_fx2bucket_reduce_scatters_fx_bucket_size_determinator"bucket_reduce_scatters_bucket_modebucket_all_reduces_fx.bucket_all_reduces_fx_bucket_size_determinator!runtime_estimations_mms_benchmark   g      I@*TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r<   r;   r9   use_experimental_benchmarker+TORCHINDUCTOR_DISTRIBUTED_MAX_AUTOTUNE_GEMM&TORCHINDUCTOR_PIPELINE_GEMM_AUTOTUNINGTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM%TORCHINDUCTOR_DEFAULT_AUTOTUNE_WARMUP"TORCHINDUCTOR_DEFAULT_AUTOTUNE_REPd   c                  C   s0   t jd} | d u rdS |  dv rd S t| S )N,TORCHINDUCTOR_AUTOTUNE_NUM_CHOICES_DISPLAYED
   r   r   r   r   lowerintenv_valr   r   r   '_autotune_num_choices_displayed_default  s   r   autotune_num_choices_displayed/TORCHINDUCTOR_MAX_AUTOTUNE_REPORT_CHOICES_STATS<TORCHINDUCTOR_MAX_AUTOTUNE_PRUNE_CHOICES_BASED_ON_SHARED_MEM-TORCHINDUCTOR_TRITON_DISABLE_DEVICE_DETECTIONTORCHINDUCTOR_GRAPH_PARTITIONgraph_partitionzCUDAGraphPolicy | Nonecudagraph_policycustom_should_partition_opscudagraph_unsafe_unbacked_ops#max_autotune_allow_flexible_layoutsz%pytorch/compiler:force_same_precision"TORCHINDUCTOR_FORCE_SAME_PRECISIONforce_same_precisionmulti_kernel_hints(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPPc                  C   s&   t jdd} |  dv rd S t| S )N*TORCHINDUCTOR_NVGEMM_MAX_PROFILING_CONFIGS5r   r   r   r   r   r   %_nvgemm_max_profiling_configs_defaultn  s   r   nvgemm_max_profiling_configs(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacefallback_by_defaultselective_decomposeuse_dceuse_pre_grad_passes)earlylater<   pre_grad_pass_timinguse_joint_graph_passesuse_post_grad_passesCUTEDSL_ENABLE_AUTOTUNINGcutedsl_enable_autotuningi    TORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE(TORCHINDUCTOR_COLLECTIVE_BENCHMARK_NRUNS50*TORCHINDUCTOR_COLLECTIVE_BENCHMARK_TIMEOUT30'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUSc                  C      t jddd} | S )N#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT ,r   r   r   split)collect_envr   r   r    _parse_autoheuristic_collect_env     r   c                  C   r   )NTORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mmr   r   )use_envr   r   r   _parse_autoheuristic_use_env  r   r   c                   @   s$   e Zd ZdZde v Zde v ZdS )autoheuristic_collectzT
    Config for which autoheuristic optimizations should collect training data.
    pad_mmr   N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r         
r   c                   @   s$   e Zd ZdZde v Zde v ZdS )autoheuristic_usezU
    Config for which autoheuristic optimizations should use learned heuristics.
    r   r   N)r   r   r   r   r   r   r   r   r   r   r   r   r     r   r   'TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOKnamec                 C   s   t | pt| S N)collect_autoheuristicuse_autoheuristicr   r   r   r   run_autoheuristic  s   r   c                 C   &   | dkrt jS | dkrt jS | t v S Nr   r   )r   r   r   r   r   r   r   r   r     
   
r   c                 C   r   r   )r   r   r   r   r   r   r   r   r     r   r   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT#TORCHINDUCTOR_CACHE_SDPA_CONSTRAINT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT          realize_acc_reads_size_threshold)TORCHINDUCTOR_DELAY_REALIZE_CHEAP_OUTPUTSr;   r<   delay_realize_cheap_outputs.TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTr   inductor_choices_classTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLESr   (TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusionloop_index_inversion_in_fusionr   'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   g?max_fusion_unique_io_buffersTORCHINDUCTOR_SPLIT_REDUCTIONSTORCHINDUCTOR_DETERMINISTICTORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL   devgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesTORCHINDUCTOR_DISABLED_PASSESdisabled_passesc                  C   s4   dt jv rt jd } nd} | dv sJ d|  | S )NTORCHINDUCTOR_WORKER_START
subprocess)r  forkspawnzInvalid start method: )r   r   )start_methodr   r   r   decide_worker_start_method  s   

r  worker_start_methodi   small_memory_access_thresholdz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingLOG_TLPARSElog_tlparsefuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tp)TORCHINDUCTOR_PARTITIONED_SCATTER_ENABLEDr&   "partitioned_scatter_min_partitions   "partitioned_scatter_max_partitionsg?!partitioned_scatter_memory_budgetc                   @   s&   e Zd ZU dZeed< dZeed< dS )_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)r   r   r   r'  bool__annotations__r(  r   r   r   r   r   r&  f  s   
 r&  c                   @   s`  e Zd ZU dZdZeed< dZedB ed< dZedB ed< dZ	e
dB ed< dZedB ed< dZeejjgedB f dB ed	< d
Zed ed< dZed ed< dZedB ed< dZedB ed< dZedB ed< dZe
dB ed< dZeed< dZedB ed< dZeed< dZedB ed< dZed dB ed< dZeed< dZeed< dZ ed ed< dZ!eed< dS ) aten_distributed_optimizationszDConfiguration for distributed optimization passes on ATen FX graphs.Fenable_overlap_schedulingNcollective_bucketinginsert_overlap_depsmax_compute_pre_fetchcompute_overlap_multiplercustom_runtime_estimation
analytical)r2  	benchmarkcollective_estimatorr3  compute_estimatormax_memory_increase_gbmax_memory_increase_ratiomax_in_flight_gbmax_coll_distance!log_final_collectives_estimationsbucket_exposed_firstbucket_only_internode_commsenable_fusion_regions)r<   r   r   	coalescedbucket_modeT&prioritize_bucketing_during_scheduling
spmd_checkwarn)rB  errorspmd_mismatch!overlap_scheduling_autofix_cycles)"r   r   r   r   r,  r)  r*  r-  r.  r/  r   r0  floatr1  r   r)   fxNoder4  r   r5  r6  r7  r8  r9  r:  r;  r<  r=  r?  r@  rA  rD  rE  r   r   r   r   r+  k  s4   
 "
r+  c                  C   s   d} d}t j|}| |kS )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    rp   z0pytorch/inductor:enable_parallel_compile_version)r)   r*   r+   )ENABLE_PARALLEL_COMPILE_VERSIONr1   r-   r   r   r   #parallel_compile_enabled_internally  s   rJ  c                  C   s   ddl } | t}dtjv rttjd }|d| |S tjdkr+d}|d |S t	 r:t
 s:d}|d |S tj }|sCJ td	|}|d
| |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32rp   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcode    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr   rJ  r)   _utils	cpu_countmin)rN  logcompile_threadsrT  r   r   r   decide_compile_threads  s&   







rX  rW  z+pytorch/inductor:quiesce_async_compile_pool(TORCHINDUCTOR_QUIESCE_ASYNC_COMPILE_POOLquiesce_async_compile_pool<   )r<   quiesce_async_compile_timeuse_static_cuda_launcherz/torch._inductor.config.use_static_cuda_launcheruse_static_triton_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherz2torch._inductor.config.strict_static_cuda_launcherstrict_static_triton_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   force_shape_padTORCHINDUCTOR_PERMUTE_FUSION_use_fp64_for_unbacked_floatsTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmwrap_inductor_compiled_regionsassume_aligned_inputsassume_32bit_indexing.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesautotune_lookup_tableTORCHINDUCTOR_FILE_LOCK_TIMEOUT600file_lock_timeoutenable_autograd_for_aotc                  C   s<   d } t  rtjdd }tjdd}|d urd| } | S )NMAST_HPC_JOB_NAME	ROLE_RANKr   r4   )r   r   r   r   )log_locmast_job_nameglobal_rankr   r   r   get_worker_log_path  s   
r  TORCHINDUCTOR_WORKER_LOGPATHtorchinductor_worker_logpathc                   @   sn   e Zd ZU ejddkZeed< dZ	e
ed< dZe
ed< ejdd	ur-e
ejdnd	Ze
d	B ed
< d	S )auto_chunkerTORCHINDUCTOR_AUTO_CHUNKERr   enablei   output_size_thresholdr   amplify_ratio_threshold TORCHINDUCTOR_CHUNKER_NUM_CHUNKSN	num_chunk)r   r   r   r   r   r   r  r)  r*  r  r   r  r  r   r   r   r   r    s   
 r  c                   @   s  e Zd ZU dZdZejdddkZejdddkZ	dZ
edB ed< eejd	d
Zdejdejdkr8dndfZedef ed< ejdddkZejdddkZdZedB ed< dZedB ed< edZedB ed< dZed ed< eejddZejdddkZejdddkZejddZejdddkZ d Z!eejd!dZ"ejd"dZ#ejd#dZ$d$Z%d Z&ejd%ddkZ'd Z(ejd&ddkZ)ejd'ddkZ*d(Z+dS ))cppzu
    Settings for cpp backend.
    This class provides a centralized location for managing cpp backend settings.
    $TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE512CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr)   r  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH%TORCHINDUCTOR_CPP_FORCE_INLINE_KERNEL-TORCHINDUCTOR_CPP_USE_CONSTEXPR_FOR_INT_ARRAYri  ),r   r   r   r   threadsr   r   r   no_redundant_loopsdynamic_threadsr  r   r*  min_chunk_sizerQ  rR  r  tuplestrenable_kernel_profileweight_prepackr  r  r	   r  r)  r  r   max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferforce_inline_kerneluse_constexpr_for_int_arrayuse_two_step_variance_thresholdr   r   r   r   r    sZ   
 r  c                   @   s  e Zd ZU dZejddkZdZdZ	dZ
eeeedf B  dB ed< d	ZdZdZdZe  Zd
ZdZedB ed< dZdZedddZeed< dZeed< dZdZdZdZdZ ejde s`dnddkZ!eed< dZ"edB ed< dZ#eed< dZ$dZ%e& Z'edB ed< dZ(eed< dZ)eed< e*dddkZ+eed< dZ,dZ-ejdddkZ.ejdddkZ/dZ0e1d ed< ejd ddkZ2ejd!ddkZ3eed"< ejd#ddkZ4dZ5eejd$dZ6e1d% ed&< ejd'ddkZ7d(Z8dZ9e:j;j<rd)nd*Z=eed+< dZ>dZ?dZ@dZAeBdB ed,< dZCejd-ddkZDejd.ddkZEejd/ddkZFdZGeejd0e:j;j<r<dnd1ZHeejd2d3ZIejd4ddkZJejd5e r\dnddkZKd6ZLdZMedB ed7< ejd8ddkZNdZOejd9dkZPi ZQeReBef ed:< d;ZSejd<ddkZTeed=< ejd>ZUeBdB ed?< ejd@ddkZVeedA< ejdBddkZWeedC< ejdDddkZXeedE< dS )Frw   z.
    Config specific to codegen/triton.py
    TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr   r#  r   "cudagraph_dynamic_shape_warn_limit TORCHINDUCTOR_CUDAGRAPH_OR_ERRORr   cudagraph_or_error%reorder_for_reducing_graph_partitions&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductionsTORCHINDUCTOR_NATIVE_MATMULnative_matmul!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr  r  r  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS TORCHINDUCTOR_DECOMPOSE_SORT_OPSdecompose_sort_ops$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   rp   r&      multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16   rM     spill_thresholdr  ENABLE_PERSISTENT_TMA_MATMULENABLE_TEMPLATE_TMA_STORETORCHINDUCTOR_SKIP_L1$TORCHINDUCTOR_NUM_DECOMPOSE_K_SPLITS10#TORCHINDUCTOR_DECOMPOSE_K_THRESHOLD32TORCHINDUCTOR_ENABLE_PDL!TORCHINDUCTOR_MIX_ORDER_REDUCTIONrp   mix_order_reduction_split_size5TORCHINDUCTOR_MIX_ORDER_REDUCTION_AUTOTUNE_SPLIT_SIZE4TORCHINDUCTOR_MIX_ORDER_REDUCTION_ALLOW_MULTI_STAGESdebug_dump_kernel_inputsr  %TORCHINDUCTOR_TRITON_PROTON_PROFILINGproton_profiling&TORCHINDUCTOR_TRITON_PROTON_OUTPUT_DIRproton_output_dir'TORCHINDUCTOR_TRITON_PROTON_GROUP_BY_SMproton_group_by_sm-TORCHINDUCTOR_TRITON_PROTON_SPLIT_INVOCATIONSproton_split_invocations-TORCHINDUCTOR_TRITON_PROTON_PER_CTA_OCCUPANCYproton_per_cta_occupancy)Yr   r   r   r   r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsr  r  r   r*  cudagraph_min_partition_sizeslow_path_cudagraph_asserts!cudagraph_trees_history_recordingcudagraph_trees_objgraphr    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr  force_cudagraph_syncforce_cudagraphs_warmupr   r  r)  r  fast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingr  r  r  autotune_pointwiseautotune_cublasLtr%   r  r  r  getenvr   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesr  r   persistent_reductionsr  cooperative_reductionsforce_cooperative_reductionsr  divisible_by_16min_split_scan_rblockstore_cubinr)   r-   hipr  use_block_ptruse_tensor_descriptor)transpose_discontiguous_tensor_descriptorr  r  codegen_upcast_to_fp32enable_persistent_tma_matmulenable_template_tma_storeskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYnum_decompose_k_splitsdecompose_k_threshold
enable_pdlmix_order_reduction"mix_order_reduction_initial_xblockr  'mix_order_reduction_autotune_split_size#mix_order_reduction_non_strict_mode&mix_order_reduction_allow_multi_stagesr  dictmax_kernel_dump_occurrencesr  r  r  r  r  r   r   r   r   rw   y  s   
  

	




rw   c                   @   sd  e Zd ZU dZdZejdddkZejdddkZ	ejdddkZ
ejdd	Zejd
dZed ed< ejddZdZdZdZeed< dZeed< dZdZeed< dZedB ed< dZeed< i Zeeef ed< ejdddkZeed< dZeed< ejdddkZeed< eejddZ eed< i Z!eee"f ed< dZ#eed < dZ$eed!< ejd"ddkZ%eed#< dZ&eed$< dZ'edB ed%< e(  Z)eed&< dZ*edB ed'< dZ+edB ed(< dZ,edB ed)< i Z-ee.j/j0e1e f ed*< dZ2e1e dB ed+< ejd,ddkZ3dZ4eed-< dZ5edB ed.< dZ6ee1e B dB ed/< dZ7edB ed0< dS )1aot_inductorz9
    Settings for Ahead-Of-Time Inductor Compilation
    r   AOT_INDUCTOR_DEBUG_COMPILEr   r   AOT_INDUCTOR_DEBUG_SYMBOLS!AOT_INDUCTOR_ENABLE_FRAME_POINTER&AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightsTpackagepackage_cpp_onlydynamic_linkagemetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationcheck_lowerboundDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr&   repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interface)AOT_INDUCTOR_WEIGHT_USE_CACHING_ALLOCATORweight_use_caching_allocatorpackage_constants_in_so package_constants_on_disk_formatprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libsAOT_INDUCTOR_ENABLE_LTOlink_libtorchcross_target_platformaoti_shim_libraryaoti_shim_library_path)8r   r   r   r   output_pathr   r   r   debug_compiledebug_symbolsenable_frame_pointercompile_wrapper_opt_levelr7  r   r*  filtered_kernel_namesserialized_in_specserialized_out_specr9  r)  r:  use_consts_asm_buildr;  r<  r=  r>  r,  r  r@  rA  rC  r   rE  rF  r   rG  rH  rJ  rK  rL  r   rM  rN  rO  rP  rQ  r)   _ops
OpOverloadlistrR  
enable_ltorT  rU  rV  rW  r   r   r   r   r.    s`   
 



r.  c                   @   s   e Zd ZU dZeed< dS )aot_inductor_modeFcompile_standaloneN)r   r   r   rf  r)  r*  r   r   r   r   re    s   
 re  c                   @   s  e Zd ZU dZdZed ed< dZdZe	j
e	jde	j
e	j
ejdZdZedB ed	< g d
Zee ed< eeeeef edd e	jdddD Zeeeef ed< eeeeef edd e	jdddd eD dD Zeeeef ed< e	jdddkZdZdZeed< e	jdddkZe ed< e	jdZ!e"dB ed< e	jdZ#e"dB ed < e	jd!dZ$e"ed"< e	jd#ddkZ%e ed$< e	jd%ddkZ&e ed&< e	jd'd(Z'e"ed)< d*Z(e ed+< dZ)e ed,< dZ*e ed-< d*Z+e ed.< dS )/cutlassz-
    Config specific to cutlass backend.
    -O1)-O0rh  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/Ncutlass_max_profiling_configs)rp   r&   r   r   %cutlass_max_profiling_swizzle_optionsc                 c       | ]}t |V  qd S r   r   .0xr   r   r   	<genexpr>  
    
zcutlass.<genexpr>+TORCHINDUCTOR_CUTLASS_DYNAMIC_CLUSTER_SHAPEz2,1,1r   cutlass_dynamic_cluster_shapec                 c   rp  r   rq  rr  r   r   r   ru    rv  .TORCHINDUCTOR_CUTLASS_DYNAMIC_CLUSTER_FALLBACKc                 c   rp  r   )r  )rs  vr   r   r   ru    s     cutlass_dynamic_cluster_fallbackCUTLASS_EPILOGUE_FUSIONr   r   rp   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_level+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSr}   cutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_writeenable_caching_codegen),r   r   r   r   rl  r   r*  enable_debug_infouse_fast_mathr   pathrealpathr   r   joindirnamer)   __file__cutlass_dirrn  r   ro  rc  r   r  r   rx  r{  cutlass_epilogue_fusion_enabledcutlass_tma_onlyr}  r  r)  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   rg    sz   
 	



rg  c                   @   sV   e Zd ZU dZedB ed< dZedB ed< dZedB ed< dZdZ	dZ
edB ed< dS )cudaNarchr-   cuda_cxxF   r   )r   r   r   r  r  r*  r-   r  enable_cuda_ltoenable_ptxas_infor   r   r   r   r   r   r  	  s   
 r  c                   @   sT   e Zd ZU dZedB ed< dZedB ed< dZedB ed< ej	
ejddZdS )xpuNr  r-   oneapi_rootrm  r   )r   r   r   r  r  r*  r-   r  r   r  r  r   r   r  r   r   r   r   r  5	  s
   
 r  c                   @   s   e Zd ZU g Zee ed< g dZeed  ed< dZ	ed ed< dZ
dZdZdZdZd	Zed	B ed
< ejdZejdddkZeed< d	Zed	B ed< d	Zed	B ed< d	Zed	B ed< dZeed< d	Zee d	B ed< dZeed< dZeed< d	S )rocmr  )gfx90agfx942gfx950ck_supported_archrj  )	ri  rh  rj  rk  z-Osz-Ozz-Ominz-Ofastz-Omaxrl  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweepr  split_k_thresholdcontiguous_threshold)r   r   r   r  rc  r  r*  r  r   rl  is_debug
save_tempsr  flush_denormalsprint_kernel_resource_usager  r   r   r   ck_dirr  r)  r  r   r  r  r  r  r  r  r   r   r   r   r  D	  s.   
 

r  )r  rw   halidepallascpu_backend)rw   r  r  cuda_backendr  tpu_backendxpu_backendc                   @   s   e Zd ZdZdS )mtiaFN)r   r   r   disable_welford_reductionr   r   r   r   r  	  s    r  c                   @   sB   e Zd ZU dZdZdZed ed< dZed ed< dZ	dZ
dZd	S )
r  hostz	host-cudaAnderson2021)r  Li2018	Adams2019Mullapudi2016scheduler_cudar  scheduler_cpuFN)r   r   r   
cpu_target
gpu_targetr  r   r*  r  assertsdebugscan_kernelsr   r   r   r   r  	  s   
 r  c                   @   s   e Zd ZU ejdddkZejdddkZdZe	dB e
d< dZdZdZdZdZdZdZejd	ddkZejd
ddkZejddZejddZdZdZee	gdf dB e
d< ejdddkZeejdejddZee
d< dS )traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTSINDUCTOR_PROVENANCEprovenance_tracking_level)r   r   r   r   r   r   enabledsave_real_tensorsr  r  r*  	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler  r   log_autotuning_resultsr   r  r   r   r   r   r  	  s.   
 

r  )ztrace.upload_tarrh   ri   rj   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierre   rf   r  rk   r   _save_config_ignore)r  zcuda.cutlass_dirzcutlass.cutlass_dirzxpu.cutlass_dirr  rW  rf   re   rh   ri   rj   r  rk   r   rz  r   r=   r?   rC   rD   _cache_config_ignore_prefix_cache_config_factory_keysexternal_matmul8TORCHINDUCTOR_WRITE_ARE_DETERMINISTIC_ALGORITHMS_ENABLEDc                   @   s>   e Zd ZU dZeeeeeef  f dB ed< dZ	e
ed< dS )lookup_tableNtableTcheck_src_hash)r   r   r   r  r,  r  rc  r   r*  r  r)  r   r   r   r   r  H
  s   
 $r  c                   @   s   e Zd ZU dZeed< dZedB ed< dZeed< dZe	dB ed< dZ
dZdZejdZedB ed< ejd	ZedB ed
< dZdZed dB ed< dZdZeed< eddkZeddZdZdZdS )test_configsF%force_extern_kernel_in_multi_templateNforce_custom_op_decompositionforce_no_impl_groupingmax_mm_configs(TORCHINDUCTOR_AUTOTUNE_CHOICE_NAME_REGEXautotune_choice_name_regex(TORCHINDUCTOR_AUTOTUNE_CHOICE_DESC_REGEXautotune_choice_desc_regex)assertrV  track_memory_lifecycleT assume_bucketing_reduces_latency,TORCHINDUCTOR_FORCE_FILTER_REDUCTION_CONFIGSr   )TORCHINDUCTOR_DISTORT_BENCHMARKING_RESULTr   )r   r   r   r  r)  r*  r  r  r  r   runtime_triton_dtype_assertruntime_triton_shape_assertstatic_cpp_dtype_assertr   r   r   r  r  r  *graphsafe_rng_func_ignores_fallback_randomr  r   use_libtorchr  r  force_filter_reduction_configsdistort_benchmarking_resultbisect_pre_grad_graph'bisect_keep_custom_backend_for_inductorr   r   r   r   r  S
  s2   
 r  )*c                   @   s@   e Zd ZU ejdddkZeed< dZ	eed< dZ
eed< dS )	eager_numerics'TORCHINDUCTOR_EMULATE_DIVISION_ROUNDINGr   r   division_roundingFdisable_ftzuse_pytorch_libdeviceN)r   r   r   r   r   r   r  r)  r*  r  r	  r   r   r   r   r  
  s
   
 
r  %TORCHINDUCTOR_EMULATE_PRECISION_CASTSemulate_precision_casts(g  r   rQ  collections.abcr   typingr   r   r   r   r)   !torch._inductor.custom_graph_passtorch._environmentr   torch.utils._config_moduler   r	   r
   r   torch._inductor.choicesr   torch._inductor.cudagraph_utilsr   r   r   inplace_paddingcan_inplace_pad_graph_inputr)  r   r   r   r!   r#   r%   r.   r2   enable_auto_functionalized_v2r  disable_progressverbose_progressworker_log_pathr   r6   r*  r=   r>   r?   r@   rB   rC   rD   rE   rG   rH   rI   r,  r  rJ   rM   rO   rQ   rS   rU   online_softmaxapply_gumbel_max_trickdcestatic_weight_shapessize_assertsnan_assertsruntime_triton_nan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr  rd   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_first*epilogue_fusion_user_defined_triton_kernelpattern_matcherb2b_gemm_passre   	_inductorcustom_graph_passCustomGraphPassTyperf   rg   CustomPartitionerFnTyperh   ri   rj   rk   rc  rl   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrm   rn   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_mul keep_addmm_fused_for_half_dtypesuse_mixed_mmru   ry    reorder_for_compute_comm_overlaprz   r{   reorder_for_peak_memoryreorder_for_peak_memory_debugr|   r   r   r   r   r   r   r   r   estimate_op_runtimer   intra_node_bwinter_node_bw
cpu_gpu_bwr   distributed_max_autotune_gemmpipeline_max_autotune_gemmmax_autotunemax_autotune_pointwisemax_autotune_gemmr   inductor_default_autotune_warmupinductor_default_autotune_repr   r   !max_autotune_report_choices_stats.max_autotune_prune_choices_based_on_shared_memtriton_disable_device_detectionr   r   r   r   r   r   r   uppermax_autotune_gemm_backendsr   r   max_autotune_conv_backendsr   r   r   r   r   r   r   r   r   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecollective_benchmark_nrunsrF  collective_benchmark_timeoutcoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   r   r   run_jit_post_compile_hookr   r   r   autoheuristic_log_pathr-   r  layout_opt_defaultlayout_optimizationforce_layout_optimizationcache_sdpa_constraintkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdr   r   fallback_randomalign_random_eager"fallback_embedding_bag_byte_unpackimplicit_fallbacks assume_unaligned_fallback_outputr   aggressive_fusionr   r   enabled_metric_tablesr   r  score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_sizemin_overlap_ratio)max_fusion_buffer_group_pairwise_attemptsr  max_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsdeterministicmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingdo_not_emit_runtime_assertionscompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapescombo_kernel_max_num_args!combo_kernel_per_subkernel_blockscombo_kernels_pointwise_onlyjoint_graph_constant_foldingdebug_index_asserts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr  r  r  r  r  r  r  r  _fuse_ddp_communication_fuse_ddp_bucket_sizer  r   partitioned_scatter_enabledr"  r$  r%  r&  r+  rJ  rX  rW  rZ  r\  r]  r^  r`  rb  rc  libfb.pyre  __package__get_dir_pathr  r  replaceseprd  
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastpad_dynamic_shapesdisable_padding_cpu$expand_dimension_for_pointwise_nodespadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visiblerj  permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testingrl  _profile_varprofile_bandwidthprofile_bandwidth_regexro  /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenrr  rs  rt  ru  rv  rw  rx  ry  rz  enable_linear_binary_foldingr}  r~  r  r  r  r  r  r  r  rw   r.  re  rg  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Tensor*write_are_deterministic_algorithms_enabledr  r  torch.utils._config_typingr  r  modulesr   r   r   r   r   <module>   s  
 


		

	


	
	
	
	[$!$
   "   Z ;	 JN".;