
    %Vji/                    \   U d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
Z	d dlmZ d dlmZmZmZ e j                            dd          dk    ZdZdee         fd	Zdee         fd
Zdee         fdZdee         fdZdee         fdZdefdZdefdZe j                            dd          dk    ZdZdZdZ  e            rdndZ!dZ"e#e$d<    eddd          Z%ee$d<   dZ&ee$d<    e            Z'ee         e$d<    e            Z(ee         e$d<    eddd          Z)ee$d<   dZ*ee$d<    e            Z+ee         e$d <    e            Z,ee         e$d!<    ed"#          Z-ee$d$<   dZ.ee$d%<   i Z/e0e1e1f         e$d&<   dZ2ee#         e$d'<   d(Z3ed)         e$d*<   e j                            d+d,          dk    Z4ee$d-<   e j                            d.d,          dk    Z5ee$d/<   e j                            d0d,          dk    Z6ee$d1<    e             Z7ee$d2<   e j                            d3d          dk    Z8dZ9dZ:e j                            d4d          dk    Z;e j                            d5          dk    Z<e j                            d6d          dk    Z=e j                            d7 e            rd,nd          dk    Z>dZ?dZ@dZAe j                            d8d,          dk    ZBe j                            d9          dk    ZCdZDe j                            d:d;          ZEed<         e$d=<   dZFdZG e            ZHdZIdZJdZKdZLe	jM        jN        jO        e$d><   dZPe	jM        jN        jO        e$d?<   dZQe	jM        jN        jR        e$d@<   dZSe	jM        jN        jO        e$dA<   dZTe	jM        jN        jO        e$dB<   dZUeee	jV        jW        jX        gdf                  e$dC<   dZYeeeZdD         geZdD         f                  e$dE<   dZ[eeeZdD         geZdD         f                  e$dF<   dZ\dZ]dZ^dZ_dZ`i Zae0e1e0e1ef         f         e$dG<   i Zbe0e1e0e1ef         f         e$dH<   dZce j                            dId          dk    ZddZedZfddJdKddLZge0e1ef         e$dM<   dNZhedO         e$dP<   dZig dQZjeZee1eeZdD         geZdD         f         f                  e$dR<   dZkee#         e$dS<   dZldZmee$dT<    e jn        dU          xZodn
 e#eo          Zpee#         e$dV<    e jn        dW          xZodn
 e#eo          Zqee#         e$dX<   dYZredZ         e$d[<   dZseee#ge#f                  e$d\<   dYZted]         e$d^<   dZueee#ge#f                  e$d_<   d`ZvdZwee$da<   dbZxdcZy eddddef          Zzee$dg<   e j                            dh          dk    Z{e j                            di          dk    Z|e j                            dj          dk    Z}dkZ~ee#         e$dl<   e j                            dmd          dk    Ze j                            dnd          dk    Ze j                            do e            sdnd,          dk    Zee$dp<    edqdrd          Zee$ds<   g ZeZe#         e$dt<   e j                            dudv                                          Ze j                            dwdx                                          Ze j                            dydz                                          Zed{         e$d|<   e j                            d}dz                                          Zed{         e$d~<   dZdZdZe j                            d          dk    Ze j                            d          dk    ZdZdZdZe j                            d          dk    Ze j                            d          dk    Ze j                            d          dk    Z e#e j                            dd                    Ze j                            dd          Ze j                            dd          Ze j                            dd,          dk    Zde1defdZde1defdZde1defdZe j                            ddz          Ze	j        j        sdnd,Ze j                            de          dk    Ze j                            dd,          dk    Ze j                            dd          dk    Ze j                            d          dk    ZdZdZdZdZee#         e$d<   dZdZe j                            d          dk    ZdZe j                            d          dk    Zee$d<   e j                            d          dk    Zee$d<   e j                            dd          Ze j                            dd,          dk    Zee$d<   dkZe j                            dd          dk    ZdKZdZdZdZdZdZdZdZdZ e#e j                            dd                     Ze j                            dd,          dk    ZdZdZdZdZdZdZdKZdKZdZdZdZe j                            dd,          dk    Zde	j        v pde	j        v Z e            peZe j                            dd          dk    ZdZee1         e$d<   dZee1         e$d<   de1fdZ eΦ            Ze1e$d<   dZe#e$d<    eddd          Zee$d<    edd          Zee$d<   dZdcZddgZeZeed         e1f                  e$d<   dZee$d<    G d d          ZdefdZde#fdZ e            rdn	 e٦            Zee#         e$d<    eddd          Zee$d<    e            Zee$d<    eddd          Zee$d<   e j                            dd,          dk    Zee$d<   ee1         e$d<    e            rl	 d dlmZ erC ej        e j                            e                    de j                  d̦                    Zn ej        d̦          Zn# eef$ r dZY nw xY wdZdkZe j                            dd          dk    Ze j                            dd          dk    ZdZdZdZdZdZdZdZdZdZee$d<   e j                            dd,          dk    ZdZdZdZdZe j                            dd          Zedk    Zedk    rdneZe j                            dd          Zee1         e$d<   e j                            d֦          dk    ZdZ e j                            dd,          dk    Zee$d<   dZee$d<   dZee$d<   dZee$d<   dZee$d<   dZee$d<   dZee$d<   e j                            dd,          dk    Ze j                            dd,          dk    Z	ee$d<   dZ
ee$d<   i Ze0e1e0e1ef         f         e$d<   dee1         fdZ edd          Ze1e$d<    G d d          Z G d d          Z G d d          Z G d d          Z G d d          ZdZed         e$d<   dZed         e$d<    G d d          Z G d d          Zg dZeZe1         e$d<   g dZeZe1         e$d<   g ZeZee	j        e	j        e	j        gdf                  e$d<    G d d          Zerd d lT  eej        e                    dS (      N)AnyCallableLiteralOptionalTYPE_CHECKINGUnion)	is_fbcode)Configget_tristate_envinstall_config_moduleTORCHINDUCTOR_INPLACE_PADDING1Freturnc                       t          d          S )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr        P/root/voice-cloning/.venv/lib/python3.11/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_defaultr          ABBBr   c                      t           j                            d          dk    rdS t           j                            d          dk    rdS d S )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   vec_isa_ok_defaultr      sE    	z~~011S88t	z~~011S88u4r   c                       t          d          S )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   autotune_remote_cache_defaultr!      r   r   c                       t          d          S )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   %bundled_autotune_remote_cache_defaultr$      s    IJJJr   c                  B    t          dt                      sdnd           S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r   r	   r   r   r   )bundle_triton_into_fx_graph_cache_defaultr'   #   s'    9KK)T  r   c                      d} dt           j        v r#t           j                            d          dk    S t                      r%t          j                            d          }|| k    S dS )N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r   r	   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversions     r   static_cuda_launcher_defaultr1   *   sk    #$ /2:==z~~FGG3NN	 '<<;
 
 666 tr   c                      d} dt           j        v r#t           j                            d          dk    S t                      r'd}t          j                            |          }|| k    S dS )Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr+   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer0   s      r   prologue_fusion_enabledr6   9   sg    %&"&"*44z~~=>>#EE	 <'<<WEE888tr   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2T-/logs/dedicated_log_torch_compile_worker_ranki  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cacheTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_forcedefaultfx_graph_cacheremote_gemm_autotune_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITEnon_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez*torch.compiler.config.force_disable_caches)aliasforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rL   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separateTORCHINDUCTOR_FX_WRAPPER
fx_wrappercpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAXTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)noner^   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passcustom_partitioner_fnjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)r=   tritonatenrt   mixed_mm_choice)reorder_compute_for_overlap
sink_waitsraise_comms'reorder_for_compute_comm_overlap_passesreorder_prefetch_limit(reorder_iterative_debug_memory_recompute!PYTORCH_REORDER_COLLECTIVES_LIMIT(reorder_iterative_debug_limit_to_reorderPYTORCH_SINK_WAITS_LIMIT(sink_waits_iterative_debug_limit_to_sinkr_   )r_   all	only_fsdpbucket_all_gathers_fx.bucket_all_gathers_fx_bucket_size_determinator)r_   r   bucket_reduce_scatters_fx2bucket_reduce_scatters_fx_bucket_size_determinatorr=   !runtime_estimations_mms_benchmarki,     *TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r=   r<   r;   use_experimental_benchmarkerTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM
   autotune_num_choices_displayed/TORCHINDUCTOR_MAX_AUTOTUNE_REPORT_CHOICES_STATS<TORCHINDUCTOR_MAX_AUTOTUNE_PRUNE_CHOICES_BASED_ON_SHARED_MEMTORCHINDUCTOR_GRAPH_PARTITIONgraph_partitionz%pytorch/compiler:force_same_precision"TORCHINDUCTOR_FORCE_SAME_PRECISIONforce_same_precisionmulti_kernel_hints(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPP(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacei    TORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUS#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT TORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mm'TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOKnamec                 >    t          |           pt          |           S )N)collect_autoheuristicuse_autoheuristicr   s    r   run_autoheuristicr   ;  s     &&A*;D*A*AAr   c                 X    | t           j        j        j                            d          v S N,)r,   	_inductorconfigautoheuristic_collectsplitr   s    r   r   r   ?  s#    5?)?EEcJJJJr   c                 X    | t           j        j        j                            d          v S r   )r,   r   r   autoheuristic_user   r   s    r   r   r   C  s#    5?);AA#FFFFr   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT          realize_acc_reads_size_threshold.TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLES(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusion'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   TORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL%TORCHINDUCTOR_EMULATE_PRECISION_CASTSdevgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesc                  n    dt           j        v rt           j        d         } nd} | dv sJ d|              | S )NTORCHINDUCTOR_WORKER_START
subprocess)r   forkspawnzInvalid start method: )r   r   )start_methods    r   decide_worker_start_methodr     s^    #rz11z">?#     /..	  
 r   worker_start_methodi   small_memory_access_thresholdz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingLOG_TLPARSEr<   r=   log_tlparsefuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tpc                   ,    e Zd ZU dZeed<   dZeed<   dS )_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)__name__
__module____qualname__r   bool__annotations__r   intr   r   r   r   r   3  s3         K/9'99999r   r   c                  T    d} d}t           j                            |          }| |k    S )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    rn   z0pytorch/inductor:enable_parallel_compile_version)r,   r-   r.   )ENABLE_PARALLEL_COMPILE_VERSIONr5   r0   s      r   #parallel_compile_enabled_internallyr   8  s0     '(#@G#88AAG*g55r   c                  h   ddl } |                     t                    }dt          j        v r6t          t          j        d                   }|                    d|           nt          j        dk    rd}|                    d           nt                      r&t                      sd}|                    d           nst          t          d	          r!t          t          j        d                    nt          j                    }|sJ t          d
|          }|                    d|           |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32rn   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodesched_getaffinity    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr	   r   hasattrlenr   	cpu_countmin)r   logcompile_threadsr   s       r   decide_compile_threadsr  F  s+    NNN 

H
%
%C&"*44bj)HIJJ4oFFFF		 	 56666	 ?@BB ?56666 r.// C$Q''((( 	
 yb),,,o>>>r   r  z+pytorch/inductor:quiesce_async_compile_pool(TORCHINDUCTOR_QUIESCE_ASYNC_COMPILE_POOLquiesce_async_compile_pooluse_static_cuda_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   i   force_shape_padTORCHINDUCTOR_PERMUTE_FUSIONTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmassume_aligned_inputs.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesautotune_lookup_tablec                      d } t                      rGt          j                            dd           }t          j                            dd          }|d| } | S )NMAST_HPC_JOB_NAME	ROLE_RANKr   r8   )r	   r   r   r   )log_locmast_job_nameglobal_ranks      r   get_worker_log_pathr+  %  sW    G{{ T
':DAAjnn[#66$SkSSGNr   TORCHINDUCTOR_WORKER_LOGPATHtorchinductor_worker_logpathc                      e Zd ZU dZdZej                            dd          dk    Zej                            dd          dk    Z	dZ
ee         ed<    eej                            d	d
                    Zdej                            dej        dk    rdnd          fZeed         ef         ed<   ej                            dd          dk    Zej                            dd          dk    ZdZee         ed<   dZee         ed<    ed          Zee         ed<   dZed         ed<    eej                            dd                    Zej                            dd          dk    Zej                            dd          dk    Zej                            dd          Z ej                            dd          dk    Z!d Z" eej                            d!d                    Z#ej                            d"d          Z$ej                            d#d          Z%d$Z&d Z'ej                            d%d          dk    Z(d Z)ej                            d&d          dk    Z*ej                            d'd          dk    Z+dS )(cppzu
    Settings for cpp backend.
    This class provides a centralized location for managing cpp backend settings.
    $TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE512CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr,   r>  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH%TORCHINDUCTOR_CPP_FORCE_INLINE_KERNEL-TORCHINDUCTOR_CPP_USE_CONSTEXPR_FOR_INT_ARRAY),r   r   r   __doc__threadsr   r   r   no_redundant_loopsdynamic_threadsr3  r   r   r   min_chunk_sizer   r   r8  tupler   strenable_kernel_profileweight_prepackr;  r<  r   r=  r   rA  max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferforce_inline_kerneluse_constexpr_for_int_arrayr   r   r   r/  r/  8  s          G
 	
=sCCsJ  jnn%H#NNRUUO!GXc]!!!S(JERRSSN 	

u3<8+C+CiiOO&Cwt}c!	"    	
@#FF#M 
 Z^^$FLLPSSN
 37 (3-66637!8C=777 "2!12L!M!MJMMM 	 wHI   
 "%

EtLL" " 	
FLLPSS   	
FLLPSS   +-*..?+ +' 	
BCHHCO 
 $)  BJNN+PRUVVWW *..)PRVWW *..)PRVWW   !
 	
=sCCsJ 
  % 	
>DDK  	
FLLPSS  r   r/  c                      e Zd ZU dZej                            d          dk    ZdZdZ	dZ
eeeeeedf         f                           ed<   dZdZ e            rdndZd	Zd
Zee         ed<   dZdZ edd          Zeed<   dZdZdZdZdZej                            d e            sdnd          dk    Z eed<   dZ!ee         ed<   dZ"eed<   dZ#dZ$dZ%ee         ed<   dZ&eed<   dZ'eed<   dZ(dZ)ej                            dd          dk    Z*ej                            dd          dk    Z+dZ,e-d         ed<   ej                            dd          dk    Z.ej                            dd          dk    Z/dZ0 eej                            dd                    Z1e-d         ed <   ej                            d!d          dk    Z2d"Z3dZ4d#Z5eed$<   dZ6dZ7dZ8ee9         ed%<   dZ:ej                            d&d          dk    Z;ej                            d'd          dk    Z<dZ= eej                            d(d)                    Z> eej                            d*d+                    Z?dS ),ru   z.
    Config specific to codegen/triton.py
    TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr  2   "cudagraph_dynamic_shape_warn_limit TORCHINDUCTOR_CUDAGRAPH_OR_ERRORr   cudagraph_or_error&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductions!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr>  r?  rA  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   rn   r)      multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16      spill_thresholdr;  ENABLE_PERSISTENT_TMA_MATMULTORCHINDUCTOR_SKIP_L1$TORCHINDUCTOR_NUM_DECOMPOSE_K_SPLITS10#TORCHINDUCTOR_DECOMPOSE_K_THRESHOLD32)@r   r   r   rO  r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsri  r   rT  r   r   r   slow_path_cudagraph_asserts!cudagraph_trees_history_recordingr	    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitrk  force_cudagraph_syncforce_cudagraphs_warmupr
   rm  r   fast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingro  rp  rq  autotune_pointwiseautotune_cublasLtrr  rs  rt   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesrA  r   persistent_reductionscooperative_reductionsforce_cooperative_reductionsr{  divisible_by_16min_split_scan_rblockstore_cubinr  use_block_ptruse_tensor_descriptorr;  rU  codegen_upcast_to_fp32enable_persistent_tma_matmulskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYnum_decompose_k_splitsdecompose_k_thresholdr   r   r   ru   ru     s         
  :;;sBJ O %*! MQXeE#uS#X2F,G&HIPPP #' ).% 1:	'Euu$ +.' 9;&::: ! $  &v9         #( "   N 	
46UccRU	
 	
 	 d     $Ix}### #d"""   04htn333
 ).--- "OT!!! (,$'+$ 	
:C@@CG  	
?EEL  	 wHI    	
<cBBcI  	
=sCCsJ 
 $)  ),

3S99) )L'*%   
 jnn%DcJJcQO   K OS M "
 37 (3-666 "
 	
5s;;sB ! JNN#:C@@CGM 6;2 !S

=tDD   C

<dCC r   ru   c                      e Zd ZU dZdZej                            dd          dk    Zej                            dd          Z	ej                            dd          Z
ed	         ed
<   ej                            dd          ZdZdZdZeed<   dZeed<   dZdZeed<   dZee         ed<   i Zeeef         ed<   ej                            dd          dk    Zeed<   ej                            dd          dk    Zeed<    eej                            dd                    Zeed<   i Zeeef         ed<   dZ eed<   dZ!eed<   ej                            dd          dk    Z"eed<   dZ#eed <   dZ$eed!<    e%             Z&eed"<   dZ'ee         ed#<   dZ(ee         ed$<   dZ)ee         ed%<   i Z*ee+j,        j-        e.e         f         ed&<   dZ/ee.e                  ed'<   dZ0eed(<   ej                            d)d          dk    Z1dS )*aot_inductorz9
    Settings for Ahead-Of-Time Inductor Compilation
    r   AOT_INDUCTOR_DEBUG_COMPILEr   r   &AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightsTpackagepackage_cpp_onlymetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr)   repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interface)AOT_INDUCTOR_WEIGHT_USE_CACHING_ALLOCATORweight_use_caching_allocatorpackage_constants_in_sopackage_constants_on_diskprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libscompile_standaloneAOT_INDUCTOR_ENABLE_LTO)2r   r   r   rO  output_pathr   r   r   debug_compilecompile_wrapper_opt_levelr  r   r   filtered_kernel_namesserialized_in_specserialized_out_specr  r   r  use_consts_asm_buildr  r  r   r  dictrU  r  r  r   r  r  r   r  r  r  r  r  r	   r  r  r  r  r  r,   _ops
OpOverloadlistr  r  
enable_ltor   r   r   r  r    s          KJNN#?EELM !#
0$! ! EGJNN7E E$g.@&A   
 JNN0$    */ $...  %$$$  GT'+htn+++  "Hd38n!!! 	
H#NNRUU (   
  "z~~.BCHHCOOOO s2:>>*CQGGHHKHHH !GT#s(^    $)D((( ,1"D000 	
BCHHCO !$   
 %)T((( ',t+++ $-9;;... +/$...
 .2HTN111 59"HSM888 EG4
 5tCy @AFFF*.NHT#Y'...$$$$  93??3FJJJr   r  c            
       T   e Zd ZU dZdZee         ed<   dZee         ed<   dZ	e
d         ed<   dZdZdZdZej                            ej                            d	ej                            ej                            ej                  d
                              ZdZee         ed<   g dZee         ed<   ej                            dd          dk    ZdZdZee         ed<   dZ eed<   ej                            dd          dk    Z!e"ed<   ej                            d          Z#ee         ed<   ej                            d          Z$ee         ed<   ej                            dd          Z%eed<   ej                            d          Z&ee         ed<   ej                            dd          dk    Z'e"ed<   ej                            d d          dk    Z(e"ed!<   ej                            d"d#          Z)eed$<   d%Z*e"ed&<   dZ+e"ed'<   dZ,e"ed(<   d%Z-e"ed)<   dS )*cudaz9Settings for cuda backend, today this consists of cutlassNarchr0   -O1)-O0r  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/cutlass_max_profiling_configs)rn   r)   r   r   %cutlass_max_profiling_swizzle_optionsCUTLASS_EPILOGUE_FUSIONr   r   cuda_cxxrn   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_levelTORCHINDUCTOR_CUTLASS_PRESETScutlass_presets+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSr   cutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_writeenable_caching_codegen).r   r   r   rO  r  r   rU  r   r0   r  r   enable_cuda_ltoenable_ptxas_infoenable_debug_infouse_fast_mathr   pathrealpathr   r   joindirnamer,   __file__cutlass_dirr  r   r  r  cutlass_epilogue_fusion_enabledcutlass_tma_onlyr  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r   r   r   r  r  0  s        CC
 D(3-
 "GXc]!!! EJw@AIII O   M '""

'GLL88:STT	
 	
 K 48!8C=777 8D||)49CCC 	
0#66#= $
  #Hhsm""" *+!3***
 	
H#NNRUU $    13
)1 1    02z~~(0 0x}    (*z~~3S( (    &(Z^^4S%T%TOXc]TTT 	
DcJJcQ "4    	
;SAASH $     "z~~+U      
 %)T((( +0!4/// -2#T111 $(D'''''r   r  c                      e Zd ZU g Zee         ed<   g dZeed                  ed<   dZ	ed         ed<   dZ
dZdZdZdZd	Zee         ed
<   ej                            d          Zej                            dd          dk    Zeed<   d	Zee         ed<   d	Zee         ed<   d	Zee         ed<   dZeed<   d	Zeee                  ed<   dZeed<   dZeed<   d	S )rocmr  )gfx90agfx942gfx950ck_supported_archr  )	r  r  r  r  z-Osz-Ozz-Ominz-Ofastz-Omaxr  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweepr~  split_k_thresholdcontiguous_threshold)r   r   r   r  r  rU  r   r  r   r  is_debug
save_tempsr  flush_denormalsprint_kernel_resource_usager  r   r   r   r   ck_dirr  r   r  r   r  r  r  r  r  r  r   r   r   r  r    s         D$s)F F FtG$@AB    	 wL   
 H J M O #(  $Ix}### Z^^233F 	
FLLPSS $   
 .2Xc]111 /3hsm222 48!8C=777 ',t+++ )-L(49%,,,  s !##"""""r   r  )r/  ru   halidecpu_backend)ru   r  cuda_backendc                   X    e Zd ZU dZdZdZed         ed<   dZed         ed<   dZ	dZ
dZd	S )
r  hostz	host-cudaAnderson2021)r   Li2018	Adams2019Mullapudi2016scheduler_cudar"  scheduler_cpuFN)r   r   r   
cpu_target
gpu_targetr$  r   r   r%  assertsdebugscan_kernelsr   r   r   r  r    ss         J J
 	 GRS    	 7QR   
 G E LLLr   r  c            	          e Zd ZU ej                            dd          dk    Zej                            dd          dk    ZdZe	e
         ed<   dZdZdZdZdZdZdZej                            d	d          dk    Zej                            d
d          dk    Zej                            dd          Zej                            dd          ZdZdZe	ee
gdf                  ed<   ej                            dd          dk    Z eej                            dej                            dd                              Zeed<   dS )traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTSINDUCTOR_PROVENANCEprovenance_tracking_level)r   r   r   r   r   r   enabledsave_real_tensorsr/  r   rU  r   	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler4  r   log_autotuning_resultsr   r7  r   r   r   r,  r,  #  s        jnn2C88C?G 
'FLLPSS  $Ix}### I H H   M N K JNN#=sCCsJM (>DDK jnn%CTJJO !jnn-OQUVV O 37J3%+./666Z^^,BCHHCO &)S

!2:>>2G#M#M	
 	
& &s     r   r,  )
ztrace.upload_tarrf   rg   rh   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierrc   rd   r   ri   _save_config_ignore)r,  zcuda.cutlass_dirr   r  rd   rc   rf   rg   r   ri   r  r>   r@   rD   rE   _cache_config_ignore_prefixexternal_matmulc                       e Zd ZU dZeed<   dZee         ed<   dZ	dZ
dZee         ed<   dZee         ed<   dZdZeed                  ed<   dZdS )	test_configsF%force_extern_kernel_in_multi_templateNmax_mm_configsautotune_choice_name_regexautotune_choice_desc_regex)assertr  track_memory_lifecycle)r   r   r   rL  r   r   rM  r   r   runtime_triton_dtype_assertstatic_cpp_dtype_assertrN  rU  rO  *graphsafe_rng_func_ignores_fallback_randomrQ  r   use_libtorchr   r   r   rK  rK    s         27)4777$(NHSM((("'# 154440444416.AEHW_%=>EEE LLLr   rK  )*(  r   r   typingr   r   r   r   r   r   r,   !torch._inductor.custom_graph_passtorch._environmentr	   torch.utils._config_moduler
   r   r   r   r   inplace_paddingcan_inplace_pad_graph_inputr   r   r   r!   r$   r'   r1   r6   enable_auto_functionalized_v2r)  disable_progressverbose_progressworker_log_pathr9   r   r   r>   r?   r@   rA   rC   rD   rE   rF   rH   rI   rJ   r  rU  rK   rN   rP   rR   rT   rU   online_softmaxdcestatic_weight_shapessize_assertsnan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr  bfloat16_atomic_adds_enabledrb   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_firstpattern_matcherb2b_gemm_passrc   r   custom_graph_passCustomGraphPassTyperd   re   CustomPartitionerFnTyperf   rg   rh   fxgraphGraphri   r  rj   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrk   rl   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_muluse_mixed_mmrs   rw    reorder_for_compute_comm_overlapr{   r|   reorder_for_peak_memoryr}   getenvenv_strr   r   r   r   r   r   estimate_op_runtimer   intra_node_bwinter_node_bwr   max_autotunemax_autotune_pointwisemax_autotune_gemmr   !max_autotune_report_choices_stats.max_autotune_prune_choices_based_on_shared_memr   r   r   uppermax_autotune_gemm_backendsmax_autotune_conv_backendsr   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   run_jit_post_compile_hookr   r   r   autoheuristic_log_pathr0   hiplayout_opt_defaultlayout_optimizationforce_layout_optimizationkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdr   fallback_randomimplicit_fallbacks assume_unaligned_fallback_outputaggressive_fusionr   r   enabled_metric_tablesr   score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_size)max_fusion_buffer_group_pairwise_attemptsmax_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingcompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapesjoint_graph_constant_foldingdebug_index_assertsemulate_precision_casts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr   r   r   r   r   r   r   _fuse_ddp_communication_fuse_ddp_bucket_sizer   r   r   r   r  r  r  r  r	  r  libfb.pyr  __package__get_dir_pathr  r   replacesepr  
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastpad_dynamic_shapesdisable_padding_cpu$expand_dimension_for_pointwise_nodespadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler  permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testing_profile_varprofile_bandwidthprofile_bandwidth_regexr  /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr  r  r  r  r  r  r  enable_linear_binary_foldingr"  r#  r$  r+  r-  r/  ru   r  r  r  r  r  r  r,  rG  rH  rI  TensorrK  torch.utils._config_typingmodulesr   r   r   r   <module>r     s   					 



 I I I I I I I I I I I I I I I I  ( ( ( ( ( ( ( ( ( ( V V V V V V V V V V *..!@#FF#M# Cx~ C C C CHTN    Cx~ C C C CKx~ K K K K8D>    d    
 
 
 
 
 JNN7==D 
 	     8Ay{{L33 
 '.  - - - v?1      $) D ( ( ( )F(E(G(G x~ G G G .-// "8D>    )/MB) ) )     " d ! ! ! )F(E(G(G x~ G G G 1V0U0W0W x~ W W W $V*VWWW d W W W 05 & 4 4 4 57 !4S> 6 6 6 )-  , , ,  (1*    JNN#>DDKT K K K JNN=sCCsJ D    :>>"<cBBcI
D I I I
 *3_ d 4 4 4 >DDK    z~~:C@@CGjnn899S@ >DDK JNN4YY[[6QcccRR
       *..!@#FF#M 
<==D  $  HJz~~H HWCD   
    *)++     TX 5?<P W W WTX EO=Q X X X TX u8P W W W PT u8L S S SPT 9M T T T
 JN hx)=(>(DEF M M M 	 	;	<=:;	= 	 	 	 	 (	;	<=:;	= 	 	 	   $)     ( 68 c4S>12 7 7 7 79 $sDcN23 8 8 8   z~~&JCPPTWW  #   	+ + c3h    FQAB P P P $)     (	?@A>?A	
	*    )-  , , ,  16 ($ 6 6 6 29@AAAJ 	D	W )(3-    "	"<===WFDDCCPWLL )(3-    >D w9: C C CQU .3%*9M0N U U U4: 7=1 : : : 	 3HXseSj=Q4R      */ !4 / / /   &,V?<& & & d    z~~:;;sB (NOOSVV  JNN#DEEL  13  2 2 2 JNNDcJJcQ " JNNQSVWW
 / JNN2yy{{4SCCPSTT
     $V47   d    !# DI " " "  Z^^.0A %''   Z^^. %''  DF:>>2ID D%'' (? @ 
 
 
 DF:>>2ID D%'' (? @ 
 
 

 "      JNN455<	 jnn%HIISP 
 /3 +03 -14 . 
'LMMQTT  JNN<==D  JNNJKKsR ( $'3JNN<cBB$ $   
'LbQQ JNN#DjQQ  JNN<cBBcI 
BC BD B B B BK K K K K KGC GD G G G G *I  
 !& 1:SSs JNN68JKKsR  JNN+KSQQUXX  Z^^$FLLPSS  *..!@AASH
       !(3-   
   JNNCDDK !
   Z^^$@AASHd H H H(HIISP $ P P P
'LbQQ JNN=sCCsJ D    !#  JNN<cBBcI 
 $%    -/ )            BJNN#@!DDEE:>>"BCHHCO  "&   %           "# &* #  $    JNN:C@@CG 
  11OUe>O5O Y[[8$8  JNNEsKKsR #
 &* Xc] ) ) )(,  , , ,
C 
 
 
 
 6577 S 7 7 7 &. s - - - !'7:! ! !     F   T         I U8I+>+C%D E   
 ! D      : : : : : : : :
6T 6 6 6 6! ! ! ! !J *3!R:P:P:R:R# R R R $*6:=$ $ $ D    ">!=!?!? $ ? ? ? 39&IL3 3 3 )4    JNN>DDK T   
 3-   9;; 
 $$$$$$ 	@3w3[00bf==zJJ     4w3J??$           
<cBBcI JNN8#>>#E        (- $  (            >DDK # 
 $    ! z~~5r:: B&  , 3 3""  +-*.."D+ + (3-   
 JNNHIISP 0    8#>>#E$ E E E %* T ) ) )  %  $ $ $
 $ t # # # 8= . < < < ,1 "D 0 0 0 49 *D 8 8 8 JNN?EEL  *..)JCPPTWW 4 W W W 37 )4 6 6 6 46 tCc3h/0 5 5 5	Xc] 	 	 	 	 %+F1% % % c   @ @ @ @ @ @ @ @Fh h h h h h h hVGG GG GG GG GG GG GG GGTD( D( D( D( D( D( D( D(NF# F# F# F# F# F# F# F#T 38W./ 7 7 7 -5g() 4 4 4       8K K K K K K K K\" " " T#Y   * * * T#Y   2 UWhelELI4OPQ V V V       ,  -,,,,  ck(+ , , , , ,s   >Aq 	q&%q&