
    &Vji1                     |   d dl Z d dlZd dlmZmZmZ d dlZd dlmZ	 de j
        fdZdeddfdZde j
        fdZ	 	 	 	 dd
ededee         dedee         dee         defdZ G d d          Z G d d          Z	 ddeeef         deee                  deeeedf         f         fdZ	 ddedededefdZdS )     N)AnyOptionalUnion)_get_device_indexreturnc                  r    t           j        dk    rt          j        d          S t          j        d          S )Nwin32z
nvcuda.dllzlibcuda.so.1)sysplatformctypesCDLL     K/root/voice-cloning/.venv/lib/python3.11/site-packages/torch/cuda/_utils.py_get_cuda_libraryr      s/    
|w{<((({>***r   resultc                    | dk    rd S t          j                    }t                      }|                    | t          j        |                     |j        |j                                        nd}t          d|           )Nr   Unknown CUDA errorCUDA error: )r   c_char_pr   cuGetErrorStringbyrefvaluedecodeRuntimeError)r   err_strlibcudaerror_messages       r   _check_cudar      s    {{oG!!GVV\'%:%:;;;")-";AU  5m55
6
66r   c                  "   t          t          j        j                            d          d                   } t
          j        dk    rd|  dg}nd|  dg}|D ](}	 t          j        |          c S # t          $ r Y %w xY wt          d          )	N.r   r	   nvrtc64_z0_0.dllzlibnvrtc.so.zlibnvrtc.soz Could not find any NVRTC library)
inttorchversioncudasplitr
   r   r   r   OSError)major_version
nvrtc_libslib_names      r   _get_nvrtc_libraryr,       s    *0055a899M
|w-}---



 +=**

   	;x((((( 	 	 	H	
4
5
55s   A22
A?>A? kernel_sourcekernel_namecompute_capabilityheader_codecuda_include_dirsnvcc_optionsc           
         ddl }t                      ddt          ddffd}|                                                     d          sd|  } |r	|dz   | z   }n| }|                    d	          }	|A|j                            |j                                                  }
|
j	         |
j
         }g }|                    d
|                                            |r/|D ],}|                    d|                                            -|r-|D ]*}|                    |                    d	                     +ddlm} d |D             }|                    d |D                        t          |          }t!          j        |z  | }t!          j                    } |                    t!          j        |          |	| d                                ddd                                         |||          }|k    rt!          j                    }                    |t!          j        |                     t!          j        |j                  }                    ||           t7          d|j                                                   t!          j                    } |                    |t!          j        |                               t!          j        |j                  } |                    ||                                         t!          j        |                     |j        S )a  
    Compiles a CUDA kernel using NVRTC and returns the PTX code.

    Args:
        kernel_source (str): The CUDA kernel source code as a string
        kernel_name (str): The name of the kernel function to compile
        compute_capability (str, None): The compute capability to target (e.g., "86").
                                           If None, will detect from current device.
        header_code (str, optional): Additional header code to prepend to the kernel source
        cuda_include_dirs (list, None): List of directories containing CUDA headers
        nvcc_options (list, None): Additional options to pass to NVRTC

    Returns:
        str: The compiled PTX code
    r   Nr   r   c                     | k    rot          j                    }                    | t          j        |                     |j        |j                                        nd}t          d|           d S )Nr   r   )r   r   nvrtcGetErrorStringr   r   r   r   )r   r   r   NVRTC_SUCCESSlibnvrtcs      r   check_nvrtcz#_nvrtc_compile.<locals>.check_nvrtcT   s    ]""o''G((g1F1FGGG =, $$&&&) 
 =m==>>> #"r   z
extern "C"zextern "C" 
utf-8z--gpu-architecture=sm_z-I)COMMON_NVCC_FLAGSc                     g | ]
}|d k    |S )z--expt-relaxed-constexprr   .0flags     r   
<listcomp>z"_nvrtc_compile.<locals>.<listcomp>   s*       d6P.P.P.P.P.Pr   c                 8    g | ]}|                     d           S )r;   )encoder>   s     r   rA   z"_nvrtc_compile.<locals>.<listcomp>   s$    LLLTDKK((LLLr   z.cuzKernel compilation failed:
) 
torch.cudar,   r#   strip
startswithrC   r&   get_device_propertiescurrent_devicemajorminorappendtorch.utils.cpp_extensionr<   extendlenr   r   c_void_pnvrtcCreateProgramr   nvrtcCompileProgramc_size_tnvrtcGetProgramLogSizecreate_string_bufferr   nvrtcGetProgramLogr   r   nvrtcGetPTXSizenvrtcGetPTXnvrtcDestroyProgram)r.   r/   r0   r1   r2   r3   r$   r9   full_sourcesource_bytespropsoptions	directoryoptionr<   nvrtc_compatible_flagsnum_optionsoptions_arrayprogreslog_sizelogptx_sizeptxr7   r8   s                           @@r   _nvrtc_compilerh   3   s   0  "##H M	?C 	?D 	? 	? 	? 	? 	? 	? 	?   ++L99 65m55  $!D(=8# %%g..L !
001J1J1L1LMM %:U[:: GNN@,>@@GGIIJJJ  6* 	6 	6INN+	++22445555  3" 	3 	3FNN6==112222 <;;;;; *   NNLL5KLLLMMM g,,K_{2W=M ?DK##L&&((	
 	
	 	 	 
&
&t[-
H
HC m?$$''fl8.D.DEEE)(.99##D#...N#):J:J:L:LNNOOO   HK((v|H/E/EFFGGG

%hn
5
5CK$$T3//000  d!3!34449r   c                   8    e Zd Zdej        ddfdZdeddfdZdS )_CudaModulemoduler   Nc                 "    || _         i | _        d S N)_module_kernels)selfrk   s     r   __init__z_CudaModule.__init__   s    02r   name_CudaKernelc           	         || j         v r| j         |         S ddlm}  |            }t          j                    }	 t          |                    t          j        |          | j        |	                    d                               t          || j                  }|| j         |<   |S # t          $ r}t          d| d          |d }~ww xY w)Nr   )r   r;   zNo kernel named 'z' in this module)ro   torch.cuda._utilsr   r   rO   r   cuModuleGetFunctionr   rn   rC   rs   r   AttributeError)rp   rr   r   r   funckernelerrs          r   __getattr__z_CudaModule.__getattr__   s    4=  =&& 	877777##%%  	V++L&&dkk'6J6J   
 !t|44F"(DM$M 	V 	V 	V !KT!K!K!KLLRUU	Vs   A.B* *
C4CC)__name__
__module____qualname__r   rO   rq   strr{   r   r   r   rj   rj      sb        3v 34 3 3 3 3V V V V V V V Vr   rj   c                       e Zd ZdZdej        dej        ddfdZ	 	 	 	 	 dd	eeeef         d
eeeef         de	e
         dede	e         ddfdZdS )rs   zT
    Represents a compiled CUDA kernel that can be called with PyTorch tensors.
    rx   rk   r   Nc                 "    || _         || _        d S rm   )rx   rk   )rp   rx   rk   s      r   rq   z_CudaKernel.__init__   s    	r      r   r   r   gridblockargs
shared_memstreamc                    ddl }|j        j                                        }|sg }g }g }	|D ]l}
t	          |
|j                  r|
j        s*|
j        r|
                                st          d          t          j        |
                                          }|                    |           |	                    t          j        |                     t	          |
t                    r<t          j        |
          }|	                    t          j        |                     t	          |
t"                    r=t          j        |
          }|	                    t          j        |                     Ot'          dt)          |
                     t          j        t+          |	          z              }t-          |	          D ]'\  }}
t          j        |
t          j                  ||<   (|ddl}|j                                        }t5          |                    | j        |d         |d         |d         |d         |d         |d         ||j        |d                     dS )a  
        Call the compiled CUDA kernel

        Args:
            grid (tuple): Grid dimensions (grid_x, grid_y, grid_z)
            block (tuple): Block dimensions (block_x, block_y, block_z)
            args (list): List of arguments to pass to the kernel.
                         PyTorch tensor arguments will be automatically converted to pointers.
            shared_mem (int): Shared memory size in bytes
            stream (torch.cuda.Stream): CUDA stream to use. If None, uses current stream.
        r   Nz?All tensor arguments must be CUDA tensors or pinned CPU tensorszUnsupported argument type: r      )r$   r&   _utilsr   
isinstanceTensoris_cudais_cpu	is_pinned
ValueErrorr   rO   data_ptrrK   r   r#   c_intfloatc_float	TypeErrortyperN   	enumeratecastrD   current_streamr   cuLaunchKernelrx   _as_parameter_)rp   r   r   r   r   r   r$   r   processed_argsc_argsargptrr   r   c_args_arrayis                   r   __call__z_CudaKernel.__call__   sg   & 	*#5577 	D 13 	K 	KC#u|,, K{ CJ 3==?? $Y   ocllnn55%%c***fl3//0000C%% KS))fl5112222C'' K .--fl7334444 Id3ii I IJJJ #f++588'' 	@ 	@FAs$k#v??LOO >Z..00F""	QQQaaa% 	
 	
 	
 	
 	
r   )r   r   Nr   N)r|   r}   r~   __doc__r   rO   rq   tupler#   r   listr   r   r   r   r   rs   rs      s         V_ fo $     &/&/# $P
 P
CcM"P
 S#s]#P
 tn	P

 P
 P
 
P
 P
 P
 P
 P
 P
r   rs   rg   kernel_namesc           
         ddl }t                      }t          | t                    r|                     d          } t          j                    }|j                                        }|5  t          |
                    t          j        |          |                      ddd           n# 1 swxY w Y   |st          |          S i }|D ]q}t          j                    }t          |                    t          j        |          ||                    d                               t          ||          ||<   r|S )a,  
    Loads a CUDA module from PTX code and returns a module object that can access kernels.

    Args:
        ptx (bytes or str): The PTX code to load
        kernel_names (list, optional): List of kernel names to extract from the module.
                                      If None, will return a module object with __getattr__.

    Returns:
        object: If kernel_names is None, returns a module object with __getattr__ to access kernels.
               If kernel_names is provided, returns a dict mapping kernel names to _CudaKernel objects.
    r   Nr;   )rD   r   r   r   rC   r   rO   r&   r   r   cuModuleLoadDatar   rj   rv   rs   )	rg   r   r$   r   rk   r   kernelsrr   rx   s	            r   _cuda_load_moduler   (  s       !!G #s "jj!! _FZ&&((F	 I IG,,V\&-A-A3GGHHHI I I I I I I I I I I I I I I  #6""" G 2 2  ''T""FDKK,@,@ 	
 	
 	

 $D&11Ns   +6B--B14B1Fdeviceoptional	allow_cpuc                    t          | t                    r| S t          | t                    rt          j        |           } t          | t          j                  r;|r| j        dvrt          d|            n| j        dk    rt          d|            t          j                                        s&t          | t          j	        j                  r| j
        S t          | ||          S )a  Get the device index from :attr:`device`, which can be a torch.device object, a Python integer, or ``None``.

    If :attr:`device` is a torch.device object, returns the device index if it
    is a CUDA device. Note that for a CUDA device without a specified index,
    i.e., ``torch.device('cuda')``, this will return the current default CUDA
    device if :attr:`optional` is ``True``. If :attr:`allow_cpu` is ``True``,
    CPU devices will be accepted and ``-1`` will be returned in this case.

    If :attr:`device` is a Python integer, it is returned as is.

    If :attr:`device` is ``None``, this will return the current default CUDA
    device if :attr:`optional` is ``True``.
    )r&   cpuz(Expected a cuda or cpu device, but got: r&   z!Expected a cuda device, but got: )r   r#   r   r$   r   r   r   jitis_scriptingr&   idx_torch_get_device_index)r   r   r   s      r   r   r   X  s      &# &# &f%%&%,'' K 	K{/11 !TF!T!TUUU 2[F""IIIJJJ9!!## fej/00 	:"68Y???r   )Nr-   NNrm   )FF)r   r
   typingr   r   r   r$   torch._utilsr   r   r   r   r#   r   r,   r   r   bytesrh   rj   rs   dictr   boolr   r   r   <module>r      sE    



 ' ' ' ' ' ' ' ' ' '  F E E E E E+6; + + + +	7 	7 	7 	7 	7 	76FK 6 6 6 6, )-(,#'y yyy !y 	y
  ~y 4.y y y y yxV V V V V V V V:Y
 Y
 Y
 Y
 Y
 Y
 Y
 Y
z AE- -	sEz	-*249*=-
;S-/001- - - -b <A@ @@@48@@ @ @ @ @ @r   