o
    h                    @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZ d dlZd dlZd dlZddlmZ ddlmZ ddlmZ ddlmZ d dlmZmZ d d	lmZm Z  d d
l!m"Z" e
j#dkZ$e
j#%dZ&e
j#%dZ'e$rdndZ(e$rdndZ)e$rdndZ*e$rdndZ+e$rdndZ,ej-.e/Z0ej-1ej-1e0Z2ej-3e2dZ4e$rdndZ5dZ6dZ7e8e8e9df e8e9df f Z:e;e<e:f Z=e6dfe6dfe6dfe6dfdddddZ>e=e?d< d Z@e@dfe@d!fe@d!fe@d"fe@d"fe@d#fe@d#fd$ZAe=e?d%< g d&ZBd'eeCe<  d(eCe< fd)d*ZDd(ee< fd+d,ZEd(ee< fd-d.ZFd(ee< fd/d0ZGd(e<fd1d2ZHd(e<fd3d4ZId5ZJd6ZKd7ZLd8ZMd9ZNeF ZOeOrbeHd:ndZPeOdurrejQjRdurrd;nd<ZSdZTejQjRdure8d=d> ejQjRUd?dd@ D ZTejVW reE ndZXejYZdApejYZdBZ[ej\W reG ndZ]e^dCZ_g dDZ`g dEZag dFZbg dGZcg dHZddIdJgZedKdL Zfg eedMdNdOef  dPZge ZhdQdRdSZidTZjdUdV Zkd(elfdWdXZmd(eCe< fdYdZZnd[d\ Zod(e<fd]d^Zpd_e<d(elfd`daZqd(e8elef fdbdcZrdde<deed(dfdfdgZsdhdi Ztdjdk ZuG dldm dme"Zvdndo Zwdpdq Zxdrds Zyddue<d(eCe< fdvdwZzddue<d(eCe< fdxdyZ{							<			;	<	;ddzee<eCe< f d{eel d|eel fd}d~Z|dd Z}dd Z~dd Z	<dddZdd Z										<			;	;	;	<dddZ	;dde<deld{eel d|eel d(df
ddZdzeCe< de<deld{eel d|eel d(dfddZ	<ddzeCe< de<deld{eel d|eel deld(dfddZdd Zdd Zdd ZddeeCe<  d(eCe< fddZddeeCe<  d(eCe< fddZde<deld(e<fddZdeld(ee9 fddZde<d(e;e<e<f fddZde<delde<d(dfddZdd Zdd Z	(	dddZ	(	dddZd(e<fddZde<d(elfddZde<d(elfddZdS )    N)Path   )	FileBaton)ExtensionVersioner)hipify_python)GeneratedFileCleaner)OptionalUnion)TorchVersionVersion)	build_extwin32darwinlinuxz.pydz.soz.exe libz.dllz/DLLz-shared)oem )   r   r   )   r   i^  .)
   r      r   ))   r   r      r   )z11.011.111.211.311.411.511.611.7CUDA_GCC_VERSIONS)   r$   r   r   )   r   )   r   )r   r   r   r   r    r!   r"   CUDA_CLANG_VERSIONS)get_default_build_rootcheck_compiler_ok_for_platform*get_compiler_abi_compatibility_and_versionBuildExtensionCppExtensionCUDAExtensionSyclExtensioninclude_pathslibrary_pathsloadload_inlineis_ninja_availableverify_ninja_availability&remove_extension_h_precompiler_headersget_cxx_compilercheck_compiler_is_gccargsreturnc                 C   s   | sg S dd | D S )zQuote command-line arguments for DOS/Windows conventions.

    Just wraps every argument which contains blanks in double quotes, and
    returns a new argument list.
    c                 S   s$   g | ]}d |v rd| dn|qS ) "r   ).0argr   r   m/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/torch/utils/cpp_extension.py
<listcomp>[   s   $ z"_nt_quote_args.<locals>.<listcomp>r   )r8   r   r   r>   _nt_quote_argsR   s   r@   c                  C   s   t jdpt jd} | du rCtd}|dur$t jt j|} ntr9td}t	|dkr4d} n|d } nd} t j
| sCd} | rUtj sUtd	|  d
tjd | S )zFind the CUDA install path.	CUDA_HOME	CUDA_PATHNnvccz7C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*r   r   z/usr/local/cudaz+No CUDA runtime is found, using CUDA_HOME=''file)osenvirongetshutilwhichpathdirname
IS_WINDOWSgloblenexiststorchcudais_availableprintsysstderr)	cuda_home	nvcc_path
cuda_homesr   r   r>   _find_cuda_home]   s(   

r[   c                  C   s   t jdpt jd} | du r@td}|dur6t jt jt j|} t j| dkr5t j| } n
d}t j	|r@|} | rSt
jjdu rStd|  dtjd	 | S )
zFind the ROCm install path.	ROCM_HOME	ROCM_PATHNhipcchipz	/opt/rocmz+No ROCm runtime is found, using ROCM_HOME='rD   rE   )rG   rH   rI   rJ   rK   rL   rM   realpathbasenamerQ   rR   versionr_   rU   rV   rW   )	rocm_home
hipcc_pathfallback_pathr   r   r>   _find_rocm_homex   s$   

rf   c                  C   s   d } t d}|d urtjtjtj|} | S z&tjdp#g }|D ]}|j	dkr>tjt
| j }  W | S q&W | S  tjjyT   tdtjd Y | S w )Nicpxzintel-sycl-rtz
libsycl.sozMTrying to find SYCL_HOME from intel-sycl-rt package, but it is not installed.rE   )rJ   rK   rG   rL   rM   r`   	importlibmetadatafilesnamer   locateparentresolvePackageNotFoundErrorrU   rV   rW   )	sycl_home	icpx_pathrj   fr   r   r>   _find_sycl_home   s.   


rs   c                  G   s0   t du rtdtrtdtjjt g| R  S )z
    Join paths with ROCM_HOME, or raises an error if it ROCM_HOME is not set.

    This is basically a lazy way of raising an error for missing $ROCM_HOME
    only once we need to get any ROCm-specific path.
    NzSROCM_HOME environment variable is not set. Please set it to your ROCm install root.zDBuilding PyTorch extensions using ROCm and Windows is not supported.)r\   OSErrorrN   rG   rL   joinpathsr   r   r>   _join_rocm_home   s
   rx   c                  G   $   t du rtdtjjt g| R  S )z
    Join paths with SYCL_HOME, or raises an error if it SYCL_HOME is not found.

    This is basically a lazy way of raising an error for missing SYCL_HOME
    only once we need to get any SYCL-specific path.
    NzSYCL runtime is not dected. Please setup the pytorch prerequisites for Intel GPU following the instruction in https://github.com/pytorch/pytorch?tab=readme-ov-file#intel-gpu-support or install intel-sycl-rt via pip.)	SYCL_HOMErt   rG   rL   ru   rv   r   r   r>   _join_sycl_home   s   r{   a8  

                               !! WARNING !!

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Your compiler ({}) may be ABI-incompatible with PyTorch!
Please use a compiler that is ABI-compatible with GCC 5.0 and above.
See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.

See https://gist.github.com/goldsborough/d466f43e8ffc948ff92de7486c5216d6
for instructions on how to install GCC 5 or higher.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                              !! WARNING !!
a  

                               !! WARNING !!

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Your compiler ({user_compiler}) is not compatible with the compiler Pytorch was
built with for this platform, which is {pytorch_compiler} on {platform}. Please
use {pytorch_compiler} to to compile your extension. Alternatively, you may
compile PyTorch from source using {user_compiler}, and then you can also use
{user_compiler} to compile your extension.

See https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md for help
with compiling PyTorch from source.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                              !! WARNING !!
z
The detected CUDA version ({0}) mismatches the version that was used to compile
PyTorch ({1}). Please make sure to use the same CUDA versions.
zThe detected CUDA version ({0}) has a minor version mismatch with the version that was used to compile PyTorch ({1}). Most likely this shouldn't be a problem.z
CUDA was not found on the system, please set the CUDA_HOME or the CUDA_PATH
environment variable or add NVCC to your system PATH. The extension compilation will fail.
r_   TFc                 c   s    | ]}t |V  qd S N)int)r<   vr   r   r>   	<genexpr>       r   .   
CUDNN_HOME
CUDNN_PATHz\d+\.\d+\.\d+\w+\+\w+)z/MDz/wd4819z/wd4251z/wd4244z/wd4267z/wd4275z/wd4018z/wd4190z/wd4624z/wd4067z/wd4068z/EHsc)&base_class_has_different_dll_interfacefield_without_dll_interface#dll_interface_conflict_none_assumed(dll_interface_conflict_dllexport_assumed)z-D__CUDA_NO_HALF_OPERATORS__z-D__CUDA_NO_HALF_CONVERSIONS__z"-D__CUDA_NO_BFLOAT16_CONVERSIONS__z-D__CUDA_NO_HALF2_OPERATORS__z--expt-relaxed-constexpr)-fPICz-D__HIP_PLATFORM_AMD__=1z-DUSE_ROCM=1z-DHIPBLAS_V2)z-DCUDA_HAS_FP16=1z-D__HIP_NO_HALF_OPERATORS__=1z-D__HIP_NO_HALF_CONVERSIONS__=1z-fsyclz -fsycl-targets=spir64_gen,spir64c                  C   s8   dt jv rt jdS tj } dd | D } d| S )NTORCH_XPU_ARCH_LISTc                 S   s   g | ]	}| d s|qS )zdg2-
startswithr<   xr   r   r>   r?   )      z'_get_sycl_arch_list.<locals>.<listcomp>,)rG   rH   rI   rR   xpuget_arch_listru   )	arch_listr   r   r>   _get_sycl_arch_list"  s
   


r   z-fsycl-linkz--offload-compressz-Xs "-device r;   x86	x86_amd64)r   z	win-amd64
0x03090000c                  C   s(   t rtjdd} | S tjdd} | S )NCXXclc++)rN   rG   rH   rI   )compilerr   r   r>   r6   <  s
   r6   c                   C   s   t tjj S r|   )!BUILT_FROM_SOURCE_VERSION_PATTERNmatchrR   rb   __version__r   r   r   r>   _is_binary_buildC  s   r   c                   C   s   t rddgS g dS )Nclang++clang)zg++gcczgnu-c++zgnu-ccr   r   )IS_MACOSr   r   r   r>    _accepted_compilers_for_platformG  s   r   c                 C   s   t j| r$t| }| }W d   n1 sw   Y  ||kr$dS t| d}|| W d   dS 1 s:w   Y  dS )z
    Equivalent to writing the content into the file but will not touch the file
    if it already had the right content (to avoid triggering recompile).
    Nw)rG   rL   rQ   openreadwrite)filenamenew_contentrr   contentsource_filer   r   r>   _maybe_writeK  s   

"r   c                   C   s   t jtjjddS )a  
    Return the path to the root folder under which extensions will built.

    For each extension module built, there will be one folder underneath the
    folder returned by this function. For example, if ``p`` is the path
    returned by this function and ``ext`` the name of an extension, the build
    folder for the extension will be ``p/ext``.

    This directory is **user-specific** so that multiple users on the same
    machine won't meet permission issues.
    torch_extensions)appname)rG   rL   r`   rR   _appdirsuser_cache_dirr   r   r   r>   r(   [  s   r(   r   c                    s   t rdS t|   du rdS tj  t fddt D r#dS tj	 }d|d< t
j| dgt
j|d	jt }trwtd
tj}t||}t|dkrSd|v S tj|d   tj dkrkd|v rkdS t fddt D S tr~|dS dS )a  
    Verify that the compiler is the expected one for the current platform.

    Args:
        compiler (str): The compiler executable to check.

    Returns:
        True if the compiler is gcc/g++ on Linux or clang/clang++ on macOS,
        and always True for Windows.
    TNFc                 3       | ]}| v V  qd S r|   r   r<   rk   compiler_pathr   r>   r   }  r   z1check_compiler_ok_for_platform.<locals>.<genexpr>CLC_ALL-vrW   env^COLLECT_GCC=(.*)$r   zclang versionr   r   gcc versionc                 3   r   r|   r   r   r   r   r>   r     r   zApple clang)rN   rJ   rK   rG   rL   r`   anyr   rH   copy
subprocesscheck_outputSTDOUTdecodeSUBPROCESS_DECODE_ARGSIS_LINUXrecompile	MULTILINEfindallrP   stripra   r   r   )r   r   version_stringpatternresultsr   r   r>   r)   j  s.   


r)   c              	   C   s  t  s	dtdfS tjddv rdtdfS t| s0ttj	| t
 d tjd dtdfS tr8dtdfS z<trPt}t| dd	g}|jt  d
}n#t}tj| tjd}td|jt  }|du rmg dnt| }W n! ty   t \}}}td|  d|  dtdf Y S w ttt ||krdtd
!|fS |  dd
!| } tt"	|  dtd
!|fS )a  
    Determine if the given compiler is ABI-compatible with PyTorch alongside its version.

    Args:
        compiler (str): The compiler executable name to check (e.g. ``g++``).
            Must be executable in a shell process.

    Returns:
        A tuple that contains a boolean that defines if the compiler is (likely) ABI-incompatible with PyTorch,
        followed by a `TorchVersion` string that contains the compiler version separated by dots.
    Tz0.0.0TORCH_DONT_CHECK_COMPILER_ABION1YESTRUEYr   )user_compilerpytorch_compilerplatformFz-dumpfullversionz-dumpversionr   )rW   z(\d+)\.(\d+)\.(\d+)N)0r   r   z$Error checking compiler version for : r:   )#r   r
   rG   rH   rI   r)   warningswarnWRONG_COMPILER_WARNINGformatr   rV   r   r   r   MINIMUM_GCC_VERSIONr   r   r   r   r   splitMINIMUM_MSVC_VERSIONr   r   searchlistgroups	Exceptionexc_infotuplemapr}   ru   ABI_INCOMPATIBILITY_WARNING)r   minimum_required_version
versionstrrb   compiler_infor   _errorr   r   r>   r*     sB   r*   compiler_namecompiler_versionc                 C   s  t stttjt dd}t|dg j	t
 }td|}|d u r&d S |d}t|}tjjd u r7d S ttjj}||krht|dd d u rMtd|j|jkr]tt|tjjtt|tjj tjdrytjd	d
vryt s{d S | drt nt!}||vrtd|  d|  d S || \}	}
d|v r|t!krd}
dt"t#|	}dt"t#|
}d| d| }|t$|k rtd|  d| d| d| d|  d| d|t$|krtd|  d| d| d|  d| dd S )NbinrC   	--versionzrelease (\d+[.]\d+)r   majorzsetuptools>=49.4.0 is requiredr   r   r   r   zThere are no z) version bounds defined for CUDA version zV11.4.48r   r   z>=z, <z!The current installed version of z (z4) is less than the minimum required version by CUDA z2). Please make sure to use an adequate version of z).z7) is greater than the maximum required version by CUDA z1. Please make sure to use an adequate version of )%rA   RuntimeErrorCUDA_NOT_FOUND_MESSAGErG   rL   ru   r   r   r   r   r   r   r   groupr   rR   rb   rS   getattr
ValueErrorr   CUDA_MISMATCH_MESSAGEr   r   r   CUDA_MISMATCH_WARNrV   r   r   rH   rI   r   r'   r#   r   strr
   )r   r   rC   cuda_version_strcuda_versioncuda_str_versioncuda_vertorch_cuda_versioncuda_compiler_boundsmin_compiler_versionmax_excl_compiler_versionmin_compiler_version_strmax_excl_compiler_version_strversion_bound_strr   r   r>   _check_cuda_version  sn   
r  c                 C   s$   t dd | D s| d d S d S )Nc                 s       | ]}| d V  qdS )z
-sycl-std=Nr   r<   flagr   r   r>   r         z5_append_sycl_std_if_no_std_present.<locals>.<genexpr>z-sycl-std=2020)r   appendcflagsr   r   r>   "_append_sycl_std_if_no_std_present  s   r  c                 C   s$   t  }d| td|  g}|S )Nz-fsycl-host-compiler=z-fsycl-host-compiler-options=)r6   shlexquote)r  host_cxxhost_cflagsr   r   r>   _wrap_sycl_host_flags  s
   r  c                       s   e Zd ZdZedd Zd fddZd fdd	Zdd
dZ fddZ	de
eef fddZdd Zdd Zdd Zdd Z  ZS )r+   a   
    A custom :mod:`setuptools` build extension .

    This :class:`setuptools.build_ext` subclass takes care of passing the
    minimum required compiler flags (e.g. ``-std=c++17``) as well as mixed
    C++/CUDA/SYCL compilation (and support for CUDA/SYCL files in general).

    When using :class:`BuildExtension`, it is allowed to supply a dictionary
    for ``extra_compile_args`` (rather than the usual list) that maps from
    languages/compilers (the only expected values are ``cxx``, ``nvcc`` or
    ``sycl``) to a list of additional compiler flags to supply to the compiler.
    This makes it possible to supply different flags to the C++, CUDA and SYCL
    compiler during mixed compilation.

    ``use_ninja`` (bool): If ``use_ninja`` is ``True`` (default), then we
    attempt to build using the Ninja backend. Ninja greatly speeds up
    compilation compared to the standard ``setuptools.build_ext``.
    Fallbacks to the standard distutils backend if Ninja is not available.

    .. note::
        By default, the Ninja backend uses #CPUS + 2 workers to build the
        extension. This may use up too many resources on some systems. One
        can control the number of workers by setting the `MAX_JOBS` environment
        variable to a non-negative number.
    c                    s   G  fddd| }|S )zReturn a subclass with alternative constructor that extends any original keyword arguments to the original constructor with the given options.c                       s   e Zd Z fddZ  ZS )z5BuildExtension.with_options.<locals>.cls_with_optionsc                    s    |  t j|i | d S r|   )updatesuper__init__)selfr8   kwargs)	__class__optionsr   r>   r  0  s   
z>BuildExtension.with_options.<locals>.cls_with_options.__init__)__name__
__module____qualname__r  __classcell__r   r  r  r>   cls_with_options/  s    r   r   )clsr  r   r   r  r>   with_options,  s   zBuildExtension.with_optionsr9   Nc                    s`   t  j|i | |dd| _|dd| _| jr,d}t s.t|d d| _d S d S d S )Nno_python_abi_suffixF	use_ninjaTznAttempted to use ninja as the BuildExtension backend but {}. Falling back to using the slow distutils backend.zwe could not find ninja.)	r  r  rI   r#  r$  r3   r   r   r   )r  r8   r  msgr  r   r>   r  6  s   
zBuildExtension.__init__c                    s   t    | jrd| _d S d S )NT)r  finalize_optionsr$  forcer  r  r   r>   r&  C  s   

zBuildExtension.finalize_optionsc              
      s    \}}d}d}tj}t|d }|r|sF|rF|jD ]}tj|\}}	|	dkr.d}n|	dkr4d}|r:|r: nqt|d }|rD|sF|s|rOjsOJ d|rXt	sXt
|| jD ]m}t|jtrrdD ]}	|	|jvrqg |j|	< qe|d t	r| |jr|dt  n!d	D ]}
ttjd
|
 }|d urts|d|
 d| d q| | d|jv rȈjsJ d|j dq[j jg d7  _tjj rj jdg7  _jjdkrj jddg7  _jj jj!njj"d'fdd dd dd d' fdd}	 	 	 		 	 	 d( fdd 	}d!d" 	 	 	 		 	 	 d( fd#d$	}	 	 	 		 	 	 d( fd%d&	}jjdkrejr`|j_ n|j_ njrn|j_ n|j_"t#$ d S ))NF.cuT.syclz+ninja is required to build sycl extensions.)cxxrC   sycl-DTORCH_API_INCLUDE_EXTENSION_Hz-DPy_LIMITED_API=COMPILER_TYPESTDLIB	BUILD_ABI
_PYBIND11_-DPYBIND11_z="r;   
nvcc_dlinkz;With dlink=True, ninja is required to build cuda extension r   )r)  .cuh.hipr*  z.mmmsvcr5  r9   c                    sN   j jdkrdnd}|d  d }t fdd| D s%| | d S d S )Nr7  z/{}:z-{}=stdzc++17c                 3   s    | ]}|  V  qd S r|   r   r  cpp_flag_prefixr   r>   r     r	  zZBuildExtension.build_extensions.<locals>.append_std17_if_no_std_present.<locals>.<genexpr>)r   compiler_typer   r   r
  )r  cpp_format_prefixcpp_flagr(  r9  r>   append_std17_if_no_std_present  s   
zGBuildExtension.build_extensions.<locals>.append_std17_if_no_std_presentc                 S   sN   t ddg |  t|  } td}|d ur%tdd | D s%| d|g | S )N--compiler-options'-fPIC'CCc                 s   r  ))-ccbinz--compiler-bindirNr   r  r   r   r>   r     r	  zKBuildExtension.build_extensions.<locals>.unix_cuda_flags.<locals>.<genexpr>rB  )COMMON_NVCC_FLAGS_get_cuda_arch_flagsrG   getenvr   extend)r  _ccbinr   r   r>   unix_cuda_flags  s   
z8BuildExtension.build_extensions.<locals>.unix_cuda_flagsc                 S   sF   | d urt t| D ]}tj| | stj| | | |< q
d S d S r|   )rangerP   rG   rL   isabsabspath)rw   ir   r   r>   !convert_to_absolute_paths_inplace  s   zJBuildExtension.build_extensions.<locals>.convert_to_absolute_paths_inplacec           	   
      s   t |}z[jj}t|r;trtddntddg}jd| t	|t
r+|d }tr6t| t| }n|}n	t	|t
rD|d }trJt| } | | ||||| W jd| d S jd| w )Nr   r^   rC   compiler_sor+  )r   deepcopyr   rN  _is_cuda_fileIS_HIP_EXTENSIONrx   _join_cuda_homeset_executable
isinstancedictCOMMON_HIPCC_FLAGS_get_rocm_arch_flagsCOMMON_HIP_FLAGS)	objsrcextcc_argsextra_postargspp_optsr  original_compilerrC   )r>  original_compiler  rH  r   r>   unix_wrap_single_compile  s$   



"zABuildExtension.build_extensions.<locals>.unix_wrap_single_compiler   c                    s  t j|}jj j|||| ||\}}	}}
}j|
||}jjdd }tt	t
| }tt	t| }t|trD|d }nt|}trNt| } | d}d}|r|}t|trd|d }nt|}trw|t| }tt | }n|} | dd |D }dd |D }t|trd|v r|d }nd}d}d}d}|r|| t }t|tr|d	 }nt|} | t| || | } | d
d |D }d|}dd |D }|t|7 }t}dd |D }t| |	dd || D dd |D |||||||d||d |	S )z;Compiles sources by outputting a ninja file and running it.r   Nr+  rC   c                 S      g | ]}t |qS r   r  r  r<   rr   r   r   r>   r?         zTBuildExtension.build_extensions.<locals>.unix_wrap_ninja_compile.<locals>.<listcomp>c                 S   rb  r   rc  rd  r   r   r>   r?     re  r4  r,  c                 S      g | ]}| d dqS )r;   \\"replacer<   itemr   r   r>   r?   %      r:   c                 S   rb  r   rc  rd  r   r   r>   r?   *  re  c                 S   rb  r   rc  rd  r   r   r>   r?   -  re  c                 S   rb  r   rc  rd  r   r   r>   r?   2  re  c                 S   rb  r   rc  rd  r   r   r>   r?   3  re  Tsourcesobjectsr  post_cflagscuda_cflagscuda_post_cflagscuda_dlink_post_cflagssycl_cflagssycl_post_cflagssycl_dlink_post_cflagsbuild_directoryverbose	with_cuda	with_sycl)rG   rL   rK  r   include_dirs_setup_compile_get_cc_argsrN  r   r   rP  _is_sycl_filerT  rU  r   rQ  rX  rW  rV  _COMMON_SYCL_FLAGSr  ru   r  _SYCL_DLINK_FLAGS%_write_ninja_file_and_compile_objects)rn  
output_dirmacrosr{  debugextra_preargsr]  dependsr   ro  r^  common_cflagsextra_cc_cflagsry  rz  rp  rr  rq  rs  ru  rt  rv  r  )r>  rM  r  rH  r   r>   unix_wrap_ninja_compile  s   







z@BuildExtension.build_extensions.<locals>.unix_wrap_ninja_compilec                 S   s   t |  t|  S r|   )rC  rD  r  r   r   r>   win_cuda_flagsB  s
   z7BuildExtension.build_extensions.<locals>.win_cuda_flagsc           	   	      sV   t |_d } fdd}z|j_| |||||||W j_S j_w )Nc           
         s  t ddd fdd| D D }t ddd fdd| D D }t d	 d
d  fdd| D D }t|dkrt|dkr|d }|d }t|rtdd}tjtrbjd }ntjtrlj}ng }|ddg }t	D ]}d|g| }qxt
D ]
}	dd|	 g| }q|d|d|g| | } | S tjtrt	jd  }| | |7 } | S tjtrt	j }| | |7 } | S )Nz/T(p|c)(.*)c                 S      g | ]	}|r| d qS )r   r   r<   mr   r   r>   r?   U      zcBuildExtension.build_extensions.<locals>.win_wrap_single_compile.<locals>.spawn.<locals>.<listcomp>c                 3       | ]}  |V  qd S r|   r   r<   elem)	src_regexr   r>   r   V  r	  zbBuildExtension.build_extensions.<locals>.win_wrap_single_compile.<locals>.spawn.<locals>.<genexpr>z/Fo(.*)c                 S   r  r   r  r  r   r   r>   r?   [  r  c                 3   r  r|   r  r  )	obj_regexr   r>   r   \  r	  z((\-|\/)I.*)c                 S   r  r  r  r  r   r   r>   r?   a  s
    c                 3   r  r|   r  r  )include_regexr   r>   r   c  r	  r   r   r   rC   
-std=c++17--use-local-env
-Xcompiler-Xcudafe--diag_suppress=z-cz-or+  )r   r   rP   rP  rR  rT  r  rU  r   COMMON_MSVC_FLAGSMSVC_IGNORE_CUDAFE_WARNINGS)
cmdsrc_listobj_listinclude_listrZ  rY  rC   r  r  ignore_warning)r>  original_spawnr  r  )r  r  r  r>   spawnR  sN   





zOBuildExtension.build_extensions.<locals>.win_wrap_single_compile.<locals>.spawn)r   rO  r  r   r  )	rn  r  r  r{  r  r  r]  r  r  )r>  r`  r  r  r  r   r>   win_wrap_single_compileF  s   	1z@BuildExtension.build_extensions.<locals>.win_wrap_single_compilec                    s  j js	j   tj|}j j j |||| ||\}}	}}
}|p(g }g }|r5|j j	 n|j j
 |t || |
 }ttt| }t|trX|d }nt|} | d }d }|rddg}|D ]}|d || qltD ]}|d |d|  q{||
 t|tr|d }nt|}|}t|}t|}|rt|}t|}t|trd|v rÈ|d }nd }t| |	|||||d d d |d	|d
d |	S )Nr+  r  r  r  r  r  rC   r4  TFrm  )r   initialized
initializerG   rL   rK  r{  r|  rF  compile_options_debugcompile_optionsr  r   r   rP  rT  rU  r   r
  r  r@   r  )rn  r  r  r{  r  r  r]  r  r   ro  r^  r  r  ry  rp  rr  rq  common_cflagr  rs  )r>  rM  r  r  r   r>   win_wrap_ninja_compile  sz   	









z?BuildExtension.build_extensions.<locals>.win_wrap_ninja_compiler9   N)NNNr   NNN)%
_check_abiiter
extensionsnextrn  rG   rL   splitextr$  rQ  r  rT  extra_compile_argsrU  _add_compile_flag_hipify_compile_flagspy_limited_apimin_supported_cpythonr   rR   _CrN   _define_torch_extension_name_add_gnu_cpp_abi_flagrk   r   src_extensionsbackendsmpsis_builtr;  _cpp_extensionsr   r  _compiler   build_extensions)r  r   r   cuda_extsycl_extextension_iter	extensionsourcer   r[  rk   valra  r  r  r  r   )r>  rM  r`  r  r  rH  r  r>   r  H  s   












	nF\


zBuildExtension.build_extensionsc                    sB   t  |}| jr|d}|d d |dd   }d|}|S )Nr   )r  get_ext_filenamer#  r   ru   )r  ext_nameext_filenameext_filename_partswithout_abir  r   r>   r    s   

zBuildExtension.get_ext_filenamec                 C   sX   t | jdr| jjd }nt }t|\}}tr(dtjv r(dtjvr(d}t|||fS )Ncompiler_cxxr   VSCMD_ARG_TGT_ARCHDISTUTILS_USE_SDKzIt seems that the VC environment is activated but DISTUTILS_USE_SDK is not set.This may lead to multiple activations of the VC env.Please set `DISTUTILS_USE_SDK=1` and try again.)	hasattrr   r  r6   r*   rN   rG   rH   UserWarning)r  r   r   rb   r%  r   r   r>   r    s   zBuildExtension._check_abic                 C   sH   t |j|_t|jtr|j D ]}|| qd S |j| d S r|   )r   rO  r  rT  rU  valuesr
  )r  r  r  r8   r   r   r>   r    s   z BuildExtension._add_compile_flagc           	      C   s   t |jtrhd|jv rjg }|jd D ]N}|dr[d|v r[|ds[|dd}t|dkrA|\}}|ddd}| d| }n|ddd}|| td	| d
| t	j
d q|| q||jd< d S d S d S )NrC   -CUDA-I=r   r   HIPzModified flag: z -> rE   )rT  r  rU  r   r   rP   ri  r
  rU   rV   rW   )	r  r  modified_flagsr  parts	flag_part
value_partmodified_flag_partmodified_flagr   r   r>   r    s   
z$BuildExtension._hipify_compile_flagsc                 C   s.   |j d}|d }d| }| || d S )Nr   r  -DTORCH_EXTENSION_NAME=)rk   r   r  )r  r  namesrk   definer   r   r>   r  2  s   
z+BuildExtension._define_torch_extension_namec                 C   s    |  |dtttjj  d S Nz-D_GLIBCXX_USE_CXX11_ABI=)r  r   r}   rR   r  _GLIBCXX_USE_CXX11_ABI)r  r  r   r   r>   r  <  s    z$BuildExtension._add_gnu_cpp_abi_flagr  )r  r  r  __doc__classmethodr"  r  r&  r  r  r   r   r
   r  r  r  r  r  r  r   r   r  r>   r+     s     
	
   /

r+   c                 O   s   | dg }|t 7 }||d< | dg }|t 7 }||d< | dg }|d |d |d | dds>|d	 trE|d
 ||d< d|d< tj| |g|R i |S )aM  
    Create a :class:`setuptools.Extension` for C++.

    Convenience method that creates a :class:`setuptools.Extension` with the
    bare minimum (but often sufficient) arguments to build a C++ extension.

    All arguments are forwarded to the :class:`setuptools.Extension`
    constructor. Full list arguments can be found at
    https://setuptools.pypa.io/en/latest/userguide/ext_modules.html#extension-api-reference

    .. warning::
        The PyTorch python API (as provided in libtorch_python) cannot be built
        with the flag ``py_limited_api=True``.  When this flag is passed, it is
        the user's responsibility in their library to not use APIs from
        libtorch_python (in particular pytorch/python bindings) and to only use
        APIs from libtorch (aten objects, operators and the dispatcher). For
        example, to give access to custom ops from python, the library should
        register the ops through the dispatcher.

        Contrary to CPython setuptools, who does not define -DPy_LIMITED_API
        as a compile flag when py_limited_api is specified as an option for
        the "bdist_wheel" command in ``setup``, PyTorch does! We will specify
        -DPy_LIMITED_API=min_supported_cpython to best enforce consistency,
        safety, and sanity in order to encourage best practices. To target a
        different version, set min_supported_cpython to the hexcode of the
        CPython version of choice.

    Example:
        >>> # xdoctest: +SKIP
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CPP_EXT)
        >>> from setuptools import setup
        >>> from torch.utils.cpp_extension import BuildExtension, CppExtension
        >>> setup(
        ...     name='extension',
        ...     ext_modules=[
        ...         CppExtension(
        ...             name='extension',
        ...             sources=['extension.cpp'],
        ...             extra_compile_args=['-g'],
        ...             extra_link_args=['-Wl,--no-as-needed', '-lm'])
        ...     ],
        ...     cmdclass={
        ...         'build_ext': BuildExtension
        ...     })
    r{  library_dirs	librariesc10rR   	torch_cpur  Ftorch_pythonsleefr   language)rI   r/   r0   r
  rN   
setuptools	Extension)rk   rn  r8   r  r{  r  r  r   r   r>   r,   A  s"   .






r,   c              
   O   s&  | dg }|tdd7 }||d< | dg }|d |d |d | dd	s1|d
 trC|d |d |d n|d |d |d ||d< | dg }trt }tj|||tj	|dgdd |D dddd}t
 }	|D ]$}
tj|
}||v r|| jdur|| jn|}|	tj|| qt|	}|tdd7 }||d< d|d< | dg }| dd	p|}|r| di }| dg }|dg7 }|dd |D 7 }|d d |D 7 }tjjdurttjjd!kr|d"g7 }||d< ||d< tj| |g|R i |S )#a  
    Create a :class:`setuptools.Extension` for CUDA/C++.

    Convenience method that creates a :class:`setuptools.Extension` with the
    bare minimum (but often sufficient) arguments to build a CUDA/C++
    extension. This includes the CUDA include path, library path and runtime
    library.

    All arguments are forwarded to the :class:`setuptools.Extension`
    constructor. Full list arguments can be found at
    https://setuptools.pypa.io/en/latest/userguide/ext_modules.html#extension-api-reference

    .. warning::
        The PyTorch python API (as provided in libtorch_python) cannot be built
        with the flag ``py_limited_api=True``.  When this flag is passed, it is
        the user's responsibility in their library to not use APIs from
        libtorch_python (in particular pytorch/python bindings) and to only use
        APIs from libtorch (aten objects, operators and the dispatcher). For
        example, to give access to custom ops from python, the library should
        register the ops through the dispatcher.

        Contrary to CPython setuptools, who does not define -DPy_LIMITED_API
        as a compile flag when py_limited_api is specified as an option for
        the "bdist_wheel" command in ``setup``, PyTorch does! We will specify
        -DPy_LIMITED_API=min_supported_cpython to best enforce consistency,
        safety, and sanity in order to encourage best practices. To target a
        different version, set min_supported_cpython to the hexcode of the
        CPython version of choice.

    Example:
        >>> # xdoctest: +SKIP
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CPP_EXT)
        >>> from setuptools import setup
        >>> from torch.utils.cpp_extension import BuildExtension, CUDAExtension
        >>> setup(
        ...     name='cuda_extension',
        ...     ext_modules=[
        ...         CUDAExtension(
        ...                 name='cuda_extension',
        ...                 sources=['extension.cpp', 'extension_kernel.cu'],
        ...                 extra_compile_args={'cxx': ['-g'],
        ...                                     'nvcc': ['-O2']},
        ...                 extra_link_args=['-Wl,--no-as-needed', '-lcuda'])
        ...     ],
        ...     cmdclass={
        ...         'build_ext': BuildExtension
        ...     })

    Compute capabilities:

    By default the extension will be compiled to run on all archs of the cards visible during the
    building process of the extension, plus PTX. If down the road a new card is installed the
    extension may need to be recompiled. If a visible card has a compute capability (CC) that's
    newer than the newest version for which your nvcc can build fully-compiled binaries, PyTorch
    will make nvcc fall back to building kernels with the newest version of PTX your nvcc does
    support (see below for details on PTX).

    You can override the default behavior using `TORCH_CUDA_ARCH_LIST` to explicitly specify which
    CCs you want the extension to support:

    ``TORCH_CUDA_ARCH_LIST="6.1 8.6" python build_my_extension.py``
    ``TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX" python build_my_extension.py``

    The +PTX option causes extension kernel binaries to include PTX instructions for the specified
    CC. PTX is an intermediate representation that allows kernels to runtime-compile for any CC >=
    the specified CC (for example, 8.6+PTX generates PTX that can runtime-compile for any GPU with
    CC >= 8.6). This improves your binary's forward compatibility. However, relying on older PTX to
    provide forward compat by runtime-compiling for newer CCs can modestly reduce performance on
    those newer CCs. If you know exact CC(s) of the GPUs you want to target, you're always better
    off specifying them individually. For example, if you want your extension to run on 8.0 and 8.6,
    "8.0+PTX" would work functionally because it includes PTX that can runtime-compile for 8.6, but
    "8.0 8.6" would be better.

    Note that while it's possible to include all supported archs, the more archs get included the
    slower the building process will be, as it will build a separate kernel image for each arch.

    Note that CUDA-11.5 nvcc will hit internal compiler error while parsing torch/extension.h on Windows.
    To workaround the issue, move python binding logic to pure C++ file.

    Example use:
        #include <ATen/ATen.h>
        at::Tensor SigmoidAlphaBlendForwardCuda(....)

    Instead of:
        #include <torch/extension.h>
        torch::Tensor SigmoidAlphaBlendForwardCuda(...)

    Currently open issue for nvcc bug: https://github.com/pytorch/pytorch/issues/69460
    Complete workaround code example: https://github.com/facebookresearch/pytorch3d/commit/cb170ac024a949f1f9614ffe6af1c38d972f7d48

    Relocatable device code linking:

    If you want to reference device symbols across compilation units (across object files),
    the object files need to be built with `relocatable device code` (-rdc=true or -dc).
    An exception to this rule is "dynamic parallelism" (nested kernel launches)  which is not used a lot anymore.
    `Relocatable device code` is less optimized so it needs to be used only on object files that need it.
    Using `-dlto` (Device Link Time Optimization) at the device code compilation step and `dlink` step
    helps reduce the protentional perf degradation of `-rdc`.
    Note that it needs to be used at both steps to be useful.

    If you have `rdc` objects you need to have an extra `-dlink` (device linking) step before the CPU symbol linking step.
    There is also a case where `-dlink` is used without `-rdc`:
    when an extension is linked against a static lib containing rdc-compiled objects
    like the [NVSHMEM library](https://developer.nvidia.com/nvshmem).

    Note: Ninja is required to build a CUDA Extension with RDC linking.

    Example:
        >>> # xdoctest: +SKIP
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CPP_EXT)
        >>> CUDAExtension(
        ...        name='cuda_extension',
        ...        sources=['extension.cpp', 'extension_kernel.cu'],
        ...        dlink=True,
        ...        dlink_libraries=["dlink_lib"],
        ...        extra_compile_args={'cxx': ['-g'],
        ...                            'nvcc': ['-O2', '-rdc=true']})
    r  rS   )device_typer  r  rR   r  r  Fr  amdhip64c10_hip	torch_hipcudartc10_cuda
torch_cudar{  *c                 S      g | ]}t j|qS r   rG   rL   rK  r<   sr   r   r>   r?     rl  z!CUDAExtension.<locals>.<listcomp>T)project_directoryoutput_directoryheader_include_dirsincludesextra_filesshow_detailedis_pytorch_extensionhipify_extra_files_onlyNr   r  dlink_librariesdlinkr  r4  z-dlinkc                 S      g | ]}d | qS )-Lr   r   r   r   r>   r?   9  re  c                 S   r  )z-lr   r   r   r   r>   r?   :  re  r   z-dlto)rI   r0   r
  rQ  rG   getcwdr   hipifyrL   ru   setrK  hipified_pathaddrelpathr   r/   rR   rb   rS   r
   r  r  )rk   rn  r8   r  r  r  r{  	build_dirhipify_resulthipified_sourcesr  s_abshipified_s_absr  r  r  extra_compile_args_dlinkr   r   r>   r-     sp   w











r-   c                 O   s   | dg }|t 7 }||d< | dg }|d |d |d |d | dds4|d	 |d
 ||d< | dg }|t 7 }||d< d|d< tj| |g|R i |S )at
  
    Creates a :class:`setuptools.Extension` for SYCL/C++.

    Convenience method that creates a :class:`setuptools.Extension` with the
    bare minimum (but often sufficient) arguments to build a SYCL/C++
    extension.

    All arguments are forwarded to the :class:`setuptools.Extension`
    constructor.

    .. warning::
        The PyTorch python API (as provided in libtorch_python) cannot be built
        with the flag ``py_limited_api=True``.  When this flag is passed, it is
        the user's responsibility in their library to not use APIs from
        libtorch_python (in particular pytorch/python bindings) and to only use
        APIs from libtorch (aten objects, operators and the dispatcher). For
        example, to give access to custom ops from python, the library should
        register the ops through the dispatcher.

        Contrary to CPython setuptools, who does not define -DPy_LIMITED_API
        as a compile flag when py_limited_api is specified as an option for
        the "bdist_wheel" command in ``setup``, PyTorch does! We will specify
        -DPy_LIMITED_API=min_supported_cpython to best enforce consistency,
        safety, and sanity in order to encourage best practices. To target a
        different version, set min_supported_cpython to the hexcode of the
        CPython version of choice.

    Example:
        >>> # xdoctest: +SKIP
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CPP_EXT)
        >>> from torch.utils.cpp_extension import BuildExtension, SyclExtension
        >>> setup(
        ...     name='xpu_extension',
        ...     ext_modules=[
        ...     SyclExtension(
        ...                 name='xpu_extension',
        ...                 sources=['extension.cpp', 'extension_kernel.cpp'],
        ...                 extra_compile_args={'cxx': ['-g', '-std=c++20', '-fPIC']})
        ...     ],
        ...     cmdclass={
        ...         'build_ext': BuildExtension
        ...     })

    By default the extension will be compiled to run on all archs of the cards visible during the
    building process of the extension. If down the road a new card is installed the
    extension may need to be recompiled. You can override the default behavior using
    `TORCH_XPU_ARCH_LIST` to explicitly specify which device architectures you want the extension
    to support:

    ``TORCH_XPU_ARCH_LIST="pvc,xe-lpg" python build_my_extension.py``

    Note that while it's possible to include all supported archs, the more archs get included the
    slower the building process will be, as it will build a separate kernel image for each arch.

    Note: Ninja is required to build SyclExtension.
    r  r  r  c10_xpurR   r  r  Fr  	torch_xpur{  r   r  )rI   r0   r
  r/   r  r  )rk   rn  r8   r  r  r  r{  r   r   r>   r.   F  s"   9







r.   cpur  c                 C   s   t jtd}|t j|ddddg}| dkr,tr,|t j|d |td |S | dkr_td}|dkr=|| t j	dd	 }rO|dkrO|| t
d	ur]|t jt
d |S | d
krr|td |tdd |S )z
    Get the include paths required to build a C++ or CUDA or SYCL extension.

    Args:
        device_type: Defaults to "cpu".
    Returns:
        A list of include path strings.
    includerR   csrcapirS   THHz/usr/includeCUDA_INC_PATHNr   r,  )rG   rL   ru   _TORCH_PATHrQ  r
  rx   rR  rH   rI   r   r{   )r  lib_includerw   cuda_home_includecuda_inc_pathr   r   r>   r/     s,   	

r/   c                 C   s  t g}| dkr"tr"d}|t| tdur |tjtd |S | dkr[tr0tjdd}nd}tj	t
|sDtj	t
drDd}|t
| tdurY|tjt| |S | dkrtritjdd}nd}tj	t|s}tj	tdr}d}|t| |S )z
    Get the library paths required to build a C++ or CUDA extension.

    Args:
        device_type: Defaults to "cpu".

    Returns:
        A list of library path strings.
    rS   r   Nx64lib64r   )TORCH_LIB_PATHrQ  r
  rx   HIP_HOMErG   rL   ru   rN   rQ   rR  r   r{   )r  rw   lib_dirr   r   r>   r0     s:   r0   rn  ry  rz  c                 C   s>   t | t|tr
|gn|||||||pt| |||	|
|||dS )aG  
    Load a PyTorch C++ extension just-in-time (JIT).

    To load an extension, a Ninja build file is emitted, which is used to
    compile the given sources into a dynamic library. This library is
    subsequently loaded into the current Python process as a module and
    returned from this function, ready for use.

    By default, the directory to which the build file is emitted and the
    resulting library compiled to is ``<tmp>/torch_extensions/<name>``, where
    ``<tmp>`` is the temporary folder on the current platform and ``<name>``
    the name of the extension. This location can be overridden in two ways.
    First, if the ``TORCH_EXTENSIONS_DIR`` environment variable is set, it
    replaces ``<tmp>/torch_extensions`` and all extensions will be compiled
    into subfolders of this directory. Second, if the ``build_directory``
    argument to this function is supplied, it overrides the entire path, i.e.
    the library will be compiled into that folder directly.

    To compile the sources, the default system compiler (``c++``) is used,
    which can be overridden by setting the ``CXX`` environment variable. To pass
    additional arguments to the compilation process, ``extra_cflags`` or
    ``extra_ldflags`` can be provided. For example, to compile your extension
    with optimizations, pass ``extra_cflags=['-O3']``. You can also use
    ``extra_cflags`` to pass further include directories.

    CUDA support with mixed compilation is provided. Simply pass CUDA source
    files (``.cu`` or ``.cuh``) along with other sources. Such files will be
    detected and compiled with nvcc rather than the C++ compiler. This includes
    passing the CUDA lib64 directory as a library directory, and linking
    ``cudart``. You can pass additional flags to nvcc via
    ``extra_cuda_cflags``, just like with ``extra_cflags`` for C++. Various
    heuristics for finding the CUDA install directory are used, which usually
    work fine. If not, setting the ``CUDA_HOME`` environment variable is the
    safest option.

    SYCL support with mixed compilation is provided. Simply pass SYCL source
    files (``.sycl``) along with other sources. Such files will be detected
    and compiled with SYCL compiler (such as Intel DPC++ Compiler) rather
    than the C++ compiler. You can pass additional flags to SYCL compiler
    via ``extra_sycl_cflags``, just like with ``extra_cflags`` for C++.
    SYCL compiler is expected to be found via system PATH environment
    variable.

    Args:
        name: The name of the extension to build. This MUST be the same as the
            name of the pybind11 module!
        sources: A list of relative or absolute paths to C++ source files.
        extra_cflags: optional list of compiler flags to forward to the build.
        extra_cuda_cflags: optional list of compiler flags to forward to nvcc
            when building CUDA sources.
        extra_sycl_cflags: optional list of compiler flags to forward to SYCL
            compiler when building SYCL sources.
        extra_ldflags: optional list of linker flags to forward to the build.
        extra_include_paths: optional list of include directories to forward
            to the build.
        build_directory: optional path to use as build workspace.
        verbose: If ``True``, turns on verbose logging of load steps.
        with_cuda: Determines whether CUDA headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on the existence of ``.cu`` or
            ``.cuh`` in ``sources``. Set it to `True`` to force CUDA headers
            and libraries to be included.
        with_sycl: Determines whether SYCL headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on the existence of ``.sycl`` in
            ``sources``. Set it to `True`` to force SYCL headers and
            libraries to be included.
        is_python_module: If ``True`` (default), imports the produced shared
            library as a Python module. If ``False``, behavior depends on
            ``is_standalone``.
        is_standalone: If ``False`` (default) loads the constructed extension
            into the process as a plain dynamic library. If ``True``, build a
            standalone executable.

    Returns:
        If ``is_python_module`` is ``True``:
            Returns the loaded PyTorch extension as a Python module.

        If ``is_python_module`` is ``False`` and ``is_standalone`` is ``False``:
            Returns nothing. (The shared library is loaded into the process as
            a side effect.)

        If ``is_standalone`` is ``True``.
            Return the path to the executable. (On Windows, TORCH_LIB_PATH is
            added to the PATH environment variable as a side effect.)

    Example:
        >>> # xdoctest: +SKIP
        >>> from torch.utils.cpp_extension import load
        >>> module = load(
        ...     name='extension',
        ...     sources=['extension.cpp', 'extension_kernel.cu'],
        ...     extra_cflags=['-O2'],
        ...     verbose=True)
    keep_intermediates)_jit_compilerT  r   _get_build_directory)rk   rn  extra_cflagsextra_cuda_cflagsextra_sycl_cflagsextra_ldflagsextra_include_pathsrw  rx  ry  rz  is_python_moduleis_standaloner&  r   r   r>   r1     s    mr1   c                  C   sH   g } dD ]}t tjd| }|d ur!ts!| d| d| d q| S )Nr.  r2  r3  z=\"\")r   rR   r  rN   r
  )
abi_cflagspnamepvalr   r   r>   _get_pybind11_abi_build_flagsg  s   r4  c                  C   s   dt ttjj g} | S r  )r   r}   rR   r  r  )glibcxx_abi_cflagsr   r   r>   _get_glibcxx_abi_build_flags{  s   r6  c                 C   s   t sdS tj }d|d< ztj| dgtj|djt }W n% t	yB   ztj| dgtj|djt }W n t	y?   Y Y dS w Y nw t
dt
j}t
||}t|dkrXdS tj|d	  }tj|d
krpd|v rpdS dS )NFr   r   r   r   r   r   r   r   r   r   T)r   rG   rH   r   r   r   r   r   r   r   r   r   r   r   rP   rL   r`   r   ra   )r   r   r   r   r   r   r   r   r>   r7     s,   
  r7   c                    s  t sdS t }t|}|du rdS tjtddd}tjtddd}tjtddd}dd	 }d
d }	dd }
dd }dd   fdd}dd }|| }d|rYdd |D ng }tjtd}d| dt	ddtj|ddddg}||}g }|s|dg7 }|ddg7 }|d d t
 D 7 }|d!d t D 7 }||}|	|||||||}|
|}tj|d"ur|| ||| dS |||}|du r|| ||| dS dS )#aP  
    Precompiled Headers(PCH) can pre-build the same headers and reduce build time for pytorch load_inline modules.
    GCC offical manual: https://gcc.gnu.org/onlinedocs/gcc-4.0.4/gcc/Precompiled-Headers.html
    PCH only works when built pch file(header.h.gch) and build target have the same build parameters. So, We need
    add a signature file to record PCH file parameters. If the build parameters(signature) changed, it should rebuild
    PCH file.

    Note:
    1. Windows and MacOS have different PCH mechanism. We only support Linux currently.
    2. It only works on GCC/G++.
    NFr  rR   zextension.hextension.h.gchextension.h.signc                 S   s*   d}| d u r|S | D ]}||d 7 }q
|S )Nr   r:   r   )r  stringelementr   r   r>   listToString  s   zF_check_and_build_extension_h_precompiler_headers.<locals>.listToStringc                 S   s>   t ddd|  d| d| d| d| d| d| d S )Nz[ \n]+r:   z
                z -x c++-header z -o z
            )r   subr   )r   	head_filehead_file_pchr  torch_include_dirsr)  r-  r   r   r>   format_precompiler_header_cmd  s*   zW_check_and_build_extension_h_precompiler_headers.<locals>.format_precompiler_header_cmdc                 S   s   |  dd}|S )Nr:   r   rh  )r  	signaturer   r   r>   command_to_signature  s   zN_check_and_build_extension_h_precompiler_headers.<locals>.command_to_signaturec                 S   sT   t j| }|du rdS t| }| }||kW  d    S 1 s#w   Y  d S )NF)rG   rL   isfiler   r   )	file_pathrA  b_existrF   r   r   r   r>   check_pch_signature_in_file  s   
$zU_check_and_build_extension_h_precompiler_headers.<locals>.check_pch_signature_in_filec              
   S   sl   t j| s4zt| jddd W d S  ty3 } z|jtjkr(td|  |W Y d }~d S d }~ww d S )NT)parentsexist_okzFail to create path )	rG   rL   rQ   r   mkdirrt   errnoEEXISTr   )path_direxcr   r   r>   _create_if_not_exist  s   zN_check_and_build_extension_h_precompiler_headers.<locals>._create_if_not_existc                    sR    t j|  t| d}|| |  W d    d S 1 s"w   Y  d S )Nr   )rG   rL   rM   r   r   close)rD  pch_signrr   rN  r   r>   write_pch_signature_to_file  s
   

"zU_check_and_build_extension_h_precompiler_headers.<locals>.write_pch_signature_to_filec              
   S   sD   zt j| dt jd W d S  t jy! } ztd|  |d }~ww )NT)shellrW   z)Compile PreCompile Header fail, command: )r   r   r   CalledProcessErrorr   )pch_cmder   r   r>   build_precompile_header  s   zQ_check_and_build_extension_h_precompiler_headers.<locals>.build_precompile_headerr:   c                 S   r  r  r   r<   r  r   r   r>   r?     re  zD_check_and_build_extension_h_precompiler_headers.<locals>.<listcomp>z-I z-I {}r  r  r-  r  r   c                 S      g | ]}| qS r   r   r   r   r   r>   r?          c                 S   rZ  r   r   r   r   r   r>   r?     r[  T)r   r6   r7   rG   rL   ru   r  r   	sysconfigget_pathr4  r6  rC  )r)  r-  r/  r   b_is_gccr=  r>  head_file_signaturer;  r@  rB  rF  rR  rW  extra_cflags_strextra_include_paths_strr  r?  torch_include_dirs_strr  common_cflags_strrU  rP  b_same_signr   rQ  r>   0_check_and_build_extension_h_precompiler_headers  sV   	

re  c                  C   s@   dd } t jtddd}t jtddd}| | | | d S )Nc                 S   s   t j| rt |  d S d S r|   )rG   rL   rQ   remove)	path_filer   r   r>   _remove_if_file_exists  s   zFremove_extension_h_precompiler_headers.<locals>._remove_if_file_existsr  rR   r7  r8  )rG   rL   ru   r  )rh  r>  r_  r   r   r>   r5     s
   r5   c                 C   s  |
pt | |}
t|tr|g}|pg }t|tr|g}|pg }t|tr'|g}|dd |du r7t||	 nt  |durg }|d t|trM|g}t|trZdd |D }nt|tsht	dt
| | D ]%\}}|r|d	| d
| d| d ql|d	| d| d| d ql|d ||7 }tj|
d}t|d| |g}|r|dd |dd |dd tj|
d}t|d| || |r|dd |dd tj|
d}t|d| || t| ||||||	|
||||d|dS )a  
    Load a PyTorch C++ extension just-in-time (JIT) from string sources.

    This function behaves exactly like :func:`load`, but takes its sources as
    strings rather than filenames. These strings are stored to files in the
    build directory, after which the behavior of :func:`load_inline` is
    identical to :func:`load`.

    See `the
    tests <https://github.com/pytorch/pytorch/blob/master/test/test_cpp_extensions_jit.py>`_
    for good examples of using this function.

    Sources may omit two required parts of a typical non-inline C++ extension:
    the necessary header includes, as well as the (pybind11) binding code. More
    precisely, strings passed to ``cpp_sources`` are first concatenated into a
    single ``.cpp`` file. This file is then prepended with ``#include
    <torch/extension.h>``.

    Furthermore, if the ``functions`` argument is supplied, bindings will be
    automatically generated for each function specified. ``functions`` can
    either be a list of function names, or a dictionary mapping from function
    names to docstrings. If a list is given, the name of each function is used
    as its docstring.

    The sources in ``cuda_sources`` are concatenated into a separate ``.cu``
    file and  prepended with ``torch/types.h``, ``cuda.h`` and
    ``cuda_runtime.h`` includes. The ``.cpp`` and ``.cu`` files are compiled
    separately, but ultimately linked into a single library. Note that no
    bindings are generated for functions in ``cuda_sources`` per se. To bind
    to a CUDA kernel, you must create a C++ function that calls it, and either
    declare or define this C++ function in one of the ``cpp_sources`` (and
    include its name in ``functions``).

    The sources in ``sycl_sources`` are concatenated into a separate ``.sycl``
    file and  prepended with ``torch/types.h``, ``sycl/sycl.hpp`` includes.
    The ``.cpp`` and ``.sycl`` files are compiled separately, but ultimately
    linked into a single library. Note that no bindings are generated for
    functions in ``sycl_sources`` per se. To bind to a SYCL kernel, you must
    create a C++ function that calls it, and either declare or define this
    C++ function in one of the ``cpp_sources`` (and include its name
    in ``functions``).

    See :func:`load` for a description of arguments omitted below.

    Args:
        cpp_sources: A string, or list of strings, containing C++ source code.
        cuda_sources: A string, or list of strings, containing CUDA source code.
        sycl_sources: A string, or list of strings, containing SYCL source code.
        functions: A list of function names for which to generate function
            bindings. If a dictionary is given, it should map function names to
            docstrings (which are otherwise just the function names).
        with_cuda: Determines whether CUDA headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on whether ``cuda_sources`` is
            provided. Set it to ``True`` to force CUDA headers
            and libraries to be included.
        with_sycl: Determines whether SYCL headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on whether ``sycl_sources`` is
            provided. Set it to ``True`` to force SYCL headers
            and libraries to be included.
        with_pytorch_error_handling: Determines whether pytorch error and
            warning macros are handled by pytorch instead of pybind. To do
            this, each function ``foo`` is called via an intermediary ``_safe_foo``
            function. This redirection might cause issues in obscure cases
            of cpp. This flag should be set to ``False`` when this redirect
            causes issues.

    Example:
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CPP_EXT)
        >>> from torch.utils.cpp_extension import load_inline
        >>> source = """
        at::Tensor sin_add(at::Tensor x, at::Tensor y) {
          return x.sin() + y.sin();
        }
        """
        >>> module = load_inline(name='inline_extension',
        ...                      cpp_sources=[source],
        ...                      functions=['sin_add'])

    .. note::
        Since load_inline will just-in-time compile the source code, please ensure
        that you have the right toolchains installed in the runtime. For example,
        when loading C++, make sure a C++ compiler is available. If you're loading
        a CUDA extension, you will need to additionally install the corresponding CUDA
        toolkit (nvcc and any other dependencies your code has). Compiling toolchains
        are not included when you install torch and must be additionally installed.

        During compiling, by default, the Ninja backend uses #CPUS + 2 workers to build
        the extension. This may use up too many resources on some systems. One
        can control the number of workers by setting the `MAX_JOBS` environment
        variable to a non-negative number.
    r   z#include <torch/extension.h>TNz*PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {c                 S   s   i | ]}||qS r   r   rd  r   r   r>   
<dictcomp>  r[  zload_inline.<locals>.<dictcomp>z3Expected 'functions' to be a list or dict, but was zm.def("z", torch::wrap_pybind_function(z), "z");z", z, "}zmain.cpp
z#include <torch/types.h>r   z#include <cuda.h>r   z#include <cuda_runtime.h>zcuda.cuz#include <sycl/sycl.hpp>z	sycl.syclF)r/  r&  )r(  rT  r   insertre  r5   r
  r   rU  r   typeitemsrG   rL   ru   r   r'  )rk   cpp_sourcescuda_sourcessycl_sources	functionsr)  r*  r+  r,  r-  rw  rx  ry  rz  r.  with_pytorch_error_handlingr&  use_pch
module_deffunction_name	docstringcpp_source_pathrn  cuda_source_pathsycl_source_pathr   r   r>   r2     sx   o






  


r2   rw  rx  c                 C   sT  |r|rt d|	d u rttt|}	tdd |pg D }|
d u r)ttt|}
t| }tj| |||||g||	|
||d}|dkrg||kr`|r`td|  dd| d	|  d
| d t	j
d |  d
| } ttj|d}| r
z||krt|dj}tr|	s|rtj|||d ur|ng dd |D tdtjtdg||d|d	}t }|D ]}tj|}|||v r|| jn| qt|}t| ||pg |pg |pg |pg |pg |||	|
|d W d    n1 sw   Y  n|rtd|  dt	j
d W |  n	|  w |  |rtd|  dt	j
d |r$t| |S t| ||S )Nz>`is_python_module` and `is_standalone` are mutually exclusive.c                 s   s    | ]}d |v V  qdS )cudnnNr   rd  r   r   r>   r     r   z_jit_compile.<locals>.<genexpr>)build_argumentsrw  ry  rz  r.  r/  r   z*The input conditions for extension module z have changed. zBumping to version z and re-building as _v...rE   lockr%  c                 S   r  r   r  r  r   r   r>   r?     rl  z _jit_compile.<locals>.<listcomp>r  T)	r  r  r  r   ignoresr  show_progressr  	clean_ctx)rk   rn  r)  r*  r+  r,  r-  rw  rx  ry  rz  r/  z9No modifications detected for re-loaded extension module z, skipping build step...zLoading extension module ) r   r   r   rP  r~  JIT_EXTENSION_VERSIONERget_versionbump_version_if_changedrU   rV   rW   r   rG   rL   ru   try_acquirer   rQ  r   r	  rx   r  r
  rK  r  r  r   #_write_ninja_file_and_build_libraryreleasewait_get_exec_path_import_module_from_library)rk   rn  r)  r*  r+  r,  r-  rw  rx  ry  rz  r.  r/  r&  
with_cudnnold_versionrb   batonr  r  r  r  r  r   r   r>   r'    s   



"
r'  c                 C   s   t   t }t| |d u rttt| }|d u r ttt| }tj	|
d}|r4t
d| dtjd tj|
sN|rGt
d|
 dtjd tj|
dd t|||||||||	| |d d ||d |rjt
d	tjd t|
|d
d d S )Nbuild.ninjaEmitting ninja build file r~  rE   Creating directory TrH  rL   r  rp  rq  rr  rs  rt  ru  rv  rn  ro  ldflagslibrary_targetry  rz  zCompiling objects...z%Error compiling objects for extensionerror_prefix)r4   r6   r*   r   r   rP  r~  rG   rL   ru   rU   rV   rW   rQ   makedirs_write_ninja_file_run_ninja_build)rn  ro  r  rp  rq  rr  rs  rt  ru  rv  rw  rx  ry  rz  r   build_file_pathr   r   r>   r  :  sL   
r  r/  c                 C   s  t   t }t| |	d u rttt|}	|
d u r ttt|}
t|p$g |	||}tj	
|d}|r=td| dtjd tj	|sW|rPtd| dtjd tj|dd t|| ||p^g |pag |pdg |pgg |pjg |	|
|d |r}td	|  dtjd t||d
|  dd d S )Nr  r  r~  rE   r  Tr  )rL   rk   rn  r)  r*  r+  r,  r-  ry  rz  r/  zBuilding extension module zError building extension 'rD   r  )r4   r6   r*   r   r   rP  r~  _prepare_ldflagsrG   rL   ru   rU   rV   rW   rQ   r  "_write_ninja_file_to_build_libraryr  )rk   rn  r)  r*  r+  r,  r-  rw  rx  ry  rz  r/  r   r  r   r   r>   r  w  sP   

r  c                   C   s*   z
t d  W dS  ty   Y dS w )zxReturn ``True`` if the `ninja <https://ninja-build.org/>`_ build system is available on the system, ``False`` otherwise.zninja --versionFT)r   r   r   r   r   r   r   r>   r3     s   r3   c                   C   s   t  stddS )zRaise ``RuntimeError`` if `ninja <https://ninja-build.org/>`_ build system is not available on the system, does nothing otherwise.z(Ninja is required to load C++ extensionsN)r3   r   r   r   r   r>   r4     s   r4   c                 C   s  t rDtjtjd}| d |r| d | d |r'| d | d | d | dt  |sC| d	 | d|  n>| d
t  | d |r\| trYdnd | d |rl| tridnd | d |sx| d |r| dt  |r|rt	dtj
d t r| dtdd  | d td ur| dtjtdd  | S tsd}tjt|stjtdrd}| d
t|  | d td ur| d
tjtd  | S tr| d
td  | d | S )Nlibszc10.libzc10_cuda.libztorch_cpu.libztorch_cuda.libz"-INCLUDE:?warp_size@cuda@at@@YAHXZz	torch.libz	/LIBPATH:ztorch_python.libr  z-lc10z	-lc10_hipz
-lc10_cudaz-ltorch_cpuz-ltorch_hipz-ltorch_cudaz-ltorchz-ltorch_pythonz-Wl,-rpath,z%Detected CUDA files, patching ldflagsrE   r   r   z
cudart.libr!  z-lcudartz
-lamdhip64)rN   rG   rL   ru   rV   base_exec_prefixr
  r"  rQ  rU   rW   rR  r   rQ   rx   )r,  ry  rx  r/  python_lib_pathextra_lib_dirr   r   r>   r    sf   













r  r  c                 C   s  | dur| D ]}d|v rqd|v rg   S qt g d}g d}|dd |D  }tjdd}|std	 g }ttj	
 D ]4}tj	|}d
d tj	 D }	tdd |	D }
t|
|}|d  d|d  }||vru|| qAt|}|d  d7  < n|dd}| D ]
\}}|||}q|d}g }|D ]=}||vrtd| d|dd }|d\}}| | }|d| d|  |dr|d| d|  qtt|S )ad  
    Determine CUDA arch flags to use.

    For an arch, say "6.1", the added compile flag will be
    ``-gencode=arch=compute_61,code=sm_61``.
    For an added "+PTX", an additional
    ``-gencode=arch=compute_xx,code=compute_xx`` is added.

    See select_compute_arch.cmake for corresponding named and supported arches
    when building with CMake.
    NTORCH_EXTENSION_NAMEarch))zKepler+Tesla3.7)Keplerz3.5+PTX)zMaxwell+Tegra5.3)Maxwellz5.0;5.2+PTX)Pascalz6.0;6.1+PTX)zVolta+Tegra7.2)Voltaz7.0+PTX)Turingz7.5+PTX)zAmpere+Tegra8.7)Amperez8.0;8.6+PTX)Adaz8.9+PTX)Hopperz9.0+PTX)zBlackwell+Tegra10.1)	Blackwellz10.0;12.0+PTX)z3.5r  z5.0z5.2r  z6.0z6.1z6.2z7.0r  z7.5z8.0z8.6r  z8.9z9.0z9.0az10.0z10.0ar  z10.1az12.0z12.0ac                 S   s   g | ]}|d  qS )+PTXr   r  r   r   r>   r?   *	      z(_get_cuda_arch_flags.<locals>.<listcomp>TORCH_CUDA_ARCH_LISTzTORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. 
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].c                 S   s4   g | ]}d |v rt dtd|dd qS )sm_r   z\d+r   r   )r}   ru   r   r   r   r<   r  r   r   r>   r?   <	  s    
c                 s   s     | ]}|d  |d  fV  qdS )r   Nr   )r<   smr   r   r>   r   >	  s    z'_get_cuda_arch_flags.<locals>.<genexpr>r   r   r   r  r  r:   ;zUnknown CUDA arch (z) or GPU not supported+z-gencode=arch=compute_z	,code=sm_z,code=compute_)collectionsOrderedDictrG   rH   rI   r   r   rI  rR   rS   device_countget_device_capabilityr   maxminr
  sortedri  rn  r   r   endswithr
  )r  r  named_archessupported_archesvalid_arch_strings
_arch_listr   rL  
capabilitysupported_smmax_supported_smr  
named_archarchvalflagsrb   r   minornumr   r   r>   rD    sZ   



rD  c                 C   s   | d ur| D ]}d|v sd|v rdg  S qt jdd }|s.tj }|r+| }ng }n	|ddd}dd |D }|dg7 }|S )	Nzamdgpu-targetzoffload-archz-fno-gpu-rdcPYTORCH_ROCM_ARCHr:   r  c                 S   r  )z--offload-arch=r   r  r   r   r>   r?   t	  re  z(_get_rocm_arch_flags.<locals>.<listcomp>)rG   rH   rI   rR   r  _cuda_getArchFlagsr   ri  )r  r  _archs	archFlagsarchsr  r   r   r>   rW  b	  s   



rW  rk   c                 C   s   t jd}|d u r?t }tjjd u rdn
dtjjdd }dtj	j
 tj	j ttdd }| d| }t j||}|rLtd	| d
tjd t j|| }t j|sm|rftd| dtjd t j|dd |S )NTORCH_EXTENSIONS_DIRr  cur   r   pyabiflagsr   zUsing z as PyTorch extensions root...rE   zCreating extension directory r~  Tr  )rG   rH   rI   r(   rR   rb   rS   ri  rV   version_infor   r  r   rL   ru   rU   rW   rQ   r  )rk   rx  root_extensions_directorycu_strpython_versionbuild_folderrw  r   r   r>   r(  x	  s&   "r(  c                 C   sT   t jd}|d ur| r| rtd| dtjd t|S | r(tdtjd d S )NMAX_JOBSzUsing envvar MAX_JOBS (z) as the number of workers...rE   zqAllowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N))rG   rH   rI   isdigitrU   rV   rW   r}   )rx  max_jobsr   r   r>   _get_num_workers	  s   r  vc_archc                 C   sD   zddl m} |j| W S  ty!   ddlm} ||  Y S w )Nr   	distutils)_msvccompiler)r  r  r  _get_vc_envAttributeErrorsetuptools._distutils)r  r  r  r   r   r>   r  	  s   r  r  c              
   C   s@  ddg}t |}|d ur|dt|g tj }trQd|vrQddlm} |j	
 }t| }dd t| D }	| D ]\}
}|
 }||	vrN||	|< q>|	}z tj  tj  d	}tj||re|ntjtj| d
|d W d S  tjy } z!t \}}}|}t|dr|jr|d|jjt  7 }t||d }~ww )Nninjar   z-jr  r   r  c                 S   s   i | ]	\}}|  |qS r   )upper)r<   kr~   r   r   r>   ri  	  r   z$_run_ninja_build.<locals>.<dictcomp>r   T)stdoutrW   cwdcheckr   outputr   )r  rF  r   rG   rH   r   rN   r  r  utilget_platformPLAT_TO_VCVARSr  rn  r  rV   r  flushrW   r   runPIPEr   rT  r   r  r  r   r   r   )rw  rx  r  commandnum_workersr   r  	plat_name	plat_specvc_envr  r~   ukstdout_filenorV  r   r   messager   r   r>   r  	  sH   




r  c                 C   sp   t r-ttdddvr-tdd tdddD }|s-t dtdd tjd< tj||  t	 S )NPATHr   r  c                 s   s*    | ]}t j|ot j|tV  qd S r|   )rG   rL   rQ   samefiler"  )r<   pr   r   r>   r   	  s
    
z!_get_exec_path.<locals>.<genexpr>)
rN   r"  rG   rE  r   r   rH   rL   ru   EXEC_EXT)module_namerL   torch_lib_in_pathr   r   r>   r  	  s   r  c                 C   st   t j||  t }|r2tj| |}|d usJ tj|}t|j	tj
js*J |j	| |S tj| |S r|   )rG   rL   ru   LIB_EXTrh   r  spec_from_file_locationmodule_from_specrT  loaderabcLoaderexec_modulerR   opsload_library)r   rL   r.  filepathspecmoduler   r   r>   r  	  s   r  c                    s2  dd |D }dd |D }dd |D }dd |D }dd |D }dd |D }r1t d}nt d	}tjd
tr<dndd}|d urI|| g }|
sZ|d|  |d |dd t D 7 }trr|dd || D 7 }n|dd |D 7 }|dd |D 7 }|dd t D 7 }tr|t dg | }t|}n|ddg | }rt	rdg| t
 t }||7 }|t|7 }n`r|t t  }trtD ]}d|g| }qtD ]
}dd| g| }q|dg }t|}|t|7 }n,|ddg7 }||7 }tdd |D s|d td }|d urd!|g| }nd }rD|t }||7 }t| |}d"d |D }d#|}|t|7 }t}nd }d }d$td%tffd&d'  fd(d|D }|
rbg ntg| }trp|d) ntrwt|}|
r|tnt}| | }t| |d |d d |g |||||d* d S )+Nc                 S      g | ]}|  qS r   r   r  r   r   r>   r?   
  r  z6_write_ninja_file_to_build_library.<locals>.<listcomp>c                 S   r  r   r  r  r   r   r>   r?   
  r  c                 S   r  r   r  r  r   r   r>   r?   
  r  c                 S   r  r   r  r  r   r   r>   r?   
  r  c                 S   r  r   r  r  r   r   r>   r?   
  r  c                 S   r  r   r  r<   rF   r   r   r>   r?   

  rl  rS   r  r  ntposix_prefix)schemer  r-  c                 S   rZ  r   r   r   r   r   r>   r?   
  r[  c                 S   r  rX  r   rY  r   r   r>   r?   "
  re  c                 S      g | ]
}d t | qS rX  rc  rY  r   r   r>   r?   $
      c                 S   r  )z	-isystem rc  rY  r   r   r>   r?   %
  r  c                 S   rZ  r   r   r   r   r   r>   r?   '
  r[  z
/std:c++17r   r  z
-DWITH_HIPr  r  r  r?  r@  c                 s   r  )z-std=Nr   r  r   r   r>   r   @
  r	  z5_write_ninja_file_to_build_library.<locals>.<genexpr>rA  rB  c                 S   rf  )r0  rg  rh  rj  r   r   r>   r?   N
  rl  r:   r   r9   c                    sZ   t jt j| d }t| r r| d}|S t| r&r&| d}|S | d}|S )Nr   z.cuda.oz.sycl.oz.o)rG   rL   r  ra   rP  r~  )r   	file_nametarget)ry  rz  r   r>   object_file_pathV
  s   


z<_write_ninja_file_to_build_library.<locals>.object_file_pathc                    s   g | ]} |qS r   r   )r<   rZ  )r  r   r>   r?   c
  r  z-undefined dynamic_lookupr  )r/   r\  r]  rN   r
  r4  r6  r  r@   rQ  rX  rV  rW  rC  rD  r  r   rG   rE  r  r  ru   r  r  r   SHARED_FLAGr   r  r  r  )rL   rk   rn  r)  r*  r+  r,  r-  ry  rz  r/  user_includessystem_includespython_include_pathr  r  
cuda_flagsr  r  cc_envrt  r  rv  ro  r  r[  r  r   )r  ry  rz  r>   r  	  s   









r  c           ,   	   C   s  dd }||}||}||}||}||}||}||}||}||}t |	t |
ks2J t |	dks:J t }dg}|d|  |sL|rldtjv rWtd}ntr_tdd}ntdd	}|d
|  |sp|r~t	rtdnd}|d|  trt
| }dd| g}|dd|  |r|dd|  |dd|  |dd|  |r|dd|  |dd|  |dd|  |dd|  dd |	D }	dg}t	r|d |d n|d |d |d |r=d g}d!}tjjd"ur4td#d$d%kr4|d |d d&}|d'| d( |rHd)g}|d* g }t|	|
D ]J\}}t|oY|}t|o`|}|rgd+}n|rmd,}nd-}t	r~|d.d/}|d.d/}|dd0}|dd0}|d1| d2| d|  qO|rtjtj|
d d3}d4g} | d5 d1| d6d|
 g}!|
|g7 }
ng g } }!|rtjtj|
d d7}"d8g}#|#d9 d1|" d:d|
 g}$|
|"g7 }
ng g }#}$|d"urJd;g}%t	r2td<d=gjt d>}&t |&d?kr$tj|&d d.d/}'ntd@|%dA|' dB n|%dC d1| dDd|
 g}(dE| g})ng g g }%}(})|||g}*|r_|*| |rg|*| |*| |#|%||!|$|(|)g7 }*dFdGdH |*D }+|+dI7 }+t| |+ d"S )Jak  Write a ninja file that does the desired compiling and linking.

    `path`: Where to write this file
    `cflags`: list of flags to pass to $cxx. Can be None.
    `post_cflags`: list of flags to append to the $cxx invocation. Can be None.
    `cuda_cflags`: list of flags to pass to $nvcc. Can be None.
    `cuda_post_cflags`: list of flags to append to the $nvcc invocation. Can be None.
    `cuda_dlink_post_cflags`: list of flags to append to the $nvcc device code link invocation. Can be None.
    `sycl_cflags`: list of flags to pass to SYCL compiler. Can be None.
    `sycl_post_cflags`: list of flags to append to the SYCL compiler invocation. Can be None.
    `sycl_dlink_post_cflags`: list of flags to append to the SYCL compiler device code link invocation. Can be None.
e.
    `sources`: list of paths to source files
    `objects`: list of desired paths to objects, one per source.
    `ldflags`: list of flags to pass to linker. Can be None.
    `library_target`: Name of the output library. Can be None; in that case,
                      we do no linking.
    `with_cuda`: If we should be compiling with CUDA.
    c                 S   s   | d u rg S dd | D S )Nc                 S   r  r   r  r  r   r   r>   r?   
  r  z=_write_ninja_file.<locals>.sanitize_flags.<locals>.<listcomp>r   )r  r   r   r>   sanitize_flags
  s   z)_write_ninja_file.<locals>.sanitize_flagsr   zninja_required_version = 1.3zcxx = PYTORCH_NVCCr   r^   rC   znvcc = icxrg   zsycl = z	cflags = r:   zpost_cflags = zcuda_cflags = zcuda_post_cflags = zcuda_dlink_post_cflags = zsycl_cflags = zsycl_post_cflags = zsycl_dlink_post_cflags = z
ldflags = c                 S   r  r   r  r  r   r   r>   r?   
  rl  z%_write_ninja_file.<locals>.<listcomp>zrule compilez@  command = cl /showIncludes $cflags -c $in /Fo$out $post_cflagsz  deps = msvczD  command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflagsz  depfile = $out.dz  deps = gcczrule cuda_compiler   N*TORCH_EXTENSION_SKIP_NVCC_GEN_DEPENDENCIESr   r   z?--generate-dependencies-with-compile --dependency-output $out.dz  command = $nvcc z. $cuda_cflags -c $in -o $out $cuda_post_cflagszrule sycl_compilezF  command = $sycl $sycl_cflags -c -x c++ $in -o $out $sycl_post_cflagscuda_compilesycl_compiler   :z$:z$ zbuild r   zdlink.ozrule cuda_devlinkz5  command = $nvcc $in -o $out $cuda_dlink_post_cflagsz: cuda_devlink zsycl_dlink.ozrule sycl_devlinkz5  command = $sycl $in -o $out $sycl_dlink_post_cflagsz: sycl_devlink z	rule linkwherer   z
r   z'MSVC is required to load C++ extensionsz  command = "z)/link.exe" $in /nologo $ldflags /out:$outz%  command = $cxx $in $ldflags -o $outz: link zdefault z

c                 s   s    | ]}d  |V  qdS )rk  N)ru   )r<   br   r   r>   r   <  r	  z$_write_ninja_file.<locals>.<genexpr>rk  )rP   r6   r
  rG   rH   rE  rQ  rx   rR  rN   rX  ru   rR   rb   rS   ziprP  r~  ri  rL   rM   r   r   r   r   r   r   r   ),rL   r  rp  rq  rr  rs  rt  ru  rv  rn  ro  r  r  ry  rz  r  r   configrC   r,  r  compile_rulecuda_compile_rulenvcc_gendepssycl_compile_rulebuildr   object_fileis_cuda_sourceis_sycl_sourcerulecuda_devlink_outcuda_devlink_rulecuda_devlinksycl_devlink_outsycl_devlink_rulesycl_devlink	link_rulecl_pathscl_pathlinkdefaultblocksr   r   r   r>   r  
  s   "



 


 








r  c                  G   ry   )z
    Join paths with CUDA_HOME, or raises an error if it CUDA_HOME is not set.

    This is basically a lazy way of raising an error for missing $CUDA_HOME
    only once we need to get any CUDA-specific path.
    NzSCUDA_HOME environment variable is not set. Please set it to your CUDA install root.)rA   rt   rG   rL   ru   rv   r   r   r>   rR  A  s   rR  rL   c                 C   s*   ddg}t r|d tj| d |v S )Nr)  r5  r6  r   )rQ  r
  rG   rL   r  rL   	valid_extr   r   r>   rP  N  s   
rP  c                 C   s   dg}t j| d |v S )Nr*  r   )rG   rL   r  r?  r   r   r>   r~  T  s   r~  )r  )NNNNNNFNNTFT)F)NNNNNNNNNFNNTTTF)Tr|   r  )r   rO   rh   importlib.abcrG   r   r  rJ   r  r   rV   r\  r   r  pathlibr   rJ  rR   torch._appdirs
file_batonr   _cpp_extension_versionerr   r	  r   hipify.hipify_pythonr   typingr   r	   torch.torch_versionr
   r   setuptools.command.build_extr   r   rN   r   r   r   r  r  CLIB_PREFIXCLIB_EXTr  rL   rK  __file___HERErM   r  ru   r"  r   r   r   r   r}   VersionRangerU  r   
VersionMapr#   __annotations__MINIMUM_CLANG_VERSIONr'   __all__r   r@   r[   rf   rs   rx   r{   r   r   r   r   r   r\   r#  rb   r_   rQ  ROCM_VERSIONr   rS   _is_compiledrA   rH   rI   r   r   rz   r   r   r  r  rC  rX  rV  r  r   r  r  r  r  r6   boolr   r   r   r(   r)   r*   r  r  r  r+   r,   r-   r.   r/   r0   r1   r4  r6  r7   re  r5   r2   r'  r  r  r3   r4   r  rD  rW  r(  r  r  r  r  r  r  r  rR  rP  r~  r   r   r   r>   <module>   s  


 $

,49	    4F @P%1	

}
y
 N	

_
I	

=
 ; c	5

 
 A