o
    h                  
   @   s  U d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
 d dlZd dlmZ d dlmZ ddgZe sWd dlZG d	d
 d
Zdd Zeejd _eejd _dS d dlmZ d dlmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& e 'e(Z)e	rzd dl*m+Z+ W n e,y   e)-d Y nw G dd dej.Z/e/ Z0e/e1d< d de2fddZ3G dd dZddde2de4e5df dee4e2df  defddZdS )!    Nreduce)chain)OptionalTYPE_CHECKINGUnion)is_available)not_noneinit_device_mesh
DeviceMeshc                   @   s   e Zd ZdS )_DeviceMeshStubN)__name__
__module____qualname__ r   r   q/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/torch/distributed/device_mesh.pyr      s    r   c                   C   s   d S Nr   r   r   r   r   _init_device_mesh_stub   s   r   ztorch.distributed.device_mesh)Backend)_find_pg_by_ranks_and_tag_get_default_group_get_group_tagget_backendget_process_group_ranksget_rankget_world_sizeinit_process_groupis_initialized	new_groupProcessGroupsplit_group)	ArrayLikezCDeviceMesh requires numpy >= 1.21 to be installed for type checkingc                	   @   s&  e Zd Zd%ddZd&ddZddd	eed
f deeed
f  ddfddZ		d'ddde
e ddfddZd(ddZddde
e fddZededefddZededefddZdddedefddZ	d'dedede
ej ddfdd Zdeeed
f  fd!d"Zdddeded fd#d$ZdS ))_MeshEnvreturnNc                 C   s"   g | _ i | _i | _i | _i | _d S r   )
mesh_stackchild_to_root_mappingmesh_dim_group_optionsroot_to_flatten_mappingflatten_name_to_root_dimsselfr   r   r   __init__C   s   z_MeshEnv.__init__r   c                 C   s    t | jdkrtd| jd S )Nr   z#No device mesh is currently active!)lenr$   RuntimeErrorr)   r   r   r   get_current_meshO   s   
z_MeshEnv.get_current_meshdevice_meshsubmesh_dim_names.submesh_dimsc                    sN   fdd|D } j }g }g }d}t||D ]K\}	}
t|	dkrO|j|	d | |	d | d}||	d |  |t|	d 7 }|| j  |
 jd  q||	d |  | j|	d   qtt|j	}|D ]}|
| ql|jg ||R  jdg|R  }  }|D ]}t j||dd}||v r|}q||_ | j|< |S )	Nc                    s    g | ]}t  fd d|dqS )c                    s   |  j | S r   )meshsize)xyr0   r   r   <lambda>`   s    z5_MeshEnv.create_sub_mesh.<locals>.<listcomp>.<lambda>   r   ).0mesh_dimr7   r   r   
<listcomp>^   s    
z,_MeshEnv.create_sub_mesh.<locals>.<listcomp>r   r9   r,   )	start_dimend_dimFmesh_dim_names_init_backend)r3   zipr-   flattenappendr'   _dim_group_infoslistrangendimremovepermutereshaper   r   device_typer%   )r*   r0   r1   r2   slice_dim_sizemesh_tensorslice_dim_idxslice_dim_group_infonum_dims_flattenmesh_dim_indicesmesh_dim_namemesh_dims_remained_idxidxpg_ranks_by_dimcur_rankmesh_ndsubmeshres_submeshr   r7   r   create_sub_meshT   sh   

	


z_MeshEnv.create_sub_meshrS   c                    sr  t |  fddt|jD }|sd fdd|D }| j i  tg tt j| j  	 R  }||v rLt
| d  dd| d | jv r_|| j  v r_| j  | S t|j }tt jj}|D ]}|| qq jjg ||R  d	|}  }	|D ]}
t j|
|fd
}|	|
v r|}q | j|< || j i |< t|| j  |< |S )Nc                    s   g | ]
}t  j|qS r   )r	   r@   index)r:   flattened_mesh_dim_name	root_meshr   r   r<      s    z0_MeshEnv.create_flatten_mesh.<locals>.<listcomp>_c                    s   g | ]	}t  j| qS r   )r	   r@   )r:   dimr^   r   r   r<      s    z# already exists for submesh of the . z5The mesh_dim_names of submesh and flattened mesh are z-. Please specify another valid mesh_dim_name.r,   r@   )_mesh_resourcesget_root_meshr	   r@   joinr(   
setdefaultr   rF   keysr.   r'   mathprodr3   r4   rG   rH   rI   rJ   rK   r   r   rL   r%   tuple)r*   r0   rS   flatten_dims_in_rootinvalid_dim_namesflattened_mesh_dim_sizeremained_dims_in_rootflatten_dim_in_rootrV   rW   rX   flattened_meshres_flattened_meshr   r^   r   create_flatten_mesh   sh   






z_MeshEnv.create_flatten_meshc                 C   s   | j |d }|s|S |S r   )r%   get)r*   r0   r_   r   r   r   re      s   z_MeshEnv.get_root_meshc                 C   sD   |  |}|j}|r |r t|dksJ d|d }| ||S dS )z
            Returns the index of the mesh dim in the root mesh.
            The device_mesh passed in needs to be sliced out from the root mesh
            or submesh of the root mesh.
            r9   z"The submesh can only be a 1D mesh.r   N)re   r@   r-   get_mesh_dim_by_name)r*   r0   r_   child_mesh_dim_nameschild_mesh_dim_namer   r   r   get_root_mesh_dim   s   
z_MeshEnv.get_root_mesh_dimrL   c                 C   s   t |  S r   )_get_device_handledevice_countrL   r   r   r   num_devices_per_host      z_MeshEnv.num_devices_per_hostc                 C   s   t  t|  S r   )r   r"   r|   r{   r   r   r   	num_hosts  s   z_MeshEnv.num_hostsc                 C   sT   |j d u st|j dkrtd||j vr"td| dd|j  t|j |S )Nr   zNo `mesh_dim_names` found.zMesh dimension 'z' does not exist.z.Available mesh dimensions are: mesh_dim_names=)r@   r-   KeyErrorr	   r\   )r*   r0   rS   r   r   r   ru   
  s   



z_MeshEnv.get_mesh_dim_by_namera   backend
pg_optionsc                 C   s   ||f| j |< d S r   )r&   )r*   ra   r   r   r   r   r   _set_mesh_dim_group_options  s   z$_MeshEnv._set_mesh_dim_group_optionsc           	         s   ||  |krtd| j|i  | j| }g |j| t fdd|D s4td| d  dd}g }|D ]4}||v rN|| }|d }|| n|j|}||f ||krltd| dd	| d
d|}q:|S )z
            Validate whether the mesh_dim_names is valid for slicing the given device_mesh.
            If valid, return dim indexes of the slice mesh in the device mesh.
            z'Cannot create a submesh from a submesh.c                 3   s    | ]}| v V  qd S r   r   )r:   rS   valid_mesh_dim_namesr   r   	<genexpr>6  s
    
z0_MeshEnv._get_slice_mesh_dims.<locals>.<genexpr>zInvalid mesh_dim_names z% specified. Valid mesh_dim_names are .r,   z specified. z!Found mesh dim indices to slice: rb   z.Mesh dim indices should be in ascending order.)	re   r.   r(   rg   r@   allr   rD   r\   )	r*   r0   r@   r(   curr_idxslice_mesh_dimsrS   mesh_indicesnext_idxr   r   r   _get_slice_mesh_dims#  sD   


z_MeshEnv._get_slice_mesh_dimsc           	      C   s|   |  ||}|jd|d|j|}| }g }|D ]}t|j||fdd}||v r3|j| gng |_|	| q|S )z`
            Return all the submeshes of a given mesh dimension of the device mesh.
            r,   Fr?   )
ru   r3   swapdimsrK   r4   r   r   rL   rE   rD   )	r*   r0   rS   r;   rV   rW   res_submeshesmesh_1drY   r   r   r   _get_all_submeshesW  s&   z_MeshEnv._get_all_submeshesr#   Nr#   r   r   )r0   r   r#   r   )r   r   r   r+   r/   rk   strrF   intr[   r   rs   re   rx   staticmethodr|   r~   ru   C10dBackendOptionsr   r   r   r   r   r   r   r"   B   sj    



P

F


4r"   rd   cudarL   c                 C   s   t t| dS )a:  
        Get the module corresponding to the device_type which is cuda or cuda-like device.
        For example, when the device_type is cuda, the module `torch.cuda` is returned.
        Return None when there is no corresponding module for device_type, otherwise
        return the corresponding module.
        N)getattrtorchr{   r   r   r   ry   v  s   ry   c                   @   s  e Zd ZU dZeed< ejed< ee	edf  ed< ddddede
ejd	f dee	edf  d
eddf
ddZdd Zdd Zd7ddZd8ddZdefddZdd ZdedefddZde
ee	edf f dd fddZd9dee
eef  defd d!Zdee fd"d#Ze	d9dd$d%e
eee f dedee
ejd	f  dee	edf  dd f
d&d'Zd9dee defd(d)Zedefd*d+Zede	edf fd,d-Z defd.d/Z!d9dee
eef  defd0d1Z"deee  fd2d3Z#d9d4ee dd fd5d6Z$dS ):r   a  
        DeviceMesh represents a mesh of devices, where layout of devices could be
        represented as a n-d dimension array, and each value of the n-d dimensional
        array is the global id of the default process group ranks.

        DeviceMesh could be used to describe the layout of devices across the cluster,
        and serves as a proxy for communication among the device lists within the cluster.

        DeviceMesh can be used as a context manager.

        .. note::
            DeviceMesh follows SPMD programming model, which means the same PyTorch Python program
            is running on all processes/ranks in the cluster. Therefore, users need to make sure the
            `mesh` array (which describes the layout of devices) should be identical across all ranks.
            Inconsistent `mesh` will lead to silent hang.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
            mesh (ndarray): A multi-dimensional array or an integer tensor describing the layout
                of devices, where the IDs are global IDs of the default process group.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        The following program runs on each process/rank in an SPMD manner. In this example, we have 2
        hosts with 4 GPUs each.
        A reduction over the first dimension of mesh will reduce across
        columns (0, 4), .. and (3, 7), a reduction over the second dimension
        of mesh reduces across rows (0, 1, 2, 3) and (4, 5, 6, 7).

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import DeviceMesh
            >>>
            >>> # Initialize device mesh as (2, 4) to represent the topology
            >>> # of cross-host(dim 0), and within-host (dim 1).
            >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
        rL   r3   .r@   NTr?   r!   rA   r#   c                C   s
  || _ t|tjr|jjdkrtd| t|tjr%| jtj	dntj
|dtj	d| _|r5t|nd | _t| j  | _d | _|dkr|rS|   |   t r`t dkr`t | _| jt k }|ddv sqJ |ddkr~|d  nd | _d S d S )	Ncpuz!`mesh` must be a CPU tensor, got dtypedevicer   xlathreadedr   )r   r9   )rL   
isinstancer   Tensorr   type
ValueErrordetachtor   tensorr3   rk   r@   rC   tolist_flatten_mesh_list
_thread_id_get_or_create_default_group_init_process_groupsr   r   	threading	get_identr   nonzeror4   _coordinate_on_dim)r*   rL   r3   r@   rA   rank_coordsr   r   r   r+     s*   

zDeviceMesh.__init__c                 C   s   t  }|st  t }| j |kr td| d| j  dt| j}|sN|rN| }||krF|| dkrFtd| d| d| j d|	t
 |  t S )	Nz2Mesh should not be bigger than default world size z, but found z ranks!r   z8DeviceMesh only support homogeneous hardware, but found z ranks and  z	 devices!)r   r   r   r3   numelr.   ry   rL   rz   
set_devicer   r   )r*   default_initialized
world_sizedevice_handler|   r   r   r   r     s0   
z'DeviceMesh._get_or_create_default_groupc                 C   s  g }t  }| jjdkr:| j t kr:ttt }tj	 r,t
|dkr,td|ddn|}|t|||jf nt| jjD ]}| jd|d| j|}|tjv r^tj| \}}nd\}}| jrmd| j|  nd	| }	d }d
}
t|dd  }d urtj	 rt||| |	d}d}
|D ]:}| }|d u s|
st||||	d}|  |v rt||krtd|   d| d|tt|||jf qq@|| _d S )Nr9   gloozcpu:gloo,cuda:ncclmesh_default)r   ranks
group_descr,   )NNmesh_	mesh_dim_Fbound_device_id)	parent_pgr   split_ranksr   T)r   r   r   r   zFEach device mesh dimension should get only one process group, but got z in !)r   r3   rH   r   r   rF   rG   r   r   r   r   r   rD   r   
group_namer   rK   r4   rd   r&   r@   r   r    r   r   r-   r.   r	   rE   )r*   dim_group_infosdefault_groupr   	dim_groupra   rV   r   r   r   has_split_groupr   dim_meshsubgroup_ranksr   r   r   r     s   
	


zDeviceMesh._init_process_groupsc                 C   s   t j|  | S r   )rd   r$   rD   r)   r   r   r   	__enter__h  s   zDeviceMesh.__enter__c                 C   s   t j  d S r   )rd   r$   pop)r*   exc_type	exc_valueexc_tracebackr   r   r   __exit__n  s   zDeviceMesh.__exit__c                 C   sJ   | j sd| j d| j  d}|S d| j d| j  d| j  d}|S )NzDeviceMesh('z', )z, mesh_dim_names=)r@   rL   r3   r   )r*   device_mesh_reprr   r   r   __repr__r  s    zDeviceMesh.__repr__c                 C   s:   t | dd | _| jst| j| jj| j| j| jf| _| jS )N_hash)	r   r   hashr   r3   shaperL   r@   r   r)   r   r   r   __hash__z  s   	zDeviceMesh.__hash__otherc                 C   sb   t |tsdS t| t|krdS | j|jko0| jj|jjko0| j|jko0| j|jko0| j|jkS )NFT)	r   r   idr   r3   r   rL   r@   r   )r*   r   r   r   r   __eq__  s   



zDeviceMesh.__eq__c                 C   s|   | j stdt|tr|fn|}|| j kr| S t| |}tjj	  t
| ||}W d   |S 1 s7w   Y  |S )aU
  
            Slice the current DeviceMesh based on the mesh_dim_names given to create a submesh.
            The submesh created consists of the dimensions and the communicators indicated by
            ``mesh_dim_names``

            Args:
                mesh_dim_names (Union[str, Tuple[str]]): the name or the tuple of names of the
                mesh dimension of the DeviceMesh to create the submesh for.
            Returns:
                A :class:`DeviceMesh` object

            The following program runs on each process/rank in an SPMD manner in a world size of 8.
            In the first example:
                Calling mesh_2d["tp"] on rank 0, 1, 2, 3 returns a 1D submesh of DeviceMesh:([0, 1, 2, 3]).
                Calling mesh_2d["tp"] on rank 4, 5, 6, 7 returns a 1D submesh of  DeviceMesh:([4, 5, 6, 7]).
                Calling mesh_2d["dp"] on rank 0, 4 returns a 1D submesh of  DeviceMesh:([0, 4]).
                Calling mesh_2d["dp"] on rank 1, 5 returns a 1D submesh of  DeviceMesh:([1, 5]).
                Calling mesh_2d["dp"] on rank 2, 6 returns a 1D submesh of  DeviceMesh:([2, 6]).
                Calling mesh_2d["dp"] on rank 3, 7 returns a 1D submesh of  DeviceMesh:([3, 7]).

            In the second example:
                Calling mesh_3d["dp", "cp"] on rank 0, 1, 4, 5 returns a 2D submesh of DeviceMesh:([[0, 1], [4, 5]]).
                Calling mesh_3d["dp", "cp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 3], [6, 7]]).
                Calling mesh_3d["cp", "dp"] on rank 0, 1, 4, 5 returns a 2D submesh of DeviceMesh:([[0, 4], [1, 5]]).
                Calling mesh_3d["cp", "dp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 6], [3, 7]]).

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize a 2D device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh_2d = init_device_mesh(device_type="cuda", (2,4), mesh_dim_names=("dp", "tp"))
                >>> tp_mesh = mesh_2d["tp"]
                >>> dp_mesh = mesh_2d["dp"]
                >>>
                >>> # Initialize a 3D mesh.
                >>> mesh_3d = init_device_mesh(device_type="cuda", (2,2,2), mesh_dim_names=("dp", "pp", "cp"))
                >>> # The order of the mesh_dim_names provided deteremines the order of dimensions in the submesh.
                >>> dp_cp_mesh = mesh_3d["dp", "cp"]
                >>> cp_dp_mesh = mesh_3d["cp", "dp"]
            z1Cannot slice a DeviceMesh without mesh_dim_names!N)r@   r.   r   r   rd   r   r   _subclassesfake_tensorunset_fake_temporarilyr[   )r*   r@   r   rY   r   r   r   __getitem__  s"   -

zDeviceMesh.__getitem__r;   c                 C   s   t | ds	td| jjdkr|du rtd| jj ddd| jjdkr6|du r6tt| jd	 dd
  S t| }tj	
|d}|r[|| v r[|| jd	 dd
 }tt| S t|trft| |n|}tt| j| dd
  S )a  
            Returns the single ProcessGroup specified by mesh_dim, or, if mesh_dim is not specified and the
            DeviceMesh is 1-dimensional, returns the only ProcessGroup in the mesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                A :class:`ProcessGroup` object.
            rE   z*DeviceMesh process groups not initialized!r9   NFound the DeviceMesh have  dimensionsJOptional kwarg `mesh_dim` needs to be specified when device_mesh.ndim > 1.zmIf you want to get the list of all the ProcessGroups in the DeviceMesh,please use `get_all_groups()` instead.r      )hasattrr.   r3   rH   r	   r   rE   rd   re   r'   rt   rh   r   r   ru   )r*   r;   r_   r'   r   r   r   r   	get_group  s@   

zDeviceMesh.get_groupc                    s    fddt  jjD S )z
            Returns a list of ProcessGroups for all mesh dimensions.

            Returns:
                A list of :class:`ProcessGroup` object.
            c                    s   g | ]}  |qS r   )r   )r:   ir)   r   r   r<     s    z-DeviceMesh.get_all_groups.<locals>.<listcomp>)rG   r3   rH   r)   r   r)   r   get_all_groups  s   zDeviceMesh.get_all_groupsrc   groupc                C   sV  t | trLt| }t |tjr| |ks#|dur/t |tjs/||kr/tdt| d| tj|dtj	d}t
|||dd}t| || jfg|_|S t| }t|dkrZtd	|du rbtd
|du rjtdt |tjrz| jtj	ddntj|dtj	d}|jt|krtd|  dt| dt
|||dd}dd |D |_|S )af  
            Constructs a :class:`DeviceMesh` with ``device_type`` from an
            existing :class:`ProcessGroup` or a list of existing :class:`ProcessGroup`.

            The constructed device mesh has number of dimensions equal to the
            number of groups passed. For example, if a single process group is passed in,
            the resulted DeviceMesh is a 1D mesh. If a list of 2 process groups is passed in,
            the resulted DeviceMesh is a 2D mesh.

            If more than one group is passed, then the ``mesh`` and ``mesh_dim_names`` arguments
            are required. The order of the process groups passed in determines the topology of
            the mesh. For example, the first process group will be the 0th dimension of the DeviceMesh.
            The `mesh` tensor passed in must have the same number of dimensions as the number of process
            groups passed in, and the order of the dimensions in the `mesh` tensor must match the order
            in the process groups passed in.

            Args:
                group (ProcessGroup or list[ProcessGroup]): the existing ProcessGroup
                    or a list of existing ProcessGroups.
                device_type (str): The device type of the mesh. Currently supports: "cpu",
                    "cuda/cuda-like". Passing in a device type with a GPU index, such as "cuda:0",
                    is not allowed.
                mesh (torch.Tensor or ArrayLike, optional): A multi-dimensional array or an
                    integer tensor describing the layout of devices, where the IDs are global IDs
                    of the default process group. Default is None.
                mesh_dim_names (tuple[str], optional): A tuple of mesh dimension names to assign
                    to each dimension of the multi-dimensional array describing the layout of devices.
                    Its length must match the length of `mesh_shape`. Each string in `mesh_dim_names`
                    must be unique. Default is None.

            Returns:
                DeviceMesh: A :class:`DeviceMesh` object representing the device layout.
            NzInvalid mesh z for ProcessGroup with ranks r   r   Fr?   r   z.Expects at least one ProcessGroup to be passedz0Must pass mesh if passing multiple ProcessGroupsz:Must pass mesh_dim_names if passing multiple ProcessGroups)r   r   zEExpects mesh with ndim equal to number of ProcessGroups but got mesh z and z ProcessGroupsc                 S   s    g | ]}t |t||jfqS r   )r   r   r   )r:   r   r   r   r   r<   s  s    z)DeviceMesh.from_group.<locals>.<listcomp>)r   r   r   r   r   r   r   r   r   r   r   r   r   rE   rF   r-   r   r   rH   )r   rL   r3   r@   group_ranksr0   groupsr   r   r   
from_group  sd   
+


zDeviceMesh.from_groupc                 C   s   |d u r	| j  S | j |S r   )r3   r   r4   )r*   r;   r   r   r   r4   }  s   zDeviceMesh.sizec                 C   s   | j jS r   )r3   rH   r)   r   r   r   rH     s   zDeviceMesh.ndimc                 C   s   t | jjS r   )rk   r3   r   r)   r   r   r   r     r}   zDeviceMesh.shapec                 C   s   t  S )z:
            Returns the current global rank.
            )r   r)   r   r   r   r     s   zDeviceMesh.get_rankc                 C   s`   | j dkr|du rtd| jj  dd|du rd}t| |}t|ts*J dtt|S )a{  
            Returns the local rank of the given mesh_dim of the DeviceMesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                An integer denotes the local rank.

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 0, 1, 2, 3 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 4, 5, 6, 7 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 0, 4 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 1, 5 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 2, 6 would return 2.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 3, 7 would return 3.

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            r9   Nr   r   r   r   z1We expect ProcessGroup before calling `get_rank`!)rH   r.   r3   r	   r   r   r   r   )r*   r;   mesh_dim_groupr   r   r   get_local_rank  s   zDeviceMesh.get_local_rankc                 C   s   | j r| j S dS )z
            Return the relative indices of this rank relative to all
            dimensions of the mesh. If this rank is not part of the mesh, return None.
            N)r   r)   r   r   r   get_coordinate  s   zDeviceMesh.get_coordinaterS   c                 C   s   | j stdt| |S )a\  
            Returns a 1D DeviceMesh by flattening the current DeviceMesh.

            If no mesh_dim_name is provided, the default is a string concatentaing the mesh_dim_names of the
            given submesh with each mesh_dim_name separated by "_". For example, if we have a 3D mesh
            DeviceMesh([[[0, 1], [2, 3]], [[4, 5], [6, 7]]], mesh_dim_names=("dp", "cp", "tp")), calling
            mesh_3d["dp", "cp"]._flatten() will create a 1D submesh DeviceMesh([0, 1, 2, 3], mesh_dim_names=("dp_cp",))
            on rank 0, 1, 2, 3 and a 1D submesh DeviceMesh([4, 5, 6, 7], mesh_dim_names=("dp_cp",)) on rank 4, 5, 6, 7.

            After the flattened dimension is created, to access the flattened dimesnion in mesh_3d, one can use the
            existing slicing method to obtain the flattened mesh through calling mesh_3d["dp_cp"].
            z3Cannot flatten a DeviceMesh without mesh_dim_names!)r@   r.   rd   rs   )r*   rS   r   r   r   _flatten  s
   zDeviceMesh._flattenr   r   r   )%r   r   r   __doc__r   __annotations__r   r   r   rk   r   boolr+   r   r   r   r   r   r   objectr   r   r   r   r   rF   r   r   r   r4   propertyrH   r   r   r   r   r   r   r   r   r   r     sp   
 '

*
v

 I0	c *rc   
mesh_shape.r@   r#   c                C   s   |dur.t t|t |krtdd| t |t |kr.tddt | dt | d| r=|  s=td|  d	d
td tjt|tj	d
|}W d   n1 s[w   Y  t| ||d}|S )a  
        Initializes a `DeviceMesh` based on `device_type`, `mesh_shape`, and `mesh_dim_names` parameters.

        This creates a DeviceMesh with an n-dimensional array layout, where `n` is the length of `mesh_shape`.
        If `mesh_dim_names` is provided, each dimension is labeled as `mesh_dim_names[i]`.

        .. note::
            `init_device_mesh` follows SPMD programming model, meaning the same PyTorch Python program
            runs on all processes/ranks in the cluster. Ensure `mesh_shape` (the dimensions of the nD array
            describing device layout) is identical across all ranks. Inconsistent `mesh_shape` may lead to hanging.

        .. note::
            If no process group is found, init_device_mesh will initialize distributed process group/groups
            required for distributed communications behind the scene.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
                Passing in a device type with a GPU index, such as "cuda:0", is not allowed.
            mesh_shape (Tuple[int]): A tuple defining the dimensions of the multi-dimensional array
                describing the layout of devices.
            mesh_dim_names (Tuple[str], optional): A tuple of mesh dimension names to assign to each dimension
                of the multi-dimensional array describing the layout of devices. Its length must match the length
                of `mesh_shape`. Each string in `mesh_dim_names` must be unique.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import init_device_mesh
            >>>
            >>> mesh_1d = init_device_mesh("cuda", mesh_shape=(8,))
            >>> mesh_2d = init_device_mesh("cuda", mesh_shape=(2, 8), mesh_dim_names=("dp", "tp"))

        Nz"Each mesh_dim_name must be unique.z/Found repeated mesh_dim_name in mesh_dim_names z6mesh_shape and mesh_dim_names should have same length!zFound len(mesh_dim_names): z and len(mesh_shape):r   z0Device type with index is not supported but got rb   zUIf you maintained a 'torch.device' object, it's recommended to pass in 'device.type'.r   r   )rL   r3   r@   )r-   setr.   isalphar   r   arangeri   rj   r   viewr   )rL   r   r@   r3   r0   r   r   r   r
     s2   )
)r   )6loggingri   r   	functoolsr   	itertoolsr   typingr   r   r   r   torch.distributedr   torch.utils._typing_utilsr	   __all__sysr   r   modulesr   r
   torch._C._distributed_c10dr   r   "torch.distributed.distributed_c10dr   r   r   r   r   r   r   r   r   r   r   r    	getLoggerr   loggernumpy.typingr!   ImportErrorwarninglocalr"   rd   r   r   ry   rk   r   r   r   r   r   <module>   sf   
8
  4	    \
