o
    oÇhÙ-  ã                   @   s,   d Z ddlZG dd„ dƒZG dd„ dƒZdS )aN  
Helper classes for working with low precision floating point types that
align with the opencompute (OCP) microscaling (MX) specification.
  * MXFP4Tensor: 4-bit E2M1 floating point data
  * MXScaleTensor: 8-bit E8M0 floating point data
Reference: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
é    Nc                   @   s>   e Zd Zddd„Zdd„ Zdd„ Zdd	„ Zd
d„ Zdd„ ZdS )ÚMXFP4TensorNc                 C   ój   || _ |durt|tjƒsJ dƒ‚|j | _ |  |¡| _dS |dur1t|tƒr+|| _dS |f| _dS tdƒ‚)at  
        Tensor class for working with four bit E2M1 floating point data as defined by the
        opencompute microscaling specification.


        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp4e2m1 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        Nú%Parameter data must be a torch tensorú.Either parameter data or size must be provided©	ÚdeviceÚ
isinstanceÚtorchÚTensorÚ_from_floatÚdataÚtupleÚsizeÚ
ValueError©Úselfr   r   r   © r   úe/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/triton/tools/mxfp.pyÚ__init__   s    zMXFP4Tensor.__init__c                 C   sp   t jdd| jt j| jd}t jdd| jt j| jd}t jdd| jt j| jd}|d> |d> B |B  t j¡| _| S )Nr   é   ©r   Údtyper   é   é   é   )r	   Úrandintr   Úuint8r   Útyper   )r   ÚSÚEÚMr   r   r   Úrandom#   s
   zMXFP4Tensor.randomc                 C   s
  |t jks	J dƒ‚| j}|d? d@  |¡}|d? d@  |¡}|d@  |¡}t  |¡}|dk|dk@ }| }| ¡ rs|| }	|| }
|| }t  d|	¡}t  |
dk|
|
d ¡}t  |
dk|d d|d  ¡}|t  d|¡ | }|||< |||dk@   d9  < | t j¡S )	zŠ
        Convert fp4e2m1 data to float32.

        Returns:
        - A torch tensor of type dtype representing the fp4e2m1 data.
        zCCurrently only float32 is supported for fp4e2m1 to float conversionr   r   r   éÿÿÿÿç      à?ç      ð?r   )r	   Úfloat32r   r   Ú
zeros_likeÚanyÚpowÚwhere)r   r   r   r   r   r    ÚvalueÚis_zeroÚnon_zero_maskÚS_nzÚE_nzÚM_nzÚsignÚexponentÚmantissaÚvalue_nzr   r   r   Úto+   s&   
zMXFP4Tensor.toc                 C   sT  t  |¡ t j¡}t  |¡}|dk}t  |¡t  |¡B }t jg d¢t j| jd}t jddgt j| jd}g }g }	g }
|D ]M}|dkrcd}|D ]}|d }|d|  }| 	|¡ |	 	|¡ |
 	|¡ qFq<| 
¡ d }|D ]}d|d  }|d|  }| 	|¡ |	 	|¡ |
 	|¡ qkq<t j|t j| jd}t j|	t j| jd}	t j|
t j| jd}
| d¡}|jd }| d¡}| ¡  
¡ }||| d¡< t  || d¡ ¡}t j|dd	d
\}}||k}| ¡ dkrù|
 d¡ |d¡}|dk t j¡}||d  }t j|dd}|	| }|
| }| |j¡}| |j¡}d||< d||< |d> |d> B |B  t j¡S )a5  
        Convert float32 numbers to mxf4 e2m1 format.
        * No encodings are reserved for Inf or NaN in mxf4.
        * Conversion from float supports roundTiesToEven rounding mode.
        * If a value exceeds the mxf4 representable range after rounding,
          clamps to the maximum mxf4 magnitude, preserving the sign.
        * If a value has magnitude less than the minimum subnormal magnitude
          in mxf4 after rounding, converts to zero.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to fp4 format.
        r   )r   r   r   r   ©r   r   r   r#   r   r$   r"   T)ÚdimÚkeepdimgíµ ÷Æ°>©r6   r   )r	   Úsignbitr   r   ÚabsÚisnanÚisinfÚtensorr   ÚappendÚitemr%   ÚviewÚshapeÚ	unsqueezeÚmaxÚminÚsumÚexpandÚint32Úargmin)r   Úvaluesr   Ú
abs_valuesr+   Ú
is_invalidÚE_bitsÚM_bitsÚcandidate_valuesÚcandidate_EÚcandidate_Mr   r1   r    Úsignificandr*   Ú
candidatesÚabs_values_flatÚNÚabs_values_expandedÚmax_candidate_valueÚerrorsÚ
min_errorsÚ_Úis_tieÚM_bits_expandedÚtie_breakerÚbest_indicesÚ
E_selectedÚ
M_selectedr   r   r   r   N   sd   


û

û


zMXFP4Tensor._from_floatc                 C   sî   | j }d|  kr|jk sJ dƒ‚ J dƒ‚| |¡}|d d }|d dkrIdgd|j  }|j| d d d }d||< tjjj||ddd}t|jƒ}|||< | 	|d d¡ |j
|Ž }| |d d¡}| |d d¡}	|	d> |B }
|
S )a  
        Packs two e2m1 elements into a single uint8 along the specified dimension.

        Parameters:
        - dim: The dimension along which to pack the elements.

        Returns:
        - A torch tensor of dtype uint8 with two e2m1 elements packed into one uint8.
        r   zHThe dimension to pack along is not within the range of tensor dimensionsr   r   Úconstant)Úmoder*   r   )r   Úndimr   r	   ÚnnÚ
functionalÚpadÚlistrA   ÚinsertÚreshapeÚselect)r   r6   r   Úsize_along_dimÚnew_size_along_dimÚ	pad_sizesÚ	pad_indexÚ	new_shapeÚlowÚhighÚpackedr   r   r   Úto_packed_tensor¦   s*   
ÿÿ


zMXFP4Tensor.to_packed_tensorc                 C   s²   |d? d@ }|d@ }t j||f|d d}t|jƒ}|d|… || d g ||d d…  }|j|Ž }	|| d dkrStdƒg|	j }
td|| ƒ|
|< |	t|
ƒ }	|	 t j	¡S )aÅ  
        Unpacks a tensor where two fp4 elements are packed into a single uint8.

        Parameters:
        - packed_tensor: The packed tensor
        - dim: The dimension along which the tensor was packed.
        - original_shape: The shape of the original tensor before packing.

        Returns:
        - A tensor with the original data unpacked into uint8 elements containing one
          fp4e2m1 element in the least significant bits.
        r   é   r   r8   Nr   r   )
r	   Ústackrf   rA   rh   Úslicerb   r   r   r   )r   Úpacked_tensorr6   Úoriginal_shaperp   ro   ÚstackedrA   rn   r   Úindicesr   r   r   Úunpack_packed_tensorÉ   s   
*
z MXFP4Tensor.unpack_packed_tensor©NNN)	Ú__name__Ú
__module__Ú__qualname__r   r!   r4   r   rr   rz   r   r   r   r   r      s    
#X#r   c                   @   s0   e Zd Zd
dd„Zddd„Zdd„ Zdd	„ ZdS )ÚMXScaleTensorNc                 C   r   )a6  
        Tensor class for working with microscaling E8M0 block scale factors.

        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp8e8m0 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        Nr   r   r   r   r   r   r   r   ë   s   	 zMXScaleTensor.__init__c              
   C   s–   d}|du rdnt dtt t |¡¡ƒ| ƒ}|du rdntdt dtt t |¡¡ƒ| ƒƒ}||ks7J dƒ‚tj||d | jtj| j	d}|| _
| S )zp
        Generate random E8M0 data within a specified range.
        * Excludes the NaN encoding (255).
        é   Nr   éþ   z&Low must be less than or equal to highr   r   )rC   Úintr	   Úlog2r=   rD   r   r   r   r   r   )r   ro   rp   ÚbiasÚmin_exponentÚmax_exponentr   r   r   r   r!   þ   s   *0zMXScaleTensor.randomc                 C   s^   |t jks	J dƒ‚| j |¡}|dk}| ¡ }d||< |d }t  d|¡}t j||< | |¡S )NzBCurrently only float32 is supported for f8e8m0 to float conversionéÿ   r   r€   g       @)r	   r%   r   r   Úcloner(   Únan)r   r   r   Úis_nanÚe_biasedÚer*   r   r   r   r4     s   

zMXScaleTensor.toc           	      C   sŠ   t j|t j| jd}t  |¡t  |¡B |dkB }d||< ||  }t  t  |¡¡}|d }| t j	¡}t  
|dd¡}| t j¡|| < |S )aO  
        Convert float32 numbers to E8M0 format.
        * Values <= 0, NaNs, and Infs are converted to the NaN encoding (255).
        * Positive values are converted by computing the floor of log2(value) to get the exponent.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to E8M0 format.
        r5   r   r‡   r€   r   )r	   Ú
empty_liker   r   r;   r<   Úfloorrƒ   r   rG   Úclamp)	r   rI   ÚresultrK   Úvalid_valuesrŒ   r‹   Úe_biased_intÚe_biased_clampedr   r   r   r     s   	
zMXScaleTensor._from_floatr{   )NN)r|   r}   r~   r   r!   r4   r   r   r   r   r   r   é   s
    

r   )Ú__doc__r	   r   r   r   r   r   r   Ú<module>   s
     ^