
    Xh-                     F    d Z ddlZ G d d          Z G d d          ZdS )aN  
Helper classes for working with low precision floating point types that
align with the opencompute (OCP) microscaling (MX) specification.
  * MXFP4Tensor: 4-bit E2M1 floating point data
  * MXScaleTensor: 8-bit E8M0 floating point data
Reference: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
    Nc                   4    e Zd ZddZd Zd Zd Zd Zd ZdS )	MXFP4TensorNc                    || _         |Lt          |t          j                  s
J d            |j         | _         |                     |          | _        dS |!t          |t                    r|n|f| _        dS t          d          )at  
        Tensor class for working with four bit E2M1 floating point data as defined by the
        opencompute microscaling specification.


        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp4e2m1 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        N%Parameter data must be a torch tensor.Either parameter data or size must be provided	device
isinstancetorchTensor_from_floatdatatuplesize
ValueErrorselfr   r   r	   s       c/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/triton/tools/mxfp.py__init__zMXFP4Tensor.__init__   s     dEL11ZZ3ZZZZ+DK((..DIII *4 7 7EdXDIIIMNNN    c                 t   t          j        dd| j        t           j        | j                  }t          j        dd| j        t           j        | j                  }t          j        dd| j        t           j        | j                  }|dz  |dz  z  |z                      t           j                  | _        | S )Nr      r   dtyper	            )r   randintr   uint8r	   typer   )r   SEMs       r   randomzMXFP4Tensor.random#   s    M!QTYek$+VVVM!QTYek$+VVVM!QTYek$+VVV1fa(1,225;??	r   c                    |t           j        k    s
J d            | j        }|dz	  dz                      |          }|dz	  dz                      |          }|dz                      |          }t          j        |          }|dk    |dk    z  }| }|                                r||         }	||         }
||         }t          j        d|	          }t          j        |
dk    |
|
dz
            }t          j        |
dk    |dz  d|dz  z             }|t          j        d|          z  |z  }|||<   |||dk    z  xx         dz  cc<   |                    t           j                  S )	z
        Convert fp4e2m1 data to float32.

        Returns:
        - A torch tensor of type dtype representing the fp4e2m1 data.
        zCCurrently only float32 is supported for fp4e2m1 to float conversionr   r   r         ?      ?r   )r   float32r   r    
zeros_likeanypowwhere)r   r   r   r!   r"   r#   valueis_zeronon_zero_maskS_nzE_nzM_nzsignexponentmantissavalue_nzs                   r   tozMXFP4Tensor.to+   s    %%%'l%%%yai3$$U++ai3$$U++CZe$$  ##6a1f%  	,]#D]#D]#D9R&&D{419dD1H==H{419dSj#s
:JKKHei8444x?H#+E-  	ga !!!R'!!!zz%-(((r   c                    t          j        |                              t           j                  }t          j        |          }|dk    }t          j        |          t          j        |          z  }t          j        g dt           j        | j                  }t          j        ddgt           j        | j                  }g }g }	g }
|D ]}|dk    rTd}|D ]N}|dz  }|d|z  z  }|	                    |           |		                    |           |
	                    |           O\|
                                dz
  }|D ]Q}d|dz  z   }|d|z  z  }|	                    |           |		                    |           |
	                    |           Rt          j        |t           j        | j                  }t          j        |	t           j        | j                  }	t          j        |
t           j        | j                  }
|                    d          }|j        d         }|                    d          }|                                
                                }|||                    d          <   t          j        ||                    d          z
            }t          j        |dd	
          \  }}||k    }|                                dk    rT|
                    d                              |d          }|dk                        t           j                  }||dz  z
  }t          j        |d          }|	|         }|
|         }|                    |j                  }|                    |j                  }d||<   d||<   |dz  |dz  z  |z                      t           j                  S )a5  
        Convert float32 numbers to mxf4 e2m1 format.
        * No encodings are reserved for Inf or NaN in mxf4.
        * Conversion from float supports roundTiesToEven rounding mode.
        * If a value exceeds the mxf4 representable range after rounding,
          clamps to the maximum mxf4 magnitude, preserving the sign.
        * If a value has magnitude less than the minimum subnormal magnitude
          in mxf4 after rounding, converts to zero.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to fp4 format.
        r   )r   r   r   r   r   r	   r   r'   r   r(   r&   T)dimkeepdimgư>r;   r   )r   signbitr    r   absisnanisinftensorr	   appenditemr)   viewshape	unsqueezemaxminsumexpandint32argmin)r   valuesr!   
abs_valuesr/   
is_invalidE_bitsM_bitscandidate_valuescandidate_Ecandidate_Mr"   r5   r#   significandr.   
candidatesabs_values_flatNabs_values_expandedmax_candidate_valueerrors
min_errors_is_tieM_bits_expandedtie_breakerbest_indices
E_selected
M_selecteds                                 r   r   zMXFP4Tensor._from_floatN   s    M&!!&&u{33Yv&&
?[((5;v+>+>>

 lll%+dkRRRq!fEKLLL 	* 	*AAvv * *A"#c'K'1h;7E$++E222&&q)))&&q))))* 6688a< * *A"%C-K'1h;7E$++E222&&q)))&&q))))* \"2%-PTP[\\\
l;ek$+VVVl;ek$+VVV$//"--!!$-77:: )nn..3355/B
++, .1E1Ea1H1HHII
 	&a>>>
AJ&::<<!)33A66==aDDO*a/55ekBBK{T12F|F222 .
 .
OOJ,--OOJ,--'
'
aAF#a'--ek:::r   c                 $   | j         }d|cxk    r|j        k     sn J d            |                    |          }|dz   dz  }|dz  dk    rNdgd|j        z  z  }|j        |z
  dz
  dz  dz   }d||<   t          j        j                            ||dd          }t          |j                  }|||<   |	                    |dz   d            |j
        | }|                    |dz   d          }|                    |dz   d          }	|	dz  |z  }
|
S )a  
        Packs two e2m1 elements into a single uint8 along the specified dimension.

        Parameters:
        - dim: The dimension along which to pack the elements.

        Returns:
        - A torch tensor of dtype uint8 with two e2m1 elements packed into one uint8.
        r   zHThe dimension to pack along is not within the range of tensor dimensionsr   r   constant)moder.   r   )r   ndimr   r   nn
functionalpadlistrF   insertreshapeselect)r   r;   r   size_along_dimnew_size_along_dim	pad_sizes	pad_index	new_shapelowhighpackeds              r   to_packed_tensorzMXFP4Tensor.to_packed_tensor   sI    yC####$)#####V $## 3,q0Q6 A""q49}-IS1,1A5I#$Ii 8&**4ST*UUD$$	+	#q!$$$t|Y'kk#'1%%{{37A&&!)s"r   c                    |dz	  dz  }|dz  }t          j        ||f|dz             }t          |j                  }|d|         ||         dz  gz   ||dz   d         z   } |j        | }	||         dz  dk    rFt          d          g|	j        z  }
t          d||                   |
|<   |	t          |
                   }	|	                    t           j	                  S )a  
        Unpacks a tensor where two fp4 elements are packed into a single uint8.

        Parameters:
        - packed_tensor: The packed tensor
        - dim: The dimension along which the tensor was packed.
        - original_shape: The shape of the original tensor before packing.

        Returns:
        - A tensor with the original data unpacked into uint8 elements containing one
          fp4e2m1 element in the least significant bits.
        r      r   r=   Nr   r   )
r   stackrl   rF   rn   slicerh   r   r    r   )r   packed_tensorr;   original_shaperv   ru   stackedrF   rt   r   indicess              r   unpack_packed_tensorz MXFP4Tensor.unpack_packed_tensor   s     "c)c!+sDksQw777 W]##$3$K5:>"22U3788_D	w	* #"a''T{{mdi/G N3$788GCLg'Dyy%%%r   NNN)	__name__
__module____qualname__r   r$   r8   r   rx   r    r   r   r   r      s}        O O O O*  !) !) !)FV; V; V;p! ! !F& & & & &r   r   c                   *    e Zd ZddZddZd Zd ZdS )MXScaleTensorNc                    || _         |Lt          |t          j                  s
J d            |j         | _         |                     |          | _        dS |!t          |t                    r|n|f| _        dS t          d          )a6  
        Tensor class for working with microscaling E8M0 block scale factors.

        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp8e8m0 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        Nr   r   r   r   s       r   r   zMXScaleTensor.__init__   s     dEL11ZZ3ZZZZ+DK((..DIII *4 7 7EdXDIIIMNNNr   c                    d}|dnCt          dt          t          j        t          j        |                              |z             }|dnQt          dt          dt          t          j        t          j        |                              |z                       }||k    s
J d            t          j        ||dz   | j        t          j        | j	                  }|| _
        | S )zp
        Generate random E8M0 data within a specified range.
        * Excludes the NaN encoding (255).
           Nr      z&Low must be less than or equal to highr   r   )rH   intr   log2rB   rI   r   r   r   r	   r   )r   ru   rv   biasmin_exponentmax_exponentr"   s          r   r$   zMXScaleTensor.random   s    
 KqqSC
5<PSCTCT8U8U4V4VY]4]-^-^"lssCQEJu|\`OaOaDbDb@c@cfj@j9k9k0l0l|+++-U+++M,q(8tyPUP[dhdoppp	r   c                 $   |t           j        k    s
J d            | j                            |          }|dk    }|                                }d||<   |dz
  }t          j        d|          }t           j        ||<   |                    |          S )NzBCurrently only float32 is supported for f8e8m0 to float conversion   r   r   g       @)r   r)   r   r    cloner,   nan)r   r   r   is_nane_biaseder.   s          r   r8   zMXScaleTensor.to  s    %%%'k%%%y~~e$$#+::<<sN	#q!!	fzz%   r   c                    t          j        |t           j        | j                  }t          j        |          t          j        |          z  |dk    z  }d||<   ||          }t          j        t          j        |                    }|dz   }|                    t           j	                  }t          j
        |dd          }|                    t           j                  || <   |S )aO  
        Convert float32 numbers to E8M0 format.
        * Values <= 0, NaNs, and Infs are converted to the NaN encoding (255).
        * Positive values are converted by computing the floor of log2(value) to get the exponent.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to E8M0 format.
        r:   r   r   r   r   )r   
empty_liker   r	   r@   rA   floorr   r    rL   clamp)	r   rN   resultrP   valid_valuesr   r   e_biased_inte_biased_clampeds	            r   r   zMXScaleTensor._from_float  s     !&DKPPP[((5;v+>+>>&A+N
 zzk*K
<0011s7}}U[11 ;|Q<<.33EK@@
{r   r   )NN)r   r   r   r   r$   r8   r   r   r   r   r   r      s^        O O O O&   	! 	! 	!    r   r   )__doc__r   r   r   r   r   r   <module>r      s     Z& Z& Z& Z& Z& Z& Z& Z&zD D D D D D D D D Dr   