
    Xh?                    d   d dl mZ ddlmZ ddlmZ ddlmZ dPd	ZdPd
Zej	        ed                         Z
ej	        e ej        d          d                                     Zej	        e ej        d          dQd                                    Zej	        edRd                        Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zej	        e ej        ddd          dSd                                    Zej	        e ej        d d!"          dTd#                                    Zed$             Zed%             Zed&             Zed'             Zej	        e ej        d(dd          dSd)                                    Zej	        e ej        d*d!"          dTd+                                    Zed,             ZdUd/Z ej	        e ej        d0d.1          dVdWd2                                    Z!ed3             Z"ej	        e ej        d4          dXd5                                    Z#ed6             Z$ej	        e ej        d7          dRd8                                    Z%ej	        e ej&        d9d.1          dYdWd:                                    Z'ed;             Z(ej	        e ej&        d<          dZd=                                    Z)ed[d@            Z*edPdA            Z+ed\dD            Z,ed]dE            Z-eddej.        fd^dI            Z/edej.        fd_dJ            Z0ed`dadK            Z1edej.        fd_dL            Z2dM Z3ej	        ed`dN                        Z4edO             Z5dS )b    )annotations   )jit   )core)mathicore.constexprc                    d}t          j        |           j        }|dk    r|dz  }|dz  }|dk    t          j        |          S )Nr   r   )r   	constexprvalue)r	   log2ns      j/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/triton/language/standard.py_log2r   
   sQ    DqA
a%%	a	 a%% >$    c                X    | j         }t          j        ||dz
  z  dk    o|dk              S Nr   r   )r   r   r   )r	   r   s     r   _is_power_of_twor      s/    	A>1A;1,7a888r   c                    | |z   dz
  |z  S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :type div: Block
    r    )xdivs     r   cdivr      s     GaKCr   sigmoidc                8    ddt          j        |            z   z  S )Nr   )r   exp)r   s    r   r   r   +   s     DHaRLL !!r   softmaxNFc                    |d}n|}| t          | ||          z
  }t          j        |          }t          |||          }t          j        |||          S )Nr   	keep_dims)maxr   r   sumfdiv)r   dimr!   ieee_rounding_dimznumdens           r   r   r   2   sc     { "	C49----A
(1++C
c49
-
-
-C9S#}---r   c                <    t          j        | | j        g|          S )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    )can_reorder)r   reshapenumel)r   r,   s     r   ravelr/   @   s     <AG9+>>>>r   c                    | |z  |z   }||z  }||z  }||z  }t          j        ||z
  |          }||z  }|||z  z   }	||z  }
|	|
fS )a  
    Transforms the indices of a row-major `size_i * size_j` matrix into
    the indices of a column-major matrix for each group of `size_g` rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r	   jsize_isize_jsize_gijsize_gjgroup_idoff_inew_inew_js              r   	swizzle2dr=   L   sn    , 
VaB voGW}HvE\&5.&11F	gBBKE&LE%<r   c                .    t          j        | d|          S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtypes     r   zerosrB   t   s     9UAu%%%r   c                6    t          | j        | j                  S )z
    Returns a tensor of zeros with the same shape and type as a given tensor.

    :param input: input tensor
    :type input: Tensor
    )rB   r@   rA   )inputs    r   
zeros_likerE      s     ek***r   c                    |r| |k    o||k     }nd}| |k    p|}t          j        || |          }t          j        |||          }||fS NFr   where)	value1index1value2index2tie_break_lefttiegtv_reti_rets	            r   _argmax_combinerS      sb     26F?	&	CBJr66**EJr66**E%<r   c                (    t          | |||d          S NTrS   rJ   rK   rL   rM   s       r   _argmax_combine_tie_break_leftrX          66664@@@r   c                (    t          | |||d          S rG   rV   rW   s       r   _argmax_combine_tie_break_fastr[          66665AAAr   c                ,    t          j        | |          S N)r   maximumabs     r   _elementwise_maxrc          <1r   r_   return_indicesreturn_indices_tie_break_left)return_indices_argtie_break_argTc                b   t          j        |           } |r<|rt          j        | |t          |          S t          j        | |t          |          S t          j        | j        j                  t          j        d          k     rt          j        | j                                                  r | 	                    t           j
                  } nB| j                                        s
J d            | 	                    t           j                  } t          j        | |t          |          S Nr        z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesrX   r[   r   rA   primitive_bitwidthis_floatingtofloat32is_intint32reducerc   rD   axisre   rf   r!   s        r   r"   r"      s   
 -e44E O( 	o,UD:Xdmnnnn,UD:Xdmnnnn>%+899DN2<N<NNN~ek557788 -..{))++QQ-QQQQ,,{5$(8INNNNr   zmaximum indexrN   )rh   c                4    t          | |d||          \  }}|S NT)re   rf   r!   )r"   rD   rv   rN   r!   _rets         r   argmaxr|      s'     5$tSamvwwwHQJr   c                    |r| |k    o||k     }nd}| |k     p|}t          j        || |          }t          j        |||          }||fS rG   rH   )	rJ   rK   rL   rM   rN   rO   lt	value_ret	index_rets	            r   _argmin_combiner      sc     26F?	&	CB
2vv..I
2vv..Iir   c                (    t          | |||d          S rU   r   rW   s       r   _argmin_combine_tie_break_leftr      rY   r   c                (    t          | |||d          S rG   r   rW   s       r   _argmin_combine_tie_break_fastr      r\   r   c                ,    t          j        | |          S r^   r1   r`   s     r   _elementwise_minr      rd   r   r2   c                >   t          j        |           } |r<|rt          j        | |t          |          S t          j        | |t          |          S t          j        | j        j                  dk     rt          j        | j                                                  r | 	                    t           j
                  } nB| j                                        s
J d            | 	                    t           j                  } t          j        | |t          |          S rj   )r   rl   rm   r   r   r   rA   rn   ro   rp   rq   rr   rs   rt   r   ru   s        r   minr      s    
 -e44E O( 	o,UD:Xdmnnnn,UD:Xdmnnnn>%+899B>>~ek557788 -..{))++QQ-QQQQ,,{5$(8INNNNr   zminimum indexc                4    t          | |d||          \  }}|S rx   )r   ry   s         r   argminr      s'     TQ_ktuuuFAsJr   c                    | |z   S r^   r   r`   s     r   _sum_combiner         q5Lr   in_dtyperA   c                    t          j        |          }||S d }|                                 r| j        dk     rt           j        nd }n-|                                 r| j        dk     rt           j        nd }|S )Nrk   )r   _unwrap_if_constexpris_int_signedint_bitwidthrs   is_int_unsigneduint32)r   rA   	out_dtypes      r   _pick_sum_dtyper   
  s    %e,,E I H"*"7""<"<DJJ$				!	!	#	# H#+#82#=#=DKK4	r   r#   )	dtype_argc                    t          | j        |          }||                     |          } t          j        | |t
          |          S )Nr    )r   rA   rp   r   rt   r   )rD   rv   r!   rA   r   s        r   r#   r#     sE    
 !0U C CI##;udLIFFFFr   c                    | |z  S r^   r   r`   s     r   _xor_combiner   %  r   r   zxor sumc                    t          j        | j        j                                        d           t          j        | |t          |          S )Nz#xor_sum only supported for integersr    )r   static_asserttypescalarrr   rt   r   rD   rv   r!   s      r   xor_sumr   -  sB     	uz(//113XYYY;udLIFFFFr   c                    | |z  S r^   r   )r   ys     r   _or_combiner   8  r   r   	reduce_ofc                    t          j        | j        j                                        d           t          j        | |t          |          S )Nz%reduce_of only supported for integersr    )r   r   r   r   rr   rt   r   r   s      r   	reduce_orr   =  sB     	uz(//113Z[[[;udK9EEEEr   cumsumc                    t          j        |           } t          | j        |          }||                     |          } t          j        | |t          |          S r^   )r   rl   r   rA   rp   associative_scanr   )rD   rv   reverserA   r   s        r   r   r   H  sS     -e44E /U C CI## lGDDDr   c                    | |z  S r^   r   r`   s     r   _prod_combiner   Z  r   r   cumprodc                b    t          j        |           } t          j        | |t          |          S r^   )r   rl   r   r   )rD   rv   r   s      r   r   r   _  s+    
 -e44E mWEEEr   n_dimsr3   c                    t          j        dd          }t          j        |dg| |z
  dz
  z  dgz   dg|z  z             }|S )Nr   r   r   )r   aranger-   )r   r3   ars      r   
_indicatorr   k  sJ    	Q		B	b1#!a0A36!q@	A	ABIr   c                j   t          | j                  }t          j        | j        j        d          }|                     |d          }|t          ||dz
  |z
  d          z  }|                    | j        d          }t          ||          }t          j	        | |k    ||z  k    ||           }	|	S )NTbitwidthsignedbitcastr   )
r   r.   r   get_int_dtyperA   rn   rp   r   r   rI   )
r   flipr	   r   idtypeixiyr   is_rightr{   s
             r   _compare_and_swapr   r  s     #17^^F )CDQQQF	
fd	#	#B	gb&1*q.$//	/B
agt$$A &!$$H *a!e11a
8
8CJr   stageorderc                    |dk    r#t          t          | j                  |          }n|}t          j        |          D ]}t          | ||dz
  |z
            } | S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   )r   r   r.   r   static_ranger   )r   r   r   r   r	   s        r   _bitonic_merge_hypercuber     se     zz%..%00u%% 6 6auqy1}55Hr   c                    t          j        | dgt          | j                  z            }t	          |||          }t          j        || j                  } | S )Nr   )r   r-   r   r.   r   r@   )r   r   r   r   hs        r   _bitonic_merger     sK    QeAGnn,--A E511AQ  AHr   kr%   
descendingc                   |t          | j                  dz
  n|}t          j        |t          | j                  dz
  k    d           t	          | j        |                   }||nt	          |          }t	          | j                  }t          j        | dg|z            }t          j        d|dz             D ]}	t          ||	|	|k     rdn|          }t          j        |dz   |dz             D ]o}	|r)t          |t	          |j                  dz
  |z
            n(t          |t	          |j                  dz
  |z
            }t          |||	|k     rdn|          }pt          j        || j        dd         d|z  gz             } | S )ai  
    Sorts a tensor along a specified dimension.

    :param x: The input tensor to be sorted.
    :type x: Tensor
    :param dim: The dimension along which to sort the tensor. If None, the tensor is sorted along the last dimension. Currently, only sorting along the last dimension is supported.
    :type dim: int, optional
    :param k: the number of top elements to select. If none, assume k = x.shape[dim]
    :type k: int, optional
    :param descending: If set to True, the tensor is sorted in descending order. If set to False, the tensor is sorted in ascending order.
    :type descending: bool, optional
    Nr   +only minor dimension is currently supportedr   )rv   )lenr@   r   r   r   r.   r-   r   r   r"   r   )
r   r   r%   r   r'   log_nlog_kr   r   r	   s
             r   	sort_implr     s    03{3qw<<!++Dts17||a//1^___!!'$-00E%&YEEE!HHE"17^^F 	Qf%%A q%!),, K K$QE		11zJJ uqy%!)44 O O9CrCqw!+e35555QV[\]\cVdVdghVhkpVpIrIrIr$QAIIqq:NN 	Q5z122AHr   c                &    t          | ||          S )N)r%   r   r   )r   r%   r   s      r   sortr     s    QCJ7777r   c                (    t          | ||d          S )NT)r   r%   r   r   )r   r   r%   s      r   topkr     s    Q!6666r   c                    |t          | j                  dz
  n|}t          j        |t          | j                  dz
  k    d           t	          | j        d                   }t          | |||          S )Nr   r   r   )r   r@   r   r   r   r   )r   r%   r   r'   r   s        r   bitonic_merger     sl     03{3qw<<!++Dts17||a//1^___"172;//F!VZ888r   c                    t          j        |           } t          j        |          }| t          |          dz
  } | dk     r| t          |          z  } t          j        |           S r   )r   r   r   r   )r%   r@   s     r   _get_flip_dimr     s_    

#C
(
(C%e,,E
{%jj1n
Qwws5zz>#r   c                   t          j        t          | j                   |k    o|t          | j                  k                t	          || j                  }t          j        t          | j        |                              t          | j        |                   }t          j        | j        j	        d          }t          j
        |                     |d          | j        d|         dg|z  z   | j        |dz   d         z             }t          j        |          D ]}|t          |||z   d          z  }t          j
        || j                                      | j        d          } | S )z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along
    :type dim: int
    Tr   r   Nr   r   )r   r   r   r@   r   r   r   r   rA   rn   r-   rp   r   r   )r   r%   r'   stepsr   r   r	   s          r   r   r     sJ    	AG}+Bc!'ll0BCCC(ag66D'66777!!'$-00E )CDQQQFQTT&$T//$1#+1MPQPWX\_`X`XaXaPb1bccAu%% + +4!8T***Q  ##AGT#::AHr   c                    t          j        | |          }t          |j                  dk    r|S t          j        ||j        dd         d|j        d         z  gz             S )a7  
    Interleaves the values of two tensors along their last dimension. The two tensors must have the same shape.
    Equivalent to `tl.join(a, b).reshape(a.shape[:-1] + [2 * a.shape[-1]])`

    :param a: The first input tensor.
    :type a: Tensor
    :param b: The second input tensor.
    :type b: Tensor
    r   Nr   )r   joinr   r@   r-   )ra   rb   cs      r   
interleaver     s[     		!QA
17||q
 |Aqwss|q172;.??@@@r   )r	   r
   )NFF)F)NFTF)TF)r   r
   rA   r
   )NFN)rA   r
   rG   )r   FN)r   F)r   r
   r3   r
   )r   r
   r   r
   )r   r
   r   r
   r   r
   )r   r
   r%   r
   r   r
   )r%   r
   r   r
   r^   )r   r
   r%   r
   )6
__future__r   runtime.jitr    r   r   r   r   _tensor_member_fnr   _add_math_1arg_docstrr   r   r/   r=   rB   rE   rS   rX   r[   rc   _add_reduction_docstrr"   r|   r   r   r   r   r   r   r   r   r#   r   r   r   r   _add_scan_docstrr   r   r   r   r   r   r   CONSTEXPR_0r   r   r   r   r   r   r   r   r   r   <module>r      s   " " " " " "                  
       9 9 9 9 	  	   	  I&&" " '&  " I&&. . . '&  . ? ? ?  ? $ $ $N 	& 	& 	& + + +    A A A B B B    I:J*IK K KO O OK K  O" O;KLLL   ML         A A A B B B    I:J*IK K KO O OK K  O" O;KLLL   ML  
       EW555G G G G 65  G    I&&G G G '&  G    K((F F F )(  F x7333	E 	E 	E 	E 43  	E    y!!F F F "!  F        $    *     %)dhdt % % % % %P "&TEU 8 8 8 8 8 7 7 7 7 7 +/dN^ 9 9 9 9 9       . A A A A Ar   