
    Xhz                       d dl mZ d dlZd dlmZmZmZmZmZm	Z	m
Z
 d dlZd dlmZ ddlmZ ddlmZ  ed	          Z ed
          Z G d de          Z G d de	e                   ZdS )    )annotationsN)ListOptionalSequenceTupleTypeVarGenericType)driver   )ir   )coreTTensorTyc                       e Zd Z fdZ xZS )IncompatibleTypeErrorImplc                    || _         || _        d| j                                         z   dz   | j                                        z   | _        t	          t
          |                               | j                   d S )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__s      j/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/triton/language/semantic.pyr   z"IncompatibleTypeErrorImpl.__init__   sl    2T[5I5I5K5KKgUX\XcXlXlXnXnn'..77EEEEE    )__name__
__module____qualname__r   __classcell__)r   s   @r   r   r      sA        F F F F F F F F Fr   r   c                     e Zd ZU ej        Zded<   eZded<   d Zdd
ZddZ	ddZ
ddZdddZddZ	 	 ddd"Zdd%Zdd)Zdd*Zdd+Zdd,Zdd-Zdd/Zdd0Zdd5Zdd6Zdd9Zdd:Zdd;Zdd<Zdd=Zdd>Zdd?Zdd@Z ddAZ!ddBZ"ddCZ#ddDZ$ddEZ%ddFZ&ddIZ'ddJZ(ddKZ)ddLZ*ddMZ+ddNZ,ddOZ-dPdQddUZ.ddWZ/ddXZ0dd[Z1dd]Z2dd`Z3ddaZ4ddbZ5ddeZ6ddfZ7ddiZ8ddjZ9ddkZ:d dnZ;ddpZ<dddrZ=ds Z>dt Z?du Z@dv ZAdw ZBdx ZCdy ZDdz ZEd{ ZFddZGddZHddZIddZJddZKd ZLd ZMddZNddZOddZPddZQddZRddZSd	dZTd ZUd ZVd
dZWddZXddZYddZZddZ[ddZ\ddZ]ddZ^ddZ_ddZ`ddZad ZbddZcddZdddZeddZfddZgd ZhddZiddÄZjddƄZkddȄZlddʄZmdd˄Zndd̄Zodd̈́Zpdd҄ZqddՄZrddքZsdׄ Ztdd؄ZuddڄZvddۄZwddZxdPS (  TritonSemanticzType[TensorTy]tensorz
ir.builderbuilderc                    || _         d S N)r'   )r   r'   s     r   r   zTritonSemantic.__init__   s    r   axisintreturnr   c                    |dvrt          d|           |                     | j                            |          t          j                  S )Nr   r   r   z+program_id axis must be 0, 1, or 2 but got )
ValueErrorr&   r'   create_get_program_idtlint32r   r*   s     r   
program_idzTritonSemantic.program_id&   sJ    y  Q4QQRRR{{4<==dCCRXNNNr   c                    |dvrt          d|           |                     | j                            |          t          j                  S )Nr.   z-num_programs axis must be 0, 1, or 2 but got )r/   r&   r'   create_get_num_programsr1   r2   r3   s     r   num_programszTritonSemantic.num_programs+   sJ    y  STSSTTT{{4<??EErxPPPr   a_tytl.dtypeb_tyc                   |j         }|j         }|j        }|j        }||k    r
||k    r|n|S |t          j        j        j        k    r
||k    r|n|S |t          j        j        j        k    r
||k    r|n|S t          d| d|           )Nzunexpected signedness r   )int_bitwidthint_signednessr1   dtype
SIGNEDNESSUNSIGNED	TypeError)r   r8   r:   a_rankb_ranka_snb_sns          r   integer_promote_implz#TritonSemantic.integer_promote_impl4   s    """" 4<<!F??444RX(111!V++445RX(111!V++445BBBDBBCCCr   a_is_scalarboolb_is_scalar
div_or_modc                   ||k    rk|r||fn||f\  }}|                                 j        |                                 j        k    r*|r&|t          j        t          j        fv rt          j        S |S |                                s|                                rt          j        S |                                s|                                rt          j        S |	                                s|	                                r|rt          j        S t          j        S |
                                r.|
                                r|rt          j        S t          j        S |
                                s|
                                rt          j        S |                                r(|                                r||k    r|nt          j        S |                                r|                                st          d| d|           |rO|j        |j        k    r?t          d|                                z   dz   |                                z   dz             |                     ||          S )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer1   float16bfloat16float32is_fp64float64is_fp32is_fp16is_bf16is_fp8is_intrA   r=   r   rF   )r   r8   rG   r:   rI   rJ   	scalar_ty	tensor_tys           r   computation_type_implz$TritonSemantic.computation_type_implC   sO   
 +%%3>#PD$<<T4L Iy~~%)9)9)??? &9R[0I#I#I:%   <<>> 	T\\^^ 	: <<>> 	T\\^^ 	: <<>> 	"T\\^^ 	" "z!z!<<>> 	#dllnn 	# #z!{"<<>> 	T\\^^ 	:;;== 	8T[[]] 	84<<44RZ7{{}} 	BDKKMM 	B@t@@$@@AAA  	p$-1DDD9DMMOOKgUX\XeXeXgXggoo p p p ((t444r   T
check_typec                   t          |t                    r8|                     | j                            |          t
          j                  S t          |t                    rd|cxk    rdk     rn nt
          j        }njd|cxk    rdk     rn nt
          j	        }nMd|cxk    rdk     rn nt
          j
        }n0d|cxk    rdk     rn nt
          j        }nt          d| d          |                     ||	          S t          |t                    r~d
}dddz  z  }t          d         |          }|t          d          k    s|dk    s||k    s||cxk    r|k    rn nt
          j        }nt
          j        }|                     ||	          S t          |t
          j                  r|                     |j                  S t          || j                  r|S |r#t+          d| dt-          |           d          |S )N           l                             l            zNonrepresentable integer .r>   g      8g   ?r      absinfg        zcannot convert z	 of type z
 to tensor)
isinstancerH   r&   r'   get_int1r1   int1r+   r2   uint32int64uint64r/   scalar_constantfloat__builtins__rR   rT   	constexpr	to_tensorrO   rA   type)r   xr]   r>   min_float32max_float32abs_xs          r   rr   zTritonSemantic.to_tensoru   sG   a 	;;t|44Q77AAA3 	""""U"""""!####e#####	1$$$$u$$$$$!####e#####	 !AQ!A!A!ABBB'''7775!! 	!K%C/K '**Ee$$||Avve2222{22222

'''7772<(( 	>>!'***4;'' 	H 	OMaMM$q''MMMNNNr   r   r   allow_ptr_aNonec                    |                                 r`|st          ||          |                                 r||k    rt          ||          |                                rt          ||          d S d S r)   )is_ptrr   is_floating)r   r   r   rx   s       r   check_ptr_type_implz"TritonSemantic.check_ptr_type_impl   s    ==?? 	@ @/???}} @Ff$4$4/???!!## @/???	@ 	@@ @r   FlhsTensorTy | numbers.NumberrhsTuple[TensorTy, TensorTy]c                   t          |t          j                  }t          |t          j                  }|r|}	|                     |          }|r|}
|                     |          }|j        j        }|j        j        }|                     |||           |                     |||           |r|                                s{|                                sf|                     |||||          }|r|	dk     r|	                                s|r)|
dk     r#|	                                rt          d          |                                r|rH|                                |	cxk    r|                                k    sn t          d|	 d|           |rH|                                |
cxk    r|                                k    sn t          d|
 d|           |r|                     |	|          n|                     ||          }|r|                     |
|          n|                     ||          }|                     ||          \  }}||fS )Nr   z{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.zScalar z is out of range for type rd   )rh   numbersNumberrr   rs   scalarr}   r{   r\   is_int_unsignedr/   rY   get_int_min_valueget_int_max_valuern   castbroadcast_impl_value)r   r~   r   allow_lhs_ptrallow_rhs_ptrarithmetic_checkrJ   lhs_is_scalarrhs_is_scalar
lhs_scalar
rhs_scalar
lhs_sca_ty
rhs_sca_ty
ret_sca_tys                 r   binary_op_type_checking_implz+TritonSemantic.binary_op_type_checking_impl   s    #377"377 	&J..%%C 	&J..%%C X_
X_
  ZGGG  ZGGG 	vJ$5$5$7$7 	v
@Q@Q@S@S 	v33Jz[hjtuuJ L*q..Z5O5O5Q5Q.$ #1)3aJ<V<V<X<X  "K L L L  "" c  c**F*F*H*HJ +I +I +I +I*4*F*F*H*H+I +I +I +I$%az%a%aU_%a%abbb  c**F*F*H*HJ +I +I +I +I*4*F*F*H*H+I +I +I +I$%az%a%aU_%a%abbbHUu$&&z&DDD[_[d[dehjt[u[uCHUu$&&z&DDD[_[d[dehjt[u[uC ,,S#66SCxr   	binary_opcallablec                   |j         j        j        dk    s| j        j        j        sd S |j         j        }|j         j        }||k    sJ |                                sJ |                     |t          j	                  }|                     |t          j	                  } |||d          }|
                                }|                     |t          j	                  }|                                }|                     |t          j	                  }|                     |                     ||          |                     ||                    }	d|j         d|j         }
|                     |	|
           d S )N@   Fr+   z! overflow detected for operation )rs   r   r<   r'   optionssanitize_overflowrY   r   r1   rl   r   rn   r   and_
less_equalgreater_equalr    device_assert)r   r~   r   r   r   r   ret	max_value	min_valuecondmsgs              r    binary_op_sanitize_overflow_implz/TritonSemantic.binary_op_sanitize_overflow_impl   sW   8?'2--T\5I5[-FX_
X_
Z''''  """""iiRX&&iiRX&&iS%((0022	((BH==	0022	((BH==	yyi88$:L:LSR[:\:\]]bJ+bbiN`bb4%%%%%r   inputotherr   c                   |                      ||dd          \  }}|j        j        }|j        j        }|                                r#|                                rt	          d          |                                r0|                                s||}}|j        j        }|j        j        }|                                r|j        }|j                                        rm|j        j        dk     r]|j        	                    t          j                                      | j                  }| j                            |j        |d          }|                     | j                            |j        |          |j                  S |                                r>|                     | j                            |j        |j                  |j                  S |                                r\|r|                     ||| j                   |                     | j                            |j        |j                  |j                  S t	          d|           )NTzcannot add pointers togetherr   FrL   )r   rs   r   r{   rA   handler>   r   r<   with_element_tyr1   rl   to_irr'   create_int_castr&   create_addptrr|   create_faddrY   r   add
create_add)r   r   r   r   input_scalar_tyother_scalar_tyother_handlei64_tys           r   r   zTritonSemantic.add   s    88tTRRu*+*+!!## 	<(>(>(@(@ 	<:;;; !!## 	0O,B,B,D,D 	0 %5E#j/O#j/O!!## 	` <L{**,, Y1IB1N1N33BH==CCDLQQ#|;;EL&RWXX;;t|99%,UUW\Wabbb((** 	`;;t|77elSSUZU_```##%% 	`  N55eUDHMMM;;t|66u|U\RRTYT^___<?<<===r   c                v   |                      ||dd          \  }}|j        j        }|                                r+|                     ||                     |          d          S |                                r>|                     | j        	                    |j
        |j
                  |j                  S |                                r\|r|                     ||| j                   |                     | j                            |j
        |j
                  |j                  S t          d|           )NTF)r   rL   )r   rs   r   r{   r   minusr|   r&   r'   create_fsubr   rY   r   sub
create_subrA   r   r   r   r   rZ   s        r   r   zTritonSemantic.sub   s"   88tUSSuJ%	 	O88E4::e#4#48NNN  "" 	`;;t|77elSSUZU_``` 	`  N55eUDHMMM;;t|66u|U\RRTYT^___6966777r   c                   |                      ||          \  }}|j        j        }|                                r>|                     | j                            |j        |j                  |j                  S |                                r\|r| 	                    ||| j
                   |                     | j                            |j        |j                  |j                  S t          d|           NrL   )r   rs   r   r|   r&   r'   create_fmulr   rY   r   mul
create_mulrA   r   s        r   r   zTritonSemantic.mul  s    88FFuJ%	  "" 	`;;t|77elSSUZU_``` 	`  N55eUDHMMM;;t|66u|U\RRTYT^___6966777r   c                   |                      ||dddd          \  }}|j        j        }|j        j        }|                                r,|                                r|                     ||          }n |                                r+|                                r|                     ||          }n|                                rU|                                rA|                     |t          j                  }|                     |t          j                  }nx|                                rR|                                r>|j        |j        k    r|                     ||          }n)|                     ||          }nt          d|           | 
                    | j                            |j        |j                  |j                  S NFTrL   )r   rs   r   r|   rY   r   r1   rR   fp_mantissa_widthrA   r&   r'   create_fdivr   )r   r   r   r   r   s        r   truedivzTritonSemantic.truediv  s   88ueUY[_``u*+*+&&(( 	B_-C-C-E-E 	BIIe_55EE##%% 	B/*E*E*G*G 	BIIe_55EE##%% 	B/*@*@*B*B 	BIIeRZ00EIIeRZ00EE((** 	B/J/J/L/L 	B0?3TTT		%99		%99 @@@AAA{{4<33EL%,OOQVQ[\\\r   c                   |                      ||dddd          \  }}|j        j        }|j        j        }|                                r|                                r|                     ||          }|                     ||          }|                     ||          }|                                r>|                     | j        	                    |j
        |j
                  |j                  S |                     | j                            |j
        |j
                  |j                  S t          d|           r   )r   rs   r   rY   rF   r   is_int_signedr&   r'   create_sdivr   create_udivrA   )r   r   r   r   r   ret_tys         r   floordivzTritonSemantic.floordiv7  s%   88ueUY[_``u*+*+!!## 	e(>(>(@(@ 	e..PPFIIeV,,EIIeV,,E##%% e{{4<#;#;EL%,#W#WY^Ycddd{{4<#;#;EL%,#W#WY^Ycddd<?<<===r   ieee_roundingc                Z   |j         j        }|j         j        }|                                r|                                st          d          |                     ||dddd          \  }}| j                            |j        |j                  }|                     ||j                   S )Nz4both operands of fdiv must have floating scalar typeFT)	rs   r   r|   rA   r   r'   r   r   r&   )r   r   r   r   r   r   r   s          r   fdivzTritonSemantic.fdivE  s    *+*+**,, 	TO4O4O4Q4Q 	TRSSS88ueUZ\`aaul&&u|U\BB{{3
+++r   c                   |                      ||dddd          \  }}|j        j        }|j        j        }|                                r>|                     | j                            |j        |j                  |j                  S |                                r|j	        |j	        k    r?t          d|                                z   dz   |                                z   dz             |                                r>|                     | j                            |j        |j                  |j                  S |                     | j                            |j        |j                  |j                  S t          d|           )NFTzCannot mod z by rM   rL   )r   rs   r   r|   r&   r'   create_fremr   rY   r=   rA   r   r   create_sremcreate_urem)r   r   r   rZ   r   s        r   modzTritonSemantic.modN  sx   88ueUY[_``uJ%	*+  "" 	e;;t|77elSSUZU_``` 	e'?+III	0B0B0D0D Dv MP_PhPhPjPj j ns !s t t t &&(( e{{4<#;#;EL%,#W#WY^Ycddd{{4<#;#;EL%,#W#WY^Ycddd6966777r   rt   ypropagate_nantl.PropagateNanc                F   |                      ||          \  }}|j        }|                                r|t          j        j        k    r>|                     | j                            |j	        |j	                  |j
                  S |t          j        j        k    r>|                     | j                            |j	        |j	                  |j
                  S t          d|           |                                r>|                     | j                            |j	        |j	                  |j
                  S |                                r>|                     | j                            |j	        |j	                  |j
                  S t%          d|           NzUnexpected propagate_nan Unexpected dtype )r   r>   r|   r1   PropagateNanALLr&   r'   create_minimumfr   rs   NONEcreate_minnumfr/   r   create_minsir   create_minuirA   r   rt   r   r   r>   s        r   minimumzTritonSemantic.minimume  `   00A661 	9 333{{4<#?#?!(#S#SUVU[\\\"/"666{{4<#>#>qx#R#RTUTZ[[[ !L]!L!LMMM  "" 	9;;t|8818LLafUUU""$$ 	9;;t|8818LLafUUU777888r   c                F   |                      ||          \  }}|j        }|                                r|t          j        j        k    r>|                     | j                            |j	        |j	                  |j
                  S |t          j        j        k    r>|                     | j                            |j	        |j	                  |j
                  S t          d|           |                                r>|                     | j                            |j	        |j	                  |j
                  S |                                r>|                     | j                            |j	        |j	                  |j
                  S t%          d|           r   )r   r>   r|   r1   r   r   r&   r'   create_maximumfr   rs   r   create_maxnumfr/   r   create_maxsir   create_maxuirA   r   s        r   maximumzTritonSemantic.maximumv  r   r   minmaxc                ~   |                      ||          \  }}|                      ||          \  }}|                      ||          \  }}|j        }|                                rE|                     | j                            |j        |j        |j        |          |j                  S t          d| d          )Nr   z(. Only floating point clamp is supported)	r   r>   r|   r&   r'   create_clampfr   rs   rA   )r   rt   r   r   r   r>   s         r   clampzTritonSemantic.clamp  s    44S#>>S221c::3221c::3 	a;;t|99!(CJPSPZ\ijjlmlrsss____```r   c                x   |                      ||          \  }}|j        j        }|j        j        }|                                r|                                st	          ||          |                     ||          }||k    r|                     ||          }||k    r|                     ||          }||fS r)   )r   rs   r   rY   r   rF   r   )r   r   r   input_sca_tyother_sca_tyr   s         r   bitwise_op_type_checking_implz,TritonSemantic.bitwise_op_type_checking_impl  s    88FFuz(z(""$$ 	HL,?,?,A,A 	H+L,GGG..|\JJ
%%IIeZ00E%%IIeZ00Ee|r   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   
create_andr   rs   r   r   r   s      r   r   zTritonSemantic.and_  I    99%GGu{{4<225<NNPUPZ[[[r   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   	create_orr   rs   r   s      r   or_zTritonSemantic.or_  sG    99%GGu{{4<11%,MMuzZZZr   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   
create_xorr   rs   r   s      r   xor_zTritonSemantic.xor_  r   r   c                   |j                                         s |                     |t          j                  }|j                                         s |                     |t          j                  }|                     ||          S r)   )rs   is_int1bitcastr1   rj   r   r   s      r   logical_andzTritonSemantic.logical_and  sl    z!!## 	1LL00Ez!!## 	1LL00Eyy&&&r   c                   |j                                         s |                     |t          j                  }|j                                         s |                     |t          j                  }|                     ||          S r)   )rs   r   r   r1   rj   r   r   s      r   
logical_orzTritonSemantic.logical_or  sl    z!!## 	1LL00Ez!!## 	1LL00Exxu%%%r   c                    |j                                         s |                     |t          j                  }|                     |          S r)   )rs   r   r   r1   rj   invertr   r   s     r   not_zTritonSemantic.not_  s?    z!!## 	1LL00E{{5!!!r   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   create_lshrr   rs   r   s      r   lshrzTritonSemantic.lshr  I    99%GGu{{4<33EL%,OOQVQ[\\\r   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   create_ashrr   rs   r   s      r   ashrzTritonSemantic.ashr  r	  r   c                    |                      ||          \  }}|                     | j                            |j        |j                  |j                  S r)   )r   r&   r'   
create_shlr   rs   r   s      r   shlzTritonSemantic.shl  r   r   c                    |S r)    r  s     r   pluszTritonSemantic.plus  s    r   c                J   |j         j        }|                                r't          d|                                z   dz             |                     | j                            |                    | j                            |          }| 	                    ||d          S )Nz$wrong type argument to unary minus ()T)
rs   r   r{   r/   r   r&   r'   get_null_valuer   r   )r   r   r   _0s       r   r   zTritonSemantic.minus  s    z(   	eClF[F[F]F]]`ccddd[[44\5G5G5U5UVVXdeexxE4(((r   c                p   |j         j        }|                                s|                                r't	          d|                                z   dz             |                     | j                            |	                    | j                            |          }| 
                    ||          S )Nz%wrong type argument to unary invert (r  )rs   r   r{   r|   r/   r   r&   r'   get_all_ones_valuer   r   )r   r   r   _1s       r   r  zTritonSemantic.invert  s    z(   	fL$<$<$>$> 	fD|G\G\G^G^^addeee[[889K9KDL9Y9YZZ\hiiyy###r   vtl.block_typec                J    |j                             t          j                  S r)   )rs   r   r1   rj   )r   r  s     r   
_bool_likezTritonSemantic._bool_like  s    v%%bg...r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                r|
                                rL|                     | j                            |j        |j                  |                     |                    S |                     | j                            |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpOGTr   r  rY   r   create_icmpSGTcreate_icmpUGTrA   r   r   r   rZ   s       r   greater_thanzTritonSemantic.greater_than  )   88FFuJ%	  "" 	t;;t|::5<VVX\XgXghmXnXnooo 	t&&(( t{{4<#>#>u|U\#Z#Z\`\k\klq\r\rsss{{4<#>#>u|U\#Z#Z\`\k\klq\r\rsss6966777r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                r|
                                rL|                     | j                            |j        |j                  |                     |                    S |                     | j                            |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpOGEr   r  rY   r   create_icmpSGEcreate_icmpUGErA   r"  s       r   r   zTritonSemantic.greater_equal  r$  r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                r|
                                rL|                     | j                            |j        |j                  |                     |                    S |                     | j                            |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpOLTr   r  rY   r   create_icmpSLTcreate_icmpULTrA   r"  s       r   	less_thanzTritonSemantic.less_than  r$  r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                r|
                                rL|                     | j                            |j        |j                  |                     |                    S |                     | j                            |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpOLEr   r  rY   r   create_icmpSLEcreate_icmpULErA   r"  s       r   r   zTritonSemantic.less_equal  r$  r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                rL|                     | j        
                    |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpOEQr   r  rY   create_icmpEQrA   r"  s       r   equalzTritonSemantic.equal"      88FFuJ%	  "" 	o;;t|::5<VVX\XgXghmXnXnooo 	o;;t|99%,UUW[WfWfglWmWmnnn6966777r   c                   |                      ||          \  }}|j        j        }|                                rL|                     | j                            |j        |j                  |                     |                    S |	                                rL|                     | j        
                    |j        |j                  |                     |                    S t          d|           r   )r   rs   r   r|   r&   r'   create_fcmpUNEr   r  rY   create_icmpNErA   r"  s       r   	not_equalzTritonSemantic.not_equal-  r6  r   N)r   startendr   c               (   t          |t                    rt          |t                    st          d          t          |dz	            }t          |dz	            }|s|rt          d          ||k    rt          d          ||z
  }||dz
  z  dk    rt          d          |g}|t	          j        t          j        |          }|                    | j                  }| 	                    | j        
                    |||          |          S )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)rh   r+   r/   rH   r1   
block_typer2   r   r'   r&   create_make_range)	r   r;  r<  r   is_start_int64is_end_int64rangeshape	ret_ty_irs	            r   arangezTritonSemantic.arange<  s   %%% 	PZS-A-A 	PNOOOerk**C2I 	9\ 	97888%<<\]]]eUQYA%%BCCC>]28U33FLL..	{{4<99)UCPPRXYYYr   r>   c                   |t          d          |dk    r3| j                            |                    | j                            }n(t	          | j        d|j                   } ||          }|                     ||          S )Nz2dtype must be specified when value is not a tensorr   get_)r/   r'   r  r   getattrnamer&   )r   rO   r>   get_value_fns       r   rn   zTritonSemantic.scalar_constantN  s    =QRRRA::L//DL0I0IJJEE"4<1D
1D1DEEL L''E{{5%(((r   c                    t          |t          j                  r0|j        j        dk    s
J d            |                     ||          S |                     ||          S )Nr   zonly accepts size-1 tensor)rh   r1   r&   numelrO   r   rn   )r   rO   r>   s      r   make_scalarzTritonSemantic.make_scalarY  s_    eRY'' 	+;$)))+G)))99UE***##E5111r   rD  	List[int]c                V    |                      |                     ||          |          S r)   )splatrN  )r   rD  rO   r>   s       r   fullzTritonSemantic.full`  s&    zz$**5%88%@@@r   rO   c                >   |j                                         r
J d            t          |          dk    r|S t          j        |j        |          }|                     | j                            |	                    | j                  |j
                  |          S )NzCannot splat a block tensorr   )rs   is_blocklenr1   r?  r>   r&   r'   create_splatr   r   )r   rO   rD  r   s       r   rQ  zTritonSemantic.splatg  s    :&&((GG*GGGGu::??Lu{E22{{4<44V\\$,5O5OQVQ]^^`fgggr   	dst_shapecan_reorderc                    d}|D ]}||z  }|j         j        |k    rt          d          t          j        |j         j        |          }|                     | j                            |j	        ||          |          S )Nr   z:reshape() cannot change total number of elements in tensor)
rs   rM  r/   r1   r?  r   r&   r'   create_reshaper   )r   r   rW  rX  rM  sr   s          r   reshapezTritonSemantic.reshapen  s     	 	AQJEE:u$$YZZZuz0)<<{{4<66u|YP[\\^deeer   c                V   d |j         D             }|                    |d           |j                                        s|                     ||          S t          j        |j        j        |          }|                     | j	        
                    |j        |          |          S )Nc                6    g | ]}t          j        |          S r  r1   _unwrap_if_constexpr).0rt   s     r   
<listcomp>z.TritonSemantic.expand_dims.<locals>.<listcomp>x  s#    EEEAR,Q//EEEr   r   rD  )rD  insertrs   rT  rQ  r1   r?  r   r&   r'   create_expand_dimsr   )r   r   r*   rW  r   s        r   expand_dimszTritonSemantic.expand_dimsw  s    EEEEE	q!!!z""$$ 	6::e9:555uz0)<<{{4<::5<NNPVWWWr   c                2   |s
J d            t          |j                  dk    sJ t          j        |j        j        |j        d         |j        d         z   g          }|                     | j                            |j	        |j	                  |          S )Nz;current implementation of `cat` always may reorder elementsr   r   )
rU  rD  r1   r?  rs   r   r&   r'   
create_catr   )r   r~   r   rX  ret_types        r   catzTritonSemantic.cat  s    YYYYYY39~~""""=39Q<#)A,3N2OPP{{4<223:szJJHUUUr   abc                   |                      ||          \  }}|j        g k    }|r,|                     |d          }|                     |d          }t          |j        d         t          j                  rt	          j        d          }nd}|j        |gz   }t	          j        |j        j        |          }| 	                    | j
                            |j        |j                  |          }|r|                     |dgd          }|S )Nr   r   FrX  )r   rD  rf  rh   r1   rq   r?  rs   r   r&   r'   create_joinr   r\  )r   rk  rl  
was_rank_1two	new_shaperi  r   s           r   joinzTritonSemantic.join  s    ((A..1 W]
 	'  A&&A  A&&Aagbk2<00 	,q//CCCGseO	=	::kk$,2218QXFFQQ 	<,,sQCU,;;C
r   c                x   t          |j                  dk    sJ t          j        |j        d                   dk    sJ |j        d d         }t          j        |j        j        |          }| j                            |j	                  \  }}| 
                    ||          | 
                    ||          fS )Nr   rn  r   )rU  rD  r1   r`  r?  rs   r   r'   create_splitr   r&   )r   rk  rs  ri  outLHSoutRHSs         r   splitzTritonSemantic.split  s    AGq    '449999GCRCL	=	::2218<<KK))KK))
 	
r   dims
Tuple[int]c                   t          j                  t          |          k    rt          d          t          d |D                       t	          t          t          |                              k    rt          d|           t          j        j        j	        fd|D                       }| 
                    | j                            j        |          |          S )Nz5permute dims must have the same length as input shapec              3  >   K   | ]}t          j        |          V  d S r)   r_  )ra  ds     r   	<genexpr>z)TritonSemantic.permute.<locals>.<genexpr>  s-      ;;")!,,;;;;;;r   z?permute dims must be a permutation of 0, 1, ..., n-1, but were c                *    g | ]}j         |         S r  rc  )ra  r~  r   s     r   rb  z*TritonSemantic.permute.<locals>.<listcomp>  s    4R4R4RU[^4R4R4Rr   )rU  rD  r/   sortedlistrC  r1   r?  rs   r   r&   r'   create_transr   )r   r   rz  ri  s    `  r   permutezTritonSemantic.permute  s    u{s4yy((TUUU;;d;;;;;tE#d))DTDT?U?UUUe_ceefff=!24R4R4R4RT4R4R4RSS{{4<44U\4HH(SSSr   c                H   |j                                         s|                     ||          S |j                                         }t	          |          t	          |          k    rt          d| d|           ||k    r|S t          |          D ];\  }}||         |k    r*|dk    r$t          d||          d| d| d| d| 
          <t          j        |j         j	        |          }| 
                    | j                            |j        |          |          S )Nz!Cannot broadcast, rank mismatch: z, r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )rs   rT  rQ  get_block_shapesrU  r/   	enumerater1   r?  r   r&   r'   create_broadcastr   )r   r   rD  	src_shapeiitemr   s          r   broadcast_impl_shapez#TritonSemantic.broadcast_impl_shape  s[   z""$$ 	,::eU+++J//11	y>>SZZ''UUUeUUVVVIL ++ 	@ 	@GAtQx4DAII  "?W\]^W_ "? "?CG"? "?%&"? "?*3"? "?7<"? "? @ @ @ uz0%88{{4<88uMMvVVVr   c           	        |j         }|j         }|                                r||                                sh|                    |j                  }|                     | j                            |                    | j                  |j                  |          }n|                                s||                                rh|                    |j                  }|                     | j                            |                    | j                  |j                  |          }n8|                                r#|                                r|	                                }|	                                }t          |          t          |          k     rt          t          |          t          |                    D ]r}|                     | j                            |j        d          t          j        |j        dg|j        z                       }|j         }|	                                }snt          |          t          |          k     rt          t          |          t          |                    D ]r}|                     | j                            |j        d          t          j        |j        dg|j        z                       }|j         }|	                                }st          |          t          |          k    sJ g }t!          |          D ]\  }	}
||	         }|
dk    r|                    |           )|dk    s||
k    r|                    |
           Kt%          dt'          |	          z   dz   t'          |
          z   dz   t'          |          z             ||k    rNt          j        |j        |          }|                     | j                            |j        |          |          }||k    rNt          j        |j        |          }|                     | j                            |j        |          |          }||fS )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index r  r   )rs   rT  r   r   r&   r'   rV  r   r   r  rU  rC  re  r1   r?  valuesr  appendr/   strr  )r   r~   r   lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaper  leftrightr   s                r   r   z#TritonSemantic.broadcast_impl_value  s    ?? +	`V__%6%6 +	`++FM::F++dl77T\8R8RTWT^__aghhCC"" '	`v'8'8 '	`++FM::F++dl77T\8R8RTWT^__aghhCC__ #	`6??#4#4 #	`//11I//11I9~~I..s9~~s9~~>> : :A++dl&E&EcjRS&T&T&(mFMA3IYCY&Z&Z\ \C XF & 7 7 9 9II	:
 Y#i..00s9~~s9~~>> : :A++dl&E&EcjRS&T&T&(mFMA3IYCY&Z&Z\ \C XF & 7 7 9 9IIy>>S^^3333I$Y// e e4!!199$$U++++qjjetmm$$T****$ &136q66&:<@&ACFt99&MOV&WY\]bYcYc&d e e eI%%v}i@@kk$,"?"?
I"V"VX^__I%%v}i@@kk$,"?"?
I"V"VX^__Cxr   rounding_modeOptional[str]c                    |d S |dk    rt           j        j        S |dk    rt           j        j        S t	          d| d          )NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r   ROUNDING_MODERTNERTZr/   )r   r  s     r   _str_to_rounding_modez$TritonSemantic._str_to_rounding_mode  sU     4F""#((E!!#''r=rrrsssr   dst_tyc                8   |j         }|                                r|                    |j                  }||k    r|S |j        }|j        }|                                s|                                r|                     ||          S |j        }|j        }||k    r2t          dt          |          z   dz   t          |          z             | 	                    | j
                            |j        |                    | j
                            |          S )Nz!Cannot bitcast data-type of size z to data-type of size )rs   rT  r   r   r{   r   primitive_bitwidthr/   r  r&   r'   create_bitcastr   r   )r   r   r  src_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitss           r   r   zTritonSemantic.bitcast  s   ?? 	;++FM::FVL]
]
 	,*"3"3"5"5 	,99UF+++00x@3x==P T2 247MMB C C C{{4<66u|V\\RVR^E_E_``bhiiir   fp_downcast_roundingc                   |j         }|j        }|j        }||k    r|S |                                r|                    |          }|                     |          }d}|                                rP|                                r<|j        |j        k     r,|t          j        j	        }nL|t          j        j	        k    rd}n4|2t          dt          |          z   dz   t          |          z             |                                s|                                rI| j        j                            d          	 
J d             | j        j        d         ||||           S |                                r|                                s*|                                r|                                s|rM|                     | j                            |j        |                    | j                  |          |          S |                                r|                                r(|                                rH|                                s4|                     |                     |t2          j                  |          S |                                o#|                                o|j        |j        k    }|rL|                     | j                            |j        |                    | j                            |          S |                                o#|                                o|j        |j        k     }	|	rL|                     | j                            |j        |                    | j                            |          S |                                r'|                                r|j        |j        k    s|j        |j        k    r|                                 o|!                                 }
|!                                rh|j"                            | j                  }|                     | j        #                    |          |j"                  }| $                    ||          S |                     | j        %                    |j        |                    | j                  |
          |          S |&                                r=|                                r(|!                                rh|j"                            | j                  }|                     | j        #                    |          |j"                  }| $                    ||          S |                                 rL|                     | j        '                    |j        |                    | j                            |          S |                     | j        (                    |j        |                    | j                            |          S |                                r|&                                r|!                                s|                                 sL|                     | j        )                    |j        |                    | j                            |          S |                     | j        *                    |j        |                    | j                            |          S |+                                r|                                r|j        }|dk    rL|                     | j        ,                    |j        |                    | j                            |          S |d	k    rj| $                    |                     |t2          j-                  |                     | j        .                    d
          t2          j-                            S |                                r`|+                                rL|                     | j        /                    |j        |                    | j                            |          S |+                                r`|+                                rL|                     | j        0                    |j        |                    | j                            |          S J d| d|             )NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is convert_custom_typesz0target doesn't provide conversion for this type.)	_semanticr   r   r   zcannot cast z to )1rs   r   rT  r   r  r|   r  r   r  r  r/   r  is_fp8e4b15r'   codegen_fnsgetrX   r&   create_fp_to_fpr   r   rV   rU   rW   r   r1   rR   create_fp_trunccreate_fp_extrY   r<   r=   r   is_boolr>   r  r:  r   is_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fpr{   create_ptr_to_intrl   	get_int64create_int_to_ptrr  )r   r   r  r  r  r  r  use_custom_roundingtruncate_fpext_fpsign_extendtyr  bitwidths                 r   r   zTritonSemantic.cast  sd   ]
]
##L?? 	8++J77F  $99:NOO#!!## 	2
(>(> )
 )
 	2+j.KKK#+BDTDY-A-A%)9)>>>VZ@S#/  ":<?
OO"LNi"j!$Z"1 2 2 2 ""$$ 	y
(>(>(@(@ 	y<+//&( (/34 45g4 4 4C4<+,BCE6Sgswxxxx  	vJ$:$:$<$< 	v""$$	v)3):):)<)<	v	v ;;,,U\6<<;U;UWkllntv v v    	G););)=)= 	G  	G)3););)=)=	G99TYYubj99:FFF
 !,,.. J""$$J)J,II 	  	o;;t|;;EL&,,W[WcJdJdeegmnnn '')) J""$$J)J,II 	  	m;;t|99%,UYUaHbHbcceklll  		+:#4#4#6#6 		+#z'>>>*B[_i_xBxBx$2244QZ=O=O=Q=Q9QK!!## +[&&t|44[[!<!<R!@!@%+NN~~eR000{{4<#?#?fll[_[gNhNhju#v#v#)+ + + **,, 	s1B1B1D1D 	s!!## s[&&t|44[[!<!<R!@!@%+NN~~eR000))++ s{{4<#?#?fll[_[gNhNh#i#ikqrrr{{4<#?#?fll[_[gNhNh#i#ikqrrr  	s:#B#B#D#D 	s!!## s:+C+C+E+E s{{4<#?#?fll[_[gNhNh#i#ikqrrr{{4<#?#?fll[_[gNhNh#i#ikqrrr  	t:#4#4#6#6 	t!.H2~~{{4<#A#A%,PVP\P\]a]iPjPj#k#kmsttt1}}~~diirx&@&@$++dlNdNdefNgNgikiqBrBrsss  	q:#4#4#6#6 	q;;t|==elFLLY]YeLfLfggioppp  	n:#4#4#6#6 	n;;t|::5<VZVbIcIcddflmmm88U8888888r   c                    t           j        j        }|r[|dk    rt           j        j        }nC|dk    rt           j        j        }n+|dk    rt           j        j        }nt          d| d          |S )Nz.ca.cgz.cvCache modifier  not supported)r   CACHE_MODIFIERr   CACGCVr/   r   cache_modifiercaches      r   _str_to_load_cache_modifierz*TritonSemantic._str_to_load_cache_modifier  s}    !& 	S&&),5((),5((), !Q>!Q!Q!QRRRr   c                   t           j        j        }|rs|dk    rt           j        j        }n[|dk    rt           j        j        }nC|dk    rt           j        j        }n+|dk    rt           j        j        }nt          d| d          |S )Nz.wbr  z.csz.wtr  r  )r   r  r   WBr  CSWTr/   r  s      r   _str_to_store_cache_modifierz+TritonSemantic._str_to_store_cache_modifier  s    !& 
	S&&),5((),5((),5((), !Q>!Q!Q!QRRRr   c                    t           j        j        }|rC|dk    rt           j        j        }n+|dk    rt           j        j        }nt          d| d          |S )N
evict_lastevict_firstzEviction policy r  )r   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr/   )r   eviction_policyevictions      r   _str_to_eviction_policyz&TritonSemantic._str_to_eviction_policy  se    %, 	U,..-8 M11-9 !SO!S!S!STTTr   c                    d }|rC|dk    rt           j        j        }n+|dk    rt           j        j        }nt	          d| d          |S )NzeronanzPadding option r  )r   PADDING_OPTIONPAD_ZEROPAD_NANr/   )r   padding_optionpaddings      r   _str_to_padding_optionz%TritonSemantic._str_to_padding_option  s_     	S''+45((+3 !Q>!Q!Q!QRRRr   c                   t           j        j        }|rs|dk    rt           j        j        }n[|dk    rt           j        j        }nC|dk    rt           j        j        }n+|dk    rt           j        j        }nt          d| d          |S )Nacquirereleaseacq_relrelaxedMemory semantic r  )r   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr/   )r   
sem_optionsems      r   _str_to_semzTritonSemantic._str_to_sem  s    o- 
	PY&&o-y((o-y((o5y((o- !NJ!N!N!NOOO
r   c                    t           j        j        }|r[|dk    rt           j        j        }nC|dk    rt           j        j        }n+|dk    rt           j        j        }nt          d| d          |S )Ngpuctasysr  r  )r   MEM_SYNC_SCOPEGPUCTASYSTEMr/   )r   scope_optionscopes      r   _str_to_scopezTritonSemantic._str_to_scope  s}    !% 	Ru$$)-&&)-&&)0 !PL!P!P!PQQQr   c                n   |rt          |d          s|g}d |D             }|D ]5}t          |t                    rd|cxk    rt          |          k     sn J 6t          |          dk    sJ t          |          t          t	          |                    k    s
J d            t          |          S dS )N__iter__c                T    g | ]%}t          |t          j                  r|j        n|&S r  rh   r1   rq   rO   ra  elems     r   rb  z?TritonSemantic._canonicalize_boundary_check.<locals>.<listcomp>  s1    pppY]JtR\,J,JTdjjPTpppr   r   z'Duplicate dimension in `boundary_check`r  )hasattrrh   r+   rU  setr  )r   boundary_checkblock_shapedims       r   _canonicalize_boundary_checkz+TritonSemantic._canonicalize_boundary_check  s     	*>:66 2"0!1ppaopppN% L L!#s++KS0K0K0K0K3{;K;K0K0K0K0K0K0K0K~&&****~&&#c..A.A*B*BBBBDmBBB.)))rr   c	           
        ||t          d          |j        j        j        }	|	t          j        k    s
J d            |	                                r$|t          j        j        k    rt          d          |j        j        }
| 	                    ||

                                          }|                     | j                            |j        |||||          |
          S )NK`mask` and `other` arguments cannot be specified for loading block pointers4`tl.int1` should be rewritten in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r/   rs   
element_tyr1   rj   rY   r   r  r  r  r  r&   r'   create_tensor_pointer_loadr   )r   ptrmaskr   r
  r  r  r  is_volatileelt_tyr  s              r   _load_block_pointerz"TritonSemantic._load_block_pointer  s     u0jkkk$/   "X   ==?? 	aw"*;*CCC_``` $ ::>6KbKbKdKdee {{L33CJPWY^`hjuvv  	r   c	           
     J   |j         j                                        s*t          d|j                                          d          ||t          d          |s|rt          d          |j                                         sT|r(|j                                         rt          d          |r(|j                                         rt          d          |j                                         r^|-|                     ||j                                                   }|-|                     ||j                                                   }|j         j        }	|	j        }
|
t          j
        k    }|r<t          j        }
t          j        |
|	j                  }	|                     ||	          }||                     ||
          }|j                                         r|j                             |
          }n|
}|7|                     | j                            |j        |||          |          }nF|                     | j                            |j        |j        |r|j        nd |||          |          }|r |                     |t          j
                  }|S )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)rs   r   r{   r/   r   rT  r  r  r  r1   rj   int8pointer_typeaddress_spacer   r   r&   r'   create_loadr   create_masked_load)r   r  r  r   r
  r  r  r  r  ptr_tyr  r  r  r   s                 r   _load_legacyzTritonSemantic._load_legacy  s   x%%'' 	YWSX5F5F5H5HWWWXXX <E-HIII 	Yn 	Y X Y Y Y
 x  "" 	k j	**,, j !hiii k,,.. k !ijjj 8 	V00sx7P7P7R7RSS 11%9R9R9T9TUU " BG# 	)WF_VV-ABBF))C((C IIeV,,E 8 	X--f55FF F <++dl66sz5(T_``bhiiCC++//
DKY^Ihdhjo08+G GHNP PC  	*))C))C
r   r  r  Optional[TensorTy]r
  r   r  r  r  r  r  c	           
     ^   |                      |          }	|                     |          }
|                     |          }|j                                        r:|j        j                                        r|                     ||||||	|
|          S |                     ||||||	|
|          S r)   )	r  r  r  rs   r{   r  rT  r  r!  )r   r  r  r   r
  r  r  r  r  r  r  r  s               r   loadzTritonSemantic.load2  s     00@@//@@--n==8?? 	n!4!=!=!?!? 	n++CungW\^fhsttt $$S$~wPUW_almmmr   desctl.tensor_descriptor_basec                   t          |t          j                  sJ t          |j                  }t          |          |k    sJ d| dt          |                       |                     |d          }| j                            |j        || 	                    |          | 
                    |                    }|                     ||j                  S )N	expected  offsets, but got Frequire_i64)rh   r1   tensor_descriptor_baserU  r  _convert_to_ir_valuesr'   create_descriptor_loadr   r  r  r&   r?  )r   r%  offsetsr  r  ndimrt   s          r   descriptor_loadzTritonSemantic.descriptor_load@  s    $ 9:::::4#$$7||t###%W%W%WW%W%W###,,W%,HHL//WdFfFfguFvFv040L0L_0]0]_ _{{1do...r   c                    t          |t          j                  sJ t          |j                  }t          |          |k    sJ d| dt          |                       |j        |j        k    sJ d S )Nr(  r)  )rh   r1   r,  rU  r  rD  )r   r%  rO   r/  r0  s        r   validate_store_likez"TritonSemantic.validate_store_likeK  s}    $ 9:::::4#$$7||t###%W%W%WW%W%W###{d.......r   c                    |                      |||           |                     |d          }|                     | j                            |j        |j        |          t          j                  S NFr*  )r3  r-  r&   r'   create_descriptor_storer   r1   void)r   r%  rO   r/  s       r   descriptor_storezTritonSemantic.descriptor_storeQ  sc      ug666,,W%,HH{{4<??U\[bccegelmmmr   c                   |                      |||           |j        t          j        t          j        t          j        t          j        t          j        t          j        hv s
J d            | 	                    |d          }t          j        j        }|                     | j                            ||j        |j        |          t          j                  S NUnsupported dtypeFr*  )r3  r>   r1   rk   r2   rm   rR   rP   rQ   r-  r   DESCRIPTOR_REDUCE_KINDADDr&   r'   create_descriptor_reducer   r7  r   r%  rO   r/  rN   s        r   descriptor_atomic_addz$TritonSemantic.descriptor_atomic_addV  s      ug666zbi29bj"*VXVabbbbdwbbb,,W%,HH(,{{4<@@t{TYT`bijjlnlstttr   c                j    t           j                                        }|j        dk    o
|j        dk    S )NcudaZ   )r   activeget_current_targetbackendarch)r   targets     r   _has_native_tmazTritonSemantic._has_native_tma]  s.    1133&(>V[B->?r   c                   |t           j        t           j        t           j        t           j        t           j        t           j        hv s
J d            |t           j        t           j        hv r|                                 sJ d            d S d S )Nr;  z-16-bit float types require native tma support)r1   rk   r2   rm   rl   rP   rQ   rI  )r   r>   s     r   $_descriptor_atomic_min_max_supportedz3TritonSemantic._descriptor_atomic_min_max_supporteda  sx    BHbi2:r{[[[[]p[[[RZ---''))ZZ+ZZZZ .-ZZr   c                >   |                      |||           |                     |j                   |                     |d          }t          j        j        }|                     | j        	                    ||j
        |j
        |          t          j                  S r5  )r3  rK  r>   r-  r   r<  MINr&   r'   r>  r   r1   r7  r?  s        r   descriptor_atomic_minz$TritonSemantic.descriptor_atomic_minf        ug66611$*===,,W%,HH(,{{4<@@t{TYT`bijjlnlstttr   c                >   |                      |||           |                     |j                   |                     |d          }t          j        j        }|                     | j        	                    ||j
        |j
        |          t          j                  S r5  )r3  rK  r>   r-  r   r<  MAXr&   r'   r>  r   r1   r7  r?  s        r   descriptor_atomic_maxz$TritonSemantic.descriptor_atomic_maxm  rO  r   c                   |                      |||           |j        t          j        t          j        t          j        t          j        hv s
J d            |                     |d          }t          j	        j
        }|                     | j                            ||j        |j        |          t          j                  S r:  )r3  r>   r1   rk   r2   rm   rl   r-  r   r<  ANDr&   r'   r>  r   r7  r?  s        r   descriptor_atomic_andz$TritonSemantic.descriptor_atomic_andt        ug666zbi29bhGGGGI\GGG,,W%,HH(,{{4<@@t{TYT`bijjlnlstttr   c                   |                      |||           |j        t          j        t          j        t          j        t          j        hv s
J d            |                     |d          }t          j	        j
        }|                     | j                            ||j        |j        |          t          j                  S r:  )r3  r>   r1   rk   r2   rm   rl   r-  r   r<  ORr&   r'   r>  r   r7  r?  s        r   descriptor_atomic_orz#TritonSemantic.descriptor_atomic_or{  s      ug666zbi29bhGGGGI\GGG,,W%,HH(+{{4<@@t{TYT`bijjlnlstttr   c                   |                      |||           |j        t          j        t          j        t          j        t          j        hv s
J d            |                     |d          }t          j	        j
        }|                     | j                            ||j        |j        |          t          j                  S r:  )r3  r>   r1   rk   r2   rm   rl   r-  r   r<  XORr&   r'   r>  r   r7  r?  s        r   descriptor_atomic_xorz$TritonSemantic.descriptor_atomic_xor  rV  r   c                ~   t          |t          j                  sJ |dk    s
J d            |dk    s
J d            t          |j                  dk    sJ d|j                     |j        d         dk    sJ d|j                     t          |j                  dk    sJ d	|j                     |j        d         d
k    sJ d|j                     |j        }d|j        z  d
z  }|j        d         |k    sJ d| d| d|j        d                      t          j        |j        |j        d         |j        d         g          }| 	                    |fd          d         }| j
                            |j        |j        ||                    | j
                            }	|                     |	|          S )N z#cache modifier is not supported yetz$eviction policy is not supported yetr   descriptor must be 2D, but got r   r   *descriptor block must have 1 row, but got x offsets must be 1D, but got    z5descriptor gather must have at least 8 rows, but got r>  zdescriptor gather of  must have at least  columns, but got Fr*  )rh   r1   r,  rU  r  rD  r>   r  r?  r-  r'   create_descriptor_gatherr   r   r&   )
r   r%  	x_offsetsy_offsetr  r  r>   min_colsrs   rt   s
             r   descriptor_gatherz TritonSemantic.descriptor_gather  s    $ 9:::::###%J###"$$$&L$$$ 4#$$)))+_TM]+_+_)))"a''')hVZVf)h)h''' 9?##q(((*\9?*\*\((( q!Q&&&(q`i`o(q(q&&&
11A5   AE  A  Ax  A  Akok{|}k~  A  A   }TZ)/!*<d>Nq>Q)RSS--xl-NNqQL11$+y?OQY[_[e[efjfr[s[stt{{1d###r   c                   t          |t          j                  sJ t          |j                  dk    sJ d|j                     |j        d         dk    sJ d|j                     t          |j                  dk    sJ d|j                     |j        d         dk    sJ d|j                     |j        }d	|j        z  dz  }|j        d         |k    sJ d
| d| d|j        d                      | 	                    |fd          d         }| j
                            |j        |j        |j        |           |                     d t          j                  S )Nr   r_  r   r   r`  ra  rb  z6descriptor scatter must have at least 8 rows, but got r>  zdescriptor scatter of rc  rd  Fr*  )rh   r1   r,  rU  r  rD  shapaer>   r  r-  r'   create_descriptor_scatterr   r&   r7  )r   r%  rO   rf  rg  r>   rh  s          r   descriptor_scatterz!TritonSemantic.descriptor_scatter  s   $ 9::::: 4#$$)))+_TM]+_+_)))"a''')hVZVf)h)h''' 9?##q(((*]9K[*]*]((( q!Q&&&(rajap(r(r&&&
11A5   BU  B  BPX  B  Blpl|}~l  B  B   --xl-NNqQ..t{EL)JZ\deee{{4)))r   c           	     Z   |t          d          |j        j                                        }|j                                        s|                     ||          }|j                                        s
J d            ||j                                        k    s(J d| d|j                                         d            |j        j        j        |j        j        k    s*J d|j        j        j         d|j        j         d            |j        j        j        }|t          j        k    s
J d            |                     ||          }| 	                    ||          }| 
                    | j                            |j        |j        |||          t          j                  S )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r/   rs   r  r  rT  r  r1   rj   r  r   r&   r'   create_tensor_pointer_storer   r7  )	r   r  valr  r
  r  r  r  r  s	            r   _store_block_pointerz#TritonSemantic._store_block_pointer  s    jkkk h)::<<x  "" 	>++C==Cx  ""SS$SSSSch77 
 
 
 
 
`+``9R9R9T9T```
 
 
x"-1DDDD  Gu\_\d\o\z  Gu  Gu  VY  V^  Vi  Gu  Gu  GuDDD$/   "X    ::>;WW iiV$$ {{L44SZ^]bdlmmoqovx x 	xr   c           	        |j         j                                        s*t          d|j                                          d          |rt          d          |j                                         sR|j                                         rt          d          |r(|j                                         rt          d          |j                                         r\|                     ||j                                                   }|-|                     ||j                                                   }|j         j        }|j        }|t          j
        k    r<t          j        }t          j        ||j                  }|                     ||          }|                     ||          }|E|                     | j                            |j        |j        ||          t          j                  S |j         j                                        st          d          |                     | j                            |j        |j        |j        ||          t          j                  S )Nr  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  "Mask must have boolean scalar type)rs   r   r{   r/   r   rT  r  r  r  r1   rj   r  r  r  r   r&   r'   create_storer   r7  r  create_masked_store)	r   r  rp  r  r
  r  r  r   r  s	            r   _store_legacyzTritonSemantic._store_legacy  sA   x%%'' 	ZXSX5F5F5H5HXXXYYY  	F E F F F
 x  "" 	jx  "" k !ijjj j	**,, j !hiii 8 	T++C1J1J1L1LMMC00sx7P7P7R7RSS" RWWF_VV-ABBF))C((C iiV$$ <;;t|88SZQVX`aacecjkkky'')) 	CABBB{{4<;;CJ
TXT_afhpqq7$ $ 	$r   rp  c                   |                      |          }|                     |          }|j                                        s|j        j                                        rt          d          |j                                        r8|j        j                                        r| 	                    ||||||          S | 
                    ||||||          S )N"Cannot store to a constant pointer)r  r  rs   is_constr   r/   r{   r  rT  rq  rv  )	r   r  rp  r  r
  r  r  r  r  s	            r   storezTritonSemantic.store  s     11.AA//@@8 	C#(/":":"<"< 	CABBB8?? 	W!4!=!=!?!? 	W,,S#t^UT\]]] %%c3neXVVVr   cmpr  r   c           	     4   |                      |          }|                     |          }|j        j        j        }|j        dvrt          d          |                     | j        	                    |j
        |j
        |j
        ||          |j                  S )N)   r>  r   z9atomic_cas only supports elements with width {16, 32, 64})r  r  rs   r   r  r  r/   r&   r'   create_atomic_casr   )r   r  r{  rp  r  r   r  s          r   
atomic_caszTritonSemantic.atomic_cas  s    s##""5))X_/
(<<XYYY{{4<99#*cjRUR\^achiiknkstttr   op#Tuple[TensorTy, TensorTy, TensorTy]c                   |j         j                                        s)t          d|j                                         z             |j                                         s|j         j                                        rt          d          |j         j        j        }|t          j        u r|dk    rt          d|z   dz             |t          j	        u r|dk    rt          d|z   dz             |t          j
        t          j        fv s|j        dk     r%t          d|z   dz   t          |          z             |j                                         r^|-|                     ||j                                                   }|-|                     ||j                                                   }|                     ||j         j        j                  }|| j                            d	          }t          j        }|j                                         rW|j                             t          j                  }| j                            |                    | j                  |          }|                     ||          }|||fS )
Nz)Pointer argument of store instruction is rx  r   atomic_z does not support fp16z does not support bf16r}  z does not support T)rs   r   r{   r/   r   ry  r  r1   rP   rQ   int16uint16r  r  rT  r  r  r   r'   ri   rj   r   rV  r   r&   )r   r  rp  r  r  r  mask_irmask_tys           r   atom_red_typechecking_implz)TritonSemantic.atom_red_typechecking_impl  s=   x%%'' 	`H38K\K\K^K^^___8 	C#("5">">"@"@ 	CABBBX_/
##eY^.FFGGG$$uY^.FFGGG"(BI...*2ORT2T2TY^.BBS__TUUU8 	R00sx7P7P7R7RSS//SX5N5N5P5PQQiiSX_788<l++D11GgGx  "" Z(2227;;,33GMM$,4O4OQXYY;;w00DC~r   c                    |j         j        }t          j        |d          }|                     ||          }|                     ||dz
            }|                     |t          j                  S )NF)r  signedr   )r>   r  r1   get_int_dtyper   r  r   rj   )r   rt   r  idtypeixsignbits         r   _signbitzTritonSemantic._signbit6  sa    7-!8EBBB\\!V$$))B1--yy"'***r   c                   |                      |||d          \  }}}|                     |          }|                     |          }|j        j        }|                                r|                                rV|                     | j        	                    t          j        j        |j        |j        |j        ||          |j                  S |                     | j        	                    t          j        j        |j        |j        |j        ||          |j                  S |t          j        t          j        hvrt%          d|           |t          j        k    rt          j        nt          j        }|                     ||          }|                     |t          j        |d                    }	|t          j        k    rt          j        nt          j        }
|                     ||
          }|                     |t          j        |
d                    }|                     |          }|                     |          }|                     | j        	                    t          j        j        |	j        |j        |                     ||          j        ||          |j                  }|                     | j        	                    t          j        j        |j        |j        |                     ||          j        ||          |j                  }|                     |||          }|                     ||          S )Nr   z#atomic_max not supported for dtype r   )r  r  r  rs   r   rY   r   r&   r'   create_atomic_rmwr   	ATOMIC_OPrQ  r   UMAXr1   rR   rT   rA   r2   rl   r   r  rk   rm   r  r  r   UMINwherer   r  rp  r  r  r   sca_tyi_typei_vali_ptrui_typeui_valui_ptrnegpospos_retneg_retr   s                     r   
atomic_maxzTritonSemantic.atomic_max=     88c4OOS$s##""5))==?? 	##%% {{L222<3CSZQTQ[]a]hjmotuuH   {{L222<3DcjRUR\^b^iknpuvvH   "*bj111J&JJKKK#rz11rxS&))S"/&!"<"<==%33"))c7++c2?7A#>#>??mmC  iinn++L**2<+;U\5<+/99T3+?+?+FUT TUZU_a a ++L**2<+<fmV]+/99T3+?+?+FUT TU[U`b b jjgw//||C(((r   c                   |                      |||d          \  }}}|                     |          }|                     |          }|j        j        }|                                r|                                rV|                     | j        	                    t          j        j        |j        |j        |j        ||          |j                  S |                     | j        	                    t          j        j        |j        |j        |j        ||          |j                  S |t          j        t          j        hvrt%          d|           |t          j        k    rt          j        nt          j        }|                     ||          }|                     |t          j        |d                    }	|t          j        k    rt          j        nt          j        }
|                     ||
          }|                     |t          j        |
d                    }|                     |          }|                     |          }|                     | j        	                    t          j        j        |	j        |j        |                     ||          j        ||          |j                  }|                     | j        	                    t          j        j        |j        |j        |                     ||          j        ||          |j                  }|                     |||          }|                     ||          S )Nr   z#atomic_min not supported for dtype r   )r  r  r  rs   r   rY   r   r&   r'   r  r   r  rM  r   r  r1   rR   rT   rA   r2   rl   r   r  rk   rm   r  r  r   r  r  r  s                     r   
atomic_minzTritonSemantic.atomic_minc  r  r   c           
        |                      |||d          \  }}}|                     |          }|                     |          }|j        j        }|                                rt          j        j        nt          j        j	        }| 
                    | j                            ||j        |j        |j        ||          |j                  S )Nr   )r  r  r  rs   r   r|   r   r  FADDr=  r&   r'   r  r   )r   r  rp  r  r  r   r  r  s           r   
atomic_addzTritonSemantic.atomic_add  s    88c4OOS$s##""5))"("4"4"6"6LR\BL<L{{4<99"cj#*VZVacfhmnn8% % 	%r   c           
     :   |                      |||d          \  }}}|                     |          }|                     |          }|                     | j                            t          j        j        |j	        |j	        |j	        ||          |j
                  S )Nand)r  r  r  r&   r'   r  r   r  rT  r   rs   r   r  rp  r  r  r   s         r   
atomic_andzTritonSemantic.atomic_and      88c4OOS$s##""5)){{L**2<+;SZUYU`beglmmorowy y 	yr   c           
     :   |                      |||d          \  }}}|                     |          }|                     |          }|                     | j                            t          j        j        |j	        |j	        |j	        ||          |j
                  S )Nor)r  r  r  r&   r'   r  r   r  rX  r   rs   r  s         r   	atomic_orzTritonSemantic.atomic_or  s    88c4NNS$s##""5)){{L**2<?CJ
TXT_adfkllnqnvx x 	xr   c           
     :   |                      |||d          \  }}}|                     |          }|                     |          }|                     | j                            t          j        j        |j	        |j	        |j	        ||          |j
                  S )Nxor)r  r  r  r&   r'   r  r   r  r[  r   rs   r  s         r   
atomic_xorzTritonSemantic.atomic_xor  r  r   c           
     :   |                      |||d          \  }}}|                     |          }|                     |          }|                     | j                            t          j        j        |j	        |j	        |j	        ||          |j
                  S )Nxchg)r  r  r  r&   r'   r  r   r  XCHGr   rs   r  s         r   atomic_xchgzTritonSemantic.atomic_xchg  s    88c4PPS$s##""5)){{L**2<+<cj#*VZVacfhmnnH  	r   c                    |                                 | j        j        j        v sJ d| j        j        j         d|             |                                }|dk    rd}t          t          j        |          S )Nzinput_precision must be one of . Got TF32X3TF32x3)lowerr'   r   allowed_dot_input_precisionsupperrI  r   INPUT_PRECISION)r   input_precisions     r   _str_to_dot_input_precisionz*TritonSemantic._str_to_dot_input_precision  s    $$&&$,*>*[[[[xdl.B._xxgvxx \[[)//11h&&&Or)?;;;r   accr  max_num_imprecise_acc	out_dtypec           
        |j                                         r|j                                         sJ |j                                        r|j                                        rn|j        t          j        t          j        t          j        t          j        t          j	        fv sJ d|j                     |j        t          j        t          j        t          j        t          j        t          j	        fv sJ d|j                     |j        |j        k    sJ d|j         d|j                     |j        
                                s|j        
                                rgd| j        j        j        v rt          j        d           |                     |t          j                  }|                     |t          j                  }|| j        j        j        }|                     |          }t'          |j                  }t'          |j                  }||cxk    rdk    s,n ||cxk    rdk    sn J d	|j         d
|j         d            |j        d         j        |j        d         j        k    sAJ d|j         d|j         d|j        d         j         d|j        d         j         d	            | j        j                            d          	 
J d             | j        j        d         |j         |j                   }	|j        d         j        |	d         k    r8|j        d         j        |	d         k    r|j        d         j        |	d         k    s%J d|	d          d|	d          d|	d                      |j         j                                        rK|j         j        t          j        k    s
J d            | j                            d          }
t          j        }n|                                rt;          d          |j         j                                        s|j         j                                        r'| j                            d          }
t          j	        }nJ|                                 r| j        !                    d          n| j                            d          }
|}|j         j        d         }|j         j        d         }|j         j        d         }|dk    r|j         j        d         nd }t	          j"        ||r|||gn||g          }|4| j        #                    |$                    | j                  |
          }n|j%        }|j         |k    sJ |G|j                                        r+|j                                        r| j        j        j&        }nQd}nN|j                                        r5|j                                        r||k    rt;          d| d| d          | '                    | j        (                    |j%        |j%        |||          |          S )NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got r   fp8e4b15zthe use of fp8e4b15 is deprecated on Hopper and later architectures and can cause significant slow down. It will be removed in a future triton releaser      +Both inputs must be either 2D or 3D; (lhs: 	 vs rhs: r  rn  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (min_dot_sizez2target doesn't provide lower shape bounds for dot.r   r   zInput shapes should have M >= z, N >= z
 and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ())rs   rT  r>   rX   r1   r  uint8rP   rQ   rR   r  r'   r   !deprecated_fp8_dot_operand_dtypeswarningswarnr   default_dot_input_precisionr  rU  rD  rO   r  r  r   rY   	get_int32r2   rW   r/   rU   get_fp32rV   get_fp16r?  rV  r   r   max_num_imprecise_acc_defaultr&   
create_dot)r   r~   r   r  r  r  r  lhs_rankrhs_rankr  r  ret_scalar_tyMNKBr   
acc_handles                     r   dotzTritonSemantic.dot  s   x  "":sx'8'8':'::::9 	p#)"2"2"4"4 	p9"(BJ!#!- - - -.Rsy.R.R- - -9"(BJ!#!- - - -.Rsy.R.R- - -9	)))+oTWT]+o+odgdm+o+o)))9  "" 	-ci&;&;&=&= 	-T\1SSS m   ))C,,C))C,,C""l2NO::?KKsy>>sy>>8((((q((((H,E,E,E,EA,E,E,E,E,E  HVuxu~  HV  HV  JM  JS  HV  HV  HV,E,E,Ey}"ci'   uSY  u  uQTQZ  u  u  Z]  Zc  df  Zg  Zm  u  u  _b  _h  ik  _l  _r  u  u  u  |'++ #'( ()]( ( (?t|/?#(SSy}"l1o55#)B-:MQ]^_Q`:`:`	"#|A666uauuVWuudpqrdsuu 7668?!!## 	&8?bg---/E---''**BHMM   		&z   X_$$&& 	&#(/*A*A*C*C 	&&&q))BJMM-6->->-@-@^&&q)))dlF[F[\]F^F^B%MHN2HN2HN2!)QCHN1D}1.Hq!Qii1a&II;226<<3M3MrRRJJJ8v%%%% !(y!! *ci&6&6&8&8 *(,(<(Z%%()%%y!! hci&6&6&8&8 h=RUV=V=V !f;P!f!fbc!f!f!fggg{{L##CJ
JYnooqwy y 	yr   float_formatc                    t          t          j        |                                d           }|t	          d| d          |S )NzInvalid float format: rc   )rI  r   ScaleDotElemTypeTYr  r/   )r   r  ty_enums      r   _str_to_fp_typezTritonSemantic._str_to_fp_type	  sF    "/1C1C1E1EtLL?ElEEEFFFr   c                   t           j        t           j        t           j        t           j        d                    |          }|<|dk    sJ d|             |j        t           j        k    sJ d|j                     |S |j        |k    r|S t           j        t           j        t           j        t           j        d|         }|j        |k    sJ d| d|j                     | 	                    ||          S )z
        If float_format is subbyte, make sure it's packed as uint8 and return it.
        Otherwise, return a tensor (perhaps bitcasting) of the specified float format.
        )e5m2e4m3bf16fp16Ne2m1z)Internal Error: Unexpected float format: z)e2m1 format must be packed as uint8. Got zUnexpected dtype for r  )
r1   float8e5
float8e4nvrQ   rP   r  r>   r  r  r   )r   rp  r  	triton_tyunsigned_tys        r   _bitcast_to_fp_typez"TritonSemantic._bitcast_to_fp_type  s   
  ["-Z! !!$\!2!2 	6)))+eWc+e+e)))9(((*aVYV_*a*a(((J9	!!J#%8RXryZ\ZcddeqrK9+++-d\-d-dY\Yb-d-d+++<<Y///r   	lhs_scale
lhs_format	rhs_scale
rhs_formatTensorTy | None	fast_math
lhs_k_pack
rhs_k_packc                   |j                                         r|j                                         sJ t          |j                  }t          |j                  }||cxk    rdk    s,n ||cxk    rdk    sn J d|j         d|j         d            |j        }|j        }|                     |          }|                     |          }h d}||v sJ d|             ||v sJ d|             |d u p"t          |t          j                  o|j        d u }|d u p"t          |t          j                  o|j        d u }| 	                    ||          }| 	                    ||          }|	s|d	k    s
J d
            |
s|d	k    s
J d
            |j         j        dd          \  }}|j         j        dd          \  }}|d	k    rdnd}|d	k    rdnd}|	r||z  n|}|
r||z  n|}||k    sJ d|j         d|j         d            |dk    r|j         j        d         nd }|	s||z  }|
s||z  }t          j
        ||r|||gn||g          }| j                            d          }|4| j                            |                    | j                  |          }n|j        }|j         |k    sJ |rd n|j        }|rd n|j        } |                     | j                            |j        | ||j        ||||	|
|
  
        |          S )Nr   r  r  r  r  >   r  r  r  r  r  zNYI: lhs_format zNYI: rhs_format r  zBonly mxfp4 inputs can be packed along a dimension different than Kr  r   zCReduction dimension should pack the same number of elements; (lhs: r   )rs   rT  rU  rD  rO   r  rh   r1   rq   r  r?  r'   r  rV  r   r   r&   create_dot_scaled)!r   r~   r  r  r   r  r  r  r  r  r  r  r  r  lhs_format_enumrhs_format_enumallowed_formatsrhs_scale_is_nonelhs_scale_is_noner  K_LHSK_RHSr  PACKED_APACKED_BPACKED_A_DIMPACKED_B_DIMr  r   r  r  rhs_scale_handlelhs_scale_handles!                                    r   
dot_scaledzTritonSemantic.dot_scaled!  s    x  "":sx'8'8':'::::sy>>sy>>8((((q((((H,E,E,E,EA,E,E,E,E,E  HVuxu~  HV  HV  JM  JS  HV  HV  HV,E,E,E$*
$*
..z::..z::BBB_,,,.M.M.M,,,_,,,.M.M.M,,,%-r*Y2U2U2qZcZimqZq%-r*Y2U2U2qZcZimqZq&&sJ77&&sJ77wZ61113w111wZ61113w1118>"##&58>"##&q"f,,11!"f,,11!+5@x%''5+5@x%''5|+++  .Tsvs|  .T  .T  HK  HQ  .T  .T  .T+++!)QCHN1D 	HA 	HAyq*D1a))q!fEE\""1%%;226<<3M3MrRRJJJ8v%%%%#4J44):J#4J44):J{{L**3:7GZ]Zdfv+:IzS]_ik klrt t 	tr   	conditionc                0   |j         t          j        k    rt          j        d|j                     |                     |t          j                  }|                     ||dd          \  }}|j                                        r3| 	                    ||          \  }}| 	                    ||          \  }}n| 	                    ||          \  }}|j        }| 
                    | j                            |j        |j        |j                  |          S )Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)r>   r1   rj   r  r  r   r   rs   rT  r   r&   r'   create_selectr   )r   r  rt   r   r  r   s         r   r  zTritonSemantic.whereU  s   ?bg%%M L  {D  {J  L  L   IIi11	00AtTBB1>""$$ 	C44YBBLIq,,Q22DAqq44YBBLIq{{4<55i6FRSRZ[[]cdddr   c                b    |rt          j        ||          }n|}|                     ||          S r)   )r1   r?  r&   )r   rt   rZ   r  res_tys        r   wrap_tensorzTritonSemantic.wrap_tensori  s8     	]9i88FF F{{1f%%%r   inputsSequence[TensorTy]Tuple[TensorTy, ...]c                2    t           fdD                       dd         j        j        t                    }|k     sJ d| d            fdt	                    D             t          fdD                       s
J d             j                            d D                        |                                           sJ t           fd	t          t                              D                       S )
Nc              3  ^   K   | ]'}                     ||j        j        gd           V  (dS )Tro  N)r\  rM  rO   )ra  tr   s     r   r  z+TritonSemantic.reduction.<locals>.<genexpr>s  s<      ^^RS4<<AGM?<MM^^^^^^r   r   z&reduction axis must be < inputs rank (r  c                &    g | ]\  }}|k    |S r  r  )ra  r  r[  r*   s      r   rb  z,TritonSemantic.reduction.<locals>.<listcomp>y  s"    AAA41aqDyyQyyyr   c              3  8   K   | ]}|j         j        k    V  d S r)   )rs   rD  )ra  r  rD  s     r   r  z+TritonSemantic.reduction.<locals>.<genexpr>z  s,      99Q16<5(999999r   z-all reduction inputs must have the same shapec                    g | ]	}|j         
S r  r   ra  r  s     r   rb  z,TritonSemantic.reduction.<locals>.<listcomp>|  s    /I/I/IQ/I/I/Ir   c              3     K   | ]>}                                         |          |         j        j                  V  ?d S r)   r  
get_resultrs   r   )ra  r  r  	reduce_opr  r   s     r   r  z+TritonSemantic.reduction.<locals>.<genexpr>  se       u u\]DY11!44fQin6KYWWu u u u u ur   )
tuplers   rD  rU  r  allr'   create_reduceverifyrC  )r   r  r*   region_builder_fnrankr  r  rD  s   ```  @@@r   	reductionzTritonSemantic.reductionq  su   <^^^^W]^^^^^FDq	$5zzd{{{LTLLL{{{AAAA9U#3#3AAA	9999&99999jj;jjjjL../I/I&/I/I/I4PP	)$$$!!!!! u u u u u u uafgjkqgrgrasasu u u u u 	ur   reversec                    d         j         j        t                    }| |cxk    r|k     sn J d| d| d            |dk     r||z  }D ]}|j         j        k    s
J d             j                            d D             ||           |                                           sJ t           fdt          t                              D                       S )Nr   z
scan axis z must be < inputs rank (r  z(all scan inputs must have the same shapec                    g | ]	}|j         
S r  r  r  s     r   rb  z3TritonSemantic.associative_scan.<locals>.<listcomp>  s    +E+E+EAH+E+E+Er   c              3     K   | ]>}                                         |          |         j        j                  V  ?d S r)   r  )ra  r  r  scan_opr   rD  s     r   r  z2TritonSemantic.associative_scan.<locals>.<genexpr>  sN      ww_`T%%g&8&8&;&;VAY^=RTYZZwwwwwwr   )rs   rD  rU  r'   create_scanr  r  rC  )	r   r  r*   r   r#  r!  r  r'  rD  s	   ``     @@r   associative_scanzTritonSemantic.associative_scan  s5   q	$5zzu####t#####%W$%W%WPT%W%W%W###!88DLD 	U 	UA6<5(((*T((((,**+E+Ef+E+E+EtWUU'"""~~wwwwwwwdijmntjujudvdvwwwwwwr   srcindexc                \   |j                                         s
J d            t          |j        j                  }t          |j        j                  |k    s
J d            | |cxk    r|k     sn J d| d| d            |dk     r||z  }t          |          D ]=}||k    r	|j        j        |         |j        j        |         k    sJ d| d            >| j                            |j        |j        |          }| 	                    ||j        j
        |j        j                  S )	Nzindex must be an integer tensorz0source and index tensors must have the same rankzgather axis z must be < source rank (r  r   z
index dim z( must match the corresponding source dim)r>   rY   rU  rs   rD  rC  r'   create_gatherr   r  r   )r   r*  r+  r*   r!  r~  gathers          r   r.  zTritonSemantic.gather  sO   {!!##FF%FFFF38>""5:#$$,,,.`,,,u####t#####%YD%Y%YRV%Y%Y%Y###!88DLDt 	y 	yADyy:#A&#(.*;;;;=x$=x=x=x;;;;++CJdKK9IJJJr   num_binsc                   t          |j                  dk    s
J d            |j                                        s
J d            |O|                     ||j                  }|j        j                                        st          d          |j	        }| 
                    | j                            |j	        ||          t          j        t          j        |g                    S )Nr   z histogram only supports 1D inputz%histogram only supports integer inputrs  )rU  rD  r>   rY   r  rs   r   r  r/   r   r&   r'   create_histogramr1   r?  r2   )r   r   r/  r  s       r   	histogramzTritonSemantic.histogram  s    5;1$$$&H$$${!!##LL%LLLL,,T5;??D9#++-- G !EFFF;D{{4<88xQUVV=H:>>@ @ 	@r   r  c                   t          dt          |j                            t          |          k    rt          d          |j                            dt          j        ||j                                                             |S )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   rU  rD  r/   r   set_attrr   	make_attrget_contextr   rt   r  s      r   multiple_ofzTritonSemantic.multiple_of  sl    q#ag,,3v;;..`aaa	+R\&!(BVBVBXBX-Y-YZZZr   c                    t          |j                  t          |          k    rt          d          |j                            dt          j        ||j                                                             |S )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityrU  rD  r/   r   r4  r   r5  r6  r7  s      r   max_contiguouszTritonSemantic.max_contiguous  sa    qw<<3v;;&&cddd	/2<@T@T@V@V+W+WXXXr   c                    t          |j                  t          |          k    rt          d          |j                            dt          j        ||j                                                             |S )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr:  r7  s      r   max_constancyzTritonSemantic.max_constancy  sa    qw<<3v;;&&bccc	.",vqx?S?S?U?U*V*VWWWr   c                p    |                      | j                                        t          j                  S r)   )r&   r'   create_barrierr1   r7  )r   s    r   debug_barrierzTritonSemantic.debug_barrier  s&    {{4<6688"'BBBr   prefixargsList[TensorTy]hexc                   |                     d          s|r|dz  }|                     d          s|r|d d         dz   }t          |          dk    r|                    d          sd|z   }d |D             }d |D             }|                     | j                            ||||          t          j                  S )N r  rn  r   c                    g | ]	}|j         
S r  r  ra  args     r   rb  z/TritonSemantic.device_print.<locals>.<listcomp>  s    ///3CJ///r   c                @    g | ]}|j                                         S r  )r>   r   rH  s     r   rb  z/TritonSemantic.device_print.<locals>.<listcomp>  s&    ???3SY,,..???r   )endswithrU  
startswithr&   r'   create_printr1   r7  )r   rA  rB  rD  new_args	is_signeds         r   device_printzTritonSemantic.device_print  s     s## 	 	cMFt$$ 	( 	(CRC[4'Fv;;??6#4#4S#9#9?6\F//$///??$???	{{4<44VS(IVVXZX_```r   r   r   c                    | j         j        j        sd S |                     | j                             |j        |          t          j                  S r)   )r'   r   debugr&   create_assertr   r1   r7  )r   r   r   s      r   r   zTritonSemantic.device_assert  sA    |#) 	F{{4<55dk3GGQQQr   c                |    |                      | j                            |j                  t          j                  S r)   )r&   r'   create_assumer   r1   r7  )r   r   s     r   assumezTritonSemantic.assume  s*    {{4<55dkBBBGLLLr   c                   t          |t                    rt          j        |          }t          |t          j                  rt          |j        t
                    r| j                            |j                  S |rFd|j        cxk    rdk     sn J d|j         d            | j                            |j                  S d|j        cxk    rdk     sn J d|j         d            | j        	                    |j                  S t          |t          j
                  r|j        j        dk    s
J d	            |j                                        s
J d
            |j        t          j        k    rQ|rO| j                            |j        | j                                        |j                                                  S |j        t          j        k    r|s
J d            |j        S J dt)          |                       )Nra   rb   z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the ranger_   r`   zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetsFzzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )rh   r+   r1   rq   rO   rH   r'   ri   r  r  r&   rM  r>   rY   rl   r   r   get_int64_tyr   r2   rs   )r   r  r+  s      r   _convert_elem_to_ir_valuez(TritonSemantic._convert_elem_to_ir_value  s3   dC   	&<%%DdBL)) 	$*d++ 9|,,TZ888 :3333e33333 6J#':6J 6J 6J333|--dj9993333e33333 6J#':6J 6J 6J333|--dj999bi(( 		:#q(((*V(((:$$&&bb(bbbbzRX%%+%|33DKAZAZA\A\48J4L4L4N4NP P Prx'''W W W W W;XXDQUJJXXXXXr   c                r     t          |d          r fd|D             S                      |          gS )Nr  c                <    g | ]}                     |          S r  )rY  )ra  r  r+  r   s     r   rb  z8TritonSemantic._convert_to_ir_values.<locals>.<listcomp>  s)    \\\$D224EE\\\r   )r  rY  )r   	list_liker+  s   ` `r   r-  z$TritonSemantic._convert_to_ir_values  sN    9j)) 	]\\\\\R[\\\\..y+FFGGr   basec           	     .   |                      |          }|                      |          }|                      |d          }|j                                        r|j        j                                        rt          d          |j        j        t          j        k    r=|                     |t          j	        t          j
        |j        j                            }t          d          sgd D             t          d D                       s
J d            t          |d          s|g}d |D             }t          |          t          t!          t#          |                              k    s
J d	            t          fd
||||fD                       s
J d            | j                            |j        ||||          }|                     |t          j	        t          j        |j        j                                      S )NFr*  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c                T    g | ]%}t          |t          j                  r|j        n|&S r  r  r  s     r   rb  z1TritonSemantic.make_block_ptr.<locals>.<listcomp>  s0    fffRVZbl%C%CMtzzfffr   c              3  `   K   | ])}t          |t                    od |cxk    odk     nc V  *dS )r_   r`   N)rh   r+   r  s     r   r  z0TritonSemantic.make_block_ptr.<locals>.<genexpr>  sM      \\:dC((CVt-C-C-C-Ce-C-C-C-C\\\\\\r   zGExpected a list of constant integers (`int32_t` range) in `block_shape`c                T    g | ]%}t          |t          j                  r|j        n|&S r  r  r  s     r   rb  z1TritonSemantic.make_block_ptr.<locals>.<listcomp>#  s/    ZZZDz$==G4ZZZr   z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc              3  X   K   | ]$}t                    t          |          k    V  %d S r)   )rU  )ra  r\  r  s     r   r  z0TritonSemantic.make_block_ptr.<locals>.<genexpr>'  s6      hh)3{##s9~~5hhhhhhr   zBExpected shape/strides/offsets/block_shape to have the same length)r-  rs   r{   r  rT  r/   r1   rj   r   r  r  r  r  r  r  r  rC  rU  r'   create_make_block_ptrr   r&   r?  )r   r]  rD  stridesr/  r  orderr   s        `  r   make_block_ptrzTritonSemantic.make_block_ptr
  s2    **511,,W55,,W%,HH y!! 	nTY%9%B%B%D%D 	nlmmm 927**99T2?27DI<S#T#TUUD {J// 	(&-KffZefff\\P[\\\\\ 	V 	VU	V 	V 	V uj)) 	GEZZTYZZZe}}U3u::%6%6 7 77779w777 hhhhwX_afGghhhhh 	Q 	QP	Q 	Q 	Q 33DKQXZeglmm{{62?2=AUWb3c3c#d#deeer   c                    |                      |d          }|                     | j                            |j        |          |j                  S r5  )r-  r&   r'   create_advancer   rs   )r   r]  r/  s      r   advancezTritonSemantic.advance0  sF    ,,W%,HH {{4<66t{GLLdiXXXr   rd  r  List[tl.constexpr]tl.tensor_descriptorc           	     `    t          |          }d|cxk    rdk    sn t          d| d          t          |          |k    r"t          d| dt          |                     t          |          |k    r"t          d| dt          |                     t          |j        t          j                  sJ |j        j        j        d	z  }t	          j        |d
                   }||z  dk     rt          d| d| d||z   d          t	          j        |d
                   |d
<   |d
         dk    rt          d|d
                     fd|D             } fd|D             }t	          j	        |          }t          |j
        t          j                  sJ t	          j        |j
        j        |          }|j        }	|j
        j                                        }
 j                            |	d |D             d |D             ||
          }t	          j        ||||          S )Nr      z Expected 1 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got rb  rn  r}  zRDescriptor block shape must have at least 16 bytes in the last dimension, but got z * z = z bytesz-Tensor descriptor last dim must be 1 but got c                P    g | ]"}                     |t          j                  #S r  )rN  r1   r2   ra  rt   r   s     r   rb  z9TritonSemantic.make_tensor_descriptor.<locals>.<listcomp>Q  s+    >>>1!!!RX..>>>r   c                P    g | ]"}                     |t          j                  #S r  )rN  r1   rl   ro  s     r   rb  z9TritonSemantic.make_tensor_descriptor.<locals>.<listcomp>R  s+    BBBQ4##Arx00BBBr   c                    g | ]	}|j         
S r  r  ra  r[  s     r   rb  z9TritonSemantic.make_tensor_descriptor.<locals>.<listcomp>\  s    IbIbIbWX!(IbIbIbr   c                    g | ]	}|j         
S r  r  rr  s     r   rb  z9TritonSemantic.make_tensor_descriptor.<locals>.<listcomp>]  s    <W<W<W!QX<W<W<Wr   )rU  r/   rh   r>   r1   r  r  r  r`  _unwrap_shapers   r?  r   r   r'   create_make_tensor_descriptortensor_descriptor)r   r]  rD  rd  r  r0  	elem_sizecontig_dim_sizers   base_handleis_signed_intr   s   `           r   make_tensor_descriptorz%TritonSemantic.make_tensor_descriptor7  s    5zzTQQQQQRRRw<<4NNNGNNOOO{t##eTeeWZ[bWcWceefff$*bo66666J)<A	1+b/BBY&++ ket  k  k  zC  k  k  HW  Zc  Hc  k  k  k   -gbk::2;!ZWUW[ZZ[[[>>>>>>>BBBB'BBB &{33$)R_55555}TY1;??k	,::<<;;KIbIb\aIbIbIb<W<Ww<W<W<WYdfsu u#FE7DAAAr   )r*   r+   r,   r   )r8   r9   r:   r9   r,   r9   )r8   r9   rG   rH   r:   r9   rI   rH   rJ   rH   r,   r9   )T)r]   rH   )r   r9   r   r9   rx   rH   r,   ry   )FFTF)r~   r   r   r   r,   r   )r~   r   r   r   r   r   )r   r   r   r   r   rH   r,   r   )r   r   r   r   r,   r   )r   r   r   r   r   rH   r,   r   )rt   r   r   r   r   r   )rt   r   r   r   r   r   r   r   )r   r   r   r   r,   r   )r   r   r   r   r,   r   )r   r   )r   r   r,   r   )r  r   r,   r  )r;  r+   r<  r+   r   r  r,   r   )r>   r9   r,   r   )rD  rO  r>   r9   r,   r   )rO   r   rD  rO  r,   r   )r   r   rW  rO  rX  rH   r,   r   )r   r   r*   r+   r,   r   )r~   r   r   r   rX  rH   r,   r   )rk  r   rl  r   r,   r   )rk  r   r,   r   )r   r   rz  r{  r,   r   )r   r   rD  r{  r,   r   )r~   r   r   r   r,   r   )r  r  )r   r   r  r9   r,   r   r)   )r   r   r  r9   r  r  r,   r   )r  r   r  r"  r   r"  r
  r   r  r  r  r  r  r  r  rH   r,   r   )r%  r&  r  r  r  r  r,   r   )r%  r&  rO   r   r,   ry   )r%  r&  rO   r   r,   r   )r  r  r  r  r,   r   )rO   r   r,   r   )r  r   rp  r   r  r"  r  r  r  r  r,   r   )r  r   r{  r   rp  r   r  r  r   r  r,   r   )
r  r   rp  r   r  r   r  r  r,   r  )rt   r   r,   r   )r  r   rp  r   r  r   r  r  r   r  r,   r   )r~   r   r   r   r  r   r  r  r  r+   r  r9   r,   r   )r  r  )rp  r   r  r  )r~   r   r  r   r  r  r   r   r  r"  r  r  r  r  r  rH   r  rH   r  rH   r  r9   r,   r   )r  r   rt   r   r   r   r,   r   )r  r  r*   r+   r,   r  )r  r  r*   r+   r#  rH   r,   r  )r*  r   r+  r   r*   r+   r,   r   )r   r   r/  r+   r  r"  r,   r   )rt   r   r  rO  r,   r   )r,   r   )rA  r  rB  rC  rD  rH   r,   r   )r   r   r   r  r,   r   )r]  r   r,   r   )
r]  r   rD  rC  rd  rC  r  rj  r,   rk  )yr    r!   r"   r1   r&   __annotations__langr   r4   r7   rF   r\   rr   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r   r  r  r#  r   r-  r   r5  r:  rF  rn   rN  rR  rQ  r\  rf  rj  rt  ry  r  r  r   r  r   r   r  r  r  r  r  r  r  r  r!  r$  r1  r3  r8  r@  rI  rK  rN  rR  rU  rY  r\  ri  rm  rq  rv  rz  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r"  r)  r.  r2  r8  r;  r=  r@  rP  r   rV  rY  r-  rf  ri  r{  r  r   r   r%   r%      s	        YF&&&&D  O O O O
Q Q Q QD D D D05 05 05 05d# # # # #R	@ 	@ 	@ 	@ ae05# # # # #J& & & &$> > > >>8 8 8 8"8 8 8 8] ] ] ]2> > > >, , , ,8 8 8 8.9 9 9 9"9 9 9 9"	a 	a 	a 	a   \ \ \ \[ [ [ [\ \ \ \' ' ' '& & & &" " " "
] ] ] ]] ] ] ]\ \ \ \   ) ) ) )$ $ $ $/ / / /8 8 8 88 8 8 88 8 8 88 8 8 8	8 	8 	8 	8	8 	8 	8 	8 GK Z Z Z Z Z Z$	) 	) 	) 	)2 2 2 2A A A Ah h h hf f f fX X X XV V V V   0

 

 

 

T T T TW W W W 2 2 2 2pt t t tj j j j$i9 i9 i9 i9 i9^    	 	 		 	 	    
 
 
  ,: : :xn n n n	/ 	/ 	/ 	// / / /n n n n
u u u u@ @ @[ [ [
u u u uu u u uu u u uu u u uu u u u$ $ $ $0* * * **x x x8*$ *$ *$XW W W W(u u u u   8+ + + +$) $) $) $)L$) $) $) $)L% % % %y y y yx x x xy y y y   < < <Ly Ly Ly Ly\   0 0 0 0$.t .t .t .the e e e(& & &u u u u,x x x x.K K K K.	@ 	@ 	@ 	@         C C C Ca a a aR R R R
M M M MY Y Y4H H H H
$f $f $f $fLY Y Y Y'B 'B 'B 'B 'B 'Br   r%   )
__future__r   r  typingr   r   r   r   r   r	   r
   r   triton.runtimer   _C.libtritonr   r^  r   r1   r   r   	Exceptionr   r%   r  r   r   <module>r     s0   " " " " " "  J J J J J J J J J J J J J J J J J J  ! ! ! ! ! !            GCLL7:F F F F F	 F F FEB EB EB EB EBWX& EB EB EB EB EBr   