
    Xh                        d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZmZ d dlmZmZmZ g dZg dZeez   Zg dZed	gz   Zeez   Zed	gz   Zd
dgZdgez   dgz   ez   d	gz   Z e ee          h dz
            Zd Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d,dee         fdZ,d-dej-        deeej.        f         fdZ/de0dej1        fd Z2de0fd!Z3d" Z4d.d$Z5d.d%Z6e
j7        8                     e5              e6            &          Z9d'e:d(e:fd)Z;d*eej.        ej<        j=        j        f         dej.        fd+Z>dS )/    N)knobs)RandomState)OptionalUnion)TensorWrapperreinterprettype_canonicalisation_dict)int8int16int32int64)uint8uint16uint32uint64)float16float32float64bfloat16float8_e4m3fnfloat8_e5m2boolr   >   r   r   r   c                  J    t           j                            dd          dk    S )NTRITON_INTERPRET01)osenvironget     j/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/triton/_internal_testing.pyis_interpreterr#      s    :>>,c22c99r!   c                  r    t                      rd S t          j        j        j                                        S N)r#   tritonruntimedriveractiveget_current_targetr    r!   r"   r*   r*      s/     t> '::<<<r!   c                  <    t                      } | dn
| j        dk    S )NFcudar*   backendtargets    r"   is_cudar1   $   s"    !!FN55&(@@r!   c                  n    t                      o't          j                                        d         dk    S )Nr   	   )r1   torchr,   get_device_capabilityr    r!   r"   	is_hopperr6   )   s)    99C99;;A>!CCr!   c                  <    t                      } | dn
| j        dk    S )NFhipr-   r/   s    r"   is_hipr9   -   "    !!FN55%(??r!   c                  R    t                      } | d uo| j        dk    o
| j        dk    S )Nr8   gfx90ar*   r.   archr/   s    r"   is_hip_cdna2r?   2   0    !!FU&.E"9UfkX>UUr!   c                  R    t                      } | d uo| j        dk    o
| j        dk    S )Nr8   gfx942r=   r/   s    r"   is_hip_cdna3rC   7   r@   r!   c                  R    t                      } | d uo| j        dk    o
| j        dk    S )Nr8   gfx950r=   r/   s    r"   is_hip_cdna4rF   <   r@   r!   c                  v    t                      } t          | j                   | d uo| j        dk    od| j        v S )Nr8   gfx12)r*   printr>   r.   r/   s    r"   is_hip_gfx12rJ   A   s@    !!F	&+T&.E"9Tg>TTr!   c                  V    t                      pt                      pt                      S r%   )r?   rC   rF   r    r!   r"   is_hip_cdnarL   G   s    >>=\^^=|~~=r!   c                  <    t                      } | dn
| j        dk    S )NFxpur-   r/   s    r"   is_xpurO   K   r:   r!   c                  N    t                      } | dnt          | j                  S )N )r*   strr>   r/   s    r"   get_archrS   P   s%    !!F22S%5%55r!   rsc                    t          | t                    r| f} |t          d          }|t          t          z   v rt          j        t          t
          |                    }||j        nt          ||j                  }||j	        nt          ||j	                  }t          t
          |          }|
                    ||| |          }d||dk    <   |S |r)d|v r%|
                    dd	| t
          j                  }|S |t          v r*|                    dd|                               |          S |d
k    re|                    dd|                               d                              d          t          j        d          z                      d          S |dv r|                    dd|           dk    S t#          d|           )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   int1bool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr
   float_dtypesnormalastypeviewr   RuntimeError)shape	dtype_strrT   lowhighrd   rX   xs           r"   numpy_randomrs   U   s   
 % 		zb!!!J,,,Y//00;eiiCUY,?,? Luyyc$	.B.BI&&JJsD%uJ55!q&		 
9x9,,JJr2uBGJ44	l	"	"yyAu%%,,Y777	j	 	 		!Q&&--i88==hGG")T^J_J__eefoppp	/	/	/yyAu%%++7I77888r!   rr   returnc                 "   | j         j        }|t          v rt|                    d          }|                     t          t          |                    }t          t          j	        ||          t          t          |                    S |r;d|v r7t          t          j	        | |          t          t          |                    S |dk    r.|dk    r(t          j	        | |                                          S t          j	        | |          S )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicerZ   r   r   )rX   namerb   lstriprk   re   rc   r   r4   tensortlr   )rr   rw   dst_typetsigned_type_namex_signeds         r"   	to_tritonr   s   s     	
AK88C==88GB(899::5<@@@'"a..QQQ 	VH,,u|Af===wr8?T?TUUU	>>h*44<&111::<<<|Af----r!   c                 @    t          j        t          |                    S r%   )r{   	str_to_tyr	   rr   s    r"   str_to_triton_dtyper      s    <215666r!   c                 .   t          | t          j        j                  r| j        S t          | t
          j                  r7t          j        dt          |                     }|	                    d          S t          dt          |                      )Nz^torch\.(\w+)$rY   znot a triton or torch dtype: )r_   r&   languagerX   rx   r4   rematchrR   group	TypeErrortype)rX   ms     r"   torch_dtype_namer      s|    %.// Gz	E5;	'	' GH&E

33wwqzzEUEEFFFr!   c                 ,   t          | t                    rc| j                                                                                            t          t          t          | j	                                      S t          | t          j                  rq| j	        t          j        u r8|                                                                                                 S |                                                                 S t          d|            )Nz Not a triton-compatible tensor: )r_   r   basecpunumpyrk   re   rc   r   rX   r4   Tensorr   float
ValueErrorr   s    r"   to_numpyr      s    !]## Avzz||!!##**727G7P7P+Q+QRRR	Au|	$	$ A7en$$5577==??((***uuww}}?A??@@@r!   Fc                 z   t                      rdS t                      sdS t          j        j        j        }| rdnd}t          t          t          |	                    d                              }t          |          dk    s
J |            t          j                                        d         dk    o||k    S )	NTF)   r   )r      .   r   r3   )r#   r1   r   nvidiaptxasversiontuplemapr`   splitlenr4   r,   r5   )
byval_onlycuda_versionmin_cuda_versioncuda_version_tuples       r"   supports_tmar      s     t99 u<%-L",9ww's3(:(:3(?(?@@AA!""a''');''':++--a0A5`:LP`:``r!   c                     | rdS dS )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r    )r   s    r"   tma_skip_msgr      s     ^ff]]r!   )reasonsizealignc                 D    t          j        | t           j        d          S )Nr,   )rX   rw   )r4   emptyr
   )r   r   _s      r"   default_alloc_fnr      s    ;t5:f====r!   r}   c                 \    t          | t          j        j        j                  r| j        S | S r%   )r_   r&   r'   jitr   r   )r}   s    r"   unwrap_tensorr      s(    !V^'566 vHr!   )NNNr%   )F)?r   r   r   rc   r4   r&   triton.languager   r{   r   pytestnumpy.randomr   typingr   r   triton.runtime.jitr   r   r	   ra   rb   integral_dtypesri   float_dtypes_with_bfloat16dtypesdtypes_with_bfloat16torch_float8_dtypestorch_dtypessortedset
tma_dtypesr#   r*   r1   r6   r9   r?   rC   rF   rJ   rL   rO   rS   rs   ndarrayr   r   rR   rX   r   r   r   r   r   markskipifrequires_tmar`   r   r'   r   r   r    r!   r"   <module>r      s   				 				                    $ $ $ $ $ $ " " " " " " " " U U U U U U U U U U000
555{*000)ZL8 	<	', &6 x*$y0<?:,NVCC,--0N0N0NNOO
: : := = =A A A
D D D@ @ @
V V V
V V V
V V V
U U U> > >@ @ @
6 6 6
9 9x'< 9 9 9 9<. . .u]EL=X7Y . . . .&73 728 7 7 7 7Gs G G G GA A A	a 	a 	a 	a^ ^ ^ ^ {!!llnn"4\\^^!LL>3 >s > > > >U5<);)IIJ u|      r!   