
    Xh                     $   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZ dZedk    rT ee          Ze                    d	d
           e                    ddeddd           e                    ddedd           e                    ddedd           e                    ddedd           e                    ddedd            e                    d!d"ed#d$           e                    d%d&ed'd$           e                                Zej        rej        nej        Zej        rej        n
 ee          Z eej                  Zej                            d  eej                             ej                            ej         e          Z!ej        "                    e!          Z#e!j$        %                    e#            e&e#ej                  Z'ej(        )                    d(          Z( e*e(          dk    sJ  e+ e,d) ej-        )                    d(                              Z-d*e
e         fd+Z.d,ej/         d-ej0         Z1 e.e-e1gz             Z2d. Z3d/  e4e-          D             Z5d0 e56                                D             Z5d1  e4e-          D             Z7d2 e76                                D             Z7e56                                D ]!\  Z8Z9e9dk    re9e7e'j:        e8d                   <   "d3  e4e-          D             Z-e7D ]Z8d4e-e8<   d5;                    d6 e7<                                D                       Z=d7 e76                                D             Z>e>d8ej/         d9ej0         gz  Z>e5<                                D ]Z?e?d:v sJ d;e?             d< e56                                D             Z@ejA        B                    e'e7e-e@=          ZCej/        ej0        d>ZD ejE        eCeD?          ZFeFjG        jH        d k    r eId@          g Z:g ZJg ZKg ZL e4e'j:                  D ]\  ZMZNeNe7vrae:O                    eN           eJO                    e-eN                    eKO                    eN           eLO                    e-eN                    je5P                    eMfd          dk    r*e:O                    eN           eJO                    dA           dZQ e4e-<                                          D ]S\  ZMZReQ eeM          z  ZQe5P                    eMfd          dk    reQdBz  ZQe5P                    eMfd          dCk    reQdDz  ZQTdE;                    ee2eQg          ZSeFjT        dF         ZT e e jU        eT                    dGdH         ZVi dIeSdJej        dK e*eT          dLdM;                    dN  eWeVdddG         eVdddG                   D                       d*dM;                    dO  eWeKeL          D                       dPdM;                    dQ  eWe:eJ          D                       dRdM;                    dS eKD             dTgz             dU e*eK          dz   dVe>dWeFjG        jX        dXej/        dYdE;                    e=e1g          dZe(d          d[e(d         d\e(dG         d]dZYd^D ]ZZ ee[          j        d_z  d`z  daeZ z  Z\e]                    dbe2 dEeQ dbeZ           ^                    dc          5 Z_e_`                      ee\          a                                jb        ddi eY           ddd           n# 1 swxY w Y   dS dS )e    N)ArgumentParser)Path)List	ty_to_cppa  
Triton ahead-of-time compiler:

This program compiles the kernel with name `kernel-name` in the file at the
provided `path` into self-contained C source-code that embeds the `cubin`
data along with utilities to load, unload and launch the kernel.

signature is provided as a list of (optionally divisibility-hinted) types
or constexpr values, e.g.

`compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py`

will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`.
Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16,
and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype.

The resulting entry point will have signature

CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2)

Different such specialized entry points can be combined using the `linker.py` script.

NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter
used to run this `compile.py` script
__main__)descriptionpathzTPath to Python source containing desired kernel in its scope. File will be executed.)helpz--kernel-namez-n zName of the kernel to compileT)typedefaultr   requiredz--num-warpsz-w   z$Number of warps to launch the kernel)r   r   r   z--num-stagesz-ns   z/Number of stages (meta-parameter of the kernel)z
--out-namez-onz Out name for the compiled kernelz
--out-pathz-ozOut filenamez--signaturez-szSignature of the kernel)r   r   r   z--gridz-gzLaunch grid of the kernel,c                 ,    |                      d          S )N )strip)ss    f/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/triton/tools/compile.py<lambda>r   F   s    1773<<     	signaturec                     t          j                    }|                    d                    |                                                      |                                d d         S )Nr      )hashlibsha256updatejoinencode	hexdigest)r   ms     r   hash_signaturer$   H   sP    N	)$$++--...{{}}RaR  r   warpsxstagesc                     	 t          |           }|S # t          $ r Y nw xY w	 t          |           }|S # t          $ r Y nw xY wd S N)int
ValueErrorfloat)r   rets     r   	constexprr-   P   sq    	a&&CJ 	 	 	D		((CJ 	 	 	D	ts    
  5 
AAc                 p    i | ]3\  }}d |v 	|ft          |                    d           d                   4S ):r   )r-   split.0ir   s      r   
<dictcomp>r4   ]   sB    \\\41aSVZ[S[S[aUIaggcll1o..S[S[S[r   c                     i | ]
\  }}|||S r(    r2   kvs      r   r4   r4   ^   s    ===daq}Q}}}r   c                 T    i | ]%\  }}t           j        |         t          |          &S r6   )kernel	arg_namesr-   r1   s      r   r4   r4   _   s,    TTTtq!!!$illTTTr   c                     i | ]
\  }}|||S r(   r6   r7   s      r   r4   r4   `   s    EEE$!Qq}A}}}r   c                 l    i | ]1\  }}t           j        |         |                    d           d         2S )r/   r   )r;   r<   r0   r1   s      r   r4   r4   d   s4    WWW$!Q!!$aggcll1oWWWr   r-   xc                 ,    g | ]}t          |          S r6   )str)r2   r9   s     r   
<listcomp>rB   g   s    ===Q#a&&===r   c                 "    g | ]\  }}| d | S )=r6   r7   s      r   rB   rB   h   s&    ;;;AQ****;;;r   z
num_warps=znum_stages=)r      z#Only 1 and 16 are valid hints, got c                 ,    i | ]\  }}|d k    |dd ggS )rE   ztt.divisibilityr6   r7   s      r   r4   r4   m   s,    OOOdaqBwwQ#R()wwwr   )fn
constexprsr   attrs)	num_warps
num_stages)optionszMAOT compiling kernels with global scratch requirements is not yet implementedi32crE   d_cubin   kernel_nametriton_kernel_namebin_sizebin_dataz, c                 "    g | ]\  }}d | | S )0xr6   )r2   r?   ys      r   rB   rB      s&    RRRtq!{A{q{{RRRr   c                 <    g | ]\  }}t          |           d | S r   r   r2   nametys      r   rB   rB      s0    ppphdB9R== 9 94 9 9pppr   full_signaturec                 <    g | ]\  }}t          |           d | S r\   r   r]   s      r   rB   rB      s0    $i$i$i84	"%>%>%>%>$i$i$ir   arg_pointersc                     g | ]}d | S )&r6   )r2   args     r   rB   rB      s    "H"H"H9s99"H"H"Hr   z&global_scratchnum_argskernel_docstringsharedrJ   	algo_infogridXgridYgridZ_placeholder)hrN   extracudazcompile..wr6   )cbinasciir   importlib.util	importlibsysargparser   pathlibr   typingr   tritontriton.backendstriton.backends.nvidia.driverr   desc__name__parseradd_argumentrA   r)   
parse_argsargsout_namerT   out_pathr
   arg_pathinsertparentutilspec_from_file_locationstemspecmodule_from_specmodloaderexec_modulegetattrr;   gridr0   lenlistmapr   r$   rJ   rK   meta_sigsig_hashr-   	enumeratehintsitems	constantskeyvaluer<   r    values	const_sig
doc_stringrn   rI   compiler	ASTSourcesrcoptscompileccinfometadataglobal_scratch_sizeRuntimeError	arg_typesarg_names_not_1arg_types_not_1r3   arg_nameappendgetsuffixr_   	func_nameasmhexlifyhex_ziprh   paramsext__file__template_pathwith_suffixopenfpwrite	read_textformatr6   r   r   <module>r      s	         



 # # # # # #                  3 3 3 3 3 34 z ^---F
s  u u u
CJi!%  ' ' '
t#qGmnnn
CN  P P P
e#tJlmmm
dt.YYY
t#<U`deee
$S7R]abbbD $Ct}}43CH $At}}44>>H tDIHHOOAss8?++,,,>11(-JJD
.
)
)$
/
/CKC   WS$*++F9??3D3t99>>>> SS//1E1Ec1J1JKKLLI!$s) ! ! ! !
 @t~??do??H~i8*455H   ]\99Y;O;O\\\E==ekkmm===ETTyy?S?STTTIEE)//"3"3EEEIkkmm 8 8
UA::27If&s1v./WW))IBVBVWWWI % %$	#==)*:*:*<*<===>>I;;):):;;;J0002Q2Q2QRRJ\\^^ G GG|||F1FF||||OOekkmmOOOE
/
#
#v)y`e
#
f
fCtGGDV^C...F*Q..ljkkkIIOO y!122 $ $89$$X&&&Yx0111""8,,,""9X#67777YYud##q((X&&&U### F9++--..  2##a&&99aUD!!Q&&cMF99aUD!!R''cMF(Hf566I
*W
C3x$$%%ad+Dyd. 	CCHH 	DIIRRss4!9d14a4j7Q7QRRRSS	
 	TYYpp##o_nJoJopppqq 	$))$i$issS\^gOhOh$i$i$ijj 			"H"H"H"H"HL]K^"^__ 	CC((1, 	J 	&/( 	T^ 	SXXy(344 	a 	a 	a  	!F$  G GX-7&@CScCSCSS!!"?h"?"?"?"?#"?"?@@EEcJJ 	GbHH;TT-((2244;EEfEEFFF	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	Gu pG Gs   :<^^		^	