
    Xh              	         d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
lm Z  d	dl!m"Z" d	dl#m$Z$m%Z%m&Z&m'Z' e(d e)d                    Z* ed          Z+ G d dej,                  Z-d:dZ. G d d          Z/i Z0g Z1d Z2d;dZ3 G d dee+                   Z4d Z5d Z6d Z7e G d d                      Z8 G d  d!e4e+                   Z9ed<d$            Z:edddddddd%d=d0            Z:	 d>dddddddd%d?d3Z: G d4 d5          Z; G d6 d7          Z<d8 Z=d9 Z>dS )@    )annotationsdivisionN)defaultdict)	dataclass)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTupleTensorDescriptor)
ModuleType   )knobs)driver)find_paths_ifget_iterable_pathtype_canonicalisation_dictcanonicalize_dtypez.runtime.jitTc                  |     e Zd ZdZd fdZed             Zd Zd Zd Z	d	 Z
d
 Zd Zd Zd Zd Zd Zd Z xZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    returnNonec                    t                                                       || _        t          j        |                    d                    | _        || _        || _        h d| _	        i | _
        d| _        d S )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstanceF)super__init__namehashlibsha256encodehasherglobals	nonlocalssupported_python_builtinsused_global_valsvisiting_arg_default_value)selfr/   r4   r5   src	__class__s        d/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/triton/runtime/jit.pyr.   zDependenciesFinder.__init__)   s{    	nSZZ%8%899 "*
 *
 *
&. TV*/'''    c                4    | j                                         S N)r3   	hexdigestr9   s    r<   retzDependenciesFinder.retN   s    {$$&&&r=   c                    t          j        |j                  rdS t          |dd          }|                    t
                    S )NT
__module__ )inspect	isbuiltinfuncr+   
startswithTRITON_MODULE)r9   noderH   modules       r<   _is_triton_builtinz%DependenciesFinder._is_triton_builtinR   sA    TY'' 	4|R00  ///r=   c                2   t          |t                    r | j                                        |j                                        z  D ]V}|\  }}| j        |         \  }}|j        |         \  }}||k    r)t	          d| d| d| j         d|j         d| d          W| j                            |j                   |j        }|t          t          |dd                    z  }| j                            |                    d	                     d S d S )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr"   )r,   JITFunctionr7   keysRuntimeErrorr/   __name__update	cache_keystrr+   r3   r2   )r9   rH   kvar_name_v1v2func_keys           r<   _update_hashzDependenciesFinder._update_hashX   se   dK(( 	9 *//11D4I4N4N4P4PP  !-a0A-a0A88& T8  T  T  T  TTXT]  T  Trvr  T  T  Y[  T  T  T    !(()>???~HGD*e<<===HKxw7788888	9 	9r=   c                    t          |j                  t          j        u r|j        S |j         j        v rd S  fd} ||j                  \  }}| j        s|t          |          t          urft          |t                    sQt          |dd          s@|j         j        vr2t          j        |          |f j        |j        t	          |          f<                        |           |S )Nc                    j                             | d           }|	|j         fS j                            | d           }|	|j        fS dS )N)NN)r4   getr5   )r/   valr9   s     r<   name_lookupz2DependenciesFinder.visit_Name.<locals>.name_lookupr   sZ    ,""4..CDL((.$$T400CDN**:r=   __triton_builtin__F)typectxastStoreidlocal_namesr8   r   r,   rQ   r+   r6   copyr7   r^   )r9   rK   rc   rb   var_dicts   `    r<   
visit_NamezDependenciesFinder.visit_Namej   s
   >>SY&&7N7d&&&4	 	 	 	 	 $DG,,X
 O 7	  IIZ// #344 0 >ESJ^`e=f=f 0 G4#AAA>Binnh=WD!47BxLL"9:#
r=   c                *      fd|j         D             S )Nc                :    g | ]}                     |          S  )visit).0eltr9   s     r<   
<listcomp>z2DependenciesFinder.visit_Tuple.<locals>.<listcomp>   s#    555C

3555r=   )eltsr9   rK   s   ` r<   visit_TuplezDependenciesFinder.visit_Tuple   s!     6555495555r=   c                f   |                      |j                  }t          |t          j                  r4|                      |j                  }t          |t          j                  4|t          |dd          t          k    rd S t          ||j                  }|                     |           |S )NrT   rE   )	rq   valuer,   rg   	Attributer+   rJ   attrr^   )r9   rK   lhsrB   s       r<   visit_Attributez"DependenciesFinder.visit_Attribute   s    jj$$cm,, 	(**SY''C cm,, 	(;73
B77=HH4c49%%#
r=   c                f    d |j         j         D             | _        |                     |           d S )Nc                    h | ]	}|j         
S rp   arg)rr   r   s     r<   	<setcomp>z7DependenciesFinder.visit_FunctionDef.<locals>.<setcomp>   s    >>>CG>>>r=   )argsrj   generic_visitrv   s     r<   visit_FunctionDefz$DependenciesFinder.visit_FunctionDef   s6    >>ty~>>>4     r=   c                .     fd}t          j        |j        |j        |j        r|j        gng |j                  D ]}                     |            ||j                   |j                             |j                    ||j	                   d S )Nc                    	 j         rJ d_         | D ]}|                    |           	 d_         d S # d_         w xY w)NTF)r8   rq   )defaultsexprr9   s     r<   visit_defaultsz:DependenciesFinder.visit_arguments.<locals>.visit_defaults   sp    8::::26/$ ) )D'

4((() 38///%/7777s	   ,9 	A)
	itertoolschainposonlyargsr   vararg
kwonlyargsrq   kw_defaultskwargr   )r9   rK   r   r   s   `   r<   visit_argumentsz"DependenciesFinder.visit_arguments   s    	8 	8 	8 	8 	8 ?4#3TYQUQ\@dbdfjfuvv 	 	CJJsOOOOt'(((:!JJtz"""t}%%%%%r=   c                    |                      |          }t          |t                    r| xj        t	          |          z  c_        d S | j                            |           d S r?   )rq   r,   r'   rj   setadd)r9   rK   targets      r<   visitAssnTargetz"DependenciesFinder.visitAssnTarget   sd     D!!fd## 	)F+  (((((r=   c                    t          |j                  dk    rt          d          |                     |j        d                    |                     |           d S )N   z2Simultaneous multiple assignment is not supported.r   )r$   targets	TypeErrorr   r   rv   s     r<   visit_AssignzDependenciesFinder.visit_Assign   s^    t|!!
 PQQQT\!_--- 	4     r=   c                d    |                      |j                   |                     |           d S r?   r   r   r   rv   s     r<   visit_AnnAssignz"DependenciesFinder.visit_AnnAssign   4    T[))) 	4     r=   c                d    |                      |j                   |                     |           d S r?   r   rv   s     r<   	visit_ForzDependenciesFinder.visit_For   r   r=   )r   r    )rT   rD   __qualname____doc__r.   propertyrB   rM   r^   rm   rw   r}   r   r   r   r   r   r   __classcell__r;   s   @r<   r   r      s
       	 	#0 #0 #0 #0 #0 #0J ' ' X'0 0 09 9 9$% % %N6 6 6
  ! ! !
& & &@) ) )! ! !! ! !! ! ! ! ! ! !r=   r   r   rW   c                   dd l mc m} t          | t                    r|                                 } |                     d          rH|                     d          } t          |           } |                     d          sJ d| dd          z   S | 	                    d          rdt          | d d                   z   S |                     d          rdt          | dd                    z   S |                     d          r"t          |                     d                    S nut          | |j
                  rdt          | j                   S t          | |j                  r| j        } n,t          | t                    r| j        } nt	          |           } t!          j        |                     d	d
          |           S )Nr   zconst const**kr   ztl._trE   )triton.language.corelanguagecorer,   rW   striprI   removeprefix_normalize_tyendswithpointer_type
element_tydtyper/   re   rT   r   ra   replace)tyr   s     r<   r   r      s   '''''''''"c XXZZ=="" 	!))Br""B==%%%%%"QRR&= ;;s 	0r#2#w////== 	/r!""v....== 	9 !7!7888	9	B)	*	* 1=//111	B
	#	# W	B		 [WW%)"**T2*>*>CCCr=   c                      e Zd ZdZdd	Zed
             Zedd            Zedd            Zed             Z	ed             Z
ed             Zed             ZdS )KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.numr#   paraminspect.Parameterdo_not_specializebooldo_not_specialize_on_alignmentc                >    || _         || _        || _        || _        d S r?   )r   _paramr   r   )r9   r   r   r   r   s        r<   r.   zKernelParam.__init__
  s&    !2.L+++r=   c                    | j         j        S r?   )r   r/   rA   s    r<   r/   zKernelParam.name  s    {r=   r   rW   c                    | j         j        r| j         j        t          j        j        k    rdS t          | j         j                  S )NrE   )r   
annotationrF   	Parameteremptyr   rA   s    r<   r   zKernelParam.annotation  s<    {% 	)?7CTCZ)Z)Z2T[3444r=   c                    | j         }|                    d          r|dd          }n|                    d          r
|dd          }|t          t          j                              v r| j         S dS )Nr   r   r   r   rE   )r   rI   r   r   values)r9   as     r<   annotation_typezKernelParam.annotation_type  sw    O<< 	!""AA\\# 	!""A.5778888?"rr=   c                    d| j         v S N	constexpr)r   rA   s    r<   is_constexprzKernelParam.is_constexpr&  s    do--r=   c                Z    | j         rdS d| j        v p| j                            d          S )NFr   r   )r   r   rI   rA   s    r<   is_constzKernelParam.is_const*  s4     	5$/)MT_-G-G-M-MMr=   c                    | j         j        S r?   )r   defaultrA   s    r<   r   zKernelParam.default0  s    {""r=   c                @    | j         j        t          j        j        k    S r?   )r   r   rF   r   r   rA   s    r<   has_defaultzKernelParam.has_default4  s    {"g&7&===r=   N)r   r#   r   r   r   r   r   r   r   rW   )rT   rD   r   r   r.   r   r/   r   r   r   r   r   r   r   rp   r=   r<   r   r     s        LLM M M M     _  5 5 5 _5
    _ . . _. N N _N
 # # X# > > X> > >r=   r   c                8     ddl m ddlm d fd	S )	Nr   r   r   r   FTc                      dS t           t                    rdS t           t                    r@|r  d|          nd } dk    r|rdS d k    r
 dk    rd	|fS d
 k    r
 dk    rd|fS d|fS t           t                    rdS t	           d          rh j        |f}t                              |d           }|,|d         rdndt          |d                   z   }|t          |<   |r  d|          nd }||fS t           t                    r	d j
        fS t                     rd fS t	           d          rdS t           t                    rAfd D             } fd} |d |D                       }	 |d |D                       }
|	|
fS t           t                    rLt	           j        d          sJ t           j        j                  }d| t           j                   dd fS t                     rTt	           j        d          sJ t           j        j                  }d| t           j                   d j        dd fS t#          dt%                     z            )N)r   N)u1Nr#   )alignr   )r   r   i   ii32l            l    u64i64)fp32Ndata_ptrr   r   r   tensorr   tma_desc_cpu_ptr)	nvTmaDescNc                &    g | ]} |          S rp   rp   )rr   xspecialize_impls     r<   rt   zCcreate_specialize_impl.<locals>.specialize_impl.<locals>.<listcomp>c  s#    4441OOA&&444r=   c                f    t          d          r t                    |  nt          |           S )N_fields)hasattrre   tuple)valsr   s    r<   <lambda>zAcreate_specialize_impl.<locals>.specialize_impl.<locals>.<lambda>d  s2    '#y:Q:Q&bid3ii&6&6W\]aWbWb r=   c                    g | ]
}|d          S r   rp   rr   r   s     r<   rt   zCcreate_specialize_impl.<locals>.specialize_impl.<locals>.<listcomp>e  s    111qad111r=   c                    g | ]
}|d          S r   rp   r   s     r<   rt   zCcreate_specialize_impl.<locals>.specialize_impl.<locals>.<listcomp>f  s    222qt222r=   ztensordesc<>,zUnsupported type: %s)r,   r   r#   r(   r   r   	dtype2strra   r   rQ   rV   r   r   baser'   block_shapelayoutr   re   )r   r   specialize_valuer   keydskresspec
make_tupletysrR   innerGluonTensorDescriptorr   specialize_extrar   s   `           r<   r   z/create_specialize_impl.<locals>.specialize_implB  s   ;&&T"" ,	@<S!! *	@?OY""3U;;;;UYCaxx,x''SSI%5%5s|###"2"2s|#s|#U##  	@!>S*%% 	@9h'C--T**C{"1v.tt32DSV2L2LL!$	#BR\""3>>>>X\C:[)) 	@//Y'' 	@%%S,-- 	@&&U## 	@4444444DbbbbJ*11D11122C:22T22233D;-.. 		@38Z00000&sx~66EA%Aco)>)>AAA4HH233 	@38Z00000&sx~66EP%Pco)>)>PPPPPRVWW2T#YY>???r=   )FTT)r   r   'triton.experimental.gluon.nvidia.hopperr   )r  r  r   r   s   `@@@r<   create_specialize_implr  =  si    $$$$$$aaaaaa/@ /@ /@ /@ /@ /@ /@ /@ /@b r=   Fc                    t          t                    dk    r(t                              t          d                      t          d         } || |          d         S )Nr   c                    d S r?   rp   )rZ   kwargss     r<   r   zmangle_type.<locals>.<lambda>x  s    PT r=   )r   )r$   specialize_impl_cacheappendr  )r   
specializer   s      r<   mangle_typer  v  s[    
 !!Q&&$$%;<T<T%U%UVVV+A.O?3<<<Q??r=   c                  "    e Zd ZU ded<   ddZdS )KernelInterfacer   runr   c                      fdS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                 $     j         | dd|S )NFgridwarmup)r  )r   r  r  r9   s     r<   r   z-KernelInterface.__getitem__.<locals>.<lambda>  s     xtx$T%'Y'YRX'Y'Y r=   rp   )r9   r  s   ``r<   __getitem__zKernelInterface.__getitem__  s     ZYYYYYr=   N)r   r   )rT   rD   r   __annotations__r  rp   r=   r<   r  r  }  s9         
FFFZ Z Z Z Z Zr=   r  c           	        d |                                 D             }dd l}| |d |                                D             t          |                                          d |                                D             t          |                                          |j        |d}|                    |          }|S )Nc                X    i | ]'\  }}||j         j        d k    rt          |          n|(S r   )r;   rT   rW   rr   r   ry   s      r<   
<dictcomp>z1serialize_specialization_data.<locals>.<dictcomp>  s:    wwwWaWZ\aEO$<$G$Gc%jjjUwwwr=   r   c                ,    g | ]}t          |          S rp   r'   r   s     r<   rt   z1serialize_specialization_data.<locals>.<listcomp>  s    ?b?b?bAQ?b?b?br=   c                ,    g | ]}t          |          S rp   r  r   s     r<   rt   z1serialize_specialization_data.<locals>.<listcomp>  s    0O0O0OQa0O0O0Or=   )r/   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionsr   )itemsjsonrR   r'   r   __dict__dumps)	r/   r  	constantsattrsr#  r   r%  objserialized_objs	            r<   serialize_specialization_datar,    s    wwenetetevevwwwIKKK9?b?bQZQ_Q_QaQa?b?b?bY  0O0O%**,,0O0O0O_cdidpdpdrdr_s_s#C C
 ZZ__Nr=   c           
     v   t          | j                  t          |          k    sJ g }t          | j                                        |          D ]\  }}|j        r|                    d| d           &|j        rdnd}|j        rdnd}|j        rdnd}d| d| d| d| d	}	|j	        r{t          |j	        t                    r|j	        dk    s|j	        dd	         d
v rd}|r"|                    d|j	         d|	 d           |                    d|j	         d           |                    |	            d }
dd                    t          t          |
| j                                                            dgz              dd                    d | j                                        D                        dd                    |           d}d | j                                        D             }t           |d<   t#          |j                  |d<   t'          ||           |d         S )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    z("constexpr", )TrueFalsezspecialize_impl(, r   Nr   )fpbfFz("z",) + z[1:]z", None)c                t    | d         j         t          j        j        u r| d         n| d          d| d          S )Nr   r   z	=default_r   rF   r   r   )r   s    r<   r   z0create_function_from_signature.<locals>.<lambda>  s=    AaDLG,=,CCCAaDDAaDIaIa[\]^[_IaIa r=   z
def dynamic_func(z	**optionsz):
    params = {c                    g | ]
}d | d| S )'z': rp   )rr   r/   s     r<   rt   z2create_function_from_signature.<locals>.<listcomp>  s)    QQQ4.t....QQQr=   z}
    specialization = [r   z-]
    return params, specialization, options
c                Z    i | ](\  }}|j         t          j        j        ud | |j         )S )default_r5  )rr   r/   r   s      r<   r  z2create_function_from_signature.<locals>.<dictcomp>  sE       D%= 1 777 	45=777r=   rQ   r   dynamic_func)r$   
parametersziprR   r   r	  r   r   r   r   r,   rW   joinr'   mapr$  rQ   r  get_arg_specializationexec)sigkparamsbackendspecializationr/   kpr   r
  r   rB   r   	func_bodyfunc_namespaces                r<   create_function_from_signaturerH    s    s~#g,,....N++--w77 0 0b? 	0!!":4":":":;;;;!#9vv'H$&$8DfJ!@LGGfEOTOOXOOOOuOOOC! 0b0#66 +)T11R5G5K|5[5[%*
 M"))*Rr/A*R*R*R*R*RSSSS #))*Kr/A*K*K*KLLLL%%h//// b
aC))DS#.*>*>*@*@!A!ABBk]RSS 		QQ3>;N;N;P;PQQQRR  xx//  I >//11  N %0N=!(>w?](^(^N$% 	N### .))r=   c                $    | j          d| j         S )N.)rD   r   fns    r<   get_full_namerM    s    m//bo///r=   c                  .    e Zd ZU ded<   ded<   ded<   dS )JitFunctionInfor   rL   rW   r/   rQ   jit_functionN)rT   rD   r   r  rp   r=   r<   rO  rO    s3         IIIr=   rO  c                       e Zd Zd ZddZd Zd Zd Zd Z	 	 dd
Z	d Z
ed             Zed             Zd Zd Zd Zd Z fdZ fdZd Z xZS )rQ   c                    dS )NFrp   rA   s    r<   is_gluonzJITFunction.is_gluon  s    ur=   r   bool | Nonec	                   |sd S | j         j        }	| j         j        }
d                    d t	          | j        |d                   D                       }|	 d|j         d|j         d|j         d|j	         d|j
         d	| d
}t          | j                   }t          ||||d         ||          }||||j        |j        |j        |j	        |j
        |j        |||d} |||t          |
|	|           d|i||d          S )Nr1  c                ,    g | ]\  }}|j          d | S )z: r/   )rr   r   r   s      r<   rt   z*JITFunction._call_hook.<locals>.<listcomp>  s,    ___%*4444___r=   r   z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](r.  r   )r  devicer(  	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr   F)r   reprrL  compileis_manual_warmupalready_compiled)rL  r   rD   r=  r<  paramsrZ  r[  r\  r]  r^  rM  r,  r_  rO  )r9   hookr   r  rY  r(  r#  r`  rb  r/   rL   	arg_reprsrc  	full_namera  r  s                   r<   
_call_hookzJITFunction._call_hook  s     	4w##II__c$+WZ[\W]F^F^___``	  k  k7#4  k  kAQ  k  k`g`r  k  k  HO  H`  k  k  |C  |[  k  k  _h  k  k  k!$'**	;IyR[]def]gipruvv #" *(!, ' 8'.'F".#6"
 
 tvtT22C*6*&"
 
 
 	
r=   c                \    t          |          sJ | j                            |           dS )z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)callablepre_run_hooksr	  )r9   rh  s     r<   add_pre_run_hookzJITFunction.add_pre_run_hook  s3    
 ~~!!$'''''r=   c                    ddl m}m}m}m} t
          j                                        } ||          }|| _        || _        || _        t          | j	        | j
        |          }i |||fS )z1
        Precompute as much as possible.
        r   )CompiledKernelrd  	ASTSourcemake_backend)compilerrq  rd  rr  rs  r   activeget_current_targetrH  r  rg  )r9   rq  rd  rr  rs  r   rC  binders           r<   create_binderzJITFunction.create_binder  s     	POOOOOOOOOOO1133,v&&,"/WUU67F**r=   c                 !"# |                     d| j                  pt          j        j        |d<   t          j                                        }t          j                            |          }| j        D ]
} ||i | | j	        |         \  }}	"}
 |
|i |\  #}}t          |          t          |          z   }|                     |d           }|y"                    |          }d | j        D             }d |D             }d t          ||          D             }d|vs
J d            d|vs
J d            d	|vs
J d
            |D ]!}||j        vr||vrt          d|z            "t!          |d           }#fd|D             }d |D             !t!          !d           }!"fd|D             }|                     t          j        j        ||||||g|          rd S |                     | |||          }|                     ||	|j                  }|||<   |                     t          j        j        ||||||g|           t-                      }| j                                        D ]?\  \  }}\  }}|                     ||          x}|k    rt3          d| d| d|           @|s|J t5          |          r |#          }t7          |          }|d         }|dk    r|d         nd}|dk    r|d         nd} |j        ||g#                                R  }  |j        |||||j        |j         | t          j        j!        t          j        j"        g	#                                R   |S )Ndebugc                    g | ]	}|j         
S rp   rW  r   s     r<   rt   z#JITFunction.run.<locals>.<listcomp><  s    333!qv333r=   c                    g | ]
}|d          S r   rp   r   s     r<   rt   z#JITFunction.run.<locals>.<listcomp>=  s    444qt444r=   c                    i | ]\  }}||	S rp   rp   )rr   rX   vs      r<   r  z#JITFunction.run.<locals>.<dictcomp>>  s    BBB&1aABBBr=   device_typez=device_type option is deprecated; current target will be usedrY  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                    |dk    S r   rp   )rZ   rb   s     r<   r   z!JITFunction.run.<locals>.<lambda>G  s    sk?Q r=   c           	     p    i | ]2}|t          t                                                    |          3S rp   )r   r'   r   )rr   path
bound_argss     r<   r  z#JITFunction.run.<locals>.<dictcomp>H  s;    jjjW[$ 1$z7H7H7J7J2K2KT R Rjjjr=   c                    g | ]
}|d          S r   rp   r   s     r<   rt   z#JITFunction.run.<locals>.<listcomp>J  s    555!555r=   c                ,    t          |t                    S r?   )r,   rW   )rZ   r   s     r<   r   z!JITFunction.run.<locals>.<lambda>K  s    As9K9K r=   c           	     X    i | ]&}|                     t          |                    'S rp   )
parse_attrr   )rr   rX   attrvalsrC  s     r<   r  z#JITFunction.run.<locals>.<dictcomp>L  s4    ZZZqQ**+<Xq+I+IJJZZZr=   )r   r#  rO   z1 has changed since we compiled this kernel, from z to r   r   r   )#ra   rz  r   runtimer   ru  get_current_deviceget_current_streamrn  device_cachesrW   parse_optionsrg  r<  r&  KeyErrorr   rk  jit_cache_hookrr  rd  jit_post_compile_hookobjectr7   r$  rS   rm  r$   launch_metadatar   r  functionpacked_metadatalaunch_enter_hooklaunch_exit_hook)$r9   r  r  r   r  rY  r  rh  kernel_cacher   rw  rD  r#  r   kernelsigkeyssigvalsr  rX   
constexprsr)  r:   not_presentr/   rZ   rb   globals_dictnewVal	grid_sizegrid_0grid_1grid_2r  r  rC  r  s$                                    @@@r<   r  zJITFunction.run#  sh    **Wdj99PU]=Pw 113311&99 & 	" 	"DD$!&!!!!040B60J-fgv /5fd.Ef.E.E+
NG .!!CLL0!!#t,, >++F33G33t{333G44^444GBBC,A,ABBBI ...0o...6)))+e)))6)))+e))) ] ]G,,,'1A1A"#WZ[#[\\\&w0Q0QRRJjjjj_ijjjJ55n555H!(,K,KLLEZZZZZTYZZZEu};S)VU_ahkpjq%' ' t..y*eDDC\\#fg>N\OOF &LOOEM?iQWYcelotnu"$ $ $ hh.2.C.I.I.K.K 	q 	q*IT1*\&**4===#EE"otoo^aoogmooq q q F  	n###~~ (tJ''D		I!WF )AT!WW1F )AT!WW1F4f4T6XJDUDUDWDWXXXOFJvvvvvH^`o}68VnYcYjYjYlYln n n nr=   c                H    | j         | j        n|                      |          S r?   )_repr_fn_name)r9   rZ   s     r<   rc  zJITFunction.reprm  s     $
 2t}}

1Er=   Nc	           	     Z   |r|ng }|r|ng }|| _         |j        | _        || _        t	          j        |          | _        || _        || _        t	          j        |          d         | _	        || _
        t          |          | _        || _        g | _        t          | j        j                                                  D ]I\  }	}
|	|v p|
j        |v }|	|v p|
j        |v }| j                            t)          |	|
||                     Jt+          j        t	          j        |                    }|t1          j        d|t0          j                                                  d          }|                     |           t;          | j                  | _        d | _         i | _!        d | _"        || _#        || _$        d | j        D             | _%        d | j        D             | _&        g | _'        |j(        | _(        |j)        | _)        |j*        | _*        |j+        | _+        |j        | _        d S )Nr   z^def\s+\w+\s*\(c                    g | ]	}|j         
S rp   rW  rr   ps     r<   rt   z(JITFunction.__init__.<locals>.<listcomp>  s    666Q!&666r=   c                *    g | ]}|j         	|j        S rp   )r   r   r  s     r<   rt   z(JITFunction.__init__.<locals>.<listcomp>  s!    HHHQH15HHHr=   ),rL  rD   rL   versionrF   r  r   r   getsourcelinesstarting_line_numberr  rM  r  r  rg  	enumerater;  r   r/   r	  r   textwrapdedent	getsourceresearch	MULTILINEstart_unsafe_update_srcr   rx  r  hashr7   r  rz  rP   	arg_namesr  rn  r   rT   r   __globals__)r9   rL  r  r   r   rz  rP   rc  r  ir   dnsdns_oar:   s                 r<   r.   zJITFunction.__init__p  s)   1BJ--Ki)q)G)Goq&m *2..!2.L+$+$:2$>$>q$A!
%b)).!$.";"B"B"D"DEE 	C 	CHAu((KEJ:K,KC88hEJJh<hFK{1eS&AABBBB og/3344").R\BBHHJJKKL$$$();<<	 TV 
  76$+666HH$+HHH   zO>-r=   c                N    | j         t          j        | j                  j        z  S r?   )r  rF   getclosurevarsrL  r5   rA   s    r<   get_capture_scopezJITFunction.get_capture_scope  s     '"8"A"A"KKKr=   c                   | j         t          j        | j                  j        }t          | j        | j        || j                  }|	                    | 
                                           |j        t          | j                  z   | _         t          t          |j                                                            | _        | j         S )N)r/   r4   r5   r:   )r  rF   r  rL  r5   r   r  r  r:   rq   parserB   rW   r  dictsortedr7   r$  )r9   r5   dependencies_finders      r<   rV   zJITFunction.cache_key  s     9.tw77AI"4$-QUQamv9=#C #C #C%%djjll333+/#d6O2P2PPDI$(0C0T0Z0Z0\0\)])]$^$^D!yr=   c                    ddl m} |S )Nr   r   )r   r   )r9   r   s     r<   re   zJITFunction.type  s    222222r=   c               R     | j         t          t          j        |          |dd|S )NTr  )r  r>  
MockTensor
wrap_dtype)r9   r  r   r  s       r<   r  zJITFunction.warmup  s.    txZ5JD1Q1QT$\\U[\\\r=   c                   ddl m}m} dd l}dd lm t          j                                        }|	                    |          }|d         | j
        k    r t          d|d          d| j
                   t          t          |d                   }|d         }fd	t          ||          D             }	t          t          |d
                   }
|d         }t          t          |
|                    }t          |d                                                   } || ||	|          }d |d                                         D             }|d         } ||d |          }|| j        |         d         |<   |S )Nr   )rd  rr  r   r/   zSpecialization data is for z but trying to preload for r  r   c                z    i | ]7\  }}|j                             |          r                     |          n|8S rp   )r   is_dtype)rr   r   ry   tls      r<   r  z'JITFunction.preload.<locals>.<dictcomp>  sR     
 
 
U BH$5$5e$<$<G%%
 
 
r=   r!  r"  r  c                b    i | ],\  }}|t          |t                    rt          |          n|-S rp   )r,   r'   r   r  s      r<   r  z'JITFunction.preload.<locals>.<dictcomp>  sG     
 
 
U E4!8!8Cue
 
 
r=   r#  r   )rt  rd  rr  r%  triton.languager   r   ru  r  loadsr  rS   r>  r   r<  r  r$  r  )r9   ra  rd  rr  r%  rY  deserialized_objr  r   r(  r!  r"  r)  r  r:   r#  r   r  r  s                     @r<   preloadzJITFunction.preload  s   11111111$$$$$$1133::&9::F#t}44r.>v.Frrcgcprrt t tE#3O#DEE(9
 
 
 
!-??
 
 
	  0 >??
%l3
SZ0011)+6<<>>??	iiE::
 
.y9??AA
 
 
 u%dG,,-36"1%c*r=   c                    t          j        | j                  }t          |t           j                  sJ t          |j                  dk    sJ t          |j        d         t           j                  sJ |S )Nr   r   )rg   r  r:   r,   Moduler$   bodyFunctionDef)r9   trees     r<   r  zJITFunction.parse  sg    y""$
+++++49~~""""$)A,88888r=   c                     t          d          )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)rS   )r9   r   r  s      r<   __call__zJITFunction.__call__  s    WXXXr=   c                    |dk    rt          d| d          t          t          |                               ||           d S )Nr:   zCannot set attribute 'zX' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorr-   rQ   __setattr__)r9   r/   ry   r;   s      r<   r  zJITFunction.__setattr__  s\    5==  "-$ "- "- "- . . . 	k4  ,,T599999r=   c                Z    d| _         t                                          d|           dS )z
        The only method allowed to modify src.
        Bypasses the __setattr__ restriction by calling super().__setattr__ directly.
        Nr:   )r  r-   r  )r9   new_srcr;   s     r<   r  zJITFunction._unsafe_update_src  s,    
 	E7+++++r=   c                2    d| j          d| j        j         dS )NzJITFunction(:r.  )rL   rL  r   rA   s    r<   __repr__zJITFunction.__repr__  s"    CdkCCDG,@CCCCr=   )r   rT  )NNNNNNN)rT   rD   r   rS  rk  ro  rx  r  rc  r.   r  r   rV   re   r  r  r  r  r  r  r  r   r   s   @r<   rQ   rQ     sx         ,
 ,
 ,
 ,
\( ( (+ + +H H HTF F F mq;?<( <( <( <(|L L L 	 	 X	   X] ] ]  @  Y Y Y: : : : :, , , , ,D D D D D D Dr=   rQ   rL  JITFunction[T]c                    d S r?   rp   rK  s    r<   jitr    s    Cr=   r  rc  r  r   r   rz  rP   rc  Optional[Callable]r  r   Optional[Iterable[int | str]]r   rz  Optional[bool]rP   Callable[[T], JITFunction[T]]c                    d S r?   rp   r  s          r<   r  r    s	     Cr=   Optional[T]4Union[JITFunction[T], Callable[[T], JITFunction[T]]]c               F    dfd}|  ||           S |S )a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    rL  r   r   r  c           
         t          |           sJ t          j        j        rddlm}  ||           S t          |           S )Nr   )InterpretedFunction)r  r   r   rz  rP   rc  r  )rm  r   r  	interpretinterpreterr  rQ   )	rL  r  rz  r   r   r  rP   rc  r  s	     r<   	decoratorzjit.<locals>.decorator8  s    ||=" 	888888&&r7N_Fdlq08tUdf f f f "3/M! /	 	 	 	r=   NrL  r   r   r  rp   )	rL  r  rc  r  r   r   rz  rP   r  s	    ``````` r<   r  r    sb    :           & 
~y}} r=   c                  Z    e Zd ZdZed             Zd Zed             Zed             ZdS )r  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                Z    | j         j        dk    r| j        dk    rt          |           S | S )Nr   torch)r;   rT   rD   r  r   s    r<   r  zMockTensor.wrap_dtype]  s/    =!W,,71J1Jc??"
r=   c                    || _         d S r?   r  )r9   r   s     r<   r.   zMockTensor.__init__c  s    


r=   c                     dS Nr   rp   rp   r=   r<   r   zMockTensor.data_ptrf      qr=   c                     dS r  rp   rp   r=   r<   	ptr_rangezMockTensor.ptr_rangej  r  r=   N)	rT   rD   r   r   staticmethodr  r.   r   r  rp   r=   r<   r  r  W  s~         
   \
     \   \  r=   r  c                  L    e Zd Zd Zd Zd ZddZd Zd Zd	 Z	d
 Z
d Zd ZdS )TensorWrapperc                t    || _         || _        |j        | _        |j        | _        | j        j        | _        d S r?   )r   r   datarY  shape)r9   r   r   s      r<   r.   zTensorWrapper.__init__q  s1    
	I	kY_


r=   c                4    | j                                         S r?   )r   r   rA   s    r<   r   zTensorWrapper.data_ptrx  s    y!!###r=   c                      | j         j        | S r?   )r   stride)r9   r   s     r<   r  zTensorWrapper.stride{  s    ty&&r=   r   rW   c                (    d| j          d| j         dS )NzTensorWrapper[rX  r.  )r   r   rA   s    r<   __str__zTensorWrapper.__str__~  s    :
::di::::r=   c                4    | j                                         S r?   )r   element_sizerA   s    r<   r	  zTensorWrapper.element_size  s    y%%'''r=   c                Z    t          | j                                        | j                  S r?   )r  r   cpur   rA   s    r<   r  zTensorWrapper.cpu  s    TY]]__dj999r=   c                D    | j                             |j                    d S r?   )r   copy_)r9   others     r<   r  zTensorWrapper.copy_  s    	
#####r=   c                Z    t          | j                                        | j                  S r?   )r  r   cloner   rA   s    r<   r  zTensorWrapper.clone  s     TY__..
;;;r=   c                \    t          | j                            |          | j                  S r?   )r  r   tor   )r9   rY  s     r<   r  zTensorWrapper.to  s"    TY\\&114:>>>r=   c                \    t          | j                            |          | j                  S r?   )r  r   	new_emptyr   )r9   sizess     r<   r  zTensorWrapper.new_empty  s$    TY0077DDDr=   Nr   )rT   rD   r   r.   r   r  r  r	  r  r  r  r  r  rp   r=   r<   r  r  o  s        % % %$ $ $' ' '; ; ; ;( ( (: : :$ $ $< < <? ? ?E E E E Er=   r  c                   t          | t                    r,|| j        j        k    r| j        S t          | j        |          S t	          | d          rt          | |          S t          dt          |            d          )Nr   zCannot reinterpret a rJ  )r,   r  r   r   r   r   re   )r   r   s     r<   reinterpretr    s    &-(( AFK%%%; !e444		$	$ AVU+++?V???@@@r=   c                P   | }t          |t                    s|j        }t          |t                    |j        j        j        }t          j        |j                  \  }}t          |          D ]3\  }}|                                	                    d          r||z  } n4||fS )Nzdef )
r,   rQ   rL  __code__co_filenamerF   r  r  r   rI   )rL  base_fn	file_namelines
begin_lineidxlines          r<   get_jit_fn_file_liner!    s    G+.. * +.. 
#/I.wz::E: u%%  	T::<<""6** 	#JE	 j  r=   r   )Fr  )rc  r  r  r  r   r  r   r  rz  r  rP   r  r   r  r?   )rL  r  rc  r  r  r  r   r  r   r  rz  r  rP   r  r   r  )?
__future__r   r   rg   rk   r0   rF   r   r  r  collectionsr   dataclassesr   	functoolsr   typingr   r	   r
   r   r   r   r   r   r   r   triton.tools.tensor_descriptorr   typesr   rE   r   runtime.driverr   _utilsr   r   r   r   rT   r$   rJ   r   NodeVisitorr   r   r   r   r  r  r  r  r,  rH  rM  rO  rQ   r  r  r  r  r!  rp   r=   r<   <module>r,     s   , , , , , , , , 



        				  # # # # # # ! ! ! ! ! ! % % % % % % d d d d d d d d d d d d d d d d d d d d d d d d ; ; ; ; ; ;             # # # # # # e e e e e e e e e e e e.33~..../GCLLH! H! H! H! H! H! H! H!`D D D D4/> /> /> /> /> /> /> />d 	 6 6 6r@ @ @ @	Z 	Z 	Z 	Z 	Zgaj 	Z 	Z 	Z	 	 	7* 7* 7*t0 0 0        eD eD eD eD eD/!$ eD eD eDZ	 
   
 
 #*.7;DH #
 
 
 
 
 

 4 #*.7;DH #4 4 4 4 4 4x       0"E "E "E "E "E "E "E "EJA A A! ! ! ! !r=   