
    XhM                        d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d	efd
Zd Zd Z ed           G d d                      Z G d de          ZdS )    )BaseBackend	GPUTargetLanguage)irpassesllvmamd)knobs)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                     d S )Nc                     dS )N   r   r    )lhs_typerhs_types     n/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/triton/backends/amd/compiler.py<lambda>z"get_min_dot_size.<locals>.<lambda>   s    i     r   r   s    r   get_min_dot_sizer      s     0//r   c                 R    t           j        j        | dk    nt           j        j        S Ngfx942)r
   r	   use_block_pingpongarchs    r   is_pingpong_schedule_enabledr$      s!    !&!=!EDH59Kggr   c                 R    t           j        j        | dk    nt           j        j        S r   )r
   r	   use_in_thread_transposer"   s    r   is_in_thread_transpose_enabledr'      s#    !&!B!JDHPUPYPqqr   T)frozenc                   f   e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	e
ed	<   d
Zeed<   dZeed<   dZeed<   dZeed<   dZee         ed<   dZee         ed<   dZeed<   dZee         ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d Zeed!<   d"Zeed#<   d$ Zd% Z dS )&
HIPOptions   	num_warpsr   waves_per_eu   
num_stagesnum_ctasNextern_libsr   cluster_dimsFdebugTsanitize_overflowr#   )fp8e5supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypesieeedefault_dot_input_precision)r8   allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridr   matrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namenoneschedule_hintc                 B   t          | j        dd                   }|dk    rdnd}t                              | d|           | j        dk    r| j        | j        dz
  z  dk    s
J d	            | j        d
k    r| j        dk    s
J d            t          t                    j        dz  }| j	        i nt          | j	                  }dD ]}t          || dz            ||<   t                              | dt          |                                                     d S )N   
       @   	warp_sizer   r   znum_warps must be a power of 2gfx950zgfx950 only accepts kpack == 1lib)ocmlocklz.bcr1   )intr#   object__setattr__r,   r>   r   __file__parentr1   dictstrtupleitems)self	gfx_majorrK   default_libdirr1   rM   s         r   __post_init__zHIPOptions.__post_init__E   s1   	!B$((	#r//BBr	4i888~!!t~!9K'LQR&R&R&R/ 'S&R&R 9  :???$D???h.6 ,4bb$t?O:P:P# 	A 	AC">sKKK#?@@K4k6G6G6I6I0J0JKKKKKr   c                     d                     d | j                                        D                       }t          j        |                    d                                                    S )N_c                 "    g | ]\  }}| d | S )-r   ).0namevals      r   
<listcomp>z#HIPOptions.hash.<locals>.<listcomp>V   s&    OOOID#4#OOOr   utf-8)join__dict__rX   hashlibsha256encode	hexdigest)rY   keys     r   hashzHIPOptions.hashU   sX    hhOO9L9L9N9NOOOPP~cjj1122<<>>>r   )!__name__
__module____qualname__r,   rP   __annotations__r-   r/   r0   r1   rU   r2   rW   r3   boolr4   r#   rV   r6   r   r7   r9   r:   r;   r<   r=   r>   r?   r@   rB   rD   r\   rm   r   r   r   r*   r*      s        IsL#JHcK#L%###E4"t"""D#'2%*22246%uSz666'----/9 %*999!d!!!$)T))) !#!!!E3NNN$$$$)*!3***L#"  M3L L L ? ? ? ? ?r   r*   c                       e Zd Zedefd            Zdeddf fdZdefdZde	fdZ
d Zd	 Zdeeef         fd
Zd Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zed             Zd Z ej                    d             Z xZS )
HIPBackendr   c                     | j         dk    S )NrA   )backendr   s    r   supports_targetzHIPBackend.supports_target\   s    ~&&r   returnNc                     t                                          |           t          |j        t                    sJ d| _        d S )Nhsaco)super__init__
isinstancer#   rV   
binary_ext)rY   r   	__class__s     r   r|   zHIPBackend.__init__`   s>       &+s+++++!r   c                     d|j          S )Nhip:r"   rY   optionss     r   get_target_namezHIPBackend.get_target_namee   s    $gl$$$r   c                 "   dt           j        j        p| j        j        i}| j        j        dk    rNt          t          j                  }|                    dh           t          t          |                    |d<   dvrt          t          j                  }| j        j        dk    r|                    h d           nM| j        j        dk    r|                    dd	h           n%d
| j        j        v r|                    dd	h           t          t          |                    |d<   dvrt           j        j        |d<   |                    fdt          j                                        D                        t          di |S )Nr#   r    tf32r:   r6   >   fp8e4b8fp8e4nvfp8e5b16rL   r   r5   gfx12r;   c                 :    i | ]}|v |         ||         S Nr   )ra   koptss     r   
<dictcomp>z,HIPBackend.parse_options.<locals>.<dictcomp>}   s:     ; ; ;AT		d1g&9 Q&9&9&9r   r   )r
   runtimeoverride_archr   r#   setr*   r:   updaterW   sortedr6   languagedefault_fp_fusion__dataclass_fields__keys)rY   r   argsr:   r6   s    `   r   parse_optionszHIPBackend.parse_optionsh   s   3Gt{7GH ;x''+.z/V+W+W((//99938@\9]9]3^3^D/0!--#&z'F#G#G {8++$++,N,N,NOOOO!X--$++Y,@AAAADK,,,$++Y,@AAA+08L1M1M+N+ND'(T))',~'GD#$ ; ; ; ;)H)M)M)O)O ; ; ; 	< 	< 	<!!D!!!r   c                 r    |j         |j        |j        |j        d         |j        d         |j        d         fS )Nr   r   r.   )r,   r0   sharedr2   )rY   metadatas     r   pack_metadatazHIPBackend.pack_metadata   s>    O!!$!!$!!$
 	
r   c                 .    dt          | j                  iS )Nmin_dot_size)r   r   r   s     r   get_codegen_implementationz%HIPBackend.get_codegen_implementation   s     0 = =>>r   c                     ddl m} d|iS )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )rY   r   s     r   get_module_mapzHIPBackend.get_module_map   s    77777719==r   c                 .    t          j        |           d S r   )r	   load_dialects)rY   ctxs     r   r   zHIPBackend.load_dialects   s    #r   c                     dd l }d}t          | d          r|                                 |k    S t          | |j                  r:t          | d          r*|                                                                 |k    S dS )Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   r}   Tensorr   size)argr   
MAX_INT_32s      r   is_within_2gbzHIPBackend.is_within_2gb   s    
3$$ 	1==??j00c5<(( 	>WS:K-L-L 	>&&((--//:==ur   c                 F    t          j        |           }d| v r|ddggz  }|S )NSztt.pointer_rangerI   )r   
parse_attr)descrets     r   r   zHIPBackend.parse_attr   s3    $T**$;;',--C
r   c                     t          j        | |fi |}t          j        j        r%|dk    rt
                              |           r|dz  }|S )Ntensorr   )r   get_arg_specializationr
   r	   use_buffer_opsrt   r   )r   tykwargsr   s       r   r   z!HIPBackend.get_arg_specialization   sT    0bCCFCC 9# 	h:;S;STW;X;X3JC
r   c                     t           j        j        } | %t          |           }|                                r|S t          t
                    j        dz  }|                                r|S t          d          }|                                r|S t          d          }|                                r|S t          d          )Nzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r
   r	   lld_pathr   is_filerS   rT   	Exception)lld_env_pathllds     r   path_to_rocm_lldzHIPBackend.path_to_rocm_lld   s     y)#|$$C{{}} 
8nn#&77;;== 	J.//;;== 	J$%%;;== 	Jqrrrr   c                    t          j        | j                  }|                                 t          j                            |           t          j                            |           t          j        	                    |           t          j        
                    |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           |                    |            | S r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_triton_licmadd_symbol_dceadd_loop_unrollrun)modr   r   pms       r   	make_ttirzHIPBackend.make_ttir   s   _S[))
!!"%%%..r222<<R@@@''+++###))"---b!!!##B'''$$R(((##B'''
s
r   c                 N   t          j        | j                  }|                                 t          j                            |d|j         |j        |j	        |j
                   |                    |            t          j        | j                  }|                                 t          j                            |           t          j                            |           t          j                            |           t           j        j                            ||j        |j        |j                   t          j                            |           t           j        j                            |           t          j                            |d           t           j        j                            |           t          j                            |           t          j                            |           t          j                            |           t          j                            |           t6          j        j        }t6          j        j        }t6          j        j        }|j        dk    rdx}}t           j        j                             ||j!        |||           |r*t           j        j        "                    ||j                   t          j                            |           |j        #                                dk    r*t           j        j        $                    ||j                   t          j                            |d           t          j                            |           t          j        %                    |           tM          |j                  rCt           j        j        '                    |           t          j                            |           t           j        j        (                    |           tS          |j                  }|r5|j!        dk    r*t           j        j        *                    ||j!                   t6          j        j+        rmt           j        j        ,                    |           t          j                            |           t           j        j        -                    ||j                   t           j        j        .                    |           t          j                            |           t          j        /                    |           t          j        0                    |           |r*t           j        j        1                    ||j                   |                    |            | S )Nr   Tzlocal-prefetchr   rC   r.   )2r   r   r   r   r   r   add_convert_to_ttgpuirr#   r,   rK   r0   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr	   add_accelerate_matmulr=   r>   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsadd_fuse_nested_loopsr   r   r   r
   global_prefetchlocal_prefetchuse_async_copyrD   add_stream_pipeliner/   add_coalesce_async_copylowerinsert_instruction_sched_hintsadd_reduce_data_duplicationr'   add_in_thread_transposeadd_reorder_instructionsr$   add_block_pingpongr   add_canonicalize_pointersadd_convert_to_buffer_opsadd_fold_true_cmpir   r   add_update_async_wait_count)r   r   r   r   r   r   r   r!   s           r   
make_ttgirzHIPBackend.make_ttgir   sS   _S[))
**2/Dgl/D/DgFWY`Yj+2+;	= 	= 	=
s_S[))
##B'''44R88833B777
00W\7C_ahanooo44R888
0044400T:::
77;;;,,R000''+++##B'''''+++)311  $444/00On
..r73EXfhvwww 	IJ66r7<HHH''+++ &&((F22J==b'BWXXX00T:::44R888222666)',77 	=J66r:::N88<<<
33B7779',GG 	J'"4"9"9J11"g6HIII9# 	KJ88<<<M++B///J88W\JJJ
--b111''+++b!!!$$R((( 	MJ::2w|LLL
s
r   c                    | }t          j        |j                  }|                                 t          j                            |           t          j                            |           t          j	        
                    |           t          j                            |           t          j                            |           |                    |           |S r   )r   r   r   r   r   r   r   r   add_sccpr   add_loop_aware_cser    add_combine_tensor_select_and_ifr   )srcr   r   r   r   s        r   	ttgir_optzHIPBackend.ttgir_opt  s    _S[))
""2&&&r"""&&r***((,,,77;;;
s
r   c                    | }t          j        |j                  }|                                 d}t          j        j                            ||j        |           t
          j	        
                    |           t
          j	                            |           t
          j                            |           d}t          j        j                            ||j        |           t
          j                            |           t
          j                            |           t
          j	                            |           t
          j	                            |           t
          j                            |           t
          j                            |           t
          j                            |           |j                                        dk    r0t          j        j                            ||j        |j                   t0          j        j        st
          j                            |           t          j        j                            ||           |                    |           t?          j                      t?          j                    }t?          j!        ||          t	          j"                   d}t0          j        j#        rd}t?          j$        t          j%        |j        |           t	          j&        |j                   t	          j'        d           t	          j(        dd           t	          j(        d	d           t	          j(        d
d           t	          j(        d|j)        dk               d *                                D             }	|	d         +                    t          j,                   |	d         -                    dd|j.        |j)        z              |	d         -                    d|j/                    |j0        rdnd}
|	d         -                    d|
           t0          j        j#        r5|	d         1                    d           |	d         2                                 t	          j3        |	d                    t0          j        j#        rgti          tj                    j6        dz  }to          |dz            to          |dz            to          |dz            g}t?          j8        |           n/|j9        r(fd|j9        D             }t?          j8        |           t?          j:        t>          j;        |j        dg |j<                   t0          j        j=        rt	          j>        |	d                    | ?                    d          |d<   t	          j@                   t	          jA                   to                    S )Nr   TrC    +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rJ   c                 :    g | ]}|                                 |S r   )is_declaration)ra   fns     r   rd   z(HIPBackend.make_llir.<locals>.<listcomp>`  s)    PPPbB<M<M<O<OPrPPPr   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr8   zdenormal-fp-math-f32rM   z
asanrtl.bczocml.bczockl.bcc                 D    g | ]\  }}t          j        |          |S r   )r	   need_extern_lib)ra   rb   pathllvm_mods      r   rd   z(HIPBackend.make_llir.<locals>.<listcomp>  s1    iiiltTSEXYacgEhEhiTiiir   z
ttg.sharedr   )Br   r   r   r   r	   r   r   add_optimize_lds_usager#   convertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rD   r   lower_instruction_sched_hintsr/   r
   compilationdisable_line_infollvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleattach_target_tripleenable_asanattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrK   get_functionsset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr,   r-   r?   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   rS   rT   rV   link_extern_libsr1   optimize_moduleOPTIMIZE_O3r;   scalarize_packed_fops#add_scalarize_packed_fops_llvm_passget_int_attrcleanup_bitcode_metadatadisable_print_inline)r   r   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   target_featuresfnsdenormal_moder[   pathsr  s                @r   	make_llirzHIPBackend.make_llir#  s!   _S[))
 
11"glOTTT$$R(((**2...11"555 	
((W\9EEE''+++b!!!''+++**2...''+++b!!!$$R((( &&((F22J<<RwOabbb 2 	+M&&r***
55b)DDD
s 	,..>#w// ***( 	'&Ox):GL/ZZZ 	Hgl333Hc***%h0H%PPP%h0QSWXXX%h0H%PPP%h0H'J[_aJabbb QPH2244PPPA >???A8:dw?PQXQb?b:d:deee 	A0W5I2KLLL+2+EQ6A1=AAA( 	&F((222F##%%%
 	 Q(((( 
	3!(^^2U:NN\122NY.//NY.//E
 !(E2222  	3iiiig.AiiiE!(E222Xt'7r2wOghhh9* 	<3CF;;; !--l;;$X... 	 ***8}}r   c           	      n   t          j        d|           }t          |          dk    sJ |d         |d<   g }|j        dk    r|                    d           t          j        | t          j        |j	        d||j
        d          }t          j        j        rt          d	           t          |           |S )
Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rb   	attentionzsink-insts-to-avoid-spillsr   Fz!// -----// AMDGCN Dump //----- //)refindalllenrD   appendr   translate_to_asmr	   r  r#   r;   r
   dump_amdgcnprint)r   r   r   namesflagsamdgcns         r   make_amdgcnzHIPBackend.make_amdgcn  s    
 
QSVWW5zzQ 8
  K//LL5666&sC,=w|RQVX_Xprwxx9  	5666&MMMr   c                    d}t           j        j        rd}t          j        | |j        |          }t                                          }t          j	                    5 }t          j	                    5 }t          |j        d          5 }|                    |           d d d            n# 1 swxY w Y   t          j        |ddd|j        d|j        g           d d d            n# 1 swxY w Y   t          |j        d          5 }	|	                                }
d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |
S )	Nr   r   wbz-flavorgnuz-sharedz-orb)r
   r  r  r	   assemble_amdgcnr#   rt   r   tempfileNamedTemporaryFileopenrb   write
subprocess
check_callread)r   r   r   r0  rz   	rocm_pathtmp_outtmp_infd_infd_outr   s              r   
make_hsacozHIPBackend.make_hsaco  s*   ( 	'&O#CGG//11	(** 	$g,.. q&&+t,, 'KK&&&' ' ' ' ' ' ' ' ' ' ' ' ' ' '%y)UIv{\`bibn&opppq q q q q q q q q q q q q q q glD)) $Vkkmm$ $ $ $ $ $ $ $ $ $ $ $ $ $ $	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 
s~   D=1C%B)C%)B--C%0B-1(C%D=%C)	)D=,C)	-D=D&D=&D*	*D=-D*	.D==EEc                      |t           j        k    r fd|d<    fd|d<   n|t           j        k    r	 fd|d<    fd|d<    fd|d	<    fd
|d<   d S )Nc                 2                         | |          S r   )r   r   r   r   rY   s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    4>>#xQX3Y3Y r   r   c                 2                         | |          S r   )r   rV  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    DOOCSZ4[4[ r   ttgirc                 2                         | |          S r   )r   rV  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    DNN3RY4Z4Z r   c                 2                         | |          S r   )r4  rV  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    t~~c8W/U/U r   llirc                 2                         | |          S r   )rA  rV  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    1A1A#xQX1Y1Y r   r@  c                 2                         | |          S r   )rS  rV  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    Xw0W0W r   rz   )r   TRITONGLUON)rY   stagesr   r   s   ` ` r   
add_stageszHIPBackend.add_stages  s    x&&YYYYYF6N[[[[[F7OO''ZZZZZF7OUUUUUvYYYYYxWWWWWwr   c                 x    t          j        t                                          dgd          }| d| j         S )Nz	--versionre   )encodingr`   )rK  check_outputrt   r   r   )rY   versions     r   rm   zHIPBackend.hash  s?    ):+F+F+H+H+*Vahiii))DK)))r   ) rn   ro   rp   staticmethodr   rw   r|   rV   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r4  rA  rS  ra  	functools	lru_cacherm   __classcell__)r   s   @r   rt   rt   Z   sF       '	 ' ' ' \'"y "T " " " " " "
%# % % % %"S " " " "2
 
 
? ? ?>S*_ 5 > > > >
     \   \   \ s s \s&   \  ; ; \;z   \ k k \kZ   \*   \ X X X Y* * * * * * *r   rt   )triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   dataclassesr   typingr   r   r   typesr   rh   rG  r7  rK  rg  pathlibr   r   r$   r'   r*   rt   r   r   r   <module>rq     s   E E E E E E E E E E 5 5 5 5 5 5 5 5 5 5 5 5       ! ! ! ! ! ! # # # # # # # # # #         				              0Y 0 0 0 0h h hr r r $9? 9? 9? 9? 9? 9? 9? 9?xk* k* k* k* k* k* k* k* k* k*r   