
    |Yh#y                     v   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmc mc m	Z
 d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZmZmZmZ d dl m!Z!m"Z" d dl#m$Z$ dd	l%m&Z&m'Z'm(Z( g d
Z)eZ*ej+        ej,        j+        ej-        ej,        j-        iej,        j+        ej	        j+        ej,        j-        ej	        j-        idZ.d Z/	 	 	 d#dZ0d$dZ1d Z2d Z3d%dZ4	 	 	 	 d&dZ5d Z6d Z7 ej8        e&          	 	 	 	 d'd            Z9d Z:d Z; ej8        e&          d(d            Z< ej8        e&          dej=        ddfd            Z> ej8        e&          d(d            Z? ej8        e&          d%d            Z@ ej8        e&          	 	 	 	 	 	 d)d            ZA	 	 	 	 	 d*d ZB	 d%d!ZCd+d"ZDdS ),    N)_FusedModule)_is_activation_post_process)_activation_is_memoryless_add_module_to_qconfig_obs_ctrdefault_dynamic_qconfigfloat16_dynamic_qconfig!float_qparams_weight_only_qconfig&float_qparams_weight_only_qconfig_4bit)_get_special_act_post_process_has_special_act_post_process)get_default_dynamic_quant_module_mappingsget_default_qat_module_mappings$get_default_qconfig_propagation_list(get_default_static_quant_module_mappings2get_default_static_quant_reference_module_mappingsno_observer_set)DeQuantStubQuantWrapper)type_before_parametrizations   )DEPRECATION_WARNINGget_qparam_dict)has_no_children_ignoring_parametrizations)
get_default_custom_config_dictpropagate_qconfig_add_quant_dequantpreparequantizequantize_dynamicprepare_qatquantize_qatconvertswap_module)%float_to_observed_custom_module_class)observed_to_quantized_custom_module_classc                      t           S )z'Defines the default custom config dict.)_DEFAULT_CUSTOM_CONFIG_DICT     p/var/www/tools.fuzzalab.pt/emblema-extractor/venv/lib/python3.11/site-packages/torch/ao/quantization/quantize.pyr   r   G   s    &&r)    c                 
   |                     t          |           |          }|                     ||          }t          | d|          }t          j        j        j                            ||            t          ||           }|| _        | 	                                D ]b\  }}|r|dz   |z   n|}	|=||                     dg           v s7t          |          |                     dg           v st          ||||	           cdS )a  This is a helper function for `propagate_qconfig_`

    Args:
        module: input module
        qconfig_dict: dictionary that maps from name of submodule to quantization
                     configuration
        qconfig_parent: quantization config of parent module, we will fallback to
                       this config when there is no specified config for current
                       module
        prefix: corresponding prefix of the current module, used as key in
                qconfig_dict
        prepare_custom_config_dict: dictionary for custom handling of modules
                                    see docs for :func:`~torch.ao.quantization.prepare_fx`

    Return:
        None, module is modified inplace with qconfig attached
    qconfig.Nnon_traceable_module_namenon_traceable_module_class)getr   getattrtorchaoquantizationr-   _assert_valid_qconfigr   named_childrentype_propagate_qconfig_helper)
moduleqconfig_dictqconfig_parentprefixprepare_custom_config_dictmodule_qconfigqconfig_with_device_checknamechildmodule_prefixs
             r*   r9   r9   L   s0   2 "%%$V,,n N "%%fn==NVY??N	H!77OOO >~v V V.FN,,.. 
 
e/5?t++4%-.223NPRSSSSE{{)--.JBOOP P &|%>  
 
r)   c                 :    |i }|i }t          | ||           dS )a  Propagate qconfig through the module hierarchy and assign `qconfig`
    attribute on each leaf module

    Args:
        module: input module
        qconfig_dict: dictionary that maps from name or type of submodule to
            quantization configuration, qconfig applies to all submodules of a
            given module unless qconfig for the submodules are specified (when
            the submodule already has qconfig attribute)
        prepare_custom_config_dict: dictionary for custom handling of modules
            see docs for :func:`~torch.ao.quantization.prepare_fx`

    Return:
        None, module is modified inplace with qconfig attached
    N)r>   )r9   )r:   r;   r>   s      r*   r   r   }   sD      !)%'"9S     r)   c                 ,    |                      |          S )z.Forward hook that calls observer on the outputactivation_post_process)selfinputoutputs      r*   _observer_forward_hookrK      s    ''///r)   c                 8    |                      |d                   S )z2Forward pre hook that calls observer on the outputr   rF   )rH   rI   s     r*   _observer_forward_pre_hookrM      s    ''a111r)   Fc                     t          | d          s
J d            |r|                     t          d           d S |                     t          d           d S )NrG   zGExpect activation_post_process attribute already attached to the moduleT)prepend)hasattrregister_forward_pre_hookrM   register_forward_hookrK   )r:   pre_hooks     r*   &_register_activation_post_process_hookrT      sw    6455  Q    K(()CT(RRRRR$$%;T$JJJJJr)   c                    |t                      }|i }`t          |           }t          |          dk    sJ d|             t          |          dk    rt          t	          |                    ndddd dfd	}|                                 D ]\  }}t          |          t          j        fv r#t          t          |          t          j        t          j        f          rM |          rAt          |d          sJ d	t          |           d
             |j                  |_        t!          |t"                    r |          r ||           |(t          |          |v r |          r ||           t%          |          rt'          |          }	 |||	           % |          rt          |          |v rq|t          |                   }
|
                    |          }t+          | ||           t          |
t-          t/                                          s ||           t1          ||||           t3          |           r;t!          | t4          j        j                  st          |           |v r ||            t          | d          r=t!          | t4          j        j                  s t          |           |v r ||            dS dS dS dS )as  Add observer for the leaf child of the module.

    This function insert observer module to all leaf child module that
    has a valid qconfig attribute.

    Args:
        module: input module with qconfig attributes for all the leaf modules that we want to quantize
        qconfig_propagation_list: a list of quantizable modules that will have observers added to them
            if they are leaf nodes
        device: parent device, if any
        non_leaf_module_list: list of non-leaf modules we want to add observer

    Return:
        None, module is modified inplace with added observer modules and forward_hooks
    Nr   zR_add_observer_ only works with cpu or single-device CUDA modules, but got devices r   c                 t    ||                                  n	 |            }||                    |           |S N)
activationto)r-   devicespecial_act_post_processrX   s       r*   get_activation_post_processz3_add_observer_.<locals>.get_activation_post_process   sO     (/    ))++ 	
 MM&!!!r)   c                 4    t          | d          o| j        d uS )Nr-   rP   r-   )ms    r*   needs_observationz)_add_observer_.<locals>.needs_observation   s    q)$$>$)>>r)   c                      |           r`t          | t                    sM|                     d | j        |                     t	          | t          | j                             dS dS dS )zmAdds an activation post process module and register
        a pre or post hook that calls the module
        rG   rS   N)
isinstancer   
add_moduler-   rT   r   )r_   r[   rZ   r\   r`   s     r*   insert_activation_post_processz6_add_observer_.<locals>.insert_activation_post_process   s    
 Q 	
1k(B(B 	LL)++Iv'?    35ai@@     	 	 	 	r)   rG   zfunctional class z- has no pre-defined `activation_post_process`weight_fake_quantrW   )r   _get_unique_devices_lennextiterr7   r   nnDropout
issubclassnnqFloatFunctionalQFunctionalrP   r-   rG   rc   r   r   r   
from_floatsetattrtupler   _add_observer_r   r3   
Sequential)r:   qconfig_propagation_listnon_leaf_module_listrZ   custom_module_class_mappingdevicesre   rA   rB   r[   observed_classobserved_childr\   r`   s      `        @@r*   rt   rt      s   ,  '#G#I#I "*&(# ~&v..7||q   jahjj !   ),Gq(8(8d7mm$$$d   ? ? ?       & ,,.. / /e'..2:,>>(//#2Es1W
 
 +	 ! '' u&?@@  z(DU(K(Kzzz   1L0KM61 1- |,, !	  '' 6..u555 ,,U337KKK  '' 6..u555*511 	'DU'K'K$**52JKKKKe$$	,U337RRR8,U33N ,66u==NFD.111 neO4E4E.F.FGG ?..~>>>($+    	2&99/658#677/ )004LLL&&v... 	+,,/658#677/ )004LLL&&v...../ / / / MLr)   c                 |    d |                                  D             d |                                 D             z  S )Nc                 <    h | ]}|j         j        d k    |j         S metarZ   r8   .0ps     r*   	<setcomp>z'_get_unique_devices_.<locals>.<setcomp>4  s(    MMMQX]f5L5LAH5L5L5Lr)   c                 <    h | ]}|j         j        d k    |j         S r~   r   r   s     r*   r   z'_get_unique_devices_.<locals>.<setcomp>4  s2     Q Q Qahmv.E.E.E.E.Er)   )
parametersbuffers)r:   s    r*   rg   rg   3  sT    MMf//11MMM Q Q ..**Q Q Q  r)   c                     t          |           r&t          | d          r| j        rt          |           S |                                 D ]\  }}t          |          | j        |<   | S )a{  Wrap the leaf child module in QuantWrapper if it has a valid qconfig
    Note that this function will modify the children of module inplace and it
    can return a new module which wraps the input module as well.

    Args:
        module: input module with qconfig attributes for all the leaf modules
        that we want to quantize

    Return:
        Either the inplace modified module with submodules wrapped in
        `QuantWrapper` based on qconfig or a new `QuantWrapper` module which
        wraps the input module, the latter case only happens when the input
        module is a leaf module and we want to quantize it.
    r-   )r   rP   r-   r   r7   r   _modules)r:   rA   rB   s      r*   r   r   9  s}      	2&99$FI&&$ N$
 F###,,.. 9 9e 1% 8 8Mr)   c                    t           j                            d           |t                      }|                    di           }|st          j        |           } |}|t                      }t          | d           t          d | 
                                D                       st          j        d           t          | |||           | S )af  Prepares a copy of the model for quantization calibration or quantization-aware training.

    Quantization configuration should be assigned preemptively
    to individual submodules in `.qconfig` attribute.

    The model will be attached with observer or fake quant modules, and qconfig
    will be propagated.

    Args:
        `model`: input model to be modified in-place
        `inplace`: carry out model transformations in-place, the original module is mutated
        `allow_list`: list of quantizable modules
        `observer_non_leaf_module_list`: list of non-leaf modules we want to add observer
        `prepare_custom_config_dict`: customization configuration dictionary for prepare function

    .. code-block:: python

       # Example of prepare_custom_config_dict:
       prepare_custom_config_dict = {
           # user will manually define the corresponding observed
           # module class which has a from_float class method that converts
           # float custom module to observed custom module
           "float_to_observed_custom_module_class": {CustomModule: ObservedCustomModule}
       }

    z!quantization_api.quantize.prepareNr$   r;   c              3   D   K   | ]}t          |d           o|j        V  dS )r-   Nr^   )r   r_   s     r*   	<genexpr>zprepare.<locals>.<genexpr>  s3      LLqwq)$$2LLLLLLr)   zNone of the submodule got qconfig applied. Make sure you passed correct configuration through `qconfig_dict` or by assigning the `.qconfig` attribute directly on submodules)rx   )r3   _C_log_api_usage_oncer   r1   copydeepcopyr   r   anymoduleswarningswarnrt   )modelinplace
allow_listobserver_non_leaf_module_listr>   rx   rv   s          r*   r   r   T  s    D 
H  !DEEE!)%C%E%E""<"@"@/# #  %e$$  *#G#I#I u40000 LLEMMOOLLLLL 
K	
 	
 	
  %$?	    Lr)   c                      t           d          r$t           j                  rt           d           d fd	} |d            |d           d S )NrG   Fc                     | rj         nj        }| rt          nt          }t	                      }|                                D ]\  }}||u r|                    |           |D ]}|                    |           d S rW   )_forward_pre_hooks_forward_hooksrM   rK   setitemsaddpop)rS   hook_mapobserver_hookhandle_ids_to_remove	handle_idhook_fnr:   s         r*   remove_hooksz5_remove_activation_post_process.<locals>.remove_hooks  s    08S6,,f>S*2N&&8N 	  #uu"*.."2"2 	4 	4Iw-''$((333- 	$ 	$ILL####	$ 	$r)   Trb   F)rP   r   rG   delattr)r:   r   s   ` r*   _remove_activation_post_processr     s     v011 36Q&7 7 3 	1222
$ 
$ 
$ 
$ 
$ 
$ L$L%      r)   c                     |                                  D ]}t          |           t          | d          r| `t	          |            dS )zClean up the qconfig left in the module so that new qconfig can be
    propagated.

    Args:
        module: module to be cleaned up
    r-   N)children_remove_qconfigrP   r-   r   )r:   rB   s     r*   r   r     sY     ""  vy!! N#F+++++r)   c                    t           j                            d           |t                      }|st	          j        |           } |                                  t          | d            || g|R   t          | |d           | S )a  Quantize the input float model with post training static quantization.

    First it will prepare the model for calibration, then it calls
    `run_fn` which will run the calibration step, after that we will
    convert the model to a quantized model.

    Args:
        model: input float model
        run_fn: a calibration function for calibrating the prepared model
        run_args: positional arguments for `run_fn`
        inplace: carry out model transformations in-place, the original module is mutated
        mapping: correspondence between original module types and quantized counterparts

    Return:
        Quantized model.
    z"quantization_api.quantize.quantizeNTr   )	r3   r   r   r   r   r   evalr   r"   )r   run_fnrun_argsmappingr   s        r*   r   r     s    $ 
H  !EFFF:<< %e$$	JJLLLE4    
F58E7D))))Lr)   c                 4   t           j                            d           |`|t           j        k    rjt          j        t          t          j        t          t          j        t          t          j	        t          t          j
        t          t          j        t          i}n|t           j        k    rjt          j        t          t          j        t          t          j        t          t          j	        t          t          j
        t          t          j        t          i}n%|t           j        k    r%t          j        t           t          j        t           i}n|t           j        k    rt          j        t&          i}nt)          d| d          t+          |t,                    r|t           j        u rt          }n_|t           j        u rt          }nI|t           j        u rt           }n3|t           j        u rt&          }nt/          dt1          |                    t3          t5          |t7          j        |                              }|t;                      }|st=          j        |           } |                                   tC          | |           tE          | |d           | S )av  Converts a float model to dynamic (i.e. weights-only) quantized model.

    Replaces specified modules with dynamic weight-only quantized versions and output the quantized model.

    For simplest usage provide `dtype` argument that can be float16 or qint8. Weight-only quantization
    by default is performed for layers with large weights size - i.e. Linear and RNN variants.

    Fine grained control is possible with `qconfig` and `mapping` that act similarly to `quantize()`.
    If `qconfig` is provided, the `dtype` argument is ignored.

    Args:
        model: input model
        qconfig_spec: Either:

            - A dictionary that maps from name or type of submodule to quantization
              configuration, qconfig applies to all submodules of a given
              module unless qconfig for the submodules are specified (when the
              submodule already has qconfig attribute). Entries in the dictionary
              need to be QConfig instances.

            - A set of types and/or submodule names to apply dynamic quantization to,
              in which case the `dtype` argument is used to specify the bit-width

        inplace: carry out model transformations in-place, the original module is mutated
        mapping: maps type of a submodule to a type of corresponding dynamically quantized version
            with which the submodule needs to be replaced

    z*quantization_api.quantize.quantize_dynamicNz5Don't know how to quantize with default settings for z. Provide full qconfig pleasez.Unknown dtype specified for quantize_dynamic: Tr   )#r3   r   r   qint8rk   Linearr   LSTMGRULSTMCellRNNCellGRUCellfloat16r   quint8EmbeddingBagr	   	Embeddingquint4x2r
   
ValueErrorrc   r   RuntimeErrorstrdictzip	itertoolsrepeatr   r   r   r   r   r"   )r   qconfig_specdtyper   r   default_qconfigs         r*   r   r     s   @ 
H  !MNNNEK	20/4
3
3LL em##	20/4
3
3LL el""!B?LL en$$!GLL llll   
L#	&	& REK5OOem##5OOel""?OOen$$DOO@#e**   Ci.>.O.OPPQQ;== %e$$	JJLLLul+++E7D))))Lr)   c                 \   t           j                            d           | j        s
J d            |t	                      }|st          j        |           } t          | d           t          | |dd           t          | t          |                                          d           | S )	a  
    Prepares a copy of the model for quantization calibration or
    quantization-aware training and converts it to quantized version.

    Quantization configuration should be assigned preemptively
    to individual submodules in `.qconfig` attribute.

    Args:
        model: input model to be modified in-place
        mapping: dictionary that maps float modules to quantized modules to be
                 replaced.
        inplace: carry out model transformations in-place, the original module
                 is mutated
    z%quantization_api.quantize.prepare_qatz1prepare_qat only works on models in training modeNr   TF)r   r   remove_qconfig)r   r   )r3   r   r   trainingr   r   r   r   r"   r   r   values)r   r   r   s      r*   r    r    :  s      
H  !HIII>NNNNNN133 %e$$u40000E7DGGGGEW^^5E5E1F1FPTUUUULr)   c                     t           j                            d           |st          j        |           } |                                  t          | d            || g|R   t          | d           | S )ag  Do quantization aware training and output a quantized model

    Args:
        model: input model
        run_fn: a function for evaluating the prepared model, can be a
                function that simply runs the prepared model or a training
                loop
        run_args: positional arguments for `run_fn`

    Return:
        Quantized model.
    z&quantization_api.quantize.quantize_qatTr   )r3   r   r   r   r   trainr    r"   )r   r   r   r   s       r*   r!   r!   X  s     
H  !IJJJ %e$$	KKMMMt$$$$
F58E4    Lr)   Tc                     t           j                            d           |st          j        |           } t          | |d|||           |rt          |            | S )a  Converts submodules in input module to a different module according to `mapping`
    by calling `from_float` method on the target module class. And remove qconfig at the
    end if remove_qconfig is set to True.

    Args:
        `module`: prepared and calibrated module
        `mapping`: a dictionary that maps from source module type to target
                   module type, can be overwritten to allow swapping user defined
                   Modules
        `inplace`: carry out model transformations in-place, the original module
                   is mutated
        `convert_custom_config_dict`: custom configuration dictionary for convert function
        `use_precomputed_fake_quant`: a flag to enable use of precomputed fake quant

    .. code-block:: python

       # Example of convert_custom_config_dict:
       convert_custom_config_dict = {
           # user will manually define the corresponding quantized
           # module class which has a from_observed class method that converts
           # observed custom module to quantized custom module
           "observed_to_quantized_custom_module_class": {
               ObservedCustomModule: QuantizedCustomModule
           }
       }

    z!quantization_api.quantize.convertT)r   is_referenceconvert_custom_config_dictuse_precomputed_fake_quant)r3   r   r   r   r   _convertr   )r:   r   r   r   r   r   r   s          r*   r"   r"   p  sy    J 
H  !DEEE 'v&&!#=#=      Mr)   c           	         ||rt                      nt                      }|t                      }|                    di           }|st	          j        |           } i }|                                 D ]U\  }}	t          |	t                    s&t          |	          |vrt          |	|d|||           t          |	|||          ||<   V|                                D ]\  }
}|| j        |
<   | S )ao  Converts submodules in input module to a different module according to `mapping`
    by calling `from_float` method on the target module class

    Args:
        module: input module
        mapping: a dictionary that maps from source module type to target
                 module type, can be overwritten to allow swapping user defined
                 Modules
        inplace: carry out model transformations in-place, the original module
                 is mutated
        is_reference: a flag to enable quantized reference module
        use_precomputed_fake_quant: a flag to enable use of precomputed fake quant

    Nr%   Tr   )r   r   r   r1   r   r   r7   rc   r   r   r   r#   r   r   )r:   r   r   r   r   r   rx   reassignrA   modkeyvalues               r*   r   r     s?   ,  <>@@@9;; 	
 ")%C%E%E""<"@"@3R# #  'v&&H**,, 
 
	c 3--	,S119TTT*+E    %57Q
 
 nn&& % %
U$Mr)   c                    | }t          | d          r:| j        2d}t          |           |v r+|t          |                                        |           }d}nt          |           |v r|t          |                    }t          |d          r_|j        rX| j        J | j                                        } || j                   t          |          }|                    | |          }nOt          j	        |j                  }	d|	j
        v r|                    | |          }n|                    |           }d}|r| j                                        D ]}
|                    |
           | j                                        D ] }|t          ur|                    |           !t#          |           }t%          |          dk    s6t%          |          d	k    rt'          j        d
          |v sJ d|             t%          |          dk    rt+          t-          |                    nd}|r|                    |           |S )a	  Swaps the module if it has a quantized counterpart and it has an
    `observer` attached.

    Args:
        mod: input module
        mapping: a dictionary that maps from nn module to nnq module

    Return:
        The corresponding quantized module of `mod`
    r-   NFT_IS_REFERENCEr   r   r      r   zOswap_module only works with cpu or single-device CUDA modules, but got devices r   )rP   r-   r   from_observedr   weightr   rq   inspect	signaturer   r   r   rQ   r   rK   rR   rg   rh   r3   rZ   ri   rj   rY   )r   r   rx   r   new_modswappedqmodweight_post_processweight_qparamssigpre_hook_fnr   ry   rZ   s                 r*   r#   r#     s    GsI ,#3;#:',,0KKK1,S11mC    GG)#..'997<<=Dt_-- 3$2D 3{...&)k&8&8&:&:###CJ///!01D!E!E//#~>>'88/3>AA"oo8R .  GG #ooc22GG 	#"5<<>> ? ?11+>>>> -4466 ; ;"88811'::: +3//Gw<<1$$G!!el6&:&:g&E&E&Ekbikk 'F&E&E -0LL1,<,<T$w--((($F #

6"""Nr)   c                     d }t          | d          r| j        | ||          dz   <   |                                 D ](\  }}|r ||          |z   n|}t          |||           )dS )a,  Traverse the modules and save all observers into dict.
    This is mainly used for quantization accuracy debug
    Args:
        mod: the top module we want to save all observers
        prefix: the prefix for the current module
        target_dict: the dictionary used to save all the observers
    c                     | dk    r| n| dz   S )Nr+   r.   r(   )r=   s    r*   
get_prefixz&_get_observer_dict.<locals>.get_prefix*  s    2vv6C<7r)   rG   N)rP   rG   r7   _get_observer_dict)r   target_dictr=   r   rA   rB   rC   s          r*   r   r   !  s    8 8 8 s-.. 
' 	JJv&&)BBC ))++ > >e5;E

6**T115+}====> >r)   )Nr+   N)NNr   )NNNN)FNNN)NF)NFTFNF)NFFNF)r+   )Er   r   r   typing_extensionsr   r3   torch.ao.nn.quantizedr4   rk   	quantizedrn   torch.nntorch.ao.nn.intrinsicr   torch.ao.quantization.observerr   torch.ao.quantization.qconfigr   r   r   r   r	   r
   +torch.ao.quantization.quantization_mappingsr   r   r   r   r   r   r   r   torch.ao.quantization.stubsr   r   torch.nn.utils.parametrizer   utilsr   r   r   __all__is_activation_post_processr   quantizableMultiheadAttentionr'   r   r9   r   rK   rM   rT   rt   rg   r   
deprecatedr   r   r   r   r   r   r    r!   r"   r   r#   r   r(   r)   r*   <module>r      s               # # # # # # # # # # # #       . . . . . . F F F F F F               	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 B A A A A A A A C C C C C C            9 
 	$
r~@.
 	R\.
)2<+J2	 	 ' ' ' #. . . .b   20 0 0
2 2 2
K K K K " $F/ F/ F/ F/R    6 122 "&#? ? ? 32?D! ! !4, , ,  122   32: 122EKuW W W 32Wt 122   32: 122   32. 122 #$1 1 1 321l #$; ; ; ;~ KP; ; ; ;|> > > > > >r)   