o
    hKC                     @   s   d dl mZ d dlZd dlmZ ddlmZ e r)d dlmZm	Z	 d dl
mZmZ G dd	 d	ejjZ		dd
edeej deej fddZG dd dejjZG dd dejjZG dd dejjZdS )    )OptionalN)GenerationConfig   )is_torch_available)PreTrainedModelStaticCache)is_torch_greater_or_equal"is_torch_greater_or_equal_than_2_3c                	       sb   e Zd ZdZdef fddZdejdejfddZe	d	ej
jd
ejdedejfddZ  ZS )$TorchExportableModuleWithStaticCachea  
    A wrapper module designed to make a `PreTrainedModel` exportable with `torch.export`,
    specifically for use with static caching. This module ensures that the exported model
    is compatible with further lowering and execution in `ExecuTorch`.

    Note:
        This class is specifically designed to support export process using `torch.export`
        in a way that ensures the model can be further lowered and run efficiently in `ExecuTorch`.
    modelc                    s$  t    |jdu rtd|jjstd|jjdkr td|| _t| jj| jjj	j
| jjj	j| jjj	j| jjd| _tt| jjD ] }| jd| | jj| dd	 | jd
| | jj| dd	 qEtdd | jjjD | _| jrttj| jj| jjtjd}| jd|dd	 dS dS )a  
        Initializes the wrapper module with the pretrained model.

        Args:
            model (`PreTrainedModel`): The pretrained model to wrap. The model must have caching
            enabled and use a 'static' caching implementation.

        Raises:
            AssertionError: If the pretrained model does not have caching enabled or if it does
            not use a 'static' caching implementation in `model.generation_config`.
        NzkThe model must have a generation config to be exported with static caching. Please set `generation_config`.zvThe model must have caching enabled to be exported with static caching. Please set `generation_config.use_cache=True`.staticzThe model must use a 'static' caching implementation to be exported with static caching. Please set `generation_config.cache_implementation='static'`.configmax_batch_sizemax_cache_lendevicedtype
key_cache_F
persistentvalue_cache_c                 s   s    | ]}d |v V  qdS )CausalLMN ).0archr   r   x/var/www/html/construction_image-detection-poc/venv/lib/python3.10/site-packages/transformers/integrations/executorch.py	<genexpr>S   s    z@TorchExportableModuleWithStaticCache.__init__.<locals>.<genexpr>r   mask)super__init__generation_configAssertionError	use_cachecache_implementationr   r   r   cache_config
batch_sizer   r   r   static_cacherangelen	key_cacheregister_buffervalue_cacheanyarchitectures	is_causaltorchtrilonesbool)selfr   icausal_mask	__class__r   r   r    &   sF   




 z-TorchExportableModuleWithStaticCache.__init__	input_idscache_positionc           	      C   sR   |j \}}| jr| j|d|f nd}|d}| j}| j|||||dd}|jS )a  
        Forward pass of the module, which is compatible with the ExecuTorch runtime.

        Args:
            input_ids (`torch.Tensor`): Tensor representing current input token id to the module.
            cache_position (`torch.Tensor`): Tensor representing current input position in the cache.

        Returns:
            torch.Tensor: Logits output from the model.

        This forward adapter serves two primary purposes:

        1. **Making the Model `torch.export`-Compatible**:
            The adapter hides unsupported objects, such as the `Cache`, from the graph inputs and outputs,
            enabling the model to be exportable using `torch.export` without encountering issues.

        2. **Ensuring Compatibility with `ExecuTorch` runtime**:
            The adapter matches the model's forward signature with that in `executorch/extension/llm/runner`,
            ensuring that the exported model can be executed in `ExecuTorch` out-of-the-box.
        Nr   T)r9   attention_maskposition_idsr:   past_key_valuesr#   )shaper/   r   	unsqueezer'   r   logits)	r4   r9   r:   _seqlen	attn_maskr<   r=   outsr   r   r   forward^   s   

z,TorchExportableModuleWithStaticCache.forwardexported_programprompt_token_idsmax_new_tokensreturnc                 C   sb  |j d }|| }|  D ]\}}|dr"|j d }t||} nqg }tt||D ]'}	|  j|dd|	|	d f tj|	gtj	dd}
|
|d |	   q,tj|
dddddf dd	 }|
| t||k r|  jtj|ggtj	dtjt|gtj	dd}
tj|
dddddf dd	 }|
| t||k sqtj|gtj	dS )
a  
        Generate a sequence of tokens using an exported program.

        This util function is designed to test exported models by simulating the generation process.
        It processes the input prompt tokens sequentially (no parallel prefill).
        This generate function is not intended to replace the original `generate` method, and the support
        for leveraging the original `generate` is potentially planed!

        Args:
            exported_program (`torch.export.ExportedProgram`): The exported program generated via `torch.export`.
            prompt_token_ids (`torch.Tensor`): Tensor representing the input prompt token IDs.
            max_new_tokens (`int`): Maximum number of new tokens to generate. Note that the total generation
                length is limited by both `max_new_tokens` and the model's cache size.

        Returns:
            torch.Tensor: A tensor containing the generated sequence of token IDs, including the original prompt tokens.
        r*   r   N   r   )r9   r:   r   dim)r>   named_buffers
startswithminr(   modulerE   r0   tensorlongappenditemargmaxr)   )rF   rG   rH   prompt_token_lenmax_generation_lengthbuffer_namebufferr   response_tokens	input_posresultcurrent_tokenr   r   r   generate   s4   



$
$
z-TorchExportableModuleWithStaticCache.generate)__name__
__module____qualname____doc__r   r    r0   TensorrE   staticmethodexportExportedProgramintr_   __classcell__r   r   r7   r   r
      s    
8$r
   r   example_input_idsexample_cache_positionc                 C   s   t stdddl}| N |dur|n	|jdgg|jd}|dur%|n|jdg|jd}tdrB|jjt| |fd|idd	}n|jj	j
t| |fd|id
dd}|W  d   S 1 s_w   Y  dS )a6  
    Convert a `PreTrainedModel` into an exportable module and export it using `torch.export`,
    ensuring the exported model is compatible with `ExecuTorch`.

    Args:
        model (`PreTrainedModel`): The pretrained model to be exported.
        example_input_ids (`torch.Tensor`): Example input token id used by `torch.export`.
        example_cache_position (`torch.Tensor`): Example current cache position used by `torch.export`.

    Returns:
        Exported program (`torch.export.ExportedProgram`): The exported program generated via `torch.export`.
    ztorch >= 2.3 is required.r   NrK   r   z2.5.0r:   T)argskwargsstrictF)rl   rm   pre_dispatchrn   )r	   ImportErrortorch.export._traceno_gradrR   rS   r   rf   r
   _trace_export)r   rj   rk   r0   rF   r   r   r   convert_and_export_with_cache   s0   
$ru   c                       (   e Zd ZdZ fddZdd Z  ZS ) Seq2SeqLMEncoderExportableModulez
    A wrapper module designed to make a Seq2Seq LM encoder exportable with `torch.export`.
    This module ensures that the exported encoder model is compatible with ExecuTorch.
    c                    s   t    || _d S )N)r   r    encoder)r4   encoder_modelr7   r   r   r       s   

z)Seq2SeqLMEncoderExportableModule.__init__c                 C   s   | j |djS )N)r9   )rx   last_hidden_state)r4   r9   r   r   r   rE      s   z(Seq2SeqLMEncoderExportableModule.forwardr`   ra   rb   rc   r    rE   ri   r   r   r7   r   rw      s    rw   c                       rv   )/Seq2SeqLMDecoderExportableModuleWithStaticCachez
    A wrapper module designed to make a Seq2Seq LM decoder exportable with `torch.export`,
    specifically for use with static caching. This module ensures the exported decoder
    is compatible with ExecuTorch.
    c                    s   t    | | _|j| _|j| _t| j||dtjd| _	t
t| j	jD ] }| jd| | j	j| dd | jd| | j	j| dd q&d S )Ncpur   r   Fr   r   )r   r    get_decoderdecoderlm_headr   r   r0   float32r'   r(   r)   r*   r+   r,   )r4   r   max_static_cache_lengthr&   r5   r7   r   r   r       s   

	 z8Seq2SeqLMDecoderExportableModuleWithStaticCache.__init__c                 C   s(   | j ||| jd|d}| |d }|S )NT)r9   encoder_hidden_statesr=   r#   r:   r   )r   r'   r   )r4   decoder_input_idsr   r:   outputs	lm_logitsr   r   r   rE     s   	z7Seq2SeqLMDecoderExportableModuleWithStaticCache.forwardr{   r   r   r7   r   r|      s    r|   c                       sB   e Zd Z	d fdd	Zdd Zd	d
 ZdddZdd Z  ZS )Seq2SeqLMExportableModulerK      r      c                    sP   t    || _| | _|j| _|| _td||||dd| _d | _	d | _
d S )NT)r&   r   )r#   
max_lengthr$   r%   )r   r    
full_modelget_encoderrx   r   max_hidden_seq_lengthr   r!   exported_encoderexported_decoder)r4   r   r&   r   r$   max_cache_lengthr7   r   r   r    &  s   

	
z"Seq2SeqLMExportableModule.__init__c                 C   sr   t | jd }tjjd| jd}t  tjj||fdd|iidd}W d    |S 1 s2w   Y  |S )Nr}   encoder_seq_lengthmaxr9   rK   Tdynamic_shapesrn   )	rw   rx   toevalr0   rf   Dimr   rr   )r4   encoder_input_idswrapped_encoderseq_len_dimr   r   r   r   _export_encoder;  s   

z)Seq2SeqLMExportableModule._export_encoderc                 C   s   t | j| jjj| jjjdd }tj	j
d| jd}t  tj	j	||||fd d|id ddd}W d    |S 1 s?w   Y  |S )	N)r   r   r&   r}   encoder_hidden_seq_lengthr   rK   )r   r   r:   Tr   )r|   r   r!   r%   r   r&   r   r   r0   rf   r   r   rr   )r4   r   r   r:   wrapped_decoderencoder_seq_len_dimr   r   r   r   _export_decoderI  s.   

z)Seq2SeqLMExportableModule._export_decoderNc           	      C   s   |d ur|nt jdt jd}|d ur|n	t jdggt jd}|d ur$|nt jdgt jd}|d ur3|nt j| jjjd| jj	ft j
d}| || _| |||| _| S )N)rK   
   r   r   r   )r0   r2   rS   rR   zerosr!   r%   r&   r   d_modelr   r   r   r   r   )	r4   r   r   r   r:   example_encoder_input_idsexample_decoder_input_idsrk   example_encoder_hidden_statesr   r   r   rf   f  s    z Seq2SeqLMExportableModule.exportc           	   	   C   s   t  ` | j |}t jdggt jd}dg}t|d D ];}| j ||t j|gt jd}t j|d d dd d f dd	 }|
| t j|ggt jd}|| jjkrZ nq|W  d    S 1 sgw   Y  d S )Nr   r   rK   rJ   rL   )r0   rr   r   rQ   rR   rS   r(   r   rV   rU   rT   r   eos_token_id)	r4   rG   rH   encoder_outputr   generated_idsr5   r@   
next_tokenr   r   r   r_   }  s    
$
$z"Seq2SeqLMExportableModule.generate)rK   r   r   r   )NNNN)	r`   ra   rb   r    r   r   rf   r_   ri   r   r   r7   r   r   %  s    
r   )NN)typingr   r0   +transformers.generation.configuration_utilsr   utils.import_utilsr   transformersr   r   transformers.pytorch_utilsr   r	   nnModuler
   rd   ru   rw   r|   r   r   r   r   r   <module>   s*    
5-