from typing import Optional, Tuple

import torch

from ..modeling_flash_attention_utils import _flash_attention_forward, flash_attn_supports_top_left_mask


_use_top_left_mask = flash_attn_supports_top_left_mask()


def flash_attention_forward(
    module: torch.nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    dropout: float = 0.0,
    scaling: Optional[float] = None,
    sliding_window: Optional[int] = None,
    softcap: Optional[float] = None,
    **kwargs,
) -> Tuple[torch.Tensor, None]:
    # Record the sequence length before transposing; inputs arrive as
    # (batch, num_heads, seq_len, head_dim).
    seq_len = query.shape[2]

    # Flash Attention expects (batch, seq_len, num_heads, head_dim).
    query = query.transpose(1, 2)
    key = key.transpose(1, 2)
    value = value.transpose(1, 2)

    # Hidden states may have been silently upcast to float32 (e.g. by layer norms
    # cast in fp32 for training stability). Flash Attention only supports fp16/bf16,
    # so determine the dtype to cast back to before calling the kernel.
    target_dtype = None
    if query.dtype == torch.float32:
        if torch.is_autocast_enabled():
            target_dtype = torch.get_autocast_gpu_dtype()
        # Handle the case where the model is quantized
        elif hasattr(module.config, "_pre_quantization_dtype"):
            target_dtype = module.config._pre_quantization_dtype
        else:
            target_dtype = next(
                layer for layer in module.modules() if isinstance(layer, torch.nn.Linear)
            ).weight.dtype

    # Causality is always taken from the module itself; drop any duplicate kwarg
    # so it is not passed to `_flash_attention_forward` twice.
    kwargs.pop("is_causal", None)

    attn_output = _flash_attention_forward(
        query,
        key,
        value,
        attention_mask,
        query_length=seq_len,
        is_causal=module.is_causal,
        dropout=dropout,
        softmax_scale=scaling,
        sliding_window=sliding_window,
        softcap=softcap,
        use_top_left_mask=_use_top_left_mask,
        target_dtype=target_dtype,
        **kwargs,
    )

    return attn_output, None