o
    m9:jM                  #   @   s(  U d Z ddlZddlmZ ddlmZ ddlmZ ddlZddl	m
Z
 edZedZi Zeejjef ed	< eh d
Zdededeeef deeeef geeef f fddZ	dCdddddejdejdejdejdB dedededejfddZedde	dCdddddejdejdejdejdB dedededejfddZd edB d!edefd"d#Zd$ejd%ededejfd&d'Zd(ejd)ejd*ed+ed edB d,edejfd-d.Zd*ed+eddfd/d0Zd(ejd)ejd*ed+ed edB dejfd1d2Z 			dDdddddd3dd4d(ejd)ejd5ejd6ejdB d7ejdB d8ejdB d9ed:ed;ed,ed edB d<ed=edB de!ejejejejf fd>d?Z"ed@de"			dDdddddd3dd4d(ejd)ejd5ejd6ejdB d7ejdB d8ejdB d9ed:ed;ed,ed edB d<ed=edB de!ejejejejf fdAdBZ#dS )EzImplementations of ONNX operators as native Torch ops.

NOTE: Fake implementations:
    Refer to https://docs.pytorch.org/docs/stable/library.html#torch.library.register_fake
    for more details on how to create fake kernels.
    N)Callable)TypeVar)	ParamSpec)_dtype_mappings_P_RONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_version	fake_implreturnc                    s.   dt ttf dt ttf f fdd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                    sP   d }t jjd d| dd| }| tttt jj|< |  |S )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr   getattropsonnxregister_fake)r   overloadtorch_opr   r   r   r   [/home/nk/hobo-godmode/plappi-mvp/.venv/lib/python3.10/site-packages/torch/onnx/ops/_impl.py	decorator'   s   

z_onnx_op.<locals>.decorator)r   r   r   )r   r   r   r!   r   r   r    _onnx_op"   s   *r"   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr#   r$   r%   c                C   s   |   S )zFFake implementation for RotaryEmbedding-23 for torch.compile purposes.)clone)r&   r'   r(   r)   r#   r$   r%   r   r   r    _rotary_embedding_23_fake_impl5   s   r+   RotaryEmbedding   c                   s  | j t}d  d durWt dkfdd tj d  k fdd tj d kfd	d t dkoN dkfd
d nt dkod dkfdd |dkrwt| d} n$|dkrt|dkfdd d }|| }	 ||	g}
t| |
} tt| j dkdd  | j d }	|dkr|	}| ddddddd|f }| dddddd|df }|d dur  ntj d  koj d k fdd tj d  koj d k fdd tj d kfdd tj d kfdd tdtd|rk|dddddddddf }|dddddddddf }n
tj|ddd\}}| |  }| |  }|rt|d}t|d}tj	||fdd}t||j }n	tj	||fdd}tj	||fdd}|dkrt|S t|dS )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   N   c                      s   d j  S )Nz6position_ids must be 2D when provided. Received shape shaper   )r)   r   r    <lambda>Z   s    z%rotary_embedding_23.<locals>.<lambda>c                      s   d  dj d  S )Nz6position_ids first dim (batch) must match x.shape[0] (). Received r   r0   r   )
batch_sizer)   r   r    r2   ^       r	   c                      s   d d j d  S )Nz;position_ids second dim (sequence) must match x.shape[-2] (r3   r	   r0   r   )r)   sequence_lengthr   r    r2   b   r5   c                         d j  dj  S )NzWcos_cache/sin_cache must be 2D when position_ids is provided. Received cos_cache shape , sin_cache shape r0   r   r'   r(   r   r    r2   f   
       c                      r7   )Nz[cos_cache/sin_cache must be 3D when position_ids is not provided. Received cos_cache shape r8   r0   r   r9   r   r    r2   l   r:      )r   r/   r	   r;   c                      s
   d  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape r   r   )input_shaper   r    r2   y   s   
 c                   S      dS )Nzx should be a 4D tensor by nowr   r   r   r   r    r2          c                      s   dj  d  d dS )Nzcos has shape  but expected (batch=, seq=, ...)r0   r   )r4   cosr6   r   r    r2          c                      s   dj  d  d dS )Nzsin has shape r@   rA   rB   r0   r   )r4   r6   sinr   r    r2      rD   c                      s   d j d  d dS )NzLast dimension of cos cache (rF   ') should match rotary_embedding_dim/2 ().r0   r   )rC   rotary_embedding_dim_halfr   r    r2          c                      s   dj d  d  dS )NzLast dimension of sin cache (rF   rG   rH   r0   r   )rI   rE   r   r    r2      rJ   dim)
r1   lenr   _checkrL   permutereshape	unsqueezechunkcat)r&   r'   r(   r)   r#   r$   r%   
input_rankhidden_size	head_size	new_shapex_rotatex_not_rotatex1x2realimagx_rotate_concatoutputr   )	r4   rC   r'   r=   r)   rI   r6   rE   r(   r    rotary_embedding_23C   s   



  "$
r`   scalerV   c                 C   s   | dur| S dt | S )z/Get the scale factor for attention computation.Ng      ?)mathsqrt)ra   rV   r   r   r    _get_scale_factor   s   rd   tensorr4   c                 C   s:   | j d | j d }}|| }| ||||dd S )z1Reshape 3D tensor to 4D for multi-head attention.r	   r/   )r1   view	transpose
contiguous)re   r4   r$   r6   rU   rV   r   r   r    _reshape_3d_to_4d   s   ri   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec              	   C   s2   |dkrt | ||||S tt| |ddS )z1Get QK output tensor based on the specified mode.r   r.   rF   )_compute_qk_output_for_mode_0r   
zeros_likematmulrg   )rj   rk   rl   rm   ra   rn   r   r   r    _get_qk_output_for_aten_spda   s
   	
rr   c                    s"   t   dk fdd dS )z-Validate Group Query Attention configuration.r   c                      s   d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   r   rm   rl   r   r    r2      s    z-_validate_gqa_configuration.<locals>.<lambda>N)r   rN   )rl   rm   r   rs   r    _validate_gqa_configuration   s   
rt   c                 C   s`   |}||kr|| }|j |dd}t|| jd }t|}| | }	|| }
t|	|
ddS )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r	   rK   r;   r.   rF   )repeat_interleaverd   r1   rb   rc   r   rq   rg   )rj   rk   rl   rm   ra   K_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaledr   r   r    ro      s   	
ro           )	is_causalkv_num_headsq_num_headsrn   ra   softcapsoftmax_precisionV	attn_maskpast_key
past_valuer}   r~   r   r   r   c                C   sZ  | j d }t| j dkrE| j d }| j }|dur,|||j d |j d  |j d | f}n|||j d |j d | f}|}||||d f}n:| j d }| j }|duri|j d |j d |j d |j d  |j d f}n|j }|}| j d | j d | j d |d f}tj|| j| jd}tj||j|jd}tj||j|jd}tj|| j| jd}||||fS )z@Fake implementation for Attention-23 for torch.compile purposes.r   r;   r	   Nr/   dtypedevice)r1   rM   r   emptyr   r   )rj   rk   r   r   r   r   r}   r~   r   rn   ra   r   r   r4   q_sequence_lengthoutput_shapepresent_key_shapepresent_value_shapeqk_output_shaper_   present_keypresent_value	qk_outputr   r   r    _attention_23_fake_impl  sR   


r   	Attentionc          '      C   s  d\}}}t | j}| jd }t | jdkr;t|dko|dkdd  | jd }t| ||} t|||}t|||}tt | jdkoQt |jdkoQt |jdkdd  | j| }t|
|}
|d	urmtj||g|d
n| }|d	ur~tj||g|d
n| }||}}| j| }|j| }| j| }|j| }|dko|	dko|d	u o|d	u p|jtj	k}t
|| |rtjjj| |||d||
t	||kd}t| ||||
|	}n||kr|| }|j||d
}|j||d
}tj||| j| jd}|rt|d	u dd  ttj||tj	| jd}|| td}|d	ur4|jtj	kr0|| td}n|| }t|
| jd }t|} | |  }!||  }"t|!|"dd}#|#}|#| }$|	dkr`|$}|dkrn|t|$|  }$|	dkru|$}|d	ur|tv r|$j}%|$tj| }$tj|$dd
}&|&|%}&ntj|$dd
}&ntj|$dd
}&|	dkr|&}t|&|}|dkr|dd  !||d}||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r	   r/   r;   r   r;   c                   S   r>   )Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r   r   r    r2     r?   zattention_23.<locals>.<lambda>r	   r<   c                   S   r>   )Nz'Q, K, and V should be 4D tensors by nowr   r   r   r   r    r2     r?   NrK   r|   )r   	dropout_pr}   ra   
enable_gqar   c                   S   r>   )Nz'Cannot use both is_causal and attn_maskr   r   r   r   r    r2     r?   z-infr.   rF   r/   )"rM   r1   r   rN   ri   rd   rS   r*   r   boolrt   nn
functionalscaled_dot_product_attentionrr   ru   zerosr   trilonesmasked_fillfloatrb   rc   rq   rg   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ONNX_DTYPE_TO_TORCH_DTYPEsoftmaxrh   rf   )'rj   rk   r   r   r   r   r}   r~   r   rn   ra   r   r   num_head_dimsequence_dimhead_diminput_shape_lenr4   r   q_head_sizer   r   rl   rm   kv_sequence_lengthcan_use_sdpar_   r   rw   	attn_biascausal_maskrx   ry   rz   r{   qk_matmul_outputqk_with_biasoriginal_dtype
qk_softmaxr   r   r    attention_23c  s   



(
















r   )N)NNN)$__doc__rb   collections.abcr   typingr   typing_extensionsr   r   torch.onnx.opsr   r   r   r   dict_ops
OpOverload__annotations__	frozensetr   strintr"   Tensorr   r+   r`   r   rd   ri   rr   rt   ro   tupler   r   r   r   r   r    <module>   s   


	

	 




	


T	
