o
    q::j6O  ã                   @  sÀ   d dl mZ d dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ ejfddd„Zejfddd„Zejfddd„Zejfddd„Zejfddd„ZG dd„ deƒZdS ) é    )ÚannotationsN)ÚTensorProtoÚhelper)ÚBase)Úexpect)ÚAI_ONNX_PREVIEW_DOMAIN)Ú_compute_flex_attentionÚ
bias_valueÚfloatÚdtypeúTensorProto.DataTypeÚreturnúonnx.GraphProtoc                 C  ód   t  d|g d¢¡}t  d|g d¢¡}t  d|g | g¡}t  dddgdg¡}t  |gd|g|g|g¡S )zqCreate a score_mod subgraph that adds a constant bias to the scores.

    score_mod(scores) -> scores + bias
    Úscores©ÚBÚHÚLÚSÚ
scores_outÚbiasÚAddÚscore_mod_bias©r   Úmake_tensor_value_infoÚmake_tensorÚ	make_nodeÚ
make_graph)r	   r   Úscore_inÚ	score_outÚbias_tensorÚadd_node© r#   úp/home/nk/hobo-godmode/plappi-mvp/.venv/lib/python3.10/site-packages/onnx/backend/test/case/node/flexattention.pyÚ_make_score_mod_bias_graph   ó   ûr%   Úscale_valuec                 C  r   )zdCreate a prob_mod subgraph that scales the probabilities.

    prob_mod(probs) -> probs * scale
    Úprobsr   Ú	probs_outÚscaleÚMulÚprob_mod_scaler   )r'   r   Úprob_inÚprob_outÚscale_tensorÚmul_noder#   r#   r$   Ú_make_prob_mod_scale_graph)   r&   r1   c                 C  sr  t  d| g d¢¡}t  d| g d¢¡}t  ddgdg¡t jdddgdgd	d
t jdddgdgd	d
t  dg d¢dg¡t  dddgdg¡t  dg d¢dg¡t  dddgdg¡t  dddgdg¡t  dg d¢dg¡g	}t  dtjg d	g¡t  dtjg dg¡t  dtjg dg¡t  dtjg dg¡t  dtjd gg d!¢¡t  dtjd gg d"¢¡t  d#| g td$ƒg¡g}t  |d%|g|g|¡S )&a  Create a score_mod subgraph that applies causal masking.

    For each position, masks out future tokens by setting scores to -inf
    where k_idx > q_idx. This pattern is used in Qwen-3, Gemma-3, Llama-3, etc.

    score_mod(scores) -> Where(q_idx >= k_idx, scores, -inf)
    r   r   r   ÚShapeÚscores_shapeÚGatherÚidx_2ÚL_dimr   ©ÚaxisÚidx_3ÚS_dimÚRange©Úzeror6   ÚoneÚq_rangeÚReshapeÚq_shapeÚq_idx©r=   r:   r>   Úk_rangeÚk_shapeÚk_idxÚGreaterOrEqualÚmaskÚWhere)rH   r   Úneg_infr=   r>   é   é   é   é   )rK   rK   éÿÿÿÿrK   )rK   rK   rK   rO   rJ   z-infÚscore_mod_causal_mask)r   r   r   r   r   ÚINT64r
   r   ©r   r   r    ÚnodesÚinitializersr#   r#   r$   Ú!_make_score_mod_causal_mask_graph@   s6   
òù
ûrU   Ú	cap_valuec                 C  s†   t  d|g d¢¡}t  d|g d¢¡}t  dddgdg¡t  ddgdg¡t  d	ddgdg¡g}t  d|g | g¡g}t  |d
|g|g|¡S )z¢Create a score_mod subgraph that applies soft capping.

    Used in Gemma-2 to stabilize attention scores.

    score_mod(scores) -> tanh(scores / cap) * cap
    r   r   r   ÚDivÚcapÚscaledÚTanhÚtanh_outr+   Úscore_mod_soft_cap)r   r   r   r   r   )rV   r   r   r    rS   rT   r#   r#   r$   Ú_make_score_mod_soft_cap_graphq   s   
ýÿûr]   c                 C  sr  t  d| g d¢¡}t  d| g d¢¡}t  ddgdg¡t jdddgdgd	d
t jdddgdgd	d
t  dg d¢dg¡t  dddgdg¡t  dg d¢dg¡t  dddgdg¡t  dddgdg¡t jddgdg| dt  dddgdg¡g
}t  dtjg d	g¡t  dtjg dg¡t  dtjg d g¡t  dtjg d!g¡t  dtjd gd"dg¡t  dtjd gdd"g¡g}t  |d#|g|g|¡S )$a  Create a score_mod subgraph that adds relative positional bias.

    Adds (q_idx - k_idx) to the scores. This pattern captures the core idea
    of relative position embeddings used in various Transformer models.

    score_mod(scores) -> scores + Cast(q_idx - k_idx, dtype)
    r   r   r   r2   r3   r4   r5   r6   r   r7   r9   r:   r;   r<   r?   r@   rA   rB   rC   rD   rE   rF   ÚSubÚrel_posÚCastÚrel_pos_cast)Útor   r=   r>   rK   rL   rM   rO   Úscore_mod_relative_positional)r   r   r   r   r   rQ   r   rR   r#   r#   r$   Ú)_make_score_mod_relative_positional_graph‘   s6   
ñú	ûrd   c                   @  s¦   e Zd Zeddd„ƒZeddd„ƒZeddd„ƒZedd	d
„ƒZeddd„ƒZeddd„ƒZ	eddd„ƒZ
eddd„ƒZeddd„ƒZeddd„ƒZeddd„ƒZdS )ÚFlexAttentionr   ÚNonec               
   C  óº   t jdg d¢dgtd} d\}}}}d\}}tj ||||¡ tj¡}tj ||||¡ tj¡}tj ||||¡ tj¡}	t|||	ƒ\}
t	| |||	g|
gdt  
dd	¡t  
td
¡gd dS )z/Basic FlexAttention test with default settings.re   ©ÚQÚKÚVÚY©ÚinputsÚoutputsÚdomain©rL   rN   é   é   ©é   rs   Útest_flexattentionÚ é   rK   ©rn   ro   ÚnameÚopset_importsN©r   r   r   ÚnpÚrandomÚrandÚastypeÚfloat32r   r   Úmake_opsetid©Únoder   ÚHqr   ÚEr   ÚEvri   rj   rk   rl   r#   r#   r$   Úexport_flexattentionÃ   ó*   ü

þ
ûz"FlexAttention.export_flexattentionc               
   C  sÄ   d} t jdg d¢dg| td}d\}}}}d\}}tj ||||¡ tj¡}tj ||||¡ tj¡}	tj ||||¡ tj¡}
t||	|
| d\}t	|||	|
g|gd	t  
d
d¡t  
td¡gd dS )z,FlexAttention with explicit scale attribute.gš™™™™™¹?re   rh   rl   )rn   ro   r*   rp   rq   rt   )r*   Útest_flexattention_scaledrw   rx   rK   ry   Nr|   )r*   r„   r   r…   r   r†   r   r‡   ri   rj   rk   rl   r#   r#   r$   Úexport_flexattention_scaledá   s.   û

þ
ûz)FlexAttention.export_flexattention_scaledc               
   C  s¸   t jdg d¢dgtd} d\}}}}}}}tj ||||¡ tj¡}tj ||||¡ tj¡}	tj ||||¡ tj¡}
t||	|
ƒ\}t	| ||	|
g|gdt  
dd¡t  
td	¡gd
 dS )z1FlexAttention with Grouped Query Attention (GQA).re   rh   rl   rm   )rL   rr   rL   rN   ru   rs   rs   Útest_flexattention_gqarw   rx   rK   ry   Nr|   )r„   r   r…   ÚHkvr   r   r†   r‡   ri   rj   rk   rl   r#   r#   r$   Úexport_flexattention_gqa  s(   ü

þ
ûz&FlexAttention.export_flexattention_gqac               
   C  rg   )z5FlexAttention with different head sizes for Q/K vs V.re   rh   rl   rm   rq   )ru   é    Ú"test_flexattention_diff_head_sizesrw   rx   rK   ry   Nr|   rƒ   r#   r#   r$   Ú$export_flexattention_diff_head_sizes  r‰   z2FlexAttention.export_flexattention_diff_head_sizesc               
   C  s<  d} t | tjƒ}tjdg d¢dgtd}t d|¡}|j |¡ d\}}}}d\}}	t	j
 ||||¡ t	j¡}
t	j
 ||||¡ t	j¡}t	j
 ||||	¡ t	j¡}d	t	 |¡ }t	 d
|
|¡| }||  }t	 ||jddd ¡}||jddd }t	 d||¡ t	j¡}t||
||g|gdt dd¡t td¡gd dS )z<FlexAttention with score_mod subgraph (adds bias to scores).ç      à?re   rh   rl   rm   Ú	score_mod©rK   rL   rM   rN   ©rM   rN   ç      ð?úbhle,bhse->bhlsrO   T©r8   Úkeepdimsúbhls,bhsv->bhlvÚtest_flexattention_score_modrw   rx   rK   ry   N)r%   r   ÚFLOATr   r   r   Úmake_attributeÚ	attributeÚappendr}   r~   r   r€   r   ÚsqrtÚeinsumÚexpÚmaxÚsumr   r‚   )r	   Úscore_mod_graphr„   Úscore_mod_attrr   r…   r   r†   r   r‡   ri   rj   rk   r*   r   r(   rl   r#   r#   r$   Úexport_flexattention_score_mod<  s<   ü

þ
ûz,FlexAttention.export_flexattention_score_modc               
   C  s<  d} t | tjƒ}tjdg d¢dgtd}t d|¡}|j |¡ d\}}}}d\}}	t	j
 ||||¡ t	j¡}
t	j
 ||||¡ t	j¡}t	j
 ||||	¡ t	j¡}d	t	 |¡ }t	 d
|
|¡| }t	 ||jddd ¡}||jddd }||  }t	 d||¡ t	j¡}t||
||g|gdt dd¡t td¡gd dS )z<FlexAttention with prob_mod subgraph (scales probabilities).r’   re   rh   rl   rm   Úprob_modr”   r•   r–   r—   rO   Tr˜   rš   Útest_flexattention_prob_modrw   rx   rK   ry   N)r1   r   rœ   r   r   r   r   rž   rŸ   r}   r~   r   r€   r   r    r¡   r¢   r£   r¤   r   r‚   )r'   Úprob_mod_graphr„   Úprob_mod_attrr   r…   r   r†   r   r‡   ri   rj   rk   r*   r   r(   rl   r#   r#   r$   Úexport_flexattention_prob_mode  s<   ü

þ
ûz+FlexAttention.export_flexattention_prob_modc               
   C  rg   )z"FlexAttention with float16 inputs.re   rh   rl   rm   rq   rt   Útest_flexattention_fp16rw   rx   rK   ry   N)r   r   r   r}   r~   r   r€   Úfloat16r   r   r‚   rƒ   r#   r#   r$   Úexport_flexattention_fp16  r‰   z'FlexAttention.export_flexattention_fp16c               
   C  rg   )z+FlexAttention with double precision inputs.re   rh   rl   rm   rq   rt   Útest_flexattention_doublerw   rx   rK   ry   N)r   r   r   r}   r~   r   r€   Úfloat64r   r   r‚   rƒ   r#   r#   r$   Úexport_flexattention_double«  r‰   z)FlexAttention.export_flexattention_doublec               
   C  st  t tjƒ} tjdg d¢dgtd}t d| ¡}|j |¡ d\}}}}d\}}t	j
 ||||¡ t	j¡}	t	j
 ||||¡ t	j¡}
t	j
 ||||¡ t	j¡}dt	 |¡ }t	 d	|	|
¡| }t	 |¡ d
d
|d
¡}t	 |¡ d
d
d
|¡}||k}t	 ||t	j ¡}t	 ||jddd ¡}||jddd }t	 d||¡ t	j¡}t||	|
|g|gdt dd¡t td
¡gd dS )zOFlexAttention with causal masking score_mod (Qwen-3, Gemma-3, Llama-3 pattern).re   rh   rl   rm   r“   ©rK   rL   rN   rr   ©rN   rr   r–   r—   rK   rO   Tr˜   rš   Útest_flexattention_causal_maskrw   rx   ry   N)rU   r   rœ   r   r   r   r   rž   rŸ   r}   r~   r   r€   r   r    r¡   ÚarangeÚreshapeÚwhereÚinfr¢   r£   r¤   r   r‚   )r¥   r„   r¦   r   r…   r   r†   r   r‡   ri   rj   rk   r*   r   rB   rF   rH   r(   rl   r#   r#   r$   Ú export_flexattention_causal_maskÉ  s@   
ü

þ
ûz.FlexAttention.export_flexattention_causal_maskc               
   C  sF  d} t | tjƒ}tjdg d¢dgtd}t d|¡}|j |¡ d\}}}}d\}}	t	j
 ||||¡ t	j¡}
t	j
 ||||¡ t	j¡}t	j
 ||||	¡ t	j¡}d	t	 |¡ }t	 d
|
|¡| }t	 ||  ¡|  }t	 ||jddd ¡}||jddd }t	 d||¡ t	j¡}t||
||g|gdt dd¡t td¡gd dS )z<FlexAttention with soft capping score_mod (Gemma-2 pattern).g      4@re   rh   rl   rm   r“   r³   r´   r–   r—   rO   Tr˜   rš   Útest_flexattention_soft_caprw   rx   rK   ry   N)r]   r   rœ   r   r   r   r   rž   rŸ   r}   r~   r   r€   r   r    r¡   Útanhr¢   r£   r¤   r   r‚   )rV   r¥   r„   r¦   r   r…   r   r†   r   r‡   ri   rj   rk   r*   r   r(   rl   r#   r#   r$   Úexport_flexattention_soft_capõ  s<   ü

þ
ûz+FlexAttention.export_flexattention_soft_capc               
   C  sj  t tjƒ} tjdg d¢dgtd}t d| ¡}|j |¡ d\}}}}d\}}t	j
 ||||¡ t	j¡}	t	j
 ||||¡ t	j¡}
t	j
 ||||¡ t	j¡}dt	 |¡ }t	 d	|	|
¡| }t	 |¡ d
d¡}t	 |¡ dd
¡}||  t	j¡}|| }t	 ||jd
dd ¡}||jd
dd }t	 d||¡ t	j¡}t||	|
|g|gdt dd¡t td¡gd dS )z6FlexAttention with relative positional bias score_mod.re   rh   rl   rm   r“   r³   r´   r–   r—   rO   rK   Tr˜   rš   Ú&test_flexattention_relative_positionalrw   rx   ry   N)rd   r   rœ   r   r   r   r   rž   rŸ   r}   r~   r   r€   r   r    r¡   r¶   r·   r¢   r£   r¤   r   r‚   )r¥   r„   r¦   r   r…   r   r†   r   r‡   ri   rj   rk   r*   r   rB   rF   r_   r(   rl   r#   r#   r$   Ú(export_flexattention_relative_positional  s@   
ü

þ
ûz6FlexAttention.export_flexattention_relative_positionalN)r   rf   )Ú__name__Ú
__module__Ú__qualname__Ústaticmethodrˆ   r‹   rŽ   r‘   r§   r¬   r¯   r²   rº   r½   r¿   r#   r#   r#   r$   re   Â   s.    ('+(re   )r	   r
   r   r   r   r   )r'   r
   r   r   r   r   )r   r   r   r   )rV   r
   r   r   r   r   )Ú
__future__r   Únumpyr}   Úonnxr   r   Úonnx.backend.test.case.baser   Úonnx.backend.test.case.noder   Ú	onnx.defsr   Ú3onnx.reference.ops.aionnx_preview.op_flex_attentionr   rœ   r%   r1   rU   r]   rd   re   r#   r#   r#   r$   Ú<module>   s&   þþÿ3þ!ÿ1