o
    i9:j                 !   @   s5  U d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dl mZmZ d dlmZmZ d dlmZmZ d dlZd dlZd dlmZ d dlmZ d dlm  mZ d dlmZm Z m!Z! d d	l"m#Z# d d
l$m%Z% d dlm&Z&m'Z'm(Z(m)Z)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z2 d dl3m4Z4 ej5j6Z6g Z7e8e9 e:d< ej;j<j=Z=G dd deZ>		ddedej?de@de@fddZAeeAej?jBddZCeeAej?jBdZDeeAej?jBddZEeeAej?jFdZGde!deHde!fd d!ZIe#e=jJe/d"eDd#e!d$e!fd%d&ZJe#e=jKe/d"eDd#e!d$e!fd'd(ZKe#e=jLe/d"eDd#e!de!d)eMd*eMfd+d,ZLe#e=jNe/d"eDd-e!d.eMd/eMd0eMd1e@d2e!fd3d4ZNe#e=jOjPgd5d6 ZQe#e=jOj!gd7e!fd8d9ZRe#e=jSe/ eDd:e!de!fd;d<ZSe#e=jTe/d"eDd-e!d:e!fd=d>ZTe#e=jUe/d"d-e!d:e!d?eMd@eMfdAdBZUe#e=jVe/ eDd:e!de!fdCdDZVe#e=jWe/ eDd-e!d:e!de!fdEdFZWe#e=jXe/d"d-e!d:e!d*eMfdGdHZXe#e=jYe/d"eDd-e!d:e!dIeMdJe@fdKdLZYe#e=jZe/d"eDddNe!d:e!dOe9fdPdQZZe#e=j[eDd-e!dRe!fdSdTZ[e#e=j\e/ eDd:e!de!fdUdVZ\e#e=j]e/d"eDd-e!d:e!de!fdWdXZ]e#e=j^d:e!dYe!de!fdZd[Z^e#e=j_d-e!d:e!dYe!de`e!e!f fd\d]Z_e#e=jae/ eDd-e!d:e!d^e!d_eMd`eMdae@dJe@de!fdbdcZae#e=jbe/d"eDd-e!d:e!dde!de!fdedfZbe#e=jce/ d:e!dge!de!fdhdiZcdje!dkeHfdldmZddnejefdodpZfe#e=jge/ eDe>jhjifd:e!dqe!dkeHde!fdrdsZge#e=jje/d"eDd-e!dRe!dqe!dkeHfdtduZje#e=jkddvdwZle#e=jme/ eDe>jhjidxfd:e!dqe!dkeHd)eMfdydzZme#e=jnjoeDd-e!d:e!dqe!dkeHd)eMf
d{d|Zne#e=jnjpeDd-e!d:e!dqe!dkeHd)eMd"e!fd}d~Zqe#e=jrjoeDd-e!d:e!dqe!dkeHdeMf
ddZre#e=jrjseDd-e!d:e!dqe!dkeHdeMd"e!fddZtd-e!d:e!dqe!dYe!dB dkeHdeHde!de!fddZue#e=jve/d"eDd-e!d:e!deHde!fddZve#e=jwe/d"d-e!d:e!dqe!dYe!dB dkeHdeHde!de!fddZwe#e=jxe/d"d-e!d:e!dqe!dYe!dB dkeHdeHde!de!fddZxe#e=jye/ eDde>jhjifd:e!dqe!dYe!dB dkeHde!f
ddZye#e=jze/d"eDde>jhjifd-e!d:e!dqe!dYe!dB dkeHde!fddZze#e=j{e/ eDe>jhjifdRe!dqe!dkeHde!fddZ{e#e=j|e/d"eDe>jhjifd-e!d:e!dqe!dkeHde!f
ddZ|e#e=j}e/ ddRe!dge!deMfddZ}e#e=j~e/ de!de!de!fddZ~e#e=je/ d-e!de8eH deHdeHdeHdeHfddZe#e=jj!	 			dd:e!deHdeHdB deHdB deHf
ddZde!deHdeHdB deHdB de`eHeHf f
ddZe#e=je/ 	 			ddRe!de!deHdeHdB deHdB deHfddZe#e=je/ d-e!de8eH deHdeHfddZe#e=je/ d-e!de8eH deHdeHdeHf
ddZd-e!d"e!dejefddZe#e=je/d"eCd-e!de!deHdejefddZe#e=je/ eCd-e!de!deHdejefddZdd Ze#e=je/ dRe!de8eH de8eH de8eH de8eH de!fddÄZe#e=je/ eDdRe!de8eH de8eH de8eH de8eH de8eH de!fddƄZe#e=je/ d-e!de!d/eMfddɄZe#e=je/ dNe!de8eH deHdeHdeHde!fdd΄Ze#e=jjoeD	dd-e!d:e!deMdB de!fddфZe#e=je=jjoe6je=jjoe6jdRe!deMde@dB fddԄZe#e=je/ddփdRe!deMde@dB fdd؄Ze#e=je/ de!deHde@fddۄZe#e=je/dd܍de!deHde@fddބZe#e=je/ 			ddYe!de!deHde@de@de!fddZe#e=je/ d-e!de!deHdeHde@f
ddZde8eH fddZde8e! deHdeHde8e! fddZde8e! fddZde8e! deHfddZde8e! deHdeHfddZe#e=jjoe=jjsg	dde8e! deHdeHde!dB de!f
ddZe#e=jjoe=jjsg	 	dd:e!de8eH deHde8e! dB de8e! dB f
ddZe#e=jj!ddRe!deHdeHde`e!df fddZe#e=jjo	 ddRe!de8eH deHde`e!df fdd Ze#e=jj!dd:e!deHdeHde`e!df fddZe=jje6j	 dd:e!de!deHde`e!df fddZe#e=je/dd܍eDdd:e!de!de!d)eHd.eHf
dd	Ze#e=je/ eD			dd:e!de!de!d)eHd.eHd
e@fddZe#e=je/dd܍eDdd:e!de!de!d)eHd.eHf
ddZe#e=jjoeDd-e!dRe!de!de!de!dB deHdeHdeHdeHde8e@ de`e!dB e!dB e!dB f fddZe#e=jjsd-e!dRe!de!de!de!dB deHdeHdeHdeHde8e@ dej!dej!dej!de`e!dB e!dB e!dB f fddZde!dB de!dB fddZe#e=jjode!dRe!d e8eH de!de!dYe!dB d!e!dB de8e@ de`e!dB e!dB e!dB f fd"d#Ze#e=jjsde!dRe!d e8eH de!de!dYe!dB d!e!dB de8e@ dej!dej!dej!de`e!dB e!dB e!dB f fd$d%Ze#e=jjodRe!d e8eH dYe!dB deMdB de`e!e!f f
d&d'Ze#e=jjode!dRe!d e8eH de!dYe!dB de8e@ de`e!dB e!dB f fd(d)ZdRe!dYe!dB d!e!dB d*e!dB d+e!dB dae@d,eMdeMd-e@de`e!e!e!e!dB e!dB f fd.d/Ze#e=je/dd0d1dRe!dYe!dB d!e!dB d*e!dB d+e!dB dae@d,eMdeMde`e!e!e!f fd2d3Ze=jjoe6je=jjoe6jdRe!dYe!dB d!e!dB d*e!dB d+e!dB dae@d,eMdeMde`e!e!e!f fd4d5Ze=jjoe6jdde8e! fd6d7Ze#e=jjodRe!dYe!dB d!e!dB d*e!d+e!d,eMdeMde`e!e!e!f fd8d9Ze#e=jjodRe!dYe!dB d!e!dB d*e!d+e!dae@d,eMdeMde`e!e!e!f fd:d;Ze#e=jjdRe!dYe!dB d!e!dB dae@d,eMdeMde`e!e!e!f fd<d=Ze#e=jjodRe!dYe!dB d!e!dB d*e!d+e!dae@d,eMdeMde`e!e!e!e!e!f fd>d?ZdRe!dYe!dB d!e!dB d*e!d+e!deMdae@de!fd@dAZe#e=jjodRe!dYe!dB d!e!dB d*e!d+e!d,eMdeMde`e!e!e!e!f fdBdCZe#e=jjodRe!dYe!dB d!e!dB d*e!d+e!d,eMdeMde`e!e!e!e!e!e!f fdDdEZe#e=jjodRe!dYe!dB d!e!dB d*e!d+e!d,eMdeMde`e!e!e!e!f fdFdGZe#e=je/ddփeDddHdIZe#e=je/ dddddddJde!e'B dnejedB dKejdB dLe@dMe@dNejdB fdOdPZe#e=je=je=jge/ dQdR Ze=jjoe6je#e=jƃe/dd֐ddSdRe!dYe!d!e!dB d*e!dB d+e!dB dae@dTeMdUeMfdVdWZƐdXdY Ze#e=jjode!dRe!dYe!dB d*e!dB d+e!dB d0e!dB d1e!dB de@deMde8e@ dZe!de`e!e!dB e!dB f fd[d\Ze#e=jjode!dRe!dYe!dB d*e!dB d+e!dB d0e!dB d1e!dB de@deMde8e@ de`e!e!dB e!dB f fd]d^Ze#e=jjsde!dRe!dYe!dB d*e!dB d+e!dB d0e!dB d1e!dB de@deMde8e@ dej!dej!dej!de`e!e!dB e!dB f fd_d`Ze#e=j˃e/dd֐ddRe!d-e!dYe!d*e!dB d+e!dB d0e!dB dae!dB dUeMfdbdcZe#e=j̃e/dd֐ddRe!d-e!dYe!d*e!dB d+e!dB d0e!dB dae!dB dUeMdde!fdedfZe#e=j̓e/ eDdRe!de`eHeHf fdgdhZd:e)de)de8eH deHfdidjZe#e=jЃe/ d:e)de)de8eH fdkdlZe#e=jуe/ dRe)de)de8eH de8eH de8eH f
dmdnZe#e=j҃ddode)deHde)dpe)d.e'f
dqdrZe#e=jӃe/ ddode)deHde)dpe)d.e'f
dsdtZddode)deHde)dpe)due@d.e'fdvdwZe#e=jjoe=jjoe6jddydzZe#e=jփde)deHde)dpe)fd{d|Ze#e=j׃e/ de)deHde)dpe)fd}d~Zde)deHde)dpe)due@f
ddZe#e=jكe/dddeDd:e!de`e!e!f fddZe#e=jڃe/ 	x	x	dde!de@eHB eMB de@eHB eMB dejdB fddZe#e=j܃dddZܐdd Zݐdd Ze#e=jje#e=jje#e=jje=jjࠐe6je=jjࠐe6je=jjࠐe6je=jjࠐe6je=jjࠐe6je=jjࠐe6jdRe!de8eH dB de8eM dB de!fddZe#e=jje#e=jje#e=jje=jjࠐe6je=jjࠐe6je=jjࠐe6je=jjࠐe6je=jjࠐe6je=jjࠐe6jdRe!de8eH dB de8eM dB de!fddZdddZe#e=jjoe=jjsge=jjoe6je=jjoe6je/ddd	ddRe!de8eH deMdB de!fddZe#e=jjoe=jjsge=jjoe6je=jjoe6je/ddd	ddRe!de8eH deMdB de!fddZe#e=jjoe=jjsge=jjoe6je=jjoe6je/ddd		ddRe!de8eH deMdB deMdB de!f
ddZe#e=jjoe=jjsge=jjoe6je=jjoe6je/ddd		ddRe!de8eH deMdB deMdB de!f
ddZe#e=jjoe=jjsge=jjoe6je=jjoe6je/ddd			ddRe!de8eH deMdB deMdB deMdB de!fddZe#e=jjoe=jjsge=jjoe6je=jjoe6je/ddd			ddRe!de8eH deMdB deMdB deMdB de!fddZeD	ddRe!de8eH de8eMdB  de@de!f
ddZdd Zdd Zdd Zdd Z	dddZdd Zdd ZdddZdddZdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdd Ze#e=jje=jje6je=jje6jdÐdĄ ZdŐdƄ ZddǐdȄZddɐdʄZdːd̄ Z e#e=jje=jje6je=jje6jd͐d΄ Ze#e=jje=jje6je=jje6jdϐdЄ Zdѐd҄ ZdӐdԄ Ze#e=jje=jje6je=jje6jdՐdք Ze#e=jje=jje6je=jje6jdאd؄ Ze#e=j	je=j	jࠐe6je=j	jࠐe6jdِdڄ Z
e#e=jje=jjࠐe6je=jjࠐe6jdېd܄ Ze#e=jje=jjࠐe6je=jjࠐe6jdݐdބ Ze#e=jje#e=jje=jjࠐe6je=jjࠐe6je=jjࠐe6je=jjࠐe6je=jjࠐe6je=jjࠐe6jdߐd Ze#e=jjoe=jjsge/ 	ddRe!de8eH de@deMdB de!f
ddZe#e=jjoe=jjsge=jjoe6je/ 		ddRe!de8eH de@deMdB deMdB de!fddZe#e=jjoe=jjsge/ 			ddRe!de8eH de@deMdB deMdB deMdB de!fddZdddZdd Zdee! dee! de!de!fddZde*de!fddZeDdRe!de8eH de@de8eMdB  de!f
ddZe#e=jjode!de!de@fddZe#e=je=jge/ dd Ze#e=jgdd Ze#e=jgdddZe#e=jgdd Ze#e=jgd d Zd:e!dqe!dYe!dB dkeHdeHde`e!e!f fddZe#e=j e/ddd:e!dqe!dYe!dB dkeHdeHde`e!e!f fddZ e#e=j!e/ddd:e!dqe!dYe!dB dkeHdeHde`e!e!f fddZ!de!deMde!fd	d
Z"de!deMde!fddZ#de!de*fddZ$de*de!de!fddZ%dee! de!fddZ&deHde@dnejedKejfddZ'de!deHdeHde@fddZ(de!deHdeHdeHde@f
dd Z)de!de8eH de@fd!d"Z*de!de8eH de@fd#d$Z+e#e=j,e/ eDde!de8eH de@fd%d&Z,	 	 		dde!d'e!d(eHd)eHde@d*e@de!fd+d,Z-e#e=j.e/ eD	 	 	dde!d'e!d(eHd)eHde@de!fd-d.Z.e#e=j/e/dd܍eDd/d0 Z/e#e=j0e/ dde>jhjifd1d2Z0d3ej!d4ej!d5e@de@fd6d7Z1e=j2joe6je=j2jse6je/dd8dd9d:d;Z2e#e=j3joe=j3jsge=j3joe6je/ eD		ddRe!de`eHeHf de@d<eMdB d=eMdB de!fd>d?Z4e#e=j3je=j3jࠐe6je=j3jࠐe6je/ eD	dde!de`eHeHf dB de@de`eMeMf dB de!f
d@dAZ5e#e=j6e#e=j7e#e=j8eDe/ de!de`eHdf de!fdBdCZ9e#e=j:e#e=j;e#e=j<eDe/ de!de`eHdf de!fdDdEZ=de!de`eHdf dFeeHeHeHge!f de!fdGdHZ>e#e=j?e#e=j@e#e=jAe/d"dIdJ ZBe#e=jCe/dKdLdddMdNdOZCe#e=jDe/ dddPdQdRZDe#e=jEjoe=jEjsge/ dejFdddSde'dnejedB dTejGdKejdB dLe@f
dUdVZHe#e=jEjIgdejFdddSde'de'dnejedB dTejGdKejdB dLe@fdWdXZJe#e%dYdZ ZKe#e=jLe=jLjoe6je/ ddde>jhjifdRe!dqe!de'd[e'dYe!dB dkeHde!fd\d]ZLe#e=jMe=jMjoe6je/dd^dRe!dqe!dkeHde`e!e!f fd_d`ZMe#e=jNjo	x	ddddadbe!dce!d7e!ddeMdee@dfe!dB d/eMdB de`e!e!f fdgdhZOdidj ZPe#e=jQge/dd܍eDddkdlZQe#e=jRe/ dmdn ZRe#e=jSdodp ZSe#e=jTjoe=jTjsgdddqd:e!dnejedB de!dB de!fdrdsZUe#e=jVjoe=jVjWgdd:e!deHdB fdtduZXe#ej<j=jYddvdwZYe#e=jZe/ dddxdydzZZe#e=j[jodd{d:ej!dejdB dej!fd|d}Z[dd~ddZ\dddxddZ]e#e=j^e/ dd Z^e#e=j_dddZ_e#e=j`d-e!d:e!de@de!fddZ`e#e=jajoe=jajsge/ dddddSdeHdnejedB dTejGdB dKejdB dLe@dB de!fddZae#e=jajbe=jajcge/ 	ddddddSdeHde@dnejedB dTejGdB dKejdB dLe@dB de!fddZdePe=jee=jf ePe=jge=j ePe=jhe=j ePe=jie=jQ ePe=jje=jO ePe=jke=jl ePe=jme=jV ePe=jne=jo ePe=jpe=jS ePe=jqe=jr ePe=jse=jt ePe=jue=jv ePe=jwe=jx ePe=jye=jz ePe=j{e=j| ePe=j}e=j~ ePe=je=jc ePe=je=j ePe=je=j ePe=je=j ePe=je=j ePe=je=j ePe=je=j ePe=je=j ePe=je=j ePe=je=j\ e=jjoe6jdd:e!deHde!fddZdS (      N)CallableIterable)nullcontext)Enum)partialreduce)chainproduct)Anycast)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberTypesuggest_memory_format
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r'   r'   c/home/nk/hobo-godmode/plappi-mvp/.venv/lib/python3.10/site-packages/torch/_decomp/decompositions.pyr   1   s    r   Fftype_promotioncompute_dtype_onlyinclude_non_tensor_argsc                    s    t  fdd}|S )Nc                     s   rt tjjfnt f  fddtj| i |D }tj|di\fdd}fdd}t|| i t||}rA|S t||S )Nc                    s   g | ]	}t | r|qS r'   )
isinstance.0x)allowed_typesr'   r(   
<listcomp>E   s    z-type_casts.<locals>.inner.<locals>.<listcomp>type_promotion_kindc                       t | tr
|  S | S Nr-   r   tor0   computation_dtyper'   r(   increase_precO      

z0type_casts.<locals>.inner.<locals>.increase_precc                    r4   r5   r6   r8   )result_dtyper'   r(   decrease_precU   r<   z0type_casts.<locals>.inner.<locals>.decrease_prec)	r   torchtypes_Numberpytreearg_tree_leavesutilselementwise_dtypesr   )argskwargs	flat_argsr;   r>   rr+   r)   r,   r*   )r1   r:   r=   r(   inner@   s    


ztype_casts.<locals>.inner)	functoolswraps)r)   r*   r+   r,   rK   r'   rJ   r(   
type_casts:   s    rN   T)r*   r+   )r*   )r*   r,   r0   dimreturnc                 C   s$   t ||   D ]}| d} q| S N)rangerO   	unsqueeze)r0   rO   _r'   r'   r(   _unsqueeze_to_dimw   s   rV   
grad_inputout_gradyc                 C   s   | d||     S Nr   conj_physicalrX   rY   r'   r'   r(   tanh_backward}      r^   c                 C   s   | |d|     S rZ   r[   r]   r'   r'   r(   sigmoid_backward   r_   r`   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)expr?   where)rX   r0   ra   rb   zr'   r'   r(   softplus_backward   s   "rh   grad_outputalphascaleinput_scale	is_resultself_or_resultc           	      C   sb   || }|}|}|rt |dk| | ||  | | S t |dk| | | t ||  | | S Nr   )r?   rf   re   )	ri   rj   rk   rl   rm   rn   negcoefposcoef
negiptcoefr'   r'   r(   elu_backward   s   rs   c                 C      t | |S r5   )r?   	full_likeselfvaluer'   r'   r(   fill_scalar      ry   rx   c                    s(   t   dk fdd t|  S )Nr   c                      s   d    dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrO   r'   rx   r'   r(   <lambda>       zfill_tensor.<locals>.<lambda>)r?   _checkrO   atencopyrv   r'   r|   r(   fill_tensor   s
   

r   rw   c                 C   s    t jt j| d ddddd S N   r   min   maxr?   clamprw   r'   r'   r(   hardsigmoid   s    r   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        r?   rf   ri   rw   r'   r'   r(   hardsigmoid_backward   s
   r   min_valmax_valc                 C   s   t ||k||kB d| S )Nr   r   )ri   rw   r   r   r'   r'   r(   hardtanh_backward   s   r   c                 C   s$   | t jt j| d dddd d S r   r   r   r'   r'   r(   	hardswish   s   $r   c              
   C   s,   t |dkdt |dk | |d d  | S )Nr   r         ?r   r   r'   r'   r(   hardswish_backward   s
   r   c                 C   s   t ||kd| S ro   r   )ri   rw   rb   r'   r'   r(   threshold_backward      r   negative_slopeself_is_resultc                 C   s   t |dk| | | S ro   r   )ri   rw   r   r   r'   r'   r(   leaky_relu_backward   s   r   nonegradapproximatec                 C   s   d}d}d}|dkrO|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S )
Ng;f?g;f?gmBP?tanhr   gHm?r   r         )r?   r   erfre   )r   rw   r   M_SQRT2	M_SQRT1_2
M_2_SQRTPIkBetakKappax_sqx_cuberK   
tanh_innerleftrightleft_derivativetanh_derivativeinner_derivativeright_derivativekAlphacdfpdfr'   r'   r(   gelu_backward   s,   
r   inputc                 C   s:   t t|}t |}|| d||   }| ||  S rZ   )r?   r   Fsoftplussigmoid)ri   r   input_tanh_softplusinput_sigmoidoutr'   r'   r(   mish_backward  s   
r   c                 C   s   | t |  S r5   )r?   r   r   r'   r'   r(   silu"  s   r   c                 C   s,   ddt |   }| | d|d|    S rZ   )r?   re   )ri   rw   r   r'   r'   r(   silu_backward)  s   r   weightc                 C   s   t | dk| ||  S ro   r   )rw   r   r'   r'   r(   _prelu_kernel1  s   r   c                 C   s4   t |dk| ||  }t |dkd||  }||fS )Nr   r   r   )ri   rw   r   
input_gradweight_gradr'   r'   r(   _prelu_kernel_backward6  s   r   noiseloweruppertrainingc                 C   s*   |r|  |S || d }t| |||S Nr    )mulr   r   )ri   rw   r   r   r   r   r   r   r'   r'   r(   rrelu_with_noise_backwardA  s   
r   bufferc                 C   s^   |dk }t |dd}t |dd}| dkr|nt t | }| |||d|     S )Nr   r   rR   )r?   rf   numelre   abs)ri   rw   r   in_negative	max_derivsignrg   r'   r'   r(   log_sigmoid_backwardV  s
   "r   otherc                 C   sD   t | jst | jrtjn| j}| jdd|d}| t|| S )Nr'          @dtype)rD   is_integer_dtyper   is_boolean_dtyper?   float32new_fullpow)rw   r   	two_dtype
two_tensorr'   r'   r(   ldexpc  s   

r   loss	reductionc                 C   s0   |t jjkrt| S |t jjkrt| S | S r5   )r   r%   rx   r?   meanr&   sum)r   r   r'   r'   r(   apply_loss_reductiono  s
   

r   r   c                 C   s4   | t jkrt jS | t jkrt jS | t jkrt jS d S r5   )r?   	complex32float16	complex64r   
complex128float64r   r'   r'   r(   to_real_dtypex  s   


r   targetc                 C   s   | | d }t ||S r   )r   )rw   r   r   r   r'   r'   r(   mse_loss  s   
r   c                 C   s,   |t jjkrd|  nd}|||  |  S )Nr   )r   r%   rx   r   )ri   r   r   r   normr'   r'   r(   mse_loss_backward  s   r   c                 C   sF   t j| ||d}| td}t j||dd}t |}t |||S )N)rO   r   z-infTrO   keepdim)r?   softmaxeqfloatall
zeros_likerf   )rw   rO   r   r   maskedmasked_rowszerosr'   r'   r(   safe_softmax  s
   
r   rd   c                 C   s<   | |   }t||k d|d  | |d|  }t||S )Nr   r    )r   r?   rf   r   )rw   r   r   ra   r   r'   r'   r(   smooth_l1_loss  s   	&
r   c           	      C   sZ   |t jjkrd|  nd}|| }t|}||  }t||k || | |t| S rc   )r   r%   rx   r   r?   r   rf   r   )	ri   rw   r   r   ra   r   r0   abs_x	norm_gradr'   r'   r(   smooth_l1_loss_backward  s   

r   c                 C   *   t | ||||}t||j t||ddS NT	copy_fromcopy_toexact_dtype)r   r   shaper   )ri   rw   r   r   ra   rW   resultr'   r'   r(   smooth_l1_loss_backward_out     
r  deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S rc   )r   r%   rx   r   r?   rf   )ri   rw   r   r   r  r   r0   r'   r'   r(   huber_loss_backward  s    r  c                 C   r   r   )r  r   r  r   )ri   rw   r   r   r  rW   r  r'   r'   r(   huber_loss_backward_out  r  r	  ignore_indextotal_weightc                 C   s   |  dk rdnd}|tjjkr| | } |  dkr$|  dkr$|d }||}t||k|d}t|}	t|	||d}	|	  |     krMdkrTn n| |} |d ursdd t	|  D }
|j
d |
|< ||
}| | } t||k| d} |	|  S )Nr    r   r   g      c                 S   s   g | ]}d qS r   r'   r/   rU   r'   r'   r(   r2     r~   z&_nll_loss_backward.<locals>.<listcomp>)rO   r   r%   rx   rT   r?   rf   r   scatterrS   r  reshape)ri   rw   r   r   r   r
  r  channel_dimsafe_targetrW   	new_shaper'   r'   r(   _nll_loss_backward  s$   	

 

r  c           
      C   s   |  dkr
tdt|  |}||}|d dkr'td| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr    z.Halving dimension must be even, but dimension z	 is size rd   r{   )	rO   AssertionErrorrD   canonicalize_dimsizenarrowr?   r   cat)
ri   rw   rO   wrap_dimnIn	inputSize	firstHalf
secondHalfgradInputFirstHalfgradInputSecondHalfr'   r'   r(   glu_backward  s    

r   c                 C   s  d|    krdksn td|   d|  dkr'td|   d|  dko2|  dk}|sL|jd |jd ksLtd|j d|j d	| dkr`td
|j d|  d|d urq| |jd krqtd|tjjkr|  dkr|   dkr| jd |jd kstd|jd  d|    d| jd  n|   dkr|  dkstd| j t| ||||||S )Nr   r    %input tensor should be 1D or 2D, got Dr   A0D or 1D target tensor expected, multi-target not supported, got size mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rR   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rO   r  r  r   r   r$   rx   r  )ri   rw   r   r   r   r
  r  no_batch_dimr'   r'   r(   nll_loss_backward-  sP    
r)  c                 C   s   |  dkrtd|   |  dkrtd|   |jd |jd kr<|jd |jd kr<|jd |jd ksHtd|j d	|j | dkr\td
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: r   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r    r   r$  r%  r'  z ( z, elements))rO   r  r  r   r  )ri   rw   r   r   r   r
  r  r'   r'   r(   nll_loss2d_backward`  s2   r+  c              	   C   s\   |d t t |  | dd |t t | | dd  }|d ur)|| }t||S )Nr   r'   i)r?   maximumlog1pr   logr   )rw   r   r   r   r   r'   r'   r(   binary_cross_entropy  s   

r/  c                 C   sR   d}| ||  t j|d|  |d }|d ur|| }|tjjkr'||  }|S )Ng-q=r   r   )r?   r   r   r%   rx   r   )ri   rw   r   r   r   EPSILONr  r'   r'   r(   binary_cross_entropy_backward  s   
"r1  c                 C   s    t t |  | }t||S r5   )r?   r-  re   r   )r   r   r   r   r'   r'   r(   soft_margin_loss  s   
r2  c                 C   s6   ||  t || d  }|tjjkr||  }|S rZ   )r?   r   r   r%   rx   r   )ri   rw   r   r   rW   r'   r'   r(   soft_margin_loss_backward  s   	r3  r    pc                 C   s   t j| | |dS )N)r4  )r   r   )r   r   r4  r'   r'   r(   dist  r   r5  x1x2c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr    rR   Tmemory_formatr   )r   r   r?   	ones_likecontiguous_formatr  r   matmulmT	clamp_minsqrt)	r6  r7  x1_normx1_padx2_normx2_padx1_x2_r  r'   r'   r(   _euclidean_dist  s   rG  input_sizesstartendstepc                 C   s   |  |}t|| ||||S r5   )	new_zerosr?   slice_scatter)ri   rH  rO   rI  rJ  rK  rW   r'   r'   r(   slice_backward  s   

rN  r   c                 C   s^  ddl m} |  }|dkrtdt|  |}t|  }t|  }|dkr.td|d ur4|nd}	|d ur<|nt	j
}
|	dk rI|	|| 7 }	|
dk rS|
|| 7 }
|	dk rZd}	n
|	|| krd|| }	||
t	j
krp|| }
n|
|	k rw|	}
n
|
|| kr|| }
|  |	||   }|
|	 }|| d | ||< ||  |9  < | jrtd| |||S )Nr   statically_known_truez,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver   z<Slice decomposition for quantized tensors aren't implemented)%torch.fx.experimental.symbolic_shapesrP  rO   RuntimeErrorrD   r  listr  stridesysmaxsizestorage_offsetis_quantizedNotImplementedError
as_strided)rw   rO   rI  rJ  rK  rP  ndimsizesstrides	start_valend_valrW  lenr'   r'   r(   slice_forward  sD   	
ra  c                    s@   | j |  dtf fdd}||d d}|||  }||fS )zn
    Normalize start and end such that both are in the range
    [0, x.get_size()[dim]] and start <= end.
    rP   c                    s,   | d u r|S | dk r|   } t t| ||S ro   r   r   )valr   r   defaultdim_sizer'   r(   
clamp_wrap.  s
   z(_normalize_start_end.<locals>.clamp_wrapr   )r  int)r0   rO   rI  rJ  rg  r'   re  r(   _normalize_start_end%  s
   
ri  srcc              	   C   sB  t | j|}| j| }t| |||\}}t| j}|| |d  | ||< ||}|dkr;||kr;|dkr;| S d g|   }t	j
|| jd}	|	| | ||< t	j|| jt	jd}
|dkrht	|
|	|k}
||krtt	|
|	|k }
|dkrt	|
|	| | dk}
dg|   }d||< |
|}
t|
t||
|d| S )Nr   r   devicerl  r   rR   )rD   r  r[  r  ri  rS  expandclonerO   r?   arangerl  onesboollogical_andviewr   rf   _unsafe_masked_index)r   rj  rO   rI  rJ  rK  rf  src_sizeindicesidxmask
mask_shaper'   r'   r(   rM  <  s,   




rM  indexc                 C   s   |  |}t|| ||S r5   )rL  r?   select_scatter)ri   rH  rO   r{  rW   r'   r'   r(   select_backwarde  s   
r}  offsetdim1dim2c                 C   s   |  |}t|| |||S r5   )rL  r?   diagonal_scatter)ri   rH  r~  r  r  rW   r'   r'   r(   diagonal_backwardl  s   
r  input_dtypec                 C   s   | j |kr
||}|S r5   )r   r7   )ri   rW   r  r'   r'   r(   _cast_grad_to_input_dtypeu  s   

r  outputc                 C   s0   | | }||t j||dd  }t| || S NTr   )r?   r   r  
contiguous)ri   r  rO   r  new_grad_outputrW   r'   r'   r(   _softmax_backward_data}  s
   
r  c                 C   s*   | t |t j| |dd  }t| ||S r  )r?   re   r   r  )ri   r  rO   r  rW   r'   r'   r(   _log_softmax_backward_data  s   
r  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr    r   r   rl  r   rR   )r   r?   rp  int64rT   )
input_dkernel_d
dilation_d	padding_dstride_drl  blocks_d	arange_kwblocks_d_indiceskernel_gridr'   r'   r(    _im2col_col2im_indices_along_dim  s
   r  kernel_sizedilationpaddingrT  c              	      s&  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dv odtdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s|d}|S ) Nr    c                   S      dS )Nz"im2col(): only 2D kernel supportedr'   r'   r'   r'   r(   r}         zim2col.<locals>.<lambda>c                   S   r  )Nz$im2col(): only 2D dilation supportedr'   r'   r'   r'   r(   r}     r  c                   S   r  )Nz#im2col(): only 2D padding supportedr'   r'   r'   r'   r(   r}     r  c                   S   r  )Nz"im2col(): only 2D stride supportedr'   r'   r'   r'   r(   r}     r  Tc                    B   |rt dd  D nt dd  D }t| fdd d S )Nc                 s       | ]}|d kV  qdS r   Nr'   r/   r4  r'   r'   r(   	<genexpr>      z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s       | ]}|d kV  qdS r  r'   r  r'   r'   r(   r    r  c                          d  S Nz& should be greater than zero, but got r'   r'   param
param_namer'   r(   r}         z0im2col.<locals>.check_positive.<locals>.<lambda>r   r?   r   r  r  strictcondr'   r  r(   check_positive     (zim2col.<locals>.check_positiver  r  r  Fr  rT  r   r*  c                 s       | ]}|d kV  qdS r  r'   r/   dr'   r'   r(   r    r  zim2col.<locals>.<genexpr>r   c                         dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler'   r  r'   r(   r}         c                 s   s>    | ]\}}}}}d |d|  ||d    d  |  V  qdS )r   r    Nr'   r/   r   paddilkerstr'   r'   r(   r    s
    "
r:  c                 s   r  r  r'   )r/   cr'   r'   r(   r    r  c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spatial size r:  , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r  r'   r  r  output_sizer  r  rT  r'   r(   r}     s    r*  r   rR   r   r      T)r?   r   r`  r  r   r  ziprT   r  rl  r   r  permuter  r  squeeze)r   r  r  r  rT  r  r[  batched_input	batch_dimr  input_hinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wblocks_row_indicesblocks_col_indicespadded_inputr  num_blocks_rownum_blocks_colr'   r  r(   im2col  sd   	



 




r  r  c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dv outdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tj||| dd#}t|| | | | f}|
sf|d}|S )%Nr    c                   S   r  )Nzonly 2D output_size supportedr'   r'   r'   r'   r(   r}     r  zcol2im.<locals>.<lambda>c                   S   r  )Nzonly 2D kernel supportedr'   r'   r'   r'   r(   r}     r  c                   S   r  )Nzonly 2D dilation supportedr'   r'   r'   r'   r(   r}     r  c                   S   r  )Nzonly 2D padding supportedr'   r'   r'   r'   r(   r}     r  c                   S   r  )Nzonly 2D stride supportedr'   r'   r'   r'   r(   r}     r  Tc                    r  )Nc                 s   r  r  r'   r  r'   r'   r(   r    r  z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   r  r  r'   r  r'   r'   r(   r    r  c                      r  r  r'   r'   r  r'   r(   r}     r  z0col2im.<locals>.check_positive.<locals>.<lambda>r  r  r'   r  r(   r    r  zcol2im.<locals>.check_positiver  r  r  Fr  rT  r  )r    r   c                 s   r  r  r'   r  r'   r'   r(   r     r  zcol2im.<locals>.<genexpr>r:  c                      r  )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r  r'   r  r'   r(   r}   !  r  r   r   c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r:  z and kernel_size=r'   r'   )r  r  r'   r(   r}   '  s
    c                 S   s:   g | ]\}}}}}d |d|  ||d    d  |  qS r   r    r'   r  r'   r'   r(   r2   +  s    "zcol2im.<locals>.<listcomp>rR   c                      4   d d d d d d  dd  d	S 
NzGiven output_size=r  r  r  r  z , expected input.size(-1) to be 	 but got rR   .r'   r'   Lr  r  r  r  r  rT  r'   r(   r}   4      c                      r  r  r'   r'   r  r'   r(   r}   :  r  r   r*  r  c                 S   s   g | ]
\}}|d |  qS r    r'   )r/   or4  r'   r'   r(   r2   V      
accumulater  )r?   r   r`  r  r   r  rT   r  r  r  rl  rV   rL  prodr   _unsafe_index_putr   r  r  )r   r  r  r  r  rT  r  r[  prod_kernel_sizecolr  out_hout_wr  r  r  r  r  r  r  r  indices_rowindices_coloutput_padded_sizer  rx  r'   r  r(   col2im   s   




 



"

r  ry  c                 C   s$   | | | |  jt| d}|S Nr8  )type_asro  rD   r   )ri   ry  rk   rI   r'   r'   r(   native_dropout_backwardc  s   	r  
input_size	dimensionr  c           	      C   s   t |dkrt| dS tt ||}tj|| | jtjd}|d||	 }| 
d|d 	||d } | |}d| |f }tj||| dd S )Nr   rm  rR   r   r5   Tr  )r`  r?   squeeze_copyrD   r  rp  rl  int32unfoldflattenmovedimrL  r   r  r  )	r   r  r  r  rK  rO   rx  rW   r{  r'   r'   r(   unfold_backwardr  s   
r  epsc              	   C   st   |d ur|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS )Nrd   r   r'   nan)r?   rf   rs  r   r   )ri   rw   r  lohir'   r'   r(   logit_backward  s   r  trainc                 C   s&   |r|dkrt | ||d S |  S ro   )r   native_dropoutro  )r   r4  r  r'   r'   r(   dropout  s   r  out0out1c                 C   s   |r6|dkr6|dkrt | t j| t jdfS | jjstdt | |k}||  tdd|   }||fS | t j| t jdfS )Nr   r   r   z?result type Float can't be cast to the desired output type Longrd   )	r?   r   rr  r   is_floating_pointrR  	rand_liker   r;  )r   r4  r  	bool_maskresr'   r'   r(   r    s   r  half_to_floatc           	      C   s   ddl m} |  } |r| jtjkrtd| j dtj| tj	j
d\}}| |} ||  dkr9t| }ntj| |dd}t| | }|tj||dd }|sY||}|S Nr   guard_or_falsez%half_to_float is True but x.dtype is z, expected torch.halfr3   T)r   )rQ  r  r  r   r?   halfr  rD   rE   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr7   r   re   amaxr   )	r0   rO   r  r  r:   r=   unnormalizedx_maxr  r'   r'   r(   _softmax  s&   


r  )r  c           
      C   s   ddl m} |  } |r| jtjkrtd| j dtj| tj	j
d\}}| |} ||  dkr6| }ntj| |dd}| | }ttjt||dd}|| }	|s[|	|}	|	S r  )rQ  r  r  r   r?   r  r  rD   rE   r  r  r7   r   r  r.  r   re   )
r0   rO   r  r  r:   r=   shiftedr  shifted_logsumexpr  r'   r'   r(   _log_softmax  s(   


r  rR   rw  padding_idxscale_grad_by_freqsparsec                 C   sV   |   dkrtd|    d|jdkr'| d|}|jdkr%|d}|S | | S )Nr    z'weight' must be 2-D, got z-Dr   r   )rO   r  r[  index_selectr  )r   rw  r  r  r  r   r'   r'   r(   	embedding  s   	


r  num_weightsc                 C   s   t j| t jjd\}}| |} t|tj}|r8||f}t	|}t
j||g|dd}|| }	| |	d } t||k| j}
| |
d}| |f| j|jd   }t
j||g|dd|S )Nr  Tr  rR   r   )rD   rE   r  r  r7   r   r?   longrL  r;  r   r  rT   rV   r[  masked_fillr  )ri   rw  r   r  r  r:   r=   countsrq  grad_weights_scalery  r   grad_weightr'   r'   r(   embedding_dense_backward  s&   	


r&  c                 C   s   d}| D ]}||9 }q|S rZ   r'   )r0   rI   ir'   r'   r(   r  #  s   
r  tensors
num_chunksc           	      C   s   g }| D ]H}|  }|| | d | | }||| kr7dgd |j| d  d|||  g }t||d}|d | t|dg }||| q|S )Nr   r   r    rR   )r  r[  r   constant_pad_ndr?   Sizeappendr  )	r(  rO   r)  padded_tensorstensortensor_sizepad_along_dimr  	view_sizer'   r'   r(   
_pad_chunk*  s   
r2  c                 C   s(   | d j }| D ]
}|j |kr dS qdS )Nr   FTr[  )r(  r[  r.  r'   r'   r(   have_same_ndims?  s   

r4  c                 C   sB   | d   d | }| D ]}t|  d | |kdd  qd S )Nr   c                   S   r  )NzG_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensorsr'   r'   r'   r'   r(   r}   L  r  z+leading_dimension_matches.<locals>.<lambda>)r  r?   r   )r(  rO   leading_dim_sizesr.  r'   r'   r(   leading_dimension_matchesG  s   r6  c                 C   s   t |dkdd  t t| dkdd  | d j}| d j}| D ]$}t | dkdd  t |j|kdd  t |j|kdd  q"t| rVt| d 	 |}nt |dkd	d  | D ]}t ||j
k d
d  qbt| | |S )Nr   c                   S   r  )Nz&_chunk_cat expects positive num_chunksr'   r'   r'   r'   r(   r}   U  r  z._preprocess_chunk_cat_inputs.<locals>.<lambda>r   c                   S   r  )Nz0_chunk_cat expects a non-empty input tensor listr'   r'   r'   r'   r(   r}   W  r  c                   S   r  )Nz#_chunk_cat expects non-empty tensorr'   r'   r'   r'   r(   r}   \  r  c                   S   r  )Nz8_chunk_cat expects all input tensors with the same dtyper'   r'   r'   r'   r(   r}   _  r  c                   S   r  )Nz8_chunk_cat expects all inputs tensors on the same devicer'   r'   r'   r'   r(   r}   c  r  c                   S   r  )NzK_chunk_cat expects non-negative dim when input tensors have different ndimsr'   r'   r'   r'   r(   r}   j  r  c                   S   r  )Nz3_chunk_cat expects dim < ndim for all input tensorsr'   r'   r'   r'   r(   r}   o  r  )r?   r   r`  r   rl  r   r4  rD   r  rO   r[  r6  )r(  rO   r)  expected_dtypeexpected_devicer.  r'   r'   r(   _preprocess_chunk_cat_inputsP  s:   


r9  r   c                 C   sH   t | ||}t| ||}|d u rt||d S tj||d |d |S )Nr   )r   )r9  r2  r?   r  )r(  rO   r)  r   r-  r'   r'   r(   
_chunk_catu  s   r:  split_sizesc                 C   sX   t j| ||d}|d u rdd |D S t||D ]\}}t||j t||dd qd S )Nr{   c                 S   s   g | ]	}|j tjd qS )r8  )ro  r?   r<  r/   sr'   r'   r(   r2     s    z)split_with_sizes_copy.<locals>.<listcomp>Tr   )r   split_with_sizesr  r   r  r   )rw   r;  rO   r   splitsr  splitr'   r'   r(   split_with_sizes_copy  s   	rA  
split_size.c                 C      t j| ||S r5   )r   r@  r   )r   rB  rO   r'   r'   r(   unsafe_split     rD  c                 C   rC  r5   )r   r>  rd  )r   r;  rO   r'   r'   r(   unsafe_split_with_sizes  s   rF  c                    s   | j }|| } dkr|dkrtd| d|  fS |  d   }ddlm} ||} fddt|D }  | |  |d< t| ||S )	Nr   z split_size is 0 but dim_size is z, expected 0r   )	guard_intc                       g | ]} qS r'   r'   r/   r'  rB  r'   r(   r2     r~   zsplit.<locals>.<listcomp>rR   )r  r  detachrQ  rG  rS   r?   r@  )rw   rB  rO   rH  rf  chunksrG  r;  r'   rJ  r(   r@    s   

r@  tensor_indices_or_sectionsc                    s   |j jdkrtd|j  |jtjkrtd|j |  t dkp) dk fdd  dkrM| }t	|t
sGtdt|j | ||S t}tj  }r^|j }r^|j}|  d	d
 |D }W d    n1 ssw   Y  | ||S )Ncpuz/tensor_indices_or_sections must be on CPU, got z.tensor_indices_or_sections must be int64, got r   r   c                      s   d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr'   r'   	split_dimr'   r(   r}     s    zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>z%Expected sections to be IntLike, got c                 S   s   g | ]}|  qS r'   )itemrI  r'   r'   r(   r2         zCtensor_split_tensor_indices_or_sections_py_impl.<locals>.<listcomp>)rl  typer  r   r?   r  rO   r   rQ  r-   r   r!   tensor_splitr   _guardsdetect_fake_mode	shape_envignore_fresh_unbacked_symbols)rw   rM  rO   sectionsctx	fake_moderW  rw  r'   rO  r(   /tensor_split_tensor_indices_or_sections_py_impl  s:   



r\  mat1mat2c                 C   sH   |   s|  st|}t|}|t|| }|dkr|S |||   S ro   )r  
is_complexrh  r?   mm)rw   r]  r^  ra   rj   r   r'   r'   r(   addmm  s   ra  use_geluc                 C   s<   t | ||||}|r| jrtj|ddS t|S t|S )Nr   )r   )ra  is_cudar   gelurelu)rw   r]  r^  ra   rj   rb  r   r'   r'   r(   _addmm_activation  s   

rf  vecc                 C   s\   |   s|  st|}t|}|t|| }|dkr|S | dkr(||  S |||   S ro   )r  r_  rh  r?   mvr   )rw   r]  rg  ra   rj   r   r'   r'   r(   addmv  s   ri  r   rstdgammaNCHxWgroupoutput_maskc
              	      s  t j| ||dd t j|| dd t j|dd t|    k fdd tjfkfdd td u pJ  k fdd   }
t |
 k fdd t| | j	dgd	}|  j	dgd	}d }d }d }|	d
 r9d|
  }d urt|
d
|
	d}t|
d
|
	d}t|
dd|
}n&||
	d}||
	d}t|
dtjd|
f|jd}| | | | | | }|  || |  }|
d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r^||
||

d  |
d j	d
gd	 }|	d rj|j	d
gd	}|||fS )NF)allow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr'   r'   )rm  rn  rl  r'   r(   r}   <  rR  z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got r  r'   )rl  ro  r   r'   r(   r}   @      c                      s$   d  dd ur   S d S )NzExpect gamma to have z elements but got rR   )r   r'   )rm  rk  r'   r(   r}   D     $ c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r'   r'   )rm  ro  r'   r(   r}   J  r~   r    r{   r   rd   rR   r   rk  r*  )rD   check_same_devicecheck_same_shaper?   r   r   r  r   rt  r   rT   r  rq  rl  rV   r7   r   )ri   r   r   rj  rk  rl  rm  rn  ro  rp  cpgdsdbd_inputd_gammad_biasr=  ds_valdb_valc1c2c3r'   )rm  rn  rl  rk  ro  r   r(   native_group_norm_backward'  s   

 
""



$

r  out2c
                C   d   t | |||||||||	
}|
||f}t|D ]\}}|d ur/t|| |j t||| dd q|S r   )r  	enumerater   r  r   )ri   r   r   rj  rk  rl  rm  rn  ro  rp  r  r  r  r  rW   r'  rI   r'   r'   r(   native_group_norm_backward_out  s   
r  c                 C   s   | d ur	|  |S | S r5   r7   )r0   r   r'   r'   r(   _maybe_cast  s   
r  grad_outnormalized_shapebiasc           "         s  |j }| }	t|j  fdd| |||fD \}
}}}|
d u r&td|	t| }||d  }|d | }g }g }t|	D ]}||krL|| q@|| q@t	|}t	|}ddl
m} ||dksl||dkr|d ru||nd |d r|||d  nd |d r|||d  fS d fS t|| }t|| }|d u rtd|| | }|d ur|
| }n|
}|| }t||d	}t||}t||d	}t||}|| | }d }d } d }!|d r|| | }|d r|d urt|dkrt|
| |d
} n|
| } |d r,|d ur,t|dkr(t|
|d
}!n|
 }!t||jt| |d ur;|jnd t|!|d urI|jfS d fS )Nc                 3   s,    | ]}|d ur|j  tjdn|V  qd S r  )r7   r?   r<  r.   r9   r'   r(   r    s    
z-native_layer_norm_backward.<locals>.<genexpr> grad_out_cast should not be Noner   rO  r   r    zinput_cast should not be NoneTF)r  rO   rD   get_computation_dtyper   r  r`  rS   r,  r  rQ  rP  rL  rV   r?   r   r   ro  r  )"r  r   r  r   rj  r   r  rp  input_shape
input_ndimgrad_out_cast
input_castweight_cast	bias_castaxis
inner_dims
outer_dimsinner_dim_indicesouter_dim_indicesr'  rl  MrP  x_hat
grad_x_hatabr  r  r  rK   rz  d_weightr|  r'   r9   r(   native_layer_norm_backward  sx   



r  c             	   C   s`   t | |||||||}||	|
f}t|D ]\}}|d ur-t|| |j t||| dd q|S r   )r  r  r   r  r   )r  r   r  r   rj  r   r  rp  r  r  r  r  rW   r'  rI   r'   r'   r(   native_layer_norm_backward_out  s   
r  c                 C   s  g }t t|D ]}||  | d  qt| j}| |}|d u r=|tj	tj
fv r5ttj	j}n
ttjj}n|}ttjjjt|dj|dd|}	||	}
|d urb|
|}
| jpk|d uok|j}t| }|tjtjfv }|s|s|
 }
|	 }	|
| }||	fS )Nr   r    Tr   )rS   r`  r,  rO   rD   r  r   r7   r?   r   r   finfor  r   rsqrtopsr   addScalarr   r   r   	is_nestedr   channels_lastchannels_last_3dr  r  )r   r  r   r  dims_to_reducer'  r:   upcasted_inputeps_valrqrst_inputupcasted_resultr  r9  is_channels_lastr  r'   r'   r(   _fused_rms_norm  s:   





r  c                 C   s  |j }| }t|j}| j|tjd}	|j|tjd}
|d ur)|j|tjdnd }|	d u r3td|t	| }||d  }|d | }g }g }t
|D ]}||krY|| qM|| qMt|}t|}ddlm} ||dksy||dkr|d r||nd |d r|||d  fS d fS t||
 }|d ur|	| }n|	}d }d }|
| }|d rtj|| |dd}||| |  | }|d r|d ur|	| }t	|dkrtj||dd}n|}t||jt||jfS )	Nr8  r  r   r  r   Tr   F)r  rO   rD   r  r   r7   r?   r<  r  r`  rS   r,  r  rQ  r  rL  rV   r   r  )r  r   r  rj  r   rp  r  r  r:   r  r  r  r  r  r  r  r  r'  rl  r  r  r  rz  r  r  sum_vald_weight_full_shaper'   r'   r(   _fused_rms_norm_backwardL  sf   	


r  running_meanrunning_varmomentum
functionalc	                 C   sX  dgt td|   }	t| j}
|}|}|rt| j}
| j|
d}tj||	ddd\}}t	|| }| | | }t
||	}t
||	}|d ur]|| d| |  }|s]|| |d ur|  | jd  }t
||	}|||d   }|| d| |  }|s|| nV|d u s|d u rtd|j|
dd}|}|j|
dd}|}|}dt||  }| jjd	kr|}|}n
| d
}| d
}t||  d }t||  d }| | | }|d ur| }t||  d }|| }|d ur| }t||  d }|| }| jjd	kr |j| jd}|j| jd}|j| jd||||fS )Nr   r    r   T)rO   
correctionr   r   z:running_mean and running_var must not be None in eval mode)r   r   rN  r   )rS  rS   rO   rD   r  r   r7   r?   var_meanr  r  copy_r   r  r  r@  rl  rS  rL  rV   r  )r   r   r  r  r  r   r  r  r  reduction_dimsr:   new_running_meannew_running_var	input_acc
biased_varr   rj  r  	save_mean	save_rstdnsqueezed_varunbiased_varinvstdr'   r'   r(   native_batch_norm_helper  sz   





r  r  save_invstdc              
   C   ,   t | |||||||d	\}}	}
}}||	|
fS NFr  r   r   r  r  r  r   r  r  r  r  r  rU   r'   r'   r(   native_batch_norm  s   
r  c              
   C   sv   |d u r|d u rt | |||||S |d u rtd|d u r"td|r0t | |||||||S t | ||||||S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)r   _native_batch_norm_legitrR  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r  r  r'   r'   r(   native_batch_norm_decomposition	  s&   r  c                    s   |  |}|| d |   dkr6|dkr6 fddt|D }  | |  ||d < tjjj| ||S tjjj|  |S )Nr   r   c                    rH  r'   r'   r  rJ  r'   r(   r2   4  r~   z(unsafe_chunk_py_impl.<locals>.<listcomp>)	r  rS   r?   r  r   rF  rd  rD  r   )r.  rL  rO   rf  r;  r'   rJ  r(   unsafe_chunk_py_impl.  s   
r  c              
   C   s   t j| ||||d||S r  )r   r  rd  )r   r   r  r  r  r  r  r'   r'   r(   r  :  s   
r  c              
   C   r  r  r  r  r'   r'   r(   r  P  s   
r  c           
   
   C   s,   t | ||d d |||d	\}}}}	}	|||fS r  r  )
r   r   r  r   r  r  r  r  r  rU   r'   r'   r(   !_native_batch_norm_legit_no_statsa  s   	
r  c              
   C   sP   t | |||||||d	\}}	}
}}|d u rtd|d u r!td||	|
||fS )NT#new_running_mean should not be None"new_running_var should not be None)r  r  )r   r   r  r  r  r   r  r  r  r  r  r  r  r'   r'   r(   #_native_batch_norm_legit_functionalp  s   r  c           	   	   C   sP   t j| ||||d|}d}|t jjjkrt j| |}t j|t j| j| j	dS )a  
    Return a reserve tensor for batch norm, used only by cudnn to pass forward state to the
    backward pass. This is needed for `_batch_norm_with_update` and `_batch_norm_no_update`,
    which support a variety of backends including cudnn. We create this tensor here to get
    the correct shape in the traced graph if we detect that will call the cudnn kernel,
    and rely on DCE to avoid materializing this tensor.
    Tr   )r   layoutrl  )
r?   _C_select_batch_norm_backend_BatchNormBackendCudnn(_get_cudnn_batch_norm_reserve_space_sizeemptyuint8r  rl  )	r   r   r  r  r  r  r   backendreserve_sizer'   r'   r(   _get_batch_norm_reserve_tensor  s   r  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NTFr   r  r  r   r   r  r  r  r  r  r  r  r  rU   reserver'   r'   r(   _batch_norm_with_update     
r  c              
   C   sh   t | ||||d||d	\}}}	}
}t| |||||dd}|
d u r$td|d u r,td|||	||
|fS )NTr  r  r  )r  r  r  )r   r   r  r  r  r  r  r  r  r  new_rmnew_rvr  r'   r'   r(   "_batch_norm_with_update_functional  s"   r  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NFr  r  r  r'   r'   r(   _batch_norm_no_update  r  r  c                 C   sL   |d urt d| t| |k jtjd}|| |  d|  }||fS )Nz=generator must be None for _fused_dropout decomposition, got r   rd   )r  r?   r	  r7   r  r  )r   r4  	generatorry  r  r'   r'   r(   _fused_dropout_decomposition  s   r  )r   r  rl  
pin_memorynon_blockingr9  rl  r  r  r9  c          	      C   s   |r|t jkrtd| |rtdt| t jttttfs)tdt	| j
 |d u rA|d u rA|d u rAt| t jr?|  S | S d}t| t jrL| }nt | }|d urt||jkrt|d url|j	dkrlt j||}d}t j|||}|d ur|st j||}d}|d urt j||dS |S )Nz*layout must be None or torch.strided, got z:pin_memory=True is not supported in _to_copy decompositionz x must be Tensor or scalar, got FrN  Tr8  )r?   stridedr  r-   r   rh  r   rr  complexrS  r!   ro  scalar_tensorrl  _primsconvert_element_type
device_put)	r0   r   r  rl  r  r  r9  dtype_convertedx_tensorr'   r'   r(   _to_copy	  s6   
r  c                 C   s
   t | S r5   )r   aliasr8   r'   r'   r(   nop_decomposition;	  s   
r  out3exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r||	|
| jdtjdfS ||d|d| jdtjdfS )Nr  r   )r   r  rL  r?   r  )r   r   r  r  r  r   r  r  r  r  r  r'   r'   r(   cudnn_batch_normC	  s"   
r  c                 C   s@   t |D ]\}}|dkr|| jk r| j| |ks| |} q| S rZ   )r  r[  r  rT   )r0   broadcast_maskr  ry  r'   r'   r(   _broadcast_batch_norm_backwarde	  s
    
r  r  c                 C   s   t | |||||||||	
S r5   )native_batch_norm_backward)r  r   r   r  r  r  r  r  r  rp  r  r'   r'   r(   batch_norm_backwardl	  s   r   c
           &         s  |j }
|d ur|j }n|
}t|j   fdd| ||||||fD \}}}}}}}|j}| }|dk r<td| d}tt|||  }|}|}|r[|d u sV|d u rZtdn|d u sc|d u rgtd|}t	|| }dg| }|| ||< g }t
|D ]}||kr|| qt||}d| }t||}t|||  |}t|| |}tt|| || |} |d u rt||d }!nt|| |}!|r|| |  }"||" | |! }#n||! }#|	d r|| }$nd }$|	d r|}%nd }%|#|
t|$|t|%|fS )	Nc                 3   s&    | ]}|d ur|  n|V  qd S r5   r  r.   r9   r'   r(   r  	  s
    
z-native_batch_norm_backward.<locals>.<genexpr>r    z*rank of the input must be at least 2, got r   z1mean and invstd must not be None in training modezDrunning_mean_cast and running_var_cast must not be None in eval moderd   )r   rD   r  r  rO   r  r  rS  r?   r  rS   r,  r  r   r   r7   r  )&r  r   r   r  r  r  r  r  r  rp  r  weight_dtyper  r  r  running_mean_castrunning_var_castsave_mean_castsave_invstd_castr  
input_rankr  num_featuresr   r  r  reduction_axesr'  r   grad_output_sumdot_p	grad_mean
proj_scale
grad_scaleprojrW   r%  	grad_biasr'   r9   r(   r  	  s   
	



r  c
                C   r  r   )r  r  r   r  r   )r  r   r   r  r  r  r  r  r  rp  r  r  r  r  rW   r'  rI   r'   r'   r(   native_batch_norm_backward_out	  s&   
r  save_varc                 C       t || |||||d|g d
S NT)TTTr   r  )r   ri   r   r  r  r  r  r  r'   r'   r(   miopen_batch_norm_backward
  s   r  reserveSpacec	           	      C   r  r  r  )	r   ri   r   r  r  r  r  r  r  r'   r'   r(   cudnn_batch_norm_backward6
  s   r  c                    s  | j  | jttdv fdd | jdd  D ]}t|dkfdd qd |d  dkrjd |d  dkrjtdd	 tdd  |D }td
d	 tdd  ||D }tjj	| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt
|d|
f }|	s|stj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]\}}|d u r|d|d d |f }q||d|d d |f  }q|||  S )Nr  c                      
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r'   r'   r3  r'   r(   r}   [
     
 z%adaptive_avg_pool2d.<locals>.<lambda>r:  r   c                         dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r  r  r'   r  r'   r(   r}   `
  s    rR   c                 s   s    | ]	\}}|| V  qd S r5   r'   )r/   r'  r  r'   r'   r(   r  f
      z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s&    | ]\}}}||d  |  V  qdS r   Nr'   )r/   r'  r  r=  r'   r'   r(   r  g
  s    
c                 S   s   t j| | |ddS )Ntruncrounding_moder?   divr  r  r  r'   r'   r(   start_indexl
  s   z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr   r  r  r   r"  r'   r'   r(   	end_indexo
      z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkp"|| dk }|r+|d7 }n|dkr3|d8 }t j| t jd}|d| }|rbt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )Nrm  r   r   rR   r  )r?   rp  r  rT   r  r   rl  minimum)in_sizeout_sizeorangei0	maxlengthin_size_modadaptive	range_maxrx  maxvali1length)rl  r$  r#  r'   r(   compute_idxr
  s(   

z(adaptive_avg_pool2d.<locals>.compute_idx.r*  )r   rR   r{   c                 S   sj   t |tr	| |fS |dkrtd| ||dk}|dkr$t|d}t| |d} t|| }| |fS )Nr   z)dim should be negative when masking, got rR   r:  r*  r   )r-   r   r  rT   rV   r?   r"  )valsr1  r.  r-  rO   ry  r'   r'   r(   
maybe_mask
  s   

z'adaptive_avg_pool2d.<locals>.maybe_mask)r-  rO   r   )rl  r  r`  r?   r   r  r  nnr  
avg_pool2drV   r   r	   rS   )r   r  r  rT  kernelr2  idxhlength_hrange_max_h
adaptive_hidxwlength_wrange_max_w
adaptive_wr3  r4  retr'  jr'   )rl  r$  r[  r  r#  r(   adaptive_avg_pool2dQ
  sN   

(  



&rB  c           
      C   s   t d| d t| jd |  t| }tdd |D r%| |S ttj| jd |  }ttj|}dg| j	 }| jd |  |d | < |t
j|| jd||  d}| |}	t
j|	d|g| ddd	|	jS )
Nmax_unpoolingd_forward_outc                 s   s    | ]}|d kV  qdS r  r'   r<  r'   r'   r(   r  
  r  z _max_unpoolnd.<locals>.<genexpr>r   rk  rR   Fr  )rD   alert_not_deterministicrS  r  anyrL  r   operatorr   r[  r   rp  rl  rt  r  r  )
rw   rw  r  rO   output_shapenchwindices_nc_shapeindices_flatr  r'   r'   r(   _max_unpoolnd
  s"   	

rM  c                    s   t jt jkfdd t tdkfdd t jdv fdd t jjkfdd tdjD ] t  d	k fd
d q>t	dS )Nc                         d j  S )Nz2elements in indices should be type int64 but got: r   r'   )rw  r'   r(   r}   
      zmax_unpool2d.<locals>.<lambda>r    c                      r  )NzMThere should be exactly two elements (height, width) in output_size, but got 
 elements.r`  r'   r  r'   r(   r}   
     r  c                         d j  dS )NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with  dimensions.r3  r'   r   r'   r(   r}   
  s   c                         dj  d j  S NzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: r  r'   )rw  rw   r'   r(   r}   
     
r   r   c                         dj  d  dS )NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got  with dimension  being empty.r  r'   )r'  rw   r'   r(   r}   
  
   )
r?   r   r   r  r`  r[  r  rS   r  rM  )rw   rw  r  r'   )r'  rw  r  rw   r(   max_unpool2d
  s,   





	r]  c                    s  t jt jkdd  t jdv fdd t tdkfdd t tdkfdd t tdkfdd t jjkfd	d td
jD ] t  dk fdd qXt d dko~d
 dko~d dkfdd t	dS )Nc                   S   r  )Nz(elements in indices should be type int64r'   r'   r'   r'   r(   r}     r  zmax_unpool3d.<locals>.<lambda>r*  r  c                      rT  )NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with rU  r3  r'   r   r'   r(   r}     r  r   c                      r  )NzVThere should be exactly three elements (depth, height, width) in output_size, but got rP  rQ  r'   rR  r'   r(   r}     rS  c                      r  )NzRThere should be exactly three elements (depth, height, width) in stride, but got: rP  rQ  r'   rT  r'   r(   r}     r~   c                      r  )NzSThere should be exactly three elements (depth, height, width) in padding, but got: rP  rQ  r'   )r  r'   r(   r}   "  r~   c                      rV  rW  r  r'   )rw  r   r'   r(   r}   &  rX  r   r   c                      rY  )NzZmax_unpooling3d(): Expected input to have non-zero size for non-batch dimensions, but got rZ  r[  r  r'   )r'  r   r'   r(   r}   /  r\  r    c                      r  )Nz5strides should be greater than zero, but got stride: r'   r'   r`  r'   r(   r}   8  r  )
r?   r   r   r  r[  r`  r  rS   r  rM  )r   rw  r  rT  r  r'   )r'  rw  r   r  r  rT  r(   max_unpool3d  sB   	







	"
ra  )rj   r.  c                C      t | |||d|dS )NTinplacerj   
_index_addr0   rO   r{  r.  rj   r'   r'   r(   
index_add_>  s   	rh  c                C   rb  )NFrc  re  rg  r'   r'   r(   	index_addJ  s   
ri  rd  c                   s"  t | jtjdkfdd jdkrdnd|jdkr*|ndtkfdd  dkr]t | jttu pQt 	t
  fdd |  }| jdk}|ri| dn| }d f }|rwtjntj}	|	|||dd	}
|r| S |r|
dS |
 S )
Nr   c                      rT  Nz(Index should have dimension 1 or 0 (got r&  r3  r'   r{  r'   r(   r}   c  r  z_index_add.<locals>.<lambda>r   c                      s   d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r'   r'   )rO   
index_sizer/  r'   r(   r}   i      c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)rS  r'   )rj   python_typer'   r(   r}   p  rm  r5   Tr  )rD   canonicalize_dimsr[  r?   r   r  dtype_to_typer   rr  is_weakly_lesser_typerS  rT   r   
index_put_	index_putr  r  )r0   rO   r{  r.  rd  rj   zero_dimr6  rx  rt  r   r'   )rj   rO   r{  rl  ro  r/  r(   rf  W  s6   	

rf  r   c              
   C   s   t t| dkdd  t| }| d  }|dd  }tdd | D }|r,||f}n||f}|| }| d ||}dt| }	t|D ]+}
| |
 }t||	d||d f |}|rhtj	||d|
d}qFtj	||d|
d}qF|S )	Nr   c                   S   r  )Nz#received an empty list of sequencesr'   r'   r'   r'   r(   r}     r  zpad_sequence.<locals>.<lambda>r   c                 s   s    | ]}| d V  qdS r  r  r.   r'   r'   r(   r        zpad_sequence.<locals>.<genexpr>)r   r   rO   r{  )
r?   r   r`  r  r   r   rS   r   r*  r|  )	sequencesbatch_firstpadding_valuesequences_sizemax_sizetrailing_dimsmax_lenout_dimsr   dim_paddingsr'  currseqrowr'   r'   r(   pad_sequence  s(   
r  c                 C      t | |||ddS )NTrd  _index_copyr0   rO   r{  r.  r'   r'   r(   index_copy_     r  c                 C   r  )NFr  r  r  r'   r'   r(   
index_copy  r   r  c          
         s   t | j|}t jdk fdd | jdk}|r | dn| } jdkr, dn  d|  f }|r:tjntj}||||}	|rG| S |rN|		dS |	
 S )Nr   c                      rT  rj  r3  r'   rk  r'   r(   r}     r  z_index_copy.<locals>.<lambda>r   r5   )rD   rp  r[  r?   r   rT   r   rs  rt  r  r  )
r0   rO   r{  r.  rd  ru  r6  rx  rt  r   r'   rk  r(   r    s   

r  c                 C   sR   t | d| }t t |  }| js| jr| d}n|}|t | |fS )Nr'   r  )r?   r&  rL  re   r   rc  is_xpur-  )rw   r   rg   r   r'   r'   r(   log_sigmoid_forward  s   r  lowhighr  c              	   C   s*   t j| j|  t|t|| j| j|dS )N)rT  r  r  r   rl  r  )prims_uniform_helperr  rT  r   r   rl  )r0   r  r  r  r'   r'   r(   uniform  s   r  c                 C   s   |  t| |||S r5   )r  r  )rw   r  r  r  r'   r'   r(   uniform_  s   r  c                 C   s   t | d }|d ur"t|d u dd  tt ||kdd  |S |d urjt|d u dd  tt ||kdd  g }t|D ]%\}}t||krZ|| |d  t|  qB|t| |d  |  qB|S tddd  d S )	Nr    c                   S   r  Nz9Must specify exactly one of output_size and scale_factorsr'   r'   r'   r'   r(   r}     r  z.upsample_compute_output_size.<locals>.<lambda>c                   S   r  N r'   r'   r'   r'   r(   r}     r  c                   S   r  r  r'   r'   r'   r'   r(   r}     r  c                   S   r  r  r'   r'   r'   r'   r(   r}     r  Fc                   S   r  r  r'   r'   r'   r'   r(   r}     r  )r`  r?   r   r  rh  r,  r   )r  r  scale_factorsspatial_dimensionsr'  r=  r'   r'   r(   upsample_compute_output_size  s.   r  c                 C   s   | d u rd S | | S r5   r'   )scalesrx  r'   r'   r(   get_scale_value  s   r  r  c                 C   s2   t |  ||}|r|nd gt| }t| ||S r5   r  r  r`  _upsample_nearestr   r  r  osizer  r'   r'   r(   _upsample_nearest_vec  s   r  c                 C   s6   t |  ||}|r|nd gt| }t| ||ddS NTexactr  r  r'   r'   r(   _upsample_nearest_exact_vec  s   r  c                 C   s   g }t |}|r
dnd}t|D ]O}|| }| j| |  }	|| d ur2|| dkr2|	|	||   n|	| }
tj|tj| jd}|| |
 tj}t|d | D ]}|	d}qR|
| q|S )Nr   r   r   r  r   rR   )r`  rS   r  r?   rp  r   rl  r7   r  rT   r,  )r   r  r  r  rw  num_spatial_dimsr~  r  r  isizerk   output_indicesinput_indicesrU   r'   r'   r(   !_compute_upsample_nearest_indices-  s    r  )preserve_memory_formatr  r  c                 C   s   t | ||gS r5   r  r   r  r  r'   r'   r(   upsample_nearest1dS  s   	r  c                 C   s   t | ||gddS r  r  r  r'   r'   r(   upsample_nearest_exact1d_     r  scales_hscales_wc                 C   s   t | |||gS r5   r  r   r  r  r  r'   r'   r(   upsample_nearest2dm  s   
r  c                 C   s   t | |||gddS r  r  r  r'   r'   r(   _upsample_nearest_exact2dz  s   r  scales_dc                 C   s   t | ||||gS r5   r  r   r  r  r  r  r'   r'   r(   upsample_nearest3d  r  r  c                 C   s   t | ||||gddS r  r  r  r'   r'   r(   _upsample_nearest_exact3d  s   r  r  c           	      C   sp   t | |||d}d d g| }t| |}|jdkr6t| }| jd }| jjdkr0|dk r0t	j
}|j|d}|S )Nr  r*  r   cudar8  )r  r   _unsafe_indexr[  rD   r   r  rl  rS  r?   r<  r  )	r   r  r  r  spatial_indicesrw  r  r9  
n_channelsr'   r'   r(   r    s   


r  c                    sn   |r|rd n|rd n|rd nd t   dkr'tdt  d   fdd	tdt  D S )
Nr  r*  r   r    r   zlen(params)=z  is not divisible by group_size=c                    s    g | ]}t ||   qS r'   r  rI  
group_sizeparamsr'   r(   r2     s    z!gather_params.<locals>.<listcomp>)r`  r  rS   )r  
has_biaseshas_projectionsr'   r  r(   gather_params  s   r  c                 C   sh   |r!| d|  |d|  }}| d| d  |d| d  }}n| | || }}d\}}||||fS )Nr    r   NNr'   )r  hiddensr'  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddenr'   r'   r(   params_hiddens  s   $r  c                 C   sD   ||krt d| d| d|| d|||  | dd|S )Nlast_batch_size (z) must be > batch_size (r&  r   )r  r,  r  )r  last_batch_size
batch_sizer  r'   r'   r(   update_hidden_for_packed  s   r  c              	   C   sF   ||kr| S ||krt d| d| dt| |d||| fS )Nr  z) must be < batch_size (r&  r   )r  r?   concatr  )r  r  r  
inp_hiddenr'   r'   r(    update_hidden_for_packed_reverse  s   r  c                 C   s$  |d }|d }|r|d nd }	|r|d nd }
g }g }|r"|d n|d }| dd|}t| t|}|r>|d d d }|D ]-} | jd }||krLn|rVt||||}nt||||}|| |||	||
}|}|| q@|ru|  n	|| |  t	|d}|st	|dn|}||fS )Nr   r   r    r   rR   )
r  r?   r@  rS  r  r  r  r,  reverser  )inphiddenr  r  	hidden_fnbatch_sizesr  	ih_weight	hh_weightih_biashh_biasstep_outputr  r  r  	split_inpr'  r   
hidden_outr'   r'   r(   one_layer_rnn_data  s@   


r  c                        fdd}|S )Nc                    s    t ||||  S r5   r   linearr'  r  r  r  r  r  nonlinearityr'   r(   rK   +  s   zrnn_cell.<locals>.innerr'   r  rK   r'   r  r(   rnn_cell*  s   r  c                    r  )Nc                    s$   t | ||}  t ||||  S r5   r  r  r  r'   r(   rK   2  s   zrnn_cell_data.<locals>.innerr'   r  r'   r  r(   rnn_cell_data1  s   r  c                 C   s   |d }|d }|r|d nd }|r|d nd }	t | ||}
|r&|
dn|
}
|d}g }|
D ]}|||||||	}|| q1|rH|  t|d}||dfS )Nr   r   r    r   )	r   r  fliprT   r,  r  r?   r  r  )r  r  r  r  r  r  r  r  r  r  precomputed_inputr  r  r'  r   r'   r'   r(   one_layer_rnn9  s   
r  c                 C   s   |d }|d }|r|d }|d }nt | }t | }|d d}	|d d}
g }d}|	d}d}d}d}d}|  } |	 }	|
 }
t jjj| |||||	|
|||||||||}|d |d |d }}}||	d|	dffS )Nr   r   r    r   F)
r?   r   r  rT   r  r  r   mkldnn_rnn_layerrd  r  )r  r  r  r  r  w0w1w2w3hxcxr  modehidden_size
num_layersr  rz  r  outputsrY   hycyr'   r'   r(   mkldnn_one_layer_lstmO  sN   


r  c
                 C   s   |r|  ddn| } g }
t|D ]^}t||||\}}}}|r'||d k r'|nd}|	| |||\}}|
| |rI|	| |||dd\}}|
| |rXt||g| d } n|} |dkrn|rn||d k rntj| |dd} q|rw|  ddn| } | |
fS )Nr   r   r   T)r  )r  )	transposerS   r  r,  r?   r  rO   r  )r   r  r  r  r  r  r  r  rz  layer_fnfinal_hiddensr'  r  r  r  r  fwd_inp
fwd_hiddenbwd_inp
bwd_hiddenr'   r'   r(   _rnn_helper  s,   



r  c	                 C   R   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS Nr   Fr  )	unbindr  r  r   r  r  r?   r   stackr   r  r  r  r  r  r  r  rz  r  r   r  r'   r'   r(   rnn_tanh_input     
r  c	                 C   r  r  )	r  r  r  r   r  r  r?   re  r  r  r'   r'   r(   rnn_relu_input  r	  r
  c	                 C   T   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS Nr   Fr  r  )	r  r  r  r   r  r  r?   re  r  datar  r  r  r  r  r  r  r  r  r   r  r'   r'   r(   rnn_relu_data  &   
r  c	                 C   r  r  )	r  r  r  r   r  r  r?   r   r  r  r'   r'   r(   rnn_tanh_data  r  r  c                 C   s   t ||||  }|d|}|d  }	|d  }
|d  }|d  }|
| |	|  }||  }|d u r;|nt ||d }||fS )Nr*  r   r   r    r   r   r  chunkr   r   )r  r  r  r  r  	hr_weight	chunk_dimgateschunked_gatesin_gateforget_gate	cell_gateout_gater  r  r'   r'   r(   	lstm_cell.  s   r  c              
   C   s   |d }|d }|r|d nd }|r|d nd }t |dkr"|d nt |dkr,|d nd }	|d d}
|d d}t| ||}|rJ|dn|}g }|D ]} t| |
||||	dd\}
}||
 qP|rk|  t	|d}||

d|
dffS )Nr   r   r    r   r  r*  r  )r`  rT   r   r  r  r  r,  r  r?   r  r  )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r'   r'   r(   one_layer_lstm<  s$   *r  c              
   C   s
  |d }|d }|r|d nd }|r|d nd }	t |dkr"|d nt |dkr,|d nd }
g }g }|r8|d n|d }t| t|}|rM|d d d }|d }|d }|dd||dd|}}|D ]l} | jd }t| ||} ||k r||d||| |d||| f |dd||dd|}}||krt	||d||| fd}t	||d||| fd}t
| ||||	|
dd\}}|}|| qf|r|  ||f}n|||f |  t| \}}t|dt|df}t|d}||fS )	Nr   r   r    r   r  r*  rR   r  )r`  r?   r@  rS  r  r  r   r  r,  r  r  r  r  r  )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  orig_hxorig_cxr  r  r'  r  hidden0hidden1r   r'   r'   r(   one_layer_lstm_dataW  s\   *

r$  c                 C   s   dd }|| ||rt S tS )a*  Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._get_mkldnn_enabled()`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c           	      S   s   t j sdS | gt| tt| }dd |D }t|dkr$dS | }|t dkr1dS dd |D }|D ]}|t j	t j
fvrG dS q:| jrMdS |d d|d dk}|r_dS d	S )
NFc                 S      h | ]}|j qS r'   rk  r/   tr'   r'   r(   	<setcomp>      zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>r   rN  c                 S   r%  r'   r   r&  r'   r'   r(   r(    r)  r   r    T)r?   r  _get_mkldnn_enabledrS  r   from_iterabler`  poprl  r   bfloat16requires_gradr  )	r   r  r  r(  devicesrl  dtypesr   r  r'   r'   r(   
use_mkldnn  s(   
z2select_one_layer_lstm_function.<locals>.use_mkldnn)r  r  )r   r  r  r1  r'   r'   r(   select_one_layer_lstm_function  s   r2  c	                 C   s   t |dkrtdt | t|||d d|d dk}tt|d |d }	t| ||}
t| |	||||||||

\}}tt| }|t	|d dt	|d dfS )Nr    $lstm expects two hidden states, got r   r   )
r`  r  r  r  rS  r  r2  r  r?   r  )r   r  r  r  r  r  r  r  rz  r  r  r   r  r'   r'   r(   	lstm_impl  s&   $"r4  c	                 C   s   t |dkrtdt | t|||d d|d dk}tt|d |d }	t| |	||||||dtt|d
\}
}tt| }|
t	
|d dt	
|d dfS )Nr    r3  r   r   F)r  )r`  r  r  r  rS  r  r  r   r$  r?   r  r  r'   r'   r(   lstm_data_impl  s$   $
"r5  c                 C   sr   |  dd}t||| dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nr   r   r    r   )r  r   r  r   r   r  r  r  r  r  r  chunked_igateschunked_hgates
reset_gate
input_gatenew_gater'   r'   r(   gru_cell  s   r<  c                 C   s|   t | ||dd}t |||dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nr   r   r   r    r  r6  r'   r'   r(   gru_cell_data  s   r=  c	                 C   sJ   t ||d}t| |d||||||dtt|td
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r=  r?   r  )r  r  r  r  r  r  r  r  r  r   r  r'   r'   r(   gru_impl_data   s   r>  c	                 C   sH   t ||d}t| |d|||||||tttd
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r<  r?   r  )r   r  r  r  r  r  r  r  rz  r   r  r'   r'   r(   gru_impl>  s   
r?  c                 C   :   t |  ||}t|d}t|d}tjj| ||||S Nr   r   )r  r  r  r?   r  r   _upsample_bilinear2d_aar   r  align_cornersr  r  scale_hscale_wr'   r'   r(   upsample_bilinear2d_aa_vec\     


rG  c                 C   r@  rA  )r  r  r  r?   r  r   _upsample_bicubic2d_aarC  r'   r'   r(   upsample_bicubic2d_aa_vech  rH  rJ  c                 C   r@  rA  )r  r  r  r?   r  r   _upsample_lanczos2d_aarC  r'   r'   r(   upsample_lanczos2d_aa_vect  rH  rL  c                 C   s4   t |  ||}|r|nd gt| }t| |||S r5   )r  r  r`  _upsample_linear)r   r  rD  r  r  r  r'   r'   r(   _upsample_linear_vec  s   	rN  rD  c                 C   s   t | |||gS r5   rM  )r   r  rD  r  r'   r'   r(   upsample_linear1d  s   rP  c                 C   s   t | ||||gS r5   rO  )r   r  rD  r  r  r'   r'   r(   upsample_bilinear2d  s   rQ  c                 C   s   t | |||||gS r5   rO  )r   r  rD  r  r  r  r'   r'   r(   upsample_trilinear3d  s   rR  c                 C   s@   |r|dkr| d |d  S dS |d ur|dkrd| S | | S )Nr   rd   r   r'   )r'  r(  rD  rk   r'   r'   r(   _compute_scale  s    rS  c                 C   s   |r| | S | |d  d S Nr   r'   )rk   	dst_indexrD  r'   r'   r(   _compute_source_index  s   rV  weightsweights_precisionc                 C   sB   t dd t| |D d|d >  }||? }t|ddtjS )Nc                 s   s,    | ]\}}| tj| tj V  qd S r5   )r7   r?   r  )r/   r=  r  r'   r'   r(   r    s    
z%_sum_tensors_uint8.<locals>.<genexpr>r   r      )_sum_tensorsr  r?   r   r7   r  )rj  rW  rX  r  r'   r'   r(   _sum_tensors_uint8  s   
r[  c                 C   sJ   t |  }d}t j||jd}d|d|d >   }|dk}||  S )N   rk  r   r   i   )r?   r  r   rp  rl  r   )rW  
max_weightmax_weight_precision
precisionsvaluesry  r'   r'   r(   _compute_weight_precision  s   ra  c                    s  j d }j dd  }t|tjtjjd\}fddfddtt|||D }tt| \}g }	t	ddgg  D ]# d d g fd	dt
D  }
t|
}t|}|	| qGtt
D ]'}|| |  d
dfddt|	d d d |	dd d D }	qqt|	dkrtdt|	 |	d }t}jjdkr|dk rtj}t|tjstdt|j |j|d} s| }|S )Nr   r    r  c           	         s   t | | |}tj|jdjd}t|| jdd}|j|jd gdg| R  }|tj	}|d j| d d}|||fS )Nrk  r   r   r   r   r   r   )
rS  r?   rp  rl  r7   rV  r   r  r  r  )	inp_sizer(  r  nsqueezescale_factorr'  x_f32r0   xp1)rD  r   r   r'   r(   
get_values  s   
z$_upsample_linear.<locals>.get_valuesc                    s,   g | ]\}\}}} |||d  | qS r  r'   )r/   r'  rb  r(  r  )rg  n_dimsr'   r(   r2     s    z$_upsample_linear.<locals>.<listcomp>r   c                    s(   g | ]} | d kr| n| qS r  r'   )r/   k)r  xp1sxsr'   r(   r2        ( r   rd   c                    s$   g | ]\}}|t ||   qS r'   )r?   r   )r/   v1v2)xscaler'   r(   r2   	  s    z+Expected vs to have exactly 1 element, got r     z$Expected result to be a Tensor, got r8  )r  r`  rD   rE   r  INT_TO_FLOATr  r  rS  r	   rS   r   r  r   r,  reversedr   r7   r  r   rl  rS  r?   r<  r-   r   r!   r  r  round)r   r  rD  r  r  	inp_sizesrU   r`  xs_f32vsrx  vr'  r  r9  r'   )	r  rD  r   rg  r   rh  rj  rk  ro  r(   rM    sN   


"


rM  r  r  c                 C   s   | j |j kS r5   r  )r  r  r'   r'   r(   is_same_size(  rz   rx  c                 G   rt   r5   )r   rt  )r0   r  rF   r'   r'   r(   _reshape_alias-  s   ry  c                 C   rt   r5   )r   r{  )r0   rw  r'   r'   r(   r  3  rz   r  c                 C   s   t | |||S r5   )r   rt  )r0   rw  rx   r  r'   r'   r(   r  8  rE  r  c                 C   s   |D ]}|d urt |jt jt jfv dd  qt |jt jkdd  ddlm} ||  dkr@t j	
| |}| |j|S tt|D ]}|| }|d ur^|jd| |d d||< qFt| || |S )Nc                   S   r  Nz3tensors used as indices must be long or int tensorsr'   r'   r'   r'   r(   r}   C  r  z&_unsafe_masked_index.<locals>.<lambda>c                   S   r  Nz*tensors used as masks must be bool tensorsr'   r'   r'   r'   r(   r}   H  r  r   r  r   rb  )r?   r   r   r!  rh  rr  rQ  r  r   _meta_registrationsmeta_index_Tensorr   r  rS   r`  r   r  r   r  r"  )r0   ry  rw  fillr{  r  meta_resultr'  r'   r'   r(   ru  =  s*   
ru  c                 C   s   |D ]}|d urt |jt jt jfv dd  qt |jt jkdd  |  dkr.|  S tt	|D ]}|| }|d urP|j
| | | |d d||< q4|| d}tj| ||ddS )	Nc                   S   r  rz  r'   r'   r'   r'   r(   r}   _  r  z5_unsafe_masked_index_put_accumulate.<locals>.<lambda>c                   S   r  r{  r'   r'   r'   r'   r(   r}   d  r  r   r   rb  Tr  )r?   r   r   r!  rh  rr  r   ro  rS   r`  r   r  r"  r   r  )r0   ry  rw  r`  r{  r'  masked_valuer'   r'   r(   #_unsafe_masked_index_put_accumulateY  s(   
$r  c                 C   sV  |   }d}|dk rd}|d ur,|dkr&dg| }|jd ||< ||}n|}| | } t||k|d}	|	|}
t| ||
| }t||k|d}|tj	j
krb|dkrb| dd}||fS |d ur|| j}t|||
|}t||k|d}| }n	||k | }|tjj
kr| }||fS |tjj
kr| | }||fS )Nr   r    r   r'   r   )rO   r  rt  r?   rf   rT   gatherr  r   r$   rx   r   rn  r   r7   r&   r%   )rw   r   r   r   r
  rh  r  r  wr  safe_target_r  r  wsumr'   r'   r(   _nll_loss_forwards  sB   


r  c                    s      dkr   dkstd    d  dkr&td   d   dko1  dk}|sGt jd jd k fdd  jd	 }|d urg|  dkr\| |ksgtd
| d|j t |||S )Nr   r    r!  r"  r   r#  c                      s   d j  dj  dS )Nr$  r%  r&  r  r'   rw   r   r'   r(   r}     rm  z"nll_loss_forward.<locals>.<lambda>rR   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rO   r  r?   r   r  r   r  )rw   r   r   r   r
  r(  	n_classesr'   r  r(   nll_loss_forward  s(   	
 r  c                 C   s   t | ||||S r5   )r  )rw   r   r   r   r
  r'   r'   r(   nll_loss2d_forward  s   	r  Ac                 C   s    |d |  |d  |  |  d S )Nr    r   r   r'   r0   r  r'   r'   r(   _upsample_cubic_convolution1  r%  r  c                 C   s(   ||  d|  |  d|  |  d|  S )Nr     r*  r'   r  r'   r'   r(   _upsample_cubic_convolution2  s   (r  r'  c           
      C   s   d}| j t dkrDtj| d|  gdd}tj| d d|  gdd}t||}t||}tj|dd\}}tj|dd\}}	|||	|fS t| d |t| |td|  |td|  |fS )Ng      rN  rd   r   r{   r   )rl  r?   r  r  r  r  )
r'  r  tt1tt2w03w12r  r  r  r  r'   r'   r(    _upsample_get_cubic_coefficients  s   

r  coeffstsc                 C   s    t |}tdd t| |D S )Nc                 s       | ]	\}}|| V  qd S r5   r'   r/   r  r  r'   r'   r(   r    r  z+_upsample_cubic_interp1d.<locals>.<genexpr>)r  rZ  r  )r  r  coeffs2r'   r'   r(   _upsample_cubic_interp1d  s   r  c                 C   s   t tj| S r5   )r   r?   r  )r  r'   r'   r(   rZ    s   rZ  	num_stepsc                 C   sB   | dkrt jd||dS |s| d |  nd}t j| || ||dS )Nr   r   rm  )stepsrl  r   )r?   r.  linspace)r  rD  r   rl  r  r'   r'   r(   _linspace_from_neg_one  s   r  thetahr  c           	      C   s   | j }| j}t||||d|d}t|||||dd}tjd||d}tjjj|dddd}tjjj|dddd}tjjj|d	ddd}|| | S )
Nr   )r   r   r   r  )r   r    constantr   r  r  rx   r   r   )r    r   	r   rl  r  rt  r?   rq  r5  r  r  )	r  r  r  rD  r   rl  grid_xgrid_ygrid_oner'   r'   r(   _make_base_grid_4d	  s   r  r  c                 C   s   | j }| j}t||||dd|d}t||||d|dd}t|||||ddd}	tjd||d}
tjjj|dddd}tjjj|dddd}tjjj|	d	ddd}	tjjj|
d
ddd}
|| |	 |
 S )Nr   )r   r   r   r   r  )r   r   r  r   r  r  r    r   )r   r   r  )r  r  r  r  rD  r   rl  r  r  grid_zr  r'   r'   r(   _make_base_grid_5d  s   r  c           	      C   sL   |\}}}}t | |||d}|ddd| jd d}||||dS )NrD  rR   r   r   r:  r    )r  rt  r>  rT   r   )	r  r  rD  r  rU   r  r  	base_gridgridr'   r'   r(   _affine_grid_generator_4d+  s    r  c           
      C   sR   |\}}}}}t | ||||d}|ddd| jd d}	|	||||dS )Nr  rR   r*  r   r:  r   )r  rt  r>  rT   r   )
r  r  rD  r  rU   r  r  r  r  r  r'   r'   r(   _affine_grid_generator_5d5  s    r  c                 C   s@   t t|dv dd  t|dkrt| ||dS t| ||dS )Nr^  c                   S   r  )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r'   r'   r'   r'   r(   r}   E  r  z'affine_grid_generator.<locals>.<lambda>r*  r  )r?   r   r`  r  r  )r  r  rD  r'   r'   r(   affine_grid_generator?  s   
r  r  interpolation_modepadding_mode_expand_gridc                    sT  t dv fdd t dv fdd dtdtdtffdd	dtd
tdtdtfdddtdtdtffdddtdtdtffdd}j\ |j\}}|dkrhtd| rz|d| d}dtdtdtffddt jj	ddddt j j	dd dddtdtdtdt
f fdddtdtdtffdd 
|d! }	|d" }
d#kr6||	}||
}| | d }}d }}||}}|| ||  }|| ||  }|| ||  }| |  }t
fd$d%|f|||f|||f|||ffD S dkrS||	}||
}| }| }
||dS |	}|
}| | | | }szd|d}dtdtdtf
fd&d'd(tdtffd)d*	t	fd+d%td,D }t||S )-N)r   r   r    c                      r  )NzInvalid interpolation mode r'   r'   )r  r'   r(   r}   ^  r  z"_grid_sampler_2d.<locals>.<lambda>c                      r  )NzInvalid padding mode r'   r'   )r  r'   r(   r}   a  r  coordsr  rP   c                    s0    r|d d n|d }|d d }| | | S rT  r'   )r  r  r   ofsr  r'   r(   unnormalized  s   z%_grid_sampler_2d.<locals>.unnormalize	twice_low
twice_highc                 S   sv   ||kr	t | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr    r   r   r   )r?   r   r   fmodfloorr7   int8rf   )r  r  r  
coords_mincoords_spancoords2extraflipsr'   r'   r(   reflect_coordinateso  s   
z-_grid_sampler_2d.<locals>.reflect_coordinatesc                    sf   dkr| S dkrt | d|d S  r | dd|d  }n
| dd| d }t |d|d S )Nr   r   r    rR   r   )r  r  coords_reflected)rD  r  r  r'   r(   compute_coordinates{  s   z-_grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r5   r'   )r  r  	coords_un)r  r  r'   r(   compute_source_index  s   

z._grid_sampler_2d.<locals>.compute_source_indexr    z4grid last dimension must be 2 (for x,y coords), got r   rk  ysc                    s,   t d| kt | k t d|k| k S ro   r?   rs  )rk  r  )iHiWr'   r(   in_bounds_cond  s   $z(_grid_sampler_2d.<locals>.in_bounds_condrk  wsc                    sN   | |r	nd t  fdd| jtjd|jtjd|fD S )Nr   c                 3   s*    | ]}t |d  V  qdS r  )r?   rf   rt  r&  )rl  r  r  oHoWr'   r(   r    s
    
z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )r  r7   r?   r  )rk  r  r  )rm  rl  r  r  r  r  )r  r  r(   clip  s
   
z_grid_sampler_2d.<locals>.clipixiyc                    s&   | ||\}}} ||f | S r5   r'   )r  r  r  idx_xidx_yw_)C_idxN_idxr  r  r'   r(   get_summand  s   z%_grid_sampler_2d.<locals>.get_summand).r   ).r   r   c                 3   s"    | ]\}}} |||V  qd S r5   r'   )r/   r  r  r  )r  r'   r(   r    s
    

z#_grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS rZ   r'   )r  r  r0   rY   )r  r  r  r  r'   r(   get_value_bounded  s   

z+_grid_sampler_2d.<locals>.get_value_boundedr  c                    sF   | d  } d | | d | d |f}t |S )Nr   r    )r  )r  iy_ofscs)r  ix_nwiy_nwtxr'   r(   	get_coeff  s   
z#_grid_sampler_2d.<locals>.get_coeffc                 3       | ]} |V  qd S r5   r'   )r/   r  )r  r'   r(   r    r  r*  )r?   r   r   rh  r  r  rt  rn  rp  rl  r   r  rZ  rs  rT   r  rS   r  )r  r  r  r  rD  r  r  rU   twor0   rY   r  r  ix_neiy_neix_swiy_swix_seiy_sew_nww_new_sww_se
ix_nearest
iy_nearesttyr  r'   )rm  r  rl  r  r  r  rD  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r(   _grid_sampler_2dM  s~   
 ( 




	





 

r  c                 C   s   t | ||||dS )N)r  r  r  rD  )r  )r  r  r  r  rD  r'   r'   r(   grid_sampler_2d  s   
r  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr    r   c                      s   d    d   S )Nzmatrix @ vector expected, got rr  r{   r'   rw   rg  r'   r(   r}     rs  zmv.<locals>.<lambda>r   c                      s*   d  d d  d d d dS )Nzsize mismatch, got input (r   r0   r   z), vec (r&  rv  r'   r  r'   r(   r}     s   * r{   )r?   r   rO   r  r   r  r'   r  r(   rh  	  s   rh  c                 C   sp   |d ur|d | d }d| |  |t |   }nd| |  t |  }|d ur-|| }||j}t||S rZ   )r   
logsigmoidr7   r   r   )rw   r   r   
pos_weightr   
log_weightr   r'   r'   r(    binary_cross_entropy_with_logits  s   
r  tensor1tensor2is_outc           	         s   | j |j kr
| |fn|| f\}}ddlm  |j dkr |j dks"dS |jr)|s)dS | j dkr0dS  | dkr:dS |j}| }dg}t|dd  D ]}|||d   qLt	 fd	d
t
|tt||D S )Nr   r  r   r    FTr   rR   c                 3   s.    | ]\}}} |d kp ||kV  qdS r  r'   )r/   r   r   r  r  r'   r(   r  D  s
    
zshould_fold.<locals>.<genexpr>)r[  rQ  r  r.  r   r  rT  rr  r,  r   r  rS  )	r  r  r  t1t2t1_shape	t1_strideexpected_strider  r'   r  r(   should_fold,  s(    

r  )pass_is_out)r  c                C   s  ddl m}m} |  }| }|dks|dkr"td| d| |dkr0|dkr0t| |S |dkr>|dkr>t| |S |dkrT|dkrTtt	t
| d|dS |dkrb|dkrbt	| |S t| ||r||k}|rq|jn| }|sw|n	|dkr|  n| }	|j}
t|
d d }ttj|}|	 dk}|r||	jd  |||
d }|rtjj|	|	|}|r|j S |S tjj||	|S |dkr|dkr|dkr| dnd}| d}| jd d }|dkr|dn|d}|dkr|dnd}g }t|d D ]}||| q|d	krY|d	krY||d |d krY||d dkrD| jrDt| d|S ||d dkrY|jrYt| |dS tt||}|||g }t|}| ||||}|dk}|r||g }||||
d}n|||g }|||||}|}|dkr|| |dkr|| |r| |d!|S | |!|S t"d
dd  d S )Nr   )r  guard_or_truez9matmul does not support 0-dimensional tensors, got dims: z and r   r    rR   r:  r   Fc                   S   r  )Nz/both arguments to matmul need to be at least 1Dr'   r'   r'   r'   r(   r}     r  zmatmul.<locals>.<lambda>)#rQ  r  r  rO   r  r?   dotrh  r  r`  rT   r  r>  r'  r  rS  r   rG  r   r,  r  r  r   _unsafe_viewr  r  rS   r.  r=  broadcast_shapesr  rn  bmmrt  r   )r  r  r  r  r  dim_tensor1dim_tensor2r  r  r  sizes_1rH  folded_dim1t2_is_matrix	t1_foldedr  r  m1batch_tensor1m2r4  batch_tensor2r'  expand_batch_portiontensor1_expand_sizeexpand_batch_producttensor1_expanded
vector_rhstensor2_expand_sizetensor2_expandedr'   r'   r(   r=  L  s   	










r=  rE  rF  c                    s  j \}}t|d ||}t|d ||}tjtjjd\}}tj|d jdj	|d}	tj|d jdj	|d}
t
||
|}t
||	|}|d}| }| }|| dd}|| dd}|	tj}|	tj}|d ||d |d	 f}|d ||d |d	 ft|t|}d
\jtjkrtt|fddD fdd|D }fddfdd t fdd|D }jtjkrd u rtdt||}ntdd t||D }t}|j|d}|S )Nr   r   r  rk  r   rR   r   rd   r    r  c                    .   g | ]}|d  >  t |d  t jqS r   r   r?   r   r7   int16r/   r  )weights_precision_xr'   r(   r2          z.upsample_bicubic2d_default.<locals>.<listcomp>c                    r  r  r  r  )weights_precision_yr'   r(   r2     r  c                    s<   t | d d }t |dd }td d ||g}|S rA  )r?   r   r   r  )r  rk  y_idxx_idxrw  )in_hin_wr   r'   r(   load_bounded  s   z0upsample_bicubic2d_default.<locals>.load_boundedc                    sX   t  fddD }jtjkr d u rtdt|S tdd t|D S )Nc                 3   s    | ]} |V  qd S r5   r'   )r/   x_ofs)r!  rY   r'   r(   r    rw  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>z4weights_precision_x must not be None for uint8 inputc                 s   r  r5   r'   r  r'   r'   r(   r    r  )r  r   r?   r  r  r[  rZ  r  )rY   src_x)r   ixs_ofsr!  r  	weights_x)rY   r(   get_x_interp  s   z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   r  r5   r'   )r/   y_ofs)r&  r'   r(   r    r  z-upsample_bicubic2d_default.<locals>.<genexpr>z4weights_precision_y must not be None for uint8 inputc                 s   r  r5   r'   r  r'   r'   r(   r    r  r8  )r  rS  rD   rE   r  rq  r?   rp  rl  r7   rV  rT   r  r   r  r  r   r  ra  r  r  r[  rZ  r  r   r  )r   r  rD  rE  rF  rU   h_scale_factorw_scale_factorr   r'  rA  x_floaty_floatr0   rY   yscalero  iys_ofs	weights_ysrc_yr  r9  r'   )	r&  r  r   r   r$  r!  r  r  r%  r(   upsample_bicubic2d_default  sT   





r0  c                 C   s   t t|t| dkdd  |d u r4|d u rtdttttf tdd t| jdd  |D }|r8|nd\}}t	| ||||S )	Nr   c                   S   r  )Nz:Must specify exactly one of output_size and scale_factors.r'   r'   r'   r'   r(   r}   3  r  z(upsample_bicubic2d_vec.<locals>.<lambda>z7scale_factors must not be None when output_size is Nonec                 s   s$    | ]\}}t t|| V  qd S r5   )r   r   )r/   r  rk   r'   r'   r(   r  <  s
    
z)upsample_bicubic2d_vec.<locals>.<genexpr>r    r  )
r?   r   rr  r  r   r  rh  r  r  r0  )r  r  rD  r  rE  rF  r'   r'   r(   upsample_bicubic2d_vec&  s"   
r1  c                        fdd}t  ||S )Nc                    s4   t j|  ||  jd}|d |d |    S )Nrk  r   )r?   rp  rl  r   r   middler   dim_idxr  r'   r(   rx  K  s   z_reflection_pad.<locals>.idx_reflection_or_replication_padr  r  rx  r'   r6  r(   _reflection_padE     r:  c                    r2  )Nc                    s*   t j|  ||  jd}t |d|d S )Nrk  r   r   )r?   rp  rl  r   r3  r6  r'   r(   rx  \  s   z_replication_pad.<locals>.idxr7  r9  r'   r6  r(   _replication_padV  r;  r<  idx_fnc                    s   t d  t|   d  d fv  fdd | j  d  }|    } fddt D } fddt D }| }t D ]}d g|  }	||| || || |	|| < t||	}qFt	|}
|j
|
d}|S )	Nr    r   c                      s    d  d d  d d  dS )Nreflection_padzd requires r   zD or r    zD inputr'   r'   r{   r'   r(   r}   o       z0_reflection_or_replication_pad.<locals>.<lambda>c                        g | ]}d  d |   qS r  r'   rI  rO   r  r'   r(   r2   t  r?  z2_reflection_or_replication_pad.<locals>.<listcomp>c                    $   g | ]}d  d |  d  qS r  r'   rI  rA  r'   r(   r2   u  rt  r8  )r`  r?   r   rO   r  rS   r   r  rD   r   r  )r  r  r=  	inp_shapenc_dimpadding_leftpadding_rightr  r'  rx  r9  r'   rA  r(   r8  g  s"   
 
r8  c                    s\  t d dd |j d  D fddtD fddtD g }t|jD ]}dg|j }d||< |tj|j| |jd| q2|d    | d  
d	d
 
fddtD 
fddtD }
fddtD }fddtD 	t	
tj	fddtD }t|  d}	 fdd}
tjdd tD  D ]f}|tdg krqg }g }tD ]K}|| dkr| }	| }n0|| dkr|| }
| d| f}n|| dkr|| }
| | |  | d f}|| || q|
|	||}	q|	S )Nr    c                 S   s   g | ]}|d  qS r  r'   )r/   r  r'   r'   r(   r2     rR  z,_reflection_pad_backward.<locals>.<listcomp>c                    r@  r  r'   rI  rA  r'   r(   r2     r?  c                    rB  r  r'   rI  rA  r'   r(   r2     rt  r   rR   rk  c                 S   s   | \}}}t ||k||kS r5   r  )index_ranger'  lbubr'   r'   r(   index_range_condition  s   
z7_reflection_pad_backward.<locals>.index_range_conditionc                    s   g | ]
}|  |  qS r'   r'   rI  rE  xyzr'   r(   r2     r  c                    s   g | ]
} | |  qS r'   r'   rI  rK  r'   r(   r2     r  c                    s(   g | ]}d  |  |  |  qS r  r'   rI  )dhwrE  rL  r'   r(   r2     rl  c                    s.   g | ]} | d | |  |  fqS r  r'   rI  )centerrM  rE  rF  r'   r(   r2     s    "c                    s   g | ]} | qS r'   r'   rI  )rJ  range_cr'   r(   r2     rs  r   c                    st   t D ]}|| d || d k }t|tr|r|   S qttjfdd|D }t| | d}| | S )Nr    r   c                    s   g | ]} |qS r'   r'   )r/   rG  )rJ  r'   r(   r2     rR  z@_reflection_pad_backward.<locals>.accumulate.<locals>.<listcomp>r   )rS   r-   rr  rL   r   r   rs  ru  )r   r   index_rangesr'  upper_less_than_lowerr  g)r  rO   ri   rJ  r'   r(   r    s   z,_reflection_pad_backward.<locals>.accumulatec                 S   s   g | ]}g d qS ))rR   r   r   r'   r  r'   r'   r(   r2     rR  r   )r`  r  rS   r[  r,  r?   rp  rl  rt  rL   r   r   rs  ru  	itertoolsr	   r  )ri   r0   r  rw  r'  
view_shapeleft_reflectright_reflectr  r   r  areaoutsrP  r   rG  r'   )r  rN  rM  rO   ri   rJ  r  rE  rF  rO  rL  r(   _reflection_pad_backward  sT   $
"
rY  r   r   r   c                C   s(   t j| ||d}t j| ||d}||fS )Nr   )r?   aminr  )rw   rO   r   rZ  r  r'   r'   r(   aminmax  s   r[  r   c                C   s"   t jtt| d| |||dS )Nr   r   )r   r   r?   rf   isnan)rw   rO   r   r   r'   r'   r(   nansum  s   "r]  r   r  rl  r  r  c             	   C   s   t jjd| d||||dS )Nr   r   r^  r   rp  
start_step)rJ  r   r  rl  r  r'   r'   r(   arange_default     
ra  c             	   C   s   t jj| |d||||dS )Nr   r^  r_  )rI  rJ  r   r  rl  r  r'   r'   r(   arange_start  rb  rc  c                  O   s   ddl m} || i |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyperd  )rF   rG   rd  r'   r'   r(   out_dtype_decomp  s   rf  marginc           	         s  t t jd jd  t |dkp|dkdd  t jdko, dkfdd t jdko? kfdd d urdt t jdko\  k fdd dt jdd	}||  }|	d}|dkr|n|| }d ur|  }t j
 jd
}t |k|d}|tjjkr| S |tjjkr| |jd  S |jddS )Nr   r   r    c                   S   r  )Nz only p == 1 and p == 2 supportedr'   r'   r'   r'   r(   r}   "  r  z#multi_margin_loss.<locals>.<lambda>c                      rN  NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: r  r'   r_  r'   r(   r}   %  rO  c                         d  dj  S )Nz#inconsistent target size, expected r  r  r'   )nframer   r'   r(   r}   )  r)  c                      ri  )Nz#inconsistent weight size, expected r  r  r'   )rO   r   r'   r(   r}   /  r)  rx  rk  r{   )r?   
atleast_2d
atleast_1dr  r   r[  r   rT   r  r?  rp  rl  rf   r   r%   rx   r   r&   r   )	r   r   r4  rg  r   r   urg   rx  r'   )rO   r   rj  r   r   r(   multi_margin_loss  sB   







rn  	is_targetc                    s  | j  |j t| } t|}| j d }tt dko |dk fdd ttdko2 k fdd tj||jd}|dk}tjt|||dd	d
}||k }t||d}tj	| d|d}	t||d}
tj
||
jddkdd}d|	jjdd |  }|d}|| }t|d|}|tjjkr|jdd }n|tjjkr| }n|jdd}|| j}||fS )Nr   r    r   c                      r  rh  r'   r'   )orig_input_shaper'   r(   r}   Q  r  z0multilabel_margin_loss_forward.<locals>.<lambda>c                      s   d d  S )Nzinconsistent target size: z for input of size: r'   r'   rp  orig_target_shaper'   r(   r}   U  r~   rk  rR   Tr   rx  r{   rd   )r   rR   )r  r?   rk  r   r`  rp  rl  rZ  rf   r  rF  rT   Tr?  r   r%   rx   r   r   r&   r7   r   r  )r   r   r   rO   rx  is_endend_idxtarget_masktidx0rm  tidx1ro  rg   r'   rq  r(   multilabel_margin_loss_forwardB  s@   





ry  )	attn_maskrk   querykey	dropout_p	is_causalrz  c          	         s   t t fdd t  dko  dko  dkfdd t  dk fdd t jd jd koJjd jd kdd  tjj| |d |d	d	kd
	\}}|	ddd	dj
t jd	d	ddd}||fS )Nc                      rN  )Nz-query must be FP32, FP64, BF16, FP16 but got r   r'   )r{  r'   r(   r}     rO  z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>r*  c                      s"   d   d    d   S )Nz,q, k, v must be a 4 dimensional tensor, got rr  r{   r'   )r|  r{  rx   r'   r(   r}     s   " r   c                      r  )Nz&dropout probability must be zero, got r'   r'   )r}  r'   r(   r}     r  r   c                   S   r  )Nz&q, k, v should have the same head sizer'   r'   r'   r'   r(   r}     r  r   )rz  r}  r~  dropout_maskrk   
enable_gqar    r   r8  )r?   r   r  rO   r  r   "_scaled_dot_product_attention_mathrd  r  r  r  r<  )	r{  r|  rx   r}  r~  rz  rk   r  attnr'   )r}  r|  r{  rx   r(   *scaled_dot_product_flash_attention_for_cpu  s@   
"&
#r  c                    s   t |  fdd}|S )Nc                     s    | i |}| d  |S ro   )r  )rF   rG   r   outplace_opr'   r(   
inplace_op  s   z$register_inplace.<locals>.inplace_opr   )aten_opr  r  r'   r  r(   register_inplace  s   r  c                 C   sx   |   s|  st|}t|}t||}t|tjr |dkr$|| }|dkr*|S t|tjr4|dkr8| | } | | S )Nr   r   )r  r_  rh  r?   r  r-   numbersNumber)rw   batch1batch2ra   rj   r  r'   r'   r(   baddbmm  s   r  c                 C   s   t j| |ddS )Nr  r  r   )rw   r   r'   r'   r(   floor_divide  s   r  c                 C   s   t tj| jdS rZ   )rL   r   rG  r   r  )r'  r'   r'   r(   	sym_numel  r  r  r   r   c                C   s.   |d u rt jj| g |dS t jj| g ||dS )Nr   r  )r   r   dim_IntListIntList_out)rw   r   r   r'   r'   r(   sum_default  s   r  c                 C   sB   t | tjs| S |d u rtj| tt|  S tj| |gS r5   )	r-   r?   r   r   r  dimsrS  rS   rO   )rw   rO   r'   r'   r(   squeeze_default  s
   r  c                    s`   t  fddtt| jD }|jtjkrtjnd }| jd|d|d}| ||	|j  |fS )Nc                 3   s    | ]	}| kr|V  qd S r5   r'   rI  r{   r'   r(   r    r  z)_weight_norm_interface.<locals>.<genexpr>r    T)r   r   )
r  rS   r`  r  r   r?   r-  r   r   r7   )rw  rR  rO   keep_dim
norm_dtyper   r'   r{   r(   _weight_norm_interface  s    r  assume_uniqueinvertc                C   s   t | tjstj| |jd} t |tjs"|rt| |S t| |S ddlm} ||	 dt
| 	 d k r>t| ||dS t| |||dS )Nrk  r   r  g      $@g(\?r  r  )r-   r?   r   r  rl  ner   rQ  r  r   r   isin_defaultisin_sorting)elementstest_elementsr  r  r  r'   r'   r(   isin  s   r  )r  c                C   sP   |d u rt j|  t j| jd}nt j|  |t j| jd}|| k | j}|S )Nr  )r  r   rl  )r?   randr  r   rl  r7   r   )rw   r  raw_pr4  r'   r'   r(   	bernoulli$  s   r  r  c                C   s   |   dkrtj| tjdS |jdkr| |k}|r| S |S | jd|j  }| |}ttd|j d d}||kj	|d}|rD| S |S )Nr   r   r  rR   r   r{   )
r   r?   
empty_likerr  r[  r  rt  r  rS   rF  )r  r  r  r  expanded_elem_shaper0   rO   r'   r'   r(   r  7  s   

r  c                C   s   |   }|  }|rMt||g}tj|dd\}}|dd  |d d k}	t|	ddgd}	|r5|	 }	t|	}
|
d||	}
|
d|   	| j
S t|\}}t||}t|| k |d}|| |k}|rq| n|}|	| j
S )NT)stabler   rR   r   F)r  r?   r  sortr*  logical_notr  r  r   r  r  searchsortedrf   )r  r  r  r  elements_flattest_elements_flatall_elementssorted_elementssorted_orderduplicate_maskry  sorted_test_elementsrU   rx  test_idxcmpr'   r'   r(   r  D  s$   
r  c                 C   s   |  d}|| S rQ   )r  )rw   r{  	flattenedr'   r'   r(   takea  s   
r  c                 C   s2   |d u rt j}|t jkrt|}tj| |j|dS r  )r?   r<  preserve_formatr   r   resizer  )rw   r   r9  r'   r'   r(   	resize_ash  s
   
r  	ceil_modec                 C   sR  t  rtS | jjdkrtS |d}|d}	| d}
| d}| dk}|s9|d}| d} |d}|d}|d}| jt j	t j
fv }|rQt jn| j}t j|| ||	 || jd}| || |
| }||| |
| }|r~|t j}|d||}|||||	}|r|| j}t|}|j|d}|s|d}|S )	u  
    Decomposition of max_pool2d_with_indices_backward using scatter_add.

    This replaces the native implementation with a high-level decomposition
    that uses scatter_add for gradient accumulation. The scatter-based approach
    provides automatic optimization opportunities for Inductor and handles all
    pooling configurations without requiring specialized fallback paths.

    Algorithm:
        For each output gradient position, use the corresponding index from the
        forward pass to scatter the gradient to the input position. When multiple
        output positions select the same input position as max, scatter_add
        automatically accumulates their gradients.

    Complexity: O(B * C * H_out * W_out)
        Independent of kernel size, unlike traditional O(B * C * H_in * W_in * K²)
        approaches that iterate over input positions and kernel windows.

    Known Limitations:
        - FP16/BF16: Uses FP32 accumulation internally to preserve precision when
          many gradients accumulate to the same position (overlapping pooling windows).
          This adds slight overhead but ensures numerical stability.
        - Deterministic mode: Falls back to native implementation to ensure
          consistent results across runs

    Args:
        grad_output: Gradient w.r.t. pooling output [B, C, H_out, W_out]
        self: Original input tensor (for shape) [B, C, H_in, W_in]
        kernel_size: Pooling kernel size
        stride: Pooling stride
        padding: Pooling padding
        dilation: Pooling dilation
        ceil_mode: Whether to use ceil for output size calculation
        indices: Indices from forward pass (per-channel linear positions)

    Returns:
        Gradient w.r.t. input [B, C, H_in, W_in]
    mpsr:  rR   r*  r   r   r  r8  )r?   $are_deterministic_algorithms_enabledNotImplementedrl  rS  r  rO   rT   r   r   r-  r   r   r  r7   scatter_addrD   r   r  r  )ri   rw   r  rT  r  r  r  rw  	in_heightin_width
out_height	out_width
is_batchedr  channelsuse_fp32_accumaccum_dtypegrad_input_flatgrad_output_flatrL  rW   r9  r'   r'   r(    max_pool2d_with_indices_backwardq  sL   2










r  window_lengthc                C   s   t jj| d||||dS )a  hann_window(window_length, *, dtype=None, layout=None, device=None, pin_memory=False) -> Tensor

    Returns a Hann window of size :attr:`window_length` with ``periodic=True``.

    Equivalent to :func:`torch.hann_window` with ``periodic=True``.

    Args:
        window_length (int): the size of returned window.

    Keyword args:
        dtype (:class:`torch.dtype`, optional): desired dtype. Default: global default.
        layout (:class:`torch.layout`, optional): desired layout. Default: ``torch.strided``.
        device (:class:`torch.device`, optional): desired device. Default: current device.
        pin_memory (bool, optional): if ``True``, pins the returned tensor. Default: ``False``.
    Tr^  )r   hann_windowperiodic)r  r   r  rl  r  r'   r'   r(   r    s   r  r  c          
      C   s   |dur|nt  }| dkrt jd||||dS | dkr&t jd||||dS t|}|r1| d n| }t j|||||d}|dt j |d   }t |}|d d	 }|r\|	dd| n|}	|	
|S )
a@  hann_window(window_length, periodic=True, *, dtype=None, layout=None, device=None, pin_memory=False) -> Tensor

    Returns a Hann window of size :attr:`window_length`.

    .. math::
        w[n] = 0.5 - 0.5 \cos\!\left(\frac{2\pi n}{N-1}\right)

    where :math:`N` is ``window_length + 1`` when ``periodic=True`` (for spectral analysis),
    or ``window_length`` when ``periodic=False`` (symmetric window).

    Low-precision dtypes (``bfloat16``, ``float16``) are computed in ``float32`` then cast.

    Args:
        window_length (int): the size of returned window.
        periodic (bool, optional): if ``True``, returns a periodic window for use with STFT.
            Default: ``True``.

    Keyword args:
        dtype (:class:`torch.dtype`, optional): desired dtype. Default: global default.
        layout (:class:`torch.layout`, optional): desired layout. Default: ``torch.strided``.
        device (:class:`torch.device`, optional): desired device. Default: current device.
        pin_memory (bool, optional): if ``True``, pins the returned tensor. Default: ``False``.
    Nr   r  r^  r   r  r   r   r   )r?   get_default_dtyper  rq  rD   r  rp  picosr  r7   )
r  r  r   r  rl  r  compute_dtyper  r'  windowr'   r'   r(   hann_window_periodic  s.   "




r  num_classesc                 C   sr   |dkrt |   d }tjt| dkd tjt| |k d | dtj	|| j
| jdktjS )NrR   r   r   z+one_hot: Class values must be non-negative.z7one_hot: Class values must be smaller than num_classes.r  )rh  r   rQ  r   _assert_asyncmsgr?   r   rT   rp  r   rl  r7   r  )rw   r  r'   r'   r(   one_hotb  s   r  )FF)r   r5   r  )r   NNr   )rR   FFr  r  r  )r   r   F)Fr   )r   rd   N)r   r   N)Fr  )NNN)r   r   FT)r   r   Fr  )r   Fr  )rR   (  rL   rS  r  rG  rU  collections.abcr   r   
contextlibr   enumr   r   r   r   r	   typingr
   r   r?   torch._meta_registrationstorch._primsr  r  torch._prims_common_prims_commonrD   torch.nn.functionalr5  r  r   r   r   r   torch._decompr   re  r   r   r   r   r   r   torch._prims_common.wrappersr   r   r   r   torch.utilsr   rB   torch.utils._pytreer   r  DispatchKeyr   rS  str__annotations___opsr  r   r   r  rr  rN   r  compute_only_pw_cast_for_opmathpw_cast_for_opmath"pw_cast_for_opmath_non_tensor_argsrq  pw_cast_for_int_to_realrh  rV   r^   r`   rh   r   rs   r~  r  ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r   r   r%   rx   r   _safe_softmaxr   r   r   rd  rW   r  r  r   r	  r  r   r)  r+  r/  r1  r2  r3  r5  rG  rN  slicera  ri  rM  r}  r  r  r  r  r  r  r  r  r  r  r  py_implCompositeImplicitAutogradAutogradr  r  r  r  r&  r  r2  r4  r6  r9  r:  rA  rD  rF  r@  rT  rM  r\  ra  rf  ri  r  r  r  r  r  r  r  r  r  r  unsafe_chunkr  r  r  no_statsr  r  r  r  r  r  _fused_dropoutr  r  rl  r9  rK  lift
lift_freshr  r  r  r   r  r  r  r  _adaptive_avg_pool2drB  rM  r]  ra  rh  ri  rf  r  r  r  r  r  r  	Generatorr  r  r  r  rg  r  r  r  _upsample_nearest_exact1dr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rnn_tanhr   r  rnn_relur
  r  r  r  r  r  r$  r2  lstmr4  r5  r<  r=  grur>  r?  rB  rG  rI  rJ  rK  rL  rQ  rR  rP  rN  rS  rV  r[  ra  rM  rx  ry  r  r  r  ru  r  r  r  r  r  r  r  r  rZ  r  r  r  r  r  r  r  r  rh  r  r  r=  upsample_bicubic2dr0  r1  reflection_pad1dreflection_pad2dreflection_pad3dr:  replication_pad1dreplication_pad2dreplication_pad3dr<  r8  reflection_pad1d_backwardreflection_pad2d_backwardreflection_pad3d_backwardrY  r[  r]  rp  r  r  ra  rI  rc  rf  rn  ry  +_scaled_dot_product_flash_attention_for_cpur  r  r  r  r  r   r  r  rO   r  r  r  r  r  r  r  r  r  r  r  periodic_outr  addbmm_addbmmaddmm_addmv_baddbmm_fill_gelu_rd  
hardswish_	hardtanh_hardtanhhardsigmoid___iand____and____ilshift__
__lshift__rs  rt  index_reduce_index_reduce__ior____or____irshift__
__rshift____ixor____xor__ldexp_leaky_relu_
leaky_relulogit_logitrelu_re  renorm_renormround_rs  scatter_r  scatter_add_r  scatter_reduce_scatter_reducesilu_r  r'   r'   r'   r(   <module>   s>  


* 
 

 

	




& 1'	
6

'"
	P`
 
	
%


(


,
300
	

W	

	
U



6K	
U		#

	

	



1
	

	

m	
%	$g-7
("$$







 




 &		

	.2
)


  @2 						

	
M"

6

 
 (
.$$


* 
*


* 
|
W


0


0




W


,

<

	D	

"
	


v

!

9(