o
    m9:j                  
   @   s  d dl Z d dlmZmZ d dlmZmZ d dlmZ ddl	m
Z
 e jjZee je je je jgZeg ejejejejejejejejejejejejejejej ej!ej"ej#ej$ej%ej&ej'ej(ej)ej*ej+ej,ej-ej.ej/ej0Z1eej2ej3ej4ej5ej6ej7ej8ej9ej:g	Z;e1e;B Z<de=fdd	Z>d
e j?de@fddZAde=fddZBdS )    N)get_device_tflopsget_gpu_dram_gbps)optimization_hintstatically_known_true)
OrderedSet   )flop_registryreturnc                 C   s~   | t v r=t|dkrtd| d|  | }t|d }d}|| }t |  }	|	|i |d|id }
|
| d }|S d	S )
aw  
    Estimates the compute time of an aten operator.

    Args:
        func_packet: The operator overload packet.
        args: The arguments to the operator.
        kwargs: The keyword arguments to the operator.
        out: The output of the operator.
        out_dtypes: The output data types.

    Returns:
        float: The estimated compute time in nanoseconds.
    r   z"Only support single out dtype got z for g  4&kCg      ?out_val   g    eAg        )r   lenAssertionErrorpopr   )func_packetargskwargsout
out_dtypesdtypepeak_gpu_flopsfactorpeak_empirical_flopsflop_count_func
flop_countcompute_time r   f/home/nk/hobo-godmode/plappi-mvp/.venv/lib/python3.10/site-packages/torch/utils/_runtime_estimation.pyget_compute_timeM   s   r   tc                 C   sF   d}t | j|  D ]\}}t|dks|t|dd9 }q
||   S )z
    Calculates the memory consumption of a tensor.

    Args:
        t (torch.Tensor): The input tensor.

    Returns:
        int: The memory consumption of the tensor in bytes.
    r   r   )fallback)zipshapestrider   r   element_size)r   
real_numelsizer"   r   r   r   get_num_byteso   s   
r&   c                 C   s>   t  }tdd | D }tdd |D }|| }|| }|S )aG  
    Estimates the memory transfer time of input and output tensors.

    Args:
        flat_args_kwargs (List[torch.Tensor]): The flat list of arguments and keyword arguments.
        flat_outs (List[torch.Tensor]): The flat list of outputs.

    Returns:
        float: The estimated memory transfer time in nanoseconds.
    c                 s   $    | ]}t |tjrt|V  qd S N
isinstancetorchTensorr&   .0r   r   r   r   	<genexpr>       
z$get_transfer_time.<locals>.<genexpr>c                 s   r'   r(   r)   r-   r   r   r   r/      r0   )r   sum)flat_args_kwargs	flat_outsgpu_memory_bandwidth
read_byteswrite_bytescounted_bytestransfer_timer   r   r   get_transfer_time   s   r9   )Cr+   torch._inductor.utilsr   r   %torch.fx.experimental.symbolic_shapesr   r   torch.utils._ordered_setr   flop_counterr   opsatenfloat16bfloat16float32float64_FLOAT_TYPES
lift_freshr   	transposeviewdetach_unsafe_viewsplitadjoint
as_strideddiagonalexpand	expand_asmovedimpermuteselectsqueezemTmHrealimagview_as	unflattenunfoldunbind	unsqueezevsplithsplitsplit_with_sizesswapaxesswapdimschunk	_VIEW_OPSrandintrandnrand
randn_like	rand_likerandint_likearange	ones_like
zeros_like_CREATE_OPS_IGNORE_OPSfloatr   r,   intr&   r9   r   r   r   r   <module>   s    
	
$"