# Source code for embedl_hub._internal.core.profile.result
# Copyright (C) 2026 Embedl AB
"""Result and error types for model profiling."""
from __future__ import annotations
from enum import Enum
# [docs]
class ProfileError(RuntimeError):
    """Error signalling that a profiling job did not complete successfully."""
# [docs]
class ProfilingMethod(Enum):
    """How execution time is measured when profiling a model.

    Profiling granularity varies by method: each one derives its
    timing numbers in a different way, so results produced by
    different methods should not be compared directly.

    Providers do not natively support every method. When the
    requested method is unavailable, a provider may substitute an
    equivalent one and emit a warning.
    """

    PYTHON = "python"
    """Wall-clock timing via :func:`time.time` on the remote system;
    the elapsed time divided by the iteration count yields the
    average latency."""

    LAYERWISE = "layerwise"
    """Per-layer (or per-operator) timings obtained from the
    runtime's own profiling infrastructure.

    - **TensorRT**: ``trtexec --exportProfile`` emits a detailed
      per-layer JSON profile.
    - **ONNX Runtime**: the native ONNX Runtime profiler
      (``--profiling-method onnxruntime``) emits a detailed JSON
      profile with per-operator statistics.
    """

    MODEL = "model"
    """Total model execution time as reported by the runtime, with
    no per-layer breakdown.

    - **TensorRT**: ``trtexec --exportTimes`` reports per-inference
      latency, including enqueue and data-transfer time.
    - **ONNX Runtime**: falls back to :attr:`PYTHON` with a warning,
      since ONNX Runtime offers no native model-level timing mode
      distinct from wall-clock measurement.
    """