# Source code for embedl_hub._internal.core.profile.result

# Copyright (C) 2026 Embedl AB

"""Result and error types for model profiling."""

from __future__ import annotations

from enum import Enum


class ProfileError(RuntimeError):
    """Raised when a profiling job fails."""
class ProfilingMethod(Enum):
    """Methods for measuring execution time during model profiling.

    Profiling can be done with multiple levels of granularity. Each
    method calculates the execution time using a different approach, so
    the results are not directly comparable.

    Not every method is natively supported by every provider. When a
    provider does not support the requested method, it may fall back to
    an equivalent method with a warning.
    """

    PYTHON = "python"
    """Use :func:`time.time` to measure wall-clock time on the remote
    system. The elapsed time is divided by the number of iterations to
    compute the average latency."""

    LAYERWISE = "layerwise"
    """Use the runtime's built-in profiling infrastructure to obtain
    per-layer (or per-operator) execution times.

    - **TensorRT**: Uses ``trtexec --exportProfile`` to produce a
      detailed per-layer JSON profile.
    - **ONNX Runtime**: Uses the native ONNX Runtime profiler
      (``--profiling-method onnxruntime``) to produce a detailed JSON
      profile with per-operator statistics.
    """

    MODEL = "model"
    """Measure the total execution time of the model as reported by the
    runtime, without per-layer breakdown.

    - **TensorRT**: Uses ``trtexec --exportTimes`` to report
      per-inference latency including enqueue and data-transfer time.
    - **ONNX Runtime**: Falls back to :attr:`PYTHON` with a warning,
      since ONNX Runtime does not provide a native model-level timing
      mode distinct from wall-clock measurement.
    """