# Source code for embedl_hub._internal.core.profile.result
# Copyright (C) 2026 Embedl AB
"""Result and error types for model profiling."""
from __future__ import annotations
from enum import Enum
# [docs]
class ProfileError(RuntimeError):
    """Error signalling that a profiling job did not complete successfully."""
# [docs]
class ProfilingMethod(Enum):
    """How execution time is measured when profiling a model.

    Profiling granularity varies by method: each one derives its
    timing numbers in a different way, so results produced by
    different methods should not be compared directly.

    Providers do not natively support every method. When the
    requested method is unavailable, a provider may substitute an
    equivalent one and emit a warning.
    """

    PYTHON = "python"
    """Wall-clock timing via :func:`time.time` on the remote system;
    the elapsed time divided by the iteration count yields the
    average latency."""

    LAYERWISE = "layerwise"
    """Per-layer (or per-operator) timings obtained from the
    runtime's own profiling infrastructure.

    - **TensorRT**: ``trtexec --exportProfile`` emits a detailed
      per-layer JSON profile.
    - **ONNX Runtime**: the native ONNX Runtime profiler
      (``--profiling-method onnxruntime``) emits a detailed JSON
      profile with per-operator statistics.
    """

    MODEL = "model"
    """Total model execution time as reported by the runtime, with
    no per-layer breakdown.

    - **TensorRT**: ``trtexec --exportTimes`` reports per-inference
      latency, including enqueue and data-transfer time.
    - **ONNX Runtime**: falls back to :attr:`PYTHON` with a warning,
      since ONNX Runtime offers no native model-level timing mode
      distinct from wall-clock measurement.
    """