Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions libs/giskard-checks/src/giskard/checks/core/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,11 @@ class TestCaseResult(BaseResult, frozen=True):
Check results produced during the test case execution.
duration_ms : int
Total execution time in milliseconds.
trace_index : int | None
0-based index, in the scenario's final trace, of the last interaction
this step added before its checks ran. ``None`` when the step added no
interactions (e.g. skipped). Consumers (such as the Giskard Hub upload
flow) use this to attribute check results to a specific interaction.
status : TestCaseStatus
Aggregated outcome of the test case derived from its results.
passed : bool
Expand All @@ -399,6 +404,13 @@ class TestCaseResult(BaseResult, frozen=True):

results: list[CheckResult] = Field(..., description="Check results for each run")
duration_ms: int = Field(..., description="Total execution time in milliseconds")
trace_index: int | None = Field(
default=None,
description=(
"0-based index of the last trace interaction added by this step's "
"interacts before checks ran; None when no interactions were added."
),
)

@computed_field
@property
Expand Down Expand Up @@ -581,6 +593,17 @@ def to_junit_xml(self, path: str | Path | None = None) -> str:

return to_junit_xml(self, path=path)

def to_hub_format(self) -> dict[str, Any]:
    """Export this suite result as a Giskard Hub upload payload.

    Thin convenience wrapper: the conversion itself is performed by the
    ``to_hub_format`` function of the ``export.hub`` module, which receives
    this instance unchanged.

    Returns
    -------
    dict[str, Any]
        JSON-serializable payload that can be handed directly to
        :meth:`giskard_hub.HubClient.evaluations.upload`.
    """
    # Imported at call time rather than at module import time
    # (presumably to avoid a circular import with the export package).
    from ..export.hub import to_hub_format as export_to_hub

    hub_payload = export_to_hub(self)
    return hub_payload

def group_by(self, key: str) -> "GroupedSuiteResult":
"""Group results by a tag key and return a GroupedSuiteResult.

Expand Down
3 changes: 2 additions & 1 deletion libs/giskard-checks/src/giskard/checks/export/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .hub import to_hub_format
from .junit import to_junit_xml

__all__ = ["to_junit_xml"]
__all__ = ["to_hub_format", "to_junit_xml"]
28 changes: 28 additions & 0 deletions libs/giskard-checks/src/giskard/checks/export/hub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Hub format export for SuiteResult."""

from typing import Any

from ..core.result import SuiteResult


def to_hub_format(result: SuiteResult) -> dict[str, Any]:
    """Serialize a suite result into the payload expected by the Giskard Hub.

    The output matches what the Hub's ``POST /v2/evaluations/upload``
    endpoint accepts, so it can be forwarded as-is to
    :meth:`giskard_hub.HubClient.evaluations.upload`.

    Parameters
    ----------
    result : SuiteResult
        Suite result to export.

    Returns
    -------
    dict[str, Any]
        JSON-serializable dump of ``result``: the per-scenario ``results``
        list, ``duration_ms``, and the aggregate computed fields
        (``passed_count``, ``failed_count``, ``errored_count``,
        ``skipped_count``, ``pass_rate``).
    """
    # mode="json" asks the model to emit only JSON-compatible values.
    hub_payload = result.model_dump(mode="json")
    return hub_payload
11 changes: 9 additions & 2 deletions libs/giskard-checks/src/giskard/checks/scenarios/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,14 @@ async def _run_once[InputType, OutputType, TraceType: Trace[Any, Any]](

for step in steps:
trace = await trace.with_interactions(*step.interacts)
trace_index = len(trace.interactions) - 1 if trace.interactions else None
Comment on lines 136 to +137

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

If a step does not add any interactions (i.e., step.interacts is empty), but previous steps have already added interactions to the trace, trace.interactions will not be empty. In this case, trace_index will incorrectly be set to the index of the last interaction from the previous step instead of None.

To fix this, we should compare the number of interactions before and after applying the step's interactions.

Suggested change
trace = await trace.with_interactions(*step.interacts)
trace_index = len(trace.interactions) - 1 if trace.interactions else None
prev_len = len(trace.interactions)
trace = await trace.with_interactions(*step.interacts)
trace_index = len(trace.interactions) - 1 if len(trace.interactions) > prev_len else None


test_case = TestCase(
trace=trace,
checks=step.checks,
)
step_result = await test_case.run(return_exception)
step_result = step_result.model_copy(update={"trace_index": trace_index})
steps_results.append(step_result)

# Stop on first failure
Expand All @@ -151,9 +153,14 @@ async def _run_once[InputType, OutputType, TraceType: Trace[Any, Any]](
step_result = TestCaseResult(
results=[
CheckResult.skip(
message=f"Step {i + 1} was skipped due to previous failure"
message=f"Step {i + 1} was skipped due to previous failure",
details={
"check_kind": check.kind,
"check_name": check.name,
"check_description": check.description,
},
)
for _ in steps[i].checks
for check in steps[i].checks
],
duration_ms=0,
)
Expand Down