Skip to content
41 changes: 37 additions & 4 deletions libs/giskard-checks/src/giskard/checks/core/result.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from collections.abc import Mapping
from enum import Enum
from pathlib import Path
Expand Down Expand Up @@ -75,6 +76,16 @@ def _pluralize(count: int, word: str, plural: str | None = None) -> str:
return f"{count} {plural}"


def _format_check_params(params: Any) -> str:
if not isinstance(params, Mapping) or not params:
return ""

return "\n".join(
f"{key}: {json.dumps(value, ensure_ascii=False, default=str)}"
for key, value in params.items()
)


class CheckStatus(str, Enum):
"""Outcome categories for a check execution."""

Expand Down Expand Up @@ -222,18 +233,35 @@ def skipped(self) -> bool:
"""Return True if `status` is `SKIP`."""
return self.status == CheckStatus.SKIP

@property
def check_label(self) -> str:
    """Return a display label for the check that produced this result.

    The first truthy entry among ``check_name``, ``check_kind`` and
    ``name`` in ``details`` is used; when ``details`` is not a mapping
    or none of them is set, the label is ``"Unnamed check"``.
    """
    source = self.details if isinstance(self.details, Mapping) else {}
    for field in ("check_name", "check_kind", "name"):
        candidate = source.get(field)
        if candidate:
            return str(candidate)
    return "Unnamed check"

def __rich_console__(
self, console: Console, options: ConsoleOptions
) -> RenderResult:
status = STATUS_MAPPING[self.status]

name = self.details.get("check_name", "[dim italic]Unnamed check[/dim italic]")
name = self.check_label

if self.status == CheckStatus.FAIL or self.status == CheckStatus.ERROR:
details = (
self.message
or "[dim italic]No specific error message provided[/dim italic]"
)
params = _format_check_params(
self.details.get("check_params")
if isinstance(self.details, Mapping)
else None
)
if params:
details = f"{details}\n{params}"
else:
details = ""

Expand Down Expand Up @@ -445,11 +473,16 @@ def format_failures(self) -> list[str]:
failure_messages: list[str] = []
for result in self.results:
if result.failed or result.errored:
check_name: str = result.details.get(
"check_name"
) or result.details.get("check_kind", "Unknown check")
check_name = result.check_label
status = "ERRORED" if result.errored else "FAILED"
message = result.message or "No specific error message provided"
params = _format_check_params(
result.details.get("check_params")
if isinstance(result.details, Mapping)
else None
)
if params:
message = f"{message}\n{params}"
failure_messages.append(f"{check_name} {status}: {message}")
return failure_messages

Expand Down
8 changes: 2 additions & 6 deletions libs/giskard-checks/src/giskard/checks/export/junit.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,8 @@ def _to_json(value: Any) -> str:

def _check_label(result: CheckResult, fallback: str) -> str:
if isinstance(result.details, dict):
return str(
result.details.get("check_name")
or result.details.get("check_kind")
or result.details.get("name")
or fallback
)
label = result.check_label
return fallback if label == "Unnamed check" else label
return fallback


Expand Down
20 changes: 20 additions & 0 deletions libs/giskard-checks/src/giskard/checks/testing/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,25 @@
from ..core.result import CheckResult, TestCaseResult
from ..core.testcase import TestCase

_CHECK_PARAM_EXCLUDES = {
"kind",
"name",
"description",
"generator",
"embedding_model",
"checks",
"check",
}


def _check_params(check: Check) -> dict[str, object]:
    """Collect the parameters of ``check`` worth showing in a report.

    The check is dumped via ``model_dump`` in JSON mode, leaving out the
    names in ``_CHECK_PARAM_EXCLUDES`` and any ``None`` values; entries
    whose value equals an empty dict or an empty list are then dropped.
    """
    dumped = check.model_dump(
        mode="json",
        exclude=_CHECK_PARAM_EXCLUDES,
        exclude_none=True,
    )
    kept: dict[str, object] = {}
    for field_name, field_value in dumped.items():
        if field_value in ({}, []):
            continue
        kept[field_name] = field_value
    return kept


async def _run_check[
InputType,
Expand Down Expand Up @@ -47,6 +66,7 @@ async def _run_check[
"check_kind": check.kind,
"check_name": check.name,
"check_description": check.description,
"check_params": _check_params(check),
}
}
)
Expand Down
41 changes: 41 additions & 0 deletions libs/giskard-checks/tests/export/test_junit.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,47 @@ def test_to_junit_xml_maps_failure_error_and_skip() -> None:
assert "no retrieved context" in (skipped.text or "")


def test_to_junit_xml_uses_check_kind_when_check_name_is_missing() -> None:
    """A failing check without a name is reported under its kind."""
    from giskard.checks import TestCaseResult

    unnamed_failure = CheckResult(
        status=CheckStatus.FAIL,
        message="rule was not followed",
        details={
            "check_name": None,
            "check_kind": "conformity",
            "check_params": {"rule": "Do not share copyrighted lyrics"},
        },
    )
    step = TestCaseResult(results=[unnamed_failure], duration_ms=10)
    scenario = ScenarioResult(
        scenario_name="scenario_unnamed_check",
        steps=[step],
        duration_ms=10,
        final_trace=Trace(),
    )
    suite_result = SuiteResult(results=[scenario], duration_ms=10)

    root = ET.fromstring(to_junit_xml(suite_result))
    failure = root.find("testcase/failure")

    assert failure is not None
    assert failure.attrib["type"] == "conformity"
    failure_text = failure.text or ""
    assert "[FAIL] step_1.conformity: rule was not followed" in failure_text
    assert "check_params" in failure_text


def test_to_junit_xml_writes_file(tmp_path: Path) -> None:
suite_result = _sample_suite_result()
output_path = tmp_path / "test-results.xml"
Expand Down
72 changes: 72 additions & 0 deletions libs/giskard-checks/tests/scenarios/test_testcase.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@
from giskard.checks import (
Check,
CheckResult,
CheckStatus,
Equals,
Interact,
Interaction,
TestCase,
TestCaseResult,
Trace,
)
from rich.console import Console

# Test Classes

Expand Down Expand Up @@ -234,6 +237,75 @@ async def test_testcase_result_format_failures(self):
assert "FAILED" in failures[0]
assert "wrong" in failures[0] or "output" in failures[0]

async def test_failure_report_uses_kind_and_check_params_for_unnamed_check(self):
    """format_failures() labels an unnamed check by kind and lists its params."""
    interaction = Interaction(inputs="input", outputs="output")
    trace = await Trace.from_interactions(interaction)
    unnamed_check = Equals(
        expected_value="wrong", key="trace.interactions[-1].outputs"
    )

    result = await TestCase(
        name="unnamed_check_report",
        trace=trace,
        checks=[unnamed_check],
    ).run()
    details = result.results[0].details

    assert details["check_name"] is None
    assert details["check_kind"] == "equals"
    assert details["check_params"] == {
        "key": "trace.interactions[-1].outputs",
        "expected_value": "wrong",
        "normalization_form": "NFKC",
    }

    expected_failure = (
        "equals FAILED: Expected value equal to 'wrong' but got 'output'\n"
        'key: "trace.interactions[-1].outputs"\n'
        'expected_value: "wrong"\n'
        'normalization_form: "NFKC"'
    )
    assert result.format_failures() == [expected_failure]

async def test_console_report_uses_kind_and_check_params_for_unnamed_check(self):
    """The console report shows the check kind and params, never ``None``."""
    interaction = Interaction(inputs="input", outputs="output")
    trace = await Trace.from_interactions(interaction)
    unnamed_check = Equals(
        expected_value="wrong", key="trace.interactions[-1].outputs"
    )
    test_case = TestCase(
        name="unnamed_check_console_report",
        trace=trace,
        checks=[unnamed_check],
    )
    result = await test_case.run()

    # Record plain text so the assertions are not affected by styling.
    recorder = Console(record=True, force_terminal=False, no_color=True, width=120)
    result.print_report(recorder)
    rendered = recorder.export_text()

    assert "None" not in rendered
    assert "equals" in rendered
    assert 'expected_value: "wrong"' in rendered

def test_failure_formatting_handles_non_dict_details(self):
    """Labels and reports still work when ``details`` is ``None``."""
    # model_construct is used so ``details=None`` reaches the result as given.
    bare_result = CheckResult.model_construct(
        status=CheckStatus.FAIL,
        message="failed",
        metrics=[],
        details=None,
    )

    assert bare_result.check_label == "Unnamed check"

    wrapped = TestCaseResult(results=[bare_result], duration_ms=1)
    assert wrapped.format_failures() == ["Unnamed check FAILED: failed"]

    recorder = Console(record=True, force_terminal=False, no_color=True, width=120)
    bare_result.print_report(recorder)

    assert "Unnamed check" in recorder.export_text()

async def test_testcase_result_format_failures_with_errors(self):
"""Test format_failures() with error results."""

Expand Down
Loading