Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions lisa/microsoft/testsuites/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,16 @@ def verify_l3_cache(
# For all other cases, check L3 cache mapping with socket awareness
cpu_info = lscpu.get_cpu_info()

# On some VMs (e.g. confidential VMs), cache topology is not exposed
# by the hypervisor, so lscpu reports "-" for all cache values.
# In this case, we cannot verify L3 cache mapping.
if any(cpu.l3_cache == -1 for cpu in cpu_info):
raise SkippedException(
"Cache topology is not exposed on this VM. "
"lscpu reports no cache information (likely a confidential VM "
"or a VM size that does not expose cache topology to the guest)."
)
Comment on lines +127 to +132
Copy link

Copilot AI May 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The skip condition uses any(cpu.l3_cache == -1 ...), but the comment says lscpu reports "-" for all cache values. If only a subset of CPUs has unknown cache IDs (i.e., mixed output), any() will still skip the test and potentially hide a real parsing or host issue. Consider using all(...) for the expected "no cache info at all" case, and treat a mixed state as a test failure (or raise a more specific exception).

Suggested change
if any(cpu.l3_cache == -1 for cpu in cpu_info):
raise SkippedException(
"Cache topology is not exposed on this VM. "
"lscpu reports no cache information (likely a confidential VM "
"or a VM size that does not expose cache topology to the guest)."
)
unknown_l3_cache_count = sum(1 for cpu in cpu_info if cpu.l3_cache == -1)
if unknown_l3_cache_count == len(cpu_info):
raise SkippedException(
"Cache topology is not exposed on this VM. "
"lscpu reports no cache information (likely a confidential VM "
"or a VM size that does not expose cache topology to the guest)."
)
if unknown_l3_cache_count:
raise LisaException(
"Inconsistent L3 cache topology reported by lscpu: "
f"{unknown_l3_cache_count} of {len(cpu_info)} CPUs have unknown "
"L3 cache IDs while others have valid values. Investigate lscpu "
"parsing or host cache-topology exposure on this VM."
)

Copilot uses AI. Check for mistakes.

# Build a mapping of socket -> NUMA nodes and socket -> L3 caches
socket_to_numa_nodes: dict[int, set[int]] = {}
socket_to_l3_caches: dict[int, set[int]] = {}
Expand Down Expand Up @@ -299,6 +309,11 @@ def _create_stimer_interrupts(self, node: Node, cpu_count: int) -> None:

def _verify_node_mapping(self, node: Node, numa_node_size: int) -> None:
cpu_info = node.tools[Lscpu].get_cpu_info()
if any(cpu.l3_cache == -1 for cpu in cpu_info):
raise SkippedException(
"Cache topology is not exposed on this VM. "
"lscpu reports no cache information."
)
Comment on lines +312 to +316
Copy link

Copilot AI May 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above: using any(cpu.l3_cache == -1 ...) will skip even if only some CPUs have unknown cache IDs. If the intent is to skip only when cache topology is entirely hidden, switch to all(...) and fail on mixed/partial cache data so real regressions aren't masked.

Suggested change
if any(cpu.l3_cache == -1 for cpu in cpu_info):
raise SkippedException(
"Cache topology is not exposed on this VM. "
"lscpu reports no cache information."
)
if all(cpu.l3_cache == -1 for cpu in cpu_info):
raise SkippedException(
"Cache topology is not exposed on this VM. "
"lscpu reports no cache information."
)
if any(cpu.l3_cache == -1 for cpu in cpu_info):
raise LisaException(
"Cache topology is partially exposed on this VM: some CPUs "
"report unknown L3 cache IDs while others do not. Verify the "
"guest cache topology reporting and investigate inconsistent "
"lscpu output before rerunning the test."
)

Copilot uses AI. Check for mistakes.
cpu_info.sort(key=lambda cpu: cpu.cpu)
for i, cpu in enumerate(cpu_info):
numa_node_id = i // numa_node_size
Expand Down
54 changes: 40 additions & 14 deletions lisa/tools/lscpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ class Lscpu(Tool):
r"(?P<l1_data_cache>\d+):(?P<l1_instruction_cache>\d+):"
r"(?P<l2_cache>\d+):(?P<l3_cache>\d+)$"
)
# On some VMs (e.g. confidential VMs), cache topology is not exposed
# and lscpu outputs "-" instead of cache IDs:
# 0 0 0 -
_core_numa_no_cache = re.compile(
r"\s*(?P<cpu>\d+)\s+(?P<numa_node>\d+)\s+(?P<socket>\d+)\s+-$"
)
# Model name: Intel(R) Xeon(R) Platinum 8168 CPU @ 2.70GHz
# Model name: AMD EPYC 7763 64-Core Processor
# Model name: AMD EPYC 7763 64-Core Processor
Expand Down Expand Up @@ -267,6 +273,10 @@ def get_cpu_info(self) -> List[CPUInfo]:
# CPU NODE SOCKET L1d:L1i:L2:L3
# 0 0 0 0:0:0:0
# 1 0 0 0:0:0:0
#
# On some VMs (e.g. confidential VMs), cache topology is not exposed:
# CPU NODE SOCKET CACHE
# 0 0 0 -
result = self.run(
"--extended=cpu,node,socket,cache", expected_exit_code=0
).stdout
Expand All @@ -278,21 +288,37 @@ def get_cpu_info(self) -> List[CPUInfo]:
output: List[CPUInfo] = []
for item in mappings:
match_result = self._core_numa_mappings.fullmatch(item)
assert (
match_result
), f"lscpu NUMA node mapping is not in expected format: {item}"
output.append(
CPUInfo(
cpu=int(match_result.group("cpu")),
numa_node=int(match_result.group("numa_node")),
socket=int(match_result.group("socket")),
l1_data_cache=int(match_result.group("l1_data_cache")),
l1_instruction_cache=int(
match_result.group("l1_instruction_cache")
),
l2_cache=int(match_result.group("l2_cache")),
l3_cache=int(match_result.group("l3_cache")),
if match_result:
output.append(
CPUInfo(
cpu=int(match_result.group("cpu")),
numa_node=int(match_result.group("numa_node")),
socket=int(match_result.group("socket")),
l1_data_cache=int(match_result.group("l1_data_cache")),
l1_instruction_cache=int(
match_result.group("l1_instruction_cache")
),
l2_cache=int(match_result.group("l2_cache")),
l3_cache=int(match_result.group("l3_cache")),
)
)
continue
no_cache_match = self._core_numa_no_cache.fullmatch(item)
if no_cache_match:
output.append(
CPUInfo(
cpu=int(no_cache_match.group("cpu")),
numa_node=int(no_cache_match.group("numa_node")),
socket=int(no_cache_match.group("socket")),
l1_data_cache=-1,
l1_instruction_cache=-1,
l2_cache=-1,
l3_cache=-1,
)
Comment on lines +313 to +317
Copy link

Copilot AI May 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using literal -1 as a sentinel for unknown cache IDs makes the meaning easy to miss and spreads a magic value across the codebase (the tests also check for -1). Please define a named constant (e.g., UNKNOWN_CACHE_ID) or switch the cache fields to Optional[int] and use None for "unknown" so callers can reliably detect this state without hardcoding -1.

Copilot uses AI. Check for mistakes.
)
continue
raise AssertionError(
f"lscpu NUMA node mapping is not in expected format: {item}"
)
return output

Expand Down
Loading