Skip to content

Commit 64340b8

Browse files
anirudhrb and Copilot
authored and committed
mshv: mount nvme resource disk for VM image copies
The previous logic in MshvHostStressTestSuite._get_disk_img_copy_path treated the existence of /mnt as proof that a large resource disk was mounted there. On NVMe-based Azure SKUs the temporary disks show up as /dev/nvme*n1 and are not mounted anywhere, so the test ended up copying many large guest images onto the small OS disk and ran out of space.

Use lsblk to detect what is actually mounted at /mnt/resource and /mnt and reuse those when present. Otherwise pick an unused nvme*n1 disk (not the OS disk, no partitions, nothing mounted), format it as ext4, and mount it at /mnt/resource. Fall back to the working path if no suitable disk is found or the mount fails.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Signed-off-by: Anirudh Rayabharam <anrayabh@microsoft.com>
1 parent 34031a5 commit 64340b8

1 file changed

Lines changed: 82 additions & 9 deletions

File tree

lisa/microsoft/testsuites/mshv/mshv_root_stress_tests.py

Lines changed: 82 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,31 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT license.
3+
import re
34
import time
45
from pathlib import Path, PurePath
5-
from typing import Any, Dict
6+
from typing import Any, Dict, List, Optional
67

78
from assertpy import assert_that
89
from microsoft.testsuites.mshv.cloud_hypervisor_tool import CloudHypervisor
910

1011
from lisa import Logger, Node, TestCaseMetadata, TestSuite, TestSuiteMetadata
1112
from lisa.messages import TestStatus, send_sub_test_result_message
1213
from lisa.testsuite import TestResult
13-
from lisa.tools import Cp, Free, Ls, Lscpu, QemuImg, Rm, Ssh, Usermod, Wget
14+
from lisa.tools import (
15+
Cp,
16+
Free,
17+
Ls,
18+
Lsblk,
19+
Lscpu,
20+
Mount,
21+
QemuImg,
22+
Rm,
23+
Ssh,
24+
Usermod,
25+
Wget,
26+
)
27+
from lisa.tools.lsblk import DiskInfo
28+
from lisa.tools.mkfs import FileSystem
1429
from lisa.util import SkippedException
1530

1631

@@ -142,7 +157,7 @@ def _mshv_stress_vm_create(
142157
)
143158
hypervisor_fw_path = str(node.get_working_path() / self.HYPERVISOR_FW_NAME)
144159
disk_img_path = node.get_working_path() / self.DISK_IMG_NAME
145-
disk_img_copy_path = self._get_disk_img_copy_path(node)
160+
disk_img_copy_path = self._get_disk_img_copy_path(node, log)
146161
threads = node.tools[Lscpu].get_thread_count()
147162
vm_count = int(threads / cpus_per_vm)
148163
failures = 0
@@ -217,10 +232,68 @@ def _mshv_stress_vm_create(
217232

218233
assert_that(failures).is_equal_to(0)
219234

220-
def _get_disk_img_copy_path(self, node: Node) -> PurePath:
221-
# Azure temporary disk is mounted at /mnt. It has more space then OS
222-
# disk. Use it for storing copies of the disk image if it exists.
223-
if node.tools[Ls].path_exists("/mnt"):
224-
return PurePath("/mnt")
225-
else:
235+
def _get_disk_img_copy_path(self, node: Node, log: Logger) -> PurePath:
    """Choose the directory that will hold the per-VM disk image copies.

    The guest disk image is copied once per concurrent VM, so the
    directory should be backed by a large disk. The lookup order is:

    1. ``/mnt/resource`` if lsblk reports something mounted there.
    2. ``/mnt`` if lsblk reports something mounted there.
    3. An unused nvme*n1 disk, formatted as ext4 and mounted at
       ``/mnt/resource``.
    4. The node's working path, when no disk qualifies or the mount
       attempt raises.

    Args:
        node: Node whose disks are inspected and, if needed, mounted.
        log: Logger used to report the fallback and the mount outcome.

    Returns:
        The path under which disk image copies should be stored.
    """
    resource_dir = "/mnt/resource"

    # force_run=True asks the tool for a fresh listing instead of a
    # cached one.
    lsblk_disks = node.tools[Lsblk].get_disks(force_run=True)

    # Reuse an existing mount, preferring /mnt/resource over /mnt.
    for mounted_dir in (resource_dir, "/mnt"):
        if self._is_mountpoint_in_use(lsblk_disks, mounted_dir):
            return PurePath(mounted_dir)

    nvme_device = self._find_unused_nvme_disk(lsblk_disks)
    if nvme_device is None:
        log.info(
            "No mounted resource disk and no unused nvme*n1 disk found; "
            "falling back to working path. The test may run out of disk "
            "space."
        )
        return node.working_path

    try:
        node.execute(f"mkdir -p {resource_dir}", shell=True, sudo=True)
        node.tools[Mount].mount(
            name=nvme_device,
            point=resource_dir,
            fs_type=FileSystem.ext4,
            format_=True,
        )
    except Exception as mount_error:
        # Best effort: a failed mount must not abort the test, so the
        # working path is used instead.
        log.info(
            f"Failed to mount {nvme_device} at {resource_dir}: {mount_error}; "
            "falling back to working path."
        )
        return node.working_path

    log.info(f"Mounted {nvme_device} at {resource_dir} for VM disk copies")
    return PurePath(resource_dir)
276+
277+
@staticmethod
def _is_mountpoint_in_use(disks: List[DiskInfo], mountpoint: str) -> bool:
    """Report whether any disk or partition is mounted at ``mountpoint``.

    Args:
        disks: Disk entries to inspect; each exposes ``mountpoint`` and
            ``partitions``, and each partition exposes ``mountpoint``.
        mountpoint: Absolute mount path to look for (exact match).

    Returns:
        True if a disk itself or one of its partitions is mounted at
        ``mountpoint``, otherwise False.
    """
    return any(
        entry.mountpoint == mountpoint
        or any(part.mountpoint == mountpoint for part in entry.partitions)
        for entry in disks
    )
286+
287+
def _find_unused_nvme_disk(self, disks: List[DiskInfo]) -> Optional[str]:
    """Return the device path of the first idle nvme*n1 disk, if any.

    A disk qualifies when it is not the OS disk, its name matches
    ``nvme<N>n1``, it has no partitions, and it is not mounted.

    Args:
        disks: Disk entries to inspect, in lsblk order.

    Returns:
        ``/dev/<name>`` of the first qualifying disk, or None when no
        disk qualifies.
    """
    first_namespace = re.compile(r"^nvme\d+n1$")
    for entry in disks:
        # The checks keep their original order and short-circuit.
        in_use_or_unsuitable = (
            entry.is_os_disk
            or not first_namespace.match(entry.name)
            or entry.partitions
            or entry.is_mounted
        )
        if in_use_or_unsuitable:
            continue
        return f"/dev/{entry.name}"
    return None

0 commit comments

Comments
 (0)