Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 83 additions & 57 deletions cuda_bindings/tests/test_cufile.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,14 @@ def cufile_env_json(monkeypatch):
config_path = os.path.join(test_dir, "cufile.json")
assert os.path.isfile(config_path)
monkeypatch.setenv("CUFILE_ENV_PATH_JSON", config_path)
monkeypatch.setenv("CUFILE_LOGGING_LEVEL", "TRACE")
logging.info(f"Using cuFile config: {config_path}")
yield
cufile_log_path = pathlib.Path.cwd() / "cufile.log"
if cufile_log_path.is_file():
logging.info(f"cuFile log contents from {cufile_log_path}:\n{cufile_log_path.read_text(errors='replace')}")
else:
logging.info(f"cuFile log does not exist: {cufile_log_path}")


@cache
Expand Down Expand Up @@ -78,15 +85,21 @@ def cufileVersionLessThan(target):
return True # Assume old version if any error occurs


@cache
def isSupportedFilesystem():
"""Check if the current filesystem is supported (ext4 or xfs).
@pytest.fixture(scope="session")
def skipIfUnsupportedFilesystem(tmpdir_factory):
    """Skip the requesting test unless the probed filesystem is ext4 or xfs.

    The actual requirements are probably both stricter (ext4 was not working on CI previously)
    and possibly also less strict.

    This uses `findmnt` so the kernel's mount table logic owns the decoding of the filesystem type.

    Args:
        tmpdir_factory: pytest's temporary-directory factory; its base temp
            directory is the path handed to `findmnt -T`.

    Raises:
        subprocess.CalledProcessError: if `findmnt` exits with a non-zero status.
    """
    # NOTE(review): this probes pytest's base temp directory. Tests that create
    # their files through relative paths write to the current working directory,
    # which may live on a different filesystem -- confirm which location should
    # be probed.
    cmd = ["findmnt", "-no", "FSTYPE", "-T", tmpdir_factory.getbasetemp()]
    fs_type = subprocess.check_output(cmd, text=True).strip()  # noqa: S603
    logging.info(f"Current filesystem type (findmnt): {fs_type}")
    if fs_type not in ("ext4", "xfs"):
        # Actually skip: with the call commented out, the fixture was a no-op and
        # the dependent tests ran (and failed) on unsupported filesystems.
        pytest.skip("cuFile handle_register requires ext4 or xfs filesystem")


@cache
Expand Down Expand Up @@ -195,12 +208,11 @@ def driver(ctx):
cufile.driver_close()


@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("driver")
def test_handle_register(tmpdir):
@pytest.mark.usefixtures("driver", "skipIfUnsupportedFilesystem")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`usefixtures` string order is not an execution-order guarantee — pytest orders fixtures by scope and dependency graph, not by position in the marker. With `("driver", "skipIfUnsupportedFilesystem")`, `driver` can (and likely will) set up first, so we'll run `cufile.driver_open()` for every test we then skip, and if `driver` setup itself touches the filesystem on an unsupported mount, it will error instead of skipping cleanly.

Suggest making the skip a dependency of driver (and stats) so it always resolves first:

@pytest.fixture
def driver(ctx, skipIfUnsupportedFilesystem):
    ...

@pytest.fixture
def stats(..., skipIfUnsupportedFilesystem):
    ...

Then skipIfUnsupportedFilesystem can be dropped from each test's usefixtures list — the dependency carries it transitively.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like the bot is not thinking it through?

`driver` is used in basically all tests, whereas this fixture only skips some of them. If `driver` depended on it, we would save some setup work, sure, but we would also stop running tests that can run just fine?!

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's been a while since I last read this file. My take is that, for the tests that do need the `driver` fixture, we want the execution order to be "check whether the test needs to be skipped" -> "driver setup" -> "test" -> "driver teardown", and the bot was cautiously pointing out that this order may not be preserved by `usefixtures` with multiple arguments?

def test_handle_register():
"""Test file handle registration with cuFile."""
# Create test file
file_path = tmpdir / "test_handle_register.bin"
file_path = "test_handle_register.bin"

# Create file with POSIX operations
fd = os.open(file_path, os.O_CREAT | os.O_RDWR, 0o600)
Expand Down Expand Up @@ -234,6 +246,8 @@ def test_handle_register(tmpdir):

finally:
os.close(fd)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.usefixtures("driver")
Expand Down Expand Up @@ -385,12 +399,11 @@ def test_buf_register_already_registered():
cuda.cuMemFree(buf_ptr)


@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("driver")
def test_cufile_read_write(tmpdir):
@pytest.mark.usefixtures("driver", "skipIfUnsupportedFilesystem")
def test_cufile_read_write():
"""Test cuFile read and write operations."""
# Create test file
file_path = tmpdir / "test_cufile_rw.bin"
file_path = "test_cufile_rw.bin"

# Allocate CUDA memory for write and read
write_size = 65536 # 64KB, aligned to 4096 bytes (65536 % 4096 == 0)
Expand Down Expand Up @@ -467,14 +480,15 @@ def test_cufile_read_write(tmpdir):
# Free CUDA memory
cuda.cuMemFree(write_buf)
cuda.cuMemFree(read_buf)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("driver")
def test_cufile_read_write_host_memory(tmpdir):
@pytest.mark.usefixtures("driver", "skipIfUnsupportedFilesystem")
def test_cufile_read_write_host_memory():
"""Test cuFile read and write operations using host memory."""
# Create test file
file_path = tmpdir / "test_cufile_rw_host.bin"
file_path = "test_cufile_rw_host.bin"

# Allocate host memory for write and read
write_size = 65536 # 64KB, aligned to 4096 bytes (65536 % 4096 == 0)
Expand Down Expand Up @@ -547,14 +561,15 @@ def test_cufile_read_write_host_memory(tmpdir):
# Free host memory
cuda.cuMemFreeHost(write_buf)
cuda.cuMemFreeHost(read_buf)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("driver")
def test_cufile_read_write_large(tmpdir):
@pytest.mark.usefixtures("driver", "skipIfUnsupportedFilesystem")
def test_cufile_read_write_large():
"""Test cuFile read and write operations with large data."""
# Create test file
file_path = tmpdir / "test_cufile_rw_large.bin"
file_path = "test_cufile_rw_large.bin"

# Allocate large CUDA memory (1MB, aligned to 4096 bytes)
write_size = 1024 * 1024 # 1MB, aligned to 4096 bytes (1048576 % 4096 == 0)
Expand Down Expand Up @@ -634,14 +649,15 @@ def test_cufile_read_write_large(tmpdir):
# Free CUDA memory
cuda.cuMemFree(write_buf)
cuda.cuMemFree(read_buf)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("ctx", "cufile_env_json", "driver")
def test_cufile_write_async(tmpdir):
@pytest.mark.usefixtures("ctx", "cufile_env_json", "driver", "skipIfUnsupportedFilesystem")
def test_cufile_write_async():
"""Test cuFile asynchronous write operations."""
# Create test file
file_path = tmpdir / "test_cufile_write_async.bin"
file_path = "test_cufile_write_async.bin"
fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600)

try:
Expand Down Expand Up @@ -709,14 +725,15 @@ def test_cufile_write_async(tmpdir):

finally:
os.close(fd)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("ctx", "cufile_env_json", "driver")
def test_cufile_read_async(tmpdir):
@pytest.mark.usefixtures("ctx", "cufile_env_json", "driver", "skipIfUnsupportedFilesystem")
def test_cufile_read_async():
"""Test cuFile asynchronous read operations."""
# Create test file
file_path = tmpdir / "test_cufile_read_async.bin"
file_path = "test_cufile_read_async.bin"

# First create and write test data without O_DIRECT
fd_temp = os.open(file_path, os.O_CREAT | os.O_RDWR, 0o600)
Expand Down Expand Up @@ -797,14 +814,15 @@ def test_cufile_read_async(tmpdir):

finally:
os.close(fd)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("ctx", "cufile_env_json", "driver")
def test_cufile_async_read_write(tmpdir):
@pytest.mark.usefixtures("ctx", "cufile_env_json", "driver", "skipIfUnsupportedFilesystem")
def test_cufile_async_read_write():
"""Test cuFile asynchronous read and write operations in sequence."""
# Create test file
file_path = tmpdir / "test_cufile_async_rw.bin"
file_path = "test_cufile_async_rw.bin"
fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600)

try:
Expand Down Expand Up @@ -908,14 +926,15 @@ def test_cufile_async_read_write(tmpdir):

finally:
os.close(fd)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("driver")
def test_batch_io_basic(tmpdir):
@pytest.mark.usefixtures("driver", "skipIfUnsupportedFilesystem")
def test_batch_io_basic():
"""Test basic batch IO operations with multiple read/write operations."""
# Create test file
file_path = tmpdir / "test_batch_io.bin"
file_path = "test_batch_io.bin"

# Allocate CUDA memory for multiple operations
buf_size = 65536 # 64KB
Expand Down Expand Up @@ -1104,14 +1123,15 @@ def test_batch_io_basic(tmpdir):
# Free CUDA memory
for buf in buffers + read_buffers:
cuda.cuMemFree(buf)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("driver")
def test_batch_io_cancel(tmpdir):
@pytest.mark.usefixtures("driver", "skipIfUnsupportedFilesystem")
def test_batch_io_cancel():
"""Test batch IO cancellation."""
# Create test file
file_path = tmpdir / "test_batch_cancel.bin"
file_path = "test_batch_cancel.bin"

# Allocate CUDA memory
buf_size = 4096 # 4KB, aligned to 4096 bytes
Expand Down Expand Up @@ -1181,14 +1201,15 @@ def test_batch_io_cancel(tmpdir):
# Free CUDA memory
for buf in buffers:
cuda.cuMemFree(buf)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("driver")
def test_batch_io_large_operations(tmpdir):
@pytest.mark.usefixtures("driver", "skipIfUnsupportedFilesystem")
def test_batch_io_large_operations():
"""Test batch IO with large buffer operations."""
# Create test file
file_path = tmpdir / "test_batch_large.bin"
file_path = "test_batch_large.bin"

# Allocate large CUDA memory (1MB, aligned to 4096 bytes)
buf_size = 1024 * 1024 # 1MB, aligned to 4096 bytes
Expand Down Expand Up @@ -1366,6 +1387,8 @@ def test_batch_io_large_operations(tmpdir):
# Free CUDA memory
for buf in all_buffers:
cuda.cuMemFree(buf)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(
Expand Down Expand Up @@ -1585,13 +1608,12 @@ def test_stats_start_stop():
@pytest.mark.skipif(
cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later"
)
@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("stats")
@pytest.mark.usefixtures("stats", "skipIfUnsupportedFilesystem")
@pytest.mark.thread_unsafe(reason="cuFile stats counters and collection state are process-global")
def test_get_stats_l1(tmpdir):
def test_get_stats_l1():
"""Test cuFile L1 statistics retrieval with file operations."""
# Create test file directly with O_DIRECT
file_path = tmpdir / "test_stats_l1.bin"
file_path = "test_stats_l1.bin"
fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600)

try:
Expand Down Expand Up @@ -1658,18 +1680,19 @@ def test_get_stats_l1(tmpdir):

finally:
os.close(fd)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(
cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later"
)
@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("stats")
@pytest.mark.usefixtures("stats", "skipIfUnsupportedFilesystem")
@pytest.mark.thread_unsafe(reason="cuFile stats counters and collection state are process-global")
def test_get_stats_l2(tmpdir):
def test_get_stats_l2():
"""Test cuFile L2 statistics retrieval with file operations."""
# Create test file directly with O_DIRECT
file_path = tmpdir / "test_stats_l2.bin"
file_path = "test_stats_l2.bin"
fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600)

try:
Expand Down Expand Up @@ -1740,18 +1763,19 @@ def test_get_stats_l2(tmpdir):

finally:
os.close(fd)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(
cufileVersionLessThan(1150), reason="cuFile parameter APIs require cuFile library version 13.0 or later"
)
@pytest.mark.skipif(not isSupportedFilesystem(), reason="cuFile handle_register requires ext4 or xfs filesystem")
@pytest.mark.usefixtures("stats")
@pytest.mark.usefixtures("stats", "skipIfUnsupportedFilesystem")
@pytest.mark.thread_unsafe(reason="cuFile stats counters and collection state are process-global")
def test_get_stats_l3(tmpdir):
def test_get_stats_l3():
"""Test cuFile L3 statistics retrieval with file operations."""
# Create test file directly with O_DIRECT
file_path = tmpdir / "test_stats_l3.bin"
file_path = "test_stats_l3.bin"
fd = os.open(file_path, os.O_CREAT | os.O_RDWR | os.O_DIRECT, 0o600)

try:
Expand Down Expand Up @@ -1832,6 +1856,8 @@ def test_get_stats_l3(tmpdir):

finally:
os.close(fd)
with suppress(OSError):
os.unlink(file_path)


@pytest.mark.skipif(
Expand Down
Loading