From 860479a1b6fdad5081033e80c4417148dd9b9291 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sun, 14 Jun 2026 11:43:22 -0700 Subject: [PATCH 1/5] initial commit --- ci/scripts/python_wheel_validate_contents.py | 7 ++++--- python/pyproject.toml | 5 +++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py index 8388f6ebf391..65b858a5d3e1 100644 --- a/ci/scripts/python_wheel_validate_contents.py +++ b/ci/scripts/python_wheel_validate_contents.py @@ -54,9 +54,10 @@ def validate_wheel(path): info.filename.split("/")[-1] == filename for info in wheel_zip.filelist ), f"{filename} is missing from the wheel." - assert any( - info.filename == "pyarrow/py.typed" for info in wheel_zip.filelist - ), "pyarrow/py.typed is missing from the wheel." + # TODO(GH-48970): Uncomment when stubfiles are complete + # assert any( + # info.filename == "pyarrow/py.typed" for info in wheel_zip.filelist + # ), "pyarrow/py.typed is missing from the wheel." source_root = Path(__file__).resolve().parents[2] stubs_dir = source_root / "python" / "pyarrow-stubs" / "pyarrow" diff --git a/python/pyproject.toml b/python/pyproject.toml index fe508b855a87..f03627338d6e 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -88,6 +88,11 @@ metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/", "pyarrow-stubs/"] wheel.packages = ["pyarrow"] wheel.install-dir = "pyarrow" +# TODO(GH-48970): Remove this when stubfiles are complete +# Withhold the PEP 561 marker until the type stubs are complete. The .pyi files still ship +# and are tested in CI, but without py.typed type checkers ignore them, we don't break +# downstream users (GH-49831). The file is kept in-tree for CI type-checking. +wheel.exclude = ["pyarrow/py.typed"] [tool.scikit-build.cmake.define] PYARROW_BUNDLE_ARROW_CPP = {env = "PYARROW_BUNDLE_ARROW_CPP", default = "OFF"} From 7d9019e93989ecab7849e3efeeb6efea227e0299 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 15 Jun 2026 00:26:02 -0700 Subject: [PATCH 2/5] omit annotations from wheel --- ci/scripts/python_wheel_validate_contents.py | 60 +++++++++++--------- python/CMakeLists.txt | 58 ++++++++++--------- python/pyproject.toml | 7 ++- 3 files changed, 67 insertions(+), 58 deletions(-) diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py index 65b858a5d3e1..b662ae4e6049 100644 --- a/ci/scripts/python_wheel_validate_contents.py +++ b/ci/scripts/python_wheel_validate_contents.py @@ -59,34 +59,38 @@ def validate_wheel(path): # info.filename == "pyarrow/py.typed" for info in wheel_zip.filelist # ), "pyarrow/py.typed is missing from the wheel." - source_root = Path(__file__).resolve().parents[2] - stubs_dir = source_root / "python" / "pyarrow-stubs" / "pyarrow" - assert stubs_dir.exists(), f"Stub source directory not found: {stubs_dir}" - - expected_stub_files = { - f"pyarrow/{stub_file.relative_to(stubs_dir).as_posix()}" - for stub_file in stubs_dir.rglob("*.pyi") - } - - wheel_stub_files = { - info.filename - for info in wheel_zip.filelist - if info.filename.startswith("pyarrow/") and info.filename.endswith(".pyi") - } - - assert wheel_stub_files == expected_stub_files, ( - "Wheel .pyi files differ from python/pyarrow-stubs/pyarrow.\n" - f"Missing in wheel: {sorted(expected_stub_files - wheel_stub_files)}\n" - f"Unexpected in wheel: {sorted(wheel_stub_files - expected_stub_files)}" - ) - - wheel_docstring_count = sum( - _count_docstrings(wheel_zip.read(wsf).decode("utf-8")) - for wsf in wheel_stub_files - ) - - print(f"Found {wheel_docstring_count} docstring(s) in wheel stubs.") - assert wheel_docstring_count, "No docstrings found in wheel stub files." + # TODO(GH-49831): Re-enable when incomplete stubs are shipped in wheels + # again. For now, wheels intentionally omit pyarrow-stubs because some + # type checkers consume .pyi files even without py.typed. + # + # source_root = Path(__file__).resolve().parents[2] + # stubs_dir = source_root / "python" / "pyarrow-stubs" / "pyarrow" + # assert stubs_dir.exists(), f"Stub source directory not found: {stubs_dir}" + # + # expected_stub_files = { + # f"pyarrow/{stub_file.relative_to(stubs_dir).as_posix()}" + # for stub_file in stubs_dir.rglob("*.pyi") + # } + # + # wheel_stub_files = { + # info.filename + # for info in wheel_zip.filelist + # if info.filename.startswith("pyarrow/") and info.filename.endswith(".pyi") + # } + # + # assert wheel_stub_files == expected_stub_files, ( + # "Wheel .pyi files differ from python/pyarrow-stubs/pyarrow.\n" + # f"Missing in wheel: {sorted(expected_stub_files - wheel_stub_files)}\n" + # f"Unexpected in wheel: {sorted(wheel_stub_files - expected_stub_files)}" + # ) + # + # wheel_docstring_count = sum( + # _count_docstrings(wheel_zip.read(wsf).decode("utf-8")) + # for wsf in wheel_stub_files + # ) + # + # print(f"Found {wheel_docstring_count} docstring(s) in wheel stubs.") + # assert wheel_docstring_count, "No docstrings found in wheel stub files." print(f"The wheel: {wheels[0]} seems valid.") diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index d0ddb9009f89..a9eb6b9bc65b 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1041,32 +1041,36 @@ endif() # # Type stubs with docstring injection # +# TODO(GH-49831): Temporarily do not install pyarrow-stubs into wheels. +# The stubs are incomplete, and some type checkers consume .pyi files even +# without the py.typed marker. Re-enable this when the stubs are complete. +# # Stubs live in pyarrow-stubs/pyarrow/ during development but are installed # alongside the package so type checkers can find them (PEP 561). -set(PYARROW_STUBS_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pyarrow-stubs/pyarrow") -if(EXISTS "${PYARROW_STUBS_SOURCE_DIR}") - install(DIRECTORY "${PYARROW_STUBS_SOURCE_DIR}/" - DESTINATION "." - FILES_MATCHING - PATTERN "*.pyi") - - if(PYARROW_REQUIRE_STUB_DOCSTRINGS) - install(CODE " - execute_process( - COMMAND \"${Python3_EXECUTABLE}\" - \"${CMAKE_CURRENT_SOURCE_DIR}/scripts/update_stub_docstrings.py\" - \"${CMAKE_INSTALL_PREFIX}\" - \"${CMAKE_CURRENT_SOURCE_DIR}\" - RESULT_VARIABLE _pyarrow_stub_docstrings_result - ) - if(NOT _pyarrow_stub_docstrings_result EQUAL 0) - message(FATAL_ERROR \"Stub docstring injection failed (exit code: \${_pyarrow_stub_docstrings_result})\") - endif() - ") - endif() -else() - if(PYARROW_REQUIRE_STUB_DOCSTRINGS) - message(FATAL_ERROR "PyArrow stub source directory not found at ${PYARROW_STUBS_SOURCE_DIR}; " - "cannot build wheel without .pyi files.") - endif() -endif() +# set(PYARROW_STUBS_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pyarrow-stubs/pyarrow") +# if(EXISTS "${PYARROW_STUBS_SOURCE_DIR}") +# install(DIRECTORY "${PYARROW_STUBS_SOURCE_DIR}/" +# DESTINATION "." +# FILES_MATCHING +# PATTERN "*.pyi") +# +# if(PYARROW_REQUIRE_STUB_DOCSTRINGS) +# install(CODE " +# execute_process( +# COMMAND \"${Python3_EXECUTABLE}\" +# \"${CMAKE_CURRENT_SOURCE_DIR}/scripts/update_stub_docstrings.py\" +# \"${CMAKE_INSTALL_PREFIX}\" +# \"${CMAKE_CURRENT_SOURCE_DIR}\" +# RESULT_VARIABLE _pyarrow_stub_docstrings_result +# ) +# if(NOT _pyarrow_stub_docstrings_result EQUAL 0) +# message(FATAL_ERROR \"Stub docstring injection failed (exit code: \${_pyarrow_stub_docstrings_result})\") +# endif() +# ") +# endif() +# else() +# if(PYARROW_REQUIRE_STUB_DOCSTRINGS) +# message(FATAL_ERROR "PyArrow stub source directory not found at ${PYARROW_STUBS_SOURCE_DIR}; " +# "cannot build wheel without .pyi files.") +# endif() +# endif() diff --git a/python/pyproject.toml b/python/pyproject.toml index f03627338d6e..48283475e7d1 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -89,9 +89,10 @@ sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/", "pyarrow-stu wheel.packages = ["pyarrow"] wheel.install-dir = "pyarrow" # TODO(GH-48970): Remove this when stubfiles are complete -# Withhold the PEP 561 marker until the type stubs are complete. The .pyi files still ship -# and are tested in CI, but without py.typed type checkers ignore them, we don't break -# downstream users (GH-49831). The file is kept in-tree for CI type-checking. +# Withhold the PEP 561 marker until the type stubs are complete. The .pyi files +# are also temporarily omitted from wheels, so type checkers don't rely on the +# incomplete stubs and break downstream users (GH-49831). py.typed is kept +# in-tree for CI type-checking. wheel.exclude = ["pyarrow/py.typed"] [tool.scikit-build.cmake.define] From 4ad93c2a8c50205427ffda0c1dc176d33ae9b2b1 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 15 Jun 2026 00:38:24 -0700 Subject: [PATCH 3/5] GH-49831: Validate pyarrow stubs are omitted from wheels --- ci/scripts/python_wheel_macos_build.sh | 3 +- ci/scripts/python_wheel_validate_contents.py | 66 +++++--------------- ci/scripts/python_wheel_windows_build.bat | 3 +- ci/scripts/python_wheel_xlinux_build.sh | 3 +- python/CMakeLists.txt | 4 ++ 5 files changed, 24 insertions(+), 55 deletions(-) diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 31395e26c23a..558f5e69ffd1 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -147,7 +147,8 @@ popd echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUNDLE_ARROW_CPP=ON -export PYARROW_REQUIRE_STUB_DOCSTRINGS=ON +# TODO(GH-49831): Re-enable when pyarrow-stubs are shipped in wheels again. +# export PYARROW_REQUIRE_STUB_DOCSTRINGS=ON export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py index b662ae4e6049..4934788f1d7d 100644 --- a/ci/scripts/python_wheel_validate_contents.py +++ b/ci/scripts/python_wheel_validate_contents.py @@ -16,27 +16,11 @@ # under the License. import argparse -import ast from pathlib import Path import re import zipfile -def _count_docstrings(source): - """Count docstrings in module, function, and class bodies.""" - tree = ast.parse(source) - count = 0 - for node in ast.walk(tree): - if isinstance(node, (ast.Module, ast.FunctionDef, - ast.AsyncFunctionDef, ast.ClassDef)): - if (node.body - and isinstance(node.body[0], ast.Expr) - and isinstance(node.body[0].value, ast.Constant) - and isinstance(node.body[0].value.value, str)): - count += 1 - return count - - def validate_wheel(path): p = Path(path) wheels = list(p.glob('*.whl')) @@ -54,43 +38,21 @@ def validate_wheel(path): info.filename.split("/")[-1] == filename for info in wheel_zip.filelist ), f"{filename} is missing from the wheel." - # TODO(GH-48970): Uncomment when stubfiles are complete - # assert any( - # info.filename == "pyarrow/py.typed" for info in wheel_zip.filelist - # ), "pyarrow/py.typed is missing from the wheel." + # TODO(GH-48970): Invert these checks when stubfiles are complete and + # pyarrow-stubs are intentionally shipped in wheels again. + assert not any( + info.filename == "pyarrow/py.typed" for info in wheel_zip.filelist + ), "pyarrow/py.typed must not be present in the wheel." - # TODO(GH-49831): Re-enable when incomplete stubs are shipped in wheels - # again. For now, wheels intentionally omit pyarrow-stubs because some - # type checkers consume .pyi files even without py.typed. - # - # source_root = Path(__file__).resolve().parents[2] - # stubs_dir = source_root / "python" / "pyarrow-stubs" / "pyarrow" - # assert stubs_dir.exists(), f"Stub source directory not found: {stubs_dir}" - # - # expected_stub_files = { - # f"pyarrow/{stub_file.relative_to(stubs_dir).as_posix()}" - # for stub_file in stubs_dir.rglob("*.pyi") - # } - # - # wheel_stub_files = { - # info.filename - # for info in wheel_zip.filelist - # if info.filename.startswith("pyarrow/") and info.filename.endswith(".pyi") - # } - # - # assert wheel_stub_files == expected_stub_files, ( - # "Wheel .pyi files differ from python/pyarrow-stubs/pyarrow.\n" - # f"Missing in wheel: {sorted(expected_stub_files - wheel_stub_files)}\n" - # f"Unexpected in wheel: {sorted(wheel_stub_files - expected_stub_files)}" - # ) - # - # wheel_docstring_count = sum( - # _count_docstrings(wheel_zip.read(wsf).decode("utf-8")) - # for wsf in wheel_stub_files - # ) - # - # print(f"Found {wheel_docstring_count} docstring(s) in wheel stubs.") - # assert wheel_docstring_count, "No docstrings found in wheel stub files." + wheel_stub_files = sorted( + info.filename + for info in wheel_zip.filelist + if info.filename.startswith("pyarrow/") and info.filename.endswith(".pyi") + ) + assert not wheel_stub_files, ( + "pyarrow .pyi files must not be present in the wheel: " + f"{wheel_stub_files}" + ) print(f"The wheel: {wheels[0]} seems valid.") diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index e094d82861df..54d119ea92df 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -116,7 +116,8 @@ popd echo "=== (%PYTHON%) Building wheel ===" set PYARROW_BUNDLE_ARROW_CPP=ON -set PYARROW_REQUIRE_STUB_DOCSTRINGS=ON +rem TODO(GH-49831): Re-enable when pyarrow-stubs are shipped in wheels again. +rem set PYARROW_REQUIRE_STUB_DOCSTRINGS=ON set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% set PYARROW_WITH_DATASET=%ARROW_DATASET% diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index f810b68c0c50..8713c1a57b0c 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -157,7 +157,8 @@ check_arrow_visibility echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUNDLE_ARROW_CPP=ON -export PYARROW_REQUIRE_STUB_DOCSTRINGS=ON +# TODO(GH-49831): Re-enable when pyarrow-stubs are shipped in wheels again. +# export PYARROW_REQUIRE_STUB_DOCSTRINGS=ON export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a9eb6b9bc65b..24a21fa1a2c5 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1044,6 +1044,10 @@ endif() # TODO(GH-49831): Temporarily do not install pyarrow-stubs into wheels. # The stubs are incomplete, and some type checkers consume .pyi files even # without the py.typed marker. Re-enable this when the stubs are complete. +if(PYARROW_REQUIRE_STUB_DOCSTRINGS) + message(FATAL_ERROR "PYARROW_REQUIRE_STUB_DOCSTRINGS cannot be used while " + "pyarrow-stubs are omitted from wheels (GH-49831).") +endif() # # Stubs live in pyarrow-stubs/pyarrow/ during development but are installed # alongside the package so type checkers can find them (PEP 561). From 9c62d86485fa3d3f4ba269586b56f02d177b91e4 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 15 Jun 2026 01:08:03 -0700 Subject: [PATCH 4/5] GH-49831: Keep wheel stub docstring counter commented --- ci/scripts/python_wheel_validate_contents.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py index 4934788f1d7d..04a1cc3072ed 100644 --- a/ci/scripts/python_wheel_validate_contents.py +++ b/ci/scripts/python_wheel_validate_contents.py @@ -16,11 +16,29 @@ # under the License. import argparse +# TODO(GH-48970): Uncomment when wheel stub validation is re-enabled. +# import ast from pathlib import Path import re import zipfile +# TODO(GH-48970): Uncomment when wheel stub validation is re-enabled. +# def _count_docstrings(source): +# """Count docstrings in module, function, and class bodies.""" +# tree = ast.parse(source) +# count = 0 +# for node in ast.walk(tree): +# if isinstance(node, (ast.Module, ast.FunctionDef, +# ast.AsyncFunctionDef, ast.ClassDef)): +# if (node.body +# and isinstance(node.body[0], ast.Expr) +# and isinstance(node.body[0].value, ast.Constant) +# and isinstance(node.body[0].value.value, str)): +# count += 1 +# return count + + def validate_wheel(path): p = Path(path) wheels = list(p.glob('*.whl')) From 1c4cec99ed9e1639e0a57a3bb2a9880dec99b31e Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 15 Jun 2026 01:16:32 -0700 Subject: [PATCH 5/5] GH-49831: Minimize temporary wheel validation changes --- ci/scripts/python_wheel_validate_contents.py | 86 ++++++++++++++------ 1 file changed, 62 insertions(+), 24 deletions(-) diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py index 04a1cc3072ed..7c6e98c630aa 100644 --- a/ci/scripts/python_wheel_validate_contents.py +++ b/ci/scripts/python_wheel_validate_contents.py @@ -16,27 +16,30 @@ # under the License. import argparse -# TODO(GH-48970): Uncomment when wheel stub validation is re-enabled. -# import ast +import ast from pathlib import Path import re import zipfile -# TODO(GH-48970): Uncomment when wheel stub validation is re-enabled. -# def _count_docstrings(source): -# """Count docstrings in module, function, and class bodies.""" -# tree = ast.parse(source) -# count = 0 -# for node in ast.walk(tree): -# if isinstance(node, (ast.Module, ast.FunctionDef, -# ast.AsyncFunctionDef, ast.ClassDef)): -# if (node.body -# and isinstance(node.body[0], ast.Expr) -# and isinstance(node.body[0].value, ast.Constant) -# and isinstance(node.body[0].value.value, str)): -# count += 1 -# return count +# TODO(GH-48970): Set to True and remove the temporary absence checks below when +# pyarrow-stubs are complete and should be shipped in wheels again. +_STUBS_SHIPPED_IN_WHEEL = False + + +def _count_docstrings(source): + """Count docstrings in module, function, and class bodies.""" + tree = ast.parse(source) + count = 0 + for node in ast.walk(tree): + if isinstance(node, (ast.Module, ast.FunctionDef, + ast.AsyncFunctionDef, ast.ClassDef)): + if (node.body + and isinstance(node.body[0], ast.Expr) + and isinstance(node.body[0].value, ast.Constant) + and isinstance(node.body[0].value.value, str)): + count += 1 + return count def validate_wheel(path): @@ -56,22 +59,57 @@ def validate_wheel(path): info.filename.split("/")[-1] == filename for info in wheel_zip.filelist ), f"{filename} is missing from the wheel." - # TODO(GH-48970): Invert these checks when stubfiles are complete and - # pyarrow-stubs are intentionally shipped in wheels again. - assert not any( + if not _STUBS_SHIPPED_IN_WHEEL: + assert not any( + info.filename == "pyarrow/py.typed" for info in wheel_zip.filelist + ), "pyarrow/py.typed must not be present in the wheel." + + wheel_stub_files = sorted( + info.filename + for info in wheel_zip.filelist + if (info.filename.startswith("pyarrow/") + and info.filename.endswith(".pyi")) + ) + assert not wheel_stub_files, ( + "pyarrow .pyi files must not be present in the wheel: " + f"{wheel_stub_files}" + ) + print(f"The wheel: {wheels[0]} seems valid.") + return + + assert any( info.filename == "pyarrow/py.typed" for info in wheel_zip.filelist - ), "pyarrow/py.typed must not be present in the wheel." + ), "pyarrow/py.typed is missing from the wheel." - wheel_stub_files = sorted( + source_root = Path(__file__).resolve().parents[2] + stubs_dir = source_root / "python" / "pyarrow-stubs" / "pyarrow" + assert stubs_dir.exists(), f"Stub source directory not found: {stubs_dir}" + + expected_stub_files = { + f"pyarrow/{stub_file.relative_to(stubs_dir).as_posix()}" + for stub_file in stubs_dir.rglob("*.pyi") + } + + wheel_stub_files = { info.filename for info in wheel_zip.filelist if info.filename.startswith("pyarrow/") and info.filename.endswith(".pyi") + } + + assert wheel_stub_files == expected_stub_files, ( + "Wheel .pyi files differ from python/pyarrow-stubs/pyarrow.\n" + f"Missing in wheel: {sorted(expected_stub_files - wheel_stub_files)}\n" + f"Unexpected in wheel: {sorted(wheel_stub_files - expected_stub_files)}" ) - assert not wheel_stub_files, ( - "pyarrow .pyi files must not be present in the wheel: " - f"{wheel_stub_files}" + + wheel_docstring_count = sum( + _count_docstrings(wheel_zip.read(wsf).decode("utf-8")) + for wsf in wheel_stub_files ) + print(f"Found {wheel_docstring_count} docstring(s) in wheel stubs.") + assert wheel_docstring_count, "No docstrings found in wheel stub files." + print(f"The wheel: {wheels[0]} seems valid.")