Skip to content

Commit ed3fbbb

Browse files
Zethson and claude authored
Modernize packaging and tooling from the scverse template (#215)
Adopt the modern parts of the scverse cookiecutter while preserving gget's mdbook docs, bundled binaries/data, and live-API test workflow.

Packaging:
- Consolidate setup.py + setup.cfg + requirements.txt + dev-requirements.txt + MANIFEST.in into a single pyproject.toml (hatchling backend, static version 0.30.6). Bundled binaries (gget/bins) and data (gget/constants) stay in the wheel; the `gget` console script is preserved.
- Require Python >= 3.12; classifiers and CI cover 3.12 / 3.13 / 3.14.
- Declare runtime deps in [project] and a [dependency-groups] `test` group; keep cellxgene-census in a separate optional `cellxgene` group (no wheels for newer Python yet) and have its test skip itself when the dep is absent.

Linting / formatting:
- Add the full scverse pre-commit config (biome, pyproject-fmt, ruff lint + format, standard hygiene hooks) plus the scverse ruff config.
- Run ruff format + fixes across the tree; add docstrings, `raise ... from`, isinstance checks, and targeted noqa to reach a green `prek run --all-files`.
- Vendored binaries, bundled data, test fixtures, and the CI report are excluded from formatting; Markdown hard line breaks are preserved.

CI:
- Modernize ci.yml (uv, pull_request trigger, 3.12/3.13/3.14 matrix, codecov, alls-green gate) while preserving the scheduled pytest-report commit-back.
- Add build.yml (package build check) and release.yml (PyPI trusted publishing).

Also fixes two latent bugs surfaced by ruff (undefined `Optional` in gget_cbio, use-before-assignment of `seqs` in gget_muscle) and a duplicate tearDown in the muscle tests.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent ea099b4 commit ed3fbbb

127 files changed

Lines changed: 6583 additions & 6724 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/scripts/translate_docs.py

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,22 @@
1212

1313
import os
1414
import subprocess
15-
import sys
1615
from pathlib import Path
1716

1817
from anthropic import Anthropic
1918

2019
EN_DIR = "docs/src/en"
2120
ES_DIR = "docs/src/es"
2221

22+
# Source files outside EN_DIR whose Spanish translation lives in ES_DIR. The
23+
# English docs page for these is an mdbook {{#include}} of the source file, so
24+
# the source file is the single source of truth and drives its es/ translation.
25+
EXTERNAL_SOURCES = {"CONTRIBUTING.md": f"{ES_DIR}/contributing.md"}
26+
27+
# English doc files that must not be translated directly — e.g. pages that are
28+
# just an mdbook {{#include}} of a source handled via EXTERNAL_SOURCES above.
29+
SKIP_EN_FILES = {f"{EN_DIR}/contributing.md"}
30+
2331
# Files to use as style/terminology reference (picked for breadth of patterns)
2432
REFERENCE_FILES = ["archs4.md", "blast.md", "info.md"]
2533

@@ -80,31 +88,46 @@
8088
"""
8189

8290

91+
def es_target(filepath):
    """Return the Spanish-translation path for an English/source doc path.

    Paths registered in ``EXTERNAL_SOURCES`` resolve to the explicit target
    recorded there. Any other path has its first occurrence of ``EN_DIR``
    swapped for ``ES_DIR``; a path that does not contain ``EN_DIR`` is
    returned unchanged.
    """
    external_target = EXTERNAL_SOURCES.get(filepath)
    if external_target is not None:
        return external_target
    # count=1: only the first EN_DIR occurrence is rewritten.
    return filepath.replace(EN_DIR, ES_DIR, 1)
96+
97+
8398
def get_changed_files(before_sha, after_sha):
84-
"""Return dict of added/modified/deleted English doc files."""
99+
"""Return dict of added/modified/deleted documentation source files.
100+
101+
Watches the English docs directory plus any external source files
102+
(e.g. the root CONTRIBUTING.md, which the English docs page includes).
103+
"""
104+
watched = [EN_DIR, *EXTERNAL_SOURCES]
85105
# Check if before_sha is a valid commit
86-
is_valid = subprocess.run(
87-
["git", "cat-file", "-t", before_sha],
88-
capture_output=True,
89-
text=True,
90-
).returncode == 0
106+
is_valid = (
107+
subprocess.run(
108+
["git", "cat-file", "-t", before_sha],
109+
capture_output=True,
110+
text=True,
111+
).returncode
112+
== 0
113+
)
91114

92115
if not is_valid:
93116
# Initial push or invalid ref — treat all current files as new
94117
result = subprocess.run(
95-
["git", "ls-tree", "-r", "--name-only", after_sha, "--", EN_DIR],
118+
["git", "ls-tree", "-r", "--name-only", after_sha, "--", *watched],
96119
capture_output=True,
97120
text=True,
98121
check=True,
99122
)
100123
return {
101-
"added": [f for f in result.stdout.strip().split("\n") if f],
124+
"added": [f for f in result.stdout.strip().split("\n") if f and f not in SKIP_EN_FILES],
102125
"modified": [],
103126
"deleted": [],
104127
}
105128

106129
result = subprocess.run(
107-
["git", "diff", "--name-status", before_sha, after_sha, "--", EN_DIR],
130+
["git", "diff", "--name-status", before_sha, after_sha, "--", *watched],
108131
capture_output=True,
109132
text=True,
110133
check=True,
@@ -125,6 +148,10 @@ def get_changed_files(before_sha, after_sha):
125148
elif status == "R":
126149
files["deleted"].append(parts[1])
127150
files["added"].append(parts[2])
151+
152+
# Drop English pages that must not be translated directly (handled elsewhere).
153+
for key in files:
154+
files[key] = [f for f in files[key] if f not in SKIP_EN_FILES]
128155
return files
129156

130157

@@ -151,9 +178,7 @@ def load_reference_files():
151178

152179
def build_reference_block(references):
    """Format reference files into a single text block.

    Args:
        references: Mapping of reference file name to that file's text.

    Returns:
        One string in which every entry is rendered as an ``=== name ===``
        header line followed by its content, with entries separated by a
        ``---`` rule surrounded by blank lines. An empty mapping yields an
        empty string.
    """
    sections = (f"=== {name} ===\n{content}" for name, content in references.items())
    return "\n\n---\n\n".join(sections)
157182

158183

159184
def clean_model_output(text):
@@ -231,6 +256,7 @@ def translate_diff(client, diff_text, en_content, es_content, filename, ref_bloc
231256

232257

233258
def main():
259+
"""Translate English docs changed between two commits into Spanish."""
234260
before_sha = os.environ.get("BEFORE_SHA", "").strip()
235261
after_sha = os.environ.get("AFTER_SHA", "HEAD").strip()
236262

@@ -259,7 +285,7 @@ def main():
259285

260286
# --- Deletions ---
261287
for filepath in changed["deleted"]:
262-
es_path = filepath.replace(EN_DIR, ES_DIR, 1)
288+
es_path = es_target(filepath)
263289
if Path(es_path).exists():
264290
Path(es_path).unlink()
265291
print(f"Deleted: {es_path}")
@@ -270,15 +296,15 @@ def main():
270296
filename = Path(filepath).name
271297
print(f"Translating new file: {filename} ...")
272298
translated = translate_new_file(client, en_content, filename, ref_block)
273-
es_path = filepath.replace(EN_DIR, ES_DIR, 1)
299+
es_path = es_target(filepath)
274300
Path(es_path).parent.mkdir(parents=True, exist_ok=True)
275301
Path(es_path).write_text(translated)
276302
print(f" -> Created: {es_path}")
277303

278304
# --- Modified files ---
279305
for filepath in changed["modified"]:
280306
filename = Path(filepath).name
281-
es_path = filepath.replace(EN_DIR, ES_DIR, 1)
307+
es_path = es_target(filepath)
282308
en_content = Path(filepath).read_text()
283309

284310
if not Path(es_path).exists():
@@ -292,9 +318,7 @@ def main():
292318
continue
293319
es_content = Path(es_path).read_text()
294320
print(f"Applying edits to {filename} ...")
295-
translated = translate_diff(
296-
client, diff_text, en_content, es_content, filename, ref_block
297-
)
321+
translated = translate_diff(client, diff_text, en_content, es_content, filename, ref_block)
298322

299323
Path(es_path).parent.mkdir(parents=True, exist_ok=True)
300324
Path(es_path).write_text(translated)

.github/workflows/build.yml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Builds the package with uv and validates the resulting wheel's metadata.
name: Check Build

# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  package:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # 0 = fetch the full history instead of a shallow clone.
          fetch-depth: 0
      - name: Install uv
        uses: astral-sh/setup-uv@v7
      - name: Build package
        run: uv build
      - name: Check package
        # --strict makes twine fail on warnings; only the wheel is checked here.
        run: uvx twine check --strict dist/*.whl

0 commit comments

Comments
 (0)