janelia-cellmap · davidackerman · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/README.md b/README.md
@@ -137,6 +137,19 @@ use_existing_scales: true        # Set false to FORCE in-worker downsampling at
                                  # when you don't trust the source's downsampling and want
                                  # consistent `downsample_method` behavior at every LOD.
                                  # (default: true)
+# When the input zarr exposes only s0 (no pre-built coarser levels),
+# the downsample multires strategy auto-builds the missing s_k arrays
+# into `{output_directory}/_intermediate_scales.zarr` in a single
+# parallel super-chunk pass over s0. Each LOD then reads from the
+# pre-built pyramid instead of repeatedly re-reading s0.
+delete_intermediate_scales: true # Delete the auto-built `_intermediate_scales.zarr` once the
+                                 # multires pipeline finishes. Set false to keep the pyramid
+                                 # around for re-running with different mesh params on the same
+                                 # data. (default: true)
+pyramid_alignment_mode: snap     # "snap" rounds an unaligned ROI INWARD to multiples of the
+                                 # max per-axis factor (drops up to max_factor-1 voxels per edge);
+                                 # "halo" rounds OUTWARD and reads beyond the ROI to complete the
+                                 # boundary cubes (no data loss). (default: snap)
 decimation_factor: 6             # Per-LOD face reduction factor. In decimate strategy: literal ratio
                                  # between LODs. In downsample strategy: target ratio used to derive
                                  # per-LOD target_reduction so the LOD-to-LOD face count drops by this

diff --git a/src/mesh_n_bone/config.py b/src/mesh_n_bone/config.py
@@ -68,6 +68,13 @@ def read_multires_config(config_path):
             optional_decimation_settings["target_faces_per_lod0_chunk"] = (
                 DEFAULT_TARGET_FACES_PER_LOD0_CHUNK
             )
+        # voxel_size_nm controls the info file's `lod_scale_multiplier` so
+        # NG sees per-LOD spatial resolutions in physical units (voxel_size,
+        # 2*voxel_size, 4*voxel_size, ...). Set to the dataset's source
+        # voxel size in nm. Default 1.0 = NG sees scales as raw [1,2,4,8]
+        # (the historical buggy behavior — NG aggressively picks coarse LODs).
+        if "voxel_size_nm" not in optional_decimation_settings:
+            optional_decimation_settings["voxel_size_nm"] = 1.0
 
         optional_properties_settings = config.get("optional_properties_settings", {})
         if "segment_properties_csv" not in optional_properties_settings:

diff --git a/src/mesh_n_bone/meshify/meshify.py b/src/mesh_n_bone/meshify/meshify.py
diff --git a/src/mesh_n_bone/multires/multires.py b/src/mesh_n_bone/multires/multires.py
@@ -20,12 +20,22 @@
 
 
 DEFAULT_TARGET_FACES_PER_LOD0_CHUNK = 25_000
+# Hemibrain uses 1024 voxels per LOD-0 chunk (= 16 μm at their 16-nm
+# meshing scale). Same number used here as a sensible dataset-level
+# default: 1024 × voxel_size for a single isotropic chunk size that
+# behaves consistently across mesh sizes.
+DEFAULT_BOX_SIZE_VOXELS = 1024
+# A LOD whose decimated face count would fall below this is dropped from
+# per-segment output — further decimation produces degenerate meshes.
+MIN_USEFUL_FACES_PER_LOD = 50
 
 
 def generate_neuroglancer_multires_mesh(
     id, num_subtask_workers, output_path, lods, original_ext, lod_0_box_size=None,
     vertex_quantization_bits=16,
     target_faces_per_lod0_chunk=DEFAULT_TARGET_FACES_PER_LOD0_CHUNK,
+    voxel_size_nm=1.0,
+    min_useful_faces=MIN_USEFUL_FACES_PER_LOD,
 ):
     """Create a complete multiresolution mesh for a single segment.
 
@@ -98,6 +108,15 @@ def generate_neuroglancer_multires_mesh(
                     f"{previous_lod} ({previous_faces} faces)."
                 )
                 break
+            if current_lod > 0 and num_faces < min_useful_faces:
+                lod_truncation_reason = (
+                    f"Segment {id} using {idx}/{requested_lod_count} requested "
+                    f"LODs; dropping LOD {current_lod} and later because it "
+                    f"has {num_faces} faces, below the useful-detail "
+                    f"threshold ({min_useful_faces}) — further decimation "
+                    f"produces degenerate meshes."
+                )
+                break
             lod_face_counts.append((current_lod, num_faces))
             # LOD-0 bounds drive the chunk grid.
             if current_lod == 0 and vertices is not None:
@@ -132,18 +151,27 @@ def generate_neuroglancer_multires_mesh(
             return
 
         if lod_0_box_size is None:
-            # Surface-area scaling: triangles distribute across the
-            # mesh's 2-D surface, so total chunks ∝ N²-on-axis. Use
-            # sqrt for the per-axis count.
-            heuristic_num_chunks = np.ceil(
-                lod0_num_faces / target_faces_per_lod0_chunk
-            )
-            num_chunks_per_axis = max(
-                1, int(np.ceil(np.sqrt(heuristic_num_chunks)))
-            )
-            lod_0_box_size = (
-                np.ceil(lod0_distances_per_axis / num_chunks_per_axis) + 1
-            )
+            if target_faces_per_lod0_chunk != DEFAULT_TARGET_FACES_PER_LOD0_CHUNK:
+                # User explicitly tuned target_faces_per_lod0_chunk →
+                # use the adaptive surface-area heuristic.
+                heuristic_num_chunks = np.ceil(
+                    lod0_num_faces / target_faces_per_lod0_chunk
+                )
+                num_chunks_per_axis = max(
+                    1, int(np.ceil(np.sqrt(heuristic_num_chunks)))
+                )
+                lod_0_box_size = (
+                    np.ceil(lod0_distances_per_axis / num_chunks_per_axis) + 1
+                )
+            else:
+                # Dataset-level default: fixed N voxels per chunk side
+                # (matches hemibrain's setup at 1024 voxels). Gives
+                # consistent LOD-transition zoom thresholds across mesh
+                # sizes — a 1 μm mito and a 100 μm neuron get the same
+                # physical chunk size, just different chunk counts.
+                lod_0_box_size = np.full(
+                    3, DEFAULT_BOX_SIZE_VOXELS * voxel_size_nm, dtype=float,
+                )
 
         # Compute the LOD 0 chunk grid from the s0 mesh extent.
         mesh_extent = vertex_max - vertex_min
@@ -298,6 +326,8 @@ def generate_all_neuroglancer_multires_meshes(
     output_path, num_workers, ids, lods, original_ext, file_sizes,
     lod_0_box_size=None, vertex_quantization_bits=16,
     target_faces_per_lod0_chunk=DEFAULT_TARGET_FACES_PER_LOD0_CHUNK,
+    voxel_size_nm=1.0,
+    min_useful_faces=MIN_USEFUL_FACES_PER_LOD,
 ):
     """Generate Neuroglancer multiresolution meshes for all segments.
 
@@ -335,6 +365,7 @@ def get_number_of_subtask_workers(file_sizes, num_workers):
     fixed_args_list = [
         output_path, lods, original_ext, lod_0_box_size,
         vertex_quantization_bits, target_faces_per_lod0_chunk,
+        voxel_size_nm, min_useful_faces,
     ]
     for idx, id in enumerate(ids):
         variable_args_list.append((id, num_subtask_workers[idx]))
@@ -382,6 +413,7 @@ def run_multires(config_path, num_workers, roi=None):
     aggressiveness = optional_decimation_settings["aggressiveness"]
     delete_decimated_meshes_flag = optional_decimation_settings["delete_decimated_meshes"]
     target_faces_per_lod0_chunk = optional_decimation_settings["target_faces_per_lod0_chunk"]
+    voxel_size_nm = float(optional_decimation_settings.get("voxel_size_nm", 1.0))
     retry_on_oom = optional_decimation_settings.get("retry_on_oom", True)
     memory_retry_max = optional_decimation_settings.get("memory_retry_max", 3)
 
@@ -479,6 +511,7 @@ def _run_multires(workers, config):
                             np.array(file_sizes), lod_0_box_size,
                             vertex_quantization_bits=16,
                             target_faces_per_lod0_chunk=target_faces_per_lod0_chunk,
+                            voxel_size_nm=voxel_size_nm,
                         )
             dask_util.run_with_oom_retry(
                 _run_multires, effective_workers, "multires creation", logger,
@@ -526,6 +559,7 @@ def _pack(workers, config):
                 with Timing_Messager("Writing sharded info file", logger):
                     sharded_mesh_util.write_sharded_info_file(
                         multires_output_path, spec, vertex_quantization_bits=16,
+                        lod_scale_multiplier=voxel_size_nm,
                     )
 
                 if sharding_settings["delete_unsharded_files"]:
@@ -539,6 +573,7 @@ def _pack(workers, config):
                 with Timing_Messager("Writing info file", logger):
                     neuroglancer.write_info_file(
                         multires_output_path, vertex_quantization_bits=16,
+                        lod_scale_multiplier=voxel_size_nm,
                     )
 
             if not skip_decimation and delete_decimated_meshes_flag:

diff --git a/src/mesh_n_bone/util/image_data_interface.py b/src/mesh_n_bone/util/image_data_interface.py
@@ -83,6 +83,13 @@ def _capped_tensorstore_context_spec():
         "gcs_request_concurrency": {"limit": 1},
         "s3_request_concurrency": {"limit": 1},
         "http_request_concurrency": {"limit": 1},
+        # Disable tensorstore's decoded-chunk cache. Default is unbounded,
+        # so for long-lived worker processes (e.g. dask LSF workers that
+        # cache opened handles in _PYRAMID_WORKER_TS_CACHE across multiple
+        # super-chunks), the cache accumulates decoded chunks until the
+        # worker is OOM-killed. We don't benefit from caching anyway —
+        # each super-chunk reads/writes a non-overlapping region.
+        "cache_pool": {"total_bytes_limit": 0},
     }
 
 

diff --git a/src/mesh_n_bone/util/logging.py b/src/mesh_n_bone/util/logging.py
@@ -11,7 +11,47 @@
 from subprocess import Popen, PIPE, TimeoutExpired, run as subprocess_run
 from datetime import datetime
 
-logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
+# Force stdout/stderr to be line-buffered so each log line is flushed
+# immediately. When the pipeline runs under LSF/dask with output
+# redirected to a file, the default block-buffering on stderr swallows
+# progress logs until the buffer fills (hundreds of KiB), which can
+# mean no visible progress for a long time even when the worker is
+# making steady headway.
+for _stream in (sys.stdout, sys.stderr):
+    try:
+        _stream.reconfigure(line_buffering=True)
+    except (AttributeError, ValueError):
+        pass
+
+
+class _FlushingStreamHandler(logging.StreamHandler):
+    """StreamHandler that flushes after every emit.
+
+    The base StreamHandler's emit() already calls self.flush(), but only
+    on the Python-level stream; when output is piped to a file via the
+    shell, the OS-level buffer for the file descriptor may still hold
+    the data. We force flush on every record to make sure tail -f works.
+    """
+
+    def emit(self, record):
+        super().emit(record)
+        try:
+            self.flush()
+            self.stream.flush()
+        except (AttributeError, ValueError, OSError):
+            pass
+
+
+_root = logging.getLogger()
+if not any(isinstance(h, _FlushingStreamHandler) for h in _root.handlers):
+    # Remove any default handlers installed earlier so our flushing one wins
+    for _h in list(_root.handlers):
+        _root.removeHandler(_h)
+    _h = _FlushingStreamHandler()
+    _h.setFormatter(logging.Formatter("%(levelname)s:%(message)s"))
+    _root.addHandler(_h)
+    _root.setLevel(logging.INFO)
+
 logger = logging.getLogger(__name__)
 
 

diff --git a/src/mesh_n_bone/util/mesh_io.py b/src/mesh_n_bone/util/mesh_io.py
@@ -283,8 +283,20 @@ def rewrite_index_with_empty_fragments(path, current_lod_fragments):
         set(map(tuple, p[s > 0].tolist())) for (p, s) in existing_per_lod
     ]
 
-    # Descent: reachable positions are sub-octants of REAL reachable
-    # positions one level up. The root is reachable by definition.
+    # Reachability is the union of two passes:
+    #   1. Top-down descent through REAL parents, which adds empty
+    #      placeholders under each real parent so NG's octree carve-out
+    #      stays complete (this was the only pass before).
+    #   2. Bottom-up ancestor walk from every REAL position, which
+    #      keeps real leaves listed even when an intermediate LOD is
+    #      empty in their cell. Independently-meshed LODs disagree:
+    #      mode-downsampling can drop a thin edge at LOD k+1 that still
+    #      exists at LOD k. Without the bottom-up pass, the real LOD-k
+    #      leaf is dropped as an "orphan" (no real parent chain) and
+    #      NG can't reach it — the user sees a chunk-shaped hole at
+    #      the finest LOD. With it, the empty intermediates are listed
+    #      as 0-byte placeholders and NG's fall-back-to-parent rule
+    #      lets it traverse down to the real leaf.
     reachable = [set() for _ in range(num_lods)]
     if real_pos_per_lod[top]:
         reachable[top] = real_pos_per_lod[top].copy()
@@ -294,6 +306,13 @@ def rewrite_index_with_empty_fragments(path, current_lod_fragments):
                     for b in (0, 1):
                         for c in (0, 1):
                             reachable[k].add((2 * X + a, 2 * Y + b, 2 * Z + c))
+    for k in range(num_lods):
+        for (X, Y, Z) in real_pos_per_lod[k]:
+            reachable[k].add((X, Y, Z))
+            x, y, z = X, Y, Z
+            for kk in range(k + 1, num_lods):
+                x, y, z = x // 2, y // 2, z // 2
+                reachable[kk].add((x, y, z))
 
     # Walk the existing data blob LOD-by-LOD in its current listed
     # order, extracting bytes for reachable real fragments. Drop bytes

diff --git a/src/mesh_n_bone/util/neuroglancer.py b/src/mesh_n_bone/util/neuroglancer.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 
-def write_info_file(path, vertex_quantization_bits=16):
+def write_info_file(path, vertex_quantization_bits=16, lod_scale_multiplier=1.0):
     """Write the ``info`` JSON file for a Neuroglancer multi-LOD Draco mesh layer.
 
     The generated file uses the specified vertex quantization, an identity
@@ -19,13 +19,21 @@ def write_info_file(path, vertex_quantization_bits=16):
     vertex_quantization_bits : int, optional
         Number of quantization bits per vertex coordinate.  Default is
         ``16``.
+    lod_scale_multiplier : float, optional
+        Multiplied into each segment manifest's `lod_scales` to give NG
+        the true per-LOD spatial resolution in physical units. Should be
+        set to the dataset's LOD-0 voxel size (e.g. 16 for 16 nm voxels)
+        so the per-LOD scales `[1, 2, 4, 8]` stored in manifests become
+        `[16, 32, 64, 128]` nm in NG's eyes. With the default 1.0, NG
+        would treat LOD-0 as 1-unit-resolution and aggressively pick
+        coarse LODs for any view (the historical buggy default).
     """
     with open(f"{path}/info", "w") as f:
         info = {
             "@type": "neuroglancer_multilod_draco",
             "vertex_quantization_bits": vertex_quantization_bits,
             "transform": [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
-            "lod_scale_multiplier": 1,
+            "lod_scale_multiplier": float(lod_scale_multiplier),
             "segment_properties": "segment_properties",
         }
         json.dump(info, f)