From 666e70a735051199920ad6bbb73c3824e935d410 Mon Sep 17 00:00:00 2001 From: Adam Date: Mon, 1 Jun 2026 15:19:43 -0600 Subject: [PATCH 1/6] BUG: fix contour plotting for bubblesam detection * plot largest segmented area from each segmentation map * fix background filtration by checking bounding box area * simplify parquet saving * add test for changes to `analyze_and_filter_masks` --- neat_ml/bubblesam/bubblesam.py | 36 +++++++++++++++------------------ neat_ml/tests/test_bubblesam.py | 23 ++++++++++++++++++++- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/neat_ml/bubblesam/bubblesam.py b/neat_ml/bubblesam/bubblesam.py index bdc3ee2..090113f 100644 --- a/neat_ml/bubblesam/bubblesam.py +++ b/neat_ml/bubblesam/bubblesam.py @@ -131,7 +131,9 @@ def analyze_and_filter_masks( if len(props_list) == 0: continue - rp = props_list[0] + # take the region properties from the segmentation map with the greatest area + rp_areas = [x.area for x in props_list] + rp = props_list[np.argmax(rp_areas)] area = rp.area perimeter = rp.perimeter if perimeter == 0: @@ -141,19 +143,18 @@ def analyze_and_filter_masks( major_axis = rp.major_axis_length minor_axis = rp.minor_axis_length h, w = seg.shape[:2] - # Using a small margin (2 pixels) to be safe + # Using a small margin (2 pixels) to be safe, + # filter any segmentations with bounding boxes close to the size of the image + # because SAM-2 can sometimes detect the image background itself. + bbox_area = np.prod(rp.bbox[2:]) max_allowed_area = (h - 2) * (w - 2) - if area >= area_threshold and circ >= circularity_threshold: + if (area >= area_threshold and circ >= circularity_threshold + and bbox_area < max_allowed_area): binary_mask = seg.astype('uint8') * 255 contours, _ = cv2.findContours(binary_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE) - # reshape contours for plotting and remove any contours - # close to the size of the image because cv2.findContours - # can sometimes detect the image edge itself. - all_contours = [ - c.reshape(-1, 2)[:, ::-1] - for c in contours - if cv2.contourArea(c) < max_allowed_area - ] + # keep only the largest contour in each segmentation area + # and reshape for plotting + max_contour = max(contours, key=cv2.contourArea).squeeze() radius = np.sqrt(area / np.pi) euler_number = rp.euler_number # output of cucim ``rp`` stores values as objects @@ -164,7 +165,7 @@ def analyze_and_filter_masks( euler_number = euler_number.item() mask_info = { 'bbox': rp.bbox, - 'contour': all_contours, + 'contour': max_contour, 'major_axis': major_axis, 'minor_axis': minor_axis, 'area': area, @@ -202,7 +203,7 @@ def plot_filtered_masks( for idx, row in masks_summary_df.iterrows(): contour = row['contour'] bbox = row['bbox'] - ax.plot(contour[0][:, 1], contour[0][:, 0], linewidth=1, color='blue') + ax.plot(contour[:, 0], contour[:, 1], linewidth=1, color='blue') min_row, min_col, max_row, max_col = bbox rect = Rectangle( (min_col, min_row), @@ -271,16 +272,11 @@ def bubblesam_detection( ) # save filtered dataframe as parquet file - # convert ``contours`` and ``bbox`` columns to list to save as parquet + # convert ``contour`` column to list to save as parquet save_filtered_df = filtered_df.copy() - save_filtered_df["bbox"] = save_filtered_df["bbox"].apply(list) - save_filtered_df["contour"] = save_filtered_df["contour"].apply( - lambda x: [arr.tolist() if isinstance(arr, np.ndarray) else arr for arr in x] - ) + save_filtered_df["contour"] = save_filtered_df["contour"].apply(list) save_filtered_df.to_parquet( output_dir / f'{image_basename}_masks_filtered.parquet.gzip', - engine="fastparquet", - compression="gzip", ) if debug: diff --git a/neat_ml/tests/test_bubblesam.py b/neat_ml/tests/test_bubblesam.py index caf6d29..a7224b7 100644 --- a/neat_ml/tests/test_bubblesam.py +++ b/neat_ml/tests/test_bubblesam.py @@ -156,7 +156,6 @@ def test_bubblesam_detection_generates_pngs( ) saved_df = pd.read_parquet( out_dir / "circles_masks_filtered.parquet.gzip", - engine="fastparquet", ) saved_df["bbox"] = saved_df["bbox"].apply(tuple) saved_df['contour'] = saved_df['contour'].apply( @@ -318,3 +317,25 @@ def test_run_bubblesam_model_cfg_error(): """ with pytest.raises(ValueError, match="Must provide model configuration"): run_bubblesam(pd.DataFrame(), Path("output"), detection_cfg={}) + +def test_bubblesam_contours(): + """ + test that running `analyze_and_filter_masks` generates a dataframe with + only a single contour per detection and without background areas + """ + # create two segmentation maps, one that takes up the whole image (background) + # and one that has two segmented areas (one smaller than the other) + seg = np.ones((100, 100)).astype(bool) + seg2 = np.zeros((100, 100)).astype(bool) + seg2[50:60, 50:60] = True + seg2[40:45, 40:45] = True + input_df = pd.DataFrame({"segmentation": [seg, seg2]}) + # call `analyze_and_filter_masks` to return filtered dataframe + # (the circularity of a perfect square is ~0.8, so lower the + # circularity threshold so that the background only gets filtered + # out by the bounding box area) + df = analyze_and_filter_masks(input_df, 25, 0.7, device="cpu") + # assert that there is only a single dataframe row after filtration + # corresponding to the larger of the two segmented areas from `seg2` + assert df.bbox.item() == (50, 50, 60, 60) + assert df.contour.item().shape == (36, 2) From 103836f4ecef951a5b42fc1dfcbd21b73676c2ad Mon Sep 17 00:00:00 2001 From: Adam Date: Mon, 1 Jun 2026 17:21:57 -0600 Subject: [PATCH 2/6] BLD: remove fastparquet dependency --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 00d3eee..fa49310 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,6 @@ dependencies = [ 'pyyaml', 'pooch', 'pyarrow', - 'fastparquet', 'torch', 'torchvision', 'huggingface_hub', From 51a117eacf4871257268bae686d8a55928b9ca7d Mon Sep 17 00:00:00 2001 From: Adam Witmer Date: Fri, 5 Jun 2026 15:31:02 -0600 Subject: [PATCH 3/6] MAINT: address greptile review comments * fix bounding box area calculation * add axis argument to squeeze operation --- neat_ml/bubblesam/bubblesam.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neat_ml/bubblesam/bubblesam.py b/neat_ml/bubblesam/bubblesam.py index 090113f..66e6058 100644 --- a/neat_ml/bubblesam/bubblesam.py +++ b/neat_ml/bubblesam/bubblesam.py @@ -146,7 +146,7 @@ def analyze_and_filter_masks( # Using a small margin (2 pixels) to be safe, # filter any segmentations with bounding boxes close to the size of the image # because SAM-2 can sometimes detect the image background itself. - bbox_area = np.prod(rp.bbox[2:]) + bbox_area = (rp.bbox[2] - rp.bbox[0]) * (rp.bbox[3] - rp.bbox[1]) max_allowed_area = (h - 2) * (w - 2) if (area >= area_threshold and circ >= circularity_threshold and bbox_area < max_allowed_area): @@ -154,7 +154,7 @@ def analyze_and_filter_masks( contours, _ = cv2.findContours(binary_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE) # keep only the largest contour in each segmentation area # and reshape for plotting - max_contour = max(contours, key=cv2.contourArea).squeeze() + max_contour = max(contours, key=cv2.contourArea).squeeze(axis=1) radius = np.sqrt(area / np.pi) euler_number = rp.euler_number # output of cucim ``rp`` stores values as objects From 6d938c7411cb8b46162f2d48f5c34f221d5b4204 Mon Sep 17 00:00:00 2001 From: Adam Witmer Date: Tue, 9 Jun 2026 11:26:41 -0600 Subject: [PATCH 4/6] MAINT: add gzip compression back to bubblesam parquet --- neat_ml/bubblesam/bubblesam.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neat_ml/bubblesam/bubblesam.py b/neat_ml/bubblesam/bubblesam.py index 66e6058..d47f6be 100644 --- a/neat_ml/bubblesam/bubblesam.py +++ b/neat_ml/bubblesam/bubblesam.py @@ -277,6 +277,7 @@ def bubblesam_detection( save_filtered_df["contour"] = save_filtered_df["contour"].apply(list) save_filtered_df.to_parquet( output_dir / f'{image_basename}_masks_filtered.parquet.gzip', + compression="gzip", ) if debug: From 6cdaef2113739bea261ec5c1ccde8d48bc918dca Mon Sep 17 00:00:00 2001 From: Adam Witmer Date: Mon, 15 Jun 2026 15:52:21 -0600 Subject: [PATCH 5/6] MAINT, TST: PR #33 revisions * revert changes associated with removal of fastparquet * add test case for contour filtering to enforce bbox_area calculation --- neat_ml/bubblesam/bubblesam.py | 6 +++++- neat_ml/tests/test_bubblesam.py | 24 ++++++++++++++++++------ pyproject.toml | 1 + 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/neat_ml/bubblesam/bubblesam.py b/neat_ml/bubblesam/bubblesam.py index d47f6be..53f52c0 100644 --- a/neat_ml/bubblesam/bubblesam.py +++ b/neat_ml/bubblesam/bubblesam.py @@ -274,9 +274,13 @@ def bubblesam_detection( # save filtered dataframe as parquet file # convert ``contour`` column to list to save as parquet save_filtered_df = filtered_df.copy() - save_filtered_df["contour"] = save_filtered_df["contour"].apply(list) + save_filtered_df["bbox"] = save_filtered_df["bbox"].apply(list) + save_filtered_df["contour"] = save_filtered_df["contour"].apply( + lambda x: [arr.tolist() if isinstance(arr, np.ndarray) else arr for arr in x] + ) save_filtered_df.to_parquet( output_dir / f'{image_basename}_masks_filtered.parquet.gzip', + engine="fastparquet", compression="gzip", ) diff --git a/neat_ml/tests/test_bubblesam.py b/neat_ml/tests/test_bubblesam.py index a7224b7..6322e65 100644 --- a/neat_ml/tests/test_bubblesam.py +++ b/neat_ml/tests/test_bubblesam.py @@ -156,6 +156,7 @@ def test_bubblesam_detection_generates_pngs( ) saved_df = pd.read_parquet( out_dir / "circles_masks_filtered.parquet.gzip", + engine="fastparquet", ) saved_df["bbox"] = saved_df["bbox"].apply(tuple) saved_df['contour'] = saved_df['contour'].apply( @@ -318,17 +319,28 @@ def test_run_bubblesam_model_cfg_error(): with pytest.raises(ValueError, match="Must provide model configuration"): run_bubblesam(pd.DataFrame(), Path("output"), detection_cfg={}) -def test_bubblesam_contours(): +@pytest.mark.parametrize("seg_params, exp_bbox", + [ + # a test case where the segmentation contains two disjoint areas + ([[50, 60], [40, 45]], (50, 50, 60, 60)), + # a test case where the segmentation contains a region that touches + # the image boundary at the bottom right corner + ([[90, 100]], (90, 90, 100, 100)), + ] +) +def test_bubblesam_contours(seg_params, exp_bbox): """ test that running `analyze_and_filter_masks` generates a dataframe with only a single contour per detection and without background areas """ # create two segmentation maps, one that takes up the whole image (background) - # and one that has two segmented areas (one smaller than the other) + # and one containing the segmentation map generated using the test case parameters seg = np.ones((100, 100)).astype(bool) seg2 = np.zeros((100, 100)).astype(bool) - seg2[50:60, 50:60] = True - seg2[40:45, 40:45] = True + for seg_param in seg_params: + start = seg_param[0] + end = seg_param[1] + seg2[start:end, start:end] = True input_df = pd.DataFrame({"segmentation": [seg, seg2]}) # call `analyze_and_filter_masks` to return filtered dataframe # (the circularity of a perfect square is ~0.8, so lower the @@ -336,6 +348,6 @@ def test_bubblesam_contours(): # out by the bounding box area) df = analyze_and_filter_masks(input_df, 25, 0.7, device="cpu") # assert that there is only a single dataframe row after filtration - # corresponding to the larger of the two segmented areas from `seg2` - assert df.bbox.item() == (50, 50, 60, 60) + # corresponding to the appropriate segmentation map to keep from `seg2` + assert df.bbox.item() == exp_bbox assert df.contour.item().shape == (36, 2) diff --git a/pyproject.toml b/pyproject.toml index fa49310..00d3eee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ 'pyyaml', 'pooch', 'pyarrow', + 'fastparquet', 'torch', 'torchvision', 'huggingface_hub', From cd8e7dcc9b6d37edfb19e016698ba90e6492c65b Mon Sep 17 00:00:00 2001 From: Adam Witmer Date: Mon, 15 Jun 2026 15:56:08 -0600 Subject: [PATCH 6/6] DOC: fix inline comment --- neat_ml/bubblesam/bubblesam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neat_ml/bubblesam/bubblesam.py b/neat_ml/bubblesam/bubblesam.py index 53f52c0..880b0e7 100644 --- a/neat_ml/bubblesam/bubblesam.py +++ b/neat_ml/bubblesam/bubblesam.py @@ -272,7 +272,7 @@ def bubblesam_detection( ) # save filtered dataframe as parquet file - # convert ``contour`` column to list to save as parquet + # convert ``contour`` and ``bbox`` columns to list to save as parquet save_filtered_df = filtered_df.copy() save_filtered_df["bbox"] = save_filtered_df["bbox"].apply(list) save_filtered_df["contour"] = save_filtered_df["contour"].apply(