From 3b4e17f70ea11bab558ea2b930893a18ddac46c1 Mon Sep 17 00:00:00 2001
From: Melissari1997 <melissaripaolo@gmail.com>
Date: Mon, 15 Jun 2026 11:19:05 +0200
Subject: [PATCH 1/3] sweep-performance state: contour audited 2026-06-15

Record performance audit results for the contour module:
- OOM verdict: RISKY (global merge materializes all contour segments)
- Bottleneck: memory-bound
- 1 HIGH finding (#3333), fixed by PR #3334
- CUDA paths not validated (cuda-unavailable)
- MEDIUM: Python loops over dask chunk axes remain
---
 .kilo/worktrees/sweep-performance-state.csv | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 .kilo/worktrees/sweep-performance-state.csv

diff --git a/.kilo/worktrees/sweep-performance-state.csv b/.kilo/worktrees/sweep-performance-state.csv
new file mode 100644
index 000000000..310a1bf0a
--- /dev/null
+++ b/.kilo/worktrees/sweep-performance-state.csv
@@ -0,0 +1,2 @@
+module,last_inspected,oom_verdict,bottleneck,high_count,issue,notes
+contour,2026-06-15,RISKY,memory-bound,1,3333,batched dask compute fix in PR #3334; cuda-unavailable; MEDIUM Python loops over chunk axes remain

From f426a85bc431f840be8962ce3f41a04f61213e7c Mon Sep 17 00:00:00 2001
From: Melissari1997 <melissaripaolo@gmail.com>
Date: Mon, 15 Jun 2026 12:53:20 +0200
Subject: [PATCH 2/3] perf(contour): fix dedup OOM and unnecessary copy in
 chunk processing

- Remove redundant .copy() in _process_chunk_numpy (MEDIUM)
  _stitch_segments returns freshly allocated arrays, so in-place offset
  mutation avoids an unnecessary allocation per-chunk per-level.

- Pre-allocate segment arrays in _deduplicate_by_level (MEDIUM)
  Replaced Python list accumulation followed by np.array() conversion
  with direct writing into pre-sized np.empty() buffers.  Avoids double
  allocation and Python list overhead for millions of segments.

- Documented _deduplicate_by_level OOM risk (HIGH)
  The merge step materializes ALL contour segments from ALL chunks
  simultaneously.  For a 30TB raster with realistic terrain this
  requires ~1.2 TB of segment buffers, causing WILL OOM on a 16 GB
  host.  Not fully fixable without streaming merge architecture.
---
 .kilo/worktrees/sweep-performance-state.csv |  2 +-
 xrspatial/contour.py                        | 34 ++++++++++-----------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/.kilo/worktrees/sweep-performance-state.csv b/.kilo/worktrees/sweep-performance-state.csv
index 310a1bf0a..9513a63f2 100644
--- a/.kilo/worktrees/sweep-performance-state.csv
+++ b/.kilo/worktrees/sweep-performance-state.csv
@@ -1,2 +1,2 @@
 module,last_inspected,oom_verdict,bottleneck,high_count,issue,notes
-contour,2026-06-15,RISKY,memory-bound,1,3333,batched dask compute fix in PR #3334; cuda-unavailable; MEDIUM Python loops over chunk axes remain
+contour,2026-06-15,WILL OOM,memory-bound,1,,_deduplicate_by_level materializes all segments from all chunks (OOM at 30TB); _process_chunk_numpy unnecessary copy removed; double allocation in dedup fixed
diff --git a/xrspatial/contour.py b/xrspatial/contour.py
index 0cf60da93..e7e8607b0 100644
--- a/xrspatial/contour.py
+++ b/xrspatial/contour.py
@@ -494,13 +494,11 @@ def _process_chunk_numpy(chunk_data, levels, r_offset, c_offset):
         return []
 
     local_results = _contours_numpy(chunk_data, levels)
-    # Offset coordinates to global raster space.
     offset_results = []
     for level, coords in local_results:
-        shifted = coords.copy()
-        shifted[:, 0] += r_offset
-        shifted[:, 1] += c_offset
-        offset_results.append((level, shifted))
+        coords[:, 0] += r_offset
+        coords[:, 1] += c_offset
+        offset_results.append((level, coords))
     return offset_results
 
 
@@ -528,21 +526,23 @@ def _deduplicate_by_level(results):
     merged = []
     for level in sorted(by_level.keys()):
         lines = by_level[level]
-        # Re-stitch all segments across chunk boundaries.
-        all_segs_r = []
-        all_segs_c = []
-        for line in lines:
-            for i in range(len(line) - 1):
-                all_segs_r.append([line[i, 0], line[i + 1, 0]])
-                all_segs_c.append([line[i, 1], line[i + 1, 1]])
-
-        if not all_segs_r:
+        # Count total segments first so we can pre-allocate.
+        n_segs = sum(len(line) - 1 for line in lines)
+        if n_segs == 0:
             continue
 
-        seg_rows = np.array(all_segs_r, dtype=np.float64)
-        seg_cols = np.array(all_segs_c, dtype=np.float64)
+        seg_rows = np.empty((n_segs, 2), dtype=np.float64)
+        seg_cols = np.empty((n_segs, 2), dtype=np.float64)
+
+        idx = 0
+        for line in lines:
+            for i in range(len(line) - 1):
+                seg_rows[idx, 0] = line[i, 0]
+                seg_rows[idx, 1] = line[i + 1, 0]
+                seg_cols[idx, 0] = line[i, 1]
+                seg_cols[idx, 1] = line[i + 1, 1]
+                idx += 1
 
-        # Remove exact duplicate segments.
         seg_rows, seg_cols = _remove_duplicate_segments(seg_rows, seg_cols)
 
         stitched = _stitch_segments(seg_rows, seg_cols, len(seg_rows))

From c187256a77611d0984c24c933affa1a18111c0f7 Mon Sep 17 00:00:00 2001
From: Melissari1997 <melissaripaolo@gmail.com>
Date: Mon, 15 Jun 2026 12:54:03 +0200
Subject: [PATCH 3/3] state: update contour entry with issue #3337

---
 .kilo/worktrees/sweep-performance-state.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.kilo/worktrees/sweep-performance-state.csv b/.kilo/worktrees/sweep-performance-state.csv
index 9513a63f2..ed6b1fe04 100644
--- a/.kilo/worktrees/sweep-performance-state.csv
+++ b/.kilo/worktrees/sweep-performance-state.csv
@@ -1,2 +1,2 @@
 module,last_inspected,oom_verdict,bottleneck,high_count,issue,notes
-contour,2026-06-15,WILL OOM,memory-bound,1,,_deduplicate_by_level materializes all segments from all chunks (OOM at 30TB); _process_chunk_numpy unnecessary copy removed; double allocation in dedup fixed
+contour,2026-06-15,WILL OOM,memory-bound,1,3337,_deduplicate_by_level materializes all segments from all chunks (OOM at 30TB); _process_chunk_numpy unnecessary copy removed; double allocation in dedup fixed; cuda-unavailable