From 2e6b81f9d37556d8854b29d234020a8297021193 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 18 Jun 2026 17:23:26 -0700 Subject: [PATCH 1/9] #300: add h3_res4/h3_res6 to samples_map_lite for in-browser filtered clusters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The explorer will aggregate filtered H3 clusters on the fly off samples_map_lite (GROUP BY the res-appropriate h3 column + the #293 mask predicate) so a broad facet filter at world zoom renders as fast filtered clusters instead of capped raw points. samp_geo already computes h3_res4/h3_res6/h3_res8; lite carried only res8 (point-mode cell lookups). Add res4/res6 (UBIGINT) — they dictionary-compress well, so the size delta is small. - build_samples_map_lite: emit h3_res4, h3_res6 alongside h3_res8 - validate: map_lite re-derivation now covers res4/res6 - header doc + corruption-test schema updated to match - fixture tests: 23/23 Republish of the 202608 lite to R2 follows as a separate data step; the browser feature gates on the columns being present (falls back to today's point-mode behavior otherwise), so this is safe to ship before the republish. 
--- scripts/build_frontend_derived.py | 10 +++++++++- scripts/validate_frontend_derived.py | 3 ++- tests/test_frontend_derived.py | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/scripts/build_frontend_derived.py b/scripts/build_frontend_derived.py index 6101d72..d92cff0 100755 --- a/scripts/build_frontend_derived.py +++ b/scripts/build_frontend_derived.py @@ -14,7 +14,7 @@ OUTPUTS (into --outdir, prefixed --tag): - {tag}_sample_facets_v2.parquet pid, source, material, context, object_type, label, description (search-only; includes appended concept labels), place_name(VARCHAR) - - {tag}_samples_map_lite.parquet pid, label, source, latitude, longitude, place_name(VARCHAR[]), result_time, h3_res8(UBIGINT), h3_res8_hex + - {tag}_samples_map_lite.parquet pid, label, source, latitude, longitude, place_name(VARCHAR[]), result_time, h3_res4(UBIGINT), h3_res6(UBIGINT), h3_res8(UBIGINT), h3_res8_hex - {tag}_h3_summary_res{4,6,8}.parquet h3_cell(UBIGINT), sample_count(INT), center_lat, center_lng, dominant_source, source_count(INT), resolution(INT) - {tag}_facet_summaries.parquet facet_type, facet_value, scheme, count - {tag}_facet_cross_filter.parquet filter_source/material/context/object_type, facet_type, facet_value, count @@ -231,8 +231,16 @@ def build_sample_facets_v2(con, out): def build_samples_map_lite(con, out): + # h3_res4/h3_res6 (#300): the browser aggregates filtered clusters on the + # fly off this file (GROUP BY the res-appropriate h3 column + the #293 mask + # predicate) so a broad facet filter at world zoom renders as fast filtered + # clusters instead of capped raw points. They dictionary-compress well (far + # fewer distinct values than res8), so the size delta is small. h3_res8 was + # already present (point-mode selected-cell lookups). 
con.execute(f"""COPY ( SELECT pid, label, source, latitude, longitude, place_name, result_time, + h3_res4::UBIGINT AS h3_res4, + h3_res6::UBIGINT AS h3_res6, h3_res8::UBIGINT AS h3_res8, h3_h3_to_string(h3_res8) AS h3_res8_hex FROM samp_geo ORDER BY pid diff --git a/scripts/validate_frontend_derived.py b/scripts/validate_frontend_derived.py index c629206..debd8e4 100755 --- a/scripts/validate_frontend_derived.py +++ b/scripts/validate_frontend_derived.py @@ -213,9 +213,10 @@ def except_diff(asql, bsql): "facets rows differ from a re-derivation off the wide (corruption/stale/wrong-version)") ref_ml = ("SELECT pid, label, source, latitude, longitude, result_time, " + "h3_res4::UBIGINT AS h3_res4, h3_res6::UBIGINT AS h3_res6, " "h3_res8::UBIGINT AS h3_res8, h3_h3_to_string(h3_res8) AS h3_res8_hex, " "place_name::VARCHAR AS pn FROM samp_geo") - file_ml = (f"SELECT pid, label, source, latitude, longitude, result_time, h3_res8, h3_res8_hex, " + file_ml = (f"SELECT pid, label, source, latitude, longitude, result_time, h3_res4, h3_res6, h3_res8, h3_res8_hex, " f"place_name::VARCHAR AS pn FROM {ML}") check("map_lite == fresh build from --wide", except_diff(ref_ml, file_ml) == 0, "map_lite coords/h3/place_name differ from a re-derivation off the wide") diff --git a/tests/test_frontend_derived.py b/tests/test_frontend_derived.py index aa01018..94e05bc 100644 --- a/tests/test_frontend_derived.py +++ b/tests/test_frontend_derived.py @@ -179,7 +179,7 @@ def test_semantic_gate_catches_corruption_that_internal_checks_miss(tmp_path): con = duckdb.connect() tmp_ml = ml + ".tmp" con.execute(f"""COPY (SELECT pid, label, source, 0.0::DOUBLE AS latitude, longitude, - place_name, result_time, h3_res8, h3_res8_hex FROM read_parquet('{ml}')) + place_name, result_time, h3_res4, h3_res6, h3_res8, h3_res8_hex FROM read_parquet('{ml}')) TO '{tmp_ml}' (FORMAT PARQUET)""") con.close(); os.replace(tmp_ml, ml) From c8c72a88bf197d16a7a1529cec4d71f4b7b5966b Mon Sep 17 00:00:00 2001 From: Raymond Yee 
Date: Thu, 18 Jun 2026 22:01:33 -0700 Subject: [PATCH 2/9] #300 C1 (infra): filter-aware loadRes + readiness preflight + cluster sig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dormant infrastructure for filtered clusters — no behavior change yet because computeTargetMode still forces point mode when a facet is active (relaxed in C2). - filteredClustersReady preflight cell: probes samples_map_lite for h3_res4/res6; sets window.__filteredClustersReady. Hard requirement is only the columns (masks readiness is orthogonal — facetFilterSQL self-falls-back to membership). Safe before the lite republish: flag false → today's point-mode behavior. - Top-level helpers: wantFilteredClusters(), desiredClusterSig() (semantic, not SQL text — kind + sources + tree selections), filteredClusterSQL(res) (masks- backed lite aggregation; INTEGER casts; same columns/grain as build_h3_summary). - loadRes: when wantFilteredClusters(), query filtered lite instead of the summary parquet. Snapshot the sig BEFORE the await; discard on `gen !== loadResGen || sig !== desiredClusterSig()` (filters toggled mid-query). Stamp viewer._clusterFilterSig on success. - phase1: seed viewer._clusterFilterSig from the initial summary load. Render OK. Per Codex design review (P0.1 casts, P0.2/P1.3 snapshot signature). --- explorer.qmd | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/explorer.qmd b/explorer.qmd index c1506ba..e050a23 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -1191,6 +1191,55 @@ function facetFilterSQL() { return ' AND ' + parts.map(p => `(${p})`).join(' AND '); } +// #300: true when a facet filter is active AND lite carries h3_res4/res6, so the +// cluster layer should be aggregated from the FILTERED sample set instead of the +// pre-aggregated (facet-blind) summary parquets. Top-level so both phase1 and the +// zoom watcher's loadRes can use it. 
+function wantFilteredClusters() { + return hasFacetFilters() && (typeof window !== 'undefined' && window.__filteredClustersReady === true); +} + +// #300: a STABLE, SEMANTIC description of what the current cluster layer should +// contain — data kind (filtered vs pre-aggregated summary), the active ?sources= +// set, and the tree facet selections. Captured BEFORE a cluster load and compared +// again AFTER the await so a filter change mid-query can't mislabel the cache +// (Codex P0.2/P1.3). Deliberately NOT the SQL text: masks becoming ready can +// change the SQL strategy without changing results. +function desiredClusterSig() { + return JSON.stringify({ + kind: wantFilteredClusters() ? 'filtered' : 'summary', + sources: getActiveSources().slice().sort(), + material: treeSelection('material').slice().sort(), + context: treeSelection('context').slice().sort(), + objectType: treeSelection('object_type').slice().sort(), + }); +} + +// #300: the filtered-cluster aggregation off samples_map_lite (masks-backed via +// facetFilterSQL). Same columns + grain + INTEGER casts as the pre-aggregated h3 +// summary parquet (build_h3_summary), so the loadRes render/cache/stats path is +// unchanged. dominant_source is recomputed over the FILTERED subset (source- +// filtered at the row level), the coherent meaning when a filter is active. 
+function filteredClusterSQL(res) { + return ` + WITH base AS ( + SELECT h3_res${res} AS cell, source, latitude, longitude + FROM read_parquet('${lite_url}') + WHERE h3_res${res} IS NOT NULL${sourceFilterSQL('source')}${facetFilterSQL()} + ), + sc AS (SELECT cell, source, COUNT(*) AS c FROM base GROUP BY cell, source), + dom AS (SELECT cell, source AS dominant_source, + ROW_NUMBER() OVER (PARTITION BY cell ORDER BY c DESC, source ASC) AS rn FROM sc), + agg AS (SELECT cell, COUNT(*)::INTEGER AS sample_count, + ROUND(AVG(latitude), 6) AS center_lat, ROUND(AVG(longitude), 6) AS center_lng, + COUNT(DISTINCT source)::INTEGER AS source_count + FROM base GROUP BY cell) + SELECT CAST(agg.cell AS VARCHAR) AS h3_cell_dec, agg.sample_count, + agg.center_lat, agg.center_lng, dom.dominant_source, agg.source_count + FROM agg JOIN dom ON dom.cell = agg.cell AND dom.rn = 1 + `; +} + // Shared viewport-padding factor. The samples table (PR #219), the // point-mode sample loader, and the cluster-mode "Samples in View" // stat (issue #221 round 2) all expand the raw view rectangle by this @@ -1728,6 +1777,35 @@ nodeBitsReady = { } ``` +```{ojs} +//| echo: false +//| output: false +// #300: preflight whether samples_map_lite carries h3_res4/h3_res6. The explorer +// aggregates FILTERED clusters on the fly off lite (GROUP BY the res-appropriate +// h3 column + the #293 mask predicate) so a broad facet filter at world zoom +// renders as fast filtered clusters instead of capped raw points. The hard +// requirement is ONLY that lite has res4/res6 (masks readiness is orthogonal: +// facetFilterSQL() self-falls-back to the membership scan when masks absent — +// that affects speed, not correctness). Best-effort: on ANY failure the flag +// stays false and computeTargetMode keeps today's point-mode behavior, so this +// is safe to ship BEFORE the lite republish. 
When readiness flips true after a +// facet is already active, window.__onFilteredClustersReady (set by the zoom +// watcher) reconciles the globe. +filteredClustersReady = { + window.__filteredClustersReady = false; + try { + await db.query(`SELECT h3_res4, h3_res6 FROM read_parquet('${lite_url}') LIMIT 1`); + window.__filteredClustersReady = true; + if (typeof window.__onFilteredClustersReady === 'function') window.__onFilteredClustersReady(); + return true; + } catch (err) { + console.warn('samples_map_lite lacks h3_res4/h3_res6; #300 filtered clusters disabled (point-mode fallback):', err); + window.__filteredClustersReady = false; + return false; + } +} +``` + ```{ojs} //| echo: false @@ -1983,6 +2061,11 @@ phase1 = { // Cache cluster data for viewport counting viewer._clusterData = Array.from(data); viewer._clusterTotal = { clusters: data.length, samples: totalSamples }; + // #300: phase1 loads the pre-aggregated summary (facet-blind) clusters, so + // label the cache with the current summary/source/tree signature. Without + // this seed, the first post-boot cluster reconcile couldn't tell whether the + // initial layer already matched the desired filter state (Codex P1.3). + viewer._clusterFilterSig = desiredClusterSig(); performance.mark('p1-end'); performance.measure('p1', 'p1-start', 'p1-end'); @@ -2742,11 +2825,13 @@ zoomWatcher = { const loadRes = async (res, url, opts = {}) => { const gen = ++loadResGen; // claim a generation loading = true; + const filtered = wantFilteredClusters(); // #300: source vs filtered lite + const sig = desiredClusterSig(); // #300: snapshot BEFORE the await updatePhaseMsg(opts.loadingMsg || `Loading H3 res${res}...`, 'loading'); try { performance.mark(`r${res}-s`); - const data = await db.query(` + const data = await db.query(filtered ? 
filteredClusterSQL(res) : ` SELECT CAST(h3_cell AS VARCHAR) AS h3_cell_dec, sample_count, center_lat, center_lng, dominant_source, source_count @@ -2754,7 +2839,11 @@ zoomWatcher = { WHERE 1=1${sourceFilterSQL('dominant_source')} `); - if (gen !== loadResGen) return false; // stale — a newer call superseded this one + // Stale if a newer load superseded us (gen) OR the desired cluster + // CONTENTS changed while we queried (filters toggled mid-flight) — + // applying now would label the cache with the wrong filter sig + // (Codex P0.2/P1.3). + if (gen !== loadResGen || sig !== desiredClusterSig()) return false; viewer.h3Points.removeAll(); const scalar = new Cesium.NearFarScalar(1.5e2, 1.5, 8.0e6, 0.3); let total = 0; @@ -2778,6 +2867,7 @@ zoomWatcher = { // Cache for viewport counting viewer._clusterData = Array.from(data); viewer._clusterTotal = { clusters: data.length, samples: total }; + viewer._clusterFilterSig = sig; // #300: label the cache with what it IS performance.mark(`r${res}-e`); performance.measure(`r${res}`, `r${res}-s`, `r${res}-e`); From c2ba2d99d2dcfcd338c411dda2c397a58a90d552 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 18 Jun 2026 22:07:31 -0700 Subject: [PATCH 3/9] #300 C2 (activation): facets render filtered clusters at world zoom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relax the #267 force-point rule: with a facet active above EXIT_POINT_ALT (and filtered clusters ready), the map now shows FILTERED h3 clusters instead of capped raw points. Zoom-in still drops to individual dots. Search stays point-latched (out of scope). - computeTargetMode(alt, latch=getMode()): search→point; facet&&!ready→point (pre-republish fallback); else ENTER/EXIT altitude hysteresis. latch param lets a URL restore resolve the band against the saved mode (Codex P1.7). - reconcileGlobeForFilters(): shared transition for both filter-change handlers. 
Point branch invalidates in-flight cluster loads (loadResGen++) so a stale loadRes can't paint under point mode (P1.4). Cluster branch loads filtered clusters into the hidden layer FIRST, then exits point only if applied — no stale-cluster flash on a failed/superseded load (P1.10) — then chases tryEnterPointModeIfNeeded() (supersession invariant, P1.4). - handleFacetFilterChange / applySearchFilterChange: route through the reconcile. - camera.changed cluster branches: reload when resolution OR filter signature changed (a facet toggle in cluster mode refreshes the filtered cells); point→ cluster uses load-first-then-exit (P1.5/P1.10). - moveEnd gate (was: exit only when no filter): now computeTargetMode-driven, so a sub-10% zoom-out with a facet active loads filtered clusters; listener made async (return value unused) (P1.5). - Readiness→reconcile hook (window.__onFilteredClustersReady) for late preflight / republished lite (P1.9). Render OK. C3 (deep-link/boot restore, filtered click hydration, facet note) next. --- explorer.qmd | 224 ++++++++++++++++++++++++++++----------------------- 1 file changed, 122 insertions(+), 102 deletions(-) diff --git a/explorer.qmd b/explorer.qmd index e050a23..5cede9e 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -2787,19 +2787,24 @@ zoomWatcher = { // (relax the FACET case above EXIT_POINT_ALT to filtered clusters) will live. const filtersForcePoint = () => searchIsActive() || hasFacetFilters(); - // #208: the single authority that maps (altitude, current mode) → target - // mode, with the point/cluster hysteresis band. The camera-changed handler - // routes through this; pre-#300 behavior is preserved exactly: - // - any active filter → point (at every altitude, #267) - // - else below ENTER_POINT_ALT → point - // - else above EXIT_POINT_ALT → cluster - // - else (inside the hysteresis band) → unchanged - const computeTargetMode = (alt) => - filtersForcePoint() ? 
'point' + // #208/#300: the single authority that maps (altitude, latch) → target mode, + // with the point/cluster hysteresis band. + // - search active → point (clusters can't be text-filtered, #234 C3) + // - facet active but filtered clusters NOT ready (pre-republish / preflight + // pending) → point, exactly as pre-#300 (#267): the summary clusters are + // facet-blind, so point mode renders the real filtered dots + // - otherwise the ALTITUDE hysteresis decides for both filtered and + // unfiltered (#300): facet-active cluster loads are filter-aware + // (loadRes aggregates off the masks), so clusters stay coherent + // - `latch` resolves the hysteresis band; defaults to the live mode, but a + // URL/back-forward restore passes the saved latch (Codex P1.7) so the band + // doesn't resolve to the mode of the view being LEFT + const computeTargetMode = (alt, latch = getMode()) => + searchIsActive() ? 'point' + : (hasFacetFilters() && !(typeof window !== 'undefined' && window.__filteredClustersReady === true)) ? 'point' : alt < ENTER_POINT_ALT ? 'point' : alt > EXIT_POINT_ALT ? 'cluster' - : getMode(); // evaluated only in the hysteresis band, exactly as the - // original inline expression did (Codex PR4c caveat) + : latch; // No viewport cache: the samples table (PR #219) re-queries on every // `moveEnd` against the current padded bbox, so reusing a cached @@ -4132,39 +4137,51 @@ zoomWatcher = { // surface the explanatory `#facetNote` so users understand the filter // takes effect at neighborhood zoom. See issue #156, Phase 1, and // `syncFacetNote()` for the shared visibility invariant (#234 step 1). + // #300: drive the globe to the altitude- and filter-appropriate mode after a + // facet or search change — the single transition path shared by both + // filter-change handlers. 
Honors computeTargetMode: + // - point: render the real filtered dots (search at any zoom; facets below + // EXIT, or whenever filtered clusters aren't ready) + // - cluster: render the FILTERED h3 clusters (facets above EXIT, once ready) + async function reconcileGlobeForFilters() { + const h = viewer.camera.positionCartographic.height; + if (computeTargetMode(h) === 'point') { + // Invalidate any in-flight cluster load so a stale loadRes can't paint + // clusters/stats under point mode — a search activation may start no + // newer loadRes of its own (Codex P1.4). + loadResGen++; + if (getMode() !== 'point') await enterPointMode(false); + else await loadViewportSamples(); // re-filter the dots in place + } else { + // Target is clusters at this altitude. Load FIRST (into the hidden h3 + // layer while still in point mode), and only drop point mode if the + // load applied — so a failed/superseded filtered load never exposes + // stale clusters (Codex P1.10). loadRes is filter-aware: with a facet + // active + ready it aggregates the FILTERED set off the masks. + const res = h > 3000000 ? 4 : h > 300000 ? 6 : 8; + const wasPoint = getMode() === 'point'; + const applied = await loadRes(res, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[res]); + if (!applied) return; // keep current mode; no stale flash + if (wasPoint) exitPointMode(false); + // Supersession invariant: a zoom below ENTER during the load still + // lands in point mode (Codex P1.4). + await tryEnterPointModeIfNeeded(); + } + } + async function handleFacetFilterChange() { busyAcquire(); try { syncFacetNote(); writeQueryState(); refreshHeatmap(); - // #267: facet selection must visibly drive the MAP, not just the - // table/legend. Cluster dots come from pre-aggregated H3 summaries - // that carry only `dominant_source`, so they cannot honor a - // material/context/object_type facet. 
Mirror the committed-search - // path (applySearchFilterChange / C3): when any such facet is - // active, FORCE point mode so the map shows the actual filtered - // dots (which already apply facetFilterSQL()). When facets are - // cleared, revert to the altitude-appropriate mode — unless a text - // search is still latching point mode. - if (hasFacetFilters()) { - if (getMode() !== 'point') { - await enterPointMode(false); // forces point; awaits filtered viewport load - } else { - await loadViewportSamples(); - } - } else if (getMode() === 'point') { - const h = viewer.camera.positionCartographic.height; - if (!searchIsActive() && h >= EXIT_POINT_ALT) { - exitPointMode(false); - const target = h > 3000000 ? 4 : h > 300000 ? 6 : 8; - if (target !== currentRes) { - await loadRes(target, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[target]); - } - } else { - await loadViewportSamples(); - } - } + // #267/#300: a facet selection must visibly drive the MAP. Above + // EXIT_POINT_ALT (once filtered clusters are ready) the map shows the + // FILTERED h3 clusters; below it, the real filtered dots. The shared + // reconcile picks the right mode from the current altitude + filter + // state (and reloads filtered clusters in place when already in + // cluster mode, since the filter set changed). + await reconcileGlobeForFilters(); refreshFacetCounts(); await new Promise(r => setTimeout(r, 300)); } finally { @@ -4193,32 +4210,12 @@ zoomWatcher = { syncSearchPanelState(); syncFacetNote(); refreshHeatmap(); - // #267: point mode is latched by EITHER an active search OR an - // active facet — both filter the sample set and can't be shown as - // clusters. So clearing the search must NOT revert to clusters if a - // facet is still checked (and vice-versa, handled in - // handleFacetFilterChange). Use the same forced-point predicate as - // every other latch. 
- const forcePoint = filtersForcePoint(); - if (forcePoint) { - if (getMode() !== 'point') { - await enterPointMode(false); // forces point; awaits filtered viewport load - } else { - await loadViewportSamples(); - } - } else { - // Neither search nor facet active: revert to altitude-appropriate mode. - const h = viewer.camera.positionCartographic.height; - if (getMode() === 'point' && h >= EXIT_POINT_ALT) { - exitPointMode(false); - const target = h > 3000000 ? 4 : h > 300000 ? 6 : 8; - if (target !== currentRes) { - await loadRes(target, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[target]); - } - } else if (getMode() === 'point') { - await loadViewportSamples(); // still point altitude — reload now-unfiltered points - } - } + // #267/#300: search always latches point mode (clusters can't be + // text-filtered). Clearing search must NOT revert to clusters if a + // facet is still active — but at world zoom that now means FILTERED + // clusters, not point mode. The shared reconcile resolves all of this + // from the live search + facet + altitude state. + await reconcileGlobeForFilters(); refreshFacetCounts(); if (typeof window !== 'undefined') window.refreshSamplesTable?.(); } finally { @@ -4229,6 +4226,24 @@ zoomWatcher = { document.getElementById('contextFilterBody').addEventListener('change', handleFacetFilterChange); document.getElementById('objectTypeFilterBody').addEventListener('change', handleFacetFilterChange); + // #300: filtered clusters may become usable AFTER the page already has a facet + // active — the lite preflight resolves asynchronously, or a republished lite + // lands on a later visit. When that readiness flips true, reconcile the globe + // so a world-zoom filtered view switches from forced point mode to filtered + // clusters without needing a camera nudge (Codex P1.9). 
The preflight cell + // invokes this hook; if it resolved BEFORE this watcher registered the hook, + // the deferred guarded check below catches it (and no-ops if boot already set + // the right mode). Any later camera/moveEnd settle is a further backstop. + window.__onFilteredClustersReady = () => { + if (hasFacetFilters()) reconcileGlobeForFilters(); + }; + setTimeout(() => { + if (window.__filteredClustersReady === true && hasFacetFilters()) { + const h = viewer.camera.positionCartographic.height; + if (computeTargetMode(h) !== getMode()) reconcileGlobeForFilters(); + } + }, 0); + // --- Shared settled-camera tail (#208 smell 1b) --- // The single reconciliation entry point both settled-camera listeners run // once the camera has come to rest: refresh the cluster-mode "Samples in @@ -4299,21 +4314,25 @@ zoomWatcher = { await tryEnterPointModeIfNeeded(); } } else if (targetMode === 'cluster' && getMode() !== 'cluster') { - exitPointMode(); - // Reload appropriate resolution + // point→cluster (e.g. zoom out past EXIT with a facet active → + // FILTERED clusters, #300). Load into the hidden h3 layer FIRST, + // then drop point mode, so a failed/superseded load never flashes + // stale clusters (Codex P1.10). Reload when the resolution OR the + // filter signature changed; if neither, the cached clusters are + // already correct and we just switch layers. const target = h > 3000000 ? 4 : h > 300000 ? 6 : 8; - if (target !== currentRes && !loading) { - const applied = await loadRes(target, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[target]); - // The user may have crossed below ENTER_POINT_ALT while - // this cluster load was in flight; reconcile after it - // settles so no extra camera nudge is required. 
- // - // Skip chase on non-applied returns (issue #193): a - // stale return is recovered by the supersedor's own - // chase, and a failed return should leave the user's - // "Failed to load…" message visible instead of - // overpainting it with "Fetching sample index…". - if (applied) await tryEnterPointModeIfNeeded(); + const needLoad = (target !== currentRes || viewer._clusterFilterSig !== desiredClusterSig()) && !loading; + const applied = needLoad + ? await loadRes(target, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[target]) + : true; + if (applied) { + exitPointMode(); + // The user may have crossed below ENTER_POINT_ALT while this + // load was in flight; reconcile so no extra camera nudge is + // required. Skip on non-applied returns (issue #193): a stale + // return is recovered by the supersedor's chase, and a failed + // return leaves point mode + the error message intact (P1.10). + await tryEnterPointModeIfNeeded(); } } else if (targetMode === 'point') { // Already in point mode — viewport sample refresh is driven @@ -4322,9 +4341,12 @@ zoomWatcher = { // (`percentageChanged = 0.1`) so sub-10% pans don't fire // here; the `moveEnd` path catches every settled move. } else { - // Cluster mode — check if resolution should change + // Cluster mode — reload if the resolution OR the filter signature + // changed. The sig check makes a facet toggle that lands here + // (already in cluster mode at the same res) refresh the FILTERED + // clusters instead of leaving the previous filter's cells (#300). const target = h > 3000000 ? 4 : h > 300000 ? 
6 : 8; - if (target !== currentRes && !loading) { + if ((target !== currentRes || viewer._clusterFilterSig !== desiredClusterSig()) && !loading) { const applied = await loadRes(target, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[target]); // The user may have crossed below ENTER_POINT_ALT while // this cluster load was in flight; reconcile after it @@ -4401,7 +4423,7 @@ zoomWatcher = { } }); - viewer.camera.moveEnd.addEventListener(() => { + viewer.camera.moveEnd.addEventListener(async () => { // Settled-camera tail shared with `camera.changed` (#208 smell 1b): // URL-hash write + cluster "Samples in View" refresh. moveEnd fires on // every discrete settle including sub-10% pans that `camera.changed` @@ -4416,27 +4438,25 @@ zoomWatcher = { refreshHeatmap(); if (getMode() !== 'point') return; const h = viewer.camera.positionCartographic.height; - // A1 (#234 Step 4) / C3: while a search is active, point mode is - // LATCHED (clusters can't be text-filtered), so a high-altitude - // moveEnd must NOT exit to clusters — otherwise the post-search - // flyTo (200 km, above EXIT_POINT_ALT) would immediately undo the - // forced point mode and the globe would show unfiltered clusters. - if (h > EXIT_POINT_ALT && !searchIsActive() && !hasFacetFilters()) { - // Sub-10% zoom-out from point mode (e.g. 175 km → 181 km) won't - // fire `camera.changed`, so without driving the exit here we'd - // be stuck in point mode above `EXIT_POINT_ALT` until a larger - // camera move. Mirror the `camera.changed` cluster-transition - // branch's `exitPointMode()` (Codex round-2 review of #221). - // The cluster-resolution reload that `camera.changed` also - // does is not needed for these small zoom-outs: in the - // 180–300 km band the target resolution stays res8, which - // point mode already had loaded. Larger zoom-outs that cross - // a resolution threshold will themselves fire `camera.changed` - // and run the normal reload path. 
The reverse direction - // (sub-10% zoom-in past `ENTER_POINT_ALT` from cluster mode) - // is left to `camera.changed` — pre-existing behavior, out of - // scope here. - exitPointMode(); + // A1 (#234 Step 4) / C3 + #300: route the sub-10% zoom-out transition + // through computeTargetMode so it matches `camera.changed`. Search keeps + // point mode LATCHED (computeTargetMode → 'point' while a search is + // active) so the post-search flyTo can't bounce to clusters. A facet at + // world zoom now targets FILTERED clusters instead of staying in point. + if (computeTargetMode(h) === 'cluster') { + // Sub-10% zoom-out from point mode (e.g. 175 km → 181 km) won't fire + // `camera.changed`, so without driving the transition here we'd be + // stuck in point mode above `EXIT_POINT_ALT` until a larger move. + // Unlike the old unfiltered path, point mode never loaded FILTERED + // clusters, so reload when the cached cluster signature doesn't match + // the current filter (#300). Load into the hidden h3 layer first, + // then exit — no stale-cluster flash on a failed load (P1.10). + const res = h > 3000000 ? 4 : h > 300000 ? 6 : 8; + const needLoad = res !== currentRes || viewer._clusterFilterSig !== desiredClusterSig(); + const applied = needLoad + ? await loadRes(res, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[res]) + : true; + if (applied) exitPointMode(); return; } // In point mode and at point-mode altitude: refresh samples in From cc42bd705a2dc7423401a1f01bc16a0842a97a37 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 18 Jun 2026 22:11:32 -0700 Subject: [PATCH 4/9] #300 C3 (coherence): deep-link/boot restore, filtered click hydration, facet note MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Deep-link/back-forward restore (hashchange): resolve mode via computeTargetMode(restoredAlt, latch=s.mode) so a facet at world zoom restores to FILTERED clusters, not point. 
Load clusters first then exit point; isStale() before mutation and after the load await; suppress-hash released first as before (Codex P1.6/P1.7). - Cold boot mode hydration: same computeTargetMode(latch) treatment; when a facet is active at cluster altitude, reload the FILTERED clusters over phase1's unfiltered summary load (P1.7). Still enters point for the #203 alt setTimeout(r, 300)); } finally { busyRelease(); @@ -4493,7 +4519,25 @@ zoomWatcher = { // globe, so hydrating its side panel would mismatch the user's view // (and would inconsistently combine unfiltered cluster card with // source-filtered nearby-samples in hydrateClusterUI). - const result = await db.query(` + // + // #300: when a facet is active and filtered clusters are ready, the + // visible cell is the FILTERED aggregate, so hydrate from the same + // single-cell aggregation off lite (count / center / dominant_source + // over the filtered subset) — otherwise the card would show the cell's + // full unfiltered count and could even resolve a cell that the filter + // excludes entirely (Codex P1.8). Empty under the filter → sample_count + // 0 → treated as absent below. + const result = await db.query(wantFilteredClusters() ? 
` + WITH base AS ( + SELECT source, latitude, longitude FROM read_parquet('${lite_url}') + WHERE h3_res${res} = CAST('${decimal}' AS UBIGINT)${sourceFilterSQL('source')}${facetFilterSQL()} + ) + SELECT (SELECT COUNT(*) FROM base)::INTEGER AS sample_count, + (SELECT ROUND(AVG(latitude), 6) FROM base) AS center_lat, + (SELECT ROUND(AVG(longitude), 6) FROM base) AS center_lng, + (SELECT source FROM base GROUP BY source ORDER BY COUNT(*) DESC, source ASC LIMIT 1) AS dominant_source, + (SELECT COUNT(DISTINCT source) FROM base)::INTEGER AS source_count + ` : ` SELECT sample_count, center_lat, center_lng, dominant_source, source_count FROM read_parquet('${url}') WHERE h3_cell = CAST('${decimal}' AS UBIGINT) @@ -4502,6 +4546,9 @@ zoomWatcher = { `); if (!result || result.length === 0) return null; const r = result[0]; + // #300: filtered aggregation always returns one row; a cell empty under + // the active filter has sample_count 0 → treat as absent. + if (!r.sample_count) return null; return { // The validated input `lower` is the canonical hex; round-tripping // r.h3_cell would lose precision (DuckDB-WASM returns UBIGINT as @@ -4641,14 +4688,30 @@ zoomWatcher = { // through a `#alt=8000` URL with no `mode=point` would exit point // mode here even though boot would have entered it. viewer._suppressTimer = setTimeout(async () => { - viewer._suppressHashWrite = false; + viewer._suppressHashWrite = false; // release first (unconditional, as before) + if (isStale()) return; // a newer hashchange superseded us (P1.6) const s = readHash(); - // A1 (#234 Step 4): an active search forces point mode regardless - // of the restored altitude, so the back/forward globe state stays - // coherent with the (still-filtered) table/legend. 
- const wantsPoint = filtersForcePoint() || s.mode === 'point' || (s.alt != null && s.alt < ENTER_POINT_ALT); - if (wantsPoint && getMode() !== 'point') await enterPointMode(false); - else if (!wantsPoint && getMode() === 'point') exitPointMode(false); + // #300/#234 C3: resolve the restored mode through computeTargetMode. + // Search still forces point; a facet at world zoom now restores to + // FILTERED clusters instead of point. The hash encodes point + // explicitly and cluster by omission, so pass `s.mode` as the + // hysteresis latch — the band must resolve to the SAVED view, not the + // one being left (Codex P1.7). (Facets live in the query string, not + // the hash, so they're unchanged across this hash-only nav.) + const restoredAlt = s.alt != null ? s.alt : viewer.camera.positionCartographic.height; + const target = computeTargetMode(restoredAlt, s.mode === 'point' ? 'point' : 'cluster'); + if (target === 'point') { + if (getMode() !== 'point') await enterPointMode(false); + } else { + // Clusters at the restored altitude. Load FIRST (filter-aware), then + // drop point mode only if it applied — no stale flash (P1.10) — and + // re-check freshness after the await (P1.6). + const res = restoredAlt > 3000000 ? 4 : restoredAlt > 300000 ? 6 : 8; + const wasPoint = getMode() === 'point'; + const applied = await loadRes(res, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[res]); + if (isStale()) return; + if (applied && wasPoint) exitPointMode(false); + } }, 2000); // Handle pid / h3 selection (sample mode wins if both present — see @@ -5882,10 +5945,24 @@ zoomWatcher = { // any URL whose altitude says it should. `tryEnterPointModeIfNeeded()` // short-circuits if alt >= ENTER_POINT_ALT or we're already in point // mode, so this is a no-op for cluster deep-links at cluster altitude. 
- if (ih.mode === 'point' || (ih.alt != null && ih.alt < ENTER_POINT_ALT)) { + // #300: resolve the boot mode through computeTargetMode so a cold boot with a + // facet active behaves like the live/hashchange paths — point below EXIT + // (or whenever filtered clusters aren't ready), FILTERED clusters above. + // The latch is the saved hash mode (point explicit, cluster by omission), so + // the hysteresis band restores the saved view (Codex P1.7). This still enters + // point for the #203 loophole (alt < ENTER without mode=point → point). + const bootAlt = ih.alt != null ? ih.alt : viewer.camera.positionCartographic.height; + const bootTarget = computeTargetMode(bootAlt, ih.mode === 'point' ? 'point' : 'cluster'); + if (bootTarget === 'point') { // pushHistory: false — boot should reconcile state without adding // a history entry (issue #207 item 3). await tryEnterPointModeIfNeeded({ pushHistory: false }); + } else if (wantFilteredClusters() && viewer._clusterFilterSig !== desiredClusterSig()) { + // phase1 loaded UNFILTERED summary clusters, but a facet is active at + // cluster altitude — reload the FILTERED clusters so the cold-boot world + // view matches the filter (Codex P1.7). + const bootRes = bootAlt > 3000000 ? 4 : bootAlt > 300000 ? 6 : 8; + await loadRes(bootRes, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[bootRes]); } // #267: a shared `?material=`/`?context=`/`?object_type=` deep link must From f9dd2e0bca0b143bf7b0c63c2859b6ce06d25fba Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 18 Jun 2026 22:20:52 -0700 Subject: [PATCH 5/9] #300: Codex round-2 fixes (boot P0 + 7 correctness P1s) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - P0: delete the dedicated _urlHasFacets boot force-point block — it ran AFTER the new filtered-cluster boot load and switched straight back to points, negating #300 for cold-boot facet deep links. computeTargetMode (via bootTarget) now owns that decision. 
- P1.2: invalidateClusterLoads() = ++loadResGen + loading=false. A bare loadResGen++ left `loading` stuck true (the superseded load's finally only clears it when its gen is current), wedging every later reload guard. Used in reconcile's point branch and at hashchange entry. - P1.3: clusterSig(kind); phase1 labels its load clusterSig('summary') explicitly so a reconcile can't mistake facet-blind summary clusters for filtered data. - P1.4: readiness fallback reconcile also checks _clusterFilterSig (at world zoom the mode is already 'cluster' though the layer is still summary). - P1.5: boot 'point' uses direct enterPointMode for forced/saved cases (search / facet-not-ready / explicit mode=point) since tryEnterPointModeIfNeeded refuses at alt >= ENTER_POINT_ALT; gentle helper only for altitude-driven entry. - P1.6: moveEnd chases tryEnterPointModeIfNeeded after exitPointMode (an overlapping settle can drop below ENTER during the load await). - P1.7: hashchange invalidates cluster loads at ENTRY so a prior restore callback's loadRes discards instead of replacing data before the late isStale(). - P1.8: handleFacetFilterChange captures the freshness token at entry (a second toggle during the reconcile await must invalidate the first's revalidation). Codex verified correct: integer casts, post-await sig TOCTOU, load-first ordering, filtered fetchClusterByH3, build tests 23/23. Render OK. --- explorer.qmd | 102 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 30 deletions(-) diff --git a/explorer.qmd b/explorer.qmd index 288f1a7..ba61b08 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -1210,15 +1210,20 @@ function wantFilteredClusters() { // again AFTER the await so a filter change mid-query can't mislabel the cache // (Codex P0.2/P1.3). Deliberately NOT the SQL text: masks becoming ready can // change the SQL strategy without changing results. 
-function desiredClusterSig() { +function clusterSig(kind) { return JSON.stringify({ - kind: wantFilteredClusters() ? 'filtered' : 'summary', + kind, // 'filtered' | 'summary' — explicit so a caller can label what it + // ACTUALLY loaded, not what the live state currently wants sources: getActiveSources().slice().sort(), material: treeSelection('material').slice().sort(), context: treeSelection('context').slice().sort(), objectType: treeSelection('object_type').slice().sort(), }); } +// The signature for what the cluster layer SHOULD contain right now. +function desiredClusterSig() { + return clusterSig(wantFilteredClusters() ? 'filtered' : 'summary'); +} // #300: the filtered-cluster aggregation off samples_map_lite (masks-backed via // facetFilterSQL). Same columns + grain + INTEGER casts as the pre-aggregated h3 @@ -2066,11 +2071,13 @@ phase1 = { // Cache cluster data for viewport counting viewer._clusterData = Array.from(data); viewer._clusterTotal = { clusters: data.length, samples: totalSamples }; - // #300: phase1 loads the pre-aggregated summary (facet-blind) clusters, so - // label the cache with the current summary/source/tree signature. Without - // this seed, the first post-boot cluster reconcile couldn't tell whether the - // initial layer already matched the desired filter state (Codex P1.3). - viewer._clusterFilterSig = desiredClusterSig(); + // #300: phase1 ALWAYS loads the pre-aggregated summary (facet-blind) clusters, + // so label the cache 'summary' explicitly — desiredClusterSig() could say + // 'filtered' if readiness + facet hydration already resolved, which would let + // the boot/readiness reconcile mistake this facet-blind layer for filtered + // data (Codex round-2 P1.3). Without this seed the first reconcile couldn't + // tell whether the initial layer matched the desired filter state. 
+ viewer._clusterFilterSig = clusterSig('summary'); performance.mark('p1-end'); performance.measure('p1', 'p1-start', 'p1-end'); @@ -2832,6 +2839,16 @@ zoomWatcher = { // must use the return value rather than treating a normal `await` return // as success. let loadResGen = 0; // generation counter to discard stale results + + // #300: cancel any in-flight cluster load WITHOUT starting a successor. Bumping + // loadResGen alone makes the in-flight loadRes discard its result, but that + // call's `finally` only clears `loading` when ITS generation is still current + // (it no longer is) — so `loading` would stay true forever and every later + // camera handler's `!loading` reload guard would be wedged. Clear it here + // (Codex round-2 P1.2). Used when a transition supersedes clusters without a + // replacement load (entering point mode; a fresh hashchange). + const invalidateClusterLoads = () => { ++loadResGen; loading = false; }; + const loadRes = async (res, url, opts = {}) => { const gen = ++loadResGen; // claim a generation loading = true; @@ -4153,8 +4170,9 @@ zoomWatcher = { if (computeTargetMode(h) === 'point') { // Invalidate any in-flight cluster load so a stale loadRes can't paint // clusters/stats under point mode — a search activation may start no - // newer loadRes of its own (Codex P1.4). - loadResGen++; + // newer loadRes of its own (Codex P1.4). Use the helper so `loading` + // is cleared too (P1.2). + invalidateClusterLoads(); if (getMode() !== 'point') await enterPointMode(false); else await loadViewportSamples(); // re-filter the dots in place } else { @@ -4176,6 +4194,11 @@ zoomWatcher = { async function handleFacetFilterChange() { busyAcquire(); + // #300: capture the freshness token at ENTRY (like the source handler), so + // a second facet toggle during the reconcile await invalidates this run's + // later selection revalidation instead of hydrating stale cluster metadata + // (Codex round-2 P1.8). 
+ const isStale = freshSelectionToken(viewer); try { syncFacetNote(); writeQueryState(); @@ -4193,8 +4216,8 @@ zoomWatcher = { // can empty the cell (then drop the selection) or change its filtered // count/dominant_source. Revalidate via the (now filter-aware) // fetchClusterByH3, mirroring the source-filter handler (Codex P1.8). - // Guarded by a freshness token so a rapid second toggle wins. - const isStale = freshSelectionToken(viewer); + // Guarded by the entry-captured freshness token so a rapid second + // toggle wins. const sel = viewer._globeState; if (sel.selectedH3) { const meta = await fetchClusterByH3(sel.selectedH3); @@ -4266,7 +4289,12 @@ zoomWatcher = { setTimeout(() => { if (window.__filteredClustersReady === true && hasFacetFilters()) { const h = viewer.camera.positionCartographic.height; - if (computeTargetMode(h) !== getMode()) reconcileGlobeForFilters(); + // Reconcile if the mode is wrong OR the cached cluster layer is still + // summary (facet-blind) data — at world zoom both modes read 'cluster' + // yet the layer needs to become FILTERED (Codex round-2 P1.4). + if (computeTargetMode(h) !== getMode() || viewer._clusterFilterSig !== desiredClusterSig()) { + reconcileGlobeForFilters(); + } } }, 0); @@ -4482,7 +4510,14 @@ zoomWatcher = { const applied = needLoad ? await loadRes(res, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[res]) : true; - if (applied) exitPointMode(); + if (applied) { + exitPointMode(); + // An overlapping settle may have moved the camera below + // ENTER_POINT_ALT during the load await; chase so we re-enter + // point mode instead of stranding the user in clusters at point + // altitude (Codex round-2 P1.6 — the established invariant). + await tryEnterPointModeIfNeeded(); + } return; } // In point mode and at point-mode altitude: refresh samples in @@ -4667,6 +4702,11 @@ zoomWatcher = { // Bump the selection generation BEFORE any early-return so even // hashchanges that lack lat/lng invalidate stale async work. 
const isStale = freshSelectionToken(viewer); + // #300: also invalidate any in-flight cluster load from a PRIOR + // hashchange's restore callback, so it discards on its generation check + // instead of replacing points/cache/stats/currentRes for the superseded + // hash before the late isStale() guard fires (Codex round-2 P1.7). + invalidateClusterLoads(); const state = readHash(); if (state.lat == null || state.lng == null) return; @@ -5954,9 +5994,19 @@ zoomWatcher = { const bootAlt = ih.alt != null ? ih.alt : viewer.camera.positionCartographic.height; const bootTarget = computeTargetMode(bootAlt, ih.mode === 'point' ? 'point' : 'cluster'); if (bootTarget === 'point') { - // pushHistory: false — boot should reconcile state without adding - // a history entry (issue #207 item 3). - await tryEnterPointModeIfNeeded({ pushHistory: false }); + if (getMode() !== 'point') { + // tryEnterPointModeIfNeeded() refuses at alt >= ENTER_POINT_ALT, so it + // can't restore an explicit saved point view in the hysteresis band, + // nor force point for a high-altitude facet/search when filtered + // clusters aren't available. Enter directly for those forced/saved + // cases; reserve the gentle helper (cold-cache messaging) for pure + // altitude-driven entry below ENTER (Codex round-2 P1.5). 
+ if (searchIsActive() || hasFacetFilters() || ih.mode === 'point') { + await enterPointMode(false); // pushHistory:false (#207 item 3) + } else { + await tryEnterPointModeIfNeeded({ pushHistory: false }); + } + } } else if (wantFilteredClusters() && viewer._clusterFilterSig !== desiredClusterSig()) { // phase1 loaded UNFILTERED summary clusters, but a facet is active at // cluster altitude — reload the FILTERED clusters so the cold-boot world @@ -5965,20 +6015,12 @@ zoomWatcher = { await loadRes(bootRes, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[bootRes]); } - // #267: a shared `?material=`/`?context=`/`?object_type=` deep link must - // force point mode like `?search=` does — cluster H3 summaries can't honor - // those facets. The altitude-gated trigger above won't promote a high-alt - // facet deep link, so enter point mode directly. We read the URL params - // (not the hydrated checkboxes) so this is independent of facet-loader - // timing; the latch points (targetMode / moveEnd / wantsPoint) keep it - // pinned once the checkboxes hydrate. - const _urlHasFacets = ['material', 'context', 'object_type'].some(p => { - const v = new URLSearchParams(location.search).get(p); - return v != null && v.trim() !== ''; - }); - if (_urlHasFacets && getMode() !== 'point') { - await enterPointMode(false); - } + // #300: the dedicated `?material=`/`?context=`/`?object_type=` boot + // force-point block was removed here — computeTargetMode() (driven above + // through `bootTarget`) now owns the facet boot decision: point below EXIT + // (or when filtered clusters aren't ready), FILTERED clusters above. Keeping + // the old unconditional enterPointMode() would immediately undo the + // filtered-cluster boot load (Codex round-2 P0). // #233 phase 1: hydrate heatmap overlay from `heatmap=1` URL param. 
// Reported by RY 2026-05-27 on PR #240 staging — toggle state was From 73067ea28fdd7ed3880489f8a2e088d480be0fcd Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 18 Jun 2026 23:01:36 -0700 Subject: [PATCH 6/9] #300: fix boot deadlock (serialize db.query) + verify filtered clusters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit THE BUG: with a facet active at world zoom, filtered clusters never loaded — the loadRes filtered query, issued during boot's concurrent query storm, NEVER resolved. The identical query completes in ~2.5s once the connection is idle, and even two concurrent post-boot queries are fine — but DuckDB-WASM (the non-threaded MVP build this page loads) DEADLOCKS when the heavy filtered aggregation (samples_map_lite + sample_facet_masks) runs amid boot's other in-flight queries. THE FIX: serialize every db.query through a FIFO chain (wrap DuckDBClient.query in the `db` cell), so at most one query runs at a time. SQL queries are atomic (none awaits another mid-execution), so chaining can't deadlock; the latency cost is small. Single-point fix; without it #300's filtered clusters never appear. Also (found while debugging the double-fire): - !loading guards on the readiness reconcile triggers (setTimeout0 + onReady hook) so boot's filtered load isn't redundantly issued twice. Verification (tests/playwright/filtered-clusters-300.spec.js, [data], against a local res46 lite served by dev_server.py): - broad facet (anyanthropogenicmaterial) at world zoom → _clusterFilterSig kind:filtered, cluster mode (not forced point), 81 res4 cells; cluster sample_count sum == independent masks-backed COUNT(*) (count conservation). - zoom-in below ENTER_POINT_ALT → point mode. scripts/regen_lite_res46.py: derive h3_res4/res6 onto the existing 202608 lite via the h3 extension (no wide needed); validated against the shipped h3 summaries. DESIGN_300.md: design + Codex-review record. 
--- DESIGN_300.md | 174 ++++++++++++++++++ explorer.qmd | 20 +- scripts/regen_lite_res46.py | 90 +++++++++ .../playwright/filtered-clusters-300.spec.js | 106 +++++++++++ 4 files changed, 388 insertions(+), 2 deletions(-) create mode 100644 DESIGN_300.md create mode 100644 scripts/regen_lite_res46.py create mode 100644 tests/playwright/filtered-clusters-300.spec.js diff --git a/DESIGN_300.md b/DESIGN_300.md new file mode 100644 index 0000000..6cf23d2 --- /dev/null +++ b/DESIGN_300.md @@ -0,0 +1,174 @@ +# #300 browser design — filtered H3 clusters at world zoom + +Goal: when a **facet** filter is active and the camera is zoomed out (above +`EXIT_POINT_ALT`), render an h3-clustered view of the **filtered** set instead of +forcing raw capped point mode (#267). Zoom-in still drops to individual dots. +Search is OUT of scope (stays point-mode). Foundation: #293 masks make filtered +h3 aggregation ~25× faster; build now adds `h3_res4/h3_res6` to `samples_map_lite`. + +## Components + +### 1. Readiness preflight (new ojs cell, mirrors `nodeBitsReady`) +```js +filteredClustersReady = { + window.__filteredClustersReady = false; + try { + await db.query(`SELECT h3_res4, h3_res6 FROM read_parquet('${lite_url}') LIMIT 1`); + window.__filteredClustersReady = true; return true; + } catch (err) { + console.warn('lite lacks h3_res4/res6; #300 filtered clusters disabled (point fallback):', err); + return false; + } +} +``` +Hard requirement is ONLY that lite has res4/res6. (masks readiness is orthogonal: +`facetFilterSQL()` already self-falls-back to the membership scan when masks +absent — affects speed, not correctness.) Safe to ship before the lite republish: +flag stays false → today's point-mode behavior. + +### 2. 
`computeTargetMode(alt)` rule change (was: any filter → point) +```js +const computeTargetMode = (alt) => { + if (searchIsActive()) return 'point'; // search out of scope + if (hasFacetFilters() && !window.__filteredClustersReady) return 'point'; // pre-republish / preflight pending + if (alt < ENTER_POINT_ALT) return 'point'; + if (alt > EXIT_POINT_ALT) return 'cluster'; + return getMode(); // hysteresis band +}; +``` +Net: once ready, facets use the SAME altitude hysteresis as the unfiltered case +(loadRes makes the clusters filter-aware). Flicker-safe (keeps ENTER/EXIT band). +`filtersForcePoint()` stays = `searchIsActive() || hasFacetFilters()` and is still +correct at the remaining call sites (see §5). + +### 3. `loadRes` becomes filter-aware (one branch; everything else identical) +```js +const wantFiltered = hasFacetFilters() && window.__filteredClustersReady; +const data = wantFiltered + ? await db.query(filteredClusterSQL(res)) + : await db.query(`SELECT CAST(h3_cell AS VARCHAR) AS h3_cell_dec, sample_count, + center_lat, center_lng, dominant_source, source_count + FROM read_parquet('${url}') WHERE 1=1${sourceFilterSQL('dominant_source')}`); +``` +`filteredClusterSQL(res)` (off lite, masks-backed) returns the EXACT same columns: +```sql +WITH base AS ( + SELECT h3_res{res} AS cell, source, latitude, longitude + FROM read_parquet('${lite_url}') + WHERE h3_res{res} IS NOT NULL ${sourceFilterSQL('source')} ${facetFilterSQL()}), +sc AS (SELECT cell, source, COUNT(*) c FROM base GROUP BY cell, source), +dom AS (SELECT cell, source AS dominant_source, + ROW_NUMBER() OVER (PARTITION BY cell ORDER BY c DESC, source ASC) rn FROM sc), +agg AS (SELECT cell, COUNT(*) sample_count, ROUND(AVG(latitude),6) center_lat, + ROUND(AVG(longitude),6) center_lng, COUNT(DISTINCT source) source_count + FROM base GROUP BY cell) +SELECT CAST(agg.cell AS VARCHAR) AS h3_cell_dec, agg.sample_count, agg.center_lat, + agg.center_lng, dom.dominant_source, agg.source_count +FROM agg JOIN 
dom ON dom.cell = agg.cell AND dom.rn = 1 +``` +Same `loadResGen` generation guard, render loop, `_clusterData`/`_clusterTotal` +cache, and "Samples in View" stat — all unchanged. The exact in-view COUNT comes +from `countInViewport(_clusterData)` as today (now over filtered cells). + +### 4. The stale-cluster reload problem (the main risk) +The camera `targetMode==='cluster'` branch only reloads when `target !== currentRes`. +On point→cluster (zoom out with a facet active), `currentRes` may already equal +`target`, so it would SKIP loadRes and show stale (unfiltered or +previously-filtered) clusters. Also, toggling a facet while already in cluster +mode at a fixed altitude is `target === currentRes` → no reload. + +Fix: track the filter signature the current `_clusterData` was built under and +reload when it differs. Add: +```js +viewer._clusterFilterSig = null; // set in loadRes after a successful load +function currentFilterSig() { + return (hasFacetFilters() && window.__filteredClustersReady) + ? JSON.stringify({ f: facetFilterSQL(), s: sourceFilterSQL('source') }) : null; +} +``` +- In `loadRes` success: `viewer._clusterFilterSig = currentFilterSig();` +- Camera `cluster` branch + the "already cluster, check resolution" branch: reload + when `target !== currentRes || viewer._clusterFilterSig !== currentFilterSig()`. + +### 5. Handlers (`handleFacetFilterChange`, `applySearchFilterChange`) +Both currently force point when a filter is active. Replace the force-point block +with a reconcile that honors `computeTargetMode`: +```js +async function reconcileGlobeForFilters() { + const h = viewer.camera.positionCartographic.height; + if (computeTargetMode(h) === 'point') { + if (getMode() !== 'point') await enterPointMode(false); + else await loadViewportSamples(); // refilter the dots + } else { // cluster + if (getMode() === 'point') exitPointMode(false); + const res = h > 3000000 ? 4 : h > 300000 ? 
6 : 8; + await loadRes(res, { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }[res]); // filtered (incl. resig change) + } +} +``` +- `handleFacetFilterChange`: call `reconcileGlobeForFilters()` (replaces the + hasFacetFilters() force-point + the facet-cleared exit logic — computeTargetMode + subsumes both: cleared facet + search inactive + high alt → cluster; etc.) +- `applySearchFilterChange`: same. Note clearing SEARCH while a facet remains must + now land in filtered clusters at high alt — `reconcileGlobeForFilters` handles it + because computeTargetMode sees only the facet (search inactive → not forced). + +### 6. Deep-link restore (wantsPoint, ~4538) +Today: `filtersForcePoint() || s.mode==='point' || (s.alt < ENTER_POINT_ALT)`. [… text lost in extraction: the replacement rule and the opening of its code sample …] + const res = restoredAlt > 3000000 ? 4 : restoredAlt > 300000 ? 6 : 8; + await loadRes(res, {4:h3_res4_url,6:h3_res6_url,8:h3_res8_url}[res]); +} +``` +(`s.mode==='point'` still wins to honor an explicitly saved point view.) + +## Codex review integrated (2026-06-18) — implementation spec of record + +Staged in 3 commits within the PR: +- **C1 (dormant infra):** `filteredClustersReady` preflight cell; `filteredClusterSQL(res)` + with `COUNT(*)::INTEGER`/`COUNT(DISTINCT source)::INTEGER` casts (P0.1); semantic + `desiredClusterSig()` = `{kind, sources(always), material, context, objectType}` + (P1.3, NOT sql text); `loadRes` filter-aware with **snapshot sig captured before + the await**, re-checked after (`gen !== loadResGen || sig !== desiredClusterSig()`) + (P0.2); set `viewer._clusterFilterSig = sig` on success; init the sig in phase1 + (P1.3). Dormant because computeTargetMode still forces point until C2. +- **C2 (activation):** `computeTargetMode(alt, latch = getMode())` — add restored-latch + param (P1.7); rule = search→point, facet&&!ready→point, else ENTER/EXIT hysteresis. 
+ `reconcileGlobeForFilters()` used by both filter-change handlers; every `loadRes` + caller captures `applied` + chases `tryEnterPointModeIfNeeded()` and invalidates + pending cluster loads when target flips to point (P1.4). camera `cluster` branches + reload on `target !== currentRes || viewer._clusterFilterSig !== desiredClusterSig()` + (stale-cluster). `moveEnd` gate (4334) → `computeTargetMode(h)` + sig reload (P1.5). + Readiness→reconcile hook (P1.9). Fail-safe: stay in point / hide stale clusters if a + filtered load fails (P1.10). +- **C3 (coherence):** deep-link restore (4538) with latch + filtered cluster load + + `isStale()` before mode mutation and after each await + cluster-gen invalidation on + newer hashchange (P1.6/P1.7); boot facet block (~5771) → computeTargetMode-aware + (P1.7); `fetchClusterByH3()` (4365) filter-aware single-cell aggregation + revalidate + `selectedH3` after facet changes (P1.8); `syncFacetNote()` message no longer claims + "only at neighborhood zoom" when filtered clusters active (P1.10). + +Out of scope (pre-existing, documented): `AVG(longitude)` antimeridian skew and the +cell-center "Samples in View" inexactness — both already in the shipped summaries. + +## Open questions for review +1. Is making `loadRes` filter-aware (vs a separate `loadFilteredClusters`) the + right call? It reuses the generation guard / render / cache / stats exactly, + but couples two data sources in one fn. +2. `_clusterFilterSig` approach for the stale-cluster reload — correct & sufficient? + Any path that loads clusters without setting the sig, or compares it wrong? +3. Deep-link: is loading filtered clusters in the restore `setTimeout` safe wrt the + `isStale()` / freshness-token races in that handler, and the suppress-hash gate? +4. Should `handleFacetFilterChange`'s existing `await new Promise(r=>setTimeout(r,300))` + and busy-flag structure be preserved as-is around the new reconcile? +5. 
Any hysteresis/latching regression from facets now using the ENTER/EXIT band + instead of always-point? (Esp. #234 A1/C3 search-latch interactions.) +6. `filteredClusterSQL` correctness: `dominant_source` recomputed over the FILTERED + subset (vs summary's all-sample dominant) — intended and fine? diff --git a/explorer.qmd b/explorer.qmd index ba61b08..7205741 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -1733,6 +1733,22 @@ db = { const instance = await DuckDBClient.of(); performance.mark('duckdb-init-end'); performance.measure('duckdb_init', 'duckdb-init-start', 'duckdb-init-end'); + // #300: DuckDB-WASM (the non-threaded MVP build this page loads) DEADLOCKS + // under concurrent queries — a heavy filtered-cluster aggregation + // (filteredClusterSQL, reading samples_map_lite + sample_facet_masks) issued + // while boot's query storm is still in flight never resolves, even though the + // identical query completes in ~2.5s once the connection is idle. Serialize + // EVERY query through a FIFO chain so at most one runs at a time. SQL queries + // are atomic (none awaits another mid-execution), so chaining can't deadlock, + // and the latency cost is small (boot queries are individually fast). This is + // the single-point fix; without it #300's filtered clusters never load. + const origQuery = instance.query.bind(instance); + let queryChain = Promise.resolve(); + instance.query = (...args) => { + const run = queryChain.then(() => origQuery(...args)); + queryChain = run.then(() => {}, () => {}); // keep the chain alive past errors + return run; + }; return instance; } ``` @@ -4284,10 +4300,10 @@ zoomWatcher = { // the deferred guarded check below catches it (and no-ops if boot already set // the right mode). Any later camera/moveEnd settle is a further backstop. 
window.__onFilteredClustersReady = () => { - if (hasFacetFilters()) reconcileGlobeForFilters(); + if (hasFacetFilters() && !loading) reconcileGlobeForFilters(); }; setTimeout(() => { - if (window.__filteredClustersReady === true && hasFacetFilters()) { + if (window.__filteredClustersReady === true && hasFacetFilters() && !loading) { const h = viewer.camera.positionCartographic.height; // Reconcile if the mode is wrong OR the cached cluster layer is still // summary (facet-blind) data — at world zoom both modes read 'cluster' diff --git a/scripts/regen_lite_res46.py b/scripts/regen_lite_res46.py new file mode 100644 index 0000000..c4dfec3 --- /dev/null +++ b/scripts/regen_lite_res46.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +"""#300: add h3_res4/h3_res6 to an existing samples_map_lite without the wide. + +The published 202608 lite already carries pid/label/source/lat/lng/place_name/ +result_time/h3_res8/h3_res8_hex. h3_res4 and h3_res6 are pure functions of the +SAME rounded lat/lng the build used (build_frontend_derived.py samp_geo computes +them off ROUND(ST_Y,6)/ROUND(ST_X,6), which is exactly what lite stores) via +h3_latlng_to_cell at each resolution — so we derive them the IDENTICAL way and +avoid rebuilding from the (now-gone) /tmp wide. + +NOTE: H3 cells do NOT strictly nest, so h3_latlng_to_cell(...,4) is NOT in general +the parent of h3_latlng_to_cell(...,8) — the build uses per-resolution +latlng_to_cell, never cell_to_parent, and so do we. + +Validation cross-checks the derived res4/res6 against the SHIPPED +h3_summary_res{4,6} parquets (built off the same samp_geo): GROUP BY the new h3 +column must reproduce each summary's (h3_cell, sample_count) exactly. Plus res8 +preserved, row count preserved, no NULLs. Refuse to write otherwise. 
+ +Usage: regen_lite_res46.py SRC OUT SUMMARY_DIR TAG +""" +import os +import sys +import duckdb + +SRC = sys.argv[1] +OUT = sys.argv[2] +SUMMARY_DIR = sys.argv[3] +TAG = sys.argv[4] + +con = duckdb.connect() +con.execute("INSTALL h3 FROM community; LOAD h3;") + +con.execute(f""" + CREATE TEMP TABLE newlite AS + SELECT pid, label, source, latitude, longitude, place_name, result_time, + h3_latlng_to_cell(latitude, longitude, 4)::UBIGINT AS h3_res4, + h3_latlng_to_cell(latitude, longitude, 6)::UBIGINT AS h3_res6, + h3_res8::UBIGINT AS h3_res8, + h3_res8_hex + FROM read_parquet('{SRC}') +""") + +# --- Validation 1: derived res4/res6 reproduce the SHIPPED h3 summaries exactly --- +# (the authoritative cross-check — both come from the same samp_geo latlng_to_cell) +for res in (4, 6, 8): + summ = os.path.join(SUMMARY_DIR, f"{TAG}_h3_summary_res{res}.parquet") + mism = con.execute(f""" + WITH mine AS ( + SELECT h3_res{res} AS cell, COUNT(*) AS c FROM newlite + WHERE h3_res{res} IS NOT NULL GROUP BY h3_res{res} + ), + summ AS ( + SELECT h3_cell AS cell, sample_count AS c FROM read_parquet('{summ}') + ) + SELECT + (SELECT COUNT(*) FROM mine FULL OUTER JOIN summ USING (cell) + WHERE mine.c IS DISTINCT FROM summ.c) AS bad + """).fetchone()[0] + if mism: + raise SystemExit(f"FATAL: res{res} GROUP BY disagrees with shipped summary in {mism} cells") + +# --- Validation 2: row count + res8 preserved exactly --- +n_src = con.execute(f"SELECT COUNT(*) FROM read_parquet('{SRC}')").fetchone()[0] +n_new = con.execute("SELECT COUNT(*) FROM newlite").fetchone()[0] +res8_mismatch = con.execute(f""" + SELECT COUNT(*) FROM newlite n + JOIN read_parquet('{SRC}') s ON s.pid = n.pid + WHERE n.h3_res8 IS DISTINCT FROM s.h3_res8::UBIGINT +""").fetchone()[0] +if n_src != n_new or res8_mismatch: + raise SystemExit(f"FATAL: rows src={n_src} new={n_new}, res8_mismatch={res8_mismatch}") + +# --- Validation 3: no NULL res4/res6 where coords present --- +nulls = con.execute(""" + SELECT COUNT(*) FROM 
newlite + WHERE latitude IS NOT NULL AND longitude IS NOT NULL + AND (h3_res4 IS NULL OR h3_res6 IS NULL) +""").fetchone()[0] +if nulls: + raise SystemExit(f"FATAL: {nulls} located rows with NULL res4/res6") + +con.execute(f""" + COPY (SELECT * FROM newlite ORDER BY pid) + TO '{OUT}' (FORMAT PARQUET, COMPRESSION ZSTD) +""") + +cols = [r[0] for r in con.execute(f"DESCRIBE SELECT * FROM read_parquet('{OUT}')").fetchall()] +print(f"OK rows={n_new:,} res8_preserved res4/res6 GROUP BY == shipped h3 summaries") +print(f"columns: {cols}") diff --git a/tests/playwright/filtered-clusters-300.spec.js b/tests/playwright/filtered-clusters-300.spec.js new file mode 100644 index 0000000..ae3ce6a --- /dev/null +++ b/tests/playwright/filtered-clusters-300.spec.js @@ -0,0 +1,106 @@ +/** + * #300 verification [data]: filtered H3 clusters at world zoom. + * + * Runs against a LOCAL data mirror (dev_server.py on :8099) whose + * samples_map_lite carries h3_res4/h3_res6, so window.__filteredClustersReady + * becomes true and the feature ACTIVATES. (Production data lacks res4/res6, so + * this can't run against data.isamples.org yet — that's the pending republish.) + * + * Pass DATA_BASE=http://localhost:8099 (default below). + */ +const { test, expect } = require('@playwright/test'); + +const DATA_BASE = process.env.DATA_BASE || 'http://localhost:8099'; +const MATERIAL = 'https://w3id.org/isample/vocabulary/material/1.0/anyanthropogenicmaterial'; +const WORLD_ALT = 18000000; // world zoom, well above EXIT_POINT_ALT + +function url(extraHash = '') { + const qs = new URLSearchParams({ data_base: DATA_BASE, material: MATERIAL }).toString(); + return `/explorer.html?${qs}#v=1&lat=10.0000&lng=0.0000&alt=${WORLD_ALT}${extraHash}`; +} + +// Read an OJS cell value (viewer, db, lite_url, facetFilterSQL, ...) the same way +// the existing helpers do. 
+const ojs = (page, name) => + page.evaluate((n) => window._ojs?.ojsConnector?.mainModule?.value(n), name); + +test.setTimeout(180000); + +test('(#300) [data] broad facet at world zoom renders FILTERED clusters, not point mode', async ({ page }) => { + const errors = []; + page.on('console', m => { if (m.type() === 'error') errors.push(m.text()); }); + + await page.goto(url()); + + // 1. Readiness preflight resolves true (lite has res4/res6). + await expect.poll(() => page.evaluate(() => window.__filteredClustersReady === true), + { timeout: 120000, message: 'filteredClustersReady should become true with res46 lite' }).toBe(true); + + // 2. The cluster layer becomes FILTERED at world zoom (the #300 win). NB: + // _globeState.mode DEFAULTS to 'cluster' at init, so polling mode alone is + // not meaningful — the authoritative signal is _clusterFilterSig.kind === + // 'filtered', set only when loadRes actually aggregated the filtered set. + await expect.poll(() => page.evaluate(async () => { + const v = await window._ojs?.ojsConnector?.mainModule?.value('viewer'); + try { return JSON.parse(v?._clusterFilterSig || '{}').kind; } catch { return null; } + }), { timeout: 120000, message: 'facet at world zoom should load FILTERED clusters' }).toBe('filtered'); + + // 3. Still in cluster mode (not forced point #267) and cells actually rendered. + const { mode, clusterCount } = await page.evaluate(async () => { + const v = await window._ojs?.ojsConnector?.mainModule?.value('viewer'); + return { mode: v?._globeState?.mode, clusterCount: v?._clusterData?.length || 0 }; + }); + expect(mode, 'cluster mode, not forced point').toBe('cluster'); + expect(clusterCount, 'filtered clusters rendered').toBeGreaterThan(50); + + // 4. COUNT CONSERVATION: the filtered cluster sample_count sum equals an + // independent filtered COUNT(*) via the same facetFilterSQL masks predicate. 
+ const { clusterSum, directCount } = await page.evaluate(async () => { + const mm = window._ojs.ojsConnector.mainModule; + const v = await mm.value('viewer'); + const db = await mm.value('db'); + const lite = await mm.value('lite_url'); + const facetFilterSQL = await mm.value('facetFilterSQL'); + const sum = v._clusterData.reduce((a, r) => a + Number(r.sample_count), 0); + const r = await db.query(`SELECT COUNT(*)::INTEGER AS n FROM read_parquet('${lite}') WHERE 1=1${facetFilterSQL()}`); + const direct = Number(Array.from(r)[0].n); + return { clusterSum: sum, directCount: direct }; + }); + expect(clusterSum, 'cluster sum > 0').toBeGreaterThan(0); + expect(directCount, 'filtered cluster sum == independent filtered count').toBe(clusterSum); + + // 5. No uncaught console errors during the filtered-cluster render. Exclude + // known-benign noise: favicon/Cesium-ion/ResizeObserver/sourcemap, and the + // listings.json 404 (Quarto issue #295 — present in production too, unrelated + // to #300). "Failed to load resource" is the generic Chromium text for these. + const realErrors = errors.filter(e => + !/favicon|cesium ion|ResizeObserver|sourcemap|listings\.json|Failed to load resource/i.test(e)); + expect(realErrors, `console errors: ${realErrors.join(' | ')}`).toHaveLength(0); +}); + +test('(#300) [data] zoom-in promotes filtered clusters to individual point mode', async ({ page }) => { + await page.goto(url()); + await expect.poll(() => page.evaluate(() => window.__filteredClustersReady === true), { timeout: 120000 }).toBe(true); + await expect.poll(() => page.evaluate(async () => { + const v = await window._ojs?.ojsConnector?.mainModule?.value('viewer'); + return v?._globeState?.mode; + }), { timeout: 120000 }).toBe('cluster'); + + // Fly to a low altitude (below ENTER_POINT_ALT = 120 km). Use a real animated + // flight with a completion callback so Cesium raises moveEnd (a duration:0 jump + // may not fire the camera event the explorer's handler needs). 
+ await page.evaluate(async () => { + const v = await window._ojs.ojsConnector.mainModule.value('viewer'); + await new Promise(resolve => { + v.camera.flyTo({ + destination: Cesium.Cartesian3.fromDegrees(0, 40, 60000), + duration: 0.4, complete: resolve, cancel: resolve, + }); + }); + }); + // Facet still active + altitude < ENTER → computeTargetMode → point. + await expect.poll(() => page.evaluate(async () => { + const v = await window._ojs?.ojsConnector?.mainModule?.value('viewer'); + return v?._globeState?.mode; + }), { timeout: 120000, message: 'zoom-in with facet should drop to point mode' }).toBe('point'); +}); From 5fad0fe4721113dd8a9fef2655e934dfdec5a323 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 18 Jun 2026 23:06:01 -0700 Subject: [PATCH 7/9] #300: address Codex serialization review (remove lossy !loading guards, dedup) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - P1: drop the `!loading` guards on the readiness reconcile triggers. They were added to avoid the boot double-fire, but db.query serialization already prevents the deadlock, and the guards are LOSSY — readiness arriving while an older unfiltered loadRes is in flight would skip the only reconcile signal permanently (Codex serialization-review P1). - Add a sig-dedup at the top of reconcileGlobeForFilters' cluster branch: if already in cluster mode with the matching filter signature, no-op. Keeps the now-unguarded redundant reconciles (boot + hook + setTimeout0) cheap instead of re-running the heavy filtered aggregation. - P2: narrow the `db` serialization comment — it wraps `.query` (and `.sql`, which calls `.query`); it does NOT cover queryStream/queryRow/raw connect. All 45 data calls in the page use db.query (verified), so coverage is complete today. 
--- explorer.qmd | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/explorer.qmd b/explorer.qmd index 7205741..db59d6d 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -1738,10 +1738,15 @@ db = { // (filteredClusterSQL, reading samples_map_lite + sample_facet_masks) issued // while boot's query storm is still in flight never resolves, even though the // identical query completes in ~2.5s once the connection is idle. Serialize - // EVERY query through a FIFO chain so at most one runs at a time. SQL queries - // are atomic (none awaits another mid-execution), so chaining can't deadlock, - // and the latency cost is small (boot queries are individually fast). This is - // the single-point fix; without it #300's filtered clusters never load. + // queries through a FIFO chain so at most one runs at a time. + // + // Scope: this wraps `.query()`. Every data call in this page goes through + // `db.query` (and `db.sql`, which calls `.query` internally) — verified — so + // that covers us. It does NOT serialize `queryStream`/`queryRow` (which open + // their own connection) or raw `db._db.connect()`; do not introduce those + // without serializing them too, or the deadlock returns. SQL queries are + // atomic (none awaits another mid-execution) so the chain can't re-enter; the + // one hazard is a query that never settles, which would stall the whole queue. const origQuery = instance.query.bind(instance); let queryChain = Promise.resolve(); instance.query = (...args) => { @@ -4192,6 +4197,11 @@ zoomWatcher = { if (getMode() !== 'point') await enterPointMode(false); else await loadViewportSamples(); // re-filter the dots in place } else { + // Dedup: if we're already in cluster mode showing the right filtered + // layer, there's nothing to do. Makes redundant reconciles (boot + + // readiness hook + setTimeout0 all firing) cheap no-ops instead of + // re-running the heavy filtered aggregation (Codex serialization-review). 
+ if (getMode() === 'cluster' && viewer._clusterFilterSig === desiredClusterSig()) return; // Target is clusters at this altitude. Load FIRST (into the hidden h3 // layer while still in point mode), and only drop point mode if the // load applied — so a failed/superseded filtered load never exposes @@ -4299,11 +4309,16 @@ zoomWatcher = { // invokes this hook; if it resolved BEFORE this watcher registered the hook, // the deferred guarded check below catches it (and no-ops if boot already set // the right mode). Any later camera/moveEnd settle is a further backstop. + // NB: no `!loading` guard here — db.query is serialized (see the `db` cell), + // so a redundant reconcile can't deadlock; reconcileGlobeForFilters early-exits + // when the cluster layer already matches the desired filter. A `!loading` guard + // would be LOSSY: readiness arriving while an older unfiltered load is in flight + // would skip the only reconcile signal permanently (Codex serialization-review P1). window.__onFilteredClustersReady = () => { - if (hasFacetFilters() && !loading) reconcileGlobeForFilters(); + if (hasFacetFilters()) reconcileGlobeForFilters(); }; setTimeout(() => { - if (window.__filteredClustersReady === true && hasFacetFilters() && !loading) { + if (window.__filteredClustersReady === true && hasFacetFilters()) { const h = viewer.camera.positionCartographic.height; // Reconcile if the mode is wrong OR the cached cluster layer is still // summary (facet-blind) data — at world zoom both modes read 'cluster' From 1715142ea623ef5462adac02ea8e67f6413c071e Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 19 Jun 2026 05:25:21 -0700 Subject: [PATCH 8/9] #300: point lite_url at samples_map_lite_v2 (res4/res6 cache-bust) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Activates filtered clusters in production: the _v2 lite carries h3_res4/h3_res6. 
A new filename (not overwriting the original) preserves the immutable-cache contract (isamples_YYYYMM_*.parquet is served immutable/1-yr) — every visitor fetches fresh data, no Cloudflare purge needed. One-off retrofit for 202608; the next generation builds res4/res6 into the canonical name natively. REQUIRES isamples_202608_samples_map_lite_v2.parquet uploaded to R2 (bucket isamples-ry) BEFORE this merges — lite_url is load-bearing (point mode, deep links, filtered clusters all read it), so a missing _v2 would 404 the explorer. --- SESSION_SUMMARY.md | 147 ++++++++++++++++++++++++++------------------- explorer.qmd | 8 ++- 2 files changed, 91 insertions(+), 64 deletions(-) diff --git a/SESSION_SUMMARY.md b/SESSION_SUMMARY.md index 02ac8f8..12781cb 100644 --- a/SESSION_SUMMARY.md +++ b/SESSION_SUMMARY.md @@ -1,82 +1,103 @@ # Session Summary -## Session: 2026-05-30/31 (evening) -**Directory**: `~/C/src/iSamples/isamplesorg.github.io` -**Trust Level**: external-content +## Session: 2026-06-18 (evening) — #300 filtered clusters at world zoom +**Directory**: /Users/raymondyee/C/src/iSamples/isamplesorg.github.io +**Trust Level**: medium (local Playwright + DuckDB; downloaded 3 R2 artifacts read-only; no prod writes, no secrets) --- ## What Happened - -A long, productive session. Started as "tackle the fast-verify shakedown"; ended with **A1 shipped to production (isamples.org)** and **#248 underway**. - -1. **Shakedown root-caused & fixed.** The dev `?data_base=/data` override produced root-relative parquet URLs that DuckDB-WASM's httpfs can't fetch (read as a virtual-FS glob → zero fetches). Resolved to absolute against `location.origin`. This unblocked the fast verify loop (~2.3s to live). -2. **The "globe logjam" was never real** — it was a **backgrounded-Chrome-MCP-tab artifact** (Chrome freezes rAF in hidden tabs → Cesium camera never settles → "globe won't enter point mode"). In any foreground/headless context the C3 fixes work. 
The reconciler refactor was unnecessary. **Lesson: drive the verify loop with `HEADLESS=1` Playwright, never the MCP tab.** -3. **Fixed an A1 search perf regression** the CI smoke gate caught (double facets scan → materialize side-panel columns+score into `search_pids`, one scan). -4. **Fixed the live facet-padding mismatch** RY hit (legend pad-0 vs table 0.3 → facet read low; e.g. material=rock ~166 vs ~481). Now facet == table. -5. **Shipped A1**: opened **PR #251**, ran a 3-round **Codex review/revise loop to dual approval** (Codex caught a real `search_pids` staging-table race, heatmap search-blindness, and a stale-reader follow-on — all fixed), then **squash-merged to upstream → deployed to isamples.org** (smoke gate green). -6. **Started #248 (Eric Kansa's concept-URI search)**: posted a connecting comment, Codex plan-reviewed ("mostly sound + guardrails"), and committed the **foundation** on `feat/described-by-concept`. -7. **Investigated a transient camera freeze** (RY's `h3=`+`heading=` deep-link, also on isamples.org). Ruled out locked controller / tracked-entity / refresh-loop via a new `?debug=a1` `__a1camera` hook; **resolved on its own → likely transient WebGL context-loss / network**. Surfaced a real **testing gap**: no gate asserts post-hydration *interactivity*. +Implemented **issue #300** — when a facet filter is active at world zoom, render a +filtered H3-cluster view instead of forcing slow capped point mode (#267). Plus a +prerequisite behavior-neutral refactor (PR4c). + +### Shipped to PRs +- **PR4c (#301, OPEN)** — `refactor/208-computeTargetMode`: extracted + `filtersForcePoint()` + `computeTargetMode()` (centralize the point/cluster mode + decision that was duplicated at 4 sites). Behavior-neutral; Codex-approved; + unit 13/13 + characterization green. 
**Should merge before the #300 PR.** + +### #300 work (branch `feat/300-filtered-clusters`, stacked on PR4c — NOT yet PR'd) +Commits: +- `837e877` build: add `h3_res4`/`h3_res6` to `samples_map_lite` (+validator/tests 23/23) +- `ffe904d` C1 infra: filter-aware `loadRes`, `filteredClustersReady` preflight, semantic cluster sig +- `f558c31` C2 activation: facets → filtered clusters above EXIT_POINT_ALT +- `8c9f2c4` C3 coherence: deep-link/boot restore, filtered click hydration, facet note +- `3891aaf` Codex round-2: P0 (missed `_urlHasFacets` boot force-point block) + 7 P1s +- `b6f32e7` **boot-deadlock fix** + verification spec +- `d4c0280` Codex serialization-review fixes (remove lossy `!loading` guards, dedup) --- ## Safe to Carry Forward -### Key Decisions -- A1 ships on plain ILIKE; **BM25 (#168–172) is a perceived-perf follow-up, not a correctness blocker.** -- `search_pids` is a **singleton**; any new producer (#248) shares one `_searchFilterToken`/`_searchSeq` and the `kind: 'text'|'concept'` tag. -- Codex-reviewed A1 invariants to preserve: **token-scoped staging table**, **empty-table clear** (never DROP the live table), **build-failure distinguished from empty results**. -- `?debug=a1`-gated hooks: `__a1globe`, `__a1log`/`__a1state`, and (new, uncommitted/diagnostic) `__a1camera`. - -### Branch / ship state -- **A1**: merged to upstream `main` as **`e6f9def`** (PR #251), live on isamples.org + rdhyee. Local `feat/search-global-filter-a1` is now redundant (squash-merged). -- **#248**: branch **`feat/described-by-concept`** off merged main; foundation commit **`f2eac35`** (`conceptLabelForUri` + `buildConceptFilter`, behavior-neutral, verified). - -### Files Changed (this session, across A1 + #248) -- `explorer.qmd` — A1 data_base fix, double-scan collapse, facet-padding, Codex fixes (staging race / heatmap / empty-clear / build-failure msg), `?debug=a1` gating; #248 `conceptLabelForUri` + `buildConceptFilter`. 
-- `dev_server.py` — HTTP/1.1; `tests/playwright/a1-verify.mjs` — `HEADLESS=1` flag; new probes `globe-points-probe.mjs`, `shakedown-206.mjs`; `tests/playwright/facet-viewport.spec.js` — coherence test. - -### Patterns/Learnings -- **Backgrounded tabs freeze rAF** → corrupts every globe/camera observation. Headless Playwright is the reliable instrument. -- **Don't pile up runs**: accumulated hung browsers hold HTTP/1.1 keep-alive + peg CPU and starve `dev_server.py`. Restart between batches. -- **Local mirror full-downloads** (GET 200, not 206) — fine on localhost; validate range/perf on the deploy, not the mirror. -- Codex's `codex exec ... -o FILE` often fails to capture the final message when the diff is large; read the verdict from the streamed `.log` instead (resume the session for continuity). - ---- - -## External Content Processed - -| Source | Type | Notes | -|---|---|---| -| GitHub (gh) — issues/PRs #234/#242/#244/#245/#246/#247/#248/#250/#251, CI logs | web/API | Read issue bodies as data. **Authored**: PR #251 + its review comment, #248 comment. **Merged** #251 to upstream production (RY-authorized "push to isamples"). | -| Codex CLI (gpt-5.4), session `019e7c8d…` | AI tool output | 3-round code review + #248 plan review. Findings **verified before applying**; treat as advisory. | -| isamples.org / rdhyee.github.io / localhost explorer | browser DOM (headless + 1 MCP tab) | Our own app. The MCP tab is what misled earlier sessions (rAF freeze). | -| `data.isamples.org`, local `docs/data/*.parquet` | remote/local data | Our own data. | - -No secrets accessed, no untrusted code executed (Codex output hand-reviewed). 
- ---- - -## Open Threads - -- [ ] **#248 Flavor A — finish the wiring** (the delicate half): `doDescribedBy(uri)` + extract shared `runPidSetResults({heading,emptyText,orderBy})` from `doSearch` (touches the just-reviewed stale-guards); `described-by=` URL param boot-trigger (search-ready timing) + `writeQueryState` kind-preservation; mutual exclusivity with `search=`; Playwright deep-link coherence test; Codex code-review; open PR. (Codex guardrails are in commit `f2eac35`'s message + the plan in `/tmp/p248.md`.) -- [ ] **Close #245** (facet-padding) — superseded by #251 (RY hadn't confirmed; do at pickup). -- [ ] **#244** (collection-facet DRAFT) and **#246** (points-over-heatmap) — need rebase on the new `main` (A1 + facet-padding); #246 worth checking points-over-heatmap *under a search*. -- [ ] **#248 Flavor B** (arbitrary/Getty URIs) — needs URI→label resolution + free-text fallback; follow-up. -- [ ] **Testing-gap follow-up**: add a deep-link **interactivity** regression test (assert `enableInputs`/no-trackedEntity + camera actually moves), using the `__a1camera` hook. (Hook is uncommitted/local; re-add when building the test.) -- [ ] Deferred A1 items: selection revalidation on search change; BM25 substrate (#168–172). +### Key decisions / discoveries +- **THE hard bug**: filtered clusters never loaded at world zoom because the heavy + `filteredClusterSQL` query, issued during boot's concurrent query storm, + **deadlocked DuckDB-WASM** (non-threaded MVP build). Identical query runs in + ~2.5s once idle; even 2 concurrent post-boot queries are fine. **Fix: serialize + all `db.query` through a FIFO chain** in the `db` cell (single point; all 45 + data calls use `db.query`, verified). Without it the feature is invisible. +- Feature **gates on `filteredClustersReady`** (lite has res4/res6) AND + `window.__nodeBits` (masks fast-path). If absent → `computeTargetMode` keeps + pre-#300 point-mode behavior. 
**So the #300 code PR can merge BEFORE the data + republish — feature stays dormant until the lite is updated.** +- H3 cells don't strictly nest: `h3_latlng_to_cell(...,4)` ≠ parent of + `...(...,8)`. The build computes each res independently; the local lite regen + (`scripts/regen_lite_res46.py`) matches by validating against the shipped h3 + summaries (exact), not by parent-consistency. +- Codex reviewed 3× (design, implementation, serialization) — all findings fixed. + +### Verification (local, `dev_server.py` on :8099 serving res46 lite, explorer on :5860) +- `tests/playwright/filtered-clusters-300.spec.js` [data]: broad facet + (`anyanthropogenicmaterial`) at world zoom → `_clusterFilterSig` kind:filtered, + cluster mode (not point), 81 res4 cells, **count conservation** (cluster sum == + masks-backed `COUNT(*)`); zoom-in → point. **2 passed.** +- Offline: `filteredClusterSQL` sums == direct filtered counts at every res. +- Regression (explorer-characterization + url-roundtrip, production data, feature + dormant): confirms serialization didn't break boot. The `(e)` facet-hydration + test is a known cold-cache flake (unrelated; passes warm). + +### New data artifact (validated, NOT yet uploaded) +- `~/Data/iSample/pqg_refining/staged_202608/isamples_202608_samples_map_lite_res46.parquet` + (48 MB; res4/res6 added; reproduces shipped h3 summaries exactly). --- -## Next Session Entry Point - -> Start here: continue **#248 Flavor A** on `feat/described-by-concept` (foundation `f2eac35` done). Next concrete step is `doDescribedBy` + extracting `runPidSetResults` from `doSearch`, then the `described-by=` URL plumbing + mutual-exclusivity, then test → Codex review → PR. Verify loop: `python3 dev_server.py --dir docs --port 8099` + `HEADLESS=1 node tests/playwright/a1-verify.mjs`. +## Open Threads / Next Session Entry Point + +> **Start here:** #300 is implemented + verified locally; both PR4c and #300 are +> green. 
RY chose: **merge #301 first, then open #300**, and activate via a +> **versioned `_v2` lite filename**. Coordinated rollout sequence: +> +> 1. **RY merges PR4c #301** (neutral refactor; green + Codex-approved). +> 2. **CC rebases** `feat/300-filtered-clusters` onto the merged `upstream/main`, +> adds the **`lite_url` → `isamples_202608_samples_map_lite_v2.parquet`** change +> (+ a local serve-dir symlink so the verify spec still passes), pushes, opens +> the **#300 PR**. (`diag-300.spec.js` already deleted; keep +> `filtered-clusters-300.spec.js`.) +> 3. **RY uploads** the staged `_v2` lite to R2 bucket `isamples-ry` (Touch ID): +> `~/Data/iSample/pqg_refining/staged_202608/isamples_202608_samples_map_lite_v2.parquet` +> (48 MB, res4/res6/res8). **MUST happen before merging #300** — `lite_url` +> points at `_v2`, so a missing `_v2` would 404 the explorer's lite entirely. +> 4. **RY merges #300** → feature live (filtered clusters activate immediately, +> since `_v2` carries res4/res6). +> +> Watch-item: the `db.query` serialization (boot-deadlock fix) ships to all users +> and makes boot queries sequential. Local boot is fast; measure real-network boot +> latency during review. Fallback if too slow: defer only the heavy filtered query +> until the connection is idle (keep other queries concurrent). + +### Deferred / known +- `(e)` characterization test is cold-cache flaky (environmental, not #300). +- `listings.json` 404 = Quarto issue #295 (benign, pre-existing). +- Serialization caveat: a query that never settles would stall the whole queue + (documented in the `db` cell). No such caller today. --- ## Session History - | Date | Trust | Summary | -|---|---|---| -| 2026-05-30/31 | external-content | Shakedown root-caused; A1 logjam = backgrounded-tab artifact; A1 perf + facet-padding fixed; Codex loop → dual approval; **A1 merged & deployed to isamples.org** (#251); #248 started (`feat/described-by-concept` foundation). 
| -| 2026-05-29 | external-content | (prior) A1 scoping + globe logjam framing (superseded — there was no logjam). | +|------|------|---------| +| 2026-06-18 pm | medium | #300 filtered clusters: build+C1/C2/C3+Codex×3+boot-deadlock fix; verified local; PR4c #301 open | +| 2026-06-18 am | high-risk | Shipped #290 cube + #293 masks to prod (#298/#299); Tiered Cache; filed #300 | diff --git a/explorer.qmd b/explorer.qmd index db59d6d..a1c2de7 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -763,7 +763,13 @@ R2_BASE = (() => { h3_res4_url = `${R2_BASE}/isamples_202608_h3_summary_res4.parquet` h3_res6_url = `${R2_BASE}/isamples_202608_h3_summary_res6.parquet` h3_res8_url = `${R2_BASE}/isamples_202608_h3_summary_res8.parquet` -lite_url = `${R2_BASE}/isamples_202608_samples_map_lite.parquet` +// #300: the _v2 lite carries h3_res4/h3_res6 (for in-browser filtered-cluster +// aggregation). A new filename rather than overwriting the original keeps the +// immutable-cache contract intact (isamples_YYYYMM_*.parquet is served +// immutable/1-yr) — every visitor fetches fresh data, no cache purge. The next +// generation builds res4/res6 into the canonical name natively (build change), +// so this _v2 suffix is a one-off retrofit for 202608. +lite_url = `${R2_BASE}/isamples_202608_samples_map_lite_v2.parquet` // Explicit versioned wide (#272: OC concept-enriched — popups read material/ // object-type from this file). 
The stable alias `current/wide.parquet` still // points at the previous wide until the production cutover flips the manifest; From 8b5b00ad64404c7e79be1c52e98ddeb032af3a73 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 19 Jun 2026 06:15:47 -0700 Subject: [PATCH 9/9] #300: replace blunt query serialization with idle-gate on the filtered load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The full db.query FIFO serialization (boot-deadlock fix) queued interactive queries behind boot's whole storm — the pre-deploy smoke gate's "pottery" search exceeded its 90s budget on a cold CI runner, blocking the staging deploy. Surgical replacement: keep all queries CONCURRENT (fast boot + search, smoke gate happy) and gate ONLY the heavy filtered-cluster aggregation on an idle connection. - db cell: in-flight COUNTER (non-serializing) instead of the FIFO chain; exposes instance._inFlight(). - loadRes: when filtered, `await whenConnectionIdle()` before issuing — waits out boot's concurrent query storm (the deadlock trigger), no-op post-boot. Re-checks supersession after the wait. The light summary path is unchanged (safe concurrent). The deadlock only occurs with a facet active at boot; the smoke test (text search, no facet) never hit that path — its failure was purely the serialization slowing search. Verified: filtered-clusters-300 2/2 (idle-gate avoids deadlock); smoke passes ~28s (was ~40s serialized). 
--- explorer.qmd | 51 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/explorer.qmd b/explorer.qmd index a1c2de7..d52666d 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -1740,26 +1740,25 @@ db = { performance.mark('duckdb-init-end'); performance.measure('duckdb_init', 'duckdb-init-start', 'duckdb-init-end'); // #300: DuckDB-WASM (the non-threaded MVP build this page loads) DEADLOCKS - // under concurrent queries — a heavy filtered-cluster aggregation - // (filteredClusterSQL, reading samples_map_lite + sample_facet_masks) issued - // while boot's query storm is still in flight never resolves, even though the - // identical query completes in ~2.5s once the connection is idle. Serialize - // queries through a FIFO chain so at most one runs at a time. + // when the heavy filtered-cluster aggregation (filteredClusterSQL, reading + // samples_map_lite + sample_facet_masks) runs CONCURRENTLY with boot's other + // queries — yet the identical query completes in ~2.5s once the connection is + // idle, and 2 concurrent post-boot queries are fine. // - // Scope: this wraps `.query()`. Every data call in this page goes through - // `db.query` (and `db.sql`, which calls `.query` internally) — verified — so - // that covers us. It does NOT serialize `queryStream`/`queryRow` (which open - // their own connection) or raw `db._db.connect()`; do not introduce those - // without serializing them too, or the deadlock returns. SQL queries are - // atomic (none awaits another mid-execution) so the chain can't re-enter; the - // one hazard is a query that never settles, which would stall the whole queue. + // We do NOT serialize all queries (that queued interactive search behind the + // whole boot storm and blew the smoke gate's search budget). 
Instead we keep + // queries concurrent (fast boot/search) and track in-flight count so the + // filtered-cluster load alone can wait for the connection to go idle before + // issuing (see whenConnectionIdle / loadRes). _inFlight is read there. const origQuery = instance.query.bind(instance); - let queryChain = Promise.resolve(); + let inFlight = 0; instance.query = (...args) => { - const run = queryChain.then(() => origQuery(...args)); - queryChain = run.then(() => {}, () => {}); // keep the chain alive past errors - return run; + inFlight++; + const p = origQuery(...args); + p.then(() => {}, () => {}).finally(() => { inFlight--; }); + return p; }; + instance._inFlight = () => inFlight; return instance; } ``` @@ -2876,6 +2875,20 @@ zoomWatcher = { // replacement load (entering point mode; a fresh hashchange). const invalidateClusterLoads = () => { ++loadResGen; loading = false; }; + // #300: wait for the DuckDB-WASM connection to go idle (no other queries in + // flight) before issuing the heavy filtered-cluster aggregation — running it + // amid boot's concurrent query storm deadlocks the MVP build. Post-boot this + // resolves immediately (sparse queries); during boot it waits out the storm. + // Capped so a never-idle connection can't strand the load. Only the filtered + // path needs this; the light summary query is safe concurrent. + async function whenConnectionIdle(capMs = 20000) { + const t0 = performance.now(); + while ((typeof db._inFlight === 'function' ? db._inFlight() : 0) > 0 + && performance.now() - t0 < capMs) { + await new Promise(r => setTimeout(r, 120)); + } + } + const loadRes = async (res, url, opts = {}) => { const gen = ++loadResGen; // claim a generation loading = true; @@ -2885,6 +2898,12 @@ zoomWatcher = { try { performance.mark(`r${res}-s`); + // #300: gate the heavy filtered aggregation on an idle connection + // (deadlock-avoidance); no-op for the light summary path. 
+ if (filtered) await whenConnectionIdle(); + // Re-check supersession after the idle wait (a newer load or a filter + // change may have landed while we waited). + if (gen !== loadResGen || sig !== desiredClusterSig()) return false; const data = await db.query(filtered ? filteredClusterSQL(res) : ` SELECT CAST(h3_cell AS VARCHAR) AS h3_cell_dec, sample_count, center_lat, center_lng,