From 21780f91f3ccbe1a43e2f9eae24dee077daab5ec Mon Sep 17 00:00:00 2001 From: MarcelMatsal Date: Tue, 19 May 2026 09:23:58 -0400 Subject: [PATCH 01/31] implemented state persistence --- .gitignore | 2 +- stable_pretraining/web/assets/app.js | 127 +++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8a56e6e09..fd2e692b7 100644 --- a/.gitignore +++ b/.gitignore @@ -161,7 +161,7 @@ multirun/ ./data/ examples/data/ wandb/ -DISCOVERABILITY_PLAN.md +SPT_WEB_PLAN.md *.ckpt *.pt diff --git a/stable_pretraining/web/assets/app.js b/stable_pretraining/web/assets/app.js index 68b37e8fd..cf19aa829 100644 --- a/stable_pretraining/web/assets/app.js +++ b/stable_pretraining/web/assets/app.js @@ -79,6 +79,116 @@ applyTheme(saved); } + // ---- state persistence ----------------------------------------------- + + function saveState() { + const snap = { + visible: [...state.visible], + filters: state.filters, + groupBy: state.groupBy, + sortBy: state.sortBy, + sortDesc: state.sortDesc, + xAxis: state.xAxis, + logY: state.logY, + smoothing: state.smoothing, + metricSearch: state.metricSearch, + activeTab: state.activeTab, + theme: state.theme, + }; + try { localStorage.setItem('spt-web-state', JSON.stringify(snap)); } catch {} + const ids = [...state.visible].map(id => encodeURIComponent(id)).join(','); + try { + history.replaceState(null, '', '#runs=' + ids + '&tab=' + encodeURIComponent(state.activeTab)); + } catch {} + } + + function loadState() { + // Parse URL fragment first (wins over localStorage on conflict). + let fragVisible = null; + let fragTab = null; + if (location.hash.length > 1) { + const params = new URLSearchParams(location.hash.slice(1)); + const runsParam = params.get('runs'); + if (runsParam !== null) { + fragVisible = runsParam + ? 
runsParam.split(',').filter(Boolean).map(s => { try { return decodeURIComponent(s); } catch { return s; } }) + : []; + } + const tabParam = params.get('tab'); + if (tabParam && ['figures', 'out', 'err'].includes(tabParam)) fragTab = tabParam; + } + + // Load localStorage snapshot. + let saved = null; + try { + const raw = localStorage.getItem('spt-web-state'); + if (raw) saved = JSON.parse(raw); + } catch {} + + if (saved && typeof saved === 'object') { + if (Array.isArray(saved.filters)) state.filters = saved.filters; + if (typeof saved.groupBy === 'string') state.groupBy = saved.groupBy; + if (typeof saved.sortBy === 'string') state.sortBy = saved.sortBy; + if (typeof saved.sortDesc === 'boolean') state.sortDesc = saved.sortDesc; + if (typeof saved.xAxis === 'string') state.xAxis = saved.xAxis; + if (typeof saved.logY === 'boolean') state.logY = saved.logY; + if (typeof saved.smoothing === 'number') state.smoothing = saved.smoothing; + if (typeof saved.metricSearch === 'string') state.metricSearch = saved.metricSearch; + if (typeof saved.activeTab === 'string') state.activeTab = saved.activeTab; + if (typeof saved.theme === 'string') state.theme = saved.theme; + if (Array.isArray(saved.visible)) state._pendingVisible = new Set(saved.visible); + } + + // Fragment overrides localStorage for visible runs and active tab. + if (fragVisible !== null) state._pendingVisible = new Set(fragVisible); + if (fragTab !== null) state.activeTab = fragTab; + + // Keep the legacy spt-web-theme key in sync so initTheme() still works. 
+ try { localStorage.setItem('spt-web-theme', state.theme); } catch {} + } + + function syncControlsToState() { + const smEl = document.getElementById('smoothing'); + const smvEl = document.getElementById('smoothing-val'); + if (smEl) smEl.value = String(state.smoothing); + if (smvEl) smvEl.textContent = state.smoothing.toFixed(2); + const xEl = document.getElementById('x-axis'); + if (xEl) xEl.value = state.xAxis; + const lyEl = document.getElementById('log-y'); + if (lyEl) lyEl.checked = state.logY; + const msEl = document.getElementById('metric-search'); + if (msEl) msEl.value = state.metricSearch; + // Apply active tab to DOM directly — calling setActiveTab() would fire + // saveState() while state.visible is still empty (runs not yet loaded). + for (const btn of document.querySelectorAll('#tabs .tab')) { + btn.classList.toggle('active', btn.dataset.tab === state.activeTab); + } + for (const pane of document.querySelectorAll('.tab-pane')) { + pane.classList.toggle('active', pane.id === `tab-${state.activeTab}`); + } + } + + async function applyPendingVisible() { + if (!state._pendingVisible) return; + const ids = [...state._pendingVisible].filter(id => state.runs.has(id)); + state._pendingVisible = null; + if (!ids.length) return; + for (const id of ids) state.visible.add(id); + const needMetrics = ids.filter(id => !state.metrics.has(id)); + const needMedia = ids.filter(id => { + if (state.media.has(id)) return false; + const r = state.runs.get(id); + return r && r.has_media; + }); + await Promise.all([...needMetrics.map(fetchMetrics), ...needMedia.map(fetchMedia)]); + renderRunList(); + scheduleRerender(); + if (state.activeTab === 'out' || state.activeTab === 'err') { + await refreshLogStreamsForVisibleRuns(); + renderLogTab(state.activeTab); + } + } + // ---- color: stable hash → HSL ---------------------------------------- const colorCache = new Map(); @@ -125,6 +235,7 @@ } renderRunList(); updateHeaderStats(); + await applyPendingVisible(); } // 
Stream-fetch metrics via NDJSON. The server emits one JSON object per @@ -438,6 +549,7 @@ renderFilters(); renderRunList(); scheduleRerender(); + saveState(); }); chip.appendChild(rm); @@ -524,6 +636,7 @@ renderFilters(); renderRunList(); scheduleRerender(); + saveState(); }); } @@ -641,6 +754,7 @@ state.visible.delete(id); renderRunList(); scheduleRerender(); + saveState(); return; } state.visible.add(id); @@ -664,6 +778,7 @@ } renderRunList(); scheduleRerender(); + saveState(); if (state.activeTab === 'out' || state.activeTab === 'err') { // Selection changed → log-stream options change. fetchLogStreams(id).then(() => renderLogTab(state.activeTab)); @@ -692,6 +807,7 @@ } renderRunList(); scheduleRerender(); + saveState(); if (state.activeTab === 'out' || state.activeTab === 'err') { refreshLogStreamsForVisibleRuns().then(() => renderLogTab(state.activeTab)); } @@ -1929,6 +2045,7 @@ for (const pane of document.querySelectorAll('.tab-pane')) { pane.classList.toggle('active', pane.id === `tab-${name}`); } + saveState(); if (name === 'out' || name === 'err') { // Lazily fetch logs the first time the user opens a log tab. refreshLogStreamsForVisibleRuns().then(() => renderLogTab(name)); @@ -2050,16 +2167,19 @@ state.smoothing = parseFloat(e.target.value); smv.textContent = state.smoothing.toFixed(2); scheduleRerender(); + saveState(); }); document.getElementById('x-axis').addEventListener('change', e => { state.xAxis = e.target.value; scheduleRerender(); + saveState(); }); document.getElementById('log-y').addEventListener('change', e => { state.logY = e.target.checked; scheduleRerender(); + saveState(); }); document.getElementById('select-all').addEventListener('click', () => setAllVisible(true)); @@ -2072,6 +2192,7 @@ renderRunList(); // Group key shows up in chart legends → rebuild charts. 
scheduleRerender(); + saveState(); }); // Tabs @@ -2104,6 +2225,7 @@ state.metricSearch = e.target.value; state._lastTreeKey = null; // force rebuild scheduleRerender(); + saveState(); }, 80)); } @@ -2127,11 +2249,13 @@ document.getElementById('theme-toggle').addEventListener('click', () => { applyTheme(state.theme === 'dark' ? 'light' : 'dark'); + saveState(); }); document.getElementById('sort-by').addEventListener('change', e => { state.sortBy = e.target.value; renderRunList(); + saveState(); }); const sortDirBtn = document.getElementById('sort-dir'); @@ -2140,6 +2264,7 @@ state.sortDesc = !state.sortDesc; sortDirBtn.textContent = state.sortDesc ? '↓' : '↑'; renderRunList(); + saveState(); }); // Detail modal wiring. @@ -2203,7 +2328,9 @@ // ---- init ------------------------------------------------------------- async function main() { + loadState(); wireControls(); + syncControlsToState(); initTheme(); // Start SSE first so we don't miss progress events emitted between the // status fetch and the first scan-tick. 
From 7df08bd27de6c7de2664f4969dfc31876d8e29bb Mon Sep 17 00:00:00 2001 From: MarcelMatsal Date: Thu, 4 Jun 2026 17:26:08 -0400 Subject: [PATCH 02/31] many new features for spt web, more coming soon --- .gitignore | 3 + stable_pretraining/web/assets/app.css | 157 +++++++++++++ stable_pretraining/web/assets/app.js | 275 ++++++++++++++++++++++- stable_pretraining/web/assets/index.html | 8 + 4 files changed, 438 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index fd2e692b7..52395a462 100644 --- a/.gitignore +++ b/.gitignore @@ -159,9 +159,12 @@ docs/source/modules.rst outputs/ multirun/ ./data/ +mock_runs/ examples/data/ wandb/ SPT_WEB_PLAN.md +scripts/ +.DS_Store *.ckpt *.pt diff --git a/stable_pretraining/web/assets/app.css b/stable_pretraining/web/assets/app.css index 973774ec2..8f35f087b 100644 --- a/stable_pretraining/web/assets/app.css +++ b/stable_pretraining/web/assets/app.css @@ -579,6 +579,163 @@ body { } .chart-body { width: 100%; } +/* ---- chart annotations ------------------------------------------------ */ +.chart-annot { + margin-top: 6px; + border-top: 1px solid var(--border-2); + padding-top: 4px; +} +.chart-annot:empty { display: none; } +.chart-annot-table { display: contents; } +.chart-annot-row { + display: grid; + grid-template-columns: 10px 1fr auto auto; + gap: 6px; + align-items: center; + padding: 2px 4px; + border-radius: 3px; + font-size: 11px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + color: var(--muted-2); +} +.chart-annot-header { + color: var(--muted-3); + font-size: 10px; + text-transform: uppercase; + letter-spacing: 0.4px; + cursor: default; +} +.chart-annot-row:not(.chart-annot-header) { cursor: pointer; } +.chart-annot-row:not(.chart-annot-header):hover { background: var(--hover); } +.chart-annot-row.annot-best { color: var(--text); } +.annot-dot { + width: 8px; height: 8px; + border-radius: 2px; + display: inline-block; + flex-shrink: 0; +} +.annot-label { + overflow: hidden; 
white-space: nowrap; text-overflow: ellipsis; + min-width: 0; +} +.annot-val { text-align: right; white-space: nowrap; } +.annot-best-bold { font-weight: 700; color: var(--text-strong); } + +/* ---- runs table ------------------------------------------------------- */ + +#table-controls { + display: flex; align-items: center; gap: 10px; + padding: 8px 12px; + border-bottom: 1px solid var(--border); + flex: 0 0 auto; +} +#table-col-search { + flex: 1 1 220px; + max-width: 320px; + padding: 4px 8px; + background: var(--surface); + border: 1px solid var(--border); + border-radius: 4px; + font-size: 12px; + color: var(--text); +} +#table-col-search:focus { outline: 1px solid var(--accent); } +.table-col-count { + font-size: 11px; + color: var(--muted-2); + white-space: nowrap; +} +.runs-table-wrap { + flex: 1 1 auto; + overflow: auto; + padding: 0 12px 12px; +} +.runs-table { + border-collapse: separate; + border-spacing: 0; + font-size: 12px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + white-space: nowrap; + width: max-content; + min-width: 100%; +} +.runs-table thead th { + position: sticky; + top: 0; + background: var(--surface-2); + border-bottom: 1px solid var(--border); + padding: 6px 10px; + text-align: left; + font-weight: 600; + color: var(--muted); + font-size: 11px; + z-index: 2; + cursor: pointer; + user-select: none; +} +.runs-table thead th:hover { color: var(--text); } +.runs-table thead th.rt-sorted { color: var(--accent); } +.runs-table thead th.col-diff { color: var(--text-strong); } +.rt-corner { + position: sticky !important; + left: 0; + z-index: 4 !important; + cursor: default !important; + min-width: 140px; +} +.rt-col-hdr { min-width: 80px; max-width: 200px; } +.rt-col-label { + overflow: hidden; + text-overflow: ellipsis; + max-width: 160px; + display: inline-block; + vertical-align: middle; +} +.rt-sort-arrow { margin-left: 3px; color: var(--accent); } +.rt-section-start { border-left: 2px solid var(--border-strong); } 
+.runs-table tbody tr { + cursor: pointer; +} +.runs-table tbody tr:hover td { background: var(--hover); } +.runs-table tbody tr:hover td.rt-run-id { background: var(--hover-2); } +.runs-table tbody td { + padding: 5px 10px; + border-bottom: 1px solid var(--border-2); + color: var(--muted-2); +} +.runs-table tbody td.cell-diff { + color: var(--text-strong); + background: rgba(251, 191, 36, 0.07); +} +[data-theme="light"] .runs-table tbody td.cell-diff { + background: rgba(180, 83, 9, 0.06); +} +.runs-table tbody td.cell-same { color: var(--muted-3); } +.rt-run-id { + position: sticky; + left: 0; + background: var(--surface); + z-index: 1; + display: flex; + align-items: center; + gap: 6px; + min-width: 140px; + font-weight: 500; + color: var(--text); + border-right: 1px solid var(--border); +} +.rt-dot { + width: 9px; height: 9px; + border-radius: 2px; + flex-shrink: 0; + border: 1px solid var(--dot-border); +} +.rt-run-label { + overflow: hidden; + text-overflow: ellipsis; + max-width: 120px; +} + /* ---- media panels --------------------------------------------------- */ .media-panel .chart-title { gap: 10px; } diff --git a/stable_pretraining/web/assets/app.js b/stable_pretraining/web/assets/app.js index cf19aa829..e2ebc3b1e 100644 --- a/stable_pretraining/web/assets/app.js +++ b/stable_pretraining/web/assets/app.js @@ -26,8 +26,10 @@ expandedMetrics: new Set(), // metric tree paths the user has expanded (closed by default) openSidebarGroups: new Set(), // sidebar group keys the user has expanded theme: 'dark', - activeTab: 'figures', // 'figures' | 'out' | 'err' + activeTab: 'figures', // 'figures' | 'out' | 'err' | 'table' metricSearch: '', + tableColSearch: '', + tableSort: null, // null | { col: string, dir: 'asc' | 'desc' } // Cached per-run log discoveries: runId -> {streams, fetchedAt} logsIndex: new Map(), // Currently selected (runId, stream_id) for each kind, persisted across @@ -92,6 +94,7 @@ logY: state.logY, smoothing: state.smoothing, metricSearch: 
state.metricSearch, + tableColSearch: state.tableColSearch, activeTab: state.activeTab, theme: state.theme, }; @@ -115,7 +118,7 @@ : []; } const tabParam = params.get('tab'); - if (tabParam && ['figures', 'out', 'err'].includes(tabParam)) fragTab = tabParam; + if (tabParam && ['figures', 'out', 'err', 'table'].includes(tabParam)) fragTab = tabParam; } // Load localStorage snapshot. @@ -134,6 +137,7 @@ if (typeof saved.logY === 'boolean') state.logY = saved.logY; if (typeof saved.smoothing === 'number') state.smoothing = saved.smoothing; if (typeof saved.metricSearch === 'string') state.metricSearch = saved.metricSearch; + if (typeof saved.tableColSearch === 'string') state.tableColSearch = saved.tableColSearch; if (typeof saved.activeTab === 'string') state.activeTab = saved.activeTab; if (typeof saved.theme === 'string') state.theme = saved.theme; if (Array.isArray(saved.visible)) state._pendingVisible = new Set(saved.visible); @@ -158,6 +162,8 @@ if (lyEl) lyEl.checked = state.logY; const msEl = document.getElementById('metric-search'); if (msEl) msEl.value = state.metricSearch; + const tcsEl = document.getElementById('table-col-search'); + if (tcsEl) tcsEl.value = state.tableColSearch; // Apply active tab to DOM directly — calling setActiveTab() would fire // saveState() while state.visible is still empty (runs not yet loaded). 
for (const btn of document.querySelectorAll('#tabs .tab')) { @@ -822,6 +828,7 @@ requestAnimationFrame(() => { renderPending = false; renderCharts(); + if (state.activeTab === 'table') renderRunsTable(); }); } @@ -1017,6 +1024,176 @@ for (const tag of mediaTags.keys()) updateMediaPanel(tag); } + // ---- runs table --------------------------------------------------------- + + function formatCellValue(v) { + if (v == null) return ''; + if (typeof v === 'boolean') return String(v); + if (typeof v === 'number') { + if (!isFinite(v)) return String(v); + if (Number.isInteger(v)) return String(v); + const abs = Math.abs(v); + if (abs !== 0 && (abs < 1e-3 || abs >= 1e6)) return v.toExponential(2); + return String(+v.toPrecision(4)); + } + const s = String(v); + return s.length > 30 ? s.slice(0, 28) + '…' : s; + } + + function renderRunsTable() { + const wrap = document.querySelector('#tab-table .runs-table-wrap'); + if (!wrap) return; + + const visIds = effectivelyVisible(); + const runs = visIds.map(id => state.runs.get(id)).filter(Boolean); + + const cnt = document.getElementById('table-col-count'); + + if (runs.length === 0) { + wrap.replaceChildren(); + const empty = document.createElement('div'); + empty.className = 'empty-state'; + empty.textContent = 'select runs on the left to compare them in the table'; + wrap.appendChild(empty); + if (cnt) cnt.textContent = ''; + return; + } + + // Build union of hparam + summary column keys across all visible runs. + const hparamKeys = new Set(); + const summaryKeys = new Set(); + for (const r of runs) { + for (const k of Object.keys(r.hparams || {})) hparamKeys.add(k); + for (const k of Object.keys(r.summary || {})) summaryKeys.add(k); + } + const allCols = [ + ...[...hparamKeys].sort().map(k => ({ id: `hparams.${k}`, label: k, section: 'hparams' })), + ...[...summaryKeys].sort().map(k => ({ id: `summary.${k}`, label: k, section: 'summary' })), + ]; + + // Filter columns by search. 
+ const q = (state.tableColSearch || '').trim().toLowerCase(); + const cols = q ? allCols.filter(c => c.label.toLowerCase().includes(q)) : allCols; + + if (cnt) { + cnt.textContent = cols.length === allCols.length + ? `${allCols.length} cols` + : `${cols.length} / ${allCols.length} cols`; + } + + // Determine which columns have any differing values. + const diffSet = new Set(); + for (const col of cols) { + const vals = runs.map(r => { + const v = valueAt(r, col.id); + return v == null ? '\x00null' : String(v); + }); + if (new Set(vals).size > 1) diffSet.add(col.id); + } + + // Apply table sort. + let sortedRuns = [...runs]; + const ts = state.tableSort; + if (ts) { + sortedRuns.sort((a, b) => { + const c = compareValues(valueAt(a, ts.col), valueAt(b, ts.col)); + return ts.dir === 'asc' ? c : -c; + }); + } + + // Track the first column of each section for a visual separator. + const sectionStarts = new Set(); + let lastSection = null; + for (const col of cols) { + if (col.section !== lastSection) { sectionStarts.add(col.id); lastSection = col.section; } + } + + // Build table. + const table = document.createElement('table'); + table.className = 'runs-table'; + + // Header row. + const thead = document.createElement('thead'); + const hdrRow = document.createElement('tr'); + + const cornerTh = document.createElement('th'); + cornerTh.className = 'rt-corner'; + cornerTh.textContent = 'run'; + hdrRow.appendChild(cornerTh); + + for (const col of cols) { + const th = document.createElement('th'); + const cls = ['rt-col-hdr', diffSet.has(col.id) ? 
'col-diff' : 'col-same']; + if (sectionStarts.has(col.id)) cls.push('rt-section-start'); + if (ts && ts.col === col.id) cls.push('rt-sorted'); + th.className = cls.join(' '); + th.title = col.id; + + const inner = document.createElement('span'); + inner.className = 'rt-col-label'; + inner.textContent = col.label; + th.appendChild(inner); + + if (ts && ts.col === col.id) { + const arrow = document.createElement('span'); + arrow.className = 'rt-sort-arrow'; + arrow.textContent = ts.dir === 'asc' ? '↑' : '↓'; + th.appendChild(arrow); + } + + th.addEventListener('click', () => { + if (ts && ts.col === col.id) { + state.tableSort = ts.dir === 'asc' ? { col: col.id, dir: 'desc' } : null; + } else { + state.tableSort = { col: col.id, dir: 'asc' }; + } + renderRunsTable(); + }); + + hdrRow.appendChild(th); + } + + thead.appendChild(hdrRow); + table.appendChild(thead); + + // Body rows. + const tbody = document.createElement('tbody'); + for (const run of sortedRuns) { + const tr = document.createElement('tr'); + tr.addEventListener('click', () => openDetail(run.run_id)); + + // Sticky run-ID cell. + const idTd = document.createElement('td'); + idTd.className = 'rt-run-id'; + const dot = document.createElement('span'); + dot.className = 'rt-dot'; + dot.style.background = runColor(run.run_id); + const lbl = document.createElement('span'); + lbl.className = 'rt-run-label'; + lbl.textContent = run.run_id.split('/').pop() || run.run_id; + lbl.title = run.run_id; + idTd.append(dot, lbl); + tr.appendChild(idTd); + + // Data cells. + for (const col of cols) { + const td = document.createElement('td'); + const cls = [diffSet.has(col.id) ? 'cell-diff' : 'cell-same']; + if (sectionStarts.has(col.id)) cls.push('rt-section-start'); + td.className = cls.join(' '); + const val = valueAt(run, col.id); + td.textContent = formatCellValue(val); + td.title = val != null ? 
String(val) : ''; + tr.appendChild(td); + } + + tbody.appendChild(tr); + } + table.appendChild(tbody); + + wrap.replaceChildren(table); + } + function makePanel(fullName, displayName) { const panel = document.createElement('div'); panel.className = 'chart-panel'; @@ -1031,6 +1208,9 @@ const body = document.createElement('div'); body.className = 'chart-body'; panel.appendChild(body); + const annot = document.createElement('div'); + annot.className = 'chart-annot'; + panel.appendChild(annot); return panel; } @@ -1325,7 +1505,19 @@ spanGaps: true, }); } - return { data, seriesCfg, runIds: series.map(s => s.id) }; + const lower = lowerIsBetter(name); + const seriesStats = series.map(s => { + const raw = s.ys.filter(v => v != null && isFinite(v)); + const lastVal = raw.length ? raw[raw.length - 1] : null; + const bestVal = raw.length ? (lower ? Math.min(...raw) : Math.max(...raw)) : null; + return { id: s.id, lastVal, bestVal }; + }); + return { data, seriesCfg, runIds: series.map(s => s.id), seriesStats }; + } + + function lowerIsBetter(name) { + const n = name.toLowerCase(); + return n.includes('loss') || n.includes('err') || n.includes('perplexity') || n.includes('ppl'); } function fmtTooltipNum(v) { @@ -1477,12 +1669,70 @@ }; } + function updateAnnotTable(name, entry, seriesCfg, seriesStats) { + const annot = entry.panel.querySelector('.chart-annot'); + if (!annot) return; + if (!seriesStats || seriesStats.length === 0) { annot.replaceChildren(); return; } + + const lower = lowerIsBetter(name); + const validBests = seriesStats.map(s => s.bestVal).filter(v => v != null); + const overallBest = validBests.length + ? (lower ? Math.min(...validBests) : Math.max(...validBests)) + : null; + + annot.replaceChildren(); + + // Header row + const hdr = document.createElement('div'); + hdr.className = 'chart-annot-row chart-annot-header'; + hdr.innerHTML = 'last' + (lower ? 
'min' : 'max') + ''; + annot.appendChild(hdr); + + for (let i = 0; i < seriesStats.length; i++) { + const { id, lastVal, bestVal } = seriesStats[i]; + const seriesIdx = i + 1; + const s = seriesCfg[seriesIdx]; + if (!s) continue; + const color = typeof s.stroke === 'function' ? s.stroke() : s.stroke; + const isBest = bestVal != null && bestVal === overallBest; + + const row = document.createElement('div'); + row.className = 'chart-annot-row' + (isBest ? ' annot-best' : ''); + row.title = id; + + const dot = document.createElement('span'); + dot.className = 'annot-dot'; + dot.style.background = color; + + const label = document.createElement('span'); + label.className = 'annot-label'; + label.textContent = id.split('/').pop() || id; + + const lastEl = document.createElement('span'); + lastEl.className = 'annot-val'; + lastEl.textContent = fmtTooltipNum(lastVal); + + const bestEl = document.createElement('span'); + bestEl.className = isBest ? 'annot-val annot-best-bold' : 'annot-val'; + bestEl.textContent = fmtTooltipNum(bestVal); + + row.append(dot, label, lastEl, bestEl); + row.addEventListener('click', () => { + if (!entry.plot) return; + const cur = entry.plot.series[seriesIdx]; + if (!cur) return; + entry.plot.setSeries(seriesIdx, { show: cur.show === false }); + }); + annot.appendChild(row); + } + } + function updateChart(name) { const entry = state.charts.get(name); if (!entry) return; const visibleIds = effectivelyVisible().sort(); - const { data, seriesCfg, runIds } = buildChartData(name, visibleIds); + const { data, seriesCfg, runIds, seriesStats } = buildChartData(name, visibleIds); if (runIds.length === 0) { // No data yet for this chart with current selection; clear. 
@@ -1491,6 +1741,7 @@ entry.plot = null; entry.configKey = ''; } + updateAnnotTable(name, entry, [], []); return; } @@ -1498,6 +1749,7 @@ if (entry.plot && entry.configKey === configKey) { entry.plot.setData(data); + updateAnnotTable(name, entry, seriesCfg, seriesStats); return; } @@ -1506,6 +1758,7 @@ const width = body.clientWidth || 400; entry.plot = new uPlot(makeUplotOpts(name, width, seriesCfg), data, body); entry.configKey = configKey; + updateAnnotTable(name, entry, seriesCfg, seriesStats); } // ---- landing / overview --------------------------------------------- @@ -2037,7 +2290,7 @@ // ---- tabs ------------------------------------------------------------- function setActiveTab(name) { - if (!['figures', 'out', 'err'].includes(name)) return; + if (!['figures', 'out', 'err', 'table'].includes(name)) return; state.activeTab = name; for (const btn of document.querySelectorAll('#tabs .tab')) { btn.classList.toggle('active', btn.dataset.tab === name); @@ -2049,6 +2302,8 @@ if (name === 'out' || name === 'err') { // Lazily fetch logs the first time the user opens a log tab. refreshLogStreamsForVisibleRuns().then(() => renderLogTab(name)); + } else if (name === 'table') { + renderRunsTable(); } } @@ -2229,6 +2484,16 @@ }, 80)); } + // Table column search + const tcs = document.getElementById('table-col-search'); + if (tcs) { + tcs.addEventListener('input', debounce(e => { + state.tableColSearch = e.target.value; + saveState(); + renderRunsTable(); + }, 80)); + } + // Log tab selectors / refresh buttons const outSel = document.getElementById('logs-stream-selector'); if (outSel) outSel.addEventListener('change', () => { diff --git a/stable_pretraining/web/assets/index.html b/stable_pretraining/web/assets/index.html index e8a0149c4..86e96f5fe 100644 --- a/stable_pretraining/web/assets/index.html +++ b/stable_pretraining/web/assets/index.html @@ -52,6 +52,7 @@ +
@@ -90,6 +91,13 @@

     
+
+
+ + +
+
+

@@ -87,6 +89,8 @@
         
+        
+        
         
       
       


From 5bcbfc13c80df43b984141cd1f00bb0967200447 Mon Sep 17 00:00:00 2001
From: MarcelMatsal 
Date: Thu, 4 Jun 2026 20:00:20 -0400
Subject: [PATCH 06/31] added duration for the runs

---
 stable_pretraining/registry/_sidecar.py |  2 +
 stable_pretraining/registry/logger.py   |  3 ++
 stable_pretraining/web/assets/app.css   |  5 +++
 stable_pretraining/web/assets/app.js    | 55 ++++++++++++++++++++++---
 stable_pretraining/web/scan.py          |  8 +++-
 5 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/stable_pretraining/registry/_sidecar.py b/stable_pretraining/registry/_sidecar.py
index 06c8da85e..b5b902a49 100644
--- a/stable_pretraining/registry/_sidecar.py
+++ b/stable_pretraining/registry/_sidecar.py
@@ -56,6 +56,7 @@ def make_sidecar(
     run_dir: str,
     status: str = "running",
     created_at: Optional[float] = None,
+    ended_at: Optional[float] = None,
     hparams: Optional[Dict[str, Any]] = None,
     summary: Optional[Dict[str, Any]] = None,
     tags: Optional[List[str]] = None,
@@ -70,6 +71,7 @@ def make_sidecar(
         "run_dir": run_dir,
         "status": status,
         "created_at": created_at if created_at is not None else now,
+        "ended_at": ended_at,
         "updated_at": now,
         "tags": list(tags or []),
         "notes": notes or "",
diff --git a/stable_pretraining/registry/logger.py b/stable_pretraining/registry/logger.py
index d952e63fe..43e098ad1 100644
--- a/stable_pretraining/registry/logger.py
+++ b/stable_pretraining/registry/logger.py
@@ -177,6 +177,7 @@ def __init__(
         # the registry can order runs chronologically regardless of how
         # often we flush.
         self._created_at: Optional[float] = None
+        self._ended_at: Optional[float] = None
         # First-write flag for summary.json — used to log a one-shot info
         # line on creation, then debug lines on subsequent rewrites so we
         # don't spam every flush.
@@ -262,6 +263,7 @@ def save(self) -> None:
     def finalize(self, status: str) -> None:
         # Map Lightning status strings to our canonical vocabulary.
         self._status = {"success": "completed", "failed": "failed"}.get(status, status)
+        self._ended_at = time.time()
         # Parent writes CSVs.  We don't call super().finalize first
         # because _experiment may be None on rank-zero callers that
         # never logged — super() handles that no-op correctly.
@@ -406,6 +408,7 @@ def _write_sidecar(self) -> None:
             run_dir=str(self._run_dir),
             status=self._status,
             created_at=self._created_at,
+            ended_at=self._ended_at,
             hparams=self._hparams,
             summary=self._summary,
             tags=self._tags,
diff --git a/stable_pretraining/web/assets/app.css b/stable_pretraining/web/assets/app.css
index 158523bec..5d78a1b0c 100644
--- a/stable_pretraining/web/assets/app.css
+++ b/stable_pretraining/web/assets/app.css
@@ -284,6 +284,11 @@ body {
 .run-status.completed { color: var(--good); }
 .run-status.failed    { color: var(--bad); }
 .run-status.running   { color: var(--accent); }
+.run-dur {
+  font-size: 10px; color: var(--muted-2);
+  font-variant-numeric: tabular-nums;
+  white-space: nowrap; flex-shrink: 0;
+}
 
 .run-info {
   background: none; border: none; cursor: pointer;
diff --git a/stable_pretraining/web/assets/app.js b/stable_pretraining/web/assets/app.js
index 0f27ec0ca..d5878a2e7 100644
--- a/stable_pretraining/web/assets/app.js
+++ b/stable_pretraining/web/assets/app.js
@@ -737,6 +737,14 @@
       row.appendChild(st);
     }
 
+    if (r.status === 'running' || r.ended_at) {
+      const dur = document.createElement('div');
+      dur.className = 'run-dur';
+      dur.dataset.runId = r.run_id;
+      dur.textContent = fmtRunDur(r) || '';
+      row.appendChild(dur);
+    }
+
     const info = document.createElement('button');
     info.type = 'button';
     info.className = 'run-info';
@@ -2140,6 +2148,28 @@
     return d.toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC');
   }
 
+  // Format elapsed seconds as a coarse label: '<1m', '5m', '2h 3m', '1d 4h'.
+  // Returns null for missing, negative, or non-finite input so callers can
+  // fall back to '' instead of rendering labels such as 'NaNd'.
+  function fmtDuration(secs) {
+    if (secs == null || secs < 0 || !Number.isFinite(Number(secs))) return null;
+    const m = Math.floor(Math.floor(secs) / 60);
+    if (m < 1) return '<1m';
+    if (m < 60) return `${m}m`;
+    const h = Math.floor(m / 60);
+    if (h < 24) return m % 60 > 0 ? `${h}h ${m % 60}m` : `${h}h`;
+    const d = Math.floor(h / 24);
+    return h % 24 > 0 ? `${d}d ${h % 24}h` : `${d}d`;
+  }
+
+  // Text for a run row's .run-dur label: time elapsed so far while status is
+  // 'running', else ended_at - created_at; null when neither can be computed.
+  function fmtRunDur(r) {
+    if (!r.created_at) return null;
+    if (r.status === 'running') return fmtDuration(Date.now() / 1000 - r.created_at);
+    return r.ended_at ? fmtDuration(r.ended_at - r.created_at) : null;
+  }
+
   function classifyValue(v) {
     if (v == null) return 'null';
     if (typeof v === 'number') return 'num';
@@ -2179,10 +2209,10 @@
     }
   }
 
-  function buildKVSection(label, kv, filter) {
+  function buildKVSection(label, kv, filter, sorted = true) {
     const entries = Object.entries(kv)
-      .filter(([k]) => !filter || k.toLowerCase().includes(filter))
-      .sort((a, b) => a[0].localeCompare(b[0]));
+      .filter(([k]) => !filter || k.toLowerCase().includes(filter));
+    if (sorted) entries.sort((a, b) => a[0].localeCompare(b[0]));
     if (!entries.length) return null;
 
     const section = document.createElement('section');
@@ -2225,17 +2255,25 @@
     const editing = prevNotes && document.activeElement === prevNotes.querySelector('textarea');
     const notesSec = editing ? prevNotes : _makeNotesSection(r);
 
+    const now = Date.now() / 1000;
+    let durSecs = null;
+    if (r.created_at) {
+      if (r.ended_at) durSecs = r.ended_at - r.created_at;
+      else if (r.status === 'running') durSecs = now - r.created_at;
+    }
     const meta = {
       run_dir: r.run_dir,
       status: r.status,
-      created_at: fmtTime(r.created_at),
+      started: fmtTime(r.created_at),
+      ended: r.ended_at ? fmtTime(r.ended_at) : null,
+      duration: fmtDuration(durSecs),
       tags: (r.tags || []).join(', ') || null,
       checkpoint_path: r.checkpoint_path,
     };
 
     const filter = state.detailFilter.toLowerCase();
     const sections = [];
-    const metaSec = buildKVSection('meta', meta, filter);
+    const metaSec = buildKVSection('meta', meta, filter, false);
     if (metaSec) sections.push(metaSec);
     const hpSec = buildKVSection('hparams', r.hparams || {}, filter);
     if (hpSec) sections.push(hpSec);
@@ -2777,6 +2815,13 @@
     wireControls();
     syncControlsToState();
     initTheme();
+    // Tick every minute to keep duration displays current for running runs.
+    setInterval(() => {
+      for (const el of document.querySelectorAll('.run-dur[data-run-id]')) {
+        const r = state.runs.get(el.dataset.runId);
+        if (r) el.textContent = fmtRunDur(r) || '';
+      }
+    }, 60_000);
     // Start SSE first so we don't miss progress events emitted between the
     // status fetch and the first scan-tick.
     startSSE();
diff --git a/stable_pretraining/web/scan.py b/stable_pretraining/web/scan.py
index 8b1bd8c98..088eae289 100644
--- a/stable_pretraining/web/scan.py
+++ b/stable_pretraining/web/scan.py
@@ -21,7 +21,7 @@
 from pathlib import Path
 from typing import Any, Optional
 
-from ..registry._sidecar import SIDECAR_NAME, write_sidecar
+from ..registry._sidecar import SIDECAR_NAME, heartbeat_mtime, write_sidecar
 
 
 @dataclass
@@ -397,12 +397,18 @@ def _serialize(run: _Run) -> dict:
             or run.run_id.rsplit("/", 1)[-1]
             or run.run_id
         )
+        try:
+            hb_at = heartbeat_mtime(run.run_dir)
+        except OSError:
+            hb_at = None
         return {
             "run_id": run.run_id,
             "run_dir": str(run.run_dir),
             "display_name": display_name,
             "status": s.get("status"),
             "created_at": s.get("created_at"),
+            "ended_at": s.get("ended_at"),
+            "heartbeat_at": hb_at,
             "tags": s.get("tags") or [],
             "notes": s.get("notes") or "",
             "hparams": s.get("hparams") or {},

From ae86b50df074de1cafb988f3095aa25c99fcb140 Mon Sep 17 00:00:00 2001
From: MarcelMatsal 
Date: Thu, 4 Jun 2026 20:22:40 -0400
Subject: [PATCH 07/31] added detection of stale runs: runs still reported as
 running whose heartbeat has not been updated recently

---
 stable_pretraining/web/assets/app.css    |  9 +++
 stable_pretraining/web/assets/app.js     | 90 ++++++++++++++++++------
 stable_pretraining/web/assets/index.html |  1 +
 3 files changed, 77 insertions(+), 23 deletions(-)

diff --git a/stable_pretraining/web/assets/app.css b/stable_pretraining/web/assets/app.css
index 5d78a1b0c..63e280fe0 100644
--- a/stable_pretraining/web/assets/app.css
+++ b/stable_pretraining/web/assets/app.css
@@ -284,11 +284,19 @@ body {
 .run-status.completed { color: var(--good); }
 .run-status.failed    { color: var(--bad); }
 .run-status.running   { color: var(--accent); }
+.run-status.stale     { color: var(--warn, #f59e0b); }
 .run-dur {
   font-size: 10px; color: var(--muted-2);
   font-variant-numeric: tabular-nums;
   white-space: nowrap; flex-shrink: 0;
 }
+.run-dot.stale {
+  background: none; border: none;
+  color: var(--warn, #f59e0b);
+  font-size: 10px; line-height: 1;
+  display: flex; align-items: center; justify-content: center;
+  cursor: help;
+}
 
 .run-info {
   background: none; border: none; cursor: pointer;
@@ -433,6 +441,7 @@ body {
 .status-stat .dot.running   { background: #22d3ee; }
 .status-stat .dot.completed { background: #34d399; }
 .status-stat .dot.failed    { background: #f87171; }
+.status-stat .dot.stale     { background: var(--warn, #f59e0b); }
 .status-stat .n { color: var(--text); font-weight: 600; font-variant-numeric: tabular-nums; }
 
 /* ---- main / charts -------------------------------------------------- */
diff --git a/stable_pretraining/web/assets/app.js b/stable_pretraining/web/assets/app.js
index d5878a2e7..2b5febffc 100644
--- a/stable_pretraining/web/assets/app.js
+++ b/stable_pretraining/web/assets/app.js
@@ -436,6 +436,7 @@
 
   function valueAt(run, key) {
     if (!key) return null;
+    if (key === 'status') return effectiveStatus(run);
     if (key.indexOf('.') < 0) return run[key];
     const dot = key.indexOf('.');
     const ns = key.slice(0, dot);
@@ -671,8 +672,15 @@
     row.dataset.runId = r.run_id;
 
     const dot = document.createElement('div');
-    dot.className = 'run-dot';
-    dot.style.background = runColor(r.run_id);
+    if (isStale(r)) {
+      dot.className = 'run-dot stale';
+      dot.textContent = '⚠';
+      const staleMins = Math.floor((Date.now() / 1000 - r.heartbeat_at) / 60);
+      dot.title = `no heartbeat for ${staleMins > 0 ? staleMins + 'm' : '<1m'} — may have crashed`;
+    } else {
+      dot.className = 'run-dot';
+      dot.style.background = runColor(r.run_id);
+    }
     row.appendChild(dot);
 
     // Name column: primary display_name + optional dimmed run_id hint.
@@ -732,8 +740,9 @@
 
     if (r.status) {
       const st = document.createElement('div');
-      st.className = `run-status ${r.status}`;
-      st.textContent = r.status;
+      const es = effectiveStatus(r);
+      st.className = `run-status ${es}`;
+      st.textContent = es;
       row.appendChild(st);
     }
 
@@ -780,8 +789,8 @@
     // that affect appearance (visibility, loading-pulse) are repainted by
     // touching only the affected row's classList below.
     const layoutKey = JSON.stringify({
-      groupKeys: groups.map(g => [g.key, g.runs.map(r => [r.run_id, r.display_name])]),
-      ungrouped: ungrouped.map(r => [r.run_id, r.display_name]),
+      groupKeys: groups.map(g => [g.key, g.runs.map(r => [r.run_id, r.display_name, isStale(r)])]),
+      ungrouped: ungrouped.map(r => [r.run_id, r.display_name, isStale(r)]),
       open: [...state.openSidebarGroups].sort(),
     });
     if (state._lastListKey === layoutKey) {
@@ -1864,9 +1873,10 @@
     wrap.className = 'status-bars';
     const colors = {
       completed: '#34d399', running: '#22d3ee',
+      stale: '#f59e0b',
       failed: '#f87171', unknown: '#6b7280',
     };
-    const order = ['completed', 'running', 'failed'];
+    const order = ['completed', 'running', 'stale', 'failed'];
     const seen = new Set(Object.keys(counts));
     const sorted = [
       ...order.filter(k => seen.has(k)),
@@ -1946,8 +1956,15 @@
     const row = document.createElement('div');
     row.className = 'recent-run';
     const dot = document.createElement('div');
-    dot.className = 'run-dot';
-    dot.style.background = runColor(r.run_id);
+    if (isStale(r)) {
+      dot.className = 'run-dot stale';
+      dot.textContent = '⚠';
+      const staleMins = Math.floor((Date.now() / 1000 - r.heartbeat_at) / 60);
+      dot.title = `no heartbeat for ${staleMins > 0 ? staleMins + 'm' : '<1m'} — may have crashed`;
+    } else {
+      dot.className = 'run-dot';
+      dot.style.background = runColor(r.run_id);
+    }
     row.appendChild(dot);
     const name = document.createElement('div');
     name.className = 'recent-run-name';
@@ -1956,8 +1973,9 @@
     row.appendChild(name);
     if (r.status) {
       const st = document.createElement('div');
-      st.className = `run-status ${r.status}`;
-      st.textContent = r.status;
+      const es = effectiveStatus(r);
+      st.className = `run-status ${es}`;
+      st.textContent = es;
       row.appendChild(st);
     }
     const ago = document.createElement('div');
@@ -1975,13 +1993,16 @@
     const days = 30;
     const startDay = Math.floor((now - (days - 1) * oneDay) / oneDay) * oneDay;
     const totalBins = new Array(days).fill(0);
-    const failBins = new Array(days).fill(0);
+    const failBins  = new Array(days).fill(0);
+    const staleBins = new Array(days).fill(0);
     for (const r of runs) {
       if (!r.created_at) continue;
       const idx = Math.floor((r.created_at - startDay) / oneDay);
       if (idx < 0 || idx >= days) continue;
       totalBins[idx]++;
-      if (r.status === 'failed') failBins[idx]++;
+      const es = effectiveStatus(r);
+      if (es === 'failed') failBins[idx]++;
+      if (es === 'stale')  staleBins[idx]++;
     }
     const xs = new Array(days);
     for (let i = 0; i < days; i++) xs[i] = startDay + i * oneDay + oneDay / 2;
@@ -1990,10 +2011,11 @@
     const bars = uPlot.paths.bars
       ? uPlot.paths.bars({ size: [0.7, 80], align: 0 })
       : undefined;
-    const muted = themeColor('muted') || '#8aa0b8';
-    const grid = themeColor('grid') || '#1f2630';
+    const muted  = themeColor('muted')  || '#8aa0b8';
+    const grid   = themeColor('grid')   || '#1f2630';
     const accent = themeColor('accent') || '#22d3ee';
-    const bad = themeColor('bad') || '#f87171';
+    const bad    = themeColor('bad')    || '#f87171';
+    const warn   = themeColor('warn')   || '#f59e0b';
     new uPlot({
       width, height: 180,
       cursor: { drag: { x: false, y: false } },
@@ -2030,20 +2052,27 @@
         {
           label: 'failed',
           stroke: bad,
-          fill: bad + '8c',    // ~55% alpha
+          fill: bad + '8c',
+          paths: bars,
+          points: { show: false },
+        },
+        {
+          label: 'stale',
+          stroke: warn,
+          fill: warn + '8c',
           paths: bars,
           points: { show: false },
         },
       ],
       legend: { show: true, live: false },
-    }, [xs, totalBins, failBins], parent);
+    }, [xs, totalBins, failBins, staleBins], parent);
   }
 
   function renderOverview(root) {
     const allRuns = [...state.runs.values()].filter(passesFilters);
     const counts = {};
     for (const r of allRuns) {
-      const s = r.status || 'unknown';
+      const s = effectiveStatus(r) || 'unknown';
       counts[s] = (counts[s] || 0) + 1;
     }
 
@@ -2065,6 +2094,7 @@
     cards.className = 'stat-cards';
     cards.appendChild(statCard('total runs', allRuns.length, '#f1f5f9'));
     cards.appendChild(statCard('running',   counts.running   || 0, '#22d3ee'));
+    if (counts.stale) cards.appendChild(statCard('stale', counts.stale, '#f59e0b'));
     cards.appendChild(statCard('completed', counts.completed || 0, '#34d399'));
     cards.appendChild(statCard('failed',    counts.failed    || 0, '#f87171'));
     wrap.appendChild(cards);
@@ -2170,6 +2200,16 @@
     return null;
   }
 
+  function isStale(r) {
+    if (r.status !== 'running') return false;
+    if (!r.heartbeat_at) return false;
+    return (Date.now() / 1000) - r.heartbeat_at > 300;
+  }
+
+  function effectiveStatus(r) {
+    return isStale(r) ? 'stale' : (r.status || null);
+  }
+
   function classifyValue(v) {
     if (v == null) return 'null';
     if (typeof v === 'number') return 'num';
@@ -2263,7 +2303,7 @@
     }
     const meta = {
       run_dir: r.run_dir,
-      status: r.status,
+      status: effectiveStatus(r),
       started: fmtTime(r.created_at),
       ended: r.ended_at ? fmtTime(r.ended_at) : null,
       duration: fmtDuration(durSecs),
@@ -2432,15 +2472,17 @@
   // ---- header stats -----------------------------------------------------
 
   function updateHeaderStats() {
-    const counts = { running: 0, completed: 0, failed: 0 };
+    const counts = { running: 0, completed: 0, failed: 0, stale: 0 };
     for (const r of state.runs.values()) {
-      const s = r.status || 'unknown';
+      const s = effectiveStatus(r) || 'unknown';
       if (s in counts) counts[s] += 1;
     }
     for (const [k, v] of Object.entries(counts)) {
       const el = document.querySelector(`#stat-${k} .n`);
       if (el) el.textContent = String(v);
     }
+    const staleChip = document.getElementById('stat-stale');
+    if (staleChip) staleChip.hidden = counts.stale === 0;
   }
 
   // ---- tabs -------------------------------------------------------------
@@ -2815,12 +2857,14 @@
     wireControls();
     syncControlsToState();
     initTheme();
-    // Tick every minute to keep duration displays current for running runs.
+    // Tick every minute to keep duration displays and stale indicators current.
     setInterval(() => {
       for (const el of document.querySelectorAll('.run-dur[data-run-id]')) {
         const r = state.runs.get(el.dataset.runId);
         if (r) el.textContent = fmtRunDur(r) || '';
       }
+      renderRunList();
+      updateHeaderStats();
     }, 60_000);
     // Start SSE first so we don't miss progress events emitted between the
     // status fetch and the first scan-tick.
diff --git a/stable_pretraining/web/assets/index.html b/stable_pretraining/web/assets/index.html
index 111f3168d..2eeb0b3f6 100644
--- a/stable_pretraining/web/assets/index.html
+++ b/stable_pretraining/web/assets/index.html
@@ -16,6 +16,7 @@
       ·  0 running
       ·  0 done
       ·  0 failed
+      
     
     
From b40300fc69518e1a6121078e27b7d4fe4fad01ce Mon Sep 17 00:00:00 2001 From: MarcelMatsal Date: Fri, 5 Jun 2026 08:54:53 -0400 Subject: [PATCH 08/31] added scatterplot to compare different metrics across runs --- stable_pretraining/web/assets/app.css | 39 +++++ stable_pretraining/web/assets/app.js | 205 ++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) diff --git a/stable_pretraining/web/assets/app.css b/stable_pretraining/web/assets/app.css index 63e280fe0..2aac23261 100644 --- a/stable_pretraining/web/assets/app.css +++ b/stable_pretraining/web/assets/app.css @@ -667,6 +667,45 @@ body { .annot-val { text-align: right; white-space: nowrap; } .annot-best-bold { font-weight: 700; color: var(--text-strong); } +/* ---- scatter section -------------------------------------------------- */ + +.scatter-section { + margin: 16px 12px 8px; + background: var(--surface); + border: 1px solid var(--border); + border-radius: 6px; + overflow: hidden; +} +.scatter-header { + display: flex; align-items: center; gap: 16px; flex-wrap: wrap; + padding: 8px 12px; + border-bottom: 1px solid var(--border); +} +.scatter-title { + font-size: 11px; font-weight: 600; + color: var(--muted); text-transform: uppercase; letter-spacing: 0.6px; + margin: 0; white-space: nowrap; +} +.scatter-controls { + display: flex; align-items: center; gap: 12px; flex-wrap: wrap; +} +.scatter-axis-label { + font-size: 11px; color: var(--muted); + display: flex; align-items: center; gap: 4px; +} +.scatter-axis-sel { + background: var(--bg); color: var(--text); + border: 1px solid var(--border); border-radius: 4px; + padding: 2px 5px; font-size: 11px; font-family: inherit; +} +.scatter-axis-sel:focus { outline: none; border-color: var(--accent); } +.scatter-plot { padding: 8px; } +.scatter-empty { + padding: 24px 12px; + color: var(--muted); font-size: 12px; + text-align: center; font-style: italic; +} + /* ---- runs table ------------------------------------------------------- */ #table-controls { 
diff --git a/stable_pretraining/web/assets/app.js b/stable_pretraining/web/assets/app.js index 2b5febffc..fab2bec85 100644 --- a/stable_pretraining/web/assets/app.js +++ b/stable_pretraining/web/assets/app.js @@ -36,11 +36,16 @@ // tab switches so the user doesn't lose their place. logSelection: { out: null, err: null }, // null or {runId, streamId} logLivePaused: { out: false, err: false }, + scatterX: null, // 'hparams.lr' | 'summary.val_acc' | … + scatterY: null, }; // setInterval handles for log auto-refresh; null when not running. const _logLiveTimers = { out: null, err: null }; + // Active uPlot instance for the scatter panel; null when not shown. + let _scatterPlot = null; + const SYNC_KEY = 'sptweb-x'; // ---- theme ----------------------------------------------------------- @@ -99,6 +104,8 @@ smoothing: state.smoothing, metricSearch: state.metricSearch, tableColSearch: state.tableColSearch, + scatterX: state.scatterX, + scatterY: state.scatterY, activeTab: state.activeTab, theme: state.theme, }; @@ -142,6 +149,8 @@ if (typeof saved.smoothing === 'number') state.smoothing = saved.smoothing; if (typeof saved.metricSearch === 'string') state.metricSearch = saved.metricSearch; if (typeof saved.tableColSearch === 'string') state.tableColSearch = saved.tableColSearch; + if (typeof saved.scatterX === 'string') state.scatterX = saved.scatterX; + if (typeof saved.scatterY === 'string') state.scatterY = saved.scatterY; if (typeof saved.activeTab === 'string') state.activeTab = saved.activeTab; if (typeof saved.theme === 'string') state.theme = saved.theme; if (Array.isArray(saved.visible)) state._pendingVisible = new Set(saved.visible); @@ -1051,6 +1060,7 @@ for (const { panel } of state.mediaPanels.values()) panel.remove(); state.mediaPanels.clear(); state._lastTreeKey = null; // force rebuild next time we leave overview + if (_scatterPlot) { _scatterPlot.destroy(); _scatterPlot = null; } renderOverview(root); return; } @@ -1102,6 +1112,7 @@ for (const name of 
metrics) updateChart(name); for (const tag of mediaTags.keys()) updateMediaPanel(tag); + updateScatterSection(root); } // ---- runs table --------------------------------------------------------- @@ -2068,6 +2079,200 @@ }, [xs, totalBins, failBins, staleBins], parent); } + // ---- scatter plot ------------------------------------------------------- + + function scatterNumericKeys() { + const keys = new Set(); + for (const id of effectivelyVisible()) { + const r = state.runs.get(id); + if (!r) continue; + for (const [k, v] of Object.entries(r.summary || {})) + if (typeof v === 'number' && isFinite(v)) keys.add('summary.' + k); + for (const [k, v] of Object.entries(r.hparams || {})) + if (typeof v === 'number' && isFinite(v)) keys.add('hparams.' + k); + } + return [...keys].sort(); + } + + function getScatterVal(r, key) { + const v = valueAt(r, key); + return (typeof v === 'number' && isFinite(v)) ? v : null; + } + + function _fillScatterSel(sel, keys, currentVal) { + sel.innerHTML = ''; + const empty = document.createElement('option'); + empty.value = ''; empty.textContent = '— select —'; + sel.appendChild(empty); + for (const k of keys) { + const opt = document.createElement('option'); + opt.value = k; + opt.textContent = k.replace(/^(summary|hparams)\./, (_, ns) => ns + ': '); + if (k === currentVal) opt.selected = true; + sel.appendChild(opt); + } + if (!currentVal || !keys.includes(currentVal)) sel.value = ''; + } + + function _rebuildScatterPlot(section) { + const plotDiv = section.querySelector('.scatter-plot'); + if (_scatterPlot) { _scatterPlot.destroy(); _scatterPlot = null; } + plotDiv.replaceChildren(); + + const xKey = state.scatterX; + const yKey = state.scatterY; + if (!xKey || !yKey) { + const msg = document.createElement('div'); + msg.className = 'scatter-empty'; + msg.textContent = 'select x and y axes above to draw the scatter plot'; + plotDiv.appendChild(msg); + return; + } + + const validRuns = effectivelyVisible() + .map(id => 
state.runs.get(id)) + .filter(r => r && getScatterVal(r, xKey) != null && getScatterVal(r, yKey) != null); + + if (validRuns.length === 0) { + const msg = document.createElement('div'); + msg.className = 'scatter-empty'; + msg.textContent = 'no visible runs have both selected fields'; + plotDiv.appendChild(msg); + return; + } + + // Shared x-axis: deduplicated, sorted numeric x-values across all runs. + const allXVals = [...new Set(validRuns.map(r => getScatterVal(r, xKey)))].sort((a, b) => a - b); + + const muted = themeColor('muted') || '#8aa0b8'; + const grid = themeColor('grid') || '#1f2630'; + + const series = [{ label: '' }]; + const data = [allXVals]; + + for (const r of validRuns) { + const xv = getScatterVal(r, xKey); + const yv = getScatterVal(r, yKey); + const xi = allXVals.indexOf(xv); + const yArr = new Array(allXVals.length).fill(null); + yArr[xi] = yv; + const color = runColor(r.run_id); + series.push({ + label: r.display_name || r.run_id, + stroke: color, fill: color, + paths: () => null, + points: { show: true, size: 8, fill: color, stroke: color }, + }); + data.push(yArr); + } + + const width = plotDiv.clientWidth || 600; + _scatterPlot = new uPlot({ + width, height: 260, + cursor: { drag: { x: false, y: false } }, + scales: { x: { time: false }, y: {} }, + axes: [ + { + label: xKey.replace(/^(summary|hparams)\./, ''), + stroke: muted, grid: { stroke: grid, width: 1 }, ticks: { stroke: grid }, + size: 50, + }, + { + label: yKey.replace(/^(summary|hparams)\./, ''), + stroke: muted, grid: { stroke: grid, width: 1 }, ticks: { stroke: grid }, + size: 60, + }, + ], + series, + legend: { show: true, live: true }, + }, data, plotDiv); + } + + function updateScatterSection(root) { + const visIds = effectivelyVisible(); + + // Hide when fewer than 2 runs are visible. 
+ if (visIds.length < 2) { + const existing = root.querySelector('.scatter-section'); + if (existing) { + if (_scatterPlot) { _scatterPlot.destroy(); _scatterPlot = null; } + existing.remove(); + } + return; + } + + // Create section on first use. + let section = root.querySelector('.scatter-section'); + if (!section) { + section = document.createElement('div'); + section.className = 'scatter-section'; + + const hdr = document.createElement('div'); + hdr.className = 'scatter-header'; + const title = document.createElement('h3'); + title.className = 'scatter-title'; + title.textContent = 'scatter'; + hdr.appendChild(title); + + const controls = document.createElement('div'); + controls.className = 'scatter-controls'; + + const makeAxisLabel = (axis, selId) => { + const lbl = document.createElement('label'); + lbl.className = 'scatter-axis-label'; + lbl.textContent = axis + ' '; + const sel = document.createElement('select'); + sel.className = 'scatter-axis-sel'; + sel.id = selId; + lbl.appendChild(sel); + return lbl; + }; + + controls.appendChild(makeAxisLabel('x', 'scatter-x-sel')); + controls.appendChild(makeAxisLabel('y', 'scatter-y-sel')); + hdr.appendChild(controls); + section.appendChild(hdr); + + const plotDiv = document.createElement('div'); + plotDiv.className = 'scatter-plot'; + section.appendChild(plotDiv); + root.appendChild(section); + + section.querySelector('#scatter-x-sel').addEventListener('change', e => { + state.scatterX = e.target.value || null; + saveState(); + _rebuildScatterPlot(section); + }); + section.querySelector('#scatter-y-sel').addEventListener('change', e => { + state.scatterY = e.target.value || null; + saveState(); + _rebuildScatterPlot(section); + }); + } + + const keys = scatterNumericKeys(); + const xSel = section.querySelector('#scatter-x-sel'); + const ySel = section.querySelector('#scatter-y-sel'); + _fillScatterSel(xSel, keys, state.scatterX); + _fillScatterSel(ySel, keys, state.scatterY); + + // Auto-select defaults on first 
visit: hparam for X, summary for Y. + if (!state.scatterX || !keys.includes(state.scatterX)) { + const def = keys.find(k => k.startsWith('hparams.')) || keys[0] || null; + state.scatterX = def; + if (xSel && def) xSel.value = def; + } + if (!state.scatterY || !keys.includes(state.scatterY)) { + const def = keys.find(k => k.startsWith('summary.')) + || keys.find(k => k !== state.scatterX) + || null; + state.scatterY = def; + if (ySel && def) ySel.value = def; + } + + _rebuildScatterPlot(section); + } + function renderOverview(root) { const allRuns = [...state.runs.values()].filter(passesFilters); const counts = {}; From fc11d275237ea443ea6e85e3bc0aa598205e0d84 Mon Sep 17 00:00:00 2001 From: MarcelMatsal Date: Fri, 5 Jun 2026 09:00:45 -0400 Subject: [PATCH 09/31] csv download functionality added --- stable_pretraining/web/assets/app.css | 6 ++++ stable_pretraining/web/assets/app.js | 38 ++++++++++++++++++++++++ stable_pretraining/web/assets/index.html | 1 + 3 files changed, 45 insertions(+) diff --git a/stable_pretraining/web/assets/app.css b/stable_pretraining/web/assets/app.css index 2aac23261..e3fe33343 100644 --- a/stable_pretraining/web/assets/app.css +++ b/stable_pretraining/web/assets/app.css @@ -393,6 +393,12 @@ body { margin-left: -8px; } +#export-csv { + margin-left: auto; + font-size: 12px; + padding: 3px 8px; +} + .logs-controls { display: flex; gap: 12px; align-items: center; padding: 8px 12px; diff --git a/stable_pretraining/web/assets/app.js b/stable_pretraining/web/assets/app.js index fab2bec85..5f567968d 100644 --- a/stable_pretraining/web/assets/app.js +++ b/stable_pretraining/web/assets/app.js @@ -2857,6 +2857,42 @@ } } + function exportMetricsCSV() { + const visIds = effectivelyVisible(); + const metricNames = visibleMetricNames(); + if (!visIds.length || !metricNames.length) return; + + const rows = ['run_id,run_name,metric,step,epoch,value']; + for (const id of visIds) { + const run = state.runs.get(id); + const name = (run && run.display_name) 
|| id; + const m = state.metrics.get(id); + if (!m) continue; + for (const metric of metricNames) { + const col = m.metrics[metric]; + if (!col) continue; + for (let i = 0; i < col.y.length; i++) { + const step = col.step[i] ?? ''; + const epoch = col.epoch[i] ?? ''; + const val = col.y[i] ?? ''; + const escapedName = name.includes(',') ? `"${name.replace(/"/g, '""')}"` : name; + const escapedMetric = metric.includes(',') ? `"${metric.replace(/"/g, '""')}"` : metric; + rows.push(`${id},${escapedName},${escapedMetric},${step},${epoch},${val}`); + } + } + } + + const blob = new Blob([rows.join('\n')], { type: 'text/csv' }); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = 'spt_metrics.csv'; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + } + function wireControls() { document.getElementById('run-search').addEventListener( 'input', @@ -2887,6 +2923,8 @@ document.getElementById('select-all').addEventListener('click', () => setAllVisible(true)); document.getElementById('clear-all').addEventListener('click', () => setAllVisible(false)); + document.getElementById('export-csv').addEventListener('click', exportMetricsCSV); + document.getElementById('add-filter-btn').addEventListener('click', () => openFilterDraft(null)); document.getElementById('group-by').addEventListener('change', e => { diff --git a/stable_pretraining/web/assets/index.html b/stable_pretraining/web/assets/index.html index 2eeb0b3f6..0e5611044 100644 --- a/stable_pretraining/web/assets/index.html +++ b/stable_pretraining/web/assets/index.html @@ -71,6 +71,7 @@ +
select a run on the left to plot its metrics
From e235294ce59e9a3e8c3f30a804eb79dbaee25d59 Mon Sep 17 00:00:00 2001 From: MarcelMatsal Date: Fri, 5 Jun 2026 09:16:24 -0400 Subject: [PATCH 10/31] added resizing functionality for the tables allowing to easily reset zooming in --- stable_pretraining/web/assets/app.css | 18 +++++++++++++++ stable_pretraining/web/assets/app.js | 32 +++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/stable_pretraining/web/assets/app.css b/stable_pretraining/web/assets/app.css index e3fe33343..31e5edc86 100644 --- a/stable_pretraining/web/assets/app.css +++ b/stable_pretraining/web/assets/app.css @@ -630,6 +630,19 @@ body { overflow: hidden; white-space: nowrap; text-overflow: ellipsis; } .chart-body { width: 100%; } +.chart-zoom-reset { + flex-shrink: 0; + margin-left: auto; + background: none; + border: 1px solid var(--border); + border-radius: 4px; + color: var(--muted); + font-size: 13px; + line-height: 1; + padding: 1px 5px; + cursor: pointer; +} +.chart-zoom-reset:hover { color: var(--text-strong); border-color: var(--accent); } /* ---- chart annotations ------------------------------------------------ */ .chart-annot { @@ -897,6 +910,11 @@ body { /* uPlot themed */ .u-cursor-pt { mix-blend-mode: normal; } +.u-select { + background: var(--accent-bg-strong); + border: 1px solid var(--accent); + border-radius: 2px; +} /* In-chart cursor tooltip (custom uPlot plugin) */ .spt-tip { diff --git a/stable_pretraining/web/assets/app.js b/stable_pretraining/web/assets/app.js index 5f567968d..c6d840c3f 100644 --- a/stable_pretraining/web/assets/app.js +++ b/stable_pretraining/web/assets/app.js @@ -1295,6 +1295,13 @@ span.textContent = displayName || fullName; span.title = fullName; title.appendChild(span); + const resetBtn = document.createElement('button'); + resetBtn.className = 'chart-zoom-reset'; + resetBtn.type = 'button'; + resetBtn.title = 'reset zoom'; + resetBtn.textContent = '⤢'; + resetBtn.hidden = true; + title.appendChild(resetBtn); 
panel.appendChild(title); const body = document.createElement('div'); body.className = 'chart-body'; @@ -1732,7 +1739,7 @@ }; } - function makeUplotOpts(name, width, seriesCfg) { + function makeUplotOpts(name, width, seriesCfg, resetBtn) { const muted = themeColor('muted') || '#8aa0b8'; const grid = themeColor('grid') || '#1f2630'; return { @@ -1757,6 +1764,18 @@ series: seriesCfg, legend: { show: false }, plugins: [tooltipPlugin()], + hooks: { + setScale: [ + (u, key) => { + if (!resetBtn || key !== 'x') return; + const xData = u.data[0]; + if (!xData || xData.length < 2) { resetBtn.hidden = true; return; } + const sc = u.scales.x; + const zoomed = sc.min > xData[0] || sc.max < xData[xData.length - 1]; + resetBtn.hidden = !zoomed; + }, + ], + }, }; } @@ -1846,8 +1865,17 @@ if (entry.plot) entry.plot.destroy(); const body = entry.panel.querySelector('.chart-body'); + const resetBtn = entry.panel.querySelector('.chart-zoom-reset'); + if (resetBtn) resetBtn.hidden = true; const width = body.clientWidth || 400; - entry.plot = new uPlot(makeUplotOpts(name, width, seriesCfg), data, body); + entry.plot = new uPlot(makeUplotOpts(name, width, seriesCfg, resetBtn), data, body); + if (resetBtn) { + resetBtn.onclick = () => { + const xData = entry.plot && entry.plot.data[0]; + if (!xData || !xData.length) return; + entry.plot.setScale('x', { min: xData[0], max: xData[xData.length - 1] }); + }; + } entry.configKey = configKey; updateAnnotTable(name, entry, seriesCfg, seriesStats); } From dd3d97ad49af99b8eb5833e67fb41d9066ed70b8 Mon Sep 17 00:00:00 2001 From: MarcelMatsal Date: Fri, 5 Jun 2026 09:25:44 -0400 Subject: [PATCH 11/31] added resizeable sidebar --- stable_pretraining/web/assets/app.css | 17 ++++- stable_pretraining/web/assets/app.js | 82 ++++++++++++++++++++++++ stable_pretraining/web/assets/index.html | 1 + 3 files changed, 97 insertions(+), 3 deletions(-) diff --git a/stable_pretraining/web/assets/app.css b/stable_pretraining/web/assets/app.css index 
31e5edc86..e6a957441 100644 --- a/stable_pretraining/web/assets/app.css +++ b/stable_pretraining/web/assets/app.css @@ -67,10 +67,19 @@ body { #app { display: grid; - grid-template-columns: 300px 1fr; + grid-template-columns: 300px 6px 1fr; grid-template-rows: 46px 1fr; height: 100vh; } +#sidebar-resizer { + grid-column: 2; grid-row: 2; + cursor: col-resize; + background: var(--border); + transition: background 0.15s; + z-index: 10; +} +#sidebar-resizer:hover, +#sidebar-resizer.dragging { background: var(--accent); } #topbar { grid-column: 1 / -1; @@ -111,9 +120,10 @@ body { /* ---- sidebar -------------------------------------------------------- */ #sidebar { + grid-column: 1; grid-row: 2; border-right: 1px solid var(--border); background: var(--surface); - overflow-y: auto; + overflow: hidden; padding: 10px; display: flex; flex-direction: column; gap: 8px; } @@ -230,7 +240,7 @@ body { } #sidebar-actions button:hover { color: var(--text); border-color: var(--border-strong); } -#run-list { display: flex; flex-direction: column; gap: 1px; } +#run-list { display: flex; flex-direction: column; gap: 1px; flex: 1; overflow-y: auto; min-height: 0; } .run-item { display: flex; align-items: center; gap: 8px; padding: 5px 7px; border-radius: 4px; @@ -331,6 +341,7 @@ body { /* ---- main panel: tabs + figures/logs ------------------------------- */ #main-panel { + grid-column: 3; grid-row: 2; display: flex; flex-direction: column; min-width: 0; /* let children scroll horizontally if needed */ diff --git a/stable_pretraining/web/assets/app.js b/stable_pretraining/web/assets/app.js index c6d840c3f..014deba5b 100644 --- a/stable_pretraining/web/assets/app.js +++ b/stable_pretraining/web/assets/app.js @@ -48,6 +48,12 @@ const SYNC_KEY = 'sptweb-x'; + // Virtual scroll state — set by renderRunList when the flat list is large. 
+ const VSCROLL_THRESHOLD = 300; + const VSCROLL_OVERSCAN = 5; + let _rowHeight = 34; + let _vScrollState = null; // { el, runs } | null + // ---- theme ----------------------------------------------------------- const SUN_SVG = '' @@ -791,6 +797,15 @@ `${state.runs.size} run${state.runs.size === 1 ? '' : 's'}` + (filtered.length !== state.runs.size ? ` (${filtered.length} shown)` : ''); + // Virtual scroll: flat list only (groups defeat windowing). + if (groups.length === 0 && sorted.length > VSCROLL_THRESHOLD) { + _vScrollState = { el, runs: sorted }; + state._lastListKey = null; // force full rebuild next time we drop below threshold + _applyVScrollWindow(); + return; + } + _vScrollState = null; + // Skip rebuilding the sidebar DOM when the *structure* (group keys, // run order, run count) hasn't changed. Background SSE updates fire // every few seconds during training; rebuilding 2 000+ rows on every @@ -2885,6 +2900,69 @@ } } + function initSidebarResize() { + const app = document.getElementById('app'); + const resizer = document.getElementById('sidebar-resizer'); + const sidebar = document.getElementById('sidebar'); + + const saved = parseInt(localStorage.getItem('sptSidebarWidth'), 10); + if (saved && saved > 100 && saved < 800) { + app.style.gridTemplateColumns = `${saved}px 6px 1fr`; + } + + resizer.addEventListener('mousedown', e => { + e.preventDefault(); + const startX = e.clientX; + const startW = sidebar.getBoundingClientRect().width; + resizer.classList.add('dragging'); + + function onMove(e) { + const newW = Math.max(160, Math.min(600, startW + (e.clientX - startX))); + app.style.gridTemplateColumns = `${newW}px 6px 1fr`; + } + function onUp() { + resizer.classList.remove('dragging'); + const cols = getComputedStyle(app).gridTemplateColumns; + const w = parseInt(cols, 10); + if (w) localStorage.setItem('sptSidebarWidth', String(w)); + document.removeEventListener('mousemove', onMove); + document.removeEventListener('mouseup', onUp); + } + 
document.addEventListener('mousemove', onMove); + document.addEventListener('mouseup', onUp); + }); + } + + function _applyVScrollWindow() { + if (!_vScrollState) return; + const { el, runs } = _vScrollState; + const rowH = _rowHeight; + const scrollTop = el.scrollTop; + const clientH = el.clientHeight || 500; + + const firstIdx = Math.max(0, Math.floor(scrollTop / rowH) - VSCROLL_OVERSCAN); + const lastIdx = Math.min(runs.length - 1, + Math.ceil((scrollTop + clientH) / rowH) + VSCROLL_OVERSCAN); + + const frag = document.createDocumentFragment(); + const topSpacer = document.createElement('div'); + topSpacer.style.height = `${firstIdx * rowH}px`; + frag.appendChild(topSpacer); + for (let i = firstIdx; i <= lastIdx; i++) frag.appendChild(makeRunRow(runs[i])); + const botSpacer = document.createElement('div'); + botSpacer.style.height = `${Math.max(0, runs.length - 1 - lastIdx) * rowH}px`; + frag.appendChild(botSpacer); + + el.replaceChildren(frag); + + // Update measured row height from the first rendered item. + const row0 = el.querySelector('.run-item'); + if (row0) { + const h = row0.offsetHeight; + if (h > 1) _rowHeight = h + 1; // +1 for the gap + } + } + function exportMetricsCSV() { const visIds = effectivelyVisible(); const metricNames = visibleMetricNames(); @@ -3128,6 +3206,10 @@ wireControls(); syncControlsToState(); initTheme(); + initSidebarResize(); + document.getElementById('run-list').addEventListener('scroll', () => { + if (_vScrollState) requestAnimationFrame(_applyVScrollWindow); + }, { passive: true }); // Tick every minute to keep duration displays and stale indicators current. setInterval(() => { for (const el of document.querySelectorAll('.run-dur[data-run-id]')) { diff --git a/stable_pretraining/web/assets/index.html b/stable_pretraining/web/assets/index.html index 0e5611044..ca5c2874b 100644 --- a/stable_pretraining/web/assets/index.html +++ b/stable_pretraining/web/assets/index.html @@ -48,6 +48,7 @@
+