From 455bafdbed613af68b587baac55d671a6d997531 Mon Sep 17 00:00:00 2001
From: stacknil <stacknil@proton.me>
Date: Fri, 19 Jun 2026 19:14:02 +0800
Subject: [PATCH] docs(contract): add cross-demo vocabulary

---
 README.md                          |  2 +
 docs/README.md                     |  1 +
 docs/evidence-pipeline-contract.md |  2 +
 docs/reviewer-pack.md              |  3 ++
 docs/roadmap.md                    |  8 ++--
 docs/vocabulary.md                 | 67 ++++++++++++++++++++++++++++++
 tests/test_reviewer_docs.py        | 35 ++++++++++++++++
 7 files changed, 115 insertions(+), 3 deletions(-)
 create mode 100644 docs/vocabulary.md

diff --git a/README.md b/README.md
index 8681d5a..3f7703b 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,7 @@ Latest tagged release: [v0.6.0 — fourth demo and config-change investigation](
 - [`docs/reviewer-path.md`](docs/reviewer-path.md): choose the right demo by review question
 - [`docs/reviewer-pack.md`](docs/reviewer-pack.md): demo matrix, artifact contract, and v1 readiness gate
 - [`docs/evidence-pipeline-contract.md`](docs/evidence-pipeline-contract.md): JSON schema contracts for reviewer-facing evidence artifacts
+- [`docs/vocabulary.md`](docs/vocabulary.md): cross-demo vocabulary for events, hits, signals, findings, summaries, reports, and audit traces
 - [`docs/README.md`](docs/README.md): current route, supporting docs, and historical release evidence
 
 ## Demos
@@ -170,6 +171,7 @@ Cooldown behavior:
 - [`docs/reviewer-path.md`](docs/reviewer-path.md) maps common review questions to the right demo and artifacts
 - [`docs/architecture.md`](docs/architecture.md) diagrams the local file-based detection workflow
 - [`docs/event-time-model.md`](docs/event-time-model.md) defines event, observed, window, and artifact time semantics
+- [`docs/vocabulary.md`](docs/vocabulary.md) defines the cross-demo evidence workflow terms
 - [`docs/sample-output.md`](docs/sample-output.md) summarizes the committed sample artifacts
 - [`docs/roadmap.md`](docs/roadmap.md) defines the v1 reviewer contract stabilization phase
 - [`data/processed/summary.json`](data/processed/summary.json) captures the default run in machine-readable form
diff --git a/docs/README.md b/docs/README.md
index 0689a4f..8154dbe 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -8,6 +8,7 @@ This directory separates the current reviewer route from supporting design notes
 - [`reviewer-path.md`](reviewer-path.md): choose a demo by review question
 - [`reviewer-brief.md`](reviewer-brief.md): short problem, value, evidence, and boundary summary
 - [`evidence-pipeline-contract.md`](evidence-pipeline-contract.md): JSON schema contracts for reviewer-facing evidence artifacts
+- [`vocabulary.md`](vocabulary.md): cross-demo vocabulary for evidence workflow terms
 - [`architecture.md`](architecture.md): local file-based workflow diagram
 - [`roadmap.md`](roadmap.md): v1 reviewer contract stabilization phase
 
diff --git a/docs/evidence-pipeline-contract.md b/docs/evidence-pipeline-contract.md
index fb5aff0..f43c2fa 100644
--- a/docs/evidence-pipeline-contract.md
+++ b/docs/evidence-pipeline-contract.md
@@ -2,6 +2,8 @@
 
 `telemetry-lab` v1.0 treats reviewer-facing JSON artifacts as evidence pipeline contracts. The schemas below define the current machine-readable artifact shapes for selected demo outputs without turning the repo into a SIEM, dashboard, or monitoring platform.
 
+Use [`docs/vocabulary.md`](vocabulary.md) for the cross-demo meaning of `event`, `signal`, `hit`, `finding`, `case_bundle`, `summary`, `report`, and `audit_trace`.
+
 The contract is intentionally local and file-based:
 
 - schemas live under `schemas/`
diff --git a/docs/reviewer-pack.md b/docs/reviewer-pack.md
index f8165e6..79bddc8 100644
--- a/docs/reviewer-pack.md
+++ b/docs/reviewer-pack.md
@@ -41,6 +41,8 @@ The current artifact names are reviewer-facing contracts for the v1 reviewer con
 
 See [`docs/evidence-pipeline-contract.md`](evidence-pipeline-contract.md) for the v1 JSON schema contract covering selected reviewer-facing evidence artifacts.
 
+See [`docs/vocabulary.md`](vocabulary.md) for the cross-demo meaning of `event`, `signal`, `hit`, `finding`, `case_bundle`, `summary`, `report`, and `audit_trace`.
+
 The current schema contract covers:
 
 - `schemas/rule_hits.schema.json`
@@ -96,6 +98,7 @@ Use the same Python interpreter for install, tests, and demo commands.
 - [`docs/reviewer-brief.md`](reviewer-brief.md): short problem / value summary
 - [`docs/reviewer-path.md`](reviewer-path.md): demo choice by review question
 - [`docs/evidence-pipeline-contract.md`](evidence-pipeline-contract.md): JSON schema contracts for selected evidence artifacts
+- [`docs/vocabulary.md`](vocabulary.md): cross-demo evidence workflow vocabulary
 - [`docs/architecture.md`](architecture.md): local file-based workflow diagram
 - [`docs/sample-output.md`](sample-output.md): committed output counts and sample artifacts
 - [`docs/roadmap.md`](roadmap.md): v1 reviewer contract stabilization phase
diff --git a/docs/roadmap.md b/docs/roadmap.md
index be7cb52..acb47c3 100644
--- a/docs/roadmap.md
+++ b/docs/roadmap.md
@@ -14,6 +14,7 @@ Recently added:
 - [`docs/reviewer-path.md`](reviewer-path.md) maps common review questions to the right demo and artifacts.
 - [`docs/reviewer-pack.md`](reviewer-pack.md) collects the top-level reviewer flow and artifact naming contract.
 - [`docs/architecture.md`](architecture.md) describes the local file-based workflow shape.
+- [`docs/vocabulary.md`](vocabulary.md) defines cross-demo evidence workflow terms.
 
 ## v1 Reviewer Contract Stabilization
 
@@ -21,9 +22,10 @@ Recently added:
 2. Freeze reviewer-visible artifact names unless a rename is intentional and documented across README, reviewer docs, demo docs, tests, and sample outputs.
 3. Keep JSON schema contracts aligned with selected reviewer-facing evidence artifacts.
 4. Keep committed evidence artifacts aligned with regenerated pipeline output.
-5. Keep one top-level reviewer pack as the primary no-guessing entrypoint.
-6. Keep the architecture diagram aligned with actual CLI and artifact behavior.
-7. Prefer regression tests and documentation accuracy over adding new workflow surface area.
+5. Keep cross-demo vocabulary stable for evidence workflow terms.
+6. Keep one top-level reviewer pack as the primary no-guessing entrypoint.
+7. Keep the architecture diagram aligned with actual CLI and artifact behavior.
+8. Prefer regression tests and documentation accuracy over adding new workflow surface area.
 
 The consolidation gate lives in [`docs/reviewer-pack.md`](reviewer-pack.md#v1-readiness-gate).
 
diff --git a/docs/vocabulary.md b/docs/vocabulary.md
new file mode 100644
index 0000000..15e75ba
--- /dev/null
+++ b/docs/vocabulary.md
@@ -0,0 +1,67 @@
+# Cross-Demo Vocabulary
+
+This vocabulary defines the reviewer-facing terms used across `telemetry-lab`.
+It is a local evidence workflow vocabulary, not a SIEM object model, not a
+case-management lifecycle, and not an incident response taxonomy.
+
+## Term Ladder
+
+| Term | Meaning in this repo | Typical artifacts | What it is not |
+| --- | --- | --- | --- |
+| `event` | A source or normalized telemetry record that says something happened at a specific time. Events are the raw material for windowing, rule evaluation, and bounded correlation. | `data/raw/*.jsonl`, `change_events_normalized.json`, `normalized_cloudtrail_events.json` | A detection by itself, an alert verdict, or a final incident conclusion. |
+| `signal` | A deterministic correlation output that selects one or more events for review because they match bounded investigation logic. Signals usually carry evidence IDs, actor or entity context, and a reason. | `investigation_signals.json` | A live alert route, autonomous action, or proof of compromise. |
+| `hit` | A rule-evaluation occurrence. A hit can be a raw rule match, a retained alert candidate, or a suppressed duplicate depending on the demo stage. | `rule_hits.json`, `rule_hits_before_dedup.json`, `rule_hits_after_dedup.json`, `investigation_hits.json` | A fully investigated finding or a case-management ticket. |
+| `finding` | A reviewer-facing observation derived from hits or signals and tied back to evidence. Findings may appear in prose reports, but they must remain bounded by the committed artifacts. | `case_report.md`, `dedup_report.md`, `investigation_report.md` | A final incident verdict, root-cause statement, or production severity decision. |
+| `case_bundle` | A bounded evidence package passed into the AI-assisted drafting stage. It groups rule hits, raw evidence, entities, severity, and ATT&CK mapping for one deterministic case candidate. | `case_bundles.json` | A persistent case record, workflow queue item, or incident-management object. |
+| `summary` | A compact machine-readable aggregate of counts, selected outcomes, and run metadata. Summaries should be easy for tests and reviewers to parse. | `summary.json`, `case_summaries.json`, `investigation_summary.json` | A narrative report or a substitute for inspecting evidence artifacts. |
+| `report` | A human-readable Markdown explanation of the demo output. Reports are review aids that point back to deterministic JSON, JSONL, CSV, or Markdown evidence. | `case_report.md`, `dedup_report.md`, `investigation_report.md` | The canonical machine contract or an operational dashboard. |
+| `audit_trace` | A JSONL audit record of constrained processing decisions, especially guardrails around AI-assisted drafting. It records inputs, validation, and accept/reject outcomes without exposing model chain-of-thought. | `audit_traces.jsonl` | Free-form reasoning logs, private credentials, prompts containing private data, or autonomous decision authority. |
+
+## How the Terms Flow
+
+The demos do not all use every term, but they follow the same evidence shape:
+
+1. `event`: committed sample input or normalized telemetry.
+2. `hit` or `signal`: deterministic rule or correlation output.
+3. `case_bundle`: bounded grouping when an AI-assisted drafting stage exists.
+4. `summary`: machine-readable aggregate for checks and reviewers.
+5. `report`: human-readable explanation tied to the underlying artifacts.
+6. `audit_trace`: guardrail and validation trail for constrained AI behavior.
+
+This flow is intentionally narrower than a production monitoring platform. It
+keeps the repo focused on reproducible evidence generation and review.
+
+## Naming Rules
+
+- Use `event` for source facts or normalized facts.
+- Use `hit` for rule matches and cooldown/suppression reasoning.
+- Use `signal` for bounded investigation outputs that select or correlate one or more events.
+- Use `finding` only for reviewer-facing observations backed by evidence in the committed artifacts.
+- Use `case_bundle` only for bounded AI-assisted case drafting inputs.
+- Use `summary` for machine-readable aggregates.
+- Use `report` for human-readable Markdown explanations.
+- Use `audit_trace` for structured guardrail, validation, and provenance records.
+
+## Time Semantics
+
+Events, hits, signals, summaries, and reports should preserve the time semantics
+defined in [`docs/event-time-model.md`](event-time-model.md). In short:
+
+- `event_time` is when the source event happened.
+- `observed_time` is when a collector or pipeline observed it, when available.
+- `window_start` and `window_end` define feature-window boundaries.
+- `artifact_generated_at` is when a local artifact was generated.
+
+Do not use a later summary, report, or audit timestamp to rewrite the source
+event order.
+
+## Contract Boundaries
+
+This vocabulary should make demo artifacts easier to compare across the repo,
+but it does not add production platform claims. The repo still has:
+
+- no real-time ingestion
+- no alert routing
+- no dashboard or case-management service
+- no autonomous response
+- no final incident verdicts
diff --git a/tests/test_reviewer_docs.py b/tests/test_reviewer_docs.py
index b2adf5e..4f94d31 100644
--- a/tests/test_reviewer_docs.py
+++ b/tests/test_reviewer_docs.py
@@ -151,6 +151,8 @@ def test_docs_index_separates_current_route_from_history() -> None:
         "reviewer-pack.md",
         "reviewer-path.md",
         "reviewer-brief.md",
+        "evidence-pipeline-contract.md",
+        "vocabulary.md",
         "architecture.md",
         "roadmap.md",
     ]:
@@ -183,6 +185,7 @@ def test_top_level_reviewer_pack_covers_matrix_and_artifact_contract() -> None:
     assert "Artifact Naming Contract" in reviewer_pack
     assert "[`docs/README.md`](README.md)" in reviewer_pack
     assert "[`docs/reviewer-path.md`](reviewer-path.md)" in reviewer_pack
+    assert "[`docs/vocabulary.md`](vocabulary.md)" in reviewer_pack
     assert "[`docs/architecture.md`](architecture.md)" in reviewer_pack
     assert "[`docs/roadmap.md`](roadmap.md)" in reviewer_pack
     assert "current route, supporting docs, and historical release evidence" in reviewer_pack
@@ -238,6 +241,38 @@ def test_current_docs_use_v1_contract_stabilization_language() -> None:
         assert "v0.7 / v1.0" not in text, path
 
 
+def test_vocabulary_defines_cross_demo_terms() -> None:
+    vocabulary = _read_repo_file("docs/vocabulary.md")
+    docs_index = _read_repo_file("docs/README.md")
+    readme = _read_repo_file("README.md")
+    evidence_contract = _read_repo_file("docs/evidence-pipeline-contract.md")
+    roadmap = _read_repo_file("docs/roadmap.md")
+
+    assert "local evidence workflow vocabulary" in vocabulary
+    assert "not a SIEM object model" in vocabulary
+    assert "[`docs/event-time-model.md`](event-time-model.md)" in vocabulary
+
+    for term in [
+        "event",
+        "signal",
+        "hit",
+        "finding",
+        "case_bundle",
+        "summary",
+        "report",
+        "audit_trace",
+    ]:
+        assert f"`{term}`" in vocabulary
+        assert f"`{term}`" in evidence_contract
+
+    for text in [docs_index, readme]:
+        assert "vocabulary.md" in text
+        assert "cross-demo" in text
+
+    assert "[`docs/vocabulary.md`](vocabulary.md)" in roadmap
+    assert "Keep cross-demo vocabulary stable" in roadmap
+
+
 def test_architecture_doc_keeps_local_file_based_boundaries() -> None:
     architecture = _read_repo_file("docs/architecture.md")