From 455bafdbed613af68b587baac55d671a6d997531 Mon Sep 17 00:00:00 2001 From: stacknil Date: Fri, 19 Jun 2026 19:14:02 +0800 Subject: [PATCH] docs(contract): add cross-demo vocabulary --- README.md | 2 + docs/README.md | 1 + docs/evidence-pipeline-contract.md | 2 + docs/reviewer-pack.md | 3 ++ docs/roadmap.md | 8 ++-- docs/vocabulary.md | 67 ++++++++++++++++++++++++++++++ tests/test_reviewer_docs.py | 35 ++++++++++++++++ 7 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 docs/vocabulary.md diff --git a/README.md b/README.md index 8681d5a..3f7703b 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ Latest tagged release: [v0.6.0 — fourth demo and config-change investigation]( - [`docs/reviewer-path.md`](docs/reviewer-path.md): choose the right demo by review question - [`docs/reviewer-pack.md`](docs/reviewer-pack.md): demo matrix, artifact contract, and v1 readiness gate - [`docs/evidence-pipeline-contract.md`](docs/evidence-pipeline-contract.md): JSON schema contracts for reviewer-facing evidence artifacts +- [`docs/vocabulary.md`](docs/vocabulary.md): cross-demo vocabulary for events, hits, signals, findings, summaries, reports, and audit traces - [`docs/README.md`](docs/README.md): current route, supporting docs, and historical release evidence ## Demos @@ -170,6 +171,7 @@ Cooldown behavior: - [`docs/reviewer-path.md`](docs/reviewer-path.md) maps common review questions to the right demo and artifacts - [`docs/architecture.md`](docs/architecture.md) diagrams the local file-based detection workflow - [`docs/event-time-model.md`](docs/event-time-model.md) defines event, observed, window, and artifact time semantics +- [`docs/vocabulary.md`](docs/vocabulary.md) defines the cross-demo evidence workflow terms - [`docs/sample-output.md`](docs/sample-output.md) summarizes the committed sample artifacts - [`docs/roadmap.md`](docs/roadmap.md) defines the v1 reviewer contract stabilization phase - 
[`data/processed/summary.json`](data/processed/summary.json) captures the default run in machine-readable form diff --git a/docs/README.md b/docs/README.md index 0689a4f..8154dbe 100644 --- a/docs/README.md +++ b/docs/README.md @@ -8,6 +8,7 @@ This directory separates the current reviewer route from supporting design notes - [`reviewer-path.md`](reviewer-path.md): choose a demo by review question - [`reviewer-brief.md`](reviewer-brief.md): short problem, value, evidence, and boundary summary - [`evidence-pipeline-contract.md`](evidence-pipeline-contract.md): JSON schema contracts for reviewer-facing evidence artifacts +- [`vocabulary.md`](vocabulary.md): cross-demo vocabulary for evidence workflow terms - [`architecture.md`](architecture.md): local file-based workflow diagram - [`roadmap.md`](roadmap.md): v1 reviewer contract stabilization phase diff --git a/docs/evidence-pipeline-contract.md b/docs/evidence-pipeline-contract.md index fb5aff0..f43c2fa 100644 --- a/docs/evidence-pipeline-contract.md +++ b/docs/evidence-pipeline-contract.md @@ -2,6 +2,8 @@ `telemetry-lab` v1.0 treats reviewer-facing JSON artifacts as evidence pipeline contracts. The schemas below define the current machine-readable artifact shapes for selected demo outputs without turning the repo into a SIEM, dashboard, or monitoring platform. +Use [`docs/vocabulary.md`](vocabulary.md) for the cross-demo meaning of `event`, `signal`, `hit`, `finding`, `case_bundle`, `summary`, `report`, and `audit_trace`. + The contract is intentionally local and file-based: - schemas live under `schemas/` diff --git a/docs/reviewer-pack.md b/docs/reviewer-pack.md index f8165e6..79bddc8 100644 --- a/docs/reviewer-pack.md +++ b/docs/reviewer-pack.md @@ -41,6 +41,8 @@ The current artifact names are reviewer-facing contracts for the v1 reviewer con See [`docs/evidence-pipeline-contract.md`](evidence-pipeline-contract.md) for the v1 JSON schema contract covering selected reviewer-facing evidence artifacts. 
+See [`docs/vocabulary.md`](vocabulary.md) for the cross-demo meaning of `event`, `signal`, `hit`, `finding`, `case_bundle`, `summary`, `report`, and `audit_trace`. + The current schema contract covers: - `schemas/rule_hits.schema.json` @@ -96,6 +98,7 @@ Use the same Python interpreter for install, tests, and demo commands. - [`docs/reviewer-brief.md`](reviewer-brief.md): short problem / value summary - [`docs/reviewer-path.md`](reviewer-path.md): demo choice by review question - [`docs/evidence-pipeline-contract.md`](evidence-pipeline-contract.md): JSON schema contracts for selected evidence artifacts +- [`docs/vocabulary.md`](vocabulary.md): cross-demo evidence workflow vocabulary - [`docs/architecture.md`](architecture.md): local file-based workflow diagram - [`docs/sample-output.md`](sample-output.md): committed output counts and sample artifacts - [`docs/roadmap.md`](roadmap.md): v1 reviewer contract stabilization phase diff --git a/docs/roadmap.md b/docs/roadmap.md index be7cb52..acb47c3 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -14,6 +14,7 @@ Recently added: - [`docs/reviewer-path.md`](reviewer-path.md) maps common review questions to the right demo and artifacts. - [`docs/reviewer-pack.md`](reviewer-pack.md) collects the top-level reviewer flow and artifact naming contract. - [`docs/architecture.md`](architecture.md) describes the local file-based workflow shape. +- [`docs/vocabulary.md`](vocabulary.md) defines cross-demo evidence workflow terms. ## v1 Reviewer Contract Stabilization @@ -21,9 +22,10 @@ Recently added: 2. Freeze reviewer-visible artifact names unless a rename is intentional and documented across README, reviewer docs, demo docs, tests, and sample outputs. 3. Keep JSON schema contracts aligned with selected reviewer-facing evidence artifacts. 4. Keep committed evidence artifacts aligned with regenerated pipeline output. -5. Keep one top-level reviewer pack as the primary no-guessing entrypoint. -6. 
Keep the architecture diagram aligned with actual CLI and artifact behavior. -7. Prefer regression tests and documentation accuracy over adding new workflow surface area. +5. Keep cross-demo vocabulary stable for evidence workflow terms. +6. Keep one top-level reviewer pack as the primary no-guessing entrypoint. +7. Keep the architecture diagram aligned with actual CLI and artifact behavior. +8. Prefer regression tests and documentation accuracy over adding new workflow surface area. The consolidation gate lives in [`docs/reviewer-pack.md`](reviewer-pack.md#v1-readiness-gate). diff --git a/docs/vocabulary.md b/docs/vocabulary.md new file mode 100644 index 0000000..15e75ba --- /dev/null +++ b/docs/vocabulary.md @@ -0,0 +1,67 @@ +# Cross-Demo Vocabulary + +This vocabulary defines the reviewer-facing terms used across `telemetry-lab`. +It is a local evidence workflow vocabulary, not a SIEM object model, not a +case-management lifecycle, and not an incident response taxonomy. + +## Term Ladder + +| Term | Meaning in this repo | Typical artifacts | What it is not | +| --- | --- | --- | --- | +| `event` | A source or normalized telemetry record that says something happened at a specific time. Events are the raw material for windowing, rule evaluation, and bounded correlation. | `data/raw/*.jsonl`, `change_events_normalized.json`, `normalized_cloudtrail_events.json` | A detection by itself, an alert verdict, or a final incident conclusion. | +| `signal` | A deterministic correlation output that selects one or more events for review because they match bounded investigation logic. Signals usually carry evidence IDs, actor or entity context, and a reason. | `investigation_signals.json` | A live alert route, autonomous action, or proof of compromise. | +| `hit` | A rule-evaluation occurrence. A hit can be a raw rule match, a retained alert candidate, or a suppressed duplicate depending on the demo stage. 
| `rule_hits.json`, `rule_hits_before_dedup.json`, `rule_hits_after_dedup.json`, `investigation_hits.json` | A fully investigated finding or a case-management ticket. | +| `finding` | A reviewer-facing observation derived from hits or signals and tied back to evidence. Findings may appear in prose reports, but they must remain bounded by the committed artifacts. | `case_report.md`, `dedup_report.md`, `investigation_report.md` | A final incident verdict, root-cause statement, or production severity decision. | +| `case_bundle` | A bounded evidence package passed into the AI-assisted drafting stage. It groups rule hits, raw evidence, entities, severity, and ATT&CK mapping for one deterministic case candidate. | `case_bundles.json` | A persistent case record, workflow queue item, or incident-management object. | +| `summary` | A compact machine-readable aggregate of counts, selected outcomes, and run metadata. Summaries should be easy for tests and reviewers to parse. | `summary.json`, `case_summaries.json`, `investigation_summary.json` | A narrative report or a substitute for inspecting evidence artifacts. | +| `report` | A human-readable Markdown explanation of the demo output. Reports are review aids that point back to deterministic JSON, JSONL, CSV, or Markdown evidence. | `case_report.md`, `dedup_report.md`, `investigation_report.md` | The canonical machine contract or an operational dashboard. | +| `audit_trace` | A JSONL audit record of constrained processing decisions, especially guardrails around AI-assisted drafting. It records inputs, validation, and accept/reject outcomes without exposing model chain-of-thought. | `audit_traces.jsonl` | Free-form reasoning logs, private credentials, prompts containing private data, or autonomous decision authority. | + +## How the Terms Flow + +The demos do not all use every term, but they follow the same evidence shape: + +1. `event`: committed sample input or normalized telemetry. +2. 
`hit` or `signal`: deterministic rule or correlation output. +3. `case_bundle`: bounded grouping when an AI-assisted drafting stage exists. +4. `summary`: machine-readable aggregate for checks and reviewers. +5. `report`: human-readable explanation tied to the underlying artifacts. +6. `audit_trace`: guardrail and validation trail for constrained AI behavior. + +This flow is intentionally narrower than a production monitoring platform. It +keeps the repo focused on reproducible evidence generation and review. + +## Naming Rules + +- Use `event` for source facts or normalized facts. +- Use `hit` for rule matches and cooldown/suppression reasoning. +- Use `signal` for bounded investigation outputs that correlate one or more events. +- Use `finding` only for reviewer-facing observations backed by artifact IDs. +- Use `case_bundle` only for bounded AI-assisted case drafting inputs. +- Use `summary` for machine-readable aggregates. +- Use `report` for human-readable Markdown explanations. +- Use `audit_trace` for structured guardrail, validation, and provenance records. + +## Time Semantics + +Events, hits, signals, summaries, and reports should preserve the time semantics +defined in [`docs/event-time-model.md`](event-time-model.md). In short: + +- `event_time` is when the source event happened. +- `observed_time` is when a collector or pipeline observed it, when available. +- `window_start` and `window_end` define feature-window boundaries. +- `artifact_generated_at` is when a local artifact was generated. + +Do not use a later summary, report, or audit timestamp to rewrite the source +event order. + +## Contract Boundaries + +This vocabulary should make demo artifacts easier to compare across the repo, +but it does not add production platform claims. 
The repo still has: + +- no real-time ingestion +- no alert routing +- no dashboard or case-management service +- no autonomous response +- no final incident verdicts diff --git a/tests/test_reviewer_docs.py b/tests/test_reviewer_docs.py index b2adf5e..4f94d31 100644 --- a/tests/test_reviewer_docs.py +++ b/tests/test_reviewer_docs.py @@ -151,6 +151,8 @@ def test_docs_index_separates_current_route_from_history() -> None: "reviewer-pack.md", "reviewer-path.md", "reviewer-brief.md", + "evidence-pipeline-contract.md", + "vocabulary.md", "architecture.md", "roadmap.md", ]: @@ -183,6 +185,7 @@ def test_top_level_reviewer_pack_covers_matrix_and_artifact_contract() -> None: assert "Artifact Naming Contract" in reviewer_pack assert "[`docs/README.md`](README.md)" in reviewer_pack assert "[`docs/reviewer-path.md`](reviewer-path.md)" in reviewer_pack + assert "[`docs/vocabulary.md`](vocabulary.md)" in reviewer_pack assert "[`docs/architecture.md`](architecture.md)" in reviewer_pack assert "[`docs/roadmap.md`](roadmap.md)" in reviewer_pack assert "current route, supporting docs, and historical release evidence" in reviewer_pack @@ -238,6 +241,38 @@ def test_current_docs_use_v1_contract_stabilization_language() -> None: assert "v0.7 / v1.0" not in text, path +def test_vocabulary_defines_cross_demo_terms() -> None: + vocabulary = _read_repo_file("docs/vocabulary.md") + docs_index = _read_repo_file("docs/README.md") + readme = _read_repo_file("README.md") + evidence_contract = _read_repo_file("docs/evidence-pipeline-contract.md") + roadmap = _read_repo_file("docs/roadmap.md") + + assert "local evidence workflow vocabulary" in vocabulary + assert "not a SIEM object model" in vocabulary + assert "[`docs/event-time-model.md`](event-time-model.md)" in vocabulary + + for term in [ + "event", + "signal", + "hit", + "finding", + "case_bundle", + "summary", + "report", + "audit_trace", + ]: + assert f"`{term}`" in vocabulary + assert f"`{term}`" in evidence_contract + + for text in 
[docs_index, readme]: + assert "vocabulary.md" in text + assert "cross-demo" in text + + assert "[`docs/vocabulary.md`](vocabulary.md)" in roadmap + assert "Keep cross-demo vocabulary stable" in roadmap + + def test_architecture_doc_keeps_local_file_based_boundaries() -> None: architecture = _read_repo_file("docs/architecture.md")