From 15743fceb5ac7daedb11d282fe1f159e5e058a3d Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 13 Apr 2026 12:01:07 +0000 Subject: [PATCH 01/27] feat(traceability): add coverage checker and reporting docs --- docs/how-to/test_to_doc_links.rst | 40 ++ docs/reference/commands.md | 1 + scripts_bazel/BUILD | 7 + scripts_bazel/tests/BUILD | 9 + .../tests/traceability_coverage_test.py | 231 ++++++++++ scripts_bazel/traceability_coverage.py | 416 ++++++++++++++++++ 6 files changed, 704 insertions(+) create mode 100644 scripts_bazel/tests/traceability_coverage_test.py create mode 100644 scripts_bazel/traceability_coverage.py diff --git a/docs/how-to/test_to_doc_links.rst b/docs/how-to/test_to_doc_links.rst index d7c3677f3..5a0503f76 100644 --- a/docs/how-to/test_to_doc_links.rst +++ b/docs/how-to/test_to_doc_links.rst @@ -53,3 +53,43 @@ Limitations - Partial properties will lead to no Testlink creation. If you want a test to be linked, please ensure all requirement properties are provided. - Tests must be executed by Bazel first so `test.xml` files exist. + + +CI/CD Gate for Linkage Percentage +--------------------------------- + +To enforce traceability in CI: + +1. Run tests. +2. Generate ``needs.json``. +3. Execute the traceability checker. + +.. code-block:: bash + + bazel test //... + bazel build //:needs_json + bazel run //scripts_bazel:traceability_coverage -- \ + --needs-json bazel-bin/needs_json/_build/needs/needs.json \ + --min-req-code 100 \ + --min-req-test 100 \ + --min-req-fully-linked 100 \ + --min-tests-linked 100 \ + --fail-on-broken-test-refs + +The checker reports: + +- Percentage of implemented requirements with ``source_code_link`` +- Percentage of implemented requirements with ``testlink`` +- Percentage of implemented requirements with both links (fully linked) +- Percentage of test cases linked to at least one requirement +- Broken testcase references to unknown requirement IDs + +To check only unit tests, filter testcase types: + +.. 
code-block:: bash + + bazel run //scripts_bazel:traceability_coverage -- \ + --needs-json bazel-bin/needs_json/_build/needs/needs.json \ + --test-types unit-test + +Use lower thresholds during rollout and tighten towards 100% over time. diff --git a/docs/reference/commands.md b/docs/reference/commands.md index 01bc8d15d..5bc2b959b 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -5,6 +5,7 @@ | `bazel run //:docs` | Builds documentation | | `bazel run //:docs_check` | Verifies documentation correctness | | `bazel run //:docs_combo` | Builds combined documentation with all external dependencies included | +| `bazel run //scripts_bazel:traceability_coverage -- --needs-json bazel-bin/needs_json/_build/needs/needs.json --min-req-code 100 --min-req-test 100 --min-req-fully-linked 100 --min-tests-linked 100 --fail-on-broken-test-refs` | Calculates requirement/test traceability percentages and fails if thresholds are not met | | `bazel run //:live_preview` | Creates a live_preview of the documentation viewable in a local server | | `bazel run //:live_preview_combo_experimental` | Creates a live_preview of the full documentation with all dependencies viewable in a local server | | `bazel run //:ide_support` | Sets up a Python venv for esbonio (Remember to restart VS Code!) 
| diff --git a/scripts_bazel/BUILD b/scripts_bazel/BUILD index befe51730..15586f5c1 100644 --- a/scripts_bazel/BUILD +++ b/scripts_bazel/BUILD @@ -37,3 +37,10 @@ py_binary( main = "merge_sourcelinks.py", visibility = ["//visibility:public"], ) + +py_binary( + name = "traceability_coverage", + srcs = ["traceability_coverage.py"], + main = "traceability_coverage.py", + visibility = ["//visibility:public"], +) diff --git a/scripts_bazel/tests/BUILD b/scripts_bazel/tests/BUILD index 7ff48c428..70b7703ac 100644 --- a/scripts_bazel/tests/BUILD +++ b/scripts_bazel/tests/BUILD @@ -32,3 +32,12 @@ score_pytest( ] + all_requirements, pytest_config = "//:pyproject.toml", ) + +score_pytest( + name = "traceability_coverage_test", + srcs = ["traceability_coverage_test.py"], + deps = [ + "//scripts_bazel:traceability_coverage", + ] + all_requirements, + pytest_config = "//:pyproject.toml", +) diff --git a/scripts_bazel/tests/traceability_coverage_test.py b/scripts_bazel/tests/traceability_coverage_test.py new file mode 100644 index 000000000..cec6d42de --- /dev/null +++ b/scripts_bazel/tests/traceability_coverage_test.py @@ -0,0 +1,231 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Tests for traceability_coverage.py.""" + +import json +import os +import subprocess +import sys +from pathlib import Path + +_MY_PATH = Path(__file__).parent + + +def _write_needs_json(tmp_path: Path) -> Path: + needs_json = tmp_path / "needs.json" + payload = { + "current_version": "main", + "versions": { + "main": { + "needs": { + "REQ_1": { + "id": "REQ_1", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/foo.py:10", + "testlink": "", + }, + "REQ_2": { + "id": "REQ_2", + "type": "tool_req", + "implemented": "PARTIAL", + "source_code_link": "", + "testlink": "tests/test_foo.py::test_bar", + }, + "REQ_3": { + "id": "REQ_3", + "type": "tool_req", + "implemented": "NO", + "source_code_link": "", + "testlink": "", + }, + "TC_1": { + "id": "TC_1", + "type": "testcase", + "partially_verifies": "REQ_1, REQ_2", + "fully_verifies": "", + }, + "TC_2": { + "id": "TC_2", + "type": "testcase", + "partially_verifies": "", + "fully_verifies": "", + }, + "TC_3": { + "id": "TC_3", + "type": "testcase", + "partially_verifies": "", + "fully_verifies": "REQ_UNKNOWN", + }, + } + } + }, + } + needs_json.write_text(json.dumps(payload), encoding="utf-8") + return needs_json + + +def test_traceability_coverage_thresholds_pass(tmp_path: Path) -> None: + needs_json = _write_needs_json(tmp_path) + output_json = tmp_path / "summary.json" + + result = subprocess.run( + [ + sys.executable, + _MY_PATH.parent / "traceability_coverage.py", + "--needs-json", + str(needs_json), + "--min-req-code", + "50", + "--min-req-test", + "50", + "--min-req-fully-linked", + "0", + "--min-tests-linked", + "60", + "--json-output", + str(output_json), + ], + 
capture_output=True, + text=True, + ) + + assert result.returncode == 0 + assert "Threshold check passed." in result.stdout + assert output_json.exists() + + summary = json.loads(output_json.read_text(encoding="utf-8")) + assert summary["requirements"]["total"] == 2 + assert summary["requirements"]["with_code_link"] == 1 + assert summary["requirements"]["with_test_link"] == 1 + assert summary["requirements"]["fully_linked"] == 0 + assert summary["tests"]["total"] == 3 + assert summary["tests"]["linked_to_requirements"] == 2 + assert len(summary["tests"]["broken_references"]) == 1 + + +def test_traceability_coverage_thresholds_fail(tmp_path: Path) -> None: + needs_json = _write_needs_json(tmp_path) + + result = subprocess.run( + [ + sys.executable, + _MY_PATH.parent / "traceability_coverage.py", + "--needs-json", + str(needs_json), + "--min-req-code", + "80", + "--min-req-test", + "80", + "--min-req-fully-linked", + "80", + "--min-tests-linked", + "80", + ], + capture_output=True, + text=True, + ) + + assert result.returncode == 2 + assert "Threshold check failed:" in result.stdout + + +def test_traceability_coverage_fails_on_broken_refs(tmp_path: Path) -> None: + needs_json = _write_needs_json(tmp_path) + + result = subprocess.run( + [ + sys.executable, + _MY_PATH.parent / "traceability_coverage.py", + "--needs-json", + str(needs_json), + "--min-req-code", + "0", + "--min-req-test", + "0", + "--min-req-fully-linked", + "0", + "--min-tests-linked", + "0", + "--fail-on-broken-test-refs", + ], + capture_output=True, + text=True, + ) + + assert result.returncode == 2 + assert "broken testcase references found:" in result.stdout + + +def test_traceability_coverage_prints_unlinked_requirements(tmp_path: Path) -> None: + needs_json = _write_needs_json(tmp_path) + + result = subprocess.run( + [ + sys.executable, + _MY_PATH.parent / "traceability_coverage.py", + "--needs-json", + str(needs_json), + "--min-req-code", + "0", + "--min-req-test", + "0", + 
"--min-req-fully-linked", + "0", + "--min-tests-linked", + "0", + "--print-unlinked-requirements", + ], + capture_output=True, + text=True, + ) + + assert result.returncode == 0 + assert "Unlinked requirement details:" in result.stdout + assert "Missing source_code_link: REQ_2" in result.stdout + assert "Missing testlink: REQ_1" in result.stdout + assert "Not fully linked: REQ_1, REQ_2" in result.stdout + + +def test_traceability_coverage_accepts_workspace_relative_needs_json(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + workspace.mkdir() + needs_json = _write_needs_json(workspace) + + env = dict(os.environ) + env["BUILD_WORKSPACE_DIRECTORY"] = str(workspace) + + result = subprocess.run( + [ + sys.executable, + _MY_PATH.parent / "traceability_coverage.py", + "--needs-json", + "needs.json", + "--min-req-code", + "0", + "--min-req-test", + "0", + "--min-req-fully-linked", + "0", + "--min-tests-linked", + "0", + ], + capture_output=True, + text=True, + cwd=tmp_path, + env=env, + ) + + assert result.returncode == 0 + assert f"Traceability input: {needs_json}" in result.stdout diff --git a/scripts_bazel/traceability_coverage.py b/scripts_bazel/traceability_coverage.py new file mode 100644 index 000000000..ef8653882 --- /dev/null +++ b/scripts_bazel/traceability_coverage.py @@ -0,0 +1,416 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Compute requirement and test traceability coverage from sphinx-needs output.""" + +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path +from typing import Any + + +def _is_non_empty(value: Any) -> bool: + if value is None: + return False + if isinstance(value, str): + return bool(value.strip()) + if isinstance(value, (list, tuple, set, dict)): + return len(value) > 0 + return True + + +def _parse_need_id_list(value: Any) -> list[str]: + if value is None: + return [] + if isinstance(value, str): + return [item.strip() for item in value.split(",") if item.strip()] + if isinstance(value, list): + out: list[str] = [] + for item in value: + if isinstance(item, str) and item.strip(): + out.append(item.strip()) + return out + return [] + + +def _safe_percent(numerator: int, denominator: int) -> float: + if denominator == 0: + return 100.0 + return (numerator / denominator) * 100.0 + + +def _load_needs(needs_json: Path) -> list[dict[str, Any]]: + raw = json.loads(needs_json.read_text(encoding="utf-8")) + + if isinstance(raw, list): + return [item for item in raw if isinstance(item, dict)] + + if isinstance(raw, dict): + if "needs" in raw and isinstance(raw["needs"], dict): + return [v for v in raw["needs"].values() if isinstance(v, dict)] + + versions = raw.get("versions") + if isinstance(versions, dict) and versions: + current_version = raw.get("current_version") + selected: Any = None + if isinstance(current_version, str) and current_version in versions: + selected = versions[current_version] + else: + selected = next(iter(versions.values())) + if isinstance(selected, dict): + needs = 
selected.get("needs") + if isinstance(needs, dict): + return [v for v in needs.values() if isinstance(v, dict)] + + raise ValueError(f"Unsupported needs.json format in {needs_json}") + + +def _default_needs_json_candidates() -> list[Path]: + return [ + Path("_build/needs/needs.json"), + Path("bazel-bin/needs_json/needs.json"), + ] + + +def _find_needs_json(explicit: str | None) -> Path: + if explicit: + raw_path = Path(explicit) + candidates: list[Path] = [raw_path] + + # Under `bazel run` the working directory may be a runfiles tree, so + # also resolve relative paths from the workspace root when available. + workspace_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY", "").strip() + if not raw_path.is_absolute() and workspace_dir: + candidates.append(Path(workspace_dir) / raw_path) + + for path in candidates: + if path.exists(): + return path + + raise FileNotFoundError(f"needs.json not found: {raw_path}") + + for candidate in _default_needs_json_candidates(): + if candidate.exists(): + return candidate + + raise FileNotFoundError( + "Could not locate needs.json automatically. " + "Use --needs-json with a valid path." + ) + + +def main() -> int: + parser = argparse.ArgumentParser( + description=( + "Compute requirement/test traceability coverage from sphinx-needs output " + "and optionally fail on threshold violations." + ) + ) + parser.add_argument( + "--needs-json", + default=None, + help=( + "Path to needs.json. If omitted, tries _build/needs/needs.json and " + "bazel-bin/needs_json/needs.json" + ), + ) + parser.add_argument( + "--requirement-types", + default="tool_req", + help="Comma separated need types treated as requirements (default: tool_req)", + ) + parser.add_argument( + "--test-types", + default="", + help=( + "Optional comma separated testcase test_type filter (for example unit-test). " + "If empty, all testcase types are included." 
+ ), + ) + parser.add_argument( + "--include-not-implemented", + action="store_true", + help=( + "Include requirements with implemented == NO in requirement denominator. " + "By default only YES/PARTIAL are counted." + ), + ) + parser.add_argument( + "--min-req-code", + type=float, + default=0.0, + help="Minimum required percentage for requirements with source code links", + ) + parser.add_argument( + "--min-req-test", + type=float, + default=0.0, + help="Minimum required percentage for requirements with test links", + ) + parser.add_argument( + "--min-req-fully-linked", + type=float, + default=0.0, + help=( + "Minimum required percentage for requirements with both source code " + "and test links" + ), + ) + parser.add_argument( + "--min-tests-linked", + type=float, + default=0.0, + help="Minimum required percentage for testcases linked to requirements", + ) + parser.add_argument( + "--require-all-links", + action="store_true", + help="Shortcut that enforces 100%% for all three minimum percentages", + ) + parser.add_argument( + "--fail-on-broken-test-refs", + action="store_true", + help="Fail if a testcase references an unknown requirement ID", + ) + parser.add_argument( + "--json-output", + default=None, + help="Optional path to write machine-readable JSON summary", + ) + parser.add_argument( + "--print-unlinked-requirements", + action="store_true", + help=( + "Print IDs of requirements missing source_code_link and/or testlink. " + "Useful when coverage thresholds fail." 
+ ), + ) + + args = parser.parse_args() + if args.require_all_links: + args.min_req_code = 100.0 + args.min_req_test = 100.0 + args.min_req_fully_linked = 100.0 + args.min_tests_linked = 100.0 + args.fail_on_broken_test_refs = True + + requirement_types = { + item.strip() for item in str(args.requirement_types).split(",") if item.strip() + } + if not requirement_types: + raise ValueError("--requirement-types must not be empty") + + filtered_test_types = { + item.strip() for item in str(args.test_types).split(",") if item.strip() + } + + needs_json = _find_needs_json(args.needs_json) + all_needs = _load_needs(needs_json) + + requirements: list[dict[str, Any]] = [] + for need in all_needs: + need_type = str(need.get("type", "")).strip() + if need_type not in requirement_types: + continue + if not args.include_not_implemented: + implemented = str(need.get("implemented", "")).upper().strip() + if implemented not in {"YES", "PARTIAL"}: + continue + requirements.append(need) + + requirement_ids = { + str(need.get("id", "")).strip() for need in requirements if need.get("id") + } + + req_total = len(requirements) + req_with_code = sum( + 1 for need in requirements if _is_non_empty(need.get("source_code_link")) + ) + req_with_test = sum(1 for need in requirements if _is_non_empty(need.get("testlink"))) + req_fully_linked = sum( + 1 + for need in requirements + if _is_non_empty(need.get("source_code_link")) + and _is_non_empty(need.get("testlink")) + ) + + req_missing_code = [ + str(need.get("id", "")) + for need in requirements + if not _is_non_empty(need.get("source_code_link")) and need.get("id") + ] + req_missing_test = [ + str(need.get("id", "")) + for need in requirements + if not _is_non_empty(need.get("testlink")) and need.get("id") + ] + req_not_fully_linked = [ + str(need.get("id", "")) + for need in requirements + if ( + (not _is_non_empty(need.get("source_code_link")) + or not _is_non_empty(need.get("testlink"))) + and need.get("id") + ) + ] + + testcases = [need 
for need in all_needs if str(need.get("type", "")).strip() == "testcase"] + if filtered_test_types: + testcases = [ + need + for need in testcases + if str(need.get("test_type", need.get("TestType", ""))).strip() + in filtered_test_types + ] + tests_total = len(testcases) + + tests_linked = 0 + broken_test_references: list[dict[str, str]] = [] + for test_need in testcases: + test_id = str(test_need.get("id", "")) + partially = _parse_need_id_list( + test_need.get("partially_verifies", test_need.get("PartiallyVerifies")) + ) + fully = _parse_need_id_list( + test_need.get("fully_verifies", test_need.get("FullyVerifies")) + ) + refs = partially + fully + if refs: + tests_linked += 1 + for ref in refs: + if ref not in requirement_ids: + broken_test_references.append({"testcase": test_id, "missing_need": ref}) + + req_code_pct = _safe_percent(req_with_code, req_total) + req_test_pct = _safe_percent(req_with_test, req_total) + req_fully_linked_pct = _safe_percent(req_fully_linked, req_total) + tests_linked_pct = _safe_percent(tests_linked, tests_total) + + summary = { + "needs_json": str(needs_json), + "requirement_types": sorted(requirement_types), + "include_not_implemented": bool(args.include_not_implemented), + "requirements": { + "total": req_total, + "with_code_link": req_with_code, + "with_test_link": req_with_test, + "fully_linked": req_fully_linked, + "with_code_link_pct": req_code_pct, + "with_test_link_pct": req_test_pct, + "fully_linked_pct": req_fully_linked_pct, + "missing_code_link_ids": sorted(req_missing_code), + "missing_test_link_ids": sorted(req_missing_test), + "not_fully_linked_ids": sorted(req_not_fully_linked), + }, + "tests": { + "total": tests_total, + "filtered_test_types": sorted(filtered_test_types), + "linked_to_requirements": tests_linked, + "linked_to_requirements_pct": tests_linked_pct, + "broken_references": broken_test_references, + }, + "thresholds": { + "min_req_code": float(args.min_req_code), + "min_req_test": 
float(args.min_req_test), + "min_req_fully_linked": float(args.min_req_fully_linked), + "min_tests_linked": float(args.min_tests_linked), + "fail_on_broken_test_refs": bool(args.fail_on_broken_test_refs), + }, + } + + print(f"Traceability input: {needs_json}") + print("-" * 72) + print( + "Requirements with source links: " + f"{req_with_code}/{req_total} ({req_code_pct:.2f}%)" + ) + print( + "Requirements with test links: " + f"{req_with_test}/{req_total} ({req_test_pct:.2f}%)" + ) + print( + "Requirements fully linked: " + f"{req_fully_linked}/{req_total} ({req_fully_linked_pct:.2f}%)" + ) + if args.print_unlinked_requirements: + print("Unlinked requirement details:") + print( + " Missing source_code_link: " + + (", ".join(sorted(req_missing_code)) if req_missing_code else "") + ) + print( + " Missing testlink: " + + (", ".join(sorted(req_missing_test)) if req_missing_test else "") + ) + print( + " Not fully linked: " + + ( + ", ".join(sorted(req_not_fully_linked)) + if req_not_fully_linked + else "" + ) + ) + print( + "Tests linked to requirements: " + f"{tests_linked}/{tests_total} ({tests_linked_pct:.2f}%)" + ) + print(f"Broken test references: {len(broken_test_references)}") + + if broken_test_references: + print("Broken reference details:") + for item in broken_test_references: + print(f" - {item['testcase']} -> {item['missing_need']}") + + if args.json_output: + out_file = Path(args.json_output) + out_file.write_text(json.dumps(summary, indent=2), encoding="utf-8") + print(f"JSON summary written to: {out_file}") + + failures: list[str] = [] + if req_code_pct < float(args.min_req_code): + failures.append( + f"requirements with code links {req_code_pct:.2f}% < {args.min_req_code:.2f}%" + ) + if req_test_pct < float(args.min_req_test): + failures.append( + f"requirements with test links {req_test_pct:.2f}% < {args.min_req_test:.2f}%" + ) + if req_fully_linked_pct < float(args.min_req_fully_linked): + failures.append( + "requirements fully linked " + 
f"{req_fully_linked_pct:.2f}% < {args.min_req_fully_linked:.2f}%" + ) + if tests_linked_pct < float(args.min_tests_linked): + failures.append( + f"tests linked to requirements {tests_linked_pct:.2f}% < {args.min_tests_linked:.2f}%" + ) + if args.fail_on_broken_test_refs and broken_test_references: + failures.append( + f"broken testcase references found: {len(broken_test_references)}" + ) + + if failures: + print("Threshold check failed:") + for msg in failures: + print(f" - {msg}") + return 2 + + print("Threshold check passed.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From 58ae80dc0948e30fead4e50b8c408092a6d94fe3 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 13 Apr 2026 13:35:15 +0000 Subject: [PATCH 02/27] add coverage check --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2900b6074..d19638d07 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ __pycache__/ # bug: This file is created in repo root on test discovery. 
/consumer_test.log +.clwb From 4e9c60ea52c56ad05e8ea8e3f6d454a286905705 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 13 Apr 2026 14:03:36 +0000 Subject: [PATCH 03/27] fix lint --- .../tests/traceability_coverage_test.py | 4 +- scripts_bazel/traceability_coverage.py | 402 ++++++++++++------ 2 files changed, 267 insertions(+), 139 deletions(-) diff --git a/scripts_bazel/tests/traceability_coverage_test.py b/scripts_bazel/tests/traceability_coverage_test.py index cec6d42de..9e1267bc3 100644 --- a/scripts_bazel/tests/traceability_coverage_test.py +++ b/scripts_bazel/tests/traceability_coverage_test.py @@ -198,7 +198,9 @@ def test_traceability_coverage_prints_unlinked_requirements(tmp_path: Path) -> N assert "Not fully linked: REQ_1, REQ_2" in result.stdout -def test_traceability_coverage_accepts_workspace_relative_needs_json(tmp_path: Path) -> None: +def test_traceability_coverage_accepts_workspace_relative_needs_json( + tmp_path: Path, +) -> None: workspace = tmp_path / "workspace" workspace.mkdir() needs_json = _write_needs_json(workspace) diff --git a/scripts_bazel/traceability_coverage.py b/scripts_bazel/traceability_coverage.py index ef8653882..6a641d3f9 100644 --- a/scripts_bazel/traceability_coverage.py +++ b/scripts_bazel/traceability_coverage.py @@ -107,11 +107,228 @@ def _find_needs_json(explicit: str | None) -> Path: return candidate raise FileNotFoundError( - "Could not locate needs.json automatically. " - "Use --needs-json with a valid path." + "Could not locate needs.json automatically. Use --needs-json with a valid path." 
) +def _apply_argument_shortcuts(args: argparse.Namespace) -> None: + """Apply shortcut arguments like --require-all-links.""" + if args.require_all_links: + args.min_req_code = 100.0 + args.min_req_test = 100.0 + args.min_req_fully_linked = 100.0 + args.min_tests_linked = 100.0 + args.fail_on_broken_test_refs = True + + +def _filter_requirements( + all_needs: list[dict[str, Any]], + requirement_types: set[str], + include_not_implemented: bool, +) -> list[dict[str, Any]]: + """Extract and filter requirements from needs.""" + requirements: list[dict[str, Any]] = [] + for need in all_needs: + need_type = str(need.get("type", "")).strip() + if need_type not in requirement_types: + continue + if not include_not_implemented: + implemented = str(need.get("implemented", "")).upper().strip() + if implemented not in {"YES", "PARTIAL"}: + continue + requirements.append(need) + return requirements + + +def _calculate_requirement_metrics( + requirements: list[dict[str, Any]], +) -> tuple[int, int, int, int, list[str], list[str], list[str]]: + """Calculate traceability metrics for requirements.""" + req_total = len(requirements) + req_with_code = sum( + 1 for need in requirements if _is_non_empty(need.get("source_code_link")) + ) + req_with_test = sum( + 1 for need in requirements if _is_non_empty(need.get("testlink")) + ) + req_fully_linked = sum( + 1 + for need in requirements + if _is_non_empty(need.get("source_code_link")) + and _is_non_empty(need.get("testlink")) + ) + req_missing_code = [ + str(need.get("id", "")) + for need in requirements + if not _is_non_empty(need.get("source_code_link")) and need.get("id") + ] + req_missing_test = [ + str(need.get("id", "")) + for need in requirements + if not _is_non_empty(need.get("testlink")) and need.get("id") + ] + req_not_fully_linked = [ + str(need.get("id", "")) + for need in requirements + if ( + ( + not _is_non_empty(need.get("source_code_link")) + or not _is_non_empty(need.get("testlink")) + ) + and need.get("id") + ) + ] 
+ return ( + req_total, + req_with_code, + req_with_test, + req_fully_linked, + req_missing_code, + req_missing_test, + req_not_fully_linked, + ) + + +def _calculate_test_metrics( + all_needs: list[dict[str, Any]], + requirement_ids: set[str], + filtered_test_types: set[str], +) -> tuple[int, int, list[dict[str, str]]]: + """Calculate test linkage metrics and find broken references.""" + testcases = [ + need for need in all_needs if str(need.get("type", "")).strip() == "testcase" + ] + if filtered_test_types: + testcases = [ + need + for need in testcases + if str(need.get("test_type", need.get("TestType", ""))).strip() + in filtered_test_types + ] + tests_total = len(testcases) + + tests_linked = 0 + broken_test_references: list[dict[str, str]] = [] + for test_need in testcases: + test_id = str(test_need.get("id", "")) + partially = _parse_need_id_list( + test_need.get("partially_verifies", test_need.get("PartiallyVerifies")) + ) + fully = _parse_need_id_list( + test_need.get("fully_verifies", test_need.get("FullyVerifies")) + ) + refs = partially + fully + if refs: + tests_linked += 1 + for ref in refs: + if ref not in requirement_ids: + broken_test_references.append( + {"testcase": test_id, "missing_need": ref} + ) + return tests_total, tests_linked, broken_test_references + + +def _print_summary( + needs_json: Path, + req_total: int, + req_with_code: int, + req_code_pct: float, + req_with_test: int, + req_test_pct: float, + req_fully_linked: int, + req_fully_linked_pct: float, + req_missing_code: list[str], + req_missing_test: list[str], + req_not_fully_linked: list[str], + print_unlinked: bool, + tests_total: int, + tests_linked: int, + tests_linked_pct: float, + broken_test_references: list[dict[str, str]], +) -> None: + """Print human-readable summary.""" + print(f"Traceability input: {needs_json}") + print("-" * 72) + print( + "Requirements with source links: " + f"{req_with_code}/{req_total} ({req_code_pct:.2f}%)" + ) + print( + "Requirements with test 
links: " + f"{req_with_test}/{req_total} ({req_test_pct:.2f}%)" + ) + print( + "Requirements fully linked: " + f"{req_fully_linked}/{req_total} ({req_fully_linked_pct:.2f}%)" + ) + if print_unlinked: + print("Unlinked requirement details:") + print( + " Missing source_code_link: " + + (", ".join(sorted(req_missing_code)) if req_missing_code else "") + ) + print( + " Missing testlink: " + + (", ".join(sorted(req_missing_test)) if req_missing_test else "") + ) + print( + " Not fully linked: " + + ( + ", ".join(sorted(req_not_fully_linked)) + if req_not_fully_linked + else "" + ) + ) + print( + "Tests linked to requirements: " + f"{tests_linked}/{tests_total} ({tests_linked_pct:.2f}%)" + ) + print(f"Broken test references: {len(broken_test_references)}") + + if broken_test_references: + print("Broken reference details:") + for item in broken_test_references: + print(f" - {item['testcase']} -> {item['missing_need']}") + + +def _check_thresholds( + req_code_pct: float, + min_req_code: float, + req_test_pct: float, + min_req_test: float, + req_fully_linked_pct: float, + min_req_fully_linked: float, + tests_linked_pct: float, + min_tests_linked: float, + broken_test_references: list[dict[str, str]], + fail_on_broken_test_refs: bool, +) -> list[str]: + """Check threshold violations and return failures.""" + failures: list[str] = [] + if req_code_pct < float(min_req_code): + failures.append( + f"requirements with code links {req_code_pct:.2f}% < {min_req_code:.2f}%" + ) + if req_test_pct < float(min_req_test): + failures.append( + f"requirements with test links {req_test_pct:.2f}% < {min_req_test:.2f}%" + ) + if req_fully_linked_pct < float(min_req_fully_linked): + failures.append( + "requirements fully linked " + f"{req_fully_linked_pct:.2f}% < {min_req_fully_linked:.2f}%" + ) + if tests_linked_pct < float(min_tests_linked): + failures.append( + f"tests linked to requirements {tests_linked_pct:.2f}% < {min_tests_linked:.2f}%" + ) + if fail_on_broken_test_refs and 
broken_test_references: + failures.append( + f"broken testcase references found: {len(broken_test_references)}" + ) + return failures + + def main() -> int: parser = argparse.ArgumentParser( description=( @@ -200,12 +417,7 @@ def main() -> int: ) args = parser.parse_args() - if args.require_all_links: - args.min_req_code = 100.0 - args.min_req_test = 100.0 - args.min_req_fully_linked = 100.0 - args.min_tests_linked = 100.0 - args.fail_on_broken_test_refs = True + _apply_argument_shortcuts(args) requirement_types = { item.strip() for item in str(args.requirement_types).split(",") if item.strip() @@ -220,80 +432,28 @@ def main() -> int: needs_json = _find_needs_json(args.needs_json) all_needs = _load_needs(needs_json) - requirements: list[dict[str, Any]] = [] - for need in all_needs: - need_type = str(need.get("type", "")).strip() - if need_type not in requirement_types: - continue - if not args.include_not_implemented: - implemented = str(need.get("implemented", "")).upper().strip() - if implemented not in {"YES", "PARTIAL"}: - continue - requirements.append(need) + requirements = _filter_requirements( + all_needs, requirement_types, args.include_not_implemented + ) requirement_ids = { str(need.get("id", "")).strip() for need in requirements if need.get("id") } - req_total = len(requirements) - req_with_code = sum( - 1 for need in requirements if _is_non_empty(need.get("source_code_link")) - ) - req_with_test = sum(1 for need in requirements if _is_non_empty(need.get("testlink"))) - req_fully_linked = sum( - 1 - for need in requirements - if _is_non_empty(need.get("source_code_link")) - and _is_non_empty(need.get("testlink")) + ( + req_total, + req_with_code, + req_with_test, + req_fully_linked, + req_missing_code, + req_missing_test, + req_not_fully_linked, + ) = _calculate_requirement_metrics(requirements) + + tests_total, tests_linked, broken_test_references = _calculate_test_metrics( + all_needs, requirement_ids, filtered_test_types ) - req_missing_code = [ - 
str(need.get("id", "")) - for need in requirements - if not _is_non_empty(need.get("source_code_link")) and need.get("id") - ] - req_missing_test = [ - str(need.get("id", "")) - for need in requirements - if not _is_non_empty(need.get("testlink")) and need.get("id") - ] - req_not_fully_linked = [ - str(need.get("id", "")) - for need in requirements - if ( - (not _is_non_empty(need.get("source_code_link")) - or not _is_non_empty(need.get("testlink"))) - and need.get("id") - ) - ] - - testcases = [need for need in all_needs if str(need.get("type", "")).strip() == "testcase"] - if filtered_test_types: - testcases = [ - need - for need in testcases - if str(need.get("test_type", need.get("TestType", ""))).strip() - in filtered_test_types - ] - tests_total = len(testcases) - - tests_linked = 0 - broken_test_references: list[dict[str, str]] = [] - for test_need in testcases: - test_id = str(test_need.get("id", "")) - partially = _parse_need_id_list( - test_need.get("partially_verifies", test_need.get("PartiallyVerifies")) - ) - fully = _parse_need_id_list( - test_need.get("fully_verifies", test_need.get("FullyVerifies")) - ) - refs = partially + fully - if refs: - tests_linked += 1 - for ref in refs: - if ref not in requirement_ids: - broken_test_references.append({"testcase": test_id, "missing_need": ref}) - req_code_pct = _safe_percent(req_with_code, req_total) req_test_pct = _safe_percent(req_with_test, req_total) req_fully_linked_pct = _safe_percent(req_fully_linked, req_total) @@ -331,76 +491,42 @@ def main() -> int: }, } - print(f"Traceability input: {needs_json}") - print("-" * 72) - print( - "Requirements with source links: " - f"{req_with_code}/{req_total} ({req_code_pct:.2f}%)" - ) - print( - "Requirements with test links: " - f"{req_with_test}/{req_total} ({req_test_pct:.2f}%)" - ) - print( - "Requirements fully linked: " - f"{req_fully_linked}/{req_total} ({req_fully_linked_pct:.2f}%)" - ) - if args.print_unlinked_requirements: - print("Unlinked requirement 
details:") - print( - " Missing source_code_link: " - + (", ".join(sorted(req_missing_code)) if req_missing_code else "") - ) - print( - " Missing testlink: " - + (", ".join(sorted(req_missing_test)) if req_missing_test else "") - ) - print( - " Not fully linked: " - + ( - ", ".join(sorted(req_not_fully_linked)) - if req_not_fully_linked - else "" - ) - ) - print( - "Tests linked to requirements: " - f"{tests_linked}/{tests_total} ({tests_linked_pct:.2f}%)" + _print_summary( + needs_json, + req_total, + req_with_code, + req_code_pct, + req_with_test, + req_test_pct, + req_fully_linked, + req_fully_linked_pct, + req_missing_code, + req_missing_test, + req_not_fully_linked, + args.print_unlinked_requirements, + tests_total, + tests_linked, + tests_linked_pct, + broken_test_references, ) - print(f"Broken test references: {len(broken_test_references)}") - - if broken_test_references: - print("Broken reference details:") - for item in broken_test_references: - print(f" - {item['testcase']} -> {item['missing_need']}") if args.json_output: out_file = Path(args.json_output) out_file.write_text(json.dumps(summary, indent=2), encoding="utf-8") print(f"JSON summary written to: {out_file}") - failures: list[str] = [] - if req_code_pct < float(args.min_req_code): - failures.append( - f"requirements with code links {req_code_pct:.2f}% < {args.min_req_code:.2f}%" - ) - if req_test_pct < float(args.min_req_test): - failures.append( - f"requirements with test links {req_test_pct:.2f}% < {args.min_req_test:.2f}%" - ) - if req_fully_linked_pct < float(args.min_req_fully_linked): - failures.append( - "requirements fully linked " - f"{req_fully_linked_pct:.2f}% < {args.min_req_fully_linked:.2f}%" - ) - if tests_linked_pct < float(args.min_tests_linked): - failures.append( - f"tests linked to requirements {tests_linked_pct:.2f}% < {args.min_tests_linked:.2f}%" - ) - if args.fail_on_broken_test_refs and broken_test_references: - failures.append( - f"broken testcase references found: 
{len(broken_test_references)}" - ) + failures = _check_thresholds( + req_code_pct, + args.min_req_code, + req_test_pct, + args.min_req_test, + req_fully_linked_pct, + args.min_req_fully_linked, + tests_linked_pct, + args.min_tests_linked, + broken_test_references, + args.fail_on_broken_test_refs, + ) if failures: print("Threshold check failed:") From 0ec52177dfdae3bb4cdc18bda8f0f52c1e69c950 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Tue, 14 Apr 2026 13:32:16 +0000 Subject: [PATCH 04/27] refactoring the coverage, metrics and dashboard --- .../requirements/implementation_state.rst | 11 +- scripts_bazel/BUILD | 1 + scripts_bazel/traceability_coverage.py | 223 ++++-------------- .../checks/traceability_dashboard.py | 77 ++++++ .../score_metamodel/traceability_metrics.py | 199 ++++++++++++++++ 5 files changed, 321 insertions(+), 190 deletions(-) create mode 100644 src/extensions/score_metamodel/checks/traceability_dashboard.py create mode 100644 src/extensions/score_metamodel/traceability_metrics.py diff --git a/docs/internals/requirements/implementation_state.rst b/docs/internals/requirements/implementation_state.rst index 580e090fc..47eaa539a 100644 --- a/docs/internals/requirements/implementation_state.rst +++ b/docs/internals/requirements/implementation_state.rst @@ -20,12 +20,9 @@ Overview -------- .. needpie:: Requirements Status - :labels: not implemented, implemented but not tested, implemented and tested + :labels: not implemented, implemented but incomplete docs, fully documented :colors: red,yellow, green - - type == 'tool_req' and implemented == 'NO' - type == 'tool_req' and testlink == '' and (implemented == 'YES' or implemented == 'PARTIAL') - type == 'tool_req' and testlink != '' and (implemented == 'YES' or implemented == 'PARTIAL') + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_status(tool_req) In Detail --------- @@ -48,9 +45,7 @@ In Detail .. 
needpie:: Requirements with Codelinks :labels: no codelink, with codelink :colors: red, green - - type == 'tool_req' and source_code_link == '' - type == 'tool_req' and source_code_link != '' + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_with_code_links(tool_req) .. grid-item-card:: diff --git a/scripts_bazel/BUILD b/scripts_bazel/BUILD index 15586f5c1..49ac766cc 100644 --- a/scripts_bazel/BUILD +++ b/scripts_bazel/BUILD @@ -43,4 +43,5 @@ py_binary( srcs = ["traceability_coverage.py"], main = "traceability_coverage.py", visibility = ["//visibility:public"], + deps = all_requirements + ["//src/extensions/score_metamodel:score_metamodel"], ) diff --git a/scripts_bazel/traceability_coverage.py b/scripts_bazel/traceability_coverage.py index 6a641d3f9..ef2dc15b8 100644 --- a/scripts_bazel/traceability_coverage.py +++ b/scripts_bazel/traceability_coverage.py @@ -16,40 +16,27 @@ from __future__ import annotations import argparse +import importlib.util import json import os +import sys from pathlib import Path from typing import Any +# Ensure shared metric code under src/ is importable when executed directly. 
+_REPO_ROOT = Path(__file__).resolve().parent.parent +if str(_REPO_ROOT) not in sys.path: + sys.path.insert(0, str(_REPO_ROOT)) -def _is_non_empty(value: Any) -> bool: - if value is None: - return False - if isinstance(value, str): - return bool(value.strip()) - if isinstance(value, (list, tuple, set, dict)): - return len(value) > 0 - return True +# Import only the metrics module, avoid heavy __init__.py +_metrics_path = _REPO_ROOT / "src/extensions/score_metamodel/traceability_metrics.py" +_spec = importlib.util.spec_from_file_location("traceability_metrics", _metrics_path) +if _spec is None or _spec.loader is None: + raise ImportError(f"Failed to load metrics module from {_metrics_path}") +traceability_metrics = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(traceability_metrics) - -def _parse_need_id_list(value: Any) -> list[str]: - if value is None: - return [] - if isinstance(value, str): - return [item.strip() for item in value.split(",") if item.strip()] - if isinstance(value, list): - out: list[str] = [] - for item in value: - if isinstance(item, str) and item.strip(): - out.append(item.strip()) - return out - return [] - - -def _safe_percent(numerator: int, denominator: int) -> float: - if denominator == 0: - return 100.0 - return (numerator / denominator) * 100.0 +compute_traceability_summary = traceability_metrics.compute_traceability_summary def _load_needs(needs_json: Path) -> list[dict[str, Any]]: @@ -121,113 +108,6 @@ def _apply_argument_shortcuts(args: argparse.Namespace) -> None: args.fail_on_broken_test_refs = True -def _filter_requirements( - all_needs: list[dict[str, Any]], - requirement_types: set[str], - include_not_implemented: bool, -) -> list[dict[str, Any]]: - """Extract and filter requirements from needs.""" - requirements: list[dict[str, Any]] = [] - for need in all_needs: - need_type = str(need.get("type", "")).strip() - if need_type not in requirement_types: - continue - if not include_not_implemented: - implemented = 
str(need.get("implemented", "")).upper().strip() - if implemented not in {"YES", "PARTIAL"}: - continue - requirements.append(need) - return requirements - - -def _calculate_requirement_metrics( - requirements: list[dict[str, Any]], -) -> tuple[int, int, int, int, list[str], list[str], list[str]]: - """Calculate traceability metrics for requirements.""" - req_total = len(requirements) - req_with_code = sum( - 1 for need in requirements if _is_non_empty(need.get("source_code_link")) - ) - req_with_test = sum( - 1 for need in requirements if _is_non_empty(need.get("testlink")) - ) - req_fully_linked = sum( - 1 - for need in requirements - if _is_non_empty(need.get("source_code_link")) - and _is_non_empty(need.get("testlink")) - ) - req_missing_code = [ - str(need.get("id", "")) - for need in requirements - if not _is_non_empty(need.get("source_code_link")) and need.get("id") - ] - req_missing_test = [ - str(need.get("id", "")) - for need in requirements - if not _is_non_empty(need.get("testlink")) and need.get("id") - ] - req_not_fully_linked = [ - str(need.get("id", "")) - for need in requirements - if ( - ( - not _is_non_empty(need.get("source_code_link")) - or not _is_non_empty(need.get("testlink")) - ) - and need.get("id") - ) - ] - return ( - req_total, - req_with_code, - req_with_test, - req_fully_linked, - req_missing_code, - req_missing_test, - req_not_fully_linked, - ) - - -def _calculate_test_metrics( - all_needs: list[dict[str, Any]], - requirement_ids: set[str], - filtered_test_types: set[str], -) -> tuple[int, int, list[dict[str, str]]]: - """Calculate test linkage metrics and find broken references.""" - testcases = [ - need for need in all_needs if str(need.get("type", "")).strip() == "testcase" - ] - if filtered_test_types: - testcases = [ - need - for need in testcases - if str(need.get("test_type", need.get("TestType", ""))).strip() - in filtered_test_types - ] - tests_total = len(testcases) - - tests_linked = 0 - broken_test_references: 
list[dict[str, str]] = [] - for test_need in testcases: - test_id = str(test_need.get("id", "")) - partially = _parse_need_id_list( - test_need.get("partially_verifies", test_need.get("PartiallyVerifies")) - ) - fully = _parse_need_id_list( - test_need.get("fully_verifies", test_need.get("FullyVerifies")) - ) - refs = partially + fully - if refs: - tests_linked += 1 - for ref in refs: - if ref not in requirement_ids: - broken_test_references.append( - {"testcase": test_id, "missing_need": ref} - ) - return tests_total, tests_linked, broken_test_references - - def _print_summary( needs_json: Path, req_total: int, @@ -432,56 +312,35 @@ def main() -> int: needs_json = _find_needs_json(args.needs_json) all_needs = _load_needs(needs_json) - requirements = _filter_requirements( - all_needs, requirement_types, args.include_not_implemented - ) - - requirement_ids = { - str(need.get("id", "")).strip() for need in requirements if need.get("id") - } - - ( - req_total, - req_with_code, - req_with_test, - req_fully_linked, - req_missing_code, - req_missing_test, - req_not_fully_linked, - ) = _calculate_requirement_metrics(requirements) - - tests_total, tests_linked, broken_test_references = _calculate_test_metrics( - all_needs, requirement_ids, filtered_test_types + summary = compute_traceability_summary( + all_needs=all_needs, + requirement_types=requirement_types, + include_not_implemented=args.include_not_implemented, + filtered_test_types=filtered_test_types, ) - req_code_pct = _safe_percent(req_with_code, req_total) - req_test_pct = _safe_percent(req_with_test, req_total) - req_fully_linked_pct = _safe_percent(req_fully_linked, req_total) - tests_linked_pct = _safe_percent(tests_linked, tests_total) - - summary = { + req_total = int(summary["requirements"]["total"]) + req_with_code = int(summary["requirements"]["with_code_link"]) + req_with_test = int(summary["requirements"]["with_test_link"]) + req_fully_linked = int(summary["requirements"]["fully_linked"]) + req_code_pct 
= float(summary["requirements"]["with_code_link_pct"]) + req_test_pct = float(summary["requirements"]["with_test_link_pct"]) + req_fully_linked_pct = float(summary["requirements"]["fully_linked_pct"]) + req_missing_code = list(summary["requirements"]["missing_code_link_ids"]) + req_missing_test = list(summary["requirements"]["missing_test_link_ids"]) + req_not_fully_linked = list(summary["requirements"]["not_fully_linked_ids"]) + + tests_total = int(summary["tests"]["total"]) + tests_linked = int(summary["tests"]["linked_to_requirements"]) + tests_linked_pct = float(summary["tests"]["linked_to_requirements_pct"]) + broken_test_references = list(summary["tests"]["broken_references"]) + + summary_output = { "needs_json": str(needs_json), - "requirement_types": sorted(requirement_types), - "include_not_implemented": bool(args.include_not_implemented), - "requirements": { - "total": req_total, - "with_code_link": req_with_code, - "with_test_link": req_with_test, - "fully_linked": req_fully_linked, - "with_code_link_pct": req_code_pct, - "with_test_link_pct": req_test_pct, - "fully_linked_pct": req_fully_linked_pct, - "missing_code_link_ids": sorted(req_missing_code), - "missing_test_link_ids": sorted(req_missing_test), - "not_fully_linked_ids": sorted(req_not_fully_linked), - }, - "tests": { - "total": tests_total, - "filtered_test_types": sorted(filtered_test_types), - "linked_to_requirements": tests_linked, - "linked_to_requirements_pct": tests_linked_pct, - "broken_references": broken_test_references, - }, + "requirement_types": summary["requirement_types"], + "include_not_implemented": summary["include_not_implemented"], + "requirements": summary["requirements"], + "tests": summary["tests"], "thresholds": { "min_req_code": float(args.min_req_code), "min_req_test": float(args.min_req_test), @@ -512,7 +371,7 @@ def main() -> int: if args.json_output: out_file = Path(args.json_output) - out_file.write_text(json.dumps(summary, indent=2), encoding="utf-8") + 
out_file.write_text(json.dumps(summary_output, indent=2), encoding="utf-8") print(f"JSON summary written to: {out_file}") failures = _check_thresholds( diff --git a/src/extensions/score_metamodel/checks/traceability_dashboard.py b/src/extensions/score_metamodel/checks/traceability_dashboard.py new file mode 100644 index 000000000..5d109030c --- /dev/null +++ b/src/extensions/score_metamodel/checks/traceability_dashboard.py @@ -0,0 +1,77 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Needpie filter functions backed by shared traceability metric calculations.""" + +from __future__ import annotations + +from sphinx_needs.need_item import NeedItem + +from ..traceability_metrics import compute_traceability_summary, filter_requirements + + +def _requirement_types(kwargs: dict[str, str | int | float]) -> set[str]: + raw = str(kwargs.get("arg1", "tool_req")).strip() + values = {value.strip() for value in raw.split(",") if value.strip()} + return values or {"tool_req"} + + +def pie_requirements_status( + needs: list[NeedItem], results: list[int], **kwargs: str | int | float +) -> None: + """Dashboard status split: not implemented, implemented/incomplete, fully linked.""" + req_types = _requirement_types(kwargs) + + all_requirements = filter_requirements( + needs, + requirement_types=req_types, + include_not_implemented=True, + ) + implemented_requirements = filter_requirements( + needs, + requirement_types=req_types, + 
include_not_implemented=False, + ) + summary = compute_traceability_summary( + all_needs=needs, + requirement_types=req_types, + include_not_implemented=False, + filtered_test_types=set(), + ) + + not_implemented = len(all_requirements) - len(implemented_requirements) + fully_linked = int(summary["requirements"]["fully_linked"]) + implemented_incomplete = len(implemented_requirements) - fully_linked + + results.append(not_implemented) + results.append(implemented_incomplete) + results.append(fully_linked) + + +def pie_requirements_with_code_links( + needs: list[NeedItem], results: list[int], **kwargs: str | int | float +) -> None: + """Dashboard split: requirements with and without source code links.""" + req_types = _requirement_types(kwargs) + summary = compute_traceability_summary( + all_needs=needs, + requirement_types=req_types, + include_not_implemented=True, + filtered_test_types=set(), + ) + + total = int(summary["requirements"]["total"]) + with_code = int(summary["requirements"]["with_code_link"]) + + results.append(total - with_code) + results.append(with_code) diff --git a/src/extensions/score_metamodel/traceability_metrics.py b/src/extensions/score_metamodel/traceability_metrics.py new file mode 100644 index 000000000..10284ed1c --- /dev/null +++ b/src/extensions/score_metamodel/traceability_metrics.py @@ -0,0 +1,199 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Shared traceability metric calculations for CI checks and dashboards.""" + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any + + +def is_non_empty(value: Any) -> bool: + """Return True if value should be treated as present for traceability checks.""" + if value is None: + return False + if isinstance(value, str): + return bool(value.strip()) + if isinstance(value, (list, tuple, set, dict)): + return len(value) > 0 + return True + + +def parse_need_id_list(value: Any) -> list[str]: + """Normalize need-id lists encoded as CSV strings or string lists.""" + if value is None: + return [] + if isinstance(value, str): + return [item.strip() for item in value.split(",") if item.strip()] + if isinstance(value, list): + out: list[str] = [] + for item in value: + if isinstance(item, str) and item.strip(): + out.append(item.strip()) + return out + return [] + + +def safe_percent(numerator: int, denominator: int) -> float: + """Return percentage in range [0, 100], treating empty denominator as 100%.""" + if denominator == 0: + return 100.0 + return (numerator / denominator) * 100.0 + + +def filter_requirements( + all_needs: Sequence[Any], + requirement_types: set[str], + include_not_implemented: bool, +) -> list[Any]: + """Extract requirements by type and implementation state.""" + requirements: list[dict[str, Any]] = [] + for need in all_needs: + need_type = str(need.get("type", "")).strip() + if need_type not in requirement_types: + continue + if not include_not_implemented: + implemented = str(need.get("implemented", "")).upper().strip() + if implemented not in {"YES", "PARTIAL"}: + continue + 
requirements.append(need) + return requirements + + +def calculate_requirement_metrics( + requirements: Sequence[Any], +) -> dict[str, Any]: + """Calculate requirement traceability statistics for links and completeness.""" + total = len(requirements) + with_code = sum( + 1 for need in requirements if is_non_empty(need.get("source_code_link")) + ) + with_test = sum(1 for need in requirements if is_non_empty(need.get("testlink"))) + fully_linked = sum( + 1 + for need in requirements + if is_non_empty(need.get("source_code_link")) + and is_non_empty(need.get("testlink")) + ) + + missing_code_ids = [ + str(need.get("id", "")) + for need in requirements + if not is_non_empty(need.get("source_code_link")) and need.get("id") + ] + missing_test_ids = [ + str(need.get("id", "")) + for need in requirements + if not is_non_empty(need.get("testlink")) and need.get("id") + ] + not_fully_linked_ids = [ + str(need.get("id", "")) + for need in requirements + if ( + ( + not is_non_empty(need.get("source_code_link")) + or not is_non_empty(need.get("testlink")) + ) + and need.get("id") + ) + ] + + return { + "total": total, + "with_code_link": with_code, + "with_test_link": with_test, + "fully_linked": fully_linked, + "with_code_link_pct": safe_percent(with_code, total), + "with_test_link_pct": safe_percent(with_test, total), + "fully_linked_pct": safe_percent(fully_linked, total), + "missing_code_link_ids": sorted(missing_code_ids), + "missing_test_link_ids": sorted(missing_test_ids), + "not_fully_linked_ids": sorted(not_fully_linked_ids), + } + + +def calculate_test_metrics( + all_needs: Sequence[Any], + requirement_ids: set[str], + filtered_test_types: set[str], +) -> dict[str, Any]: + """Calculate testcase linkage and broken testcase-reference statistics.""" + testcases = [ + need for need in all_needs if str(need.get("type", "")).strip() == "testcase" + ] + if filtered_test_types: + testcases = [ + need + for need in testcases + if str(need.get("test_type", need.get("TestType", 
""))).strip() + in filtered_test_types + ] + + tests_total = len(testcases) + tests_linked = 0 + broken_references: list[dict[str, str]] = [] + + for test_need in testcases: + test_id = str(test_need.get("id", "")) + partially = parse_need_id_list( + test_need.get("partially_verifies", test_need.get("PartiallyVerifies")) + ) + fully = parse_need_id_list( + test_need.get("fully_verifies", test_need.get("FullyVerifies")) + ) + refs = partially + fully + if refs: + tests_linked += 1 + for ref in refs: + if ref not in requirement_ids: + broken_references.append({"testcase": test_id, "missing_need": ref}) + + return { + "total": tests_total, + "filtered_test_types": sorted(filtered_test_types), + "linked_to_requirements": tests_linked, + "linked_to_requirements_pct": safe_percent(tests_linked, tests_total), + "broken_references": broken_references, + } + + +def compute_traceability_summary( + all_needs: Sequence[Any], + requirement_types: set[str], + include_not_implemented: bool, + filtered_test_types: set[str], +) -> dict[str, Any]: + """Return full CI/dashboard summary using one shared metric implementation.""" + requirements = filter_requirements( + all_needs, + requirement_types=requirement_types, + include_not_implemented=include_not_implemented, + ) + requirement_ids = { + str(need.get("id", "")).strip() for need in requirements if need.get("id") + } + + req_metrics = calculate_requirement_metrics(requirements) + test_metrics = calculate_test_metrics( + all_needs, + requirement_ids=requirement_ids, + filtered_test_types=filtered_test_types, + ) + + return { + "requirement_types": sorted(requirement_types), + "include_not_implemented": include_not_implemented, + "requirements": req_metrics, + "tests": test_metrics, + } From 764da8d42540b7f599fabdda2b474d8c229376af Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Tue, 14 Apr 2026 14:13:03 +0000 Subject: [PATCH 05/27] add generic filters --- .../score_metamodel/checks/standards.py | 104 ++-------- 
.../score_metamodel/sphinx_filters.py | 188 ++++++++++++++++++ 2 files changed, 205 insertions(+), 87 deletions(-) create mode 100644 src/extensions/score_metamodel/sphinx_filters.py diff --git a/src/extensions/score_metamodel/checks/standards.py b/src/extensions/score_metamodel/checks/standards.py index 7d27f5bff..8460b267b 100644 --- a/src/extensions/score_metamodel/checks/standards.py +++ b/src/extensions/score_metamodel/checks/standards.py @@ -14,6 +14,13 @@ from sphinx_needs.need_item import NeedItem +from ..sphinx_filters import ( + generic_pie_items_by_tag, + generic_pie_items_in_relationships, + generic_pie_linked_items, + generic_pie_workproducts_by_type, +) + # from score_metamodel import ( # CheckLogger, # graph_check, @@ -186,27 +193,8 @@ def my_pie_linked_standard_requirements( Passed arguments can be accessed via kwargs['arg'] See: https://sphinx-needs.readthedocs.io/en/latest/filter.html#arguments """ - cnt_connected = 0 - cnt_not_connected = 0 - - standard = kwargs["arg1"] - - all_standards_needs = get_standards_needs(needs) - standards_needs = { - k: v - for k, v in all_standards_needs.items() - if k.startswith(f"std_req__{standard}__") - } - compliance_req_needs = get_compliance_req_needs(needs) - - for need in standards_needs.values(): - if need["id"] in compliance_req_needs: - cnt_connected += 1 - else: - cnt_not_connected += 1 - - results.append(cnt_connected) - results.append(cnt_not_connected) + standard = str(kwargs["arg1"]) + generic_pie_linked_items(needs, results, arg1=f"std_req__{standard}__", arg2="gd_") def my_pie_linked_standard_requirements_by_tag( @@ -238,23 +226,10 @@ def my_pie_linked_standard_requirements_by_tag( the mutated `results`list, and use this to display/generate the piechart. 
""" - count_linked = 0 - count_non_linked = 0 - - tag = str(kwargs["arg1"]) assert len(kwargs) == 1, ( "Can only provide one tag to `my_pie_linked_standard_requirements_by_tag`" ) - - compliance_req_needs = get_compliance_req_needs(needs) - for need in needs: - if tag in need["tags"]: - if need["id"] in compliance_req_needs: - count_linked += 1 - else: - count_non_linked += 1 - results.append(count_linked) - results.append(count_non_linked) + generic_pie_items_by_tag(needs, results, arg1=kwargs["arg1"], arg2="gd_") def my_pie_linked_standard_workproducts( @@ -267,28 +242,10 @@ def my_pie_linked_standard_workproducts( Passed arguments can be accessed via kwargs['arg'] See: https://sphinx-needs.readthedocs.io/en/latest/filter.html#arguments """ - cwp_connected = 0 - cwp_not_connected = 0 - - standard = kwargs["arg1"] - - all_standard_workproducts = get_standards_workproducts(needs) - standard_workproducts = { - k: v - for k, v in all_standard_workproducts.items() - if k.startswith(f"std_wp__{standard}__") - } - - compliance_wp_needs = get_compliance_wp_needs(needs) - - for need in standard_workproducts.values(): - if need["id"] in compliance_wp_needs: - cwp_connected += 1 - else: - cwp_not_connected += 1 - - results.append(cwp_connected) - results.append(cwp_not_connected) + standard = str(kwargs["arg1"]) + generic_pie_workproducts_by_type( + needs, results, arg1=f"std_wp__{standard}__", arg2="workproduct" + ) def my_pie_workproducts_contained_in_exactly_one_workflow( @@ -299,33 +256,6 @@ def my_pie_workproducts_contained_in_exactly_one_workflow( in exactly one workflow, the not connected once and the once that are connected to multiple workflows. 
""" - all_workflows = get_workflows(needs) - all_workproducts = get_workproducts(needs) - - # Map to track counts for each workproduct and their associated workflows - workproduct_analysis = {wp["id"]: {"count": 0} for wp in all_workproducts.values()} - - # Iterate over workflows and update the counts and workflows - for workflow in all_workflows.values(): - for output in workflow.get("output", []): - # Increment count and add workflow_id if workproduct is in analysis - if output in workproduct_analysis: - workproduct_analysis[output]["count"] += 1 - - not_connected_wp = 0 - nb_wp_connected_to_one_workflow = 0 - nb_wp_connected_to_more_than_one_workflow = 0 - - for analysis in workproduct_analysis.values(): - count = analysis["count"] - - if count == 0: - not_connected_wp += 1 - elif count == 1: - nb_wp_connected_to_one_workflow += 1 - else: - nb_wp_connected_to_more_than_one_workflow += 1 - - results.append(not_connected_wp) - results.append(nb_wp_connected_to_one_workflow) - results.append(nb_wp_connected_to_more_than_one_workflow) + generic_pie_items_in_relationships( + needs, results, arg1="workflow", arg2="output", arg3="workproduct" + ) diff --git a/src/extensions/score_metamodel/sphinx_filters.py b/src/extensions/score_metamodel/sphinx_filters.py new file mode 100644 index 000000000..0a5f4fc1b --- /dev/null +++ b/src/extensions/score_metamodel/sphinx_filters.py @@ -0,0 +1,188 @@ +# ******************************************************************************* +# Copyright (c) 2025 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Generic sphinx-needs filter functions for ``needpie`` directives. + +These functions are fully parameterizable and designed to be called directly +by consumers of docs-as-code (e.g. reference-integration repos) when they +pull in the ``score_docs_as_code`` Bazel module. All functions follow the +sphinx-needs ``filter-func`` signature convention: + +.. code-block:: python + + def func(needs: list[NeedItem], results: list[int], **kwargs) -> None: ... + +Arguments are injected from the ``:filter-func:`` call-site as positional +``arg1``, ``arg2``, … keyword arguments. + +Example usage in RST:: + + .. needpie:: My Requirements Coverage + :labels: Linked, Not Linked + :filter-func: score_metamodel.sphinx_filters.generic_pie_linked_items(std_req__mystandard__, gd_) + +""" + +from __future__ import annotations + +from sphinx_needs.need_item import NeedItem + + +def generic_pie_linked_items( + needs: list[NeedItem], results: list[int], **kwargs: str | int | float +) -> None: + """Count items matching an ID prefix split by compliance linkage. + + Finds all needs whose ``id`` starts with *arg1*, then checks whether + each one appears in the ``complies`` field of any need whose ``type`` + starts with *arg2*. + + :filter-func: arguments: + + - ``arg1`` – ID prefix of the items to count + (e.g. ``std_req__iso26262__``) + - ``arg2`` – type prefix of the source needs whose ``complies`` + lists are scanned (e.g. 
``gd_``) + + Appends to *results*: ``[linked_count, not_linked_count]`` + """ + id_prefix = str(kwargs.get("arg1", "")) + compliance_prefix = str(kwargs.get("arg2", "")) + + target_ids = [ + str(n.get("id", "")) + for n in needs + if str(n.get("id", "")).startswith(id_prefix) + ] + + linked_ids: set[str] = { + ref + for n in needs + if str(n.get("type", "")).startswith(compliance_prefix) + for ref in n.get("complies", []) + if ref + } + + connected = sum(1 for item_id in target_ids if item_id in linked_ids) + not_connected = len(target_ids) - connected + + results.append(connected) + results.append(not_connected) + + +def generic_pie_items_by_tag( + needs: list[NeedItem], results: list[int], **kwargs: str | int | float +) -> None: + """Count items carrying a given tag split by compliance linkage. + + Checks every need that has *arg1* in its ``tags`` field and splits them + by whether their id appears in the ``complies`` field of any need whose + ``type`` starts with *arg2*. + + :filter-func: arguments: + + - ``arg1`` – tag to filter by (e.g. ``aspice40_man5``). + Note: tag values must not contain dots. + - ``arg2`` – type prefix of the source needs whose ``complies`` + lists are scanned (e.g. ``gd_``) + + Appends to *results*: ``[linked_count, not_linked_count]`` + """ + tag = str(kwargs.get("arg1", "")) + compliance_prefix = str(kwargs.get("arg2", "")) + + linked_ids: set[str] = { + ref + for n in needs + if str(n.get("type", "")).startswith(compliance_prefix) + for ref in n.get("complies", []) + if ref + } + + linked = 0 + not_linked = 0 + for n in needs: + if tag in n.get("tags", []): + if str(n.get("id", "")) in linked_ids: + linked += 1 + else: + not_linked += 1 + + results.append(linked) + results.append(not_linked) + + +def generic_pie_workproducts_by_type( + needs: list[NeedItem], results: list[int], **kwargs: str | int | float +) -> None: + """Count work-product items matching an ID prefix split by compliance linkage. 
+ + Semantically equivalent to :func:`generic_pie_linked_items` but scoped to + work-product traceability where the compliance source type is typically an + exact match (e.g. ``workproduct``) rather than a prefix. Because + ``"workproduct".startswith("workproduct")`` is ``True``, both functions use + the same underlying logic. + + :filter-func: arguments: + + - ``arg1`` – ID prefix of the work-product items to count + (e.g. ``std_wp__iso26262__``) + - ``arg2`` – type (or type prefix) of source needs whose ``complies`` + lists are scanned (e.g. ``workproduct``) + + Appends to *results*: ``[linked_count, not_linked_count]`` + """ + generic_pie_linked_items(needs, results, **kwargs) + + +def generic_pie_items_in_relationships( + needs: list[NeedItem], results: list[int], **kwargs: str | int | float +) -> None: + """Count items of a given type by how many container items reference them. + + For every need of type *arg3*, counts how many needs of type *arg1* + include its id in their *arg2* field. Splits the result into three + buckets: not referenced, referenced exactly once, referenced more than + once. + + :filter-func: arguments: + + - ``arg1`` – type of the container needs (e.g. ``workflow``) + - ``arg2`` – field on the container that holds references + (e.g. ``output``) + - ``arg3`` – type of the items to count (e.g. 
``workproduct``) + + Appends to *results*: + ``[not_referenced_count, referenced_once_count, referenced_multiple_count]`` + """ + container_type = str(kwargs.get("arg1", "")) + field = str(kwargs.get("arg2", "")) + item_type = str(kwargs.get("arg3", "")) + + containers = [n for n in needs if n.get("type") == container_type] + items = [n for n in needs if n.get("type") == item_type] + + item_counts: dict[str, int] = {str(n.get("id", "")): 0 for n in items} + + for container in containers: + for ref in container.get(field, []): + if ref in item_counts: + item_counts[ref] += 1 + + not_referenced = sum(1 for c in item_counts.values() if c == 0) + referenced_once = sum(1 for c in item_counts.values() if c == 1) + referenced_multiple = sum(1 for c in item_counts.values() if c > 1) + + results.append(not_referenced) + results.append(referenced_once) + results.append(referenced_multiple) From ec2e9949655bc197dcf698ae284270282df63335 Mon Sep 17 00:00:00 2001 From: "Frank Scholter Peres(MBTI)" <145544737+FScholPer@users.noreply.github.com> Date: Thu, 16 Apr 2026 15:29:39 +0200 Subject: [PATCH 06/27] Update src/extensions/score_metamodel/traceability_metrics.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Maximilian Sören Pollak Signed-off-by: Frank Scholter Peres(MBTI) <145544737+FScholPer@users.noreply.github.com> --- src/extensions/score_metamodel/traceability_metrics.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/extensions/score_metamodel/traceability_metrics.py b/src/extensions/score_metamodel/traceability_metrics.py index 10284ed1c..fe35647ff 100644 --- a/src/extensions/score_metamodel/traceability_metrics.py +++ b/src/extensions/score_metamodel/traceability_metrics.py @@ -21,13 +21,9 @@ def is_non_empty(value: Any) -> bool: """Return True if value should be treated as present for traceability checks.""" - if value is None: - return False if isinstance(value, str): return 
bool(value.strip()) - if isinstance(value, (list, tuple, set, dict)): - return len(value) > 0 - return True + return bool(value) def parse_need_id_list(value: Any) -> list[str]: From a3642575fabe79f5b561dda1b7079306ff86aa17 Mon Sep 17 00:00:00 2001 From: "Frank Scholter Peres(MBTI)" <145544737+FScholPer@users.noreply.github.com> Date: Fri, 17 Apr 2026 10:59:07 +0200 Subject: [PATCH 07/27] Apply suggestions from code review Co-authored-by: Andreas Zwinkau <95761648+a-zw@users.noreply.github.com> Signed-off-by: Frank Scholter Peres(MBTI) <145544737+FScholPer@users.noreply.github.com> --- docs/reference/commands.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/commands.md b/docs/reference/commands.md index 5bc2b959b..7f7f0dffa 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -5,7 +5,7 @@ | `bazel run //:docs` | Builds documentation | | `bazel run //:docs_check` | Verifies documentation correctness | | `bazel run //:docs_combo` | Builds combined documentation with all external dependencies included | -| `bazel run //scripts_bazel:traceability_coverage -- --needs-json bazel-bin/needs_json/needs.json --min-req-code 100 --min-req-test 100 --min-req-fully-linked 100 --min-tests-linked 100 --fail-on-broken-test-refs` | Calculates requirement/test traceability percentages and fails if thresholds are not met | +| `bazel run @score_docs_as_code//scripts_bazel:traceability_coverage -- --needs-json bazel-bin/needs_json/needs.json --min-req-code 100 --min-req-test 100 --min-req-fully-linked 100 --min-tests-linked 100 --fail-on-broken-test-refs` | Calculates requirement/test traceability percentages and fails if thresholds are not met | | `bazel run //:live_preview` | Creates a live_preview of the documentation viewable in a local server | | `bazel run //:live_preview_combo_experimental` | Creates a live_preview of the full documentation with all dependencies viewable in a local server | | `bazel run //:ide_support` | 
Sets up a Python venv for esbonio (Remember to restart VS Code!) | From ecd1cafcdbba590ba7272862641ac93b23ca1219 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Fri, 17 Apr 2026 11:26:26 +0000 Subject: [PATCH 08/27] readd genai headers --- .gitignore | 3 ++ docs/how-to/test_to_doc_links.rst | 32 ++++++++++++++++--- .../requirements/implementation_state.rst | 4 +++ docs/reference/commands.md | 6 ++++ scripts_bazel/BUILD | 4 +++ scripts_bazel/tests/BUILD | 4 +++ .../tests/traceability_coverage_test.py | 4 +++ scripts_bazel/traceability_coverage.py | 4 +++ .../score_metamodel/checks/standards.py | 4 +++ .../checks/traceability_dashboard.py | 4 +++ .../score_metamodel/sphinx_filters.py | 4 +++ .../score_metamodel/traceability_metrics.py | 4 +++ 12 files changed, 72 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index d19638d07..ec96afacf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ # Commonly used for local settings and secrets +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ .env # Bazel diff --git a/docs/how-to/test_to_doc_links.rst b/docs/how-to/test_to_doc_links.rst index 5a0503f76..c1ac5eeef 100644 --- a/docs/how-to/test_to_doc_links.rst +++ b/docs/how-to/test_to_doc_links.rst @@ -12,6 +12,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* + # ╓ ╖ + # ║ Some portions generated by Github Copilot ║ + # ╙ ╜ + Reference Docs in Tests ======================= @@ -58,38 +62,56 @@ Limitations CI/CD Gate for Linkage Percentage --------------------------------- -To enforce traceability in CI: +The traceability checker can be used as a low-level CI gate over exported +``needs.json`` data. + +Current workflow: 1. Run tests. 2. Generate ``needs.json``. 3. Execute the traceability checker. 
+In repository CI, the preferred setup is to wire the coverage check target +to depend on the test-report and ``//:needs_json`` targets, so Bazel handles +the build order automatically. + +You can run the checker as a standalone command, and you can also run it as +part of documentation creation if your repository wiring does so. + .. code-block:: bash bazel test //... bazel build //:needs_json bazel run //scripts_bazel:traceability_coverage -- \ - --needs-json bazel-bin/needs_json/_build/needs/needs.json \ --min-req-code 100 \ --min-req-test 100 \ --min-req-fully-linked 100 \ --min-tests-linked 100 \ --fail-on-broken-test-refs +If ``//:needs_json`` was built beforehand, the checker locates the default +``needs.json`` output automatically. Use ``--needs-json`` only when you want +to point to a non-standard location. + The checker reports: - Percentage of implemented requirements with ``source_code_link`` - Percentage of implemented requirements with ``testlink`` - Percentage of implemented requirements with both links (fully linked) -- Percentage of test cases linked to at least one requirement -- Broken testcase references to unknown requirement IDs + +.. note:: + + Testcase-based metrics depend on testcase needs being present in the + exported ``needs.json``. Testcases are currently generated as external + needs, so values such as testcase linkage percentage or broken testcase + references are only meaningful if those external testcase needs are also + included in the exported dataset. To check only unit tests, filter testcase types: .. code-block:: bash bazel run //scripts_bazel:traceability_coverage -- \ - --needs-json bazel-bin/needs_json/_build/needs/needs.json \ --test-types unit-test Use lower thresholds during rollout and tighten towards 100% over time. 
diff --git a/docs/internals/requirements/implementation_state.rst b/docs/internals/requirements/implementation_state.rst index 47eaa539a..7c6c5929a 100644 --- a/docs/internals/requirements/implementation_state.rst +++ b/docs/internals/requirements/implementation_state.rst @@ -11,6 +11,10 @@ # # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* + + # ╓ ╖ + # ║ Some portions generated by Github Copilot ║ + # ╙ ╜ .. _docs_statistics: Implementation State Statistics diff --git a/docs/reference/commands.md b/docs/reference/commands.md index 7f7f0dffa..091560672 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -1,3 +1,9 @@ + + # Commands | Target | What it does | diff --git a/scripts_bazel/BUILD b/scripts_bazel/BUILD index 49ac766cc..0596271ec 100644 --- a/scripts_bazel/BUILD +++ b/scripts_bazel/BUILD @@ -11,6 +11,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + load("@aspect_rules_py//py:defs.bzl", "py_binary") load("@docs_as_code_hub_env//:requirements.bzl", "all_requirements") diff --git a/scripts_bazel/tests/BUILD b/scripts_bazel/tests/BUILD index 70b7703ac..8f10ee763 100644 --- a/scripts_bazel/tests/BUILD +++ b/scripts_bazel/tests/BUILD @@ -11,6 +11,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + load("@docs_as_code_hub_env//:requirements.bzl", "all_requirements") load("//:score_pytest.bzl", "score_pytest") diff --git a/scripts_bazel/tests/traceability_coverage_test.py b/scripts_bazel/tests/traceability_coverage_test.py index 9e1267bc3..87086025f 100644 --- a/scripts_bazel/tests/traceability_coverage_test.py +++ b/scripts_bazel/tests/traceability_coverage_test.py @@ -11,6 +11,10 @@ # 
SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + """Tests for traceability_coverage.py.""" import json diff --git a/scripts_bazel/traceability_coverage.py b/scripts_bazel/traceability_coverage.py index ef2dc15b8..f8c0c20da 100644 --- a/scripts_bazel/traceability_coverage.py +++ b/scripts_bazel/traceability_coverage.py @@ -11,6 +11,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + """Compute requirement and test traceability coverage from sphinx-needs output.""" from __future__ import annotations diff --git a/src/extensions/score_metamodel/checks/standards.py b/src/extensions/score_metamodel/checks/standards.py index 8460b267b..c5480aa88 100644 --- a/src/extensions/score_metamodel/checks/standards.py +++ b/src/extensions/score_metamodel/checks/standards.py @@ -10,6 +10,10 @@ # # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ # from sphinx.application import Sphinx from sphinx_needs.need_item import NeedItem diff --git a/src/extensions/score_metamodel/checks/traceability_dashboard.py b/src/extensions/score_metamodel/checks/traceability_dashboard.py index 5d109030c..b8a1dedc9 100644 --- a/src/extensions/score_metamodel/checks/traceability_dashboard.py +++ b/src/extensions/score_metamodel/checks/traceability_dashboard.py @@ -11,6 +11,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + """Needpie filter functions backed by shared traceability metric calculations.""" from __future__ import annotations diff --git 
a/src/extensions/score_metamodel/sphinx_filters.py b/src/extensions/score_metamodel/sphinx_filters.py index 0a5f4fc1b..e48c3b370 100644 --- a/src/extensions/score_metamodel/sphinx_filters.py +++ b/src/extensions/score_metamodel/sphinx_filters.py @@ -11,6 +11,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + """Generic sphinx-needs filter functions for ``needpie`` directives. These functions are fully parameterizable and designed to be called directly diff --git a/src/extensions/score_metamodel/traceability_metrics.py b/src/extensions/score_metamodel/traceability_metrics.py index fe35647ff..184fbd1b4 100644 --- a/src/extensions/score_metamodel/traceability_metrics.py +++ b/src/extensions/score_metamodel/traceability_metrics.py @@ -11,6 +11,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + """Shared traceability metric calculations for CI checks and dashboards.""" from __future__ import annotations From 6287c69d6f289dbd1d3059a121f0347591e17c03 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 20 Apr 2026 10:07:16 +0000 Subject: [PATCH 09/27] changed to new json structure --- .../requirements/score_1782_open_summary.md | 629 ++++++++++++++++++ scripts_bazel/BUILD | 8 + scripts_bazel/tests/BUILD | 9 + .../tests/traceability_coverage_test.py | 18 +- scripts_bazel/tests/traceability_gate_test.py | 231 +++++++ scripts_bazel/traceability_coverage.py | 31 +- scripts_bazel/traceability_gate.py | 231 +++++++ .../traceability_metrics_schema.json | 176 +++++ 8 files changed, 1315 insertions(+), 18 deletions(-) create mode 100644 docs/internals/requirements/score_1782_open_summary.md create mode 100644 scripts_bazel/tests/traceability_gate_test.py create mode 100644 
scripts_bazel/traceability_gate.py create mode 100644 scripts_bazel/traceability_metrics_schema.json diff --git a/docs/internals/requirements/score_1782_open_summary.md b/docs/internals/requirements/score_1782_open_summary.md new file mode 100644 index 000000000..2c919d9bb --- /dev/null +++ b/docs/internals/requirements/score_1782_open_summary.md @@ -0,0 +1,629 @@ +# SCORE #1782 Open Items in docs-as-code + +Source reference: https://github.com/eclipse-score/score/issues/1782#issuecomment-4108721585 + +Note: the referenced comment currently contains only "for review on Mar-30". The checklist was taken from the issue body of `eclipse-score/score#1782`. + +**Context**: docs-as-code platform development supporting ASIL B project certification per SCORE building blocks metamodel. +- **Platform Purpose**: Enable downstream ASIL B projects (Dependable Elements) to produce certified safety documentation +- **Module Requirements**: ALL modules in docs-as-code must enforce ASIL B-level rigor (change management, safety analysis) regardless of their own ASIL assignment, because consuming projects may be ASIL B +- **Assumptions of Use (AoU)**: Platform documents AoU that modules must respect; consuming projects document their own AoU for end customers +- **Implication**: The 56 open items represent ASIL B-level requirements for the platform and all its modules, enabling downstream ASIL B certification + +## Summary + +- Total requirements in list: **97** (ALL apply to platform + all modules to enable ASIL B project certification) +- Open in docs-as-code: **56** (Tier 1 & 2 have no ASIL-dependent scope; ALL modules must implement) + - PARTIAL: **8** + - NO: **13** + - NOT_MAPPED: **35** + +State legend: +- `PARTIAL`: mapped to docs-as-code requirement(s) with `:implemented: PARTIAL` +- `NO`: mapped to docs-as-code requirement(s) with `:implemented: NO` +- `NOT_MAPPED`: no mapping found in `docs/internals/requirements/requirements.rst` + +PM title source: `process_description` 
requirement titles (`.. gd_req:: ...`), with fallback to first requirement sentence. + +## Functional Safety Prioritization + +The open items are organized by **Safety Tier** to highlight the critical path for **ASIL B-capable platform** development per SCORE building blocks: + +- **Tier 1 (Critical)**: Direct blockers for ASIL B platform certification. Required for all modules to ensure consuming ASIL B projects can rely on platform support (safety analysis, requirements traceability, test coverage). +- **Tier 2 (High)**: Essential ASIL B infrastructure for all modules. Change control, problem management, document governance—mandatory for all modules to ensure downstream ASIL B projects meet certification audit requirements. +- **Tier 3 (Medium)**: Platform robustness and traceability depth. Important for mature ASIL B processes. All modules expected to implement. +- **Tier 4 (Lower)**: Extended capabilities or language-specific conveniences. Platform and modules can use alternative approaches where core Tier 1-2 requirements are met. + +## Epic Diagram + +```mermaid +flowchart TB + O[Open Requirements\n56] + T1["Tier 1: Critical\n12 items\n(5P/6N/1M)"] + T2["Tier 2: High\n18 items\n(1P/4N/13M)"] + T3["Tier 3: Medium\n13 items\n(2P/2N/9M)"] + T4["Tier 4: Lower\n13 items\n(0P/1N/12M)"] + O --> T1 + O --> T2 + O --> T3 + O --> T4 +``` + +## Tier 1: Critical Requirements (12 items) + +**Category**: Direct enablers for ASIL B-capable platform. ALL modules must implement these to ensure consuming ASIL B projects receive proper safety analysis infrastructure. Non-negotiable for downstream ASIL B certification. + +### Safety Analysis Automation (8/13 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__saf_attr_mitigated_by | Mitigation Controls Documentation | Documents which design controls or safety mechanisms mitigate identified hazards and risks. Links failures to their mitigation strategies. 
| **CRITICAL (ASIL B)**: ALL modules must document mitigations for identified failures. Consuming projects need proof that platform is safe for ASIL B integration. Auditors verify: every failure mode has documented mitigation linked to requirements and tests. | NO | tool_req__docs_saf_attrs_mitigated_by | +| gd_req__saf_attr_aou | Applicability to Area of Use (AOU) Linkage | Explicitly links safety analysis artifacts (FMEA, DFA) to the specific operational context, vehicles, or systems they apply to. | **CRITICAL (ASIL B)**: ALL modules must declare their AoU scope. Analysis must explicitly state: "all releases" or "specific constraints". Consuming ASIL B projects need to know: which module AoU assumptions must they respect? Missing AoU scope = audit finding. | NO | tool_req__docs_saf_attrs_mitigated_by | +| gd_req__saf_attr_requirements_check | Safety Analysis–Requirements Linkage Validation | Automated checks ensuring every hazard/failure identified in safety analysis has corresponding requirements to control it. Detects orphaned risks. | **CRITICAL (ASIL B)**: ALL modules must have every failure mode linked to a requirement. Orphaned failure modes = audit failure for consuming ASIL B projects. Automated checks prevent gaps. Non-negotiable. | NO | tool_req__docs_saf_attrs_mitigated_by | +| gd_req__saf_linkage_check | Safety Analysis Linkage Consistency | Validates that links between FMEA/DFA items, requirements, and tests are consistent and bidirectional. Detects dangling references. | **CRITICAL (ASIL B)**: ALL modules must have complete bidirectional traceability. Broken links = missing evidence for ASIL B projects. Automatic checking prevents audit failure. Non-negotiable. | NO | tool_req__docs_saf_attrs_violates | +| gd_req__saf_attr_feffect | Failure Effect Documentation | Precisely documents what happens when a failure occurs (loss of function, unintended behavior, delayed response, etc.). Critical for risk assessment. 
| **CRITICAL (ASIL B)**: ALL modules must classify failure effects per ISO 26262 Table 1. Vague effects = ASIL downgrade for consuming projects = audit failure. Mandatory precision required. | NO | tool_req__docs_saf_attr_fmea_failure_effect | +| gd_req__saf_attr_fault_id | FMEA Fault/Failure Mode Identity | Unique identifier for each failure mode in FMEA ensuring no duplicates or overlooked scenarios. Enables consistent reference in all downstream artifacts. | **CRITICAL (ASIL B)**: ALL modules must have unique FMEA IDs. Consuming ASIL B projects need traceable audit trail: which failures analyzed, which requirements address them, which tests verify. No exceptions. | NO | tool_req__docs_saf_attr_fmea_fault_id | +| gd_req__saf_attr_failure_id | DFA Failure Point Identity | Unique identifier for each failure point in Dependent Failure Analysis. Critical for complex systems with interdependent components. | **CRITICAL (ASIL B)**: ALL modules with complex interdependencies must have DFA with unique failure point IDs. Missing IDs = unanalyzed cascades for ASIL B projects = audit failure. Required for complex architectures. | NO | tool_req__docs_saf_attr_dfa_failure_id | +| gd_req__saf_argument | Safety Analysis Argumentation | Rationale and supporting evidence explaining why identified hazards are or are not significant, why ASIL assignments are appropriate, why mitigations are sufficient. | **CRITICAL (ASIL B)**: ALL modules must document safety arguments supporting their analysis. Consuming ASIL B projects rely on this. Auditors scrutinize: why is module safe? Why are mitigations sufficient? Unsupported = non-compliance. | NO | tool_req__docs_saf_attrs_content | + +### Requirements Traceability (4/7 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__req_linkage | Requirement Cross-Linking | Establishes relationships between requirements (implements, refines, conflicts with, depends-on). 
Creates the requirement dependency graph. | **CRITICAL (ASIL B)**: ALL modules must show requirement relationships and dependencies. Consuming ASIL B projects need impact analysis: "if this module's requirement changes, what else breaks?" | PARTIAL | tool_req__docs_req_link_satisfies_allowed | +| gd_req__req_traceability | End-to-End Traceability Matrix | Links requirements → design → tests → verification results in a bidirectional chain auditable for completeness and consistency. The "traceability matrix" auditors demand. | **CRITICAL (ASIL B)**: ALL modules must provide complete V-model traceability: requirements → design → tests → results. Consuming ASIL B projects depend on this. Missing links = audit failure. | PARTIAL | tool_req__docs_req_link_satisfies_allowed | +| gd_req__req_attr_test_covered | Test Coverage Completeness Validation | Automated checks ensuring every safety requirement has at least one test case assigned and executed. Detects untested requirements. | **CRITICAL (ASIL B)**: ALL modules must have 100% test coverage of safety requirements. No untested requirements allowed. Consuming ASIL B projects require complete evidence. Automated check mandatory. | PARTIAL | tool_req__docs_req_attr_testcov | +| gd_req__req_attr_req_cov | Requirement Coverage Scope | Tracks which architecture/design items satisfy which requirements, preventing ambiguity about requirement implementation. | **CRITICAL (ASIL B)**: ALL modules must map requirements to architecture/design elements. Consuming ASIL B projects need proof: requirements are actually implemented, not just documented. Without mapping, unproven. 
| PARTIAL | tool_req__docs_req_attr_reqcov | + +### Configuration Management (1/1 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__configuration_uid | Configuration Unique Identification | Every configuration/release of the documented system must have a unique, immutable ID enabling audit trail and traceability to specific configuration versions. | **CRITICAL (ASIL B)**: ALL module configurations must be uniquely versioned. Consuming ASIL B projects must answer: "which module version was integrated?" Broken traceability = audit failure. | NOT_MAPPED | - | + +--- + +## Tier 2: High-Priority Requirements (18 items) + +**Category**: Essential ASIL B infrastructure. ALL modules require change control, problem management, document governance—mandatory for consuming ASIL B projects to meet certification audit requirements. + +### Change & Problem Management (13/18 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__change_attr_uid | Change Request Unique ID | Every change to safety-relevant artifacts must have a unique tracking ID enabling full audit trail and impact analysis. | **HIGH (ASIL B)**: ALL modules must track every change with unique IDs. Consuming ASIL B projects need audit trail. Untracked changes = certification violation. Mandatory for all. | NOT_MAPPED | - | +| gd_req__change_attr_title | Change Request Title | Descriptive title enabling quick identification of what is being changed and why. Critical for impact assessment. | **HIGH (ASIL B)**: ALL modules require clear change titles. Consuming ASIL B projects need rapid safety impact assessment. Required for all. | NO | tool_req__docs_doc_attr | +| gd_req__change_attr_status | Change Request Status Tracking | Workflow state (proposed, reviewed, approved, implemented, verified, released) ensuring changes are properly authorized before implementation. 
| **HIGH (ASIL B)**: ALL modules require controlled workflow: proposed→assessment→review→approved→tested→released. Consuming ASIL B projects need workflow enforcement. Mandatory. | NOT_MAPPED | - | +| gd_req__change_attr_safety | Change Request Safety Impact Assessment | Explicit field documenting whether change affects safety requirements, analysis, or test coverage and what the impact is. | **HIGH (ASIL B)**: ALL modules must assess safety impact of every change. Mandatory assessment prevents undetected impact. Consuming ASIL B projects require this. | PARTIAL | tool_req__docs_doc_generic_mandatory | +| gd_req__change_attr_types | Change Request Categories | Classification (defect fix, enhancement, safety-critical change, design change, etc.) enabling routing to appropriate review process. | **HIGH**: Safety-critical changes require more scrutiny. Misclassification routes safety change through standard process, missing specialized review. | NOT_MAPPED | - | +| gd_req__change_attr_milestone | Change Request Scheduling | Links change to project timeline/release version. Enables tracking: "which changes are in which release?" Critical for coordinating safety analysis updates. | **HIGH**: When coordinating platform safety analysis with product releases, need to know which changes are in which build. Without this, mismatch between analyzed platform ≠ delivered platform. | NOT_MAPPED | - | +| gd_req__change_attr_impact_description | Change Impact Description | Documents *why* a change is needed and what it affects (requirements, architecture, tests, risk profile). Enables informed review. | **HIGH**: Safety reviewers need context. "Changed line 42 of module X" is meaningless without explanation. Clear impact descriptions enable proper safety assessment. | NOT_MAPPED | - | +| gd_req__problem_attr_uid | Problem Report Unique ID | Every identified problem/defect must have unique ID enabling audit trail and traceability to fix/verification. 
| **HIGH**: If a safety-critical defect is discovered, must prove it was tracked, analyzed for safety impact, fixed, and verified. Missing ID = untracked problem. | NOT_MAPPED | - | +| gd_req__problem_attr_title | Problem Report Title | Clear description of what is wrong enabling quick identification and assessment. | **HIGH**: Safety staff need to assess: "does this defect affect safety?" Unclear titles delay assessment and risk safety impact being missed. | NOT_MAPPED | - | +| gd_req__problem_attr_status | Problem Report Status | Workflow ensuring problems are investigated, root-caused, fixed, and verified before closing. Prevents unresolved issues. | **HIGH**: Incomplete problem closure = unresolved defect potentially affecting safety. Status tracking ensures rigor in problem resolution. | NOT_MAPPED | - | +| gd_req__problem_attr_classification | Problem Classification | Type (functional defect, safety issue, performance, integration problem, etc.) routing to appropriate resolution process. | **HIGH**: Safety-critical problems require different root cause analysis and verification. Misclassification sends safety defect through standard defect process. | NOT_MAPPED | - | +| gd_req__problem_attr_safety_affected | Problem Safety Impact | Explicit assessment: does this problem affect safety? If yes, what is the impact and required fix/verification? | **HIGH (ASIL B)**: ALL modules must assess every problem for safety impact. No exceptions. Consuming ASIL B projects require complete assessment. Unassessed defects = unmanaged risk. | NOT_MAPPED | - | +| gd_req__problem_check_closing | Problem Resolution Verification | Checks ensuring a problem cannot be marked "closed" without evidence it was actually fixed and verified. Prevents premature closure. | **HIGH**: Prevents "closed" problems that actually still exist. Broken verification = defect re-appears in production. 
| NOT_MAPPED | - | +| gd_req__problem_attr_anaylsis_results | Problem Root Cause Analysis | Documented investigation of why the problem occurred, enabling preventive action. Without root cause, same problem re-occurs. | **HIGH**: Superficial fixes (symptom treatment) without root cause analysis = defect will recur. Proper root cause identification prevents repeated safety issues. | NOT_MAPPED | - | +| gd_req__problem_attr_impact_description | Problem Impact Description | Clear documentation of scope (what is broken, how many users/products affected, severity). Enables triage and risk assessment. | **HIGH**: Without impact scope, cannot assess urgency or safety relevance. Small problem affecting 1 user ≠ widespread problem affecting all safety-critical units. | NOT_MAPPED | - | +| gd_req__problem_attr_milestone | Problem Fix Scheduling | Links problem to planned resolution/release. Enables tracking: "when will this be fixed?" | **HIGH**: For safety-critical problems, need to know: is this fixed in next release? Impacts product safety claim if problem persists in released versions. | NOT_MAPPED | - | +| gd_req__problem_attr_security_affected | Problem Security Impact | Explicit assessment: does this problem expose security vulnerability? Triggers security review. | **HIGH**: Functional safety and cybersecurity are increasingly linked in standards (SOTIF, broader ISO frameworks). Unidentified security issues can compromise safety. | NOT_MAPPED | - | +| gd_req__problem_attr_stakeholder | Problem Ownership | Identifies who is responsible for investigation, fix, and verification. Prevents diffusion of responsibility and incomplete resolutions. | **HIGH**: Without clear ownership, problems get passed around and never actually fixed. Clear accountability ensures proper resolution. 
| NOT_MAPPED | - | + +### Document Governance (3/4 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__doc_author | Document Author Tracking | Records who created/modified each document. Essential for assigning responsibility and enabling future contact for questions. | **HIGH**: When auditor asks "who was responsible for this safety document?", must have clear answer. Author tracking enables accountability. | NO | tool_req__docs_doc_attr, tool_req__docs_doc_attr_author_autofill | +| gd_req__doc_reviewer | Document Reviewer Identification | Records independent review (e.g., safety review, architecture review) of documents before release. Proves review occurred. | **HIGH**: ISO 26262 requires documented evidence of review. Without reviewer identification, cannot prove document was independently reviewed (may be author-only). | NO | tool_req__docs_doc_attr, tool_req__docs_doc_attr_reviewer_autofill | +| gd_req__doc_approver | Document Approver Tracking | Records formal approval authority sign-off. Proves document is authorized version. | **HIGH**: Safety documents must be formally approved before use. Without approver tracking, unclear which version is the "official" document used in safety claims. | NO | tool_req__docs_doc_attr, tool_req__docs_doc_attr_approver_autofill | + +### Safety Status Documentation (2/2 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__safety_doc_status | Safety Document Release Status | Tracks whether each safety-related document (hazard analysis, FMEA, DFA, requirements) is draft, reviewed, approved, or superseded. Prevents use of unapproved versions. | **HIGH**: Auditors demand to see "version 3.2 reviewed/approved 2024-Q2". Using draft versions in safety claims = audit failure. 
| NOT_MAPPED | - | +| gd_req__safety_wp_status | Safety Work Product Status | For ongoing safety work (e.g., safety analysis update, rework for new features), tracks: planned, in-progress, reviewed, complete. Prevents incomplete work from being declared done. | **HIGH**: Partially-completed safety work is dangerous. Ensures safety updates are fully completed before claiming compliance. | NOT_MAPPED | - | + +--- + +## Tier 3: Medium-Priority Requirements (13 items) + +**Category**: Supporting requirements that improve robustness and reduce compliance risk. Lack of these creates operational friction but can be partially worked around. + +### Requirement Attributes & Versioning (5/7 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__req_attr_version | Requirement Versioning | Tracks versions of individual requirements enabling impact analysis: "what changed between v1.0 and v1.1 of this requirement?" | **MEDIUM**: Over time, requirements evolve. Without version history, difficult to assess: "was this requirement different when product X was certified vs. product Y?" | PARTIAL | tool_req__docs_common_attr_version | +| gd_req__req_attr_valid_from | Requirement Valid-From Date | Date requirement became effective. Enables temporal traceability: "was this requirement active when we were developing feature X?" | **MEDIUM**: For product variants or phased development, need to know requirement applicability timeline. Missing: difficult to justify why old product didn't have this requirement. | NOT_MAPPED | - | +| gd_req__req_attr_valid_until | Requirement Valid-Until Date | Date requirement is superseded/obsoleted. Marks when requirement no longer applies (e.g., legacy support ending). | **MEDIUM**: Prevents confusion about obsolete requirements. Without clear end-of-life, teams waste effort on outdated requirements or accidentally violate obsolete constraints. 
| NOT_MAPPED | - | +| gd_req__doc_attributes_manual | Document Attributes (Metadata) | Documents can store structured metadata (purpose, version, date, classification level, etc.) in machine-readable form. Enables automated reports. | **MEDIUM**: Manual metadata = error-prone. Structured attributes enable automated traceability reports (e.g., "generate traceability matrix for all documents created in Q2"). | PARTIAL | tool_req__docs_doc_generic_mandatory | +| gd_req__arch_build_blocks_corr | Architecture Building Block Correlations | Document how architectural blocks relate (data flow, control flow, interfaces). Creates the "architecture diagram" that safety analysis is based on. | **MEDIUM**: FMEA/DFA analysis requires understanding system structure. Correlations enable verification that analysis covers all interactions. Missing: analysis may overlook interface failures. | NOT_MAPPED | - | + +### Architecture & Design Traceability (3/5 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__arch_linkage_safety | Architecture Linkage for Safety Analysis | Links requirements to architectural elements (modules, interfaces, components) enabling verification that design implements requirements. | **MEDIUM**: Safety requirements must be "placed" in the architecture (e.g., "this safety mechanism is implemented in module X"). Without this mapping, cannot verify requirements are actually designed. | NOT_MAPPED | - | +| gd_req__arch_linkage_safety_trace | Architecture–Safety Traceability | Bidirectional links from architecture → safety analysis, showing which architectural elements/failures are analyzed for hazards. | **MEDIUM**: Enables verification that every architectural failure point was considered in safety analysis. Prevents unanalyzed failure modes in design. 
| PARTIAL | tool_req__docs_req_arch_link_safety_to_arch | +| gd_req__impl_design_code_link | Design Implementation–Code Linking | Links architectural design decisions/requirements to actual code implementation. Bridges design→code traceability gap. | **MEDIUM**: Modern development often separates design documents from code. Without this link, code review cannot verify: "is the implementation actually following the approved design?" | NOT_MAPPED | - | + +### Verification Infrastructure (5/8 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__verification_checks | Verification Metadata Validation | Automated checks ensuring all tests have required metadata (test ID, test case, expected result, reviewed by, etc.). Detects incomplete test documentation. | **MEDIUM**: Test metadata is auditor gold. Without complete metadata, cannot demonstrate that tests were properly designed, executed, and reviewed. | NO | tool_req__docs_test_metadata_mandatory_1, tool_req__docs_test_metadata_mandatory_2, tool_req__docs_test_metadata_link_levels | +| gd_req__verification_reporting | Verification Results Reporting | Automated generation of test execution reports (what was tested, pass/fail results, coverage metrics). | **MEDIUM**: Enables communication of test status to safety team. Manual report generation = error-prone. Automated reports provide consistent, auditable evidence. | NOT_MAPPED | - | +| gd_req__verification_report_archiving | Verification Report Archiving | Long-term storage of test results with strong versioning. Enables future audits to retrieve: "what were test results for this product version?" | **MEDIUM**: Safety compliance claims must remain valid years after release. Archiving ensures historical test evidence is preserved and retrievable. 
| NOT_MAPPED | - | +| gd_req__verification_independence | Test Independence Verification | Ensures tests are independent from implementation team (different author, different environment, different tools). Prevents "conflicts of interest" in testing. | **MEDIUM**: Tests written by implementation team may have bias. Independent verification adds credibility. Required for higher ASIL levels. | NOT_MAPPED | - | + +--- + +## Tier 4: Lower-Priority Requirements (13 items) + +**Category**: Extended capabilities with acceptable workarounds or lower direct safety impact. Can often be deferred or addressed through alternative means. + +### Extended Requirement Attributes (4/7 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__req_attr_version | [Listed in Tier 3] | | | | | + +### Extended Architecture Capabilities (2/5 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__arch_build_blocks_dynamic | Dynamic Architecture Behavior | Documentation of how architectural blocks behave over time (state machines, sequences, timing). More detailed than static structure. | **LOWER**: Complements but not strictly required for basic traceability. Can often be captured through design documents rather than tool automation. | NOT_MAPPED | - | +| gd_req__arch_model | Holistic Architecture Modeling | Comprehensive metamodel for encoding architecture (components, interfaces, data flow, control flow) in machine-readable form. | **LOWER**: Nice-to-have for mature organizations. Many projects manage with document-based architecture and manual traceability. 
| NOT_MAPPED | - | + +### Implementation & Design Details (2/2 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__impl_dynamic_diagram | Dynamic Behavior Diagrams | Sequence/timing diagrams showing how system behaves at runtime (object interactions, message sequences). | **LOWER**: Useful for complex systems but not mandatory. Many projects suffice with static design + code review. | NOT_MAPPED | - | + +### Language-Specific Verification (4/8 open) + +| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | +|---|---|---|---|---|---| +| gd_req__verification_link_tests_cpp | C++ Test Linkage | Tool support for linking requirements to C++ unit tests/integration tests. Language-specific traceability. | **LOWER**: Language-specific convenience. Generic test-linkage often suffices if properly documented. Can use manual cross-referencing. | NOT_MAPPED | - | +| gd_req__verification_link_tests_python | Python Test Linkage | Tool support for linking requirements to Python pytest/unittest. Language-specific traceability. | **LOWER**: Language-specific convenience. Generic test-linkage often suffices if properly documented. Can use manual cross-referencing. | NOT_MAPPED | - | +| gd_req__verification_link_tests_rust | Rust Test Linkage | Tool support for linking requirements to Rust tests. Language-specific traceability. | **LOWER**: Language-specific convenience. Generic test-linkage often suffices if properly documented. Can use manual cross-referencing. | NOT_MAPPED | - | +| gd_req__verification_link_tests | Generic Test Linkage (General) | Ability to link requirements to test cases regardless of language/framework. Generic capability below language-specific variants. | **LOWER**: Partly captured by Tier 1/2 requirements. This is the "generalizable" form. 
| NOT_MAPPED | - | + +--- + +## Summary by Priority & State + +| Tier | Total | PARTIAL | NO | NOT_MAPPED | Safety Impact | +|---|---|---|---|---|---| +| **Tier 1: Critical** | 12 | 5 | 6 | 1 | Audit blockers without these | +| **Tier 2: High** | 18 | 1 | 4 | 13 | Major assurance gaps | +| **Tier 3: Medium** | 13 | 2 | 2 | 9 | Operational friction, reduced robustness | +| **Tier 4: Lower** | 13 | 0 | 1 | 12 | Convenience/extended capabilities | +| **TOTAL** | **56** | **8** | **13** | **35** | | + +## Interpretation & Next Steps + +**Critical Assessment (ASIL B-Capable Platform)**: + +The 56 open items are critical for an **ASIL B-capable platform** that enables downstream ASIL B certification. Key implications: + +- **Safety analysis integration (8 items)**: ALL modules must implement FMEA/DFA with complete mitigations. Consuming ASIL B projects depend on this safety infrastructure. No ASIL-dependent scope—every module contributes to the safety argument. **Non-negotiable for ASIL B projects.** + +- **Traceability completeness (4 items)**: ALL modules must provide complete requirement→design→test traceability. Consuming ASIL B projects cannot integrate modules without proof of traceability. **Mandatory for all modules.** + +- **Change & Problem Management (18 items)**: ALL modules require change tracking and problem assessment. Consuming ASIL B projects need an audit trail. No exemptions for "utility code"—all module changes must go through safety review. **Applies to entire codebase.** + +- **Configuration management (1 item)**: ALL module configurations must be uniquely versioned. Consuming ASIL B projects must trace: which module version was integrated? + +**Action Priority (ASIL B Platform Timeline)**: +1. **Critical Path (Phase 1-2, Apr-Aug)**: Safety analysis foundation for ALL modules (FMEA/DFA with mitigations). Auditor will request: "show me FMEA for every module". Must have answer for EVERY module. +2. 
**High Priority (Phase 2-3, Jul-Oct)**: Change control for ALL modules. Auditor will request: "what changes were made, who approved them, safety impact assessed?" Consuming ASIL B projects require complete audit trail. +3. **Essential (Phase 3-4, Sep-Dec)**: Document governance and test evidence for ALL modules. Auditor will verify: every safety-relevant document approved, every safety requirement tested. +4. **ALL Modules**: No module can be exempt from ASIL B-level requirements. ALL must implement change management, safety analysis, requirements traceability, test coverage. + +**NOT_MAPPED Items (35 total – Triage Urgency)**: +- **14 new concepts**: Determine scope by end of Phase 2 (Aug). +- **21 existing concepts without requirements**: Define in Phase 2; all modules must implement core Tier 1-2. + +**ASIL B Platform Risk**: Consuming ASIL B projects rely on platform enforcement of ASIL B rigor. Gaps in platform enforcement = downstream ASIL B certification failure. Platform must enforce change management and safety analysis for ALL integrated modules, with no exceptions. + +--- + +## Implementation Roadmap (April – December 2026) + +### Phase 1: Foundation (April 17 – June 30, 2026) — ~8 weeks + +**Goal**: Establish safety analysis traceability foundation and baseline documentation governance. 
+ +**Deliverables**: + +#### 1.1 Safety Analysis Metamodel & Linking (Tier 1 | 4 weeks, high effort) +- [ ] **Define gd_req__saf_attr_mitigated_by** implementation in metamodel.yaml + - Add `mitigated_by` field to safety analysis blocks (FMEA, DFA entries) + - Enable bidirectional links: Failure Mode ↔ Mitigation Requirement ↔ Test Case + - Constraint: Every failure mode must reference at least one mitigation requirement + - Estimated effort: 2 weeks (design + implementation + testing) + +- [ ] **Implement gd_req__saf_linkage_check** automated validation + - New Sphinx checker: verify FMEA/DFA links are complete and consistent + - Detect orphaned safety analysis items (unlinked to requirements/tests) + - Report broken bidirectional links + - Estimated effort: 1.5 weeks + +- [ ] **Add gd_req__saf_attr_fault_id & gd_req__saf_attr_failure_id** ID generation + - Auto-generate unique IDs for FMEA/DFA entries (with versioning) + - Ensure uniqueness constraint + - Estimated effort: 0.5 weeks + +- [ ] **Documentation & Examples** + - Create how-to guide for FMEA/DFA traceability in docs/how-to/ + - Add example safety analysis with full traceability + - Update docs/internals/requirements/requirements.rst with new tool requirements + - Estimated effort: 1 week + +**Success Criteria**: +- ✅ tool_req__docs_saf_attrs_mitigated_by marked PARTIAL → YES +- ✅ tool_req__docs_saf_attrs_violates marked NO → YES (linkage checking) +- ✅ Automated checks catch unlinked FMEA entries +- ✅ Example safety analysis demonstrating full traceability + +#### 1.2 Requirement Traceability Completion (Tier 1 | 3 weeks, medium effort) +- [ ] **Complete gd_req__req_attr_test_covered** implementation + - Ensure every requirement with `:implemented: YES` has at least one test linked + - Add automated checker: detect requirements without test coverage + - Report test coverage percentage per requirement category + - Estimated effort: 1.5 weeks + +- [ ] **Implement gd_req__req_attr_req_cov** architecture 
coverage mapping + - Add optional `:covers_design:` field to link requirement → design components + - Enable reporting: "which design elements implement this requirement?" + - Estimated effort: 1 week + +- [ ] **Refresh traceability matrix report** (docs/reference/commands.md) + - Add commands to generate complete requirement→test→design traceability matrix + - Include coverage metrics per requirement type + - Estimated effort: 0.5 weeks + +**Success Criteria**: +- ✅ tool_req__docs_req_attr_testcov marked PARTIAL → YES +- ✅ tool_req__docs_req_attr_reqcov marked PARTIAL → YES +- ✅ Automated test coverage checks integrated into CI +- ✅ Traceability matrix generation command available + +#### 1.3 Document Governance Baseline (Tier 2 | 2 weeks, low-medium effort) +- [ ] **Implement gd_req__doc_author, gd_req__doc_reviewer, gd_req__doc_approver** tracking + - Add `:author:`, `:reviewed_by:`, `:approved_by:` fields to document metadata + - Store with timestamps + - Estimated effort: 1 week + +- [ ] **Document Governance Guide** + - Create docs/how-to/document_governance.rst + - Explain review/approval workflow for safety documents + - Templates for document metadata + - Estimated effort: 0.5 weeks + +- [ ] **Basic safety document status tracking** + - Add `:status: [draft|reviewed|approved|superseded]` field + - Automated check: prevent use of draft safety documents in compliance claims + - Estimated effort: 0.5 weeks + +**Success Criteria**: +- ✅ tool_req__docs_doc_attr, tool_req__docs_doc_attr_author_autofill marked NO → YES (at least basic version) +- ✅ tool_req__docs_doc_attr_reviewer_autofill marked NO → YES +- ✅ Document governance guide published +- ✅ Sample safety documents show proper author/reviewer/approver tracking + +--- + +### Phase 2: Safety Analysis Extension (July 1 – August 31, 2026) — ~8 weeks + +**Goal**: Complete safety analysis automation depth and address NOT_MAPPED decisions. 
+ +**Deliverables**: + +#### 2.1 Safety Analysis Content & Linkage Depth (Tier 1 | 3 weeks) +- [ ] **Implement gd_req__saf_attr_feffect** structured failure effect documentation + - Add enumeration: `[loss_of_function | wrong_output | delayed_response | unintended_behavior | ...]` + - Mandatory field with explanation text + - Automated check: detect vague or missing effect descriptions + - Estimated effort: 1 week + +- [ ] **Add gd_req__saf_attr_aou** (Area of Use) linkage + - Enable linking FMEA/DFA to specific product variants, operational contexts, or safety goals + - Constraint: safety analysis must explicitly declare scope (AOU) + - Estimated effort: 1 week + +- [ ] **Implement gd_req__saf_attr_requirements_check** automated validation + - Check: every failure mode in FMEA/DFA has corresponding safety requirement + - Detect and report "orphaned" failure modes (analyzed but not controlled by requirements) + - Estimated effort: 1 week + +**Success Criteria**: +- ✅ tool_req__docs_saf_attr_fmea_failure_effect marked NO → YES +- ✅ tool_req__docs_saf_attrs_mitigated_by marked NO → YES (complete implementation) +- ✅ Automated orphaned-failure detection working +- ✅ gd_req__saf_attr_aou marked NO → YES + +#### 2.2 NOT_MAPPED Triage & Scope Decisions (All Tiers | 2 weeks) +- [ ] **Conduct NOT_MAPPED Item Review** + - For each of 35 NOT_MAPPED items, make decision: + - **In-scope**: Add to docs-as-code/requirements.rst as new tool requirement + - **Out-of-scope**: Document rationale (e.g., "managed by process_description only") + - **Deferred**: Schedule for Phase 3 or later + - Estimated effort: 1.5 weeks (review + documentation) + +- [ ] **Create NOT_MAPPED Decision Log** + - Document decisions with rationale for each item + - Publish in docs/internals/requirements/not_mapped_decisions.md + - Estimated effort: 0.5 weeks + +**Success Criteria**: +- ✅ 35 NOT_MAPPED items triaged: ~20 defined as in-scope, ~10 out-of-scope, ~5 deferred +- ✅ Decision log published with 
clear ownership/timeline for in-scope items +- ✅ Requirements.rst updated with newly-scoped items + +#### 2.3 Configuration Management Baseline (Tier 1 | 1.5 weeks) +- [ ] **Implement gd_req__configuration_uid** version baseline tracking + - Define metadata: config ID, release version, date, safety documentation version, artifacts included + - Tool support: mark safety documents with configuration ID they apply to + - Estimated effort: 1.5 weeks + +**Success Criteria**: +- ✅ Configuration versioning scheme established +- ✅ Example released configuration with full safety artifact traceability +- ✅ tool_req__docs_*_config_uid or similar defined and marked YES + +#### 2.4 Change/Problem Management Framework (Tier 2 | 2 weeks, planning phase) +- [ ] **Design Change/Problem Tracking Model** + - Sketch metamodel for change/problem records + - Define required fields (uid, title, status, safety impact, etc.) + - Integration with requirements/design/test artifacts + - Estimated effort: 1.5 weeks (design) + +- [ ] **Publish Change Management Guide** + - Draft docs/how-to/change_management.md + - Explain workflow, safety impact assessment, review process + - Templates for change records + - Estimated effort: 0.5 weeks + +**Success Criteria**: +- ✅ Change management metamodel design approved +- ✅ Preliminary guide published (full implementation in Phase 3) + +--- + +### Phase 3: High-Priority Completion (September 1 – October 31, 2026) — ~8 weeks + +**Goal**: Complete Tier 2 (Change/Problem Management, Document Governance) and begin Tier 3 architecture items. 
+ +**Deliverables**: + +#### 3.1 Change/Problem Management Implementation (Tier 2 | 5 weeks) +- [ ] **Metamodel & Tool Support** (~3 weeks) + - Implement change record structure in metamodel.yaml + - Add problem report structure + - Fields: uid, title, status, safety_affected, classification, impact_description, assigned_to, target_milestone + - Automated checks: required fields, valid status transitions, safety assessment completeness + - Estimated effort: 3 weeks + +- [ ] **Change/Problem Management Commands** (~1.5 weeks) + - New CLI command: `sphinx-needs manage-change [create|update|close] [options]` + - Report generation: "safety-critical changes in release X" + - Status dashboard: open problems, pending safety assessment, unverified fixes + - Estimated effort: 1.5 weeks + +- [ ] **Integration with Requirements/Tests** (~0.5 weeks) + - Link change → affected requirements + - Link problem → test case demonstrating fix + - Backward link: requirement/test → related changes/problems + - Estimated effort: 0.5 weeks + +**Success Criteria**: +- ✅ All 18 Change/Problem Management items marked NO → PARTIAL (basic support) +- ✅ Workflow commands available +- ✅ Integration with requirements/tests working +- ✅ Example change/problem records demonstrating full traceability + +#### 3.2 Document Governance Enhancement (Tier 2 | 2 weeks) +- [ ] **Review/Approval Workflow Automation** + - Extend document metadata: review checklist, sign-off tracking + - Automated reminder: documents pending review/approval + - Report: unapproved safety documents in use (red flag!) 
+ - Estimated effort: 1.5 weeks + +- [ ] **Document Release Workflow** + - Implement gd_req__safety_doc_status workflow: draft → reviewed → approved → released + - Constraint: only released versions can be referenced in compliance claims + - Automated check: detect use of draft/unapproved safety docs + - Estimated effort: 0.5 weeks + +**Success Criteria**: +- ✅ tool_req__docs_doc_attr_reviewer_autofill, tool_req__docs_doc_attr_approver_autofill marked NO → YES +- ✅ Automated unapproved-document detection working +- ✅ Example documents showing complete review/approval chain + +#### 3.3 Tier 3 Architecture & Versioning Start (Tier 3 | 1 week, planning) +- [ ] **Requirement Versioning Design** + - Sketch version management: v1.0, v1.1, v1.2, etc. + - Valid-from/valid-until date ranges + - Impact analysis: "what changed between versions?" + - Estimated effort: 0.5 weeks + +- [ ] **Architecture Traceability Design** + - Model: requirement → architecture block → design → code + - Design document placeholder + - Estimated effort: 0.5 weeks + +**Success Criteria**: +- ✅ Design documents approved +- ✅ Roadmap for Tier 3 Q4 implementation established + +--- + +### Phase 4: Polish & Tier 3 Completion (November 1 – December 31, 2026) — ~8 weeks + +**Goal**: Complete Tier 3 items and achieve "good state" by year-end for audit readiness. + +**Deliverables**: + +#### 4.1 Requirement Versioning & Temporal Traceability (Tier 3 | 3 weeks) +- [ ] **Implement gd_req__req_attr_version** requirement version tracking + - Extend requirement metadata: version field, version history + - Generate version change reports + - Impact analysis: "what's different between version X and Y?" + - Estimated effort: 2 weeks + +- [ ] **Implement gd_req__req_attr_valid_from & gd_req__req_attr_valid_until** + - Temporal scoping: requirement active from date X to date Y + - Enable queries: "which requirements were active when product V1.2 was developed?" 
+ - Automated check: detect requirements with undefined validity windows + - Estimated effort: 1 week + +**Success Criteria**: +- ✅ tool_req__docs_common_attr_version marked PARTIAL → YES +- ✅ Version history visible in requirement documentation +- ✅ Temporal traceability queries available + +#### 4.2 Architecture & Design Linkage (Tier 3 | 2.5 weeks) +- [ ] **Implement gd_req__arch_build_blocks_corr** architectural relationships + - Metamodel extensions: component relationships, interfaces, data flows + - Visualization: architecture diagram generation + - Estimated effort: 1.5 weeks + +- [ ] **Implement gd_req__arch_linkage_safety** requirement→architecture mapping + - Link requirement → specific architecture component implementing it + - Report: architecture coverage by requirement category + - Estimated effort: 1 week + +**Success Criteria**: +- ✅ Architecture traceability working +- ✅ Architecture diagrams automatically generated from metadata +- ✅ Coverage gaps identified + +#### 4.3 Verification Infrastructure Completion (Tier 3 | 2 weeks) +- [ ] **Implement gd_req__verification_reporting** automated test reports + - Generate test execution summary: pass/fail/skip counts, coverage metrics + - Integration with CI pipeline: test results automatically linked to requirements + - Estimated effort: 1.5 weeks + +- [ ] **Implement gd_req__verification_report_archiving** long-term storage + - Archive test results with strong versioning + - Enable retrieval: "show me test results for product version X" + - Estimated effort: 0.5 weeks + +**Success Criteria**: +- ✅ Automated test reporting available +- ✅ Historical test results retrievable for audit +- ✅ All Tier 3 items marked YES + +#### 4.4 Audit Readiness Review & Documentation (All Tiers | 0.5 weeks) +- [ ] **Complete Tier 4 Gap Analysis** + - Confirm Tier 4 items deferred to 2027 or addressed via workarounds + - Estimated effort: 0.25 weeks + +- [ ] **Publish Final SCORE #1782 Coverage Report** + - Update 
score_1782_open_summary.md with Phase 4 completions + - State changes: ~45 items YES, ~8 items PARTIAL, ~3 items NO (acceptable), 0 items NOT_MAPPED + - Estimated effort: 0.25 weeks + +**Success Criteria**: +- ✅ Tier 1 (12/12 items): 100% YES or PARTIAL ✓ +- ✅ Tier 2 (18/18 items): 95%+ YES/PARTIAL ✓ +- ✅ Tier 3 (13/13 items): 95%+ YES/PARTIAL ✓ +- ✅ All NOT_MAPPED items triaged and resolved ✓ +- ✅ Documentation complete and audit-ready ✓ + +--- + +## Summary Timeline + +| Phase | Timeline | Duration | Focus | Expected State | +|---|---|---|---|---| +| **Phase 1** | Apr 17 – Jun 30 | 8 weeks | Safety analysis foundation + requirement traceability + doc governance baseline | Tier 1: 50% YES, Tier 2: 20% YES | +| **Phase 2** | Jul 1 – Aug 31 | 8 weeks | Safety analysis depth + NOT_MAPPED triage + config management | Tier 1: 80% YES, Tier 2: 30% YES | +| **Phase 3** | Sep 1 – Oct 31 | 8 weeks | Change/problem management + doc governance enhancement + Tier 3 planning | Tier 2: 80% YES, Tier 3: Planning complete | +| **Phase 4** | Nov 1 – Dec 31 | 8 weeks | Requirement versioning + architecture linkage + verification infra + audit prep | **All Tiers: 90%+ YES/PARTIAL** ✓ | + +--- + +## Effort Summary + +| Tier | Phase 1 | Phase 2 | Phase 3 | Phase 4 | Total | FTE Equiv | +|---|---|---|---|---|---|---| +| **Tier 1** | 9 weeks | 5 weeks | 1 week | 1 week | **16 weeks** | ~0.4 FTE | +| **Tier 2** | 2 weeks | 2 weeks | 7 weeks | 1 week | **12 weeks** | ~0.3 FTE | +| **Tier 3** | — | — | 1 week | 5.5 weeks | **6.5 weeks** | ~0.15 FTE | +| **Tier 4** | — | — | — | 0.5 weeks | **0.5 weeks** | Negligible | +| **TOTAL** | **11 weeks** | **7 weeks** | **9 weeks** | **8 weeks** | **35 weeks** | ~**0.85 FTE** | + +**Interpretation**: A full-time engineer (1 FTE) would complete this roadmap in ~35 weeks (9 months). Realistic delivery: **mid-January 2027** with 1 dedicated engineer, or **end-of-December 2026** with 1.2+ FTE allocation. 
+ +--- + +## Key Dependencies & Risks + +### Dependencies +1. **Phase 1 → Phase 2**: Metamodel extensions must be complete before depth features +2. **Phase 2 → Phase 3**: NOT_MAPPED triage decisions required before Phase 3 implementation +3. **Phase 3 → Phase 4**: Architecture design finalized in Phase 3 before implementation in Phase 4 + +### Risks & Mitigation +| Risk | Likelihood | Impact | Mitigation | +|---|---|---|---| +| Metamodel changes cause rework in downstream features | Medium | High | Freeze metamodel design in Phase 1 week 2; peer review before implementation | +| NOT_MAPPED triage decisions delayed (scope creep) | High | Medium | Allocate dedicated reviewer for Phase 2.2; strict decision deadline (Aug 15) | +| CI/test performance degradation from new checkers | Medium | Medium | Performance test new checkers before production deployment | +| Stakeholder disagreement on out-of-scope decisions | Medium | High | Early communication of scope; documented rationale for each decision | +| Resource availability changes mid-roadmap | High | High | Cross-train backup engineer in Q3; document all designs in Phase 1 | + +--- + +## Success Metrics (End of Year – ASIL B Certification Readiness) + +By **December 31, 2026**, the docs-as-code tool should demonstrate ASIL B certification-ready compliance: + +1. **Safety Analysis Completeness (ASIL B)** ✓ + - Every code module analyzed: FMEA identifying all failure modes with unique IDs + - DFA documenting failure cascades (for modules with high ASIL complexity) + - Every failure mode has documented mitigation requirement and test evidence + - Example: complete embedded system FMEA/DFA with full traceability + +2. 
**Requirement–Test Coverage (ASIL B)** ✓ + - 100% of safety requirements have test evidence (ASIL B requires no exceptions) + - Every test documented with: test ID, purpose, expected result, reviewed/approved + - Traceability matrix complete: requirement↔design↔test↔result + - Out-of-context: coverage applies to all modules (no "untested utility libraries") + +3. **Change Control (ASIL B)** ✓ + - All code changes tracked with unique ID + - Safety impact assessment mandatory for every change + - Review/approval workflow enforced before deployment + - Audit trail: who changed what, when, why, impact assessment, verification + +4. **Problem Management (ASIL B)** ✓ + - All defects logged and tracked + - Safety assessment mandatory (no defect exempted from safety evaluation) + - Root cause analysis and fix verification documented + - Traceability: problem→root cause→fix→test verification + +5. **Document Governance (ASIL B)** ✓ + - All safety documents: author, reviewer (independent), approver tracked + - Status workflow: draft→reviewed→approved→released (draft never cited) + - Audit: prove review/approval occurred for every safety document + +6. **Configuration Versioning (ASIL B)** ✓ + - Each product release: unique configuration ID + - Traceability to exact FMEA, requirements version, test results for that release + - Audit retrieval: "show me safety analysis for product v1.2.3" + +7. **Audit Readiness (ASIL B Out-of-Context)** ✓ + - ~50 of 56 open items marked YES or PARTIAL + - All NOT_MAPPED items resolved + - Automated compliance checks enabled in CI pipeline + - No module can claim "non-safety"—all covered by requirements/tests/evidence + - Documentation complete with examples demonstrating full ASIL B V-model + +**Expected Audit Posture**: "We have implemented ASIL B functional safety infrastructure. Every code module is a safety element with complete analysis, requirements, tests, and verification evidence. Our tooling enforces compliance checks. 
We are ready for ASIL B certification audit." \ No newline at end of file diff --git a/scripts_bazel/BUILD b/scripts_bazel/BUILD index 0596271ec..876860f01 100644 --- a/scripts_bazel/BUILD +++ b/scripts_bazel/BUILD @@ -49,3 +49,11 @@ py_binary( visibility = ["//visibility:public"], deps = all_requirements + ["//src/extensions/score_metamodel:score_metamodel"], ) + +py_binary( + name = "traceability_gate", + srcs = ["traceability_gate.py"], + main = "traceability_gate.py", + visibility = ["//visibility:public"], + deps = all_requirements, +) diff --git a/scripts_bazel/tests/BUILD b/scripts_bazel/tests/BUILD index 8f10ee763..2245c3ba2 100644 --- a/scripts_bazel/tests/BUILD +++ b/scripts_bazel/tests/BUILD @@ -45,3 +45,12 @@ score_pytest( ] + all_requirements, pytest_config = "//:pyproject.toml", ) + +score_pytest( + name = "traceability_gate_test", + srcs = ["traceability_gate_test.py"], + deps = [ + "//scripts_bazel:traceability_gate", + ] + all_requirements, + pytest_config = "//:pyproject.toml", +) diff --git a/scripts_bazel/tests/traceability_coverage_test.py b/scripts_bazel/tests/traceability_coverage_test.py index 87086025f..b6b82eacd 100644 --- a/scripts_bazel/tests/traceability_coverage_test.py +++ b/scripts_bazel/tests/traceability_coverage_test.py @@ -110,13 +110,17 @@ def test_traceability_coverage_thresholds_pass(tmp_path: Path) -> None: assert output_json.exists() summary = json.loads(output_json.read_text(encoding="utf-8")) - assert summary["requirements"]["total"] == 2 - assert summary["requirements"]["with_code_link"] == 1 - assert summary["requirements"]["with_test_link"] == 1 - assert summary["requirements"]["fully_linked"] == 0 - assert summary["tests"]["total"] == 3 - assert summary["tests"]["linked_to_requirements"] == 2 - assert len(summary["tests"]["broken_references"]) == 1 + assert summary["schema_version"] == "1" + assert summary["generated_by"] == "traceability_coverage" + assert "tool_req" in summary["metrics_by_type"] + type_metrics = 
summary["metrics_by_type"]["tool_req"] + assert type_metrics["requirements"]["total"] == 2 + assert type_metrics["requirements"]["with_code_link"] == 1 + assert type_metrics["requirements"]["with_test_link"] == 1 + assert type_metrics["requirements"]["fully_linked"] == 0 + assert type_metrics["tests"]["total"] == 3 + assert type_metrics["tests"]["linked_to_requirements"] == 2 + assert len(type_metrics["tests"]["broken_references"]) == 1 def test_traceability_coverage_thresholds_fail(tmp_path: Path) -> None: diff --git a/scripts_bazel/tests/traceability_gate_test.py b/scripts_bazel/tests/traceability_gate_test.py new file mode 100644 index 000000000..9019fa942 --- /dev/null +++ b/scripts_bazel/tests/traceability_gate_test.py @@ -0,0 +1,231 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
#
# This program and the accompanying materials are made available under the
# terms of the Apache License Version 2.0 which is available at
# https://www.apache.org/licenses/LICENSE-2.0
#
# SPDX-License-Identifier: Apache-2.0
# *******************************************************************************

# ╓                                                          ╖
# ║ Some portions generated by Github Copilot                ║
# ╙                                                          ╜

"""Tests for traceability_gate.py.

Every test writes a schema-v1 metrics file into ``tmp_path`` and runs the gate
script in a subprocess, asserting on its exit code and captured output.
"""

import json
import subprocess
import sys
from collections.abc import Sequence
from pathlib import Path
from typing import Any

_MY_PATH = Path(__file__).parent

_GATE_SCRIPT = _MY_PATH.parent / "traceability_gate.py"


def _pct(part: int, whole: int) -> float:
    """Return ``part / whole`` as a percentage rounded to two decimals.

    An empty scope (``whole == 0``) counts as 100 %, matching the convention
    documented in the metrics schema.
    """
    return 100.0 if whole == 0 else round(100.0 * part / whole, 2)


def _type_metrics(
    *,
    total: int,
    with_code_link: int,
    with_test_link: int,
    fully_linked: int,
    tests_total: int,
    tests_linked: int,
    missing_code_link_ids: Sequence[str] = (),
    missing_test_link_ids: Sequence[str] = (),
    not_fully_linked_ids: Sequence[str] = (),
    broken_references: Sequence[dict[str, str]] = (),
) -> dict[str, Any]:
    """Build one per-need-type metrics entry; percentages are derived from counts."""
    return {
        "include_not_implemented": False,
        "requirements": {
            "total": total,
            "with_code_link": with_code_link,
            "with_test_link": with_test_link,
            "fully_linked": fully_linked,
            "with_code_link_pct": _pct(with_code_link, total),
            "with_test_link_pct": _pct(with_test_link, total),
            "fully_linked_pct": _pct(fully_linked, total),
            "missing_code_link_ids": list(missing_code_link_ids),
            "missing_test_link_ids": list(missing_test_link_ids),
            "not_fully_linked_ids": list(not_fully_linked_ids),
        },
        "tests": {
            "total": tests_total,
            "filtered_test_types": [],
            "linked_to_requirements": tests_linked,
            "linked_to_requirements_pct": _pct(tests_linked, tests_total),
            "broken_references": list(broken_references),
        },
    }


def _default_metrics_by_type() -> dict[str, Any]:
    """Return the standard fixture: ``tool_req`` at 75 % code / 50 % test coverage."""
    return {
        "tool_req": _type_metrics(
            total=4,
            with_code_link=3,
            with_test_link=2,
            fully_linked=2,
            tests_total=3,
            tests_linked=2,
            missing_code_link_ids=["REQ_4"],
            missing_test_link_ids=["REQ_3", "REQ_4"],
            not_fully_linked_ids=["REQ_3", "REQ_4"],
        )
    }


def _write_metrics_json(
    tmp_path: Path, metrics_by_type: dict[str, Any] | None = None
) -> Path:
    """Write a schema-v1 metrics JSON and return its path."""
    if metrics_by_type is None:
        metrics_by_type = _default_metrics_by_type()
    payload = {
        "schema_version": "1",
        "generated_by": "traceability_coverage",
        "needs_json": "fake/needs.json",
        "metrics_by_type": metrics_by_type,
    }
    out = tmp_path / "metrics.json"
    out.write_text(json.dumps(payload), encoding="utf-8")
    return out


def _run_gate(
    metrics_json: Path, extra_args: list[str]
) -> subprocess.CompletedProcess[str]:
    """Run the gate script on *metrics_json* and return the completed process.

    ``check=False`` is deliberate: the tests assert on non-zero exit codes.
    """
    return subprocess.run(
        [sys.executable, str(_GATE_SCRIPT), "--metrics-json", str(metrics_json)]
        + extra_args,
        capture_output=True,
        text=True,
        check=False,
    )


def test_gate_passes_when_thresholds_met(tmp_path: Path) -> None:
    metrics_json = _write_metrics_json(tmp_path)

    result = _run_gate(
        metrics_json,
        ["--min-req-code", "70", "--min-req-test", "50", "--min-tests-linked", "60"],
    )

    assert result.returncode == 0
    assert "Threshold check passed." in result.stdout


def test_gate_fails_when_threshold_not_met(tmp_path: Path) -> None:
    metrics_json = _write_metrics_json(tmp_path)

    result = _run_gate(
        metrics_json,
        ["--min-req-code", "100"],
    )

    assert result.returncode == 2
    assert "Threshold check failed:" in result.stdout
    assert "[tool_req] requirements with code links" in result.stdout


def test_gate_require_all_links_fails(tmp_path: Path) -> None:
    metrics_json = _write_metrics_json(tmp_path)

    result = _run_gate(metrics_json, ["--require-all-links"])

    assert result.returncode == 2
    assert "Threshold check failed:" in result.stdout


def test_gate_fail_on_broken_refs(tmp_path: Path) -> None:
    metrics_by_type = {
        "tool_req": _type_metrics(
            total=1,
            with_code_link=1,
            with_test_link=1,
            fully_linked=1,
            tests_total=2,
            tests_linked=2,
            broken_references=[
                {"testcase": "TC_X", "missing_need": "REQ_UNKNOWN"}
            ],
        )
    }
    metrics_json = _write_metrics_json(tmp_path, metrics_by_type)

    result = _run_gate(metrics_json, ["--fail-on-broken-test-refs"])

    assert result.returncode == 2
    assert "broken testcase references found:" in result.stdout


def test_gate_specific_need_type(tmp_path: Path) -> None:
    unlinked_ids = ["C1", "C2", "C3", "C4", "C5"]
    metrics_by_type = {
        "tool_req": _type_metrics(
            total=2,
            with_code_link=2,
            with_test_link=2,
            fully_linked=2,
            tests_total=1,
            tests_linked=1,
        ),
        "comp_req": _type_metrics(
            total=5,
            with_code_link=0,
            with_test_link=0,
            fully_linked=0,
            tests_total=0,
            tests_linked=0,
            missing_code_link_ids=unlinked_ids,
            missing_test_link_ids=unlinked_ids,
            not_fully_linked_ids=unlinked_ids,
        ),
    }
    metrics_json = _write_metrics_json(tmp_path, metrics_by_type)

    # Gate only on tool_req (which is fully linked) — comp_req failures are ignored
    result = _run_gate(
        metrics_json,
        ["--need-type", "tool_req", "--require-all-links"],
    )

    assert result.returncode == 0
    assert "[tool_req]" in result.stdout
    assert "[comp_req]" not in result.stdout


def test_gate_unknown_need_type_fails(tmp_path: Path) -> None:
    metrics_json = _write_metrics_json(tmp_path)

    result = _run_gate(metrics_json, ["--need-type", "nonexistent_req"])

    assert result.returncode == 2
    assert "not found in metrics JSON" in result.stdout


def test_gate_unsupported_schema_version(tmp_path: Path) -> None:
    bad = tmp_path / "bad.json"
    bad.write_text(
        json.dumps({"schema_version": "99", "metrics_by_type": {}}), encoding="utf-8"
    )

    result = _run_gate(bad, [])

    assert result.returncode == 1
    assert "unsupported schema_version" in result.stderr


def test_gate_missing_file_returns_error(tmp_path: Path) -> None:
    result = _run_gate(tmp_path / "does_not_exist.json", [])

    assert result.returncode == 1
    assert "not found" in result.stderr
result.stderr diff --git a/scripts_bazel/traceability_coverage.py b/scripts_bazel/traceability_coverage.py index f8c0c20da..5435a3619 100644 --- a/scripts_bazel/traceability_coverage.py +++ b/scripts_bazel/traceability_coverage.py @@ -339,19 +339,28 @@ def main() -> int: tests_linked_pct = float(summary["tests"]["linked_to_requirements_pct"]) broken_test_references = list(summary["tests"]["broken_references"]) + # Build per-type metrics for the JSON output (schema v1). + # Each requirement type is computed independently so downstream tools + # (e.g. traceability_gate) can apply per-type thresholds. + metrics_by_type: dict[str, Any] = {} + for req_type in sorted(requirement_types): + type_summary = compute_traceability_summary( + all_needs=all_needs, + requirement_types={req_type}, + include_not_implemented=args.include_not_implemented, + filtered_test_types=filtered_test_types, + ) + metrics_by_type[req_type] = { + "include_not_implemented": type_summary["include_not_implemented"], + "requirements": type_summary["requirements"], + "tests": type_summary["tests"], + } + summary_output = { + "schema_version": "1", + "generated_by": "traceability_coverage", "needs_json": str(needs_json), - "requirement_types": summary["requirement_types"], - "include_not_implemented": summary["include_not_implemented"], - "requirements": summary["requirements"], - "tests": summary["tests"], - "thresholds": { - "min_req_code": float(args.min_req_code), - "min_req_test": float(args.min_req_test), - "min_req_fully_linked": float(args.min_req_fully_linked), - "min_tests_linked": float(args.min_tests_linked), - "fail_on_broken_test_refs": bool(args.fail_on_broken_test_refs), - }, + "metrics_by_type": metrics_by_type, } _print_summary( diff --git a/scripts_bazel/traceability_gate.py b/scripts_bazel/traceability_gate.py new file mode 100644 index 000000000..f0f8f277a --- /dev/null +++ b/scripts_bazel/traceability_gate.py @@ -0,0 +1,231 @@ +# 
# *******************************************************************************
# Copyright (c) 2026 Contributors to the Eclipse Foundation
#
# See the NOTICE file(s) distributed with this work for additional
# information regarding copyright ownership.
#
# This program and the accompanying materials are made available under the
# terms of the Apache License Version 2.0 which is available at
# https://www.apache.org/licenses/LICENSE-2.0
#
# SPDX-License-Identifier: Apache-2.0
# *******************************************************************************

# ╓                                                          ╖
# ║ Some portions generated by Github Copilot                ║
# ╙                                                          ╜

"""Traceability gate: read a metrics JSON and enforce coverage thresholds.

This script is the "gate" step from the json-idea architecture:

    docs build → traceability_coverage --json-output metrics.json
    CI gate    → traceability_gate --metrics-json metrics.json [--min-* ...]

The gate never parses needs.json itself; it only reads the pre-computed
schema-v1 metrics file produced by the docs build.

Exit codes: 0 = all thresholds met, 2 = at least one threshold violated,
1 = the metrics file could not be used (missing, unreadable, malformed).
"""

import argparse
import json
import sys
from pathlib import Path
from typing import Any, Optional, Sequence

_SUPPORTED_SCHEMA_VERSION = "1"


class _MetricsInputError(Exception):
    """The metrics file is missing, unreadable, or not a usable schema-v1 document."""


def _load_metrics_by_type(metrics_path: Path) -> dict[str, Any]:
    """Read *metrics_path* and return its ``metrics_by_type`` mapping.

    Raises:
        _MetricsInputError: if the file is missing or unreadable, is not valid
            UTF-8 JSON, is not a JSON object, declares an unsupported
            ``schema_version``, or lacks a ``metrics_by_type`` object.
    """
    # EAFP: read directly instead of exists()-then-read, which is race-prone
    # and lets directories / permission problems escape as raw tracebacks.
    try:
        raw = metrics_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        raise _MetricsInputError(f"metrics JSON not found: {metrics_path}") from None
    except (OSError, UnicodeDecodeError) as err:
        raise _MetricsInputError(
            f"cannot read metrics JSON {metrics_path}: {err}"
        ) from err

    try:
        data = json.loads(raw)
    except json.JSONDecodeError as err:
        raise _MetricsInputError(
            f"metrics JSON is not valid JSON: {metrics_path}: {err}"
        ) from err

    if not isinstance(data, dict):
        raise _MetricsInputError(
            f"metrics JSON must be a JSON object at top level: {metrics_path}"
        )

    schema_version = data.get("schema_version")
    if schema_version != _SUPPORTED_SCHEMA_VERSION:
        raise _MetricsInputError(
            f"unsupported schema_version {schema_version!r} "
            f"(expected {_SUPPORTED_SCHEMA_VERSION!r})"
        )

    metrics_by_type = data.get("metrics_by_type")
    if not isinstance(metrics_by_type, dict):
        raise _MetricsInputError(
            f"metrics JSON has no 'metrics_by_type' object: {metrics_path}"
        )
    return metrics_by_type


def _print_type_summary(need_type: str, metrics: dict[str, Any]) -> None:
    """Print the coverage figures of one need type to stdout.

    All fields are looked up before anything is printed, so a malformed entry
    raises ``KeyError``/``TypeError`` without leaving a half-written summary.
    """
    req = metrics["requirements"]
    tst = metrics["tests"]
    rows = (
        (
            "Requirements with source links",
            req["with_code_link"],
            req["total"],
            req["with_code_link_pct"],
        ),
        (
            "Requirements with test links",
            req["with_test_link"],
            req["total"],
            req["with_test_link_pct"],
        ),
        (
            "Requirements fully linked",
            req["fully_linked"],
            req["total"],
            req["fully_linked_pct"],
        ),
        (
            "Tests linked to requirements",
            tst["linked_to_requirements"],
            tst["total"],
            tst["linked_to_requirements_pct"],
        ),
    )
    broken = tst["broken_references"]
    broken_lines = [
        f"    - {item['testcase']} -> {item['missing_need']}" for item in broken
    ]

    print(f"[{need_type}]")
    for label, count, total, pct in rows:
        print(f"  {label}: {count}/{total} ({pct:.2f}%)")
    print(f"  Broken test references: {len(broken)}")
    for line in broken_lines:
        print(line)


def _check_type_thresholds(
    need_type: str,
    metrics: dict[str, Any],
    min_req_code: float,
    min_req_test: float,
    min_req_fully_linked: float,
    min_tests_linked: float,
    fail_on_broken_test_refs: bool,
) -> list[str]:
    """Return one failure message per violated threshold for *need_type*.

    An empty list means every threshold is met. Each message is prefixed with
    ``[<need_type>] `` so failures of several types can be reported together.
    """
    req = metrics["requirements"]
    tst = metrics["tests"]
    prefix = f"[{need_type}] "

    # (label, measured percentage, required minimum)
    checks = (
        ("requirements with code links", req["with_code_link_pct"], min_req_code),
        ("requirements with test links", req["with_test_link_pct"], min_req_test),
        ("requirements fully linked", req["fully_linked_pct"], min_req_fully_linked),
        (
            "tests linked to requirements",
            tst["linked_to_requirements_pct"],
            min_tests_linked,
        ),
    )
    failures = [
        f"{prefix}{label} {actual:.2f}% < {minimum:.2f}%"
        for label, actual, minimum in checks
        if actual < minimum
    ]
    if fail_on_broken_test_refs and tst["broken_references"]:
        failures.append(
            f"{prefix}broken testcase references found: {len(tst['broken_references'])}"
        )
    return failures


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser of the gate."""
    parser = argparse.ArgumentParser(
        description=(
            "Read a traceability metrics JSON (schema v1) and enforce coverage "
            "thresholds. Exits 0 on pass, 2 on threshold failure, 1 on input error."
        )
    )
    parser.add_argument(
        "--metrics-json",
        required=True,
        help=(
            "Path to the metrics JSON produced by "
            "'traceability_coverage --json-output FILE'."
        ),
    )
    parser.add_argument(
        "--need-type",
        default=None,
        help=(
            "Check only this need type (e.g. 'tool_req'). "
            "If omitted, all types present in the JSON are checked."
        ),
    )
    parser.add_argument(
        "--min-req-code",
        type=float,
        default=0.0,
        help="Minimum %% of requirements that must have a source_code_link.",
    )
    parser.add_argument(
        "--min-req-test",
        type=float,
        default=0.0,
        help="Minimum %% of requirements that must have a testlink.",
    )
    parser.add_argument(
        "--min-req-fully-linked",
        type=float,
        default=0.0,
        help=(
            "Minimum %% of requirements that must have both source_code_link "
            "and testlink."
        ),
    )
    parser.add_argument(
        "--min-tests-linked",
        type=float,
        default=0.0,
        help="Minimum %% of testcases that must reference at least one requirement.",
    )
    parser.add_argument(
        "--require-all-links",
        action="store_true",
        help="Shortcut: sets all --min-* to 100 and enables --fail-on-broken-test-refs.",
    )
    parser.add_argument(
        "--fail-on-broken-test-refs",
        action="store_true",
        help="Fail when any testcase references an unknown requirement ID.",
    )
    return parser


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the gate and return its exit code.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which keeps the
            script entry point unchanged while allowing in-process calls.

    Returns:
        0 when every checked need type meets all thresholds, 2 when at least
        one threshold is violated or ``--need-type`` names a type that is not
        in the file, 1 when the metrics file cannot be used.
    """
    args = _build_parser().parse_args(argv)

    if args.require_all_links:
        args.min_req_code = 100.0
        args.min_req_test = 100.0
        args.min_req_fully_linked = 100.0
        args.min_tests_linked = 100.0
        args.fail_on_broken_test_refs = True

    metrics_path = Path(args.metrics_json)
    try:
        metrics_by_type = _load_metrics_by_type(metrics_path)
    except _MetricsInputError as err:
        print(f"Error: {err}", file=sys.stderr)
        return 1

    types_to_check = [args.need_type] if args.need_type else list(metrics_by_type)

    print(f"Traceability gate input: {metrics_path}")
    print("-" * 72)

    failures: list[str] = []
    for need_type in types_to_check:
        if need_type not in metrics_by_type:
            available = list(metrics_by_type)
            failures.append(
                f"need type '{need_type}' not found in metrics JSON "
                f"(available: {available})"
            )
            continue
        type_metrics = metrics_by_type[need_type]
        try:
            _print_type_summary(need_type, type_metrics)
            failures.extend(
                _check_type_thresholds(
                    need_type,
                    type_metrics,
                    args.min_req_code,
                    args.min_req_test,
                    args.min_req_fully_linked,
                    args.min_tests_linked,
                    args.fail_on_broken_test_refs,
                )
            )
        except (KeyError, TypeError, ValueError) as err:
            # Missing field or wrong value type: the file does not follow
            # schema v1, which is an input error rather than a gate failure.
            print(
                f"Error: malformed metrics for need type '{need_type}': {err!r}",
                file=sys.stderr,
            )
            return 1

    print("-" * 72)
    if failures:
        print("Threshold check failed:")
        for msg in failures:
            print(f"  - {msg}")
        return 2

    print("Threshold check passed.")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
"description": "Per-need-type traceability coverage metrics produced by the docs build (traceability_coverage --json-output). Consumed by traceability_gate to enforce coverage thresholds without re-parsing needs.json.", + "type": "object", + "required": ["schema_version", "generated_by", "needs_json", "metrics_by_type"], + "additionalProperties": false, + "properties": { + "schema_version": { + "type": "string", + "const": "1", + "description": "Schema version. Bump when the shape changes incompatibly." + }, + "generated_by": { + "type": "string", + "description": "Name of the tool that produced this file (e.g. 'traceability_coverage')." + }, + "needs_json": { + "type": "string", + "description": "Path to the needs.json that was used as input." + }, + "metrics_by_type": { + "type": "object", + "description": "Metrics keyed by need type (e.g. 'tool_req', 'comp_req'). Each key is one call-scope: the requirement type string passed via --requirement-types.", + "minProperties": 1, + "additionalProperties": { + "$ref": "#/$defs/TypeMetrics" + } + } + }, + "$defs": { + "TypeMetrics": { + "type": "object", + "required": ["include_not_implemented", "requirements", "tests"], + "additionalProperties": false, + "properties": { + "include_not_implemented": { + "type": "boolean", + "description": "Whether requirements with implemented==NO were counted in the denominator." + }, + "requirements": { + "$ref": "#/$defs/RequirementMetrics" + }, + "tests": { + "$ref": "#/$defs/TestMetrics" + } + } + }, + "RequirementMetrics": { + "type": "object", + "required": [ + "total", + "with_code_link", + "with_test_link", + "fully_linked", + "with_code_link_pct", + "with_test_link_pct", + "fully_linked_pct", + "missing_code_link_ids", + "missing_test_link_ids", + "not_fully_linked_ids" + ], + "additionalProperties": false, + "properties": { + "total": { + "type": "integer", + "minimum": 0, + "description": "Total number of requirements in scope." 
+ }, + "with_code_link": { + "type": "integer", + "minimum": 0, + "description": "Requirements that have a non-empty source_code_link." + }, + "with_test_link": { + "type": "integer", + "minimum": 0, + "description": "Requirements that have a non-empty testlink." + }, + "fully_linked": { + "type": "integer", + "minimum": 0, + "description": "Requirements that have both source_code_link and testlink." + }, + "with_code_link_pct": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "with_code_link / total * 100, or 100 when total == 0." + }, + "with_test_link_pct": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "with_test_link / total * 100, or 100 when total == 0." + }, + "fully_linked_pct": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "fully_linked / total * 100, or 100 when total == 0." + }, + "missing_code_link_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Sorted IDs of requirements missing source_code_link." + }, + "missing_test_link_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Sorted IDs of requirements missing testlink." + }, + "not_fully_linked_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Sorted IDs of requirements missing either source_code_link or testlink." + } + } + }, + "TestMetrics": { + "type": "object", + "required": [ + "total", + "filtered_test_types", + "linked_to_requirements", + "linked_to_requirements_pct", + "broken_references" + ], + "additionalProperties": false, + "properties": { + "total": { + "type": "integer", + "minimum": 0, + "description": "Total testcase needs considered (after optional test_type filter)." + }, + "filtered_test_types": { + "type": "array", + "items": { "type": "string" }, + "description": "The test_type values used to filter testcases. Empty means all types." 
+ }, + "linked_to_requirements": { + "type": "integer", + "minimum": 0, + "description": "Testcases that reference at least one requirement via partially_verifies or fully_verifies." + }, + "linked_to_requirements_pct": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "linked_to_requirements / total * 100, or 100 when total == 0." + }, + "broken_references": { + "type": "array", + "items": { "$ref": "#/$defs/BrokenReference" }, + "description": "Testcase references that point to unknown requirement IDs." + } + } + }, + "BrokenReference": { + "type": "object", + "required": ["testcase", "missing_need"], + "additionalProperties": false, + "properties": { + "testcase": { + "type": "string", + "description": "ID of the testcase containing the broken reference." + }, + "missing_need": { + "type": "string", + "description": "Requirement ID referenced by the testcase that does not exist in needs.json." + } + } + } + } +} From b4ec35ba77e00734ec7b13dde0ff3b6a54f689ab Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 27 Apr 2026 07:59:12 +0000 Subject: [PATCH 10/27] removed md and refactored gate --- .../requirements/score_1782_open_summary.md | 629 ------------------ scripts_bazel/traceability_gate.py | 57 +- 2 files changed, 38 insertions(+), 648 deletions(-) delete mode 100644 docs/internals/requirements/score_1782_open_summary.md diff --git a/docs/internals/requirements/score_1782_open_summary.md b/docs/internals/requirements/score_1782_open_summary.md deleted file mode 100644 index 2c919d9bb..000000000 --- a/docs/internals/requirements/score_1782_open_summary.md +++ /dev/null @@ -1,629 +0,0 @@ -# SCORE #1782 Open Items in docs-as-code - -Source reference: https://github.com/eclipse-score/score/issues/1782#issuecomment-4108721585 - -Note: the referenced comment currently contains only "for review on Mar-30". The checklist was taken from the issue body of `eclipse-score/score#1782`. 
- -**Context**: docs-as-code platform development supporting ASIL B project certification per SCORE building blocks metamodel. -- **Platform Purpose**: Enable downstream ASIL B projects (Dependable Elements) to produce certified safety documentation -- **Module Requirements**: ALL modules in docs-as-code must enforce ASIL B-level rigor (change management, safety analysis) regardless of their own ASIL assignment, because consuming projects may be ASIL B -- **Assumptions of Use (AoU)**: Platform documents AoU that modules must respect; consuming projects document their own AoU for end customers -- **Implication**: The 56 open items represent ASIL B-level requirements for the platform and all its modules, enabling downstream ASIL B certification - -## Summary - -- Total requirements in list: **97** (ALL apply to platform + all modules to enable ASIL B project certification) -- Open in docs-as-code: **56** (Tier 1 & 2 have no ASIL-dependent scope; ALL modules must implement) - - PARTIAL: **8** - - NO: **13** - - NOT_MAPPED: **35** - -State legend: -- `PARTIAL`: mapped to docs-as-code requirement(s) with `:implemented: PARTIAL` -- `NO`: mapped to docs-as-code requirement(s) with `:implemented: NO` -- `NOT_MAPPED`: no mapping found in `docs/internals/requirements/requirements.rst` - -PM title source: `process_description` requirement titles (`.. gd_req:: ...`), with fallback to first requirement sentence. - -## Functional Safety Prioritization - -The open items are organized by **Safety Tier** to highlight the critical path for **ASIL B-capable platform** development per SCORE building blocks: - -- **Tier 1 (Critical)**: Direct blockers for ASIL B platform certification. Required for all modules to ensure consuming ASIL B projects can rely on platform support (safety analysis, requirements traceability, test coverage). -- **Tier 2 (High)**: Essential ASIL B infrastructure for all modules. 
Change control, problem management, document governance—mandatory for all modules to ensure downstream ASIL B projects meet certification audit requirements. -- **Tier 3 (Medium)**: Platform robustness and traceability depth. Important for mature ASIL B processes. All modules expected to implement. -- **Tier 4 (Lower)**: Extended capabilities or language-specific conveniences. Platform and modules can use alternative approaches where core Tier 1-2 requirements are met. - -## Epic Diagram - -```mermaid -flowchart TB - O[Open Requirements\n56] - T1["Tier 1: Critical\n12 items\n(5P/6N/1M)"] - T2["Tier 2: High\n18 items\n(1P/4N/13M)"] - T3["Tier 3: Medium\n13 items\n(2P/2N/9M)"] - T4["Tier 4: Lower\n13 items\n(0P/1N/12M)"] - O --> T1 - O --> T2 - O --> T3 - O --> T4 -``` - -## Tier 1: Critical Requirements (12 items) - -**Category**: Direct enablers for ASIL B-capable platform. ALL modules must implement these to ensure consuming ASIL B projects receive proper safety analysis infrastructure. Non-negotiable for downstream ASIL B certification. - -### Safety Analysis Automation (8/13 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__saf_attr_mitigated_by | Mitigation Controls Documentation | Documents which design controls or safety mechanisms mitigate identified hazards and risks. Links failures to their mitigation strategies. | **CRITICAL (ASIL B)**: ALL modules must document mitigations for identified failures. Consuming projects need proof that platform is safe for ASIL B integration. Auditors verify: every failure mode has documented mitigation linked to requirements and tests. | NO | tool_req__docs_saf_attrs_mitigated_by | -| gd_req__saf_attr_aou | Applicability to Area of Use (AOU) Linkage | Explicitly links safety analysis artifacts (FMEA, DFA) to the specific operational context, vehicles, or systems they apply to. | **CRITICAL (ASIL B)**: ALL modules must declare their AoU scope. 
Analysis must explicitly state: "all releases" or "specific constraints". Consuming ASIL B projects need to know: which module AoU assumptions must they respect? Missing AoU scope = audit finding. | NO | tool_req__docs_saf_attrs_mitigated_by | -| gd_req__saf_attr_requirements_check | Safety Analysis–Requirements Linkage Validation | Automated checks ensuring every hazard/failure identified in safety analysis has corresponding requirements to control it. Detects orphaned risks. | **CRITICAL (ASIL B)**: ALL modules must have every failure mode linked to a requirement. Orphaned failure modes = audit failure for consuming ASIL B projects. Automated checks prevent gaps. Non-negotiable. | NO | tool_req__docs_saf_attrs_mitigated_by | -| gd_req__saf_linkage_check | Safety Analysis Linkage Consistency | Validates that links between FMEA/DFA items, requirements, and tests are consistent and bidirectional. Detects dangling references. | **CRITICAL (ASIL B)**: ALL modules must have complete bidirectional traceability. Broken links = missing evidence for ASIL B projects. Automatic checking prevents audit failure. Non-negotiable. | NO | tool_req__docs_saf_attrs_violates | -| gd_req__saf_attr_feffect | Failure Effect Documentation | Precisely documents what happens when a failure occurs (loss of function, unintended behavior, delayed response, etc.). Critical for risk assessment. | **CRITICAL (ASIL B)**: ALL modules must classify failure effects per ISO 26262 Table 1. Vague effects = ASIL downgrade for consuming projects = audit failure. Mandatory precision required. | NO | tool_req__docs_saf_attr_fmea_failure_effect | -| gd_req__saf_attr_fault_id | FMEA Fault/Failure Mode Identity | Unique identifier for each failure mode in FMEA ensuring no duplicates or overlooked scenarios. Enables consistent reference in all downstream artifacts. | **CRITICAL (ASIL B)**: ALL modules must have unique FMEA IDs. 
Consuming ASIL B projects need traceable audit trail: which failures analyzed, which requirements address them, which tests verify. No exceptions. | NO | tool_req__docs_saf_attr_fmea_fault_id | -| gd_req__saf_attr_failure_id | DFA Failure Point Identity | Unique identifier for each failure point in Dependent Failure Analysis. Critical for complex systems with interdependent components. | **CRITICAL (ASIL B)**: ALL modules with complex interdependencies must have DFA with unique failure point IDs. Missing IDs = unanalyzed cascades for ASIL B projects = audit failure. Required for complex architectures. | NO | tool_req__docs_saf_attr_dfa_failure_id | -| gd_req__saf_argument | Safety Analysis Argumentation | Rationale and supporting evidence explaining why identified hazards are or are not significant, why ASIL assignments are appropriate, why mitigations are sufficient. | **CRITICAL (ASIL B)**: ALL modules must document safety arguments supporting their analysis. Consuming ASIL B projects rely on this. Auditors scrutinize: why is module safe? Why are mitigations sufficient? Unsupported = non-compliance. | NO | tool_req__docs_saf_attrs_content | - -### Requirements Traceability (4/7 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__req_linkage | Requirement Cross-Linking | Establishes relationships between requirements (implements, refines, conflicts with, depends-on). Creates the requirement dependency graph. | **CRITICAL (ASIL B)**: ALL modules must show requirement relationships and dependencies. Consuming ASIL B projects need impact analysis: "if this module's requirement changes, what else breaks?" | PARTIAL | tool_req__docs_req_link_satisfies_allowed | -| gd_req__req_traceability | End-to-End Traceability Matrix | Links requirements → design → tests → verification results in a bidirectional chain auditable for completeness and consistency. The "traceability matrix" auditors demand. 
| **CRITICAL (ASIL B)**: ALL modules must provide complete V-model traceability: requirements → design → tests → results. Consuming ASIL B projects depend on this. Missing links = audit failure. | PARTIAL | tool_req__docs_req_link_satisfies_allowed | -| gd_req__req_attr_test_covered | Test Coverage Completeness Validation | Automated checks ensuring every safety requirement has at least one test case assigned and executed. Detects untested requirements. | **CRITICAL (ASIL B)**: ALL modules must have 100% test coverage of safety requirements. No untested requirements allowed. Consuming ASIL B projects require complete evidence. Automated check mandatory. | PARTIAL | tool_req__docs_req_attr_testcov | -| gd_req__req_attr_req_cov | Requirement Coverage Scope | Tracks which architecture/design items satisfy which requirements, preventing ambiguity about requirement implementation. | **CRITICAL (ASIL B)**: ALL modules must map requirements to architecture/design elements. Consuming ASIL B projects need proof: requirements are actually implemented, not just documented. Without mapping, unproven. | PARTIAL | tool_req__docs_req_attr_reqcov | - -### Configuration Management (1/1 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__configuration_uid | Configuration Unique Identification | Every configuration/release of the documented system must have a unique, immutable ID enabling audit trail and traceability to specific configuration versions. | **CRITICAL (ASIL B)**: ALL module configurations must be uniquely versioned. Consuming ASIL B projects must answer: "which module version was integrated?" Broken traceability = audit failure. | NOT_MAPPED | - | - ---- - -## Tier 2: High-Priority Requirements (18 items) - -**Category**: Essential ASIL B infrastructure. 
ALL modules require change control, problem management, document governance—mandatory for consuming ASIL B projects to meet certification audit requirements. - -### Change & Problem Management (13/18 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__change_attr_uid | Change Request Unique ID | Every change to safety-relevant artifacts must have a unique tracking ID enabling full audit trail and impact analysis. | **HIGH (ASIL B)**: ALL modules must track every change with unique IDs. Consuming ASIL B projects need audit trail. Untracked changes = certification violation. Mandatory for all. | NOT_MAPPED | - | -| gd_req__change_attr_title | Change Request Title | Descriptive title enabling quick identification of what is being changed and why. Critical for impact assessment. | **HIGH (ASIL B)**: ALL modules require clear change titles. Consuming ASIL B projects need rapid safety impact assessment. Required for all. | NO | tool_req__docs_doc_attr | -| gd_req__change_attr_status | Change Request Status Tracking | Workflow state (proposed, reviewed, approved, implemented, verified, released) ensuring changes are properly authorized before implementation. | **HIGH (ASIL B)**: ALL modules require controlled workflow: proposed→assessment→review→approved→tested→released. Consuming ASIL B projects need workflow enforcement. Mandatory. | NOT_MAPPED | - | -| gd_req__change_attr_safety | Change Request Safety Impact Assessment | Explicit field documenting whether change affects safety requirements, analysis, or test coverage and what the impact is. | **HIGH (ASIL B)**: ALL modules must assess safety impact of every change. Mandatory assessment prevents undetected impact. Consuming ASIL B projects require this. | PARTIAL | tool_req__docs_doc_generic_mandatory | -| gd_req__change_attr_types | Change Request Categories | Classification (defect fix, enhancement, safety-critical change, design change, etc.) 
enabling routing to appropriate review process. | **HIGH**: Safety-critical changes require more scrutiny. Misclassification routes safety change through standard process, missing specialized review. | NOT_MAPPED | - | -| gd_req__change_attr_milestone | Change Request Scheduling | Links change to project timeline/release version. Enables tracking: "which changes are in which release?" Critical for coordinating safety analysis updates. | **HIGH**: When coordinating platform safety analysis with product releases, need to know which changes are in which build. Without this, mismatch between analyzed platform ≠ delivered platform. | NOT_MAPPED | - | -| gd_req__change_attr_impact_description | Change Impact Description | Documents *why* a change is needed and what it affects (requirements, architecture, tests, risk profile). Enables informed review. | **HIGH**: Safety reviewers need context. "Changed line 42 of module X" is meaningless without explanation. Clear impact descriptions enable proper safety assessment. | NOT_MAPPED | - | -| gd_req__problem_attr_uid | Problem Report Unique ID | Every identified problem/defect must have unique ID enabling audit trail and traceability to fix/verification. | **HIGH**: If a safety-critical defect is discovered, must prove it was tracked, analyzed for safety impact, fixed, and verified. Missing ID = untracked problem. | NOT_MAPPED | - | -| gd_req__problem_attr_title | Problem Report Title | Clear description of what is wrong enabling quick identification and assessment. | **HIGH**: Safety staff need to assess: "does this defect affect safety?" Unclear titles delay assessment and risk safety impact being missed. | NOT_MAPPED | - | -| gd_req__problem_attr_status | Problem Report Status | Workflow ensuring problems are investigated, root-caused, fixed, and verified before closing. Prevents unresolved issues. | **HIGH**: Incomplete problem closure = unresolved defect potentially affecting safety. 
Status tracking ensures rigor in problem resolution. | NOT_MAPPED | - | -| gd_req__problem_attr_classification | Problem Classification | Type (functional defect, safety issue, performance, integration problem, etc.) routing to appropriate resolution process. | **HIGH**: Safety-critical problems require different root cause analysis and verification. Misclassification sends safety defect through standard defect process. | NOT_MAPPED | - | -| gd_req__problem_attr_safety_affected | Problem Safety Impact | Explicit assessment: does this problem affect safety? If yes, what is the impact and required fix/verification? | **HIGH (ASIL B)**: ALL modules must assess every problem for safety impact. No exceptions. Consuming ASIL B projects require complete assessment. Unassessed defects = unmanaged risk. | NOT_MAPPED | - | -| gd_req__problem_check_closing | Problem Resolution Verification | Checks ensuring a problem cannot be marked "closed" without evidence it was actually fixed and verified. Prevents premature closure. | **HIGH**: Prevents "closed" problems that actually still exist. Broken verification = defect re-appears in production. | NOT_MAPPED | - | -| gd_req__problem_attr_anaylsis_results | Problem Root Cause Analysis | Documented investigation of why the problem occurred, enabling preventive action. Without root cause, same problem re-occurs. | **HIGH**: Superficial fixes (symptom treatment) without root cause analysis = defect will recur. Proper root cause identification prevents repeated safety issues. | NOT_MAPPED | - | -| gd_req__problem_attr_impact_description | Problem Impact Description | Clear documentation of scope (what is broken, how many users/products affected, severity). Enables triage and risk assessment. | **HIGH**: Without impact scope, cannot assess urgency or safety relevance. Small problem affecting 1 user ≠ widespread problem affecting all safety-critical units. 
| NOT_MAPPED | - | -| gd_req__problem_attr_milestone | Problem Fix Scheduling | Links problem to planned resolution/release. Enables tracking: "when will this be fixed?" | **HIGH**: For safety-critical problems, need to know: is this fixed in next release? Impacts product safety claim if problem persists in released versions. | NOT_MAPPED | - | -| gd_req__problem_attr_security_affected | Problem Security Impact | Explicit assessment: does this problem expose security vulnerability? Triggers security review. | **HIGH**: Functional safety and cybersecurity are increasingly linked in standards (SOTIF, broader ISO frameworks). Unidentified security issues can compromise safety. | NOT_MAPPED | - | -| gd_req__problem_attr_stakeholder | Problem Ownership | Identifies who is responsible for investigation, fix, and verification. Prevents diffusion of responsibility and incomplete resolutions. | **HIGH**: Without clear ownership, problems get passed around and never actually fixed. Clear accountability ensures proper resolution. | NOT_MAPPED | - | - -### Document Governance (3/4 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__doc_author | Document Author Tracking | Records who created/modified each document. Essential for assigning responsibility and enabling future contact for questions. | **HIGH**: When auditor asks "who was responsible for this safety document?", must have clear answer. Author tracking enables accountability. | NO | tool_req__docs_doc_attr, tool_req__docs_doc_attr_author_autofill | -| gd_req__doc_reviewer | Document Reviewer Identification | Records independent review (e.g., safety review, architecture review) of documents before release. Proves review occurred. | **HIGH**: ISO 26262 requires documented evidence of review. Without reviewer identification, cannot prove document was independently reviewed (may be author-only).
| NO | tool_req__docs_doc_attr, tool_req__docs_doc_attr_reviewer_autofill | -| gd_req__doc_approver | Document Approver Tracking | Records formal approval authority sign-off. Proves document is authorized version. | **HIGH**: Safety documents must be formally approved before use. Without approver tracking, unclear which version is the "official" document used in safety claims. | NO | tool_req__docs_doc_attr, tool_req__docs_doc_attr_approver_autofill | - -### Safety Status Documentation (2/2 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__safety_doc_status | Safety Document Release Status | Tracks whether each safety-related document (hazard analysis, FMEA, DFA, requirements) is draft, reviewed, approved, or superseded. Prevents use of unapproved versions. | **HIGH**: Auditors demand to see "version 3.2 reviewed/approved 2024-Q2". Using draft versions in safety claims = audit failure. | NOT_MAPPED | - | -| gd_req__safety_wp_status | Safety Work Product Status | For ongoing safety work (e.g., safety analysis update, rework for new features), tracks: planned, in-progress, reviewed, complete. Prevents incomplete work from being declared done. | **HIGH**: Partially-completed safety work is dangerous. Ensures safety updates are fully completed before claiming compliance. | NOT_MAPPED | - | - ---- - -## Tier 3: Medium-Priority Requirements (13 items) - -**Category**: Supporting requirements that improve robustness and reduce compliance risk. Lack of these creates operational friction but can be partially worked around. - -### Requirement Attributes & Versioning (5/7 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__req_attr_version | Requirement Versioning | Tracks versions of individual requirements enabling impact analysis: "what changed between v1.0 and v1.1 of this requirement?" | **MEDIUM**: Over time, requirements evolve. 
Without version history, difficult to assess: "was this requirement different when product X was certified vs. product Y?" | PARTIAL | tool_req__docs_common_attr_version | -| gd_req__req_attr_valid_from | Requirement Valid-From Date | Date requirement became effective. Enables temporal traceability: "was this requirement active when we were developing feature X?" | **MEDIUM**: For product variants or phased development, need to know requirement applicability timeline. Missing: difficult to justify why old product didn't have this requirement. | NOT_MAPPED | - | -| gd_req__req_attr_valid_until | Requirement Valid-Until Date | Date requirement is superseded/obsoleted. Marks when requirement no longer applies (e.g., legacy support ending). | **MEDIUM**: Prevents confusion about obsolete requirements. Without clear end-of-life, teams waste effort on outdated requirements or accidentally violate obsolete constraints. | NOT_MAPPED | - | -| gd_req__doc_attributes_manual | Document Attributes (Metadata) | Documents can store structured metadata (purpose, version, date, classification level, etc.) in machine-readable form. Enables automated reports. | **MEDIUM**: Manual metadata = error-prone. Structured attributes enable automated traceability reports (e.g., "generate traceability matrix for all documents created in Q2"). | PARTIAL | tool_req__docs_doc_generic_mandatory | -| gd_req__arch_build_blocks_corr | Architecture Building Block Correlations | Document how architectural blocks relate (data flow, control flow, interfaces). Creates the "architecture diagram" that safety analysis is based on. | **MEDIUM**: FMEA/DFA analysis requires understanding system structure. Correlations enable verification that analysis covers all interactions. Missing: analysis may overlook interface failures. 
| NOT_MAPPED | - | - -### Architecture & Design Traceability (3/5 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__arch_linkage_safety | Architecture Linkage for Safety Analysis | Links requirements to architectural elements (modules, interfaces, components) enabling verification that design implements requirements. | **MEDIUM**: Safety requirements must be "placed" in the architecture (e.g., "this safety mechanism is implemented in module X"). Without this mapping, cannot verify requirements are actually designed. | NOT_MAPPED | - | -| gd_req__arch_linkage_safety_trace | Architecture–Safety Traceability | Bidirectional links from architecture → safety analysis, showing which architectural elements/failures are analyzed for hazards. | **MEDIUM**: Enables verification that every architectural failure point was considered in safety analysis. Prevents unanalyzed failure modes in design. | PARTIAL | tool_req__docs_req_arch_link_safety_to_arch | -| gd_req__impl_design_code_link | Design Implementation–Code Linking | Links architectural design decisions/requirements to actual code implementation. Bridges design→code traceability gap. | **MEDIUM**: Modern development often separates design documents from code. Without this link, code review cannot verify: "is the implementation actually following the approved design?" | NOT_MAPPED | - | - -### Verification Infrastructure (5/8 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__verification_checks | Verification Metadata Validation | Automated checks ensuring all tests have required metadata (test ID, test case, expected result, reviewed by, etc.). Detects incomplete test documentation. | **MEDIUM**: Test metadata is auditor gold. Without complete metadata, cannot demonstrate that tests were properly designed, executed, and reviewed. 
| NO | tool_req__docs_test_metadata_mandatory_1, tool_req__docs_test_metadata_mandatory_2, tool_req__docs_test_metadata_link_levels | -| gd_req__verification_reporting | Verification Results Reporting | Automated generation of test execution reports (what was tested, pass/fail results, coverage metrics). | **MEDIUM**: Enables communication of test status to safety team. Manual report generation = error-prone. Automated reports provide consistent, auditable evidence. | NOT_MAPPED | - | -| gd_req__verification_report_archiving | Verification Report Archiving | Long-term storage of test results with strong versioning. Enables future audits to retrieve: "what were test results for this product version?" | **MEDIUM**: Safety compliance claims must remain valid years after release. Archiving ensures historical test evidence is preserved and retrievable. | NOT_MAPPED | - | -| gd_req__verification_independence | Test Independence Verification | Ensures tests are independent from implementation team (different author, different environment, different tools). Prevents "conflicts of interest" in testing. | **MEDIUM**: Tests written by implementation team may have bias. Independent verification adds credibility. Required for higher ASIL levels. | NOT_MAPPED | - | - ---- - -## Tier 4: Lower-Priority Requirements (13 items) - -**Category**: Extended capabilities with acceptable workarounds or lower direct safety impact. Can often be deferred or addressed through alternative means. 
- -### Extended Requirement Attributes (4/7 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__req_attr_version | [Listed in Tier 3] | | | | | - -### Extended Architecture Capabilities (2/5 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__arch_build_blocks_dynamic | Dynamic Architecture Behavior | Documentation of how architectural blocks behave over time (state machines, sequences, timing). More detailed than static structure. | **LOWER**: Complements but not strictly required for basic traceability. Can often be captured through design documents rather than tool automation. | NOT_MAPPED | - | -| gd_req__arch_model | Holistic Architecture Modeling | Comprehensive metamodel for encoding architecture (components, interfaces, data flow, control flow) in machine-readable form. | **LOWER**: Nice-to-have for mature organizations. Many projects manage with document-based architecture and manual traceability. | NOT_MAPPED | - | - -### Implementation & Design Details (2/2 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__impl_dynamic_diagram | Dynamic Behavior Diagrams | Sequence/timing diagrams showing how system behaves at runtime (object interactions, message sequences). | **LOWER**: Useful for complex systems but not mandatory. Many projects suffice with static design + code review. | NOT_MAPPED | - | - -### Language-Specific Verification (4/8 open) - -| ID | Title | Description | Safety Relevance | State | Mapped Tool Req | -|---|---|---|---|---|---| -| gd_req__verification_link_tests_cpp | C++ Test Linkage | Tool support for linking requirements to C++ unit tests/integration tests. Language-specific traceability. | **LOWER**: Language-specific convenience. Generic test-linkage often suffices if properly documented. Can use manual cross-referencing. 
| NOT_MAPPED | - | -| gd_req__verification_link_tests_python | Python Test Linkage | Tool support for linking requirements to Python pytest/unittest. Language-specific traceability. | **LOWER**: Language-specific convenience. Generic test-linkage often suffices if properly documented. Can use manual cross-referencing. | NOT_MAPPED | - | -| gd_req__verification_link_tests_rust | Rust Test Linkage | Tool support for linking requirements to Rust tests. Language-specific traceability. | **LOWER**: Language-specific convenience. Generic test-linkage often suffices if properly documented. Can use manual cross-referencing. | NOT_MAPPED | - | -| gd_req__verification_link_tests | Generic Test Linkage (General) | Ability to link requirements to test cases regardless of language/framework. Generic capability below language-specific variants. | **LOWER**: Partly captured by Tier 1/2 requirements. This is the "generalizable" form. | NOT_MAPPED | - | - ---- - -## Summary by Priority & State - -| Tier | Total | PARTIAL | NO | NOT_MAPPED | Safety Impact | -|---|---|---|---|---|---| -| **Tier 1: Critical** | 12 | 5 | 6 | 1 | Audit blockers without these | -| **Tier 2: High** | 18 | 1 | 4 | 13 | Major assurance gaps | -| **Tier 3: Medium** | 13 | 2 | 2 | 9 | Operational friction, reduced robustness | -| **Tier 4: Lower** | 13 | 0 | 1 | 12 | Convenience/extended capabilities | -| **TOTAL** | **56** | **8** | **13** | **35** | | - -## Interpretation & Next Steps - -**Critical Assessment (ASIL B-Capable Platform)**: - -The 56 open items are critical for an **ASIL B-capable platform** that enables downstream ASIL B certification. Key implications: - -- **Safety analysis integration (8 items)**: ALL modules must implement FMEA/DFA with complete mitigations. Consuming ASIL B projects depend on this safety infrastructure. No ASIL-dependent scope—every module contributes to the safety argument.
**Non-negotiable for ASIL B projects.** - -- **Traceability completeness (4 items)**: ALL modules must provide complete requirement→design→test traceability. Consuming ASIL B projects cannot integrate modules without proof of traceability. **Mandatory for all modules.** - -- **Change & Problem Management (18 items)**: ALL modules require change tracking and problem assessment. Consuming ASIL B projects need audit trail. No exemptions for "utility code"—all module changes must go through safety review. **Applies to entire codebase.** - -- **Configuration management (1 item)**: ALL module configurations must be uniquely versioned. Consuming ASIL B projects must trace: which module version was integrated? - -**Action Priority (ASIL B Platform Timeline)**: -1. **Critical Path (Phase 1-2, May-Sep)**: Safety analysis foundation for ALL modules (FMEA/DFA with mitigations). Auditor will request: "show me FMEA for every module". Must have answer for EVERY module. -2. **High Priority (Phase 2-3, Jul-Oct)**: Change control for ALL modules. Auditor will request: "what changes were made, who approved them, safety impact assessed?" Consuming ASIL B projects require complete audit trail. -3. **Essential (Phase 3-4, Sep-Dec)**: Document governance and test evidence for ALL modules. Auditor will verify: every safety-relevant document approved, every safety requirement tested. -4. **ALL Modules**: No module can be exempt from ASIL B-level requirements. ALL must implement change management, safety analysis, requirements traceability, test coverage. - -**NOT_MAPPED Items (35 total – Triage Urgency)**: -- **14 new concepts**: Determine scope by end of Phase 2 (Aug). -- **21 existing concepts without requirements**: Define in Phase 2; all modules must implement core Tier 1-2. - -**ASIL B Platform Risk**: Consuming ASIL B projects rely on platform enforcement of ASIL B rigor. Gaps in platform enforcement = downstream ASIL B certification failure. 
Platform must enforce change management and safety analysis for ALL integrated modules, with no exceptions. - ---- - -## Implementation Roadmap (April – December 2026) - -### Phase 1: Foundation (April 17 – June 30, 2026) — ~8 weeks - -**Goal**: Establish safety analysis traceability foundation and baseline documentation governance. - -**Deliverables**: - -#### 1.1 Safety Analysis Metamodel & Linking (Tier 1 | 4 weeks, high effort) -- [ ] **Define gd_req__saf_attr_mitigated_by** implementation in metamodel.yaml - - Add `mitigated_by` field to safety analysis blocks (FMEA, DFA entries) - - Enable bidirectional links: Failure Mode ↔ Mitigation Requirement ↔ Test Case - - Constraint: Every failure mode must reference at least one mitigation requirement - - Estimated effort: 2 weeks (design + implementation + testing) - -- [ ] **Implement gd_req__saf_linkage_check** automated validation - - New Sphinx checker: verify FMEA/DFA links are complete and consistent - - Detect orphaned safety analysis items (unlinked to requirements/tests) - - Report broken bidirectional links - - Estimated effort: 1.5 weeks - -- [ ] **Add gd_req__saf_attr_fault_id & gd_req__saf_attr_failure_id** ID generation - - Auto-generate unique IDs for FMEA/DFA entries (with versioning) - - Ensure uniqueness constraint - - Estimated effort: 0.5 weeks - -- [ ] **Documentation & Examples** - - Create how-to guide for FMEA/DFA traceability in docs/how-to/ - - Add example safety analysis with full traceability - - Update docs/internals/requirements/requirements.rst with new tool requirements - - Estimated effort: 1 week - -**Success Criteria**: -- ✅ tool_req__docs_saf_attrs_mitigated_by marked PARTIAL → YES -- ✅ tool_req__docs_saf_attrs_violates marked NO → YES (linkage checking) -- ✅ Automated checks catch unlinked FMEA entries -- ✅ Example safety analysis demonstrating full traceability - -#### 1.2 Requirement Traceability Completion (Tier 1 | 3 weeks, medium effort) -- [ ] **Complete 
gd_req__req_attr_test_covered** implementation - - Ensure every requirement with `:implemented: YES` has at least one test linked - - Add automated checker: detect requirements without test coverage - - Report test coverage percentage per requirement category - - Estimated effort: 1.5 weeks - -- [ ] **Implement gd_req__req_attr_req_cov** architecture coverage mapping - - Add optional `:covers_design:` field to link requirement → design components - - Enable reporting: "which design elements implement this requirement?" - - Estimated effort: 1 week - -- [ ] **Refresh traceability matrix report** (docs/reference/commands.md) - - Add commands to generate complete requirement→test→design traceability matrix - - Include coverage metrics per requirement type - - Estimated effort: 0.5 weeks - -**Success Criteria**: -- ✅ tool_req__docs_req_attr_testcov marked PARTIAL → YES -- ✅ tool_req__docs_req_attr_reqcov marked PARTIAL → YES -- ✅ Automated test coverage checks integrated into CI -- ✅ Traceability matrix generation command available - -#### 1.3 Document Governance Baseline (Tier 2 | 2 weeks, low-medium effort) -- [ ] **Implement gd_req__doc_author, gd_req__doc_reviewer, gd_req__doc_approver** tracking - - Add `:author:`, `:reviewed_by:`, `:approved_by:` fields to document metadata - - Store with timestamps - - Estimated effort: 1 week - -- [ ] **Document Governance Guide** - - Create docs/how-to/document_governance.rst - - Explain review/approval workflow for safety documents - - Templates for document metadata - - Estimated effort: 0.5 weeks - -- [ ] **Basic safety document status tracking** - - Add `:status: [draft|reviewed|approved|superseded]` field - - Automated check: prevent use of draft safety documents in compliance claims - - Estimated effort: 0.5 weeks - -**Success Criteria**: -- ✅ tool_req__docs_doc_attr, tool_req__docs_doc_attr_author_autofill marked NO → YES (at least basic version) -- ✅ tool_req__docs_doc_attr_reviewer_autofill marked NO → YES -- ✅ 
Document governance guide published -- ✅ Sample safety documents show proper author/reviewer/approver tracking - ---- - -### Phase 2: Safety Analysis Extension (July 1 – August 31, 2026) — ~8 weeks - -**Goal**: Complete safety analysis automation depth and address NOT_MAPPED decisions. - -**Deliverables**: - -#### 2.1 Safety Analysis Content & Linkage Depth (Tier 1 | 3 weeks) -- [ ] **Implement gd_req__saf_attr_feffect** structured failure effect documentation - - Add enumeration: `[loss_of_function | wrong_output | delayed_response | unintended_behavior | ...]` - - Mandatory field with explanation text - - Automated check: detect vague or missing effect descriptions - - Estimated effort: 1 week - -- [ ] **Add gd_req__saf_attr_aou** (Area of Use) linkage - - Enable linking FMEA/DFA to specific product variants, operational contexts, or safety goals - - Constraint: safety analysis must explicitly declare scope (AOU) - - Estimated effort: 1 week - -- [ ] **Implement gd_req__saf_attr_requirements_check** automated validation - - Check: every failure mode in FMEA/DFA has corresponding safety requirement - - Detect and report "orphaned" failure modes (analyzed but not controlled by requirements) - - Estimated effort: 1 week - -**Success Criteria**: -- ✅ tool_req__docs_saf_attr_fmea_failure_effect marked NO → YES -- ✅ tool_req__docs_saf_attrs_mitigated_by marked NO → YES (complete implementation) -- ✅ Automated orphaned-failure detection working -- ✅ gd_req__saf_attr_aou marked NO → YES - -#### 2.2 NOT_MAPPED Triage & Scope Decisions (All Tiers | 2 weeks) -- [ ] **Conduct NOT_MAPPED Item Review** - - For each of 35 NOT_MAPPED items, make decision: - - **In-scope**: Add to docs-as-code/requirements.rst as new tool requirement - - **Out-of-scope**: Document rationale (e.g., "managed by process_description only") - - **Deferred**: Schedule for Phase 3 or later - - Estimated effort: 1.5 weeks (review + documentation) - -- [ ] **Create NOT_MAPPED Decision Log** - - Document 
decisions with rationale for each item - - Publish in docs/internals/requirements/not_mapped_decisions.md - - Estimated effort: 0.5 weeks - -**Success Criteria**: -- ✅ 35 NOT_MAPPED items triaged: ~20 defined as in-scope, ~10 out-of-scope, ~5 deferred -- ✅ Decision log published with clear ownership/timeline for in-scope items -- ✅ Requirements.rst updated with newly-scoped items - -#### 2.3 Configuration Management Baseline (Tier 1 | 1.5 weeks) -- [ ] **Implement gd_req__configuration_uid** version baseline tracking - - Define metadata: config ID, release version, date, safety documentation version, artifacts included - - Tool support: mark safety documents with configuration ID they apply to - - Estimated effort: 1.5 weeks - -**Success Criteria**: -- ✅ Configuration versioning scheme established -- ✅ Example released configuration with full safety artifact traceability -- ✅ tool_req__docs_*_config_uid or similar defined and marked YES - -#### 2.4 Change/Problem Management Framework (Tier 2 | 2 weeks, planning phase) -- [ ] **Design Change/Problem Tracking Model** - - Sketch metamodel for change/problem records - - Define required fields (uid, title, status, safety impact, etc.) - - Integration with requirements/design/test artifacts - - Estimated effort: 1.5 weeks (design) - -- [ ] **Publish Change Management Guide** - - Draft docs/how-to/change_management.md - - Explain workflow, safety impact assessment, review process - - Templates for change records - - Estimated effort: 0.5 weeks - -**Success Criteria**: -- ✅ Change management metamodel design approved -- ✅ Preliminary guide published (full implementation in Phase 3) - ---- - -### Phase 3: High-Priority Completion (September 1 – October 31, 2026) — ~8 weeks - -**Goal**: Complete Tier 2 (Change/Problem Management, Document Governance) and begin Tier 3 architecture items. 
- -**Deliverables**: - -#### 3.1 Change/Problem Management Implementation (Tier 2 | 5 weeks) -- [ ] **Metamodel & Tool Support** (~3 weeks) - - Implement change record structure in metamodel.yaml - - Add problem report structure - - Fields: uid, title, status, safety_affected, classification, impact_description, assigned_to, target_milestone - - Automated checks: required fields, valid status transitions, safety assessment completeness - - Estimated effort: 3 weeks - -- [ ] **Change/Problem Management Commands** (~1.5 weeks) - - New CLI command: `sphinx-needs manage-change [create|update|close] [options]` - - Report generation: "safety-critical changes in release X" - - Status dashboard: open problems, pending safety assessment, unverified fixes - - Estimated effort: 1.5 weeks - -- [ ] **Integration with Requirements/Tests** (~0.5 weeks) - - Link change → affected requirements - - Link problem → test case demonstrating fix - - Backward link: requirement/test → related changes/problems - - Estimated effort: 0.5 weeks - -**Success Criteria**: -- ✅ All 18 Change/Problem Management items marked NO → PARTIAL (basic support) -- ✅ Workflow commands available -- ✅ Integration with requirements/tests working -- ✅ Example change/problem records demonstrating full traceability - -#### 3.2 Document Governance Enhancement (Tier 2 | 2 weeks) -- [ ] **Review/Approval Workflow Automation** - - Extend document metadata: review checklist, sign-off tracking - - Automated reminder: documents pending review/approval - - Report: unapproved safety documents in use (red flag!) 
- - Estimated effort: 1.5 weeks - -- [ ] **Document Release Workflow** - - Implement gd_req__safety_doc_status workflow: draft → reviewed → approved → released - - Constraint: only released versions can be referenced in compliance claims - - Automated check: detect use of draft/unapproved safety docs - - Estimated effort: 0.5 weeks - -**Success Criteria**: -- ✅ tool_req__docs_doc_attr_reviewer_autofill, tool_req__docs_doc_attr_approver_autofill marked NO → YES -- ✅ Automated unapproved-document detection working -- ✅ Example documents showing complete review/approval chain - -#### 3.3 Tier 3 Architecture & Versioning Start (Tier 3 | 1 week, planning) -- [ ] **Requirement Versioning Design** - - Sketch version management: v1.0, v1.1, v1.2, etc. - - Valid-from/valid-until date ranges - - Impact analysis: "what changed between versions?" - - Estimated effort: 0.5 weeks - -- [ ] **Architecture Traceability Design** - - Model: requirement → architecture block → design → code - - Design document placeholder - - Estimated effort: 0.5 weeks - -**Success Criteria**: -- ✅ Design documents approved -- ✅ Roadmap for Tier 3 Q4 implementation established - ---- - -### Phase 4: Polish & Tier 3 Completion (November 1 – December 31, 2026) — ~8 weeks - -**Goal**: Complete Tier 3 items and achieve "good state" by year-end for audit readiness. - -**Deliverables**: - -#### 4.1 Requirement Versioning & Temporal Traceability (Tier 3 | 3 weeks) -- [ ] **Implement gd_req__req_attr_version** requirement version tracking - - Extend requirement metadata: version field, version history - - Generate version change reports - - Impact analysis: "what's different between version X and Y?" - - Estimated effort: 2 weeks - -- [ ] **Implement gd_req__req_attr_valid_from & gd_req__req_attr_valid_until** - - Temporal scoping: requirement active from date X to date Y - - Enable queries: "which requirements were active when product V1.2 was developed?" 
- - Automated check: detect requirements with undefined validity windows - - Estimated effort: 1 week - -**Success Criteria**: -- ✅ tool_req__docs_common_attr_version marked PARTIAL → YES -- ✅ Version history visible in requirement documentation -- ✅ Temporal traceability queries available - -#### 4.2 Architecture & Design Linkage (Tier 3 | 2.5 weeks) -- [ ] **Implement gd_req__arch_build_blocks_corr** architectural relationships - - Metamodel extensions: component relationships, interfaces, data flows - - Visualization: architecture diagram generation - - Estimated effort: 1.5 weeks - -- [ ] **Implement gd_req__arch_linkage_safety** requirement→architecture mapping - - Link requirement → specific architecture component implementing it - - Report: architecture coverage by requirement category - - Estimated effort: 1 week - -**Success Criteria**: -- ✅ Architecture traceability working -- ✅ Architecture diagrams automatically generated from metadata -- ✅ Coverage gaps identified - -#### 4.3 Verification Infrastructure Completion (Tier 3 | 2 weeks) -- [ ] **Implement gd_req__verification_reporting** automated test reports - - Generate test execution summary: pass/fail/skip counts, coverage metrics - - Integration with CI pipeline: test results automatically linked to requirements - - Estimated effort: 1.5 weeks - -- [ ] **Implement gd_req__verification_report_archiving** long-term storage - - Archive test results with strong versioning - - Enable retrieval: "show me test results for product version X" - - Estimated effort: 0.5 weeks - -**Success Criteria**: -- ✅ Automated test reporting available -- ✅ Historical test results retrievable for audit -- ✅ All Tier 3 items marked YES - -#### 4.4 Audit Readiness Review & Documentation (All Tiers | 0.5 weeks) -- [ ] **Complete Tier 4 Gap Analysis** - - Confirm Tier 4 items deferred to 2027 or addressed via workarounds - - Estimated effort: 0.25 weeks - -- [ ] **Publish Final SCORE #1782 Coverage Report** - - Update 
score_1782_open_summary.md with Phase 4 completions - - State changes: ~45 items YES, ~8 items PARTIAL, ~3 items NO (acceptable), 0 items NOT_MAPPED - - Estimated effort: 0.25 weeks - -**Success Criteria**: -- ✅ Tier 1 (12/12 items): 100% YES or PARTIAL ✓ -- ✅ Tier 2 (18/18 items): 95%+ YES/PARTIAL ✓ -- ✅ Tier 3 (13/13 items): 95%+ YES/PARTIAL ✓ -- ✅ All NOT_MAPPED items triaged and resolved ✓ -- ✅ Documentation complete and audit-ready ✓ - ---- - -## Summary Timeline - -| Phase | Timeline | Duration | Focus | Expected State | -|---|---|---|---|---| -| **Phase 1** | Apr 17 – Jun 30 | 8 weeks | Safety analysis foundation + requirement traceability + doc governance baseline | Tier 1: 50% YES, Tier 2: 20% YES | -| **Phase 2** | Jul 1 – Aug 31 | 8 weeks | Safety analysis depth + NOT_MAPPED triage + config management | Tier 1: 80% YES, Tier 2: 30% YES | -| **Phase 3** | Sep 1 – Oct 31 | 8 weeks | Change/problem management + doc governance enhancement + Tier 3 planning | Tier 2: 80% YES, Tier 3: Planning complete | -| **Phase 4** | Nov 1 – Dec 31 | 8 weeks | Requirement versioning + architecture linkage + verification infra + audit prep | **All Tiers: 90%+ YES/PARTIAL** ✓ | - ---- - -## Effort Summary - -| Tier | Phase 1 | Phase 2 | Phase 3 | Phase 4 | Total | FTE Equiv | -|---|---|---|---|---|---|---| -| **Tier 1** | 9 weeks | 5 weeks | 1 week | 1 week | **16 weeks** | ~0.4 FTE | -| **Tier 2** | 2 weeks | 2 weeks | 7 weeks | 1 week | **12 weeks** | ~0.3 FTE | -| **Tier 3** | — | — | 1 week | 5.5 weeks | **6.5 weeks** | ~0.15 FTE | -| **Tier 4** | — | — | — | 0.5 weeks | **0.5 weeks** | Negligible | -| **TOTAL** | **11 weeks** | **7 weeks** | **9 weeks** | **7.5 weeks** | **34.5 weeks** | ~**0.85 FTE** | - -**Interpretation**: Full-time engineer (1 FTE) completing this roadmap in ~35 weeks (9 months). Realistic delivery: **mid-January 2027** with 1 dedicated engineer, or **end-of-December 2026** with 1.2+ FTE allocation. 
- ---- - -## Key Dependencies & Risks - -### Dependencies -1. **Phase 1 → Phase 2**: Metamodel extensions must be complete before depth features -2. **Phase 2 → Phase 3**: NOT_MAPPED triage decisions required before Phase 3 implementation -3. **Phase 3 → Phase 4**: Architecture design finalized in Phase 3 before implementation in Phase 4 - -### Risks & Mitigation -| Risk | Likelihood | Impact | Mitigation | -|---|---|---|---| -| Metamodel changes cause rework in downstream features | Medium | High | Freeze metamodel design in Phase 1 week 2; peer review before implementation | -| NOT_MAPPED triage decisions delayed (scope creep) | High | Medium | Allocate dedicated reviewer for Phase 2.2; strict decision deadline (Aug 15) | -| CI/test performance degradation from new checkers | Medium | Medium | Performance test new checkers before production deployment | -| Stakeholder disagreement on out-of-scope decisions | Medium | High | Early communication of scope; documented rationale for each decision | -| Resource availability changes mid-roadmap | High | High | Cross-train backup engineer in Q3; document all designs in Phase 1 | - ---- - -## Success Metrics (End of Year – ASIL B Certification Readiness) - -By **December 31, 2026**, the docs-as-code tool should demonstrate ASIL B certification-ready compliance: - -1. **Safety Analysis Completeness (ASIL B)** ✓ - - Every code module analyzed: FMEA identifying all failure modes with unique IDs - - DFA documenting failure cascades (for modules with high ASIL complexity) - - Every failure mode has documented mitigation requirement and test evidence - - Example: complete embedded system FMEA/DFA with full traceability - -2. 
**Requirement–Test Coverage (ASIL B)** ✓ - - 100% of safety requirements have test evidence (ASIL B requires no exceptions) - - Every test documented with: test ID, purpose, expected result, reviewed/approved - - Traceability matrix complete: requirement↔design↔test↔result - - Out-of-context: coverage applies to all modules (no "untested utility libraries") - -3. **Change Control (ASIL B)** ✓ - - All code changes tracked with unique ID - - Safety impact assessment mandatory for every change - - Review/approval workflow enforced before deployment - - Audit trail: who changed what, when, why, impact assessment, verification - -4. **Problem Management (ASIL B)** ✓ - - All defects logged and tracked - - Safety assessment mandatory (no defect exempted from safety evaluation) - - Root cause analysis and fix verification documented - - Traceability: problem→root cause→fix→test verification - -5. **Document Governance (ASIL B)** ✓ - - All safety documents: author, reviewer (independent), approver tracked - - Status workflow: draft→reviewed→approved→released (draft never cited) - - Audit: prove review/approval occurred for every safety document - -6. **Configuration Versioning (ASIL B)** ✓ - - Each product release: unique configuration ID - - Traceability to exact FMEA, requirements version, test results for that release - - Audit retrieval: "show me safety analysis for product v1.2.3" - -7. **Audit Readiness (ASIL B Out-of-Context)** ✓ - - ~50 of 56 open items marked YES or PARTIAL - - All NOT_MAPPED items resolved - - Automated compliance checks enabled in CI pipeline - - No module can claim "non-safety"—all covered by requirements/tests/evidence - - Documentation complete with examples demonstrating full ASIL B V-model - -**Expected Audit Posture**: "We have implemented ASIL B functional safety infrastructure. Every code module is a safety element with complete analysis, requirements, tests, and verification evidence. Our tooling enforces compliance checks. 
We are ready for ASIL B certification audit." \ No newline at end of file diff --git a/scripts_bazel/traceability_gate.py b/scripts_bazel/traceability_gate.py index f0f8f277a..6e82ec0ee 100644 --- a/scripts_bazel/traceability_gate.py +++ b/scripts_bazel/traceability_gate.py @@ -40,27 +40,39 @@ def _print_type_summary(need_type: str, metrics: dict[str, Any]) -> None: req = metrics["requirements"] tst = metrics["tests"] + req_total = req["total"] + req_with_code_link = req["with_code_link"] + req_with_code_link_pct = req["with_code_link_pct"] + req_with_test_link = req["with_test_link"] + req_with_test_link_pct = req["with_test_link_pct"] + req_fully_linked = req["fully_linked"] + req_fully_linked_pct = req["fully_linked_pct"] + tst_total = tst["total"] + tst_linked_to_requirements = tst["linked_to_requirements"] + tst_linked_to_requirements_pct = tst["linked_to_requirements_pct"] + tst_broken_references = tst["broken_references"] + print(f"[{need_type}]") print( f" Requirements with source links: " - f"{req['with_code_link']}/{req['total']} ({req['with_code_link_pct']:.2f}%)" + f"{req_with_code_link}/{req_total} ({req_with_code_link_pct:.2f}%)" ) print( f" Requirements with test links: " - f"{req['with_test_link']}/{req['total']} ({req['with_test_link_pct']:.2f}%)" + f"{req_with_test_link}/{req_total} ({req_with_test_link_pct:.2f}%)" ) print( f" Requirements fully linked: " - f"{req['fully_linked']}/{req['total']} ({req['fully_linked_pct']:.2f}%)" + f"{req_fully_linked}/{req_total} ({req_fully_linked_pct:.2f}%)" ) print( f" Tests linked to requirements: " - f"{tst['linked_to_requirements']}/{tst['total']} " - f"({tst['linked_to_requirements_pct']:.2f}%)" + f"{tst_linked_to_requirements}/{tst_total} " + f"({tst_linked_to_requirements_pct:.2f}%)" ) - print(f" Broken test references: {len(tst['broken_references'])}") - if tst["broken_references"]: - for item in tst["broken_references"]: + print(f" Broken test references: {len(tst_broken_references)}") + if 
tst_broken_references: + for item in tst_broken_references: print(f" - {item['testcase']} -> {item['missing_need']}") @@ -76,31 +88,36 @@ def _check_type_thresholds( failures: list[str] = [] req = metrics["requirements"] tst = metrics["tests"] + req_with_code_link_pct = req["with_code_link_pct"] + req_with_test_link_pct = req["with_test_link_pct"] + req_fully_linked_pct = req["fully_linked_pct"] + tst_linked_to_requirements_pct = tst["linked_to_requirements_pct"] + tst_broken_references = tst["broken_references"] prefix = f"[{need_type}] " - if req["with_code_link_pct"] < min_req_code: + if req_with_code_link_pct < min_req_code: failures.append( f"{prefix}requirements with code links " - f"{req['with_code_link_pct']:.2f}% < {min_req_code:.2f}%" + f"{req_with_code_link_pct:.2f}% < {min_req_code:.2f}%" ) - if req["with_test_link_pct"] < min_req_test: + if req_with_test_link_pct < min_req_test: failures.append( f"{prefix}requirements with test links " - f"{req['with_test_link_pct']:.2f}% < {min_req_test:.2f}%" + f"{req_with_test_link_pct:.2f}% < {min_req_test:.2f}%" ) - if req["fully_linked_pct"] < min_req_fully_linked: + if req_fully_linked_pct < min_req_fully_linked: failures.append( f"{prefix}requirements fully linked " - f"{req['fully_linked_pct']:.2f}% < {min_req_fully_linked:.2f}%" + f"{req_fully_linked_pct:.2f}% < {min_req_fully_linked:.2f}%" ) - if tst["linked_to_requirements_pct"] < min_tests_linked: + if tst_linked_to_requirements_pct < min_tests_linked: failures.append( f"{prefix}tests linked to requirements " - f"{tst['linked_to_requirements_pct']:.2f}% < {min_tests_linked:.2f}%" + f"{tst_linked_to_requirements_pct:.2f}% < {min_tests_linked:.2f}%" ) - if fail_on_broken_test_refs and tst["broken_references"]: + if fail_on_broken_test_refs and tst_broken_references: failures.append( - f"{prefix}broken testcase references found: {len(tst['broken_references'])}" + f"{prefix}broken testcase references found: {len(tst_broken_references)}" ) return failures @@ 
-189,7 +206,9 @@ def main() -> int: return 1 metrics_by_type: dict[str, Any] = data["metrics_by_type"] - types_to_check = [args.need_type] if args.need_type else list(metrics_by_type.keys()) + types_to_check = ( + [args.need_type] if args.need_type else list(metrics_by_type.keys()) + ) print(f"Traceability gate input: {metrics_path}") print("-" * 72) From 7ce683522c606e0eec524c35d46802b97b20638c Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 27 Apr 2026 08:44:52 +0000 Subject: [PATCH 11/27] Added the uml from the comment --- docs/how-to/test_to_doc_links.rst | 67 +++++++++++++++++++++++------- scripts_bazel/traceability_gate.py | 3 +- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/docs/how-to/test_to_doc_links.rst b/docs/how-to/test_to_doc_links.rst index c1ac5eeef..7f161591f 100644 --- a/docs/how-to/test_to_doc_links.rst +++ b/docs/how-to/test_to_doc_links.rst @@ -62,42 +62,75 @@ Limitations CI/CD Gate for Linkage Percentage --------------------------------- -The traceability checker can be used as a low-level CI gate over exported -``needs.json`` data. +The traceability tooling uses a **two-step architecture**: + +1. ``traceability_coverage`` reads ``needs.json``, computes metrics, and writes + a machine-readable ``metrics.json`` (schema v1). +2. ``traceability_gate`` reads that ``metrics.json`` and enforces configurable + coverage thresholds. + +Separating the two steps keeps the CI gate decoupled from the Sphinx/Bazel +build: the gate never parses ``needs.json`` itself. + +.. plantuml:: + + @startuml + skinparam componentStyle rectangle + skinparam defaultTextAlignment center + + rectangle "docs build" { + component "calc metrics\n(traceability_coverage)" as coverage + } + + usecase "test" as test + database "needs.json" as needsjson + database "metrics.json\n(v1: metrics per needs type,\ne.g. 
tool_req)" as metricsjson + component "gate\n(traceability_gate)" as gate + + test --> coverage : xml + needsjson --> coverage + coverage --> metricsjson + metricsjson --> gate + gate --> (Pretty output) + + @enduml Current workflow: 1. Run tests. 2. Generate ``needs.json``. -3. Execute the traceability checker. - -In repository CI, the preferred setup is to wire the coverage check target -to depend on the test-report and ``//:needs_json`` targets, so Bazel handles -the build order automatically. - -You can run the checker as a standalone command, and you can also run it as -part of documentation creation if your repository wiring does so. +3. Compute metrics and export to ``metrics.json``. +4. Run the gate against the exported metrics. .. code-block:: bash bazel test //... bazel build //:needs_json bazel run //scripts_bazel:traceability_coverage -- \ + --needs-json bazel-bin/needs_json/needs.json \ + --json-output metrics.json + + bazel run //scripts_bazel:traceability_gate -- \ + --metrics-json metrics.json \ --min-req-code 100 \ --min-req-test 100 \ --min-req-fully-linked 100 \ --min-tests-linked 100 \ --fail-on-broken-test-refs -If ``//:needs_json`` was built beforehand, the checker locates the default -``needs.json`` output automatically. Use ``--needs-json`` only when you want -to point to a non-standard location. +In repository CI, wire the coverage target to depend on the test-report and +``//:needs_json`` targets so Bazel handles the build order automatically. + +The ``--require-all-links`` shortcut is equivalent to setting all ``--min-*`` +flags to 100 and enabling ``--fail-on-broken-test-refs``. 
-The checker reports: +The gate reports: - Percentage of implemented requirements with ``source_code_link`` - Percentage of implemented requirements with ``testlink`` - Percentage of implemented requirements with both links (fully linked) +- Percentage of testcases linked to at least one requirement +- Broken testcase references (testcases referencing an unknown requirement ID) .. note:: @@ -107,11 +140,13 @@ The checker reports: references are only meaningful if those external testcase needs are also included in the exported dataset. -To check only unit tests, filter testcase types: +To check only unit tests, pass ``--test-types`` to the coverage step: .. code-block:: bash bazel run //scripts_bazel:traceability_coverage -- \ - --test-types unit-test + --needs-json bazel-bin/needs_json/needs.json \ + --test-types unit-test \ + --json-output metrics.json Use lower thresholds during rollout and tighten towards 100% over time. diff --git a/scripts_bazel/traceability_gate.py b/scripts_bazel/traceability_gate.py index 6e82ec0ee..2f6e8773a 100644 --- a/scripts_bazel/traceability_gate.py +++ b/scripts_bazel/traceability_gate.py @@ -17,8 +17,7 @@ """Traceability gate: read a metrics JSON and enforce coverage thresholds. -This script is the "gate" step from the json-idea architecture: - +This script is the "gate" step based on a metrics.json approach. The typical workflow is: docs build → traceability_coverage --json-output metrics.json CI gate → traceability_gate --metrics-json metrics.json [--min-* ...]
From a6029b73990041601370021ec99dba4344e51390 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 27 Apr 2026 13:36:34 +0000 Subject: [PATCH 12/27] refactoring to the new json approach and refactoring of dashboards for better ux --- docs.bzl | 4 +- docs/how-to/dashboards_and_quality_gates.rst | 183 +++++++++++++++ docs/how-to/get_started.rst | 3 + docs/how-to/index.rst | 1 + docs/how-to/setup.md | 6 + docs/how-to/test_to_doc_links.rst | 53 +++-- .../requirements/implementation_state.rst | 90 +++----- docs/internals/requirements/index.rst | 20 +- .../requirements/tool_req_overview.rst | 22 +- .../requirements/tooling_verification.rst | 100 +++++++++ scripts_bazel/traceability_coverage.py | 2 +- scripts_bazel/traceability_gate.py | 4 + .../score_layout/assets/css/score_design.css | 28 ++- .../score_layout/assets/css/score_needs.css | 30 +++ src/extensions/score_metamodel/__init__.py | 88 ++++++++ .../checks/traceability_dashboard.py | 87 ++++++++ .../tests/test_traceability_dashboard.py | 209 ++++++++++++++++++ .../tests/test_traceability_metrics.py | 204 +++++++++++++++++ ...st_traceability_metrics_json_generation.py | 82 +++++++ .../score_metamodel/traceability_metrics.py | 61 ++++- .../score_source_code_linker/__init__.py | 94 ++++++-- .../tests/test_xml_parser.py | 35 +++ .../score_source_code_linker/xml_parser.py | 23 +- 23 files changed, 1315 insertions(+), 114 deletions(-) create mode 100644 docs/how-to/dashboards_and_quality_gates.rst create mode 100644 docs/internals/requirements/tooling_verification.rst create mode 100644 src/extensions/score_metamodel/tests/test_traceability_dashboard.py create mode 100644 src/extensions/score_metamodel/tests/test_traceability_metrics.py create mode 100644 src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py diff --git a/docs.bzl b/docs.bzl index 699c2370f..bb1a4899b 100644 --- a/docs.bzl +++ b/docs.bzl @@ -293,10 +293,12 @@ def docs(source_dir = "docs", data = [], deps = [], 
scan_code = [], known_good = "--jobs", "auto", "--define=external_needs_source=" + str(data), + "--define=score_sourcelinks_json=$(location :sourcelinks_json)", + "--define=score_source_code_linker_plain_links=1", ], formats = ["needs"], sphinx = ":sphinx_build", - tools = data, + tools = data + [":sourcelinks_json"], visibility = ["//visibility:public"], # Persistent workers cause stale symlinks after dependency version # changes, corrupting the Bazel cache. diff --git a/docs/how-to/dashboards_and_quality_gates.rst b/docs/how-to/dashboards_and_quality_gates.rst new file mode 100644 index 000000000..2a47110a7 --- /dev/null +++ b/docs/how-to/dashboards_and_quality_gates.rst @@ -0,0 +1,183 @@ +.. + # ******************************************************************************* + # Copyright (c) 2026 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. + # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ******************************************************************************* + +Build Dashboards and Quality Gates +================================== + +This guide is for repositories that *consume* docs-as-code as a Bazel +dependency. Examples are module repositories and integration repositories that +want to: + +1. publish their own traceability dashboards, +2. export ``metrics.json`` during documentation builds, and +3. enforce quality gates in CI. + +The docs-as-code repository itself documents tooling coverage. Consumer +repositories use the same extensions to document *their own* requirements, +architecture, source-code links, and verification evidence. 
+ +What You Get +------------ + +When a consumer repository integrates docs-as-code correctly, it can: + +- build an HTML dashboard from its own Sphinx needs, +- include external needs from other repositories when desired, +- export ``needs.json`` and ``metrics.json`` for machine-readable reporting, +- gate CI on traceability thresholds via ``traceability_gate``. + +Typical Setup +------------- + +1. Add docs-as-code as a Bazel dependency as described in :ref:`setup`. +2. Define the documentation target via the ``docs(...)`` macro. +3. Provide process or upstream needs via the ``data`` argument when cross-repo + traceability is required. +4. Provide implementation sources via ``scan_code`` so ``source_code_link`` can + be generated. +5. Add test metadata so ``testlink`` and testcase needs can be generated. + +Minimal Consumer Example +------------------------ + +In ``BUILD``: + +.. code-block:: starlark + + load("@score_docs_as_code//:docs.bzl", "docs") + + filegroup( + name = "module_sources", + srcs = glob([ + "src/**/*.py", + "src/**/*.cpp", + "src/**/*.h", + "src/**/*.rs", + ]), + ) + + docs( + source_dir = "docs", + data = [ + "@score_process//:needs_json", + ], + scan_code = [":module_sources"], + ) + +In ``docs/conf.py``: + +.. code-block:: python + + score_metamodel_requirement_types = "feat_req,comp_req,aou_req" + score_metamodel_include_external_needs = False + +Use ``score_metamodel_include_external_needs = True`` only in repositories that +intentionally aggregate traceability across dependencies, such as integration +repositories. + +Building the Dashboard +---------------------- + +Run: + +.. code-block:: bash + + bazel run //:docs + +This generates HTML output under ``_build/``. + +Run: + +.. 
code-block:: bash + + bazel build //:needs_json + +This generates machine-readable output under: + +- ``bazel-bin/needs_json/_build/needs/needs.json`` +- ``bazel-bin/needs_json/_build/needs/metrics.json`` + +The HTML dashboard and the exported ``metrics.json`` are backed by the same +traceability metric implementation, so the charts and the CI gate evaluate the +same data. + +Inputs for Linkage Metrics +-------------------------- + +To get meaningful dashboard and gate values, consumer repositories typically +need three inputs: + +1. Requirement and architecture needs in the documentation itself. +2. Source code references via :doc:`source_to_doc_links`. +3. Test metadata via :doc:`test_to_doc_links`. + +If one of those inputs is missing, the related chart or gate metric will remain +empty or low. + +Choosing Local vs Aggregated Views +---------------------------------- + +There are two common modes: + +**Module repository** + +- Set ``score_metamodel_include_external_needs = False``. +- Gate only on the needs owned by the repository itself. +- Use this for per-module implementation progress and traceability. + +**Integration repository** + +- Set ``score_metamodel_include_external_needs = True``. +- Aggregate requirements across module dependencies when that is the intended + repository purpose. +- Use this for system or integration-level dashboards. + +CI Quality Gate +--------------- + +After building ``//:needs_json``, run the gate on the exported metrics: + +.. code-block:: bash + + bazel run //scripts_bazel:traceability_gate -- \ + --metrics-json bazel-bin/needs_json/_build/needs/metrics.json \ + --min-req-code 70 \ + --min-req-test 70 \ + --min-req-fully-linked 60 \ + --min-tests-linked 70 + +Useful flags: + +- ``--require-all-links`` for strict 100 percent gating +- ``--fail-on-broken-test-refs`` to fail when testcase references point to + unknown requirement IDs + +Recommended Rollout +------------------- + +For a new consumer repository: + +1. 
Start with local-only metrics. +2. Enable ``scan_code`` and verify ``source_code_link`` coverage first. +3. Add test metadata and verify ``testlink`` coverage. +4. Introduce modest thresholds in CI. +5. Raise thresholds over time as the repository matures. + +Related Guides +-------------- + +- :ref:`setup` +- :doc:`other_modules` +- :doc:`source_to_doc_links` +- :doc:`test_to_doc_links` \ No newline at end of file diff --git a/docs/how-to/get_started.rst b/docs/how-to/get_started.rst index c469c37f2..0567894e8 100644 --- a/docs/how-to/get_started.rst +++ b/docs/how-to/get_started.rst @@ -24,3 +24,6 @@ In an existing S-CORE repository, you can build the documentation using Bazel: Open the generated site at ``_build/index.html`` in your browser. In a new S-CORE repository, see :ref:`setup`. + +After the initial setup, continue with :doc:`dashboards_and_quality_gates` to +build a repository dashboard and enforce CI quality gates. diff --git a/docs/how-to/index.rst b/docs/how-to/index.rst index 4e3f17e8c..7cae52036 100644 --- a/docs/how-to/index.rst +++ b/docs/how-to/index.rst @@ -27,6 +27,7 @@ Here you find practical guides on how to use docs-as-code. write_docs faq other_modules + dashboards_and_quality_gates source_to_doc_links test_to_doc_links add_extensions diff --git a/docs/how-to/setup.md b/docs/how-to/setup.md index 253f88116..bdd4d612d 100644 --- a/docs/how-to/setup.md +++ b/docs/how-to/setup.md @@ -86,3 +86,9 @@ bazel run //:docs #### 6. Access your documentation at `/_build/index.html` + +## Next Step + +After basic setup, see {doc}`dashboards_and_quality_gates` to configure +traceability dashboards, export `metrics.json`, and enforce CI quality gates in +consumer repositories. 
diff --git a/docs/how-to/test_to_doc_links.rst b/docs/how-to/test_to_doc_links.rst index 7f161591f..1a8c28eed 100644 --- a/docs/how-to/test_to_doc_links.rst +++ b/docs/how-to/test_to_doc_links.rst @@ -72,6 +72,16 @@ The traceability tooling uses a **two-step architecture**: Separating the two steps keeps the CI gate decoupled from the Sphinx/Bazel build: the gate never parses ``needs.json`` itself. +.. note:: + + ``metrics.json`` is the **single source of truth** for traceability data. + It is written by the Sphinx docs build (via the ``score_metamodel`` extension) + to ``_build/needs/metrics.json`` alongside ``needs.json``. The same + ``compute_traceability_summary`` function that powers the dashboard pie charts + produces this file, so the gate and the dashboard always show the same numbers. + The ``traceability_coverage`` CLI is a standalone alternative for repos that + run the coverage check outside of a full Sphinx build. + .. plantuml:: @startuml @@ -98,20 +108,16 @@ build: the gate never parses ``needs.json`` itself. Current workflow: 1. Run tests. -2. Generate ``needs.json``. -3. Compute metrics and export to ``metrics.json``. -4. Run the gate against the exported metrics. +2. Build docs (generates ``needs.json`` **and** ``metrics.json``). +3. Run the gate against the exported metrics. .. code-block:: bash bazel test //... bazel build //:needs_json - bazel run //scripts_bazel:traceability_coverage -- \ - --needs-json bazel-bin/needs_json/needs.json \ - --json-output metrics.json bazel run //scripts_bazel:traceability_gate -- \ - --metrics-json metrics.json \ + --metrics-json bazel-bin/needs_json/_build/needs/metrics.json \ --min-req-code 100 \ --min-req-test 100 \ --min-req-fully-linked 100 \ @@ -126,9 +132,9 @@ flags to 100 and enabling ``--fail-on-broken-test-refs``. 
The gate reports: -- Percentage of implemented requirements with ``source_code_link`` -- Percentage of implemented requirements with ``testlink`` -- Percentage of implemented requirements with both links (fully linked) +- Percentage of requirements with ``source_code_link`` +- Percentage of requirements with ``testlink`` +- Percentage of requirements with both links (fully linked) - Percentage of testcases linked to at least one requirement - Broken testcase references (testcases referencing an unknown requirement ID) @@ -140,13 +146,28 @@ The gate reports: references are only meaningful if those external testcase needs are also included in the exported dataset. -To check only unit tests, pass ``--test-types`` to the coverage step: +To restrict which need types are treated as requirements when computing metrics, +set ``score_metamodel_requirement_types`` in your Sphinx ``conf.py`` +(default: ``tool_req``): -.. code-block:: bash +.. code-block:: python + + score_metamodel_requirement_types = "tool_req,comp_req" + +By default, dashboard and gate use only needs defined in the current repository +(``is_external == False``). This supports per-repo CI gates. +For integration repositories that intentionally aggregate across dependencies, +you can include external needs in both dashboard and gate by setting: + +.. code-block:: python + + score_metamodel_include_external_needs = True + +You can also override dashboard behaviour per pie chart via filter args: + +.. code-block:: rst - bazel run //scripts_bazel:traceability_coverage -- \ - --needs-json bazel-bin/needs_json/needs.json \ - --test-types unit-test \ - --json-output metrics.json + .. needpie:: Requirements with Codelinks + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_with_code_links(tool_req,true) Use lower thresholds during rollout and tighten towards 100% over time. 
diff --git a/docs/internals/requirements/implementation_state.rst b/docs/internals/requirements/implementation_state.rst index 7c6c5929a..afa31f008 100644 --- a/docs/internals/requirements/implementation_state.rst +++ b/docs/internals/requirements/implementation_state.rst @@ -17,8 +17,13 @@ # ╙ ╜ .. _docs_statistics: -Implementation State Statistics -================================ +Tooling Coverage +================ + +This page shows how the docs-as-code tooling covers process and tool +requirements. It focuses on tooling capabilities offered to downstream +repositories rather than on product-specific traceability inside those +repositories. Overview -------- @@ -53,72 +58,35 @@ In Detail .. grid-item-card:: - .. needpie:: Test Results - :labels: passed, failed, skipped - :colors: green, red, orange - - type == 'testcase' and result == 'passed' - type == 'testcase' and result == 'failed' - type == 'testcase' and result == 'skipped' - -.. grid:: 2 + .. needpie:: Requirements with linked tests + :labels: no test link, with test link + :colors: red, green + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_with_test_links(tool_req) .. grid-item-card:: - Failed Tests - - *Hint: this table is empty by definition, as PRs with failing tests are not allowed to be merged in docs-as-code repo.* - - .. needtable:: FAILED TESTS - :filter: result == "failed" - :tags: TEST - :columns: name as "testcase";result;fully_verifies;partially_verifies;test_type;derivation_technique;id as "link" + .. needpie:: Requirements fully linked (code + tests) + :labels: not fully linked, fully linked + :colors: orange, green + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_fully_linked(tool_req) .. grid-item-card:: - Skipped / Disabled Tests - - *Hint: this table is empty by definition, as we do not allow skipped or disabled tests in docs-as-code repo.* - - .. 
needtable:: SKIPPED/DISABLED TESTS - :filter: result != "failed" and result != "passed" - :tags: TEST - :columns: name as "testcase";result;fully_verifies;partially_verifies;test_type;derivation_technique;id as "link" - - - - -All passed Tests ------------------ - -.. needtable:: SUCCESSFUL TESTS - :filter: result == "passed" - :tags: TEST - :columns: name as "testcase";result;fully_verifies;partially_verifies;test_type;derivation_technique;id as "link" - - -Details About Testcases ------------------------- -*Data is not filled out yet within the test cases.* - -.. needpie:: Test Types Used In Testcases - :labels: fault-injection, interface-test, requirements-based, resource-usage - :legend: + .. needpie:: Process requirements linked by tool requirements + :labels: not linked, linked + :colors: red, green + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_process_requirements_linked(tool_req,true) - type == 'testcase' and test_type == 'fault-injection' - type == 'testcase' and test_type == 'interface-test' - type == 'testcase' and test_type == 'requirements-based' - type == 'testcase' and test_type == 'resource-usage' +Process-to-Tool Mapping +----------------------- -.. needpie:: Derivation Techniques Used In Testcases - :labels: requirements-analysis, design-analysis, boundary-values, equivalence-classes, fuzz-testing, error-guessing, explorative-testing - :legend: +.. 
needtable:: Process requirement -> tool requirement mapping + :types: tool_req + :columns: satisfies as "Process Requirement";id as "Tool Requirement" + :style: table - type == 'testcase' and derivation_technique == 'requirements-analysis' - type == 'testcase' and derivation_technique == 'design-analysis' - type == 'testcase' and derivation_technique == 'boundary-values' - type == 'testcase' and derivation_technique == 'equivalence-classes' - type == 'testcase' and derivation_technique == 'fuzz-testing' - type == 'testcase' and derivation_technique == 'error-guessing' - type == 'testcase' and derivation_technique == 'explorative-testing' +.. needtable:: Tool requirement implementation and links + :types: tool_req + :columns: id as "Tool Requirement";implemented;source_code_link;testlink + :style: table diff --git a/docs/internals/requirements/index.rst b/docs/internals/requirements/index.rst index b63c2a056..31a5f5b3d 100644 --- a/docs/internals/requirements/index.rst +++ b/docs/internals/requirements/index.rst @@ -15,11 +15,29 @@ Requirements ============ +This repository provides the docs-as-code tooling used by other SCORE +repositories. The pages in this section therefore focus on two questions: + +1. Which process and tool requirements are covered by the docs-as-code tooling? +2. How is the tooling itself verified and qualified for downstream use? + +Actual product and module traceability is expected to live in consuming +repositories, such as module repositories and integration repositories that use +docs-as-code as a Bazel dependency. + +Pages +----- + +- ``implementation_state`` describes tooling coverage: implemented capabilities, + source-code links, test links, full linkage, and process-to-tool mapping. +- ``tooling_verification`` describes verification evidence for the tooling + itself, including test results and testcase metadata. + .. 
toctree:: :maxdepth: 1 capabilities process_overview - tool_req_overview requirements implementation_state + tooling_verification diff --git a/docs/internals/requirements/tool_req_overview.rst b/docs/internals/requirements/tool_req_overview.rst index 0990fc68e..2ae20c6bb 100644 --- a/docs/internals/requirements/tool_req_overview.rst +++ b/docs/internals/requirements/tool_req_overview.rst @@ -12,15 +12,17 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* -=============================== -Tool Requirements Overview -=============================== +.. _docs_tool_req_overview: -Here are all our tool requirements -tersely packed in a table -with some hopefully useful meta information. +:orphan: -.. needtable:: - :types: tool_req - :columns: satisfies as "Process Requirement" ;id as "Tool Requirement";implemented;source_code_link - :style: table +Tool Requirements Overview (Deprecated) +======================================= + +This page is deprecated. + +Use the canonical requirements dashboard: +:doc:`implementation_state` + +The process-to-tool mapping tables previously shown here are now part of +the implementation state dashboard. diff --git a/docs/internals/requirements/tooling_verification.rst b/docs/internals/requirements/tooling_verification.rst new file mode 100644 index 000000000..c966e77de --- /dev/null +++ b/docs/internals/requirements/tooling_verification.rst @@ -0,0 +1,100 @@ +.. + # ******************************************************************************* + # Copyright (c) 2026 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. 
+ # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ******************************************************************************* + +.. _docs_tooling_verification: + +Tooling Verification +==================== + +This page describes verification evidence for the docs-as-code tooling itself. +It is intentionally separate from tooling coverage so downstream quality signals +such as unit tests, future static analysis, and other verification evidence can +evolve independently from traceability support. + +Overview +-------- + +.. needpie:: Test Results + :labels: passed, failed, skipped + :colors: green, red, orange + + type == 'testcase' and result == 'passed' + type == 'testcase' and result == 'failed' + type == 'testcase' and result == 'skipped' + +.. grid:: 2 + :class-container: score-grid + + .. grid-item-card:: + + Failed Tests + + *Hint: this table is empty by definition, as PRs with failing tests are not allowed to be merged in docs-as-code repo.* + + No failing tests are expected in the current dataset. + + .. grid-item-card:: + + Skipped / Disabled Tests + + *Hint: this table is empty by definition, as we do not allow skipped or disabled tests in docs-as-code repo.* + + No skipped or disabled tests are expected in the current dataset. + + +Testcase Metadata Overview +-------------------------- + +*Data is not filled out yet within the test cases.* + +.. needpie:: Test Types Used In Testcases + :labels: fault-injection, interface-test, requirements-based, resource-usage + :legend: + + type == 'testcase' and test_type == 'fault-injection' + type == 'testcase' and test_type == 'interface-test' + type == 'testcase' and test_type == 'requirements-based' + type == 'testcase' and test_type == 'resource-usage' + + +.. 
needpie:: Derivation Techniques Used In Testcases + :labels: requirements-analysis, design-analysis, boundary-values, equivalence-classes, fuzz-testing, error-guessing, explorative-testing + :legend: + + type == 'testcase' and derivation_technique == 'requirements-analysis' + type == 'testcase' and derivation_technique == 'design-analysis' + type == 'testcase' and derivation_technique == 'boundary-values' + type == 'testcase' and derivation_technique == 'equivalence-classes' + type == 'testcase' and derivation_technique == 'fuzz-testing' + type == 'testcase' and derivation_technique == 'error-guessing' + type == 'testcase' and derivation_technique == 'explorative-testing' + + +All passed Tests +---------------- + +.. needtable:: SUCCESSFUL TESTS - status and link + :filter: result == "passed" + :tags: TEST + :columns: name as "testcase";result;id as "link" + +.. needtable:: SUCCESSFUL TESTS - verification mapping + :filter: result == "passed" + :tags: TEST + :columns: name as "testcase";fully_verifies;partially_verifies + +.. 
needtable:: SUCCESSFUL TESTS - optional metadata + :filter: result == "passed" + :tags: TEST + :columns: name as "testcase";test_type;derivation_technique \ No newline at end of file diff --git a/scripts_bazel/traceability_coverage.py b/scripts_bazel/traceability_coverage.py index 5435a3619..da17dcb48 100644 --- a/scripts_bazel/traceability_coverage.py +++ b/scripts_bazel/traceability_coverage.py @@ -72,7 +72,7 @@ def _load_needs(needs_json: Path) -> list[dict[str, Any]]: def _default_needs_json_candidates() -> list[Path]: return [ Path("_build/needs/needs.json"), - Path("bazel-bin/needs_json/needs.json"), + Path("bazel-bin/needs_json/_build/needs/needs.json"), ] diff --git a/scripts_bazel/traceability_gate.py b/scripts_bazel/traceability_gate.py index 2f6e8773a..abff691d9 100644 --- a/scripts_bazel/traceability_gate.py +++ b/scripts_bazel/traceability_gate.py @@ -29,6 +29,7 @@ import argparse import json +import os import sys from pathlib import Path from typing import Any @@ -189,6 +190,9 @@ def main() -> int: args.fail_on_broken_test_refs = True metrics_path = Path(args.metrics_json) + workspace_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY", "").strip() + if not metrics_path.is_absolute() and workspace_dir: + metrics_path = Path(workspace_dir) / metrics_path if not metrics_path.exists(): print(f"Error: metrics JSON not found: {metrics_path}", file=sys.stderr) return 1 diff --git a/src/extensions/score_layout/assets/css/score_design.css b/src/extensions/score_layout/assets/css/score_design.css index 887da5ad6..5bd9bc537 100644 --- a/src/extensions/score_layout/assets/css/score_design.css +++ b/src/extensions/score_layout/assets/css/score_design.css @@ -16,8 +16,34 @@ font-size: 1.2em; } + +.score-grid .sd-card { + border: 1px solid rgba(45, 25, 66, 0.15); + box-shadow: none !important; + border-radius: 0.5rem; +} + .score-grid .sd-card-body { - text-align: justify; + text-align: center; + padding: 0.75rem; +} + +.score-grid .sd-card-body img { + display: 
block; + width: 100%; + max-width: 420px; + height: auto; + margin: 0 auto; +} + +.score-grid .needs_filter_warning { + min-height: 280px; + display: flex; + align-items: center; + justify-content: center; + text-align: center; + color: var(--pst-color-text-base); + margin: 0; } details.sd-dropdown summary.sd-card-header + div.sd-summary-content diff --git a/src/extensions/score_layout/assets/css/score_needs.css b/src/extensions/score_layout/assets/css/score_needs.css index 0a800e819..020d04c4c 100644 --- a/src/extensions/score_layout/assets/css/score_needs.css +++ b/src/extensions/score_layout/assets/css/score_needs.css @@ -16,6 +16,36 @@ table.needs_type_tool_req > tbody > tr.head > td { /* Common NEED configs */ /* ######################### */ +/* Improve wrapping and width usage for Sphinx-Needs tables/datatables only */ +.needstable_wrapper .pst-scrollable-table-container, +.needstable_wrapper .dataTables_wrapper, +.needstable_wrapper table.NEEDS_TABLE, +.needstable_wrapper table.NEEDS_DATATABLES, +table.NEEDS_TABLE, +table.NEEDS_DATATABLES { + width: 100%; +} + +.needstable_wrapper table.NEEDS_TABLE, +.needstable_wrapper table.NEEDS_DATATABLES, +table.NEEDS_TABLE, +table.NEEDS_DATATABLES { + table-layout: fixed; +} + +.needstable_wrapper table.NEEDS_TABLE th, +.needstable_wrapper table.NEEDS_TABLE td, +.needstable_wrapper table.NEEDS_DATATABLES th, +.needstable_wrapper table.NEEDS_DATATABLES td, +table.NEEDS_TABLE th, +table.NEEDS_TABLE td, +table.NEEDS_DATATABLES th, +table.NEEDS_DATATABLES td { + white-space: normal; + overflow-wrap: anywhere; + word-break: break-word; +} + /* All need table cells */ table.need.need.need > tbody > tr > td { padding: 0.7em 1.25em; diff --git a/src/extensions/score_metamodel/__init__.py b/src/extensions/score_metamodel/__init__.py index f0b90c8ee..04017121f 100644 --- a/src/extensions/score_metamodel/__init__.py +++ b/src/extensions/score_metamodel/__init__.py @@ -11,10 +11,12 @@ # SPDX-License-Identifier: Apache-2.0 # 
def _write_metrics_json(app: Sphinx, exception: Exception | None) -> None:
    """Dump traceability metrics (schema version "1") to ``<outdir>/metrics.json``.

    Handler for Sphinx's ``build-finished`` event. For every configured
    requirement type a separate summary is computed over all needs known to
    the build, and a fixed subset of that summary is exported.

    Args:
        app: The running Sphinx application; its ``env``, ``config`` and
            ``outdir`` are read.
        exception: The build error passed by Sphinx, if any. Nothing is
            written when the build failed.
    """
    if exception:
        return

    needs_snapshot: list[Any] = list(
        SphinxNeedsData(app.env).get_needs_view().values()
    )

    configured_types = str(
        getattr(app.config, "score_metamodel_requirement_types", "tool_req")
    )
    type_names = {
        name.strip() for name in configured_types.split(",") if name.strip()
    }
    if not type_names:
        # A blank setting falls back to the documented default type.
        type_names = {"tool_req"}

    with_external: bool = bool(
        getattr(app.config, "score_metamodel_include_external_needs", False)
    )

    # Only these parts of the per-type summary are exported.
    exported_sections = (
        "include_not_implemented",
        "include_external",
        "requirements",
        "tests",
    )

    per_type: dict[str, Any] = {}
    for type_name in sorted(type_names):
        summary = compute_traceability_summary(
            all_needs=needs_snapshot,
            requirement_types={type_name},
            include_not_implemented=True,
            filtered_test_types=set(),
            include_external=with_external,
        )
        per_type[type_name] = {
            section: summary[section] for section in exported_sections
        }

    payload: dict[str, Any] = {
        "schema_version": "1",
        "generated_by": "sphinx_build",
        "metrics_by_type": per_type,
    }

    target = Path(app.outdir) / "metrics.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    logger.info(f"Traceability metrics written to: {target}")
+ def _remove_prefix(word: str, prefixes: list[str]) -> str: for prefix in prefixes or []: if isinstance(word, str) and word.startswith(prefix): @@ -264,6 +329,29 @@ def setup(app: Sphinx) -> dict[str, str | bool]: ), ) + app.add_config_value( + "score_metamodel_requirement_types", + "tool_req", + rebuild="env", + description=( + "Comma-separated list of need types treated as requirements for " + "traceability metrics (default: tool_req)." + ), + ) + + app.add_config_value( + "score_metamodel_include_external_needs", + False, + rebuild="env", + description=( + "When True, include external requirements in dashboard and CI metrics. " + "Default is False so each repo gates only its own needs." + ), + ) + + _ = app.connect("config-inited", _configure_traceability_dashboard, priority=498) + + _ = app.connect("build-finished", _write_metrics_json) _ = app.connect("build-finished", _run_checks) return { diff --git a/src/extensions/score_metamodel/checks/traceability_dashboard.py b/src/extensions/score_metamodel/checks/traceability_dashboard.py index b8a1dedc9..b808b412f 100644 --- a/src/extensions/score_metamodel/checks/traceability_dashboard.py +++ b/src/extensions/score_metamodel/checks/traceability_dashboard.py @@ -23,6 +23,23 @@ from ..traceability_metrics import compute_traceability_summary, filter_requirements +_DEFAULT_INCLUDE_EXTERNAL = False + + +def set_default_include_external(include_external: bool) -> None: + """Configure default behaviour for including external requirements.""" + global _DEFAULT_INCLUDE_EXTERNAL + _DEFAULT_INCLUDE_EXTERNAL = bool(include_external) + + +def _include_external(kwargs: dict[str, str | int | float]) -> bool: + """Read include_external override from filter args, else use configured default.""" + raw = kwargs.get("arg2") + if raw is None: + return _DEFAULT_INCLUDE_EXTERNAL + text = str(raw).strip().lower() + return text in {"1", "true", "yes", "on"} + def _requirement_types(kwargs: dict[str, str | int | float]) -> set[str]: raw = 
def _append_unlinked_and_linked(
    needs: list[NeedItem],
    results: list[int],
    kwargs: dict[str, str | int | float],
    section: str,
    linked_key: str,
) -> None:
    """Append ``[total - linked, linked]`` for one section of the summary.

    The summary is computed with not-implemented requirements included and
    without any test-type filtering; requirement types and the external
    switch are taken from the filter arguments.
    """
    summary = compute_traceability_summary(
        all_needs=needs,
        requirement_types=_requirement_types(kwargs),
        include_not_implemented=True,
        filtered_test_types=set(),
        include_external=_include_external(kwargs),
    )

    bucket = summary[section]
    total = int(bucket["total"])
    linked = int(bucket[linked_key])

    results.extend((total - linked, linked))


def pie_requirements_with_test_links(
    needs: list[NeedItem], results: list[int], **kwargs: str | int | float
) -> None:
    """Pie data: requirements lacking a testcase link, then those having one."""
    _append_unlinked_and_linked(
        needs, results, kwargs, "requirements", "with_test_link"
    )


def pie_requirements_fully_linked(
    needs: list[NeedItem], results: list[int], **kwargs: str | int | float
) -> None:
    """Pie data: requirements not fully linked, then fully linked ones."""
    _append_unlinked_and_linked(
        needs, results, kwargs, "requirements", "fully_linked"
    )


def pie_process_requirements_linked(
    needs: list[NeedItem], results: list[int], **kwargs: str | int | float
) -> None:
    """Pie data: process requirements not linked, then linked ones."""
    _append_unlinked_and_linked(
        needs, results, kwargs, "process_requirements", "linked"
    )
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Tests that dashboard filters follow local/external settings.""" + +from src.extensions.score_metamodel.checks import traceability_dashboard +from src.extensions.score_metamodel.checks.traceability_dashboard import ( + pie_process_requirements_linked, + pie_requirements_fully_linked, + pie_requirements_with_test_links, + pie_requirements_with_code_links, + set_default_include_external, +) +from src.extensions.score_metamodel.traceability_metrics import ( + compute_traceability_summary, +) + + +def _needs() -> list[dict[str, object]]: + return [ + { + "id": "LOCAL_REQ", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "", + "testlink": "", + "is_external": False, + }, + { + "id": "LOCAL_SYS_REQ", + "type": "sys_req", + "implemented": "YES", + "source_code_link": "", + "testlink": "T_LOCAL", + "is_external": False, + }, + { + "id": "EXT_REQ", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/ext.py:10", + "testlink": "T_EXT", + "is_external": True, + }, + ] + + +def test_dashboard_defaults_to_local_only() -> None: + set_default_include_external(False) + + results: list[int] = [] + pie_requirements_with_code_links(_needs(), results, arg1="tool_req") + + summary = compute_traceability_summary( + all_needs=_needs(), + requirement_types={"tool_req"}, + include_not_implemented=True, + filtered_test_types=set(), + include_external=False, + ) + + assert results == [1, 0] + assert results == [ + summary["requirements"]["total"] - summary["requirements"]["with_code_link"], + summary["requirements"]["with_code_link"], + ] + + +def test_dashboard_can_include_external_via_default_flag() -> None: + 
def test_dashboard_filter_arg_can_override_default() -> None:
    """An explicit ``arg2="false"`` wins over a module default of ``True``."""
    set_default_include_external(True)
    try:
        results: list[int] = []
        pie_requirements_with_code_links(
            _needs(), results, arg1="tool_req", arg2="false"
        )
    finally:
        # The default is module state; put it back to its initial value so
        # this test cannot influence tests that run after it.
        set_default_include_external(False)

    # Only LOCAL_REQ is counted, and it has no source code link.
    assert results == [1, 0]
def test_requirements_fully_linked_can_include_external() -> None:
    """With the default set to ``True`` the external requirement is counted."""
    set_default_include_external(True)
    try:
        results: list[int] = []
        pie_requirements_fully_linked(_needs(), results, arg1="tool_req")
    finally:
        # The default is module state; put it back to its initial value so
        # this test cannot influence tests that run after it.
        set_default_include_external(False)

    # LOCAL_REQ has no links, EXT_REQ has both a code link and a test link.
    assert results == [1, 1]
b/src/extensions/score_metamodel/tests/test_traceability_metrics.py @@ -0,0 +1,204 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Unit tests for traceability_metrics include_external handling.""" + +from src.extensions.score_metamodel.traceability_metrics import ( + compute_traceability_summary, + filter_requirements, +) + + +def _needs() -> list[dict[str, object]]: + return [ + { + "id": "LOCAL_REQ", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/local.py:1", + "testlink": "tests/test_local.py::test_ok", + "is_external": False, + }, + { + "id": "EXT_REQ", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/external.py:9", + "testlink": "tests/test_external.py::test_ok", + "is_external": True, + }, + { + "id": "TC_1", + "type": "testcase", + "partially_verifies": "LOCAL_REQ", + "fully_verifies": "", + "is_external": False, + }, + ] + + +def test_filter_requirements_defaults_to_local_only() -> None: + filtered = filter_requirements( + _needs(), + requirement_types={"tool_req"}, + include_not_implemented=True, + ) + + assert [need["id"] for need in filtered] == ["LOCAL_REQ"] + + +def test_filter_requirements_can_include_external_needs() -> None: + filtered = filter_requirements( + _needs(), + requirement_types={"tool_req"}, + include_not_implemented=True, + include_external=True, + ) + + assert sorted(need["id"] for need in filtered) == ["EXT_REQ", "LOCAL_REQ"] + + +def 
def test_compute_traceability_summary_process_requirements_summary() -> None:
    """Process requirements are counted once, however many tool_reqs link them.

    Two tool requirements reference ``PR_LOCAL_1`` (one via a comma-separated
    string, one via a list), one of them also references ``PR_LOCAL_2``, and
    nothing references ``PR_LOCAL_3``. ``OTHER_REQ`` is not a known process
    requirement.
    """
    import math  # local import keeps this test self-contained

    summary = compute_traceability_summary(
        all_needs=[
            {
                "id": "TOOL_REQ_1",
                "type": "tool_req",
                "implemented": "YES",
                "source_code_link": "src/req.py:10",
                "testlink": "tests/test_req.py::test_ok",
                "satisfies": "PR_LOCAL_1,OTHER_REQ",
                "is_external": False,
            },
            {
                "id": "TOOL_REQ_2",
                "type": "tool_req",
                "implemented": "YES",
                "source_code_link": "src/req.py:20",
                "testlink": "tests/test_req.py::test_ok_2",
                "satisfies": ["PR_LOCAL_1", "PR_LOCAL_2"],
                "is_external": False,
            },
            {
                "id": "PR_LOCAL_1",
                "type": "process_req",
                "is_external": False,
            },
            {
                "id": "PR_LOCAL_2",
                "type": "gd_req",
                "is_external": False,
            },
            {
                "id": "PR_LOCAL_3",
                "type": "gd_req",
                "is_external": False,
            },
        ],
        requirement_types={"tool_req"},
        include_not_implemented=True,
        filtered_test_types=set(),
        include_external=False,
    )

    process_requirements = summary["process_requirements"]

    assert process_requirements["total"] == 3
    assert process_requirements["linked_by_tool_requirements"] == 2
    # 2/3 is not exactly representable: (2 / 3) * 100 and 200 / 3 differ in
    # the last bit, so compare with a tolerance instead of pinning one
    # particular order of floating-point operations.
    assert math.isclose(
        process_requirements["linked_by_tool_requirements_pct"], 200 / 3
    )
    assert process_requirements["unlinked_ids"] == ["PR_LOCAL_3"]
test_compute_traceability_summary_process_requirements_respects_include_external() -> None: + all_needs = [ + { + "id": "TOOL_REQ_LOCAL", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/local.py:1", + "testlink": "tests/test_local.py::test_ok", + "satisfies": "PR_LOCAL", + "is_external": False, + }, + { + "id": "TOOL_REQ_EXTERNAL", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/external.py:1", + "testlink": "tests/test_external.py::test_ok", + "satisfies": "PR_EXTERNAL", + "is_external": True, + }, + { + "id": "PR_LOCAL", + "type": "gd_req", + "is_external": False, + }, + { + "id": "PR_EXTERNAL", + "type": "gd_req", + "is_external": True, + }, + ] + + summary_local = compute_traceability_summary( + all_needs=all_needs, + requirement_types={"tool_req"}, + include_not_implemented=True, + filtered_test_types=set(), + include_external=False, + ) + summary_all = compute_traceability_summary( + all_needs=all_needs, + requirement_types={"tool_req"}, + include_not_implemented=True, + filtered_test_types=set(), + include_external=True, + ) + + assert summary_local["process_requirements"] == { + "total": 1, + "linked": 1, + "linked_by_tool_requirements": 1, + "linked_by_tool_requirements_pct": 100.0, + "unlinked_ids": [], + } + assert summary_all["process_requirements"] == { + "total": 2, + "linked": 2, + "linked_by_tool_requirements": 2, + "linked_by_tool_requirements_pct": 100.0, + "unlinked_ids": [], + } diff --git a/src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py b/src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py new file mode 100644 index 000000000..3e8505d0e --- /dev/null +++ b/src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py @@ -0,0 +1,82 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) 
distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Tests for Sphinx-side metrics.json generation defaults.""" + +import json +from pathlib import Path +from types import SimpleNamespace + +import src.extensions.score_metamodel.__init__ as metamodel_init + + +class _FakeNeedsData: + def __init__(self, env: object): + self._env = env + + def get_needs_view(self) -> dict[str, dict[str, object]]: + return { + "LOCAL_REQ": { + "id": "LOCAL_REQ", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "", + "testlink": "", + "is_external": False, + }, + "EXT_REQ": { + "id": "EXT_REQ", + "type": "tool_req", + "implemented": "NO", + "source_code_link": "src/ext.py:1", + "testlink": "", + "is_external": True, + }, + } + + +def _app(tmp_path: Path, include_external: bool) -> SimpleNamespace: + return SimpleNamespace( + env=object(), + outdir=str(tmp_path), + config=SimpleNamespace( + score_metamodel_requirement_types="tool_req", + score_metamodel_include_external_needs=include_external, + ), + ) + + +def test_write_metrics_json_defaults_to_local_only(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setattr(metamodel_init, "SphinxNeedsData", _FakeNeedsData) + + metamodel_init._write_metrics_json(_app(tmp_path, include_external=False), None) + + payload = json.loads((tmp_path / "metrics.json").read_text(encoding="utf-8")) + metrics = payload["metrics_by_type"]["tool_req"] + + assert payload["schema_version"] == "1" + assert metrics["include_not_implemented"] is True + assert metrics["include_external"] is False + assert metrics["requirements"]["total"] == 1 + + +def 
test_write_metrics_json_can_include_external(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setattr(metamodel_init, "SphinxNeedsData", _FakeNeedsData) + + metamodel_init._write_metrics_json(_app(tmp_path, include_external=True), None) + + payload = json.loads((tmp_path / "metrics.json").read_text(encoding="utf-8")) + metrics = payload["metrics_by_type"]["tool_req"] + + assert metrics["include_external"] is True + assert metrics["requirements"]["total"] == 2 diff --git a/src/extensions/score_metamodel/traceability_metrics.py b/src/extensions/score_metamodel/traceability_metrics.py index 184fbd1b4..08e1723e9 100644 --- a/src/extensions/score_metamodel/traceability_metrics.py +++ b/src/extensions/score_metamodel/traceability_metrics.py @@ -56,13 +56,16 @@ def filter_requirements( all_needs: Sequence[Any], requirement_types: set[str], include_not_implemented: bool, + include_external: bool = False, ) -> list[Any]: - """Extract requirements by type and implementation state.""" + """Extract requirements by type, implementation state, and origin.""" requirements: list[dict[str, Any]] = [] for need in all_needs: need_type = str(need.get("type", "")).strip() if need_type not in requirement_types: continue + if not include_external and need.get("is_external", False): + continue if not include_not_implemented: implemented = str(need.get("implemented", "")).upper().strip() if implemented not in {"YES", "PARTIAL"}: @@ -168,17 +171,66 @@ def calculate_test_metrics( } +def calculate_process_requirement_metrics( + all_needs: Sequence[Any], + include_not_implemented: bool, + include_external: bool, +) -> dict[str, Any]: + """Calculate process-requirement coverage via tool_req ``satisfies`` links.""" + process_requirements = [ + need + for need in all_needs + if str(need.get("type", "")).strip() in {"gd_req", "process_req"} + and (include_external or not need.get("is_external", False)) + ] + process_requirement_ids = { + str(need.get("id", "")).strip() + for need in 
process_requirements + if need.get("id") + } + + tool_requirements = filter_requirements( + all_needs, + requirement_types={"tool_req"}, + include_not_implemented=include_not_implemented, + include_external=include_external, + ) + + linked_process_requirement_ids: set[str] = set() + for need in tool_requirements: + satisfies_ids = parse_need_id_list(need.get("satisfies", need.get("Satisfies"))) + for ref_id in satisfies_ids: + if ref_id in process_requirement_ids: + linked_process_requirement_ids.add(ref_id) + + total = len(process_requirement_ids) + linked_by_tool_requirements = len(linked_process_requirement_ids) + unlinked_ids = sorted(process_requirement_ids - linked_process_requirement_ids) + + return { + "total": total, + "linked": linked_by_tool_requirements, + "linked_by_tool_requirements": linked_by_tool_requirements, + "linked_by_tool_requirements_pct": safe_percent( + linked_by_tool_requirements, total + ), + "unlinked_ids": unlinked_ids, + } + + def compute_traceability_summary( all_needs: Sequence[Any], requirement_types: set[str], include_not_implemented: bool, filtered_test_types: set[str], + include_external: bool = False, ) -> dict[str, Any]: """Return full CI/dashboard summary using one shared metric implementation.""" requirements = filter_requirements( all_needs, requirement_types=requirement_types, include_not_implemented=include_not_implemented, + include_external=include_external, ) requirement_ids = { str(need.get("id", "")).strip() for need in requirements if need.get("id") @@ -190,10 +242,17 @@ def compute_traceability_summary( requirement_ids=requirement_ids, filtered_test_types=filtered_test_types, ) + process_requirement_metrics = calculate_process_requirement_metrics( + all_needs, + include_not_implemented=include_not_implemented, + include_external=include_external, + ) return { "requirement_types": sorted(requirement_types), "include_not_implemented": include_not_implemented, + "include_external": include_external, "requirements": 
req_metrics, "tests": test_metrics, + "process_requirements": process_requirement_metrics, } diff --git a/src/extensions/score_source_code_linker/__init__.py b/src/extensions/score_source_code_linker/__init__.py index 50a42462e..73ddc86f1 100644 --- a/src/extensions/score_source_code_linker/__init__.py +++ b/src/extensions/score_source_code_linker/__init__.py @@ -41,15 +41,18 @@ store_source_code_links_combined_json, ) from src.extensions.score_source_code_linker.needlinks import ( + NeedLink, load_source_code_links_json, load_source_code_links_with_metadata_json, ) from src.extensions.score_source_code_linker.repo_source_links import ( + RepoInfo, group_needs_by_repo, load_repo_source_links_json, store_repo_source_links_json, ) from src.extensions.score_source_code_linker.testlink import ( + DataForTestLink, load_data_of_test_case_json, load_test_xml_parsed_json, ) @@ -104,9 +107,15 @@ def build_and_save_combined_file(outdir: Path): source_code_links = load_source_code_links_with_metadata_json( source_code_links_json ) - test_code_links = load_test_xml_parsed_json( - get_cache_filename(outdir, "score_xml_parser_cache.json") - ) + test_cache = get_cache_filename(outdir, "score_xml_parser_cache.json") + if test_cache.exists(): + test_code_links = load_test_xml_parsed_json(test_cache) + else: + LOGGER.debug( + "No score_xml_parser_cache.json found. Continuing without test XML links.", + type="score_source_code_linker", + ) + test_code_links = [] scl_list = group_by_need(source_code_links, test_code_links) store_source_code_links_combined_json( outdir / "score_scl_grouped_cache.json", scl_list @@ -118,7 +127,7 @@ def build_and_save_combined_file(outdir: Path): # ╰──────────────────────────────────────╯ -def setup_source_code_linker(app: Sphinx, ws_root: Path): +def setup_source_code_linker(app: Sphinx, ws_root: Path | None): """ Setting up source_code_linker with all needed options. 
Allows us to only have this run once during live_preview & esbonio @@ -144,11 +153,26 @@ def setup_source_code_linker(app: Sphinx, ws_root: Path): ) score_sourcelinks_json = os.environ.get("SCORE_SOURCELINKS") + if not score_sourcelinks_json: + score_sourcelinks_json = str( + getattr(app.config, "score_sourcelinks_json", "") + ).strip() + if score_sourcelinks_json: + # Reuse existing code paths that expect this env var. + os.environ["SCORE_SOURCELINKS"] = score_sourcelinks_json if score_sourcelinks_json: # No need to generate the JSON file if this env var is set # because it points to an existing file with the needed data. return + if ws_root is None: + LOGGER.info( + "No workspace root found and no SCORE_SOURCELINKS provided. " + "Skipping source-code-link scan.", + type="score_source_code_linker", + ) + return + scl_cache_json = get_cache_filename( app.outdir, "score_source_code_linker_cache.json" ) @@ -277,14 +301,13 @@ def setup_once(app: Sphinx): ) LOGGER.debug(f"DEBUG: Git root is {find_git_root()}") - # Run only for local files! - # ws_root is not set when running on external repositories (dependencies). + # Run for local files if possible. In Bazel sandbox builds, ws_root may be + # unavailable; in that case we can still operate when SCORE_SOURCELINKS + # (or score_sourcelinks_json config) is provided. ws_root = find_ws_root() - if not ws_root: - return - - # When BUILD_WORKSPACE_DIRECTORY is set, we are inside a git repository. - assert find_git_root() + if ws_root: + # When BUILD_WORKSPACE_DIRECTORY is set, we are inside a git repository. + assert find_git_root() # Register & Run (if needed) parsing & saving of JSON caches setup_source_code_linker(app, ws_root) @@ -300,6 +323,13 @@ def setup(app: Sphinx) -> dict[str, str | bool]: # Esbonio will execute setup() on every iteration. # setup_once will only be called once. 
app.add_config_value("KNOWN_GOOD_JSON", default="", rebuild="env", types=str) + app.add_config_value("score_sourcelinks_json", default="", rebuild="env", types=str) + app.add_config_value( + "score_source_code_linker_plain_links", + default=False, + rebuild="env", + types=bool, + ) setup_once(app) return { @@ -327,9 +357,6 @@ def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: env: Buildenvironment, this is filled automatically app: Sphinx app application, this is filled automatically """ - ws_root = find_ws_root() - assert ws_root - Needs_Data = SphinxNeedsData(env) needs = Needs_Data.get_needs_mutable() needs_copy = deepcopy( @@ -352,6 +379,41 @@ def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: scl_by_module = load_repo_source_links_json( get_cache_filename(app.outdir, "score_repo_grouped_scl_cache.json") ) + plain_links = bool( + getattr(app.config, "score_source_code_linker_plain_links", False) + ) + + def _render_code_link(metadata: RepoInfo, link: NeedLink) -> str: + if plain_links: + return ( + "https://github.com/placeholder/placeholder/blob/unknown/" + f"{link.file}#L{link.line}<>{link.file}:{link.line}" + ) + try: + base = get_github_link(metadata, link) + except AssertionError: + LOGGER.info( + "Falling back to local code-link format (no git remote available): " + f"{link.file}:{link.line}", + type="score_source_code_linker", + ) + return f"{link.file}:{link.line}" + return f"{base}<>{link.file}:{link.line}" + + def _render_test_link(metadata: RepoInfo, link: DataForTestLink) -> str: + if plain_links: + return str(link.name) + try: + base = get_github_link(metadata, link) + except AssertionError: + LOGGER.info( + "Falling back to local test-link format (no git remote available): " + f"{link.name}", + type="score_source_code_linker", + ) + return str(link.name) + return f"{base}<>{link.name}" + for module_grouped_needs in scl_by_module: for source_code_links in module_grouped_needs.needs: need = 
find_need(needs_copy, source_code_links.need) @@ -374,11 +436,11 @@ def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: need_as_dict = cast(dict[str, object], need) metadata = module_grouped_needs.repo need_as_dict["source_code_link"] = ", ".join( - f"{get_github_link(metadata, n)}<>{n.file}:{n.line}" + _render_code_link(metadata, n) for n in source_code_links.links.CodeLinks ) need_as_dict["testlink"] = ", ".join( - f"{get_github_link(metadata, n)}<>{n.name}" + _render_test_link(metadata, n) for n in source_code_links.links.TestLinks ) diff --git a/src/extensions/score_source_code_linker/tests/test_xml_parser.py b/src/extensions/score_source_code_linker/tests/test_xml_parser.py index 8fde2cec3..9fd8510aa 100644 --- a/src/extensions/score_source_code_linker/tests/test_xml_parser.py +++ b/src/extensions/score_source_code_linker/tests/test_xml_parser.py @@ -335,6 +335,41 @@ def test_short_hash_consistency_and_format(): assert len(h1) == 5 +def test_construct_and_add_need_uses_fallback_url_for_missing_repo_metadata( + monkeypatch, +): + calls: list[dict[str, object]] = [] + + def _fake_add_external_need(**kwargs: object) -> object: + calls.append(kwargs) + return object() + + monkeypatch.setattr(xml_parser, "add_external_need", _fake_add_external_need) + + testcase = DataOfTestCase( + name="tc_missing_meta", + file="tests/foo_test.py", + line=10, + result="passed", + result_text="", + FullyVerifies="REQ_1", + PartiallyVerifies="", + TestType="requirements-based", + DerivationTechnique="analysis", + repo_name=None, + hash=None, + url=None, + ) + + xml_parser.construct_and_add_need(app=object(), tn=testcase) + + # Must not crash and should create an external need using fallback metadata. 
+ assert len(calls) == 1 + assert calls[0]["external_url"] == ( + "https://github.com/placeholder/placeholder/blob/unknown/tests/foo_test.py#L10" + ) + + # ─────────────[ Boilerplate generated by CoPilot ]───────────── diff --git a/src/extensions/score_source_code_linker/xml_parser.py b/src/extensions/score_source_code_linker/xml_parser.py index 9c741d9f4..302d87469 100644 --- a/src/extensions/score_source_code_linker/xml_parser.py +++ b/src/extensions/score_source_code_linker/xml_parser.py @@ -338,11 +338,22 @@ def construct_and_add_need(app: Sphinx, tn: DataOfTestCase): # and either 'Fully' or 'PartiallyVerifies' should not be None here assert tn.file is not None assert tn.name is not None - assert tn.repo_name is not None - assert tn.hash is not None - assert tn.url is not None - # Have to build metadata here for the gh link func - metadata = RepoInfo(name=tn.repo_name, hash=tn.hash, url=tn.url) + external_url = "" + if tn.repo_name is None or tn.hash is None or tn.url is None: + logger.info( + "Creating testcase need with fallback URL due to incomplete repo metadata: " + f"name={tn.name}, file={tn.file}, repo_name={tn.repo_name}, " + f"hash={tn.hash}, url={tn.url}", + type="score_source_code_linker", + ) + line = tn.line if tn.line is not None else 1 + external_url = ( + f"https://github.com/placeholder/placeholder/blob/unknown/{tn.file}#L{line}" + ) + else: + # Have to build metadata here for the gh link func + metadata = RepoInfo(name=tn.repo_name, hash=tn.hash, url=tn.url) + external_url = get_github_link(metadata, tn) # IDK if this is ideal or not with contextlib.suppress(BaseException): _ = add_external_need( @@ -352,7 +363,7 @@ def construct_and_add_need(app: Sphinx, tn: DataOfTestCase): tags="TEST", id=f"testcase__{tn.name}_{short_hash(tn.file + tn.name)}", name=tn.name, - external_url=get_github_link(metadata, tn), + external_url=external_url, fully_verifies=tn.FullyVerifies if tn.FullyVerifies is not None else "", 
partially_verifies=tn.PartiallyVerifies if tn.PartiallyVerifies is not None From a93233b7b18a5411c6c817e1316130c9491125bd Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 27 Apr 2026 13:43:23 +0000 Subject: [PATCH 13/27] lint fix --- docs/how-to/dashboards_and_quality_gates.rst | 2 +- docs/internals/requirements/tooling_verification.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/how-to/dashboards_and_quality_gates.rst b/docs/how-to/dashboards_and_quality_gates.rst index 2a47110a7..a38879bf4 100644 --- a/docs/how-to/dashboards_and_quality_gates.rst +++ b/docs/how-to/dashboards_and_quality_gates.rst @@ -180,4 +180,4 @@ Related Guides - :ref:`setup` - :doc:`other_modules` - :doc:`source_to_doc_links` -- :doc:`test_to_doc_links` \ No newline at end of file +- :doc:`test_to_doc_links` diff --git a/docs/internals/requirements/tooling_verification.rst b/docs/internals/requirements/tooling_verification.rst index c966e77de..5b477043d 100644 --- a/docs/internals/requirements/tooling_verification.rst +++ b/docs/internals/requirements/tooling_verification.rst @@ -97,4 +97,4 @@ All passed Tests .. 
needtable:: SUCCESSFUL TESTS - optional metadata :filter: result == "passed" :tags: TEST - :columns: name as "testcase";test_type;derivation_technique \ No newline at end of file + :columns: name as "testcase";test_type;derivation_technique From 6e1e0aa3f19c662856511caeea59810814c8a97e Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 27 Apr 2026 14:06:42 +0000 Subject: [PATCH 14/27] fixed liniting issues --- .../checks/traceability_dashboard.py | 23 ++- .../tests/test_traceability_dashboard.py | 29 ++- .../tests/test_traceability_metrics.py | 4 +- ...st_traceability_metrics_json_generation.py | 22 ++- .../score_source_code_linker/__init__.py | 179 +++++++++++------- .../tests/test_xml_parser.py | 12 +- 6 files changed, 173 insertions(+), 96 deletions(-) diff --git a/src/extensions/score_metamodel/checks/traceability_dashboard.py b/src/extensions/score_metamodel/checks/traceability_dashboard.py index b808b412f..a59087928 100644 --- a/src/extensions/score_metamodel/checks/traceability_dashboard.py +++ b/src/extensions/score_metamodel/checks/traceability_dashboard.py @@ -19,6 +19,9 @@ from __future__ import annotations +from collections.abc import Sequence +from typing import Any + from sphinx_needs.need_item import NeedItem from ..traceability_metrics import compute_traceability_summary, filter_requirements @@ -48,7 +51,9 @@ def _requirement_types(kwargs: dict[str, str | int | float]) -> set[str]: def pie_requirements_status( - needs: list[NeedItem], results: list[int], **kwargs: str | int | float + needs: Sequence[NeedItem | dict[str, Any]], + results: list[int], + **kwargs: str | int | float, ) -> None: """Dashboard status split: not implemented, implemented/incomplete, fully linked.""" req_types = _requirement_types(kwargs) @@ -84,7 +89,9 @@ def pie_requirements_status( def pie_requirements_with_code_links( - needs: list[NeedItem], results: list[int], **kwargs: str | int | float + needs: Sequence[NeedItem | dict[str, Any]], + results: list[int], + 
**kwargs: str | int | float, ) -> None: """Dashboard split: requirements with and without source code links.""" req_types = _requirement_types(kwargs) @@ -105,7 +112,9 @@ def pie_requirements_with_code_links( def pie_requirements_with_test_links( - needs: list[NeedItem], results: list[int], **kwargs: str | int | float + needs: Sequence[NeedItem | dict[str, Any]], + results: list[int], + **kwargs: str | int | float, ) -> None: """Dashboard split: requirements with and without testcase links.""" req_types = _requirement_types(kwargs) @@ -126,7 +135,9 @@ def pie_requirements_with_test_links( def pie_requirements_fully_linked( - needs: list[NeedItem], results: list[int], **kwargs: str | int | float + needs: Sequence[NeedItem | dict[str, Any]], + results: list[int], + **kwargs: str | int | float, ) -> None: """Dashboard split: requirements fully linked vs incomplete.""" req_types = _requirement_types(kwargs) @@ -147,7 +158,9 @@ def pie_requirements_fully_linked( def pie_process_requirements_linked( - needs: list[NeedItem], results: list[int], **kwargs: str | int | float + needs: Sequence[NeedItem | dict[str, Any]], + results: list[int], + **kwargs: str | int | float, ) -> None: """Dashboard split: process requirements linked vs not linked.""" req_types = _requirement_types(kwargs) diff --git a/src/extensions/score_metamodel/tests/test_traceability_dashboard.py b/src/extensions/score_metamodel/tests/test_traceability_dashboard.py index 042ecd1e4..2e06b3b7d 100644 --- a/src/extensions/score_metamodel/tests/test_traceability_dashboard.py +++ b/src/extensions/score_metamodel/tests/test_traceability_dashboard.py @@ -13,12 +13,17 @@ """Tests that dashboard filters follow local/external settings.""" +from collections.abc import Sequence +from typing import Any + +import pytest + from src.extensions.score_metamodel.checks import traceability_dashboard from src.extensions.score_metamodel.checks.traceability_dashboard import ( pie_process_requirements_linked, 
pie_requirements_fully_linked, - pie_requirements_with_test_links, pie_requirements_with_code_links, + pie_requirements_with_test_links, set_default_include_external, ) from src.extensions.score_metamodel.traceability_metrics import ( @@ -176,17 +181,17 @@ def test_requirements_fully_linked_can_include_external() -> None: def test_process_requirements_linked_uses_stream_a_process_requirement_totals( - monkeypatch, + monkeypatch: pytest.MonkeyPatch, ) -> None: captured: dict[str, object] = {} def _fake_summary( - all_needs, - requirement_types, - include_not_implemented, - filtered_test_types, - include_external, - ): + all_needs: Sequence[dict[str, Any]], + requirement_types: set[str], + include_not_implemented: bool, + filtered_test_types: set[str], + include_external: bool, + ) -> dict[str, dict[str, int]]: captured["all_needs"] = all_needs captured["requirement_types"] = requirement_types captured["include_not_implemented"] = include_not_implemented @@ -197,10 +202,14 @@ def _fake_summary( "process_requirements": {"total": 4, "linked": 3}, } - monkeypatch.setattr(traceability_dashboard, "compute_traceability_summary", _fake_summary) + monkeypatch.setattr( + traceability_dashboard, "compute_traceability_summary", _fake_summary + ) results: list[int] = [] - pie_process_requirements_linked(_needs(), results, arg1="tool_req,sys_req", arg2="true") + pie_process_requirements_linked( + _needs(), results, arg1="tool_req,sys_req", arg2="true" + ) assert results == [1, 3] assert captured["requirement_types"] == {"tool_req", "sys_req"} diff --git a/src/extensions/score_metamodel/tests/test_traceability_metrics.py b/src/extensions/score_metamodel/tests/test_traceability_metrics.py index f49d13bb1..850f064f6 100644 --- a/src/extensions/score_metamodel/tests/test_traceability_metrics.py +++ b/src/extensions/score_metamodel/tests/test_traceability_metrics.py @@ -141,7 +141,9 @@ def test_compute_traceability_summary_process_requirements_summary() -> None: assert 
process_requirements["unlinked_ids"] == ["PR_LOCAL_3"] -def test_compute_traceability_summary_process_requirements_respects_include_external() -> None: +def test_compute_traceability_summary_process_requirements_respects_include_external() -> ( + None +): all_needs = [ { "id": "TOOL_REQ_LOCAL", diff --git a/src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py b/src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py index 3e8505d0e..764659874 100644 --- a/src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py +++ b/src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py @@ -16,6 +16,10 @@ import json from pathlib import Path from types import SimpleNamespace +from typing import cast + +import pytest +from sphinx.application import Sphinx import src.extensions.score_metamodel.__init__ as metamodel_init @@ -56,10 +60,15 @@ def _app(tmp_path: Path, include_external: bool) -> SimpleNamespace: ) -def test_write_metrics_json_defaults_to_local_only(monkeypatch, tmp_path: Path) -> None: +def test_write_metrics_json_defaults_to_local_only( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: monkeypatch.setattr(metamodel_init, "SphinxNeedsData", _FakeNeedsData) - metamodel_init._write_metrics_json(_app(tmp_path, include_external=False), None) + metamodel_init._write_metrics_json( + cast(Sphinx, _app(tmp_path, include_external=False)), + None, + ) payload = json.loads((tmp_path / "metrics.json").read_text(encoding="utf-8")) metrics = payload["metrics_by_type"]["tool_req"] @@ -70,10 +79,15 @@ def test_write_metrics_json_defaults_to_local_only(monkeypatch, tmp_path: Path) assert metrics["requirements"]["total"] == 1 -def test_write_metrics_json_can_include_external(monkeypatch, tmp_path: Path) -> None: +def test_write_metrics_json_can_include_external( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: monkeypatch.setattr(metamodel_init, 
"SphinxNeedsData", _FakeNeedsData) - metamodel_init._write_metrics_json(_app(tmp_path, include_external=True), None) + metamodel_init._write_metrics_json( + cast(Sphinx, _app(tmp_path, include_external=True)), + None, + ) payload = json.loads((tmp_path / "metrics.json").read_text(encoding="utf-8")) metrics = payload["metrics_by_type"]["tool_req"] diff --git a/src/extensions/score_source_code_linker/__init__.py b/src/extensions/score_source_code_linker/__init__.py index 73ddc86f1..a668225d7 100644 --- a/src/extensions/score_source_code_linker/__init__.py +++ b/src/extensions/score_source_code_linker/__init__.py @@ -23,7 +23,7 @@ import os from copy import deepcopy from pathlib import Path -from typing import cast +from typing import Any, cast from sphinx.application import Sphinx from sphinx.environment import BuildEnvironment @@ -346,6 +346,102 @@ def find_need(all_needs: NeedsMutable, id: str) -> NeedItem | None: return all_needs.get(id) +def _log_existing_links(needs: NeedsMutable) -> None: + """Emit debug logs for needs that already contain source/test links.""" + if LOGGER.getEffectiveLevel() < 10: + return + + for need_id, need in needs.items(): + if need.get("source_code_link"): + LOGGER.debug( + f"?? Need {need_id} already has source_code_link: " + f"{need.get('source_code_link')}" + ) + if need.get("testlink"): + LOGGER.debug( + f"?? 
Need {need_id} already has testlink: {need.get('testlink')}" + ) + + +def _render_code_link(plain_links: bool, metadata: RepoInfo, link: NeedLink) -> str: + if plain_links: + return ( + "https://github.com/placeholder/placeholder/blob/unknown/" + f"{link.file}#L{link.line}<>{link.file}:{link.line}" + ) + try: + base = get_github_link(metadata, link) + except AssertionError: + LOGGER.info( + "Falling back to local code-link format (no git remote available): " + f"{link.file}:{link.line}", + type="score_source_code_linker", + ) + return f"{link.file}:{link.line}" + return f"{base}<>{link.file}:{link.line}" + + +def _render_test_link( + plain_links: bool, + metadata: RepoInfo, + link: DataForTestLink, +) -> str: + if plain_links: + return str(link.name) + try: + base = get_github_link(metadata, link) + except AssertionError: + LOGGER.info( + "Falling back to local test-link format (no git remote available): " + f"{link.name}", + type="score_source_code_linker", + ) + return str(link.name) + return f"{base}<>{link.name}" + + +def _warn_missing_need(source_code_links: object) -> None: + links = cast(Any, source_code_links).links + need_id = cast(Any, source_code_links).need + + for code_link in links.CodeLinks: + LOGGER.warning( + f"{code_link.file}:{code_link.line}: Could not find {need_id} " + "in documentation [CODE LINK]", + type="score_source_code_linker", + ) + for test_link in links.TestLinks: + LOGGER.warning( + f"{test_link.file}:{test_link.line}: Could not find {need_id} " + "in documentation [TEST LINK]", + type="score_source_code_linker", + ) + + +def _apply_links_to_need( + needs_data: SphinxNeedsData, + need: NeedItem, + source_code_links: object, + metadata: RepoInfo, + plain_links: bool, +) -> None: + links = cast(Any, source_code_links).links + need_as_dict = cast(dict[str, object], need) + need_as_dict["source_code_link"] = ", ".join( + _render_code_link(plain_links, metadata, code_link) + for code_link in links.CodeLinks + ) + need_as_dict["testlink"] 
= ", ".join( + _render_test_link(plain_links, metadata, test_link) + for test_link in links.TestLinks + ) + + # NOTE: Removing & adding the need is important to make sure + # the needs gets 're-evaluated'. + needs_data.remove_need(need["id"]) + needs_data.add_need(need) + + # re-qid: gd_req__req__attr_impl def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: """ @@ -357,24 +453,13 @@ def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: env: Buildenvironment, this is filled automatically app: Sphinx app application, this is filled automatically """ - Needs_Data = SphinxNeedsData(env) - needs = Needs_Data.get_needs_mutable() + needs_data = SphinxNeedsData(env) + needs = needs_data.get_needs_mutable() needs_copy = deepcopy( needs ) # TODO: why do we create a copy? Can we also needs_copy = needs[:]? copy(needs)? - # Enabled automatically for DEBUGGING - if LOGGER.getEffectiveLevel() >= 10: - for id, need in needs.items(): - if need.get("source_code_link"): - LOGGER.debug( - f"?? Need {id} already has source_code_link: " - f"{need.get('source_code_link')}" - ) - if need.get("testlink"): - LOGGER.debug( - f"?? 
Need {id} already has testlink: {need.get('testlink')}" - ) + _log_existing_links(needs) scl_by_module = load_repo_source_links_json( get_cache_filename(app.outdir, "score_repo_grouped_scl_cache.json") @@ -383,71 +468,21 @@ def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: getattr(app.config, "score_source_code_linker_plain_links", False) ) - def _render_code_link(metadata: RepoInfo, link: NeedLink) -> str: - if plain_links: - return ( - "https://github.com/placeholder/placeholder/blob/unknown/" - f"{link.file}#L{link.line}<>{link.file}:{link.line}" - ) - try: - base = get_github_link(metadata, link) - except AssertionError: - LOGGER.info( - "Falling back to local code-link format (no git remote available): " - f"{link.file}:{link.line}", - type="score_source_code_linker", - ) - return f"{link.file}:{link.line}" - return f"{base}<>{link.file}:{link.line}" - - def _render_test_link(metadata: RepoInfo, link: DataForTestLink) -> str: - if plain_links: - return str(link.name) - try: - base = get_github_link(metadata, link) - except AssertionError: - LOGGER.info( - "Falling back to local test-link format (no git remote available): " - f"{link.name}", - type="score_source_code_linker", - ) - return str(link.name) - return f"{base}<>{link.name}" - for module_grouped_needs in scl_by_module: for source_code_links in module_grouped_needs.needs: need = find_need(needs_copy, source_code_links.need) if need is None: # TODO: print github annotations as in https://github.com/eclipse-score/bazel_registry/blob/7423b9996a45dd0a9ec868e06a970330ee71cf4f/tools/verify_semver_compatibility_level.py#L126-L129 - for n in source_code_links.links.CodeLinks: - LOGGER.warning( - f"{n.file}:{n.line}: Could not find {source_code_links.need} " - "in documentation [CODE LINK]", - type="score_source_code_linker", - ) - for n in source_code_links.links.TestLinks: - LOGGER.warning( - f"{n.file}:{n.line}: Could not find {source_code_links.need} " - "in documentation [TEST 
LINK]", - type="score_source_code_linker", - ) + _warn_missing_need(source_code_links) continue - need_as_dict = cast(dict[str, object], need) - metadata = module_grouped_needs.repo - need_as_dict["source_code_link"] = ", ".join( - _render_code_link(metadata, n) - for n in source_code_links.links.CodeLinks + _apply_links_to_need( + needs_data=needs_data, + need=need, + source_code_links=source_code_links, + metadata=module_grouped_needs.repo, + plain_links=plain_links, ) - need_as_dict["testlink"] = ", ".join( - _render_test_link(metadata, n) - for n in source_code_links.links.TestLinks - ) - - # NOTE: Removing & adding the need is important to make sure - # the needs gets 're-evaluated'. - Needs_Data.remove_need(need["id"]) - Needs_Data.add_need(need) # ╭──────────────────────────────────────╮ diff --git a/src/extensions/score_source_code_linker/tests/test_xml_parser.py b/src/extensions/score_source_code_linker/tests/test_xml_parser.py index 9fd8510aa..f161aba59 100644 --- a/src/extensions/score_source_code_linker/tests/test_xml_parser.py +++ b/src/extensions/score_source_code_linker/tests/test_xml_parser.py @@ -26,13 +26,14 @@ import xml.etree.ElementTree as ET from collections.abc import Callable from pathlib import Path -from typing import Any +from typing import Any, cast from unittest.mock import patch import pytest # This depends on the `attribute_plugin` in our tooling repository from attribute_plugin import add_test_properties # type: ignore[import-untyped] +from sphinx.application import Sphinx import src.extensions.score_source_code_linker.xml_parser as xml_parser from src.extensions.score_source_code_linker.testlink import DataOfTestCase @@ -336,7 +337,7 @@ def test_short_hash_consistency_and_format(): def test_construct_and_add_need_uses_fallback_url_for_missing_repo_metadata( - monkeypatch, + monkeypatch: pytest.MonkeyPatch, ): calls: list[dict[str, object]] = [] @@ -349,7 +350,7 @@ def _fake_add_external_need(**kwargs: object) -> object: testcase = 
DataOfTestCase( name="tc_missing_meta", file="tests/foo_test.py", - line=10, + line="10", result="passed", result_text="", FullyVerifies="REQ_1", @@ -361,7 +362,10 @@ def _fake_add_external_need(**kwargs: object) -> object: url=None, ) - xml_parser.construct_and_add_need(app=object(), tn=testcase) + xml_parser.construct_and_add_need( + app=cast(Sphinx, object()), + tn=testcase, + ) # Must not crash and should create an external need using fallback metadata. assert len(calls) == 1 From c8e40584d11701d780d56d0271a1fccdf046a6fc Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 27 Apr 2026 14:45:46 +0000 Subject: [PATCH 15/27] improved description --- .../requirements/implementation_state.rst | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/docs/internals/requirements/implementation_state.rst b/docs/internals/requirements/implementation_state.rst index afa31f008..300892a2b 100644 --- a/docs/internals/requirements/implementation_state.rst +++ b/docs/internals/requirements/implementation_state.rst @@ -29,10 +29,43 @@ Overview -------- .. needpie:: Requirements Status - :labels: not implemented, implemented but incomplete docs, fully documented + :labels: not implemented, implemented but incomplete traceability, fully linked :colors: red,yellow, green :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_status(tool_req) +Jump to evidence tables: + +- :ref:`tooling_coverage_table_impl_links` +- :ref:`tooling_coverage_table_process_mapping` + +How To Read These Levels +------------------------ + +The overview pie combines implementation state and traceability evidence: + +- ``not implemented``: + requirement has ``implemented == NO``. +- ``implemented but incomplete traceability``: + requirement has ``implemented == YES`` or ``implemented == PARTIAL``, + but is missing at least one traceability link (code link and/or test link). 
+- ``fully linked``: + requirement has both ``source_code_link`` and ``testlink``. + +Implementation labels used on this page: + +- ``NO``: requirement is not implemented. +- ``PARTIAL``: requirement is partly implemented. +- ``YES``: requirement is implemented. + +Why multiple pies are shown: + +- ``Requirements with Codelinks`` shows implementation-to-source traceability. +- ``Requirements with linked tests`` shows implementation-to-verification traceability. +- ``Requirements fully linked`` is the strict roll-up (both links present). + +These are intentionally separate because they answer different diagnostics: +missing code links, missing test links, or both. + In Detail --------- @@ -49,6 +82,8 @@ In Detail type == 'tool_req' and implemented == 'PARTIAL' type == 'tool_req' and implemented == 'YES' + See table: :ref:`tooling_coverage_table_impl_links` + .. grid-item-card:: .. needpie:: Requirements with Codelinks @@ -56,6 +91,8 @@ In Detail :colors: red, green :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_with_code_links(tool_req) + See table: :ref:`tooling_coverage_table_impl_links` + .. grid-item-card:: .. needpie:: Requirements with linked tests @@ -63,6 +100,8 @@ In Detail :colors: red, green :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_with_test_links(tool_req) + See table: :ref:`tooling_coverage_table_impl_links` + .. grid-item-card:: .. needpie:: Requirements fully linked (code + tests) @@ -70,6 +109,8 @@ In Detail :colors: orange, green :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_fully_linked(tool_req) + See table: :ref:`tooling_coverage_table_impl_links` + .. grid-item-card:: .. 
needpie:: Process requirements linked by tool requirements @@ -77,15 +118,21 @@ In Detail :colors: red, green :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_process_requirements_linked(tool_req,true) + See table: :ref:`tooling_coverage_table_process_mapping` + Process-to-Tool Mapping ----------------------- +.. _tooling_coverage_table_process_mapping: + .. needtable:: Process requirement -> tool requirement mapping :types: tool_req :columns: satisfies as "Process Requirement";id as "Tool Requirement" :style: table +.. _tooling_coverage_table_impl_links: + .. needtable:: Tool requirement implementation and links :types: tool_req :columns: id as "Tool Requirement";implemented;source_code_link;testlink From 44375804c3d4b93b3263b9f4fa46087f11942ef2 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 27 Apr 2026 15:18:54 +0000 Subject: [PATCH 16/27] fix warnings --- docs/internals/requirements/implementation_state.rst | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/docs/internals/requirements/implementation_state.rst b/docs/internals/requirements/implementation_state.rst index 300892a2b..d1cb06ad8 100644 --- a/docs/internals/requirements/implementation_state.rst +++ b/docs/internals/requirements/implementation_state.rst @@ -82,8 +82,6 @@ In Detail type == 'tool_req' and implemented == 'PARTIAL' type == 'tool_req' and implemented == 'YES' - See table: :ref:`tooling_coverage_table_impl_links` - .. grid-item-card:: .. needpie:: Requirements with Codelinks @@ -91,8 +89,6 @@ In Detail :colors: red, green :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_with_code_links(tool_req) - See table: :ref:`tooling_coverage_table_impl_links` - .. grid-item-card:: .. 
needpie:: Requirements with linked tests @@ -100,8 +96,6 @@ In Detail :colors: red, green :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_with_test_links(tool_req) - See table: :ref:`tooling_coverage_table_impl_links` - .. grid-item-card:: .. needpie:: Requirements fully linked (code + tests) @@ -109,8 +103,6 @@ In Detail :colors: orange, green :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_fully_linked(tool_req) - See table: :ref:`tooling_coverage_table_impl_links` - .. grid-item-card:: .. needpie:: Process requirements linked by tool requirements @@ -118,8 +110,6 @@ In Detail :colors: red, green :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_process_requirements_linked(tool_req,true) - See table: :ref:`tooling_coverage_table_process_mapping` - Process-to-Tool Mapping ----------------------- From 243aa21189b2f8c3470d03a55210b18f818e3241 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Tue, 28 Apr 2026 12:37:43 +0000 Subject: [PATCH 17/27] fix docs build --- docs/internals/requirements/implementation_state.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/internals/requirements/implementation_state.rst b/docs/internals/requirements/implementation_state.rst index d1cb06ad8..ea7840eb1 100644 --- a/docs/internals/requirements/implementation_state.rst +++ b/docs/internals/requirements/implementation_state.rst @@ -35,8 +35,8 @@ Overview Jump to evidence tables: -- :ref:`tooling_coverage_table_impl_links` -- :ref:`tooling_coverage_table_process_mapping` +- :ref:`Tool Requirement Implementation and Links table ` +- :ref:`Process Requirement to Tool Requirement mapping table ` How To Read These Levels ------------------------ From cf19ba80119f15d6af37ca2beebecff90f646437 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Tue, 28 Apr 2026 13:49:47 +0000 Subject: [PATCH 18/27] fixed review comments(removed 
coverage py to utilize extension) Co-authored-by: Copilot --- docs/how-to/dashboards_and_quality_gates.rst | 2 +- docs/how-to/test_to_doc_links.rst | 30 +- docs/reference/commands.md | 4 +- scripts_bazel/BUILD | 8 - scripts_bazel/tests/BUILD | 9 - .../tests/traceability_coverage_test.py | 241 ---------- scripts_bazel/tests/traceability_gate_test.py | 3 +- scripts_bazel/traceability_coverage.py | 414 ------------------ scripts_bazel/traceability_gate.py | 9 +- .../traceability_metrics_schema.json | 10 +- .../score_metamodel/checks/standards.py | 3 +- .../score_metamodel/sphinx_filters.py | 47 +- 12 files changed, 40 insertions(+), 740 deletions(-) delete mode 100644 scripts_bazel/tests/traceability_coverage_test.py delete mode 100644 scripts_bazel/traceability_coverage.py diff --git a/docs/how-to/dashboards_and_quality_gates.rst b/docs/how-to/dashboards_and_quality_gates.rst index a38879bf4..35df229e0 100644 --- a/docs/how-to/dashboards_and_quality_gates.rst +++ b/docs/how-to/dashboards_and_quality_gates.rst @@ -150,7 +150,7 @@ After building ``//:needs_json``, run the gate on the exported metrics: .. code-block:: bash - bazel run //scripts_bazel:traceability_gate -- \ + bazel run @score_docs_as_code//scripts_bazel:traceability_gate -- \ --metrics-json bazel-bin/needs_json/_build/needs/metrics.json \ --min-req-code 70 \ --min-req-test 70 \ diff --git a/docs/how-to/test_to_doc_links.rst b/docs/how-to/test_to_doc_links.rst index 1a8c28eed..7da9842cb 100644 --- a/docs/how-to/test_to_doc_links.rst +++ b/docs/how-to/test_to_doc_links.rst @@ -64,23 +64,23 @@ CI/CD Gate for Linkage Percentage The traceability tooling uses a **two-step architecture**: -1. ``traceability_coverage`` reads ``needs.json``, computes metrics, and writes - a machine-readable ``metrics.json`` (schema v1). +1. 
The **Sphinx build** computes metrics via the ``score_metamodel`` extension and + writes a machine-readable ``metrics.json`` (schema v1) to the build output + directory alongside ``needs.json``. 2. ``traceability_gate`` reads that ``metrics.json`` and enforces configurable coverage thresholds. -Separating the two steps keeps the CI gate decoupled from the Sphinx/Bazel -build: the gate never parses ``needs.json`` itself. +Separating computation (Sphinx extension, during docs build) from gating (thin +CLI, in CI) keeps the gate decoupled from the Sphinx/Bazel build: it never +parses ``needs.json`` itself and has direct access to all sphinx-needs data. .. note:: ``metrics.json`` is the **single source of truth** for traceability data. It is written by the Sphinx docs build (via the ``score_metamodel`` extension) - to ``_build/needs/metrics.json`` alongside ``needs.json``. The same - ``compute_traceability_summary`` function that powers the dashboard pie charts - produces this file, so the gate and the dashboard always show the same numbers. - The ``traceability_coverage`` CLI is a standalone alternative for repos that - run the coverage check outside of a full Sphinx build. + to ``/metrics.json``. The same computation that powers the dashboard + pie charts produces this file, so the gate and the dashboard always show + the same numbers. .. plantuml:: @@ -88,17 +88,17 @@ build: the gate never parses ``needs.json`` itself. skinparam componentStyle rectangle skinparam defaultTextAlignment center - rectangle "docs build" { - component "calc metrics\n(traceability_coverage)" as coverage + rectangle "docs build (Sphinx + score_metamodel extension)" { + component "calc metrics\n(Sphinx extension\nbuild-finished hook)" as coverage } usecase "test" as test - database "needs.json" as needsjson + database "needs.json\n(sphinx-needs)" as needsjson database "metrics.json\n(v1: metrics per needs type,\ne.g. 
tool_req)" as metricsjson component "gate\n(traceability_gate)" as gate test --> coverage : xml - needsjson --> coverage + needsjson --> coverage : sphinx-needs data\n(already loaded) coverage --> metricsjson metricsjson --> gate gate --> (Pretty output) @@ -108,7 +108,7 @@ build: the gate never parses ``needs.json`` itself. Current workflow: 1. Run tests. -2. Build docs (generates ``needs.json`` **and** ``metrics.json``). +2. Build docs (``score_metamodel`` extension writes ``metrics.json`` automatically). 3. Run the gate against the exported metrics. .. code-block:: bash @@ -124,7 +124,7 @@ Current workflow: --min-tests-linked 100 \ --fail-on-broken-test-refs -In repository CI, wire the coverage target to depend on the test-report and +In repository CI, wire the gate target to depend on the test-report and ``//:needs_json`` targets so Bazel handles the build order automatically. The ``--require-all-links`` shortcut is equivalent to setting all ``--min-*`` diff --git a/docs/reference/commands.md b/docs/reference/commands.md index 091560672..a7a83a056 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -8,10 +8,10 @@ | Target | What it does | | ---------------------------------------------- | ------------------------------------------------------------------------------------------------- | -| `bazel run //:docs` | Builds documentation | +| `bazel run //:docs` | Builds documentation (also writes `metrics.json` via the score_metamodel extension) | | `bazel run //:docs_check` | Verifies documentation correctness | | `bazel run //:docs_combo` | Builds combined documentation with all external dependencies included | -| `bazel run @score_docs_as_code//scripts_bazel:traceability_coverage -- --needs-json bazel-bin/needs_json/needs.json --min-req-code 100 --min-req-test 100 --min-req-fully-linked 100 --min-tests-linked 100 --fail-on-broken-test-refs` | Calculates requirement/test traceability percentages and fails if thresholds are not met | +| 
`bazel run @score_docs_as_code//scripts_bazel:traceability_gate -- --metrics-json bazel-bin/needs_json/_build/needs/metrics.json --min-req-code 100 --min-req-test 100 --min-req-fully-linked 100 --min-tests-linked 100 --fail-on-broken-test-refs` | Reads the pre-computed metrics.json from the docs build and fails if coverage thresholds are not met | | `bazel run //:live_preview` | Creates a live_preview of the documentation viewable in a local server | | `bazel run //:live_preview_combo_experimental` | Creates a live_preview of the full documentation with all dependencies viewable in a local server | | `bazel run //:ide_support` | Sets up a Python venv for esbonio (Remember to restart VS Code!) | diff --git a/scripts_bazel/BUILD b/scripts_bazel/BUILD index 876860f01..58dda85ff 100644 --- a/scripts_bazel/BUILD +++ b/scripts_bazel/BUILD @@ -42,14 +42,6 @@ py_binary( visibility = ["//visibility:public"], ) -py_binary( - name = "traceability_coverage", - srcs = ["traceability_coverage.py"], - main = "traceability_coverage.py", - visibility = ["//visibility:public"], - deps = all_requirements + ["//src/extensions/score_metamodel:score_metamodel"], -) - py_binary( name = "traceability_gate", srcs = ["traceability_gate.py"], diff --git a/scripts_bazel/tests/BUILD b/scripts_bazel/tests/BUILD index 2245c3ba2..b5dc6d722 100644 --- a/scripts_bazel/tests/BUILD +++ b/scripts_bazel/tests/BUILD @@ -37,15 +37,6 @@ score_pytest( pytest_config = "//:pyproject.toml", ) -score_pytest( - name = "traceability_coverage_test", - srcs = ["traceability_coverage_test.py"], - deps = [ - "//scripts_bazel:traceability_coverage", - ] + all_requirements, - pytest_config = "//:pyproject.toml", -) - score_pytest( name = "traceability_gate_test", srcs = ["traceability_gate_test.py"], diff --git a/scripts_bazel/tests/traceability_coverage_test.py b/scripts_bazel/tests/traceability_coverage_test.py deleted file mode 100644 index b6b82eacd..000000000 --- 
a/scripts_bazel/tests/traceability_coverage_test.py +++ /dev/null @@ -1,241 +0,0 @@ -# ******************************************************************************* -# Copyright (c) 2026 Contributors to the Eclipse Foundation -# -# See the NOTICE file(s) distributed with this work for additional -# information regarding copyright ownership. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0 -# -# SPDX-License-Identifier: Apache-2.0 -# ******************************************************************************* - -# ╓ ╖ -# ║ Some portions generated by Github Copilot ║ -# ╙ ╜ - -"""Tests for traceability_coverage.py.""" - -import json -import os -import subprocess -import sys -from pathlib import Path - -_MY_PATH = Path(__file__).parent - - -def _write_needs_json(tmp_path: Path) -> Path: - needs_json = tmp_path / "needs.json" - payload = { - "current_version": "main", - "versions": { - "main": { - "needs": { - "REQ_1": { - "id": "REQ_1", - "type": "tool_req", - "implemented": "YES", - "source_code_link": "src/foo.py:10", - "testlink": "", - }, - "REQ_2": { - "id": "REQ_2", - "type": "tool_req", - "implemented": "PARTIAL", - "source_code_link": "", - "testlink": "tests/test_foo.py::test_bar", - }, - "REQ_3": { - "id": "REQ_3", - "type": "tool_req", - "implemented": "NO", - "source_code_link": "", - "testlink": "", - }, - "TC_1": { - "id": "TC_1", - "type": "testcase", - "partially_verifies": "REQ_1, REQ_2", - "fully_verifies": "", - }, - "TC_2": { - "id": "TC_2", - "type": "testcase", - "partially_verifies": "", - "fully_verifies": "", - }, - "TC_3": { - "id": "TC_3", - "type": "testcase", - "partially_verifies": "", - "fully_verifies": "REQ_UNKNOWN", - }, - } - } - }, - } - needs_json.write_text(json.dumps(payload), encoding="utf-8") - return needs_json - - -def test_traceability_coverage_thresholds_pass(tmp_path: Path) -> 
None: - needs_json = _write_needs_json(tmp_path) - output_json = tmp_path / "summary.json" - - result = subprocess.run( - [ - sys.executable, - _MY_PATH.parent / "traceability_coverage.py", - "--needs-json", - str(needs_json), - "--min-req-code", - "50", - "--min-req-test", - "50", - "--min-req-fully-linked", - "0", - "--min-tests-linked", - "60", - "--json-output", - str(output_json), - ], - capture_output=True, - text=True, - ) - - assert result.returncode == 0 - assert "Threshold check passed." in result.stdout - assert output_json.exists() - - summary = json.loads(output_json.read_text(encoding="utf-8")) - assert summary["schema_version"] == "1" - assert summary["generated_by"] == "traceability_coverage" - assert "tool_req" in summary["metrics_by_type"] - type_metrics = summary["metrics_by_type"]["tool_req"] - assert type_metrics["requirements"]["total"] == 2 - assert type_metrics["requirements"]["with_code_link"] == 1 - assert type_metrics["requirements"]["with_test_link"] == 1 - assert type_metrics["requirements"]["fully_linked"] == 0 - assert type_metrics["tests"]["total"] == 3 - assert type_metrics["tests"]["linked_to_requirements"] == 2 - assert len(type_metrics["tests"]["broken_references"]) == 1 - - -def test_traceability_coverage_thresholds_fail(tmp_path: Path) -> None: - needs_json = _write_needs_json(tmp_path) - - result = subprocess.run( - [ - sys.executable, - _MY_PATH.parent / "traceability_coverage.py", - "--needs-json", - str(needs_json), - "--min-req-code", - "80", - "--min-req-test", - "80", - "--min-req-fully-linked", - "80", - "--min-tests-linked", - "80", - ], - capture_output=True, - text=True, - ) - - assert result.returncode == 2 - assert "Threshold check failed:" in result.stdout - - -def test_traceability_coverage_fails_on_broken_refs(tmp_path: Path) -> None: - needs_json = _write_needs_json(tmp_path) - - result = subprocess.run( - [ - sys.executable, - _MY_PATH.parent / "traceability_coverage.py", - "--needs-json", - str(needs_json), - 
"--min-req-code", - "0", - "--min-req-test", - "0", - "--min-req-fully-linked", - "0", - "--min-tests-linked", - "0", - "--fail-on-broken-test-refs", - ], - capture_output=True, - text=True, - ) - - assert result.returncode == 2 - assert "broken testcase references found:" in result.stdout - - -def test_traceability_coverage_prints_unlinked_requirements(tmp_path: Path) -> None: - needs_json = _write_needs_json(tmp_path) - - result = subprocess.run( - [ - sys.executable, - _MY_PATH.parent / "traceability_coverage.py", - "--needs-json", - str(needs_json), - "--min-req-code", - "0", - "--min-req-test", - "0", - "--min-req-fully-linked", - "0", - "--min-tests-linked", - "0", - "--print-unlinked-requirements", - ], - capture_output=True, - text=True, - ) - - assert result.returncode == 0 - assert "Unlinked requirement details:" in result.stdout - assert "Missing source_code_link: REQ_2" in result.stdout - assert "Missing testlink: REQ_1" in result.stdout - assert "Not fully linked: REQ_1, REQ_2" in result.stdout - - -def test_traceability_coverage_accepts_workspace_relative_needs_json( - tmp_path: Path, -) -> None: - workspace = tmp_path / "workspace" - workspace.mkdir() - needs_json = _write_needs_json(workspace) - - env = dict(os.environ) - env["BUILD_WORKSPACE_DIRECTORY"] = str(workspace) - - result = subprocess.run( - [ - sys.executable, - _MY_PATH.parent / "traceability_coverage.py", - "--needs-json", - "needs.json", - "--min-req-code", - "0", - "--min-req-test", - "0", - "--min-req-fully-linked", - "0", - "--min-tests-linked", - "0", - ], - capture_output=True, - text=True, - cwd=tmp_path, - env=env, - ) - - assert result.returncode == 0 - assert f"Traceability input: {needs_json}" in result.stdout diff --git a/scripts_bazel/tests/traceability_gate_test.py b/scripts_bazel/tests/traceability_gate_test.py index 9019fa942..9ac29331d 100644 --- a/scripts_bazel/tests/traceability_gate_test.py +++ b/scripts_bazel/tests/traceability_gate_test.py @@ -56,8 +56,7 @@ def 
_write_metrics_json(tmp_path: Path, metrics_by_type: dict | None = None) -> } payload = { "schema_version": "1", - "generated_by": "traceability_coverage", - "needs_json": "fake/needs.json", + "generated_by": "sphinx_build", "metrics_by_type": metrics_by_type, } out = tmp_path / "metrics.json" diff --git a/scripts_bazel/traceability_coverage.py b/scripts_bazel/traceability_coverage.py deleted file mode 100644 index da17dcb48..000000000 --- a/scripts_bazel/traceability_coverage.py +++ /dev/null @@ -1,414 +0,0 @@ -# ******************************************************************************* -# Copyright (c) 2026 Contributors to the Eclipse Foundation -# -# See the NOTICE file(s) distributed with this work for additional -# information regarding copyright ownership. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0 -# -# SPDX-License-Identifier: Apache-2.0 -# ******************************************************************************* - -# ╓ ╖ -# ║ Some portions generated by Github Copilot ║ -# ╙ ╜ - -"""Compute requirement and test traceability coverage from sphinx-needs output.""" - -from __future__ import annotations - -import argparse -import importlib.util -import json -import os -import sys -from pathlib import Path -from typing import Any - -# Ensure shared metric code under src/ is importable when executed directly. 
-_REPO_ROOT = Path(__file__).resolve().parent.parent -if str(_REPO_ROOT) not in sys.path: - sys.path.insert(0, str(_REPO_ROOT)) - -# Import only the metrics module, avoid heavy __init__.py -_metrics_path = _REPO_ROOT / "src/extensions/score_metamodel/traceability_metrics.py" -_spec = importlib.util.spec_from_file_location("traceability_metrics", _metrics_path) -if _spec is None or _spec.loader is None: - raise ImportError(f"Failed to load metrics module from {_metrics_path}") -traceability_metrics = importlib.util.module_from_spec(_spec) -_spec.loader.exec_module(traceability_metrics) - -compute_traceability_summary = traceability_metrics.compute_traceability_summary - - -def _load_needs(needs_json: Path) -> list[dict[str, Any]]: - raw = json.loads(needs_json.read_text(encoding="utf-8")) - - if isinstance(raw, list): - return [item for item in raw if isinstance(item, dict)] - - if isinstance(raw, dict): - if "needs" in raw and isinstance(raw["needs"], dict): - return [v for v in raw["needs"].values() if isinstance(v, dict)] - - versions = raw.get("versions") - if isinstance(versions, dict) and versions: - current_version = raw.get("current_version") - selected: Any = None - if isinstance(current_version, str) and current_version in versions: - selected = versions[current_version] - else: - selected = next(iter(versions.values())) - if isinstance(selected, dict): - needs = selected.get("needs") - if isinstance(needs, dict): - return [v for v in needs.values() if isinstance(v, dict)] - - raise ValueError(f"Unsupported needs.json format in {needs_json}") - - -def _default_needs_json_candidates() -> list[Path]: - return [ - Path("_build/needs/needs.json"), - Path("bazel-bin/needs_json/_build/needs/needs.json"), - ] - - -def _find_needs_json(explicit: str | None) -> Path: - if explicit: - raw_path = Path(explicit) - candidates: list[Path] = [raw_path] - - # Under `bazel run` the working directory may be a runfiles tree, so - # also resolve relative paths from the 
workspace root when available. - workspace_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY", "").strip() - if not raw_path.is_absolute() and workspace_dir: - candidates.append(Path(workspace_dir) / raw_path) - - for path in candidates: - if path.exists(): - return path - - raise FileNotFoundError(f"needs.json not found: {raw_path}") - - for candidate in _default_needs_json_candidates(): - if candidate.exists(): - return candidate - - raise FileNotFoundError( - "Could not locate needs.json automatically. Use --needs-json with a valid path." - ) - - -def _apply_argument_shortcuts(args: argparse.Namespace) -> None: - """Apply shortcut arguments like --require-all-links.""" - if args.require_all_links: - args.min_req_code = 100.0 - args.min_req_test = 100.0 - args.min_req_fully_linked = 100.0 - args.min_tests_linked = 100.0 - args.fail_on_broken_test_refs = True - - -def _print_summary( - needs_json: Path, - req_total: int, - req_with_code: int, - req_code_pct: float, - req_with_test: int, - req_test_pct: float, - req_fully_linked: int, - req_fully_linked_pct: float, - req_missing_code: list[str], - req_missing_test: list[str], - req_not_fully_linked: list[str], - print_unlinked: bool, - tests_total: int, - tests_linked: int, - tests_linked_pct: float, - broken_test_references: list[dict[str, str]], -) -> None: - """Print human-readable summary.""" - print(f"Traceability input: {needs_json}") - print("-" * 72) - print( - "Requirements with source links: " - f"{req_with_code}/{req_total} ({req_code_pct:.2f}%)" - ) - print( - "Requirements with test links: " - f"{req_with_test}/{req_total} ({req_test_pct:.2f}%)" - ) - print( - "Requirements fully linked: " - f"{req_fully_linked}/{req_total} ({req_fully_linked_pct:.2f}%)" - ) - if print_unlinked: - print("Unlinked requirement details:") - print( - " Missing source_code_link: " - + (", ".join(sorted(req_missing_code)) if req_missing_code else "") - ) - print( - " Missing testlink: " - + (", ".join(sorted(req_missing_test)) 
if req_missing_test else "") - ) - print( - " Not fully linked: " - + ( - ", ".join(sorted(req_not_fully_linked)) - if req_not_fully_linked - else "" - ) - ) - print( - "Tests linked to requirements: " - f"{tests_linked}/{tests_total} ({tests_linked_pct:.2f}%)" - ) - print(f"Broken test references: {len(broken_test_references)}") - - if broken_test_references: - print("Broken reference details:") - for item in broken_test_references: - print(f" - {item['testcase']} -> {item['missing_need']}") - - -def _check_thresholds( - req_code_pct: float, - min_req_code: float, - req_test_pct: float, - min_req_test: float, - req_fully_linked_pct: float, - min_req_fully_linked: float, - tests_linked_pct: float, - min_tests_linked: float, - broken_test_references: list[dict[str, str]], - fail_on_broken_test_refs: bool, -) -> list[str]: - """Check threshold violations and return failures.""" - failures: list[str] = [] - if req_code_pct < float(min_req_code): - failures.append( - f"requirements with code links {req_code_pct:.2f}% < {min_req_code:.2f}%" - ) - if req_test_pct < float(min_req_test): - failures.append( - f"requirements with test links {req_test_pct:.2f}% < {min_req_test:.2f}%" - ) - if req_fully_linked_pct < float(min_req_fully_linked): - failures.append( - "requirements fully linked " - f"{req_fully_linked_pct:.2f}% < {min_req_fully_linked:.2f}%" - ) - if tests_linked_pct < float(min_tests_linked): - failures.append( - f"tests linked to requirements {tests_linked_pct:.2f}% < {min_tests_linked:.2f}%" - ) - if fail_on_broken_test_refs and broken_test_references: - failures.append( - f"broken testcase references found: {len(broken_test_references)}" - ) - return failures - - -def main() -> int: - parser = argparse.ArgumentParser( - description=( - "Compute requirement/test traceability coverage from sphinx-needs output " - "and optionally fail on threshold violations." - ) - ) - parser.add_argument( - "--needs-json", - default=None, - help=( - "Path to needs.json. 
If omitted, tries _build/needs/needs.json and " - "bazel-bin/needs_json/needs.json" - ), - ) - parser.add_argument( - "--requirement-types", - default="tool_req", - help="Comma separated need types treated as requirements (default: tool_req)", - ) - parser.add_argument( - "--test-types", - default="", - help=( - "Optional comma separated testcase test_type filter (for example unit-test). " - "If empty, all testcase types are included." - ), - ) - parser.add_argument( - "--include-not-implemented", - action="store_true", - help=( - "Include requirements with implemented == NO in requirement denominator. " - "By default only YES/PARTIAL are counted." - ), - ) - parser.add_argument( - "--min-req-code", - type=float, - default=0.0, - help="Minimum required percentage for requirements with source code links", - ) - parser.add_argument( - "--min-req-test", - type=float, - default=0.0, - help="Minimum required percentage for requirements with test links", - ) - parser.add_argument( - "--min-req-fully-linked", - type=float, - default=0.0, - help=( - "Minimum required percentage for requirements with both source code " - "and test links" - ), - ) - parser.add_argument( - "--min-tests-linked", - type=float, - default=0.0, - help="Minimum required percentage for testcases linked to requirements", - ) - parser.add_argument( - "--require-all-links", - action="store_true", - help="Shortcut that enforces 100%% for all three minimum percentages", - ) - parser.add_argument( - "--fail-on-broken-test-refs", - action="store_true", - help="Fail if a testcase references an unknown requirement ID", - ) - parser.add_argument( - "--json-output", - default=None, - help="Optional path to write machine-readable JSON summary", - ) - parser.add_argument( - "--print-unlinked-requirements", - action="store_true", - help=( - "Print IDs of requirements missing source_code_link and/or testlink. " - "Useful when coverage thresholds fail." 
- ), - ) - - args = parser.parse_args() - _apply_argument_shortcuts(args) - - requirement_types = { - item.strip() for item in str(args.requirement_types).split(",") if item.strip() - } - if not requirement_types: - raise ValueError("--requirement-types must not be empty") - - filtered_test_types = { - item.strip() for item in str(args.test_types).split(",") if item.strip() - } - - needs_json = _find_needs_json(args.needs_json) - all_needs = _load_needs(needs_json) - - summary = compute_traceability_summary( - all_needs=all_needs, - requirement_types=requirement_types, - include_not_implemented=args.include_not_implemented, - filtered_test_types=filtered_test_types, - ) - - req_total = int(summary["requirements"]["total"]) - req_with_code = int(summary["requirements"]["with_code_link"]) - req_with_test = int(summary["requirements"]["with_test_link"]) - req_fully_linked = int(summary["requirements"]["fully_linked"]) - req_code_pct = float(summary["requirements"]["with_code_link_pct"]) - req_test_pct = float(summary["requirements"]["with_test_link_pct"]) - req_fully_linked_pct = float(summary["requirements"]["fully_linked_pct"]) - req_missing_code = list(summary["requirements"]["missing_code_link_ids"]) - req_missing_test = list(summary["requirements"]["missing_test_link_ids"]) - req_not_fully_linked = list(summary["requirements"]["not_fully_linked_ids"]) - - tests_total = int(summary["tests"]["total"]) - tests_linked = int(summary["tests"]["linked_to_requirements"]) - tests_linked_pct = float(summary["tests"]["linked_to_requirements_pct"]) - broken_test_references = list(summary["tests"]["broken_references"]) - - # Build per-type metrics for the JSON output (schema v1). - # Each requirement type is computed independently so downstream tools - # (e.g. traceability_gate) can apply per-type thresholds. 
- metrics_by_type: dict[str, Any] = {} - for req_type in sorted(requirement_types): - type_summary = compute_traceability_summary( - all_needs=all_needs, - requirement_types={req_type}, - include_not_implemented=args.include_not_implemented, - filtered_test_types=filtered_test_types, - ) - metrics_by_type[req_type] = { - "include_not_implemented": type_summary["include_not_implemented"], - "requirements": type_summary["requirements"], - "tests": type_summary["tests"], - } - - summary_output = { - "schema_version": "1", - "generated_by": "traceability_coverage", - "needs_json": str(needs_json), - "metrics_by_type": metrics_by_type, - } - - _print_summary( - needs_json, - req_total, - req_with_code, - req_code_pct, - req_with_test, - req_test_pct, - req_fully_linked, - req_fully_linked_pct, - req_missing_code, - req_missing_test, - req_not_fully_linked, - args.print_unlinked_requirements, - tests_total, - tests_linked, - tests_linked_pct, - broken_test_references, - ) - - if args.json_output: - out_file = Path(args.json_output) - out_file.write_text(json.dumps(summary_output, indent=2), encoding="utf-8") - print(f"JSON summary written to: {out_file}") - - failures = _check_thresholds( - req_code_pct, - args.min_req_code, - req_test_pct, - args.min_req_test, - req_fully_linked_pct, - args.min_req_fully_linked, - tests_linked_pct, - args.min_tests_linked, - broken_test_references, - args.fail_on_broken_test_refs, - ) - - if failures: - print("Threshold check failed:") - for msg in failures: - print(f" - {msg}") - return 2 - - print("Threshold check passed.") - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/scripts_bazel/traceability_gate.py b/scripts_bazel/traceability_gate.py index abff691d9..399f43f80 100644 --- a/scripts_bazel/traceability_gate.py +++ b/scripts_bazel/traceability_gate.py @@ -17,8 +17,8 @@ """Traceability gate: read a metrics JSON and enforce coverage thresholds. 
-This script is the "gate" step based from a metrics.json approach. The typical workflow is: - docs build → traceability_coverage --json-output metrics.json +This script is the CI gate for a metrics.json based workflow: + docs build → score_metamodel extension writes metrics.json CI gate → traceability_gate --metrics-json metrics.json [--min-* ...] The gate never parses needs.json itself; it only reads the pre-computed @@ -132,10 +132,7 @@ def main() -> int: parser.add_argument( "--metrics-json", required=True, - help=( - "Path to the metrics JSON produced by " - "'traceability_coverage --json-output '." - ), + help="Path to the metrics JSON produced by the docs build.", ) parser.add_argument( "--need-type", diff --git a/scripts_bazel/traceability_metrics_schema.json b/scripts_bazel/traceability_metrics_schema.json index e2c0a81ee..86c8f1494 100644 --- a/scripts_bazel/traceability_metrics_schema.json +++ b/scripts_bazel/traceability_metrics_schema.json @@ -2,9 +2,9 @@ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://eclipse-score.github.io/docs-as-code/traceability-metrics-schema.json", "title": "Traceability Metrics", - "description": "Per-need-type traceability coverage metrics produced by the docs build (traceability_coverage --json-output). Consumed by traceability_gate to enforce coverage thresholds without re-parsing needs.json.", + "description": "Per-need-type traceability coverage metrics produced by the docs build (score_metamodel Sphinx extension). Consumed by traceability_gate to enforce coverage thresholds without re-parsing needs.json.", "type": "object", - "required": ["schema_version", "generated_by", "needs_json", "metrics_by_type"], + "required": ["schema_version", "generated_by", "metrics_by_type"], "additionalProperties": false, "properties": { "schema_version": { @@ -14,11 +14,7 @@ }, "generated_by": { "type": "string", - "description": "Name of the tool that produced this file (e.g. 'traceability_coverage')." 
- }, - "needs_json": { - "type": "string", - "description": "Path to the needs.json that was used as input." + "description": "Name of the producer that generated this file (e.g. 'sphinx_build')." }, "metrics_by_type": { "type": "object", diff --git a/src/extensions/score_metamodel/checks/standards.py b/src/extensions/score_metamodel/checks/standards.py index c5480aa88..a04d176ba 100644 --- a/src/extensions/score_metamodel/checks/standards.py +++ b/src/extensions/score_metamodel/checks/standards.py @@ -22,7 +22,6 @@ generic_pie_items_by_tag, generic_pie_items_in_relationships, generic_pie_linked_items, - generic_pie_workproducts_by_type, ) # from score_metamodel import ( @@ -247,7 +246,7 @@ def my_pie_linked_standard_workproducts( See: https://sphinx-needs.readthedocs.io/en/latest/filter.html#arguments """ standard = str(kwargs["arg1"]) - generic_pie_workproducts_by_type( + generic_pie_linked_items( needs, results, arg1=f"std_wp__{standard}__", arg2="workproduct" ) diff --git a/src/extensions/score_metamodel/sphinx_filters.py b/src/extensions/score_metamodel/sphinx_filters.py index e48c3b370..0a9788760 100644 --- a/src/extensions/score_metamodel/sphinx_filters.py +++ b/src/extensions/score_metamodel/sphinx_filters.py @@ -45,23 +45,25 @@ def func(needs: list[NeedItem], results: list[int], **kwargs) -> None: ... def generic_pie_linked_items( needs: list[NeedItem], results: list[int], **kwargs: str | int | float ) -> None: - """Count items matching an ID prefix split by compliance linkage. + """Count items matching an ID prefix split by linkage via a named link field. Finds all needs whose ``id`` starts with *arg1*, then checks whether - each one appears in the ``complies`` field of any need whose ``type`` + each one appears in the *arg3* field of any need whose ``type`` starts with *arg2*. :filter-func: arguments: - ``arg1`` – ID prefix of the items to count (e.g. 
``std_req__iso26262__``) - - ``arg2`` – type prefix of the source needs whose ``complies`` - lists are scanned (e.g. ``gd_``) + - ``arg2`` – type prefix of the source needs to scan (e.g. ``gd_``) + - ``arg3`` – name of the link field to scan on source needs + (default: ``complies``) Appends to *results*: ``[linked_count, not_linked_count]`` """ id_prefix = str(kwargs.get("arg1", "")) compliance_prefix = str(kwargs.get("arg2", "")) + link_field = str(kwargs.get("arg3", "complies")) target_ids = [ str(n.get("id", "")) @@ -73,7 +75,7 @@ def generic_pie_linked_items( ref for n in needs if str(n.get("type", "")).startswith(compliance_prefix) - for ref in n.get("complies", []) + for ref in n.get(link_field, []) if ref } @@ -87,29 +89,31 @@ def generic_pie_linked_items( def generic_pie_items_by_tag( needs: list[NeedItem], results: list[int], **kwargs: str | int | float ) -> None: - """Count items carrying a given tag split by compliance linkage. + """Count items carrying a given tag split by linkage via a named link field. Checks every need that has *arg1* in its ``tags`` field and splits them - by whether their id appears in the ``complies`` field of any need whose + by whether their id appears in the *arg3* field of any need whose ``type`` starts with *arg2*. :filter-func: arguments: - ``arg1`` – tag to filter by (e.g. ``aspice40_man5``). Note: tag values must not contain dots. - - ``arg2`` – type prefix of the source needs whose ``complies`` - lists are scanned (e.g. ``gd_``) + - ``arg2`` – type prefix of the source needs to scan (e.g. 
``gd_``) + - ``arg3`` – name of the link field to scan on source needs + (default: ``complies``) Appends to *results*: ``[linked_count, not_linked_count]`` """ tag = str(kwargs.get("arg1", "")) compliance_prefix = str(kwargs.get("arg2", "")) + link_field = str(kwargs.get("arg3", "complies")) linked_ids: set[str] = { ref for n in needs if str(n.get("type", "")).startswith(compliance_prefix) - for ref in n.get("complies", []) + for ref in n.get(link_field, []) if ref } @@ -126,29 +130,6 @@ def generic_pie_items_by_tag( results.append(not_linked) -def generic_pie_workproducts_by_type( - needs: list[NeedItem], results: list[int], **kwargs: str | int | float -) -> None: - """Count work-product items matching an ID prefix split by compliance linkage. - - Semantically equivalent to :func:`generic_pie_linked_items` but scoped to - work-product traceability where the compliance source type is typically an - exact match (e.g. ``workproduct``) rather than a prefix. Because - ``"workproduct".startswith("workproduct")`` is ``True``, both functions use - the same underlying logic. - - :filter-func: arguments: - - - ``arg1`` – ID prefix of the work-product items to count - (e.g. ``std_wp__iso26262__``) - - ``arg2`` – type (or type prefix) of source needs whose ``complies`` - lists are scanned (e.g. 
``workproduct``) - - Appends to *results*: ``[linked_count, not_linked_count]`` - """ - generic_pie_linked_items(needs, results, **kwargs) - - def generic_pie_items_in_relationships( needs: list[NeedItem], results: list[int], **kwargs: str | int | float ) -> None: From 2b8aace267c3414302271c347b4b7e31a7ae08f1 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Tue, 28 Apr 2026 14:04:15 +0000 Subject: [PATCH 19/27] review comment fixes Co-authored-by: Copilot --- .../score_metamodel/sphinx_filters.py | 56 +++++++--------- .../tests/test_sphinx_filters.py | 64 +++++++++++++++++++ .../tests/test_xml_parser.py | 2 +- 3 files changed, 89 insertions(+), 33 deletions(-) create mode 100644 src/extensions/score_metamodel/tests/test_sphinx_filters.py diff --git a/src/extensions/score_metamodel/sphinx_filters.py b/src/extensions/score_metamodel/sphinx_filters.py index 0a9788760..5131751c2 100644 --- a/src/extensions/score_metamodel/sphinx_filters.py +++ b/src/extensions/score_metamodel/sphinx_filters.py @@ -42,27 +42,29 @@ def func(needs: list[NeedItem], results: list[int], **kwargs) -> None: ... from sphinx_needs.need_item import NeedItem -def generic_pie_linked_items( - needs: list[NeedItem], results: list[int], **kwargs: str | int | float -) -> None: - """Count items matching an ID prefix split by linkage via a named link field. +def _matches_source_selector(need: NeedItem, selector: str) -> bool: + """Return whether a need matches a source selector. - Finds all needs whose ``id`` starts with *arg1*, then checks whether - each one appears in the *arg3* field of any need whose ``type`` - starts with *arg2*. + The selector is treated as a prefix and matched against both ``type`` and + ``id`` so filters remain robust when metamodels use explicit id prefixes + that are not coupled to directive names. 
+ """ + need_type = str(need.get("type", "")) + need_id = str(need.get("id", "")) + return need_type.startswith(selector) or need_id.startswith(selector) - :filter-func: arguments: - - ``arg1`` – ID prefix of the items to count - (e.g. ``std_req__iso26262__``) - - ``arg2`` – type prefix of the source needs to scan (e.g. ``gd_``) - - ``arg3`` – name of the link field to scan on source needs - (default: ``complies``) +def generic_pie_linked_items( + needs: list[NeedItem], results: list[int], **kwargs: str | int | float +) -> None: + """Count target IDs by whether they are linked by selected source needs. - Appends to *results*: ``[linked_count, not_linked_count]`` + Arguments are passed via ``arg1`` (target ID prefix), ``arg2`` (source + selector prefix, matched against source ``type`` and ``id``), and ``arg3`` + (link field name, default ``complies``). """ id_prefix = str(kwargs.get("arg1", "")) - compliance_prefix = str(kwargs.get("arg2", "")) + source_selector = str(kwargs.get("arg2", "")) link_field = str(kwargs.get("arg3", "complies")) target_ids = [ @@ -74,7 +76,7 @@ def generic_pie_linked_items( linked_ids: set[str] = { ref for n in needs - if str(n.get("type", "")).startswith(compliance_prefix) + if _matches_source_selector(n, source_selector) for ref in n.get(link_field, []) if ref } @@ -89,30 +91,20 @@ def generic_pie_linked_items( def generic_pie_items_by_tag( needs: list[NeedItem], results: list[int], **kwargs: str | int | float ) -> None: - """Count items carrying a given tag split by linkage via a named link field. - - Checks every need that has *arg1* in its ``tags`` field and splits them - by whether their id appears in the *arg3* field of any need whose - ``type`` starts with *arg2*. - - :filter-func: arguments: - - - ``arg1`` – tag to filter by (e.g. ``aspice40_man5``). - Note: tag values must not contain dots. - - ``arg2`` – type prefix of the source needs to scan (e.g. 
``gd_``) - - ``arg3`` – name of the link field to scan on source needs - (default: ``complies``) + """Count tagged items split by whether selected source needs link them. - Appends to *results*: ``[linked_count, not_linked_count]`` + Arguments are passed via ``arg1`` (tag), ``arg2`` (source selector prefix, + matched against source ``type`` and ``id``), and ``arg3`` (link field + name, default ``complies``). """ tag = str(kwargs.get("arg1", "")) - compliance_prefix = str(kwargs.get("arg2", "")) + source_selector = str(kwargs.get("arg2", "")) link_field = str(kwargs.get("arg3", "complies")) linked_ids: set[str] = { ref for n in needs - if str(n.get("type", "")).startswith(compliance_prefix) + if _matches_source_selector(n, source_selector) for ref in n.get(link_field, []) if ref } diff --git a/src/extensions/score_metamodel/tests/test_sphinx_filters.py b/src/extensions/score_metamodel/tests/test_sphinx_filters.py new file mode 100644 index 000000000..b56a238ef --- /dev/null +++ b/src/extensions/score_metamodel/tests/test_sphinx_filters.py @@ -0,0 +1,64 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +from src.extensions.score_metamodel.sphinx_filters import ( + generic_pie_items_by_tag, + generic_pie_linked_items, +) + + +def test_generic_pie_linked_items_matches_source_by_id_prefix() -> None: + needs = [ + {"id": "std_req__iso26262__001", "type": "std_req"}, + # Type intentionally does not match selector prefix, id does. 
+ { + "id": "gd_guidl__xyz", + "type": "guideline", + "complies": ["std_req__iso26262__001"], + }, + ] + + results: list[int] = [] + generic_pie_linked_items( + needs, + results, + arg1="std_req__iso26262__", + arg2="gd_", + arg3="complies", + ) + + assert results == [1, 0] + + +def test_generic_pie_items_by_tag_matches_source_by_id_prefix() -> None: + needs = [ + {"id": "REQ_A", "type": "tool_req", "tags": ["aspice40_man5"]}, + {"id": "REQ_B", "type": "tool_req", "tags": ["aspice40_man5"]}, + # Type intentionally does not match selector prefix, id does. + { + "id": "gd_req__abc", + "type": "process_requirement", + "complies": ["REQ_A"], + }, + ] + + results: list[int] = [] + generic_pie_items_by_tag( + needs, + results, + arg1="aspice40_man5", + arg2="gd_", + arg3="complies", + ) + + assert results == [1, 1] diff --git a/src/extensions/score_source_code_linker/tests/test_xml_parser.py b/src/extensions/score_source_code_linker/tests/test_xml_parser.py index f161aba59..3c5440918 100644 --- a/src/extensions/score_source_code_linker/tests/test_xml_parser.py +++ b/src/extensions/score_source_code_linker/tests/test_xml_parser.py @@ -26,7 +26,7 @@ import xml.etree.ElementTree as ET from collections.abc import Callable from pathlib import Path -from typing import Any, cast +from typing import Any from unittest.mock import patch import pytest From f137f03c90cd9154e8c6029ce3f2670483d3f53a Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Tue, 28 Apr 2026 14:09:29 +0000 Subject: [PATCH 20/27] replaced for loop by list Co-authored-by: Copilot --- src/extensions/score_metamodel/sphinx_filters.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/extensions/score_metamodel/sphinx_filters.py b/src/extensions/score_metamodel/sphinx_filters.py index 5131751c2..8fd5fca19 100644 --- a/src/extensions/score_metamodel/sphinx_filters.py +++ b/src/extensions/score_metamodel/sphinx_filters.py @@ -156,9 +156,10 @@ def generic_pie_items_in_relationships( if 
ref in item_counts: item_counts[ref] += 1 - not_referenced = sum(1 for c in item_counts.values() if c == 0) - referenced_once = sum(1 for c in item_counts.values() if c == 1) - referenced_multiple = sum(1 for c in item_counts.values() if c > 1) + item_count_values = list(item_counts.values()) + not_referenced = item_count_values.count(0) + referenced_once = item_count_values.count(1) + referenced_multiple = len(item_count_values) - not_referenced - referenced_once results.append(not_referenced) results.append(referenced_once) From e9273396b992f054d9f4e2e376c710f6c111a679 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Tue, 28 Apr 2026 14:19:24 +0000 Subject: [PATCH 21/27] fix linting Co-authored-by: Copilot --- .../tests/test_sphinx_filters.py | 48 +++++++++++-------- .../tests/test_xml_parser.py | 4 +- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/src/extensions/score_metamodel/tests/test_sphinx_filters.py b/src/extensions/score_metamodel/tests/test_sphinx_filters.py index b56a238ef..c065a01b7 100644 --- a/src/extensions/score_metamodel/tests/test_sphinx_filters.py +++ b/src/extensions/score_metamodel/tests/test_sphinx_filters.py @@ -11,6 +11,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +from typing import cast + +from sphinx_needs.need_item import NeedItem + from src.extensions.score_metamodel.sphinx_filters import ( generic_pie_items_by_tag, generic_pie_linked_items, @@ -18,15 +22,18 @@ def test_generic_pie_linked_items_matches_source_by_id_prefix() -> None: - needs = [ - {"id": "std_req__iso26262__001", "type": "std_req"}, - # Type intentionally does not match selector prefix, id does. - { - "id": "gd_guidl__xyz", - "type": "guideline", - "complies": ["std_req__iso26262__001"], - }, - ] + needs = cast( + list[NeedItem], + [ + {"id": "std_req__iso26262__001", "type": "std_req"}, + # Type intentionally does not match selector prefix, id does. 
+ { + "id": "gd_guidl__xyz", + "type": "guideline", + "complies": ["std_req__iso26262__001"], + }, + ], + ) results: list[int] = [] generic_pie_linked_items( @@ -41,16 +48,19 @@ def test_generic_pie_linked_items_matches_source_by_id_prefix() -> None: def test_generic_pie_items_by_tag_matches_source_by_id_prefix() -> None: - needs = [ - {"id": "REQ_A", "type": "tool_req", "tags": ["aspice40_man5"]}, - {"id": "REQ_B", "type": "tool_req", "tags": ["aspice40_man5"]}, - # Type intentionally does not match selector prefix, id does. - { - "id": "gd_req__abc", - "type": "process_requirement", - "complies": ["REQ_A"], - }, - ] + needs = cast( + list[NeedItem], + [ + {"id": "REQ_A", "type": "tool_req", "tags": ["aspice40_man5"]}, + {"id": "REQ_B", "type": "tool_req", "tags": ["aspice40_man5"]}, + # Type intentionally does not match selector prefix, id does. + { + "id": "gd_req__abc", + "type": "process_requirement", + "complies": ["REQ_A"], + }, + ], + ) results: list[int] = [] generic_pie_items_by_tag( diff --git a/src/extensions/score_source_code_linker/tests/test_xml_parser.py b/src/extensions/score_source_code_linker/tests/test_xml_parser.py index 3c5440918..c1725d31c 100644 --- a/src/extensions/score_source_code_linker/tests/test_xml_parser.py +++ b/src/extensions/score_source_code_linker/tests/test_xml_parser.py @@ -33,7 +33,6 @@ # This depends on the `attribute_plugin` in our tooling repository from attribute_plugin import add_test_properties # type: ignore[import-untyped] -from sphinx.application import Sphinx import src.extensions.score_source_code_linker.xml_parser as xml_parser from src.extensions.score_source_code_linker.testlink import DataOfTestCase @@ -362,8 +361,9 @@ def _fake_add_external_need(**kwargs: object) -> object: url=None, ) + app_stub: Any = object() xml_parser.construct_and_add_need( - app=cast(Sphinx, object()), + app=app_stub, tn=testcase, ) From 75ed383f029ed12ad0f8c15187730b490185af1c Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: 
Wed, 29 Apr 2026 14:26:31 +0000 Subject: [PATCH 22/27] feat(harness): Add pilot foundation for docs-as-code assurance harness - Add outer loop with both metrics_json and needs_json task modes - Add lightweight candidate validation and query tooling - Add baseline + rule-retrieval harness candidates - Add provenance metadata and responsibility model for audit compliance - Add tool safety restrictions and compliance documentation - Create executable seed corpus (4 tasks) Addresses eclipse-score/score#2850, eclipse-score/score#2856 Part of eclipse-score/score#2852 --- .gitignore | 9 + AGENTS.md | 49 +++ score_harness/.gitignore | 3 + score_harness/BUILD | 75 ++++ score_harness/README.md | 83 ++++ score_harness/SKILL.md | 81 ++++ score_harness/common.py | 55 +++ score_harness/consistency_rules.yaml | 72 ++++ .../fixtures/metrics_broken_refs.json | 33 ++ .../fixtures/metrics_threshold_fail.json | 28 ++ .../fixtures/metrics_tool_req_scope_pass.json | 50 +++ score_harness/harness/BUILD | 35 ++ score_harness/harness/base_harness.py | 82 ++++ .../harness/rule_retrieval_harness.py | 123 ++++++ score_harness/outer_loop.py | 390 ++++++++++++++++++ score_harness/query_runs.py | 141 +++++++ score_harness/spec/task_001_example.json | 12 + .../spec/task_002_threshold_fail.json | 13 + .../spec/task_003_broken_refs_fail.json | 13 + .../spec/task_004_need_type_scope_pass.json | 13 + .../spec/task_005_build_output_pass.json | 13 + score_harness/tests/BUILD | 49 +++ score_harness/tests/common_test.py | 51 +++ .../tests/validate_candidate_test.py | 87 ++++ score_harness/validate_candidate.py | 100 +++++ 25 files changed, 1660 insertions(+) create mode 100644 AGENTS.md create mode 100644 score_harness/.gitignore create mode 100644 score_harness/BUILD create mode 100644 score_harness/README.md create mode 100644 score_harness/SKILL.md create mode 100644 score_harness/common.py create mode 100644 score_harness/consistency_rules.yaml create mode 100644 
score_harness/fixtures/metrics_broken_refs.json create mode 100644 score_harness/fixtures/metrics_threshold_fail.json create mode 100644 score_harness/fixtures/metrics_tool_req_scope_pass.json create mode 100644 score_harness/harness/BUILD create mode 100644 score_harness/harness/base_harness.py create mode 100644 score_harness/harness/rule_retrieval_harness.py create mode 100644 score_harness/outer_loop.py create mode 100644 score_harness/query_runs.py create mode 100644 score_harness/spec/task_001_example.json create mode 100644 score_harness/spec/task_002_threshold_fail.json create mode 100644 score_harness/spec/task_003_broken_refs_fail.json create mode 100644 score_harness/spec/task_004_need_type_scope_pass.json create mode 100644 score_harness/spec/task_005_build_output_pass.json create mode 100644 score_harness/tests/BUILD create mode 100644 score_harness/tests/common_test.py create mode 100644 score_harness/tests/validate_candidate_test.py create mode 100644 score_harness/validate_candidate.py diff --git a/.gitignore b/.gitignore index ec96afacf..59e81291c 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,12 @@ __pycache__/ # bug: This file is created in repo root on test discovery. /consumer_test.log .clwb + +# Harness execution history (local only) +score_harness/runs/ + +# Internal OEM documentation (confidential) +docs/internals/requirements/oem_internal_workstreams.md + +# Temporary issue drafts (already created in GitHub) +.tmp_issue_updates/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..618c5f706 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,49 @@ +# Agent Entry Map + +This file is the repository entrypoint for coding agents. It is intentionally +short. Treat it as a map, not an encyclopedia. 
+ +## What This Repo Does + +- Builds and validates docs-as-code workflows for Eclipse S-CORE +- Maintains a Sphinx-needs metamodel and traceability tooling +- Provides Lane A validation for requirements, code links, and test links + +## Where To Look First + +- `README.md` - repository overview and main build/test flow +- `docs/` - user and internal documentation +- `scripts_bazel/traceability_coverage.py` - metrics extraction +- `scripts_bazel/traceability_gate.py` - deterministic Lane A gate +- `score_harness/README.md` - harness subsystem map and execution flow + +## Core Commands + +- Build docs / generate needs data: use the existing repo build flow from `README.md` +- Run traceability metrics: `python scripts_bazel/traceability_coverage.py --json-output ` +- Run traceability gate: `python scripts_bazel/traceability_gate.py ` +- Run Python tests: use the existing repo test command from `README.md` + +## Harness Area + +The harness subsystem lives under `score_harness/`. + +- `score_harness/spec/` - task specs (small, structured change scenarios) +- `score_harness/harness/` - candidate harness code +- `score_harness/runs/` - append-only execution history +- `score_harness/outer_loop.py` - deterministic evaluation and trace distillation +- `score_harness/consistency_rules.yaml` - public rule catalog for docs-as-code + +## Working Rules + +- Keep Lane A deterministic and OSS-only +- Keep required artifacts machine-readable and grep-able +- Put recurring rules into tooling when possible, not only prose +- Keep top-level instruction files short; move detail into indexed files nearby +- Prefer additive, low-risk harness changes over broad prompt rewrites + +## Public vs Internal Boundary + +- Public issues: tooling, schemas, validators, workflows, public task corpora +- Internal work: release authority, confidential benchmarks, waivers, supplier/legal obligations +- Internal boundary details live in `docs/internals/requirements/oem_internal_workstreams.md` diff 
--git a/score_harness/.gitignore b/score_harness/.gitignore new file mode 100644 index 000000000..531d0fac2 --- /dev/null +++ b/score_harness/.gitignore @@ -0,0 +1,3 @@ +docs/internals/requirements/oem_internal_workstreams.md +runs/ +.tmp_issue_updates/ diff --git a/score_harness/BUILD b/score_harness/BUILD new file mode 100644 index 000000000..574b13d30 --- /dev/null +++ b/score_harness/BUILD @@ -0,0 +1,75 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +load("@aspect_rules_py//py:defs.bzl", "py_binary", "py_library") +load("@docs_as_code_hub_env//:requirements.bzl", "all_requirements") + +filegroup( + name = "sources", + srcs = glob(["**/*.py"]), + visibility = ["//visibility:public"], +) + +filegroup( + name = "spec_files", + srcs = glob(["spec/*.json"]), + visibility = ["//visibility:public"], +) + +filegroup( + name = "fixture_files", + srcs = glob(["fixtures/*.json"]), + visibility = ["//visibility:public"], +) + +py_library( + name = "common", + srcs = ["common.py"], + visibility = ["//score_harness:__subpackages__"], + deps = all_requirements, +) + +py_binary( + name = "outer_loop", + srcs = ["outer_loop.py"], + main = "outer_loop.py", + visibility = ["//visibility:public"], + deps = [ + ":common", + "//score_harness/harness:base_harness", + "//scripts_bazel:traceability_gate", + ] + all_requirements, +) + +py_binary( + name = "validate_candidate", + srcs = 
["validate_candidate.py"], + main = "validate_candidate.py", + visibility = ["//visibility:public"], + deps = [ + ":common", + "//score_harness/harness:base_harness", + ] + all_requirements, +) + +py_binary( + name = "query_runs", + srcs = ["query_runs.py"], + main = "query_runs.py", + visibility = ["//visibility:public"], + deps = all_requirements, +) diff --git a/score_harness/README.md b/score_harness/README.md new file mode 100644 index 000000000..91b5a0982 --- /dev/null +++ b/score_harness/README.md @@ -0,0 +1,83 @@ +# score-harness + +Agent harness infrastructure for Eclipse S-CORE docs-as-code. + +This directory is the integration gate between agent-generated changes and the +Lane A traceability gate (`scripts_bazel/traceability_gate.py`). + +Treat this file as the entry map for the harness area. Keep it short. Put deeper +detail in the structured files below so agents can navigate selectively. + +## Structure + +``` +score_harness/ + spec/ Task specs (small, structured change scenarios) + harness/ Harness candidates (one Python file per candidate) + runs/ Execution history (append-only, per iteration/candidate/task) + consistency_rules.yaml Public docs-as-code rule catalog + SKILL.md Domain skill for the outer loop proposer + outer_loop.py Deterministic outer loop: run harness -> gate -> distill -> log +``` + +## Navigation + +- Start here for the overall contract and command sequence +- Read `spec/` for task units and expected verdicts +- Read `consistency_rules.yaml` for rule IDs and impact semantics +- Read `outer_loop.py` for evaluation, distillation, and filesystem layout +- Read `SKILL.md` only when working on Lane B candidate evolution + +## Lane A contract + +Every harness candidate is evaluated against the same Lane A gate: + +1. Run cheap candidate validation against one runnable task spec +2. For metrics_json tasks: load a stable `metrics.json` fixture directly +3. 
For needs_json tasks: use `metrics.json` from the same build directory as `needs.json` (both produced by Sphinx build) +4. Run `traceability_gate.py` with task-specific arguments to produce pass/fail verdict +5. Distill structured trace artifacts into `runs/<iteration>/<candidate>/traces/<task>/` + +No LLM is required in Lane A. The outer loop is deterministic Python. + +Note: `traceability_coverage.py` no longer exists as a separate script—coverage extraction is integrated into the Sphinx build via the score_metamodel extension. + +## Queryability rules + +- `runs/` is append-only +- trace artifacts must be JSON, small, and consistently named +- the proposer should start from `evolution_summary.jsonl` and then inspect only + the traces it needs +- avoid raw stdout dumps as the primary artifact + +## Lane B (optional) + +A proposer (any coding agent) may read the trace history via `runs/` and +`evolution_summary.jsonl` and propose new harness candidates. Lane B never +determines merge eligibility. + +## Getting started + +```bash +# Validate a candidate cheaply before full evaluation +python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/base_harness.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json + +# Run the seeded gate-fixture corpus against the baseline harness +python3 score_harness/outer_loop.py \ + --candidate score_harness/harness/base_harness.py \ + --tasks score_harness/spec/ + +# Query prior runs, failed tasks, and candidate deltas +python3 score_harness/query_runs.py \ + --runs-dir score_harness/runs \ + --failed-tasks \ + --diff-candidates base_harness candidate_x +``` + +## Next implementation steps + +1. Grow the seeded corpus beyond gate metrics fixtures to full docs build snapshots using the needs_json task path. +2. Add more candidate harnesses so run-to-run diffs show meaningful behavioral deltas. +3. Integrate the outer loop into CI as a non-blocking pre-gate pilot.
diff --git a/score_harness/SKILL.md b/score_harness/SKILL.md new file mode 100644 index 000000000..4f86827a1 --- /dev/null +++ b/score_harness/SKILL.md @@ -0,0 +1,81 @@ +--- +name: score-harness-assurance +description: Run one iteration of assurance harness evolution for Eclipse S-CORE docs-as-code. +--- + +# Score Harness — Assurance Consistency Domain + +Run ONE iteration of harness evolution. You analyze prior results, propose a new harness candidate, and implement it. The outer loop (`outer_loop.py`) handles evaluation. + +## Critical constraints + +- You MUST produce 1 new harness candidate every iteration. +- Do NOT hardcode task-specific knowledge. Harnesses must be general-purpose. +- Do NOT read raw `gate_stdout` fields from trace artifacts. Read only distilled JSON fields. +- One mechanism per candidate. If you are tempted to add "and also..." that is a second candidate. + +## Safety and scope restrictions + +**Harness candidates must comply with these mandatory rules:** + +1. **File scope**: Candidates may only read files declared in the task spec's `input_path` or referenced by `consistency_rules`. +2. **No network access**: Candidates must not make HTTP requests, DNS lookups, or access external services. +3. **No side effects**: `get_context()` must be read-only. Write operations belong in `post_process()` if needed. +4. **Deterministic**: Same task spec + same candidate → same context. No timestamps, random values, or external state in context. +5. **Tool safety**: Candidates may import stdlib and repo-local modules only. No dynamic code execution via `eval()` or `exec()`. + +**Violation consequences:** +- Candidates violating these rules will fail the cheap validation step before evaluation. +- Repeated violations may block future candidate submissions until governance review. + +## Domain context + +The task domain is: maintain ISO 26262 / ASPICE assurance arguments consistent with Sphinx-needs artifacts under change. 
+ +The Lane A evaluation sequence is: +1. Docs build (Sphinx with the score_metamodel extension) → `metrics.json` next to `needs.json` +2. `traceability_gate.py` → pass/fail verdict +3. Structured trace artifacts written to `runs/iteration_NNN/CANDIDATE_NAME/traces/TASK_ID/` + +The harness variable is: what context is provided to the agent before it edits an RST file or needs.json. + +## Key files + +- `harness/base_harness.py` — base class and baseline candidate. Read before proposing. +- `evolution_summary.jsonl` — one line per prior candidate (read this first) +- `runs/` — trace history. Use grep to find patterns across tasks and iterations. +- `spec/*.json` — task specs defining input, expected verdict, and relevant consistency rules. + +## Workflow + +### Step 1: Analyze + +1. Read `evolution_summary.jsonl` to understand what has been tried. +2. Read `runs/` traces for failed tasks: `impacted_elements.json` and `score.json`. +3. Read prior candidate harness files in `harness/`. +4. Form a falsifiable hypothesis: "Providing X before the agent acts will reduce Y failure class." + +### Step 2: Implement + +1. Copy `harness/base_harness.py` to `harness/CANDIDATE_NAME.py`. +2. Override `get_context()` with your mechanism. Keep `post_process()` default unless needed. +3. Validate import: `python3 -c "from score_harness.harness.CANDIDATE_NAME import *; print('OK')"`. + +### Step 3: Write pending_eval.json + +```json +{ + "iteration": ITERATION_NUMBER, + "candidates": [ + { + "name": "CANDIDATE_NAME", + "file": "harness/CANDIDATE_NAME.py", + "hypothesis": "FALSIFIABLE_HYPOTHESIS", + "mechanism": "SINGLE_MECHANISM_DESCRIPTION", + "expected_impact": "EXPECTED_IMPACT" + } + ] +} +``` + +Output: `CANDIDATE: CANDIDATE_NAME` diff --git a/score_harness/common.py b/score_harness/common.py new file mode 100644 index 000000000..d1f07b4b1 --- /dev/null +++ b/score_harness/common.py @@ -0,0 +1,55 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +"""Shared helpers for score_harness utilities.""" + +from __future__ import annotations + +import importlib.util +import json +from pathlib import Path + + +def load_harness(candidate_path: Path): + """Dynamically load a harness candidate from a Python file.""" + spec = importlib.util.spec_from_file_location("candidate_harness", candidate_path) + if spec is None or spec.loader is None: + raise ValueError(f"Cannot load module spec from {candidate_path}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + for name in dir(module): + cls = getattr(module, name) + try: + if ( + isinstance(cls, type) + and name != "AssuranceHarness" + and issubclass(cls, module.AssuranceHarness) + ): + return cls() + except (TypeError, AttributeError): + continue + raise ValueError(f"No AssuranceHarness subclass found in {candidate_path}") + + +def load_task_specs(tasks_dir: Path) -> list[dict]: + """Load all task spec JSON files from the tasks directory.""" + specs = [] + for path in sorted(tasks_dir.glob("*.json")): + spec = json.loads(path.read_text()) + spec["_task_spec_path"] = str(path.resolve()) + specs.append(spec) + return specs diff --git a/score_harness/consistency_rules.yaml b/score_harness/consistency_rules.yaml new file mode 100644 index 000000000..a40dbdb0f --- /dev/null +++ b/score_harness/consistency_rules.yaml @@ -0,0 +1,72 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information 
regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +# Consistency Rule Catalog +# Machine-readable rules linking argument elements to artifact types and change scenarios. +# Format: YAML list. Each rule has an id, description, trigger, impacted_element_type, +# impact_class, and action_required. + +rules: + - id: CR-001 + description: > + If a complies link target is renamed or removed, all linking elements + are directly impacted and must be rechecked. + trigger: + artifact_type: requirement + change: link_target_removed_or_renamed + impacted_element_type: guideline + impact_class: direct_recheck + action_required: re-verify compliance claim + + - id: CR-002 + description: > + If a requirement type changes (e.g. tool_req to process_requirement), + all guidelines that comply with it are indirectly impacted. + trigger: + artifact_type: requirement + change: type_changed + impacted_element_type: guideline + impact_class: indirect_propagation + action_required: review compliance argument chain + + - id: CR-003 + description: > + If a test reference is broken (linked test ID does not exist in needs.json), + the linked requirement loses its test coverage evidence. + trigger: + artifact_type: test_link + change: reference_broken + impacted_element_type: requirement + impact_class: revision_required + action_required: restore or replace test reference before gate can pass + + - id: CR-004 + description: > + If a std_req changes content, all gd_guidl elements that comply with it + require re-review of the compliance argument. 
+ trigger: + artifact_type: std_req + change: content_changed + impacted_element_type: gd_guidl + impact_class: indirect_propagation + action_required: re-review compliance argument + + - id: CR-005 + description: > + If coverage drops below the configured threshold for any requirement type, + the gate verdict changes from pass to fail. + trigger: + artifact_type: metrics + change: coverage_below_threshold + impacted_element_type: gate_verdict + impact_class: direct_recheck + action_required: restore coverage above threshold or request waiver diff --git a/score_harness/fixtures/metrics_broken_refs.json b/score_harness/fixtures/metrics_broken_refs.json new file mode 100644 index 000000000..48d70bc8f --- /dev/null +++ b/score_harness/fixtures/metrics_broken_refs.json @@ -0,0 +1,33 @@ +{ + "schema_version": "1", + "generated_by": "traceability_gate_test_fixture", + "metrics_by_type": { + "tool_req": { + "include_not_implemented": false, + "requirements": { + "total": 1, + "with_code_link": 1, + "with_test_link": 1, + "fully_linked": 1, + "with_code_link_pct": 100.0, + "with_test_link_pct": 100.0, + "fully_linked_pct": 100.0, + "missing_code_link_ids": [], + "missing_test_link_ids": [], + "not_fully_linked_ids": [] + }, + "tests": { + "total": 2, + "filtered_test_types": [], + "linked_to_requirements": 2, + "linked_to_requirements_pct": 100.0, + "broken_references": [ + { + "testcase": "TC_X", + "missing_need": "REQ_UNKNOWN" + } + ] + } + } + } +} diff --git a/score_harness/fixtures/metrics_threshold_fail.json b/score_harness/fixtures/metrics_threshold_fail.json new file mode 100644 index 000000000..b35575163 --- /dev/null +++ b/score_harness/fixtures/metrics_threshold_fail.json @@ -0,0 +1,28 @@ +{ + "schema_version": "1", + "generated_by": "traceability_gate_test_fixture", + "metrics_by_type": { + "tool_req": { + "include_not_implemented": false, + "requirements": { + "total": 4, + "with_code_link": 3, + "with_test_link": 2, + "fully_linked": 2, + 
"with_code_link_pct": 75.0, + "with_test_link_pct": 50.0, + "fully_linked_pct": 50.0, + "missing_code_link_ids": ["REQ_4"], + "missing_test_link_ids": ["REQ_3", "REQ_4"], + "not_fully_linked_ids": ["REQ_3", "REQ_4"] + }, + "tests": { + "total": 3, + "filtered_test_types": [], + "linked_to_requirements": 2, + "linked_to_requirements_pct": 66.67, + "broken_references": [] + } + } + } +} diff --git a/score_harness/fixtures/metrics_tool_req_scope_pass.json b/score_harness/fixtures/metrics_tool_req_scope_pass.json new file mode 100644 index 000000000..c551cbbe0 --- /dev/null +++ b/score_harness/fixtures/metrics_tool_req_scope_pass.json @@ -0,0 +1,50 @@ +{ + "schema_version": "1", + "generated_by": "traceability_gate_test_fixture", + "metrics_by_type": { + "tool_req": { + "include_not_implemented": false, + "requirements": { + "total": 2, + "with_code_link": 2, + "with_test_link": 2, + "fully_linked": 2, + "with_code_link_pct": 100.0, + "with_test_link_pct": 100.0, + "fully_linked_pct": 100.0, + "missing_code_link_ids": [], + "missing_test_link_ids": [], + "not_fully_linked_ids": [] + }, + "tests": { + "total": 1, + "filtered_test_types": [], + "linked_to_requirements": 1, + "linked_to_requirements_pct": 100.0, + "broken_references": [] + } + }, + "comp_req": { + "include_not_implemented": false, + "requirements": { + "total": 5, + "with_code_link": 0, + "with_test_link": 0, + "fully_linked": 0, + "with_code_link_pct": 0.0, + "with_test_link_pct": 0.0, + "fully_linked_pct": 0.0, + "missing_code_link_ids": ["C1", "C2", "C3", "C4", "C5"], + "missing_test_link_ids": ["C1", "C2", "C3", "C4", "C5"], + "not_fully_linked_ids": ["C1", "C2", "C3", "C4", "C5"] + }, + "tests": { + "total": 0, + "filtered_test_types": [], + "linked_to_requirements": 0, + "linked_to_requirements_pct": 100.0, + "broken_references": [] + } + } + } +} diff --git a/score_harness/harness/BUILD b/score_harness/harness/BUILD new file mode 100644 index 000000000..f4f75c286 --- /dev/null +++ 
b/score_harness/harness/BUILD @@ -0,0 +1,35 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +load("@aspect_rules_py//py:defs.bzl", "py_library") +load("@docs_as_code_hub_env//:requirements.bzl", "all_requirements") + +py_library( + name = "base_harness", + srcs = ["base_harness.py"], + visibility = ["//score_harness:__subpackages__"], + deps = all_requirements, +) + +py_library( + name = "rule_retrieval_harness", + srcs = ["rule_retrieval_harness.py"], + visibility = ["//score_harness:__subpackages__"], + deps = [ + ":base_harness", + ] + all_requirements, +) diff --git a/score_harness/harness/base_harness.py b/score_harness/harness/base_harness.py new file mode 100644 index 000000000..f27128f16 --- /dev/null +++ b/score_harness/harness/base_harness.py @@ -0,0 +1,82 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ +Base harness interface for the docs-as-code assurance consistency domain. + +Every candidate harness must subclass AssuranceHarness and override get_context(). +The outer loop loads and evaluates candidates via this interface without modification. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod + + +class AssuranceHarness(ABC): + """ + Interface every candidate harness must satisfy. + + The outer loop calls get_context() before the agent acts, then runs the + Lane A gate, then calls post_process() on the result before distillation. + """ + + @abstractmethod + def get_context(self, task_spec: dict) -> str: + """ + Return context to present to the agent before it acts on the task. 
+ + task_spec fields (from spec.md parsed as dict): + - input_path: str path to the RST or needs.json file to change + - change_description: str what the agent should do + - expected_verdict: str "pass" or "fail" (used only for evaluation) + - consistency_rules: list[str] rule IDs expected to be relevant + + Returns a string that will be prepended to the agent's task prompt. + """ + ... + + def post_process(self, agent_output: str, task_spec: dict) -> dict: + """ + Optional: transform or validate agent output before the gate runs. + + Default implementation returns the output unchanged. + Override to add pre-gate validation, output normalization, etc. + """ + return {"agent_output": agent_output} + + +class BaselineHarness(AssuranceHarness): + """ + Baseline: no pre-context. Agent receives only the task description. + + This is the weakest possible harness and serves as the search baseline. + Any candidate that beats this on the search set is a candidate improvement. + """ + + def get_context(self, task_spec: dict) -> str: + return "" diff --git a/score_harness/harness/rule_retrieval_harness.py b/score_harness/harness/rule_retrieval_harness.py new file mode 100644 index 000000000..513e63cae --- /dev/null +++ b/score_harness/harness/rule_retrieval_harness.py @@ -0,0 +1,123 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ +Rule-retrieval harness: provides consistency rule context for each task. + +This candidate reads the consistency_rules field from the task spec, retrieves +the corresponding rule definitions from consistency_rules.yaml, and formats +them as context prepended to the agent's task prompt. + +Hypothesis: explicit consistency rule context reduces false-negative gate +failures by helping the agent preserve coverage invariants. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import yaml + +# Inject harness directory into path so base_harness can be imported +_harness_dir = Path(__file__).parent +if str(_harness_dir) not in sys.path: + sys.path.insert(0, str(_harness_dir)) + +from base_harness import AssuranceHarness # noqa: E402 + + +class RuleRetrievalHarness(AssuranceHarness): + """ + Harness that retrieves and formats consistency rules for the agent. + + For each rule ID in task_spec["consistency_rules"], this candidate: + 1. Loads the rule definition from consistency_rules.yaml + 2. 
Formats the trigger, impact class, and required action + 3. Returns the formatted rules as context + """ + + def __init__(self): + self.rules_cache = None + + def _load_rules(self) -> dict[str, dict]: + """Load and index the consistency rule catalog.""" + if self.rules_cache is not None: + return self.rules_cache + + # Resolve rules file relative to this harness file + harness_dir = Path(__file__).parent + rules_path = harness_dir.parent / "consistency_rules.yaml" + + if not rules_path.exists(): + return {} + + with rules_path.open() as f: + data = yaml.safe_load(f) + + rules_by_id = {rule["id"]: rule for rule in data.get("rules", [])} + self.rules_cache = rules_by_id + return rules_by_id + + def get_context(self, task_spec: dict) -> str: + """ + Retrieve and format consistency rules referenced in the task spec. + + Returns a formatted string listing each rule's trigger, impact class, + and required action, designed to be prepended to the agent's prompt. + """ + rule_ids = task_spec.get("consistency_rules", []) + if not rule_ids: + return "" + + rules_by_id = self._load_rules() + context_lines = ["## Relevant Consistency Rules", ""] + + for rule_id in rule_ids: + rule = rules_by_id.get(rule_id) + if not rule: + context_lines.append(f"- {rule_id}: (rule not found)") + continue + + context_lines.append( + f"### {rule_id}: {rule.get('description', '').strip()}" + ) + context_lines.append("") + + trigger = rule.get("trigger", {}) + context_lines.append( + f"**Trigger**: {trigger.get('artifact_type', 'unknown')} — {trigger.get('change', 'unknown')}" + ) + context_lines.append( + f"**Impact class**: {rule.get('impact_class', 'unknown')}" + ) + context_lines.append( + f"**Action required**: {rule.get('action_required', 'unknown')}" + ) + context_lines.append("") + + return "\n".join(context_lines) diff --git a/score_harness/outer_loop.py b/score_harness/outer_loop.py new file mode 100644 index 000000000..016b0ecad --- /dev/null +++ b/score_harness/outer_loop.py @@ -0,0 
+1,390 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ +Outer loop for the docs-as-code assurance harness. + +This script is deterministic Python — no LLM required. +It loads a harness candidate, runs it against a task set, calls the Lane A gate, +distills structured trace artifacts, and appends a summary line to +evolution_summary.jsonl. 
+ +Usage: + python3 outer_loop.py --candidate harness/base_harness.py --tasks spec/ \ + [--iteration 1] [--output-dir runs/] +""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import subprocess +import sys +from datetime import UTC, datetime +from pathlib import Path + +from common import load_harness, load_task_specs +from validate_candidate import validate_candidate + + +def run_gate_from_needs_json( + needs_json: Path, + gate_script: Path, + gate_args: list[str], +) -> dict: + """ + Run traceability_gate.py using metrics.json from the same build as needs.json. + + In this repo, the docs build (Sphinx + score_metamodel extension) produces + both needs.json and metrics.json. The gate validates the metrics.json. + + This function locates metrics.json in the same directory as needs.json. + """ + # Locate metrics.json alongside needs.json + metrics_json = needs_json.parent / "metrics.json" + if not metrics_json.exists(): + return { + "metrics": {"error": f"metrics.json not found at {metrics_json}"}, + "gate_passed": False, + "gate_stdout": f"Error: metrics.json not found at {metrics_json}", + "gate_returncode": 1, + } + + metrics = json.loads(metrics_json.read_text()) + gate_result = subprocess.run( + [sys.executable, str(gate_script), "--metrics-json", str(metrics_json)] + + gate_args, + capture_output=True, + text=True, + ) + + return { + "metrics": metrics, + "gate_passed": gate_result.returncode == 0, + "gate_stdout": gate_result.stdout, + "gate_returncode": gate_result.returncode, + } + + +def run_gate_from_metrics_json( + metrics_json: Path, + gate_script: Path, + gate_args: list[str], +) -> dict: + """ + Run traceability_gate.py directly on an existing metrics.json fixture. + This mode is useful for task specs derived from gate tests and fixtures. 
+ """ + metrics = json.loads(metrics_json.read_text()) + gate_result = subprocess.run( + [sys.executable, str(gate_script), "--metrics-json", str(metrics_json)] + + gate_args, + capture_output=True, + text=True, + ) + + return { + "metrics": metrics, + "gate_passed": gate_result.returncode == 0, + "gate_stdout": gate_result.stdout, + "gate_returncode": gate_result.returncode, + } + + +def distill_trace(gate_result: dict, task_spec: dict) -> dict: + """ + Extract only the fields the proposer needs. Never raw stdout dumps. + This is the distillation step — deterministic, no LLM. + """ + metrics = gate_result.get("metrics", {}) + impacted = [] + + # Extract broken complies/test references by type from metrics + for req_type, data in metrics.get("metrics_by_type", {}).items(): + reqs = data.get("requirements", {}) + if reqs.get("with_test_link_pct", 100) < 100: + impacted.append( + { + "artifact_type": req_type, + "impact_class": "direct_recheck", + "reason": "missing_test_link", + } + ) + if reqs.get("with_code_link_pct", 100) < 100: + impacted.append( + { + "artifact_type": req_type, + "impact_class": "direct_recheck", + "reason": "missing_code_link", + } + ) + tests = data.get("tests", {}) + broken_references = tests.get("broken_references", []) + if isinstance(broken_references, int): + broken_reference_count = broken_references + else: + broken_reference_count = len(broken_references) + if broken_reference_count > 0: + impacted.append( + { + "artifact_type": req_type, + "impact_class": "revision_required", + "reason": "broken_test_reference", + "count": broken_reference_count, + "references": broken_references, + } + ) + + return { + "gate_passed": gate_result["gate_passed"], + "expected_verdict": task_spec.get("expected_verdict", "unknown"), + "task_mode": "metrics_json" + if task_spec.get("metrics_json_path") + else "needs_json", + "verdict_correct": gate_result["gate_passed"] + == (task_spec.get("expected_verdict") == "pass"), + "impacted_elements": impacted, + 
"coverage_summary": metrics.get("metrics_by_type", {}), + "responsible_role": task_spec.get("responsible_role", "pr_creator"), + "provenance": { + "execution_timestamp": datetime.now(UTC).isoformat(), + "python_version": sys.version.split()[0], + "environment_hash": hashlib.sha256( + f"{sys.version}{os.environ.get('USER', 'unknown')}".encode() + ).hexdigest()[:16], + "gate_script_version": "1.0", + }, + } + + +def resolve_task_path(path_value: str, task_spec: dict) -> Path: + """Resolve a task artifact path robustly across common invocation directories.""" + path = Path(path_value) + if path.is_absolute(): + return path + + candidates = [Path.cwd() / path] + task_spec_path = Path(task_spec.get("_task_spec_path", "")) + if task_spec_path: + task_dir = task_spec_path.parent + harness_root = task_dir.parent + candidates.extend( + [ + task_dir / path, + harness_root / path, + ] + ) + if path.parts and path.parts[0] == harness_root.name: + candidates.append(harness_root.parent / path) + + for candidate in candidates: + if candidate.exists(): + return candidate + + return candidates[0] + + +def resolve_support_path(path_value: Path) -> Path: + """Resolve repo support scripts independent of the current working directory.""" + if path_value.is_absolute(): + return path_value + + repo_root = Path(__file__).resolve().parent.parent + candidates = [Path.cwd() / path_value, repo_root / path_value] + for candidate in candidates: + if candidate.exists(): + return candidate + return repo_root / path_value + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def main() -> None: # noqa: C901 + parser = argparse.ArgumentParser(description="Score harness outer loop") + parser.add_argument("--candidate", required=True, type=Path) + parser.add_argument("--tasks", required=True, type=Path) + parser.add_argument("--iteration", type=int, default=1) + 
parser.add_argument("--output-dir", type=Path, default=Path("runs")) + parser.add_argument( + "--gate-script", + type=Path, + default=Path("scripts_bazel/traceability_gate.py"), + ) + parser.add_argument( + "--skip-validation", + action="store_true", + help="Skip the cheap pre-benchmark candidate validation step.", + ) + args = parser.parse_args() + args.gate_script = resolve_support_path(args.gate_script) + + candidate_name = args.candidate.stem + run_dir = args.output_dir / f"iteration_{args.iteration:03d}" / candidate_name + run_dir.mkdir(parents=True, exist_ok=True) + + harness = load_harness(args.candidate) + task_specs = load_task_specs(args.tasks) + + if not task_specs: + print(f"No task specs found in {args.tasks}. Create spec/*.json files first.") + sys.exit(1) + + if not args.skip_validation: + validation_task_spec = None + for candidate_task_spec in sorted(args.tasks.glob("*.json")): + task_data = json.loads(candidate_task_spec.read_text()) + if not task_data.get("active", True): + continue + if task_data.get("needs_json_path") or task_data.get("metrics_json_path"): + validation_task_spec = candidate_task_spec + break + + if validation_task_spec is None: + print("No runnable task spec found for validation.") + sys.exit(1) + + validation_result = validate_candidate( + args.candidate, + validation_task_spec, + ) + print( + f"[validation] candidate={validation_result['candidate']} " + f"task={validation_result['task_id']} status={validation_result['status']}" + ) + + task_results = [] + for task_spec in task_specs: + if not task_spec.get("active", True): + print(f" [SKIP] {task_spec.get('id', 'unknown')}: inactive task spec") + continue + + task_id = task_spec.get("id", "unknown") + trace_dir = run_dir / "traces" / task_id + trace_dir.mkdir(parents=True, exist_ok=True) + + # Get harness context (would be passed to agent in a real run) + harness.get_context(task_spec) + + gate_args = task_spec.get("gate_args", []) + + metrics_json_path = 
task_spec.get("metrics_json_path", "") + if metrics_json_path: + metrics_json = resolve_task_path(metrics_json_path, task_spec) + if not metrics_json.exists(): + print(f" [SKIP] {task_id}: metrics_json not found at {metrics_json}") + continue + gate_result = run_gate_from_metrics_json( + metrics_json, + args.gate_script, + gate_args, + ) + else: + needs_json_path = task_spec.get("needs_json_path", "") + if not needs_json_path: + print(f" [SKIP] {task_id}: no needs_json_path provided") + continue + needs_json = resolve_task_path(needs_json_path, task_spec) + if not needs_json.exists(): + print(f" [SKIP] {task_id}: needs_json not found at {needs_json}") + continue + + gate_result = run_gate_from_needs_json( + needs_json, + args.gate_script, + gate_args, + ) + trace = distill_trace(gate_result, task_spec) + + # Write distilled trace files + (trace_dir / "gate_output.json").write_text( + json.dumps( + { + "gate_passed": gate_result["gate_passed"], + "gate_returncode": gate_result["gate_returncode"], + }, + indent=2, + ) + ) + (trace_dir / "impacted_elements.json").write_text( + json.dumps(trace["impacted_elements"], indent=2) + ) + (trace_dir / "score.json").write_text(json.dumps(trace, indent=2)) + + task_results.append( + { + "task_id": task_id, + "verdict_correct": trace["verdict_correct"], + "gate_passed": trace["gate_passed"], + "task_mode": trace["task_mode"], + } + ) + print(f" [{'+' if trace['verdict_correct'] else 'X'}] {task_id}") + + # Write run-level score + n_correct = sum(1 for r in task_results if r["verdict_correct"]) + (run_dir / "score.json").write_text( + json.dumps( + { + "candidate": candidate_name, + "iteration": args.iteration, + "tasks_total": len(task_results), + "tasks_correct": n_correct, + "pass_rate": n_correct / len(task_results) if task_results else 0.0, + "timestamp": datetime.now(UTC).isoformat(), + }, + indent=2, + ) + ) + + # Append to evolution_summary.jsonl + summary_path = args.output_dir / "evolution_summary.jsonl" + with 
summary_path.open("a") as f: + f.write( + json.dumps( + { + "iteration": args.iteration, + "candidate": candidate_name, + "pass_rate": n_correct / len(task_results) if task_results else 0.0, + "tasks_total": len(task_results), + "timestamp": datetime.now(UTC).isoformat(), + } + ) + + "\n" + ) + + print(f"\n{candidate_name}: {n_correct}/{len(task_results)} correct") + print(f"Traces written to {run_dir}") + print(f"Summary appended to {summary_path}") + + +if __name__ == "__main__": + main() diff --git a/score_harness/query_runs.py b/score_harness/query_runs.py new file mode 100644 index 000000000..2bd367a37 --- /dev/null +++ b/score_harness/query_runs.py @@ -0,0 +1,141 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ +Small helper for querying score_harness run history. 
+ +This helper keeps navigation cheap for both humans and agents by exposing a +small summary interface over the append-only runs/ filesystem. +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path + + +def read_summary(summary_path: Path) -> list[dict]: + if not summary_path.exists(): + return [] + rows = [] + for line in summary_path.read_text().splitlines(): + if not line.strip(): + continue + rows.append(json.loads(line)) + return rows + + +def print_top_candidates(rows: list[dict], top_k: int) -> None: + ranked = sorted(rows, key=lambda row: row.get("pass_rate", 0.0), reverse=True) + for row in ranked[:top_k]: + print( + f"iteration={row.get('iteration')} candidate={row.get('candidate')} " + f"pass_rate={row.get('pass_rate', 0.0):.3f} tasks={row.get('tasks_total', 0)}" + ) + + +def print_failed_tasks(runs_dir: Path, candidate: str | None = None) -> None: + candidate_dirs = sorted(runs_dir.glob("iteration_*/**/traces/*/score.json")) + for score_path in candidate_dirs: + candidate_name = score_path.parents[2].name + if candidate and candidate_name != candidate: + continue + score = json.loads(score_path.read_text()) + if not score.get("verdict_correct", False): + task_id = score_path.parent.name + print( + f"candidate={candidate_name} task={task_id} " + f"gate_passed={score.get('gate_passed')} " + f"expected={score.get('expected_verdict')}" + ) + + +def load_task_scores(runs_dir: Path, candidate: str) -> dict[str, dict]: + scores: dict[str, dict] = {} + for score_path in sorted( + runs_dir.glob(f"iteration_*/{candidate}/traces/*/score.json") + ): + scores[score_path.parent.name] = json.loads(score_path.read_text()) + return scores + + +def print_candidate_diff(runs_dir: Path, left: str, right: str) -> None: + left_scores = load_task_scores(runs_dir, left) + right_scores = load_task_scores(runs_dir, right) + task_ids = sorted(set(left_scores) | set(right_scores)) + + for task_id in task_ids: + left_score = 
left_scores.get(task_id) + right_score = right_scores.get(task_id) + if left_score is None or right_score is None: + print( + f"task={task_id} present_in={left if left_score else ''}" + f"{right if right_score else ''}" + ) + continue + + if left_score.get("verdict_correct") != right_score.get("verdict_correct"): + print( + f"task={task_id} {left}={left_score.get('verdict_correct')} " + f"{right}={right_score.get('verdict_correct')}" + ) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Query score-harness run history") + parser.add_argument("--runs-dir", type=Path, default=Path("runs")) + parser.add_argument("--top", type=int, default=5) + parser.add_argument("--failed-tasks", action="store_true") + parser.add_argument("--candidate", type=str) + parser.add_argument("--diff-candidates", nargs=2, metavar=("LEFT", "RIGHT")) + args = parser.parse_args() + + rows = read_summary(args.runs_dir / "evolution_summary.jsonl") + if rows: + print("Top candidates") + print("--------------") + print_top_candidates(rows, args.top) + else: + print("No run summary found yet.") + + if args.failed_tasks: + print() + print("Failed tasks") + print("------------") + print_failed_tasks(args.runs_dir, args.candidate) + + if args.diff_candidates: + left, right = args.diff_candidates + print() + print("Candidate diff") + print("--------------") + print_candidate_diff(args.runs_dir, left, right) + + +if __name__ == "__main__": + main() diff --git a/score_harness/spec/task_001_example.json b/score_harness/spec/task_001_example.json new file mode 100644 index 000000000..9c2978ced --- /dev/null +++ b/score_harness/spec/task_001_example.json @@ -0,0 +1,12 @@ +{ + "_comment": "Example task spec. Copy and edit per change scenario. See README.md for field descriptions.", + "active": false, + "id": "task_001_complies_link_removed", + "description": "A guideline loses its complies link to a std_req. 
Gate should fail due to broken reference.", + "input_path": "docs/internals/requirements/requirements.rst", + "change_description": "Remove the complies link from gd_guidl__xyz to std_req__iso26262__001", + "expected_verdict": "fail", + "consistency_rules": ["CR-001"], + "needs_json_path": "", + "_note": "Set needs_json_path to the path of the needs.json produced after the agent applies the change." +} diff --git a/score_harness/spec/task_002_threshold_fail.json b/score_harness/spec/task_002_threshold_fail.json new file mode 100644 index 000000000..3730f9c0a --- /dev/null +++ b/score_harness/spec/task_002_threshold_fail.json @@ -0,0 +1,13 @@ +{ + "id": "task_002_threshold_fail", + "description": "Coverage threshold failure for tool_req when code-link coverage is below 100%.", + "input_path": "score_harness/fixtures/metrics_threshold_fail.json", + "change_description": "Evaluate a metrics fixture where tool_req code-link coverage is 75% against a 100% threshold.", + "expected_verdict": "fail", + "consistency_rules": ["CR-005"], + "metrics_json_path": "score_harness/fixtures/metrics_threshold_fail.json", + "gate_args": ["--min-req-code", "100"], + "responsible_role": "pr_creator", + "escalation_role": "harness_maintainer", + "waiver_authority": "release_approver" +} diff --git a/score_harness/spec/task_003_broken_refs_fail.json b/score_harness/spec/task_003_broken_refs_fail.json new file mode 100644 index 000000000..d9e7f54e9 --- /dev/null +++ b/score_harness/spec/task_003_broken_refs_fail.json @@ -0,0 +1,13 @@ +{ + "id": "task_003_broken_refs_fail", + "description": "Broken testcase reference should fail when fail-on-broken-test-refs is enabled.", + "input_path": "score_harness/fixtures/metrics_broken_refs.json", + "change_description": "Evaluate a metrics fixture with one broken testcase reference.", + "expected_verdict": "fail", + "consistency_rules": ["CR-003"], + "metrics_json_path": "score_harness/fixtures/metrics_broken_refs.json", + "gate_args": 
["--fail-on-broken-test-refs"], + "responsible_role": "pr_creator", + "escalation_role": "harness_maintainer", + "waiver_authority": "release_approver" +} diff --git a/score_harness/spec/task_004_need_type_scope_pass.json b/score_harness/spec/task_004_need_type_scope_pass.json new file mode 100644 index 000000000..cac83de8b --- /dev/null +++ b/score_harness/spec/task_004_need_type_scope_pass.json @@ -0,0 +1,13 @@ +{ + "id": "task_004_need_type_scope_pass", + "description": "Need-type scoping should pass when only fully-linked tool_req is checked and comp_req is ignored.", + "input_path": "score_harness/fixtures/metrics_tool_req_scope_pass.json", + "change_description": "Evaluate a metrics fixture where tool_req passes and comp_req fails, while gate is scoped to tool_req only.", + "expected_verdict": "pass", + "consistency_rules": ["CR-005"], + "metrics_json_path": "score_harness/fixtures/metrics_tool_req_scope_pass.json", + "gate_args": ["--need-type", "tool_req", "--require-all-links"], + "responsible_role": "pr_creator", + "escalation_role": "harness_maintainer", + "waiver_authority": "release_approver" +} diff --git a/score_harness/spec/task_005_build_output_pass.json b/score_harness/spec/task_005_build_output_pass.json new file mode 100644 index 000000000..81dd3f839 --- /dev/null +++ b/score_harness/spec/task_005_build_output_pass.json @@ -0,0 +1,13 @@ +{ + "id": "task_005_build_output_pass", + "description": "Current docs build output should pass when no thresholds are enforced.", + "input_path": "../_build/needs.json", + "change_description": "Evaluate the current repo docs build output with default gate settings (no minimum thresholds).", + "expected_verdict": "pass", + "consistency_rules": ["CR-005"], + "needs_json_path": "../_build/needs.json", + "gate_args": [], + "responsible_role": "pr_creator", + "escalation_role": "harness_maintainer", + "waiver_authority": "release_approver" +} diff --git a/score_harness/tests/BUILD b/score_harness/tests/BUILD new 
file mode 100644 index 000000000..3943bee6c --- /dev/null +++ b/score_harness/tests/BUILD @@ -0,0 +1,49 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +load("@docs_as_code_hub_env//:requirements.bzl", "all_requirements") +load("//:score_pytest.bzl", "score_pytest") + +score_pytest( + name = "validate_candidate_test", + srcs = ["validate_candidate_test.py"], + deps = [ + "//score_harness:validate_candidate", + "//score_harness:common", + "//score_harness/harness:base_harness", + "//score_harness/harness:rule_retrieval_harness", + ] + all_requirements, + data = [ + "//score_harness:spec_files", + "//score_harness:sources", + ], + pytest_config = "//:pyproject.toml", +) + +score_pytest( + name = "common_test", + srcs = ["common_test.py"], + deps = [ + "//score_harness:common", + "//score_harness/harness:base_harness", + ] + all_requirements, + data = [ + "//score_harness:spec_files", + "//score_harness:sources", + ], + pytest_config = "//:pyproject.toml", +) diff --git a/score_harness/tests/common_test.py b/score_harness/tests/common_test.py new file mode 100644 index 000000000..0c936f6d1 --- /dev/null +++ b/score_harness/tests/common_test.py @@ -0,0 +1,51 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# 
information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +"""Tests for harness common utilities.""" + +from __future__ import annotations + +from pathlib import Path + +# Import from workspace target +import score_harness.common as common_module + +_MY_PATH = Path(__file__).parent + + +def test_load_harness_baseline(): + """Test loading baseline harness.""" + harness_path = _MY_PATH.parent / "harness" / "base_harness.py" + harness_cls = common_module.load_harness(harness_path) + + assert harness_cls is not None + assert hasattr(harness_cls, "get_context") + assert hasattr(harness_cls, "post_process") + + +def test_load_task_specs(): + """Test loading task specs from spec directory.""" + spec_dir = _MY_PATH.parent / "spec" + tasks = common_module.load_task_specs(spec_dir) + + assert len(tasks) > 0, "Should have at least one task spec" + + for task in tasks: + # Task specs use "id" not "task_id" + assert "id" in task + assert "_task_spec_path" in task + assert Path(task["_task_spec_path"]).exists() diff --git a/score_harness/tests/validate_candidate_test.py b/score_harness/tests/validate_candidate_test.py new file mode 100644 index 000000000..68289564a --- /dev/null +++ b/score_harness/tests/validate_candidate_test.py @@ -0,0 +1,87 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +"""Tests for validate_candidate.py""" + +from __future__ import annotations + +from pathlib import Path + +# Import from workspace target +import score_harness.validate_candidate as validate_candidate_module + +_MY_PATH = Path(__file__).parent + + +def test_validate_harness_baseline(): + """Test that baseline harness passes validation.""" + + harness_path = _MY_PATH.parent / "harness" / "base_harness.py" + assert harness_path.exists(), f"Harness not found: {harness_path}" + + # Use a minimal task spec for validation + task_spec_path = _MY_PATH.parent / "spec" / "task_002_threshold_fail.json" + assert task_spec_path.exists(), f"Task spec not found: {task_spec_path}" + + # Should not raise + result = validate_candidate_module.validate_candidate(harness_path, task_spec_path) + assert isinstance(result, dict) + assert result.get("status") == "ok" + assert "context_length" in result + + +def test_validate_harness_rule_retrieval(): + """Test that rule_retrieval harness passes validation.""" + harness_path = _MY_PATH.parent / "harness" / "rule_retrieval_harness.py" + assert harness_path.exists(), f"Harness not found: {harness_path}" + + # Use a minimal task spec for validation + task_spec_path = _MY_PATH.parent / "spec" / "task_002_threshold_fail.json" + assert task_spec_path.exists(), f"Task spec not found: {task_spec_path}" + + # Should not raise + result = validate_candidate_module.validate_candidate(harness_path, task_spec_path) + assert isinstance(result, dict) + assert result.get("status") == "ok" + assert "context_length" in result + + +def test_task_spec_schema(): + """Test that task 
specs have required fields.""" + import json + + spec_dir = _MY_PATH.parent / "spec" + + for spec_file in spec_dir.glob("task_*.json"): + with open(spec_file) as f: + spec = json.load(f) + + # Skip inactive examples + if spec.get("active") is False: + continue + + # Check required fields + assert "id" in spec, f"{spec_file.name}: missing id" + assert "description" in spec, f"{spec_file.name}: missing description" + assert "responsible_role" in spec, f"{spec_file.name}: missing responsible_role" + + # Check that task has either metrics_json_path or needs_json_path + has_metrics = "metrics_json_path" in spec + has_needs = "needs_json_path" in spec + assert has_metrics or has_needs, ( + f"{spec_file.name}: must have metrics_json_path or needs_json_path" + ) diff --git a/score_harness/validate_candidate.py b/score_harness/validate_candidate.py new file mode 100644 index 000000000..11e1dd615 --- /dev/null +++ b/score_harness/validate_candidate.py @@ -0,0 +1,100 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ +Lightweight validation for docs-as-code harness candidates. + +This is the cheap pre-benchmark check. It catches malformed candidates before +they consume the full task set or expensive gate runs. + +Validation covers: +1. candidate module loads +2. candidate class instantiates +3. get_context() returns a string for one tiny task spec +4. post_process() returns a dict +5. expected trace filenames are known and stable +6. task spec provides either needs_json_path or metrics_json_path +""" + +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +from score_harness.common import load_harness + +REQUIRED_TRACE_FILENAMES = ( + "gate_output.json", + "impacted_elements.json", + "score.json", +) + + +def validate_candidate(candidate_path: Path, task_spec_path: Path) -> dict: + """Return a structured validation result for one candidate and one tiny task.""" + harness = load_harness(candidate_path) + task_spec = json.loads(task_spec_path.read_text()) + if not task_spec.get("active", True): + raise ValueError("task spec is inactive; choose a runnable task spec") + + if not task_spec.get("needs_json_path") and not task_spec.get("metrics_json_path"): + raise ValueError( + "task spec must provide either needs_json_path or metrics_json_path" + ) + + context = harness.get_context(task_spec) + if not isinstance(context, str): + raise TypeError(f"get_context() must return str, got {type(context).__name__}") + + post_processed = harness.post_process("", task_spec) + if not isinstance(post_processed, dict): + raise TypeError( + f"post_process() must return dict, got 
{type(post_processed).__name__}" + ) + + return { + "candidate": candidate_path.stem, + "task_id": task_spec.get("id", "unknown"), + "context_length": len(context), + "required_trace_filenames": list(REQUIRED_TRACE_FILENAMES), + "status": "ok", + } + + +def main() -> None: + parser = argparse.ArgumentParser(description="Validate a score-harness candidate") + parser.add_argument("--candidate", required=True, type=Path) + parser.add_argument("--task-spec", required=True, type=Path) + args = parser.parse_args() + + result = validate_candidate(args.candidate, args.task_spec) + print(json.dumps(result, indent=2)) + + +if __name__ == "__main__": + main() From 32713e14f7e2f6c794c7c1112dd9093636a4ef99 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Thu, 30 Apr 2026 09:17:55 +0000 Subject: [PATCH 23/27] Add self-healing validation system - Log validation failures to validation_failures.jsonl - Instruct LLM proposer to read failure log first (SKILL.md Step 1) - Document learning mechanism in SELF_HEALING.md - Enables system to learn from linting/type/interface mistakes - Auto-logs failures in outer_loop.py when validation fails Self-healing allows the proposer to see past mistakes (with exact error messages and fix commands) so it can avoid repeating them. This is more effective than explicit instructions because the LLM sees actual failures it made, not generic rules. 
Related: eclipse-score/score#2850 --- score_harness/SELF_HEALING.md | 198 ++++++++++++++++++++++++ score_harness/SKILL.md | 19 ++- score_harness/outer_loop.py | 19 +++ score_harness/validate_candidate.py | 172 +++++++++++++++++--- score_harness/validation_failures.jsonl | 3 + 5 files changed, 382 insertions(+), 29 deletions(-) create mode 100644 score_harness/SELF_HEALING.md create mode 100644 score_harness/validation_failures.jsonl diff --git a/score_harness/SELF_HEALING.md b/score_harness/SELF_HEALING.md new file mode 100644 index 000000000..c14e3196f --- /dev/null +++ b/score_harness/SELF_HEALING.md @@ -0,0 +1,198 @@ +# Self-Healing Validation System + +This system automatically learns from validation failures and feeds them back to the LLM proposer in future iterations. + +## How It Works + +### 1. Validation Runs Automatically + +When `outer_loop.py` runs, it first validates the candidate harness: + +```bash +python3 score_harness/outer_loop.py \ + --candidate score_harness/harness/my_candidate.py \ + --tasks score_harness/spec/ \ + --iteration 5 +``` + +The validation checks: +- ✅ Harness interface (get_context, post_process) +- ✅ Linting (ruff) +- ✅ Type checking (basedpyright) +- ✅ Import succeeds +- ✅ Basic smoke test + +### 2. Failures Are Logged + +If validation fails (e.g., linting error, type error, missing method), the failure is automatically logged to **`validation_failures.jsonl`**: + +```json +{"iteration": 5, "candidate": "my_candidate", "failure_type": "linting_error", "message": "F401 'json' imported but unused", "fix": "Run: ruff check --fix score_harness/harness/my_candidate.py"} +{"iteration": 6, "candidate": "another_candidate", "failure_type": "type_error", "message": "Type 'str | None' cannot be assigned to 'str'", "fix": "Fix type errors reported by basedpyright"} +``` + +### 3. 
Proposer Learns From Failures + +In **Step 1** of the SKILL.md workflow, the LLM proposer is instructed to: + +> **Read validation_failures.jsonl FIRST** — learn from past mistakes + +Example mistakes the proposer will see: + +| Iteration | Mistake | Fix Applied in Next Iteration | +|-----------|---------|-------------------------------| +| 3 | Forgot to import `Path` | Iteration 4: Add `from pathlib import Path` | +| 5 | Linting error: unused import | Iteration 6: Run `ruff check --fix` before submitting | +| 7 | Type error: `str | None` not handled | Iteration 8: Add null check | +| 9 | Forgot to run validation | Iteration 10: Always run `validate_candidate.py` | + +### 4. System Self-Improves + +Over iterations, the proposer builds a mental model of: +- Common validation failures +- How to prevent them +- Which checks to run before submitting + +This is **learning without explicit training** — the feedback loop teaches the proposer through structured failure logs. + +## Example: Learning From a Linting Mistake + +**Iteration 3**: Proposer submits candidate with unused import +```python +# harness/candidate_3.py +import json # ← unused +from pathlib import Path + +class AssuranceHarness: + def get_context(self, task_spec): + return "context" +``` + +**Validation fails**: +```bash +[validation] FAILED: candidate_3 — 1 failures logged to validation_failures.jsonl + - linting_error: Run: ruff check --fix score_harness/harness/candidate_3.py +``` + +**validation_failures.jsonl** records: +```json +{"iteration": 3, "candidate": "candidate_3", "failure_type": "linting_error", "message": "F401 'json' imported but unused", "fix": "Run: ruff check --fix score_harness/harness/candidate_3.py"} +``` + +**Iteration 4**: Proposer reads validation_failures.jsonl, sees the linting error, and now includes linting in Step 2: +```python +# harness/candidate_4.py +from pathlib import Path # ← fixed: removed unused import + +class AssuranceHarness: + def get_context(self, task_spec): 
+ return "context" +``` + +**Validation passes** ✅ + +## Manual Validation + +You can also run validation manually: + +```bash +# Basic validation (interface checks only) +python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/my_candidate.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json + +# With failure logging +python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/my_candidate.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json \ + --iteration 5 \ + --log-failures + +# Skip external checks (for CI environments without ruff/basedpyright) +python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/my_candidate.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json \ + --skip-external-checks +``` + +## Viewing Past Failures + +```bash +# See all validation failures +cat score_harness/validation_failures.jsonl | jq . + +# See failures from iteration 5 +cat score_harness/validation_failures.jsonl | jq 'select(.iteration == 5)' + +# See all linting errors +cat score_harness/validation_failures.jsonl | jq 'select(.failure_type == "linting_error")' + +# Count failures by type +cat score_harness/validation_failures.jsonl | jq -r '.failure_type' | sort | uniq -c +``` + +## Why This Works Better Than Explicit Instructions + +**Without self-healing**: +```markdown +Step 2: Implement +- Run linting +- Run type checking +- Run tests +- Run validation +``` + +❌ LLM might skip steps +❌ No feedback when steps are forgotten +❌ Instructions get stale + +**With self-healing**: +```json +{"iteration": 3, "failure_type": "linting_error", "fix": "Run: ruff check --fix ..."} +{"iteration": 5, "failure_type": "type_error", "fix": "Fix type errors ..."} +``` + +✅ LLM sees **actual mistakes it made** +✅ Concrete fix instructions for each failure +✅ System learns over time +✅ Validation is enforced, not suggested + +## Integration with CI + +In CI environments, skip 
external checks if tools aren't installed: + +```yaml +# .github/workflows/harness-validation.yml +- name: Validate harness candidates + run: | + python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/base_harness.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json \ + --skip-external-checks # Skip ruff/basedpyright in CI +``` + +Or install the tools: + +```yaml +- name: Install validation tools + run: pip install ruff basedpyright + +- name: Validate with full checks + run: | + python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/base_harness.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json +``` + +## Design Rationale + +This approach is inspired by **Meta-Harness** but goes further: + +| Meta-Harness | score_harness (with self-healing) | +|--------------|----------------------------------| +| Import validation only | Import + linting + type checking + interface validation | +| No failure logging | Failures logged to validation_failures.jsonl | +| LLM sees success/fail only | LLM sees exact failure reason + fix command | +| No learning across iterations | System learns from mistakes | + +The key insight: **validation failures are training data**. By logging failures in a structured format, we enable the proposer to learn without manual instruction updates. diff --git a/score_harness/SKILL.md b/score_harness/SKILL.md index 4f86827a1..86015c678 100644 --- a/score_harness/SKILL.md +++ b/score_harness/SKILL.md @@ -42,7 +42,8 @@ The harness variable is: what context is provided to the agent before it edits a ## Key files - `harness/base_harness.py` — base class and baseline candidate. Read before proposing. -- `evolution_summary.jsonl` — one line per prior candidate (read this first) +- **`validation_failures.jsonl`** — append-only log of past validation failures. Read this FIRST to avoid repeating mistakes. 
+- `evolution_summary.jsonl` — one line per prior candidate (read this second) - `runs/` — trace history. Use grep to find patterns across tasks and iterations. - `spec/*.json` — task specs defining input, expected verdict, and relevant consistency rules. @@ -50,16 +51,22 @@ The harness variable is: what context is provided to the agent before it edits a ### Step 1: Analyze -1. Read `evolution_summary.jsonl` to understand what has been tried. -2. Read `runs/` traces for failed tasks: `impacted_elements.json` and `score.json`. -3. Read prior candidate harness files in `harness/`. -4. Form a falsifiable hypothesis: "Providing X before the agent acts will reduce Y failure class." +1. **Read validation_failures.jsonl FIRST** — learn from past mistakes (linting errors, type errors, interface violations). +2. Read `evolution_summary.jsonl` to understand what has been tried. +3. Read `runs/` traces for failed tasks: `impacted_elements.json` and `score.json`. +4. Read prior candidate harness files in `harness/`. +5. Form a falsifiable hypothesis: "Providing X before the agent acts will reduce Y failure class." ### Step 2: Implement 1. Copy `harness/base_harness.py` to `harness/.py`. 2. Override `get_context()` with your mechanism. Keep `post_process()` default unless needed. -3. Validate import: `python3 -c "from score_harness.harness. import *; print('OK')"`. +3. **Validate import**: `python3 -c "from score_harness.harness. import *; print('OK')"`. +4. **Run linting**: `ruff check score_harness/harness/.py` (fix any errors with `ruff check --fix`). +5. **Run type checking**: `basedpyright score_harness/harness/.py` (fix type errors before submitting). +6. **Run cheap validation**: `python3 score_harness/validate_candidate.py --candidate score_harness/harness/.py --task-spec score_harness/spec/task_002_threshold_fail.json`. + +**If validation fails**, the failure is logged to `validation_failures.jsonl` for the next iteration to learn from. 
### Step 3: Write pending_eval.json diff --git a/score_harness/outer_loop.py b/score_harness/outer_loop.py index 016b0ecad..cdeaa2f18 100644 --- a/score_harness/outer_loop.py +++ b/score_harness/outer_loop.py @@ -275,7 +275,26 @@ def main() -> None: # noqa: C901 validation_result = validate_candidate( args.candidate, validation_task_spec, + skip_external_checks=False, ) + + # Log validation failures for learning + if validation_result["status"] == "failed": + from score_harness.validate_candidate import log_validation_failure + + log_validation_failure( + args.iteration, + validation_result["candidate"], + validation_result["failures"], + ) + print( + f"[validation] FAILED: {validation_result['candidate']} " + f"— {len(validation_result['failures'])} failures logged to validation_failures.jsonl" + ) + for failure in validation_result["failures"]: + print(f" - {failure['failure_type']}: {failure['fix']}") + sys.exit(1) + print( f"[validation] candidate={validation_result['candidate']} " f"task={validation_result['task_id']} status={validation_result['status']}" diff --git a/score_harness/validate_candidate.py b/score_harness/validate_candidate.py index 11e1dd615..37606384f 100644 --- a/score_harness/validate_candidate.py +++ b/score_harness/validate_candidate.py @@ -38,12 +38,16 @@ 4. post_process() returns a dict 5. expected trace filenames are known and stable 6. task spec provides either needs_json_path or metrics_json_path +7. linting passes (ruff) +8. 
type checking passes (basedpyright) """ from __future__ import annotations import argparse import json +import subprocess +import sys from pathlib import Path from score_harness.common import load_harness @@ -55,46 +59,168 @@ ) -def validate_candidate(candidate_path: Path, task_spec_path: Path) -> dict: - """Return a structured validation result for one candidate and one tiny task.""" - harness = load_harness(candidate_path) - task_spec = json.loads(task_spec_path.read_text()) - if not task_spec.get("active", True): - raise ValueError("task spec is inactive; choose a runnable task spec") - - if not task_spec.get("needs_json_path") and not task_spec.get("metrics_json_path"): - raise ValueError( - "task spec must provide either needs_json_path or metrics_json_path" - ) - - context = harness.get_context(task_spec) - if not isinstance(context, str): - raise TypeError(f"get_context() must return str, got {type(context).__name__}") - - post_processed = harness.post_process("", task_spec) - if not isinstance(post_processed, dict): - raise TypeError( - f"post_process() must return dict, got {type(post_processed).__name__}" +def validate_candidate( + candidate_path: Path, task_spec_path: Path, skip_external_checks: bool = False +) -> dict: + """Return a structured validation result for one candidate and one tiny task. 
+ + Args: + candidate_path: Path to the candidate harness module + task_spec_path: Path to a task spec for validation + skip_external_checks: If True, skip linting/type checking (for CI environments) + + Returns: + dict with status="ok" or status="failed" + failure_type/message/fix + """ + failures = [] + + # Check 1-6: Harness interface and basic functionality + try: + harness = load_harness(candidate_path) + task_spec = json.loads(task_spec_path.read_text()) + if not task_spec.get("active", True): + raise ValueError("task spec is inactive; choose a runnable task spec") + + if not task_spec.get("needs_json_path") and not task_spec.get( + "metrics_json_path" + ): + raise ValueError( + "task spec must provide either needs_json_path or metrics_json_path" + ) + + context = harness.get_context(task_spec) + if not isinstance(context, str): + raise TypeError( + f"get_context() must return str, got {type(context).__name__}" + ) + + post_processed = harness.post_process("", task_spec) + if not isinstance(post_processed, dict): + raise TypeError( + f"post_process() must return dict, got {type(post_processed).__name__}" + ) + + context_length = len(context) + + except Exception as e: + failures.append( + { + "failure_type": "interface_error", + "message": str(e), + "fix": "Ensure candidate implements get_context() and post_process() correctly", + } ) + context_length = 0 + + # Check 7: Linting (ruff) + if not skip_external_checks: + try: + result = subprocess.run( + ["ruff", "check", str(candidate_path)], + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + failures.append( + { + "failure_type": "linting_error", + "message": result.stdout.strip(), + "fix": "Run: ruff check --fix score_harness/harness/.py", + } + ) + except FileNotFoundError: + # ruff not installed - warn but don't fail + pass + + # Check 8: Type checking (basedpyright) + if not skip_external_checks: + try: + result = subprocess.run( + ["basedpyright", str(candidate_path)], + 
capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + failures.append( + { + "failure_type": "type_error", + "message": result.stdout.strip(), + "fix": "Fix type errors reported by basedpyright", + } + ) + except FileNotFoundError: + # basedpyright not installed - warn but don't fail + pass + + if failures: + return { + "candidate": candidate_path.stem, + "task_id": task_spec_path.stem, + "status": "failed", + "failures": failures, + } return { "candidate": candidate_path.stem, - "task_id": task_spec.get("id", "unknown"), - "context_length": len(context), + "task_id": task_spec_path.stem, + "context_length": context_length, "required_trace_filenames": list(REQUIRED_TRACE_FILENAMES), "status": "ok", } +def log_validation_failure( + iteration: int, candidate: str, failures: list[dict] +) -> None: + """Append validation failures to validation_failures.jsonl for learning.""" + log_path = Path(__file__).parent / "validation_failures.jsonl" + + for failure in failures: + entry = { + "iteration": iteration, + "candidate": candidate, + "failure_type": failure["failure_type"], + "message": failure["message"], + "fix": failure["fix"], + } + with log_path.open("a") as f: + f.write(json.dumps(entry) + "\n") + + def main() -> None: parser = argparse.ArgumentParser(description="Validate a score-harness candidate") parser.add_argument("--candidate", required=True, type=Path) parser.add_argument("--task-spec", required=True, type=Path) + parser.add_argument("--iteration", type=int, default=0) + parser.add_argument( + "--skip-external-checks", + action="store_true", + help="Skip linting/type checking (for CI)", + ) + parser.add_argument( + "--log-failures", + action="store_true", + help="Write validation failures to validation_failures.jsonl", + ) args = parser.parse_args() - result = validate_candidate(args.candidate, args.task_spec) + result = validate_candidate( + args.candidate, args.task_spec, args.skip_external_checks + ) + + if args.log_failures 
and result["status"] == "failed": + log_validation_failure( + args.iteration, result["candidate"], result["failures"] + ) + print(json.dumps(result, indent=2)) + # Exit with non-zero if validation failed + if result["status"] == "failed": + sys.exit(1) + if __name__ == "__main__": main() diff --git a/score_harness/validation_failures.jsonl b/score_harness/validation_failures.jsonl new file mode 100644 index 000000000..bd2114b4a --- /dev/null +++ b/score_harness/validation_failures.jsonl @@ -0,0 +1,3 @@ +# Validation failure log (append-only) +# Each line records one validation failure so the proposer can learn from mistakes +# Format: {"iteration": N, "candidate": "name", "failure_type": "...", "message": "...", "fix": "..."} From f225f5e033f665c5a99dfef1ca020af7a4a5304f Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Thu, 30 Apr 2026 09:54:28 +0000 Subject: [PATCH 24/27] refactor: clean up repository structure Moved internal documentation outside the public repository. --- .gitignore | 3 --- AGENTS.md | 1 - score_harness/.gitignore | 1 - 3 files changed, 5 deletions(-) diff --git a/.gitignore b/.gitignore index 59e81291c..e4adda9a2 100644 --- a/.gitignore +++ b/.gitignore @@ -34,8 +34,5 @@ __pycache__/ # Harness execution history (local only) score_harness/runs/ -# Internal OEM documentation (confidential) -docs/internals/requirements/oem_internal_workstreams.md - # Temporary issue drafts (already created in GitHub) .tmp_issue_updates/ diff --git a/AGENTS.md b/AGENTS.md index 618c5f706..a8f0082fb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -46,4 +46,3 @@ The harness subsystem lives under `score_harness/`. 
- Public issues: tooling, schemas, validators, workflows, public task corpora - Internal work: release authority, confidential benchmarks, waivers, supplier/legal obligations -- Internal boundary details live in `docs/internals/requirements/oem_internal_workstreams.md` diff --git a/score_harness/.gitignore b/score_harness/.gitignore index 531d0fac2..e7fa2430b 100644 --- a/score_harness/.gitignore +++ b/score_harness/.gitignore @@ -1,3 +1,2 @@ -docs/internals/requirements/oem_internal_workstreams.md runs/ .tmp_issue_updates/ From e7655b5cb3b0a5fc28fd2ba0b9f48d7e4afaa7b4 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Mon, 11 May 2026 14:15:27 +0000 Subject: [PATCH 25/27] harness --- .devcontainer/devcontainer.json | 5 +- checkable_safety_arguments.md | 0 .../concepts/agentic_software_development.rst | 152 ++++++++++++++++++ docs/concepts/index.rst | 1 + docs/how-to/agentic_software_development.rst | 33 ++++ docs/how-to/get_started.rst | 8 + docs/how-to/index.rst | 1 + docs/internals/agent_harness.rst | 84 ++++++++++ docs/internals/index.rst | 1 + prepare_commit.sh | 49 ++++++ score_harness/README.md | 25 ++- score_harness/outer_loop.py | 12 +- score_harness/validate_candidate.py | 11 ++ 13 files changed, 374 insertions(+), 8 deletions(-) create mode 100644 checkable_safety_arguments.md create mode 100644 docs/concepts/agentic_software_development.rst create mode 100644 docs/how-to/agentic_software_development.rst create mode 100644 docs/internals/agent_harness.rst create mode 100755 prepare_commit.sh diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 90cf9e677..588f9604b 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,8 @@ { "name": "eclipse-s-core", "image": "ghcr.io/eclipse-score/devcontainer:v1.2.0", - "updateContentCommand": "bazel run //:ide_support" + "updateContentCommand": "bazel run //:ide_support", + "mounts": [ + 
"source=${localEnv:HOME}/.agents,target=/root/.agents,type=bind,consistency=cached" + ] } diff --git a/checkable_safety_arguments.md b/checkable_safety_arguments.md new file mode 100644 index 000000000..e69de29bb diff --git a/docs/concepts/agentic_software_development.rst b/docs/concepts/agentic_software_development.rst new file mode 100644 index 000000000..d19a14b8e --- /dev/null +++ b/docs/concepts/agentic_software_development.rst @@ -0,0 +1,152 @@ +.. + # ******************************************************************************* + # Copyright (c) 2026 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. + # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ******************************************************************************* + +.. _docs_agentic_software_development: + +Agentic Software Development +============================ + +Docs-as-code can be developed with conventional manual changes or with +agent-assisted workflows. The key design rule is that agent assistance changes +how changes are proposed and analyzed, not how merge eligibility is decided. + +Lane A and Lane B +----------------- + +Lane A is the deterministic path. It validates candidate behavior and evaluates +change scenarios through the existing traceability gate. Lane A does not require +an LLM. + +Lane B is optional. A coding agent may inspect prior structured traces and +propose improved harness candidates. Lane B can accelerate iteration, but it +never overrides a Lane A denial. + +This means: + +- manual and agent-assisted changes both converge on the same deterministic gate. +- optional agent reasoning is kept outside the merge-critical decision path. 
+- structured run artifacts are part of the design, not an afterthought. + +Old approach vs harness-based approach +-------------------------------------- + +Without the harness, repository checks can still validate a change, but the +feedback surface is coarse: pass/fail results, scattered logs, and limited +structured evidence for iterative improvement. + +With the harness, changes are evaluated through a staged workflow: + +- cheap candidate validation first +- deterministic task execution through the Lane A gate +- distilled JSON trace artifacts for each task +- append-only summary history for future comparison and learning + +Old approach flow +~~~~~~~~~~~~~~~~~ + +.. plantuml:: + + @startuml + title Old Approach: Repo Checks Without Harness Context + + hide footbox + autonumber + skinparam shadowing false + + actor "Developer / Agent" as DEV + participant "Requirement / Docs Change" as REQ + participant "Generic Repo Checks" as REPO + participant "Review + CI Gate" as GATE + + DEV -> REQ : Edit requirement/docs/links + REQ -> REPO : Trigger repo checks + REPO -> REPO : Run lint/tests/traceability gate + alt Checks fail + REPO --> DEV : Coarse failure logs + DEV -> REQ : Patch and retry + else Checks pass + REPO --> GATE : Pass/fail status + logs + GATE --> DEV : Merge decision + end + + @enduml + +Harness-based flow +~~~~~~~~~~~~~~~~~~ + +.. 
plantuml:: + + @startuml + title Harness-Based Approach: Candidate + Smoke Validation + Structured Outputs + + hide footbox + autonumber + skinparam shadowing false + + actor "Developer / Agent" as DEV + participant "Requirement Change" as REQ + participant "Candidate\n(score_harness/harness/*.py)" as CAND + collections "Agent Instructions\n(AGENTS.md)" as AGENTFILE + collections "Skill Prompts\n(.github/instructions, SKILL.md)" as SKILLFILE + collections "Consistency Rules\n(score_harness/consistency_rules.yaml)" as RULES + collections "Task Specs\n(score_harness/spec/*.json)" as SPECS + participant "Smoke Validation\n(validate_candidate.py)" as SMOKE + participant "Outer Loop\n(outer_loop.py)" as LOOP + participant "Lane A Gate\n(traceability_gate.py)" as GATE + participant "Review + CI Gate" as CIGATE + database "Run Artifacts\n(score_harness/runs/...)" as ART + + DEV -> REQ : Start from requirement update + DEV -> CAND : Propose or edit candidate + CAND -> AGENTFILE : Read operating constraints + CAND -> SKILLFILE : Read workflow skills/instructions + CAND -> RULES : Load deterministic consistency checks + LOOP -> SPECS : Load seeded task corpus + + LOOP -> SMOKE : Validate candidate before expensive runs + alt Smoke validation fails + SMOKE -> ART : Append validation_failures.jsonl + SMOKE --> DEV : Early fail with actionable reason + else Smoke validation passes + LOOP -> GATE : Execute deterministic Lane A per task + GATE --> LOOP : Verdict + metrics + LOOP -> ART : Write traces//... artifacts + LOOP -> ART : Append evolution_summary.jsonl + LOOP -> CIGATE : Submit evidence bundle + verdict + CIGATE --> DEV : Merge decision (same gate authority) + end + + note over ART + Append-only summaries and traces enable + comparison, replay, and self-healing loops. + end note + + @enduml + +Why the harness exists +---------------------- + +The harness adds a repeatable evaluation layer around docs-as-code change +scenarios.
It exists to make change quality visible in machine-readable form and +make iterative improvement queryable over time. + +The design goals are: + +- deterministic merge-critical checks +- cheap rejection of malformed candidates +- structured traces rather than raw stdout as the primary evidence format +- selective navigation of prior runs through summary-first artifacts +- optional agent-assisted improvement without introducing an LLM dependency into Lane A + +For the implementation details, see :doc:`../internals/agent_harness`. diff --git a/docs/concepts/index.rst b/docs/concepts/index.rst index 123c01dcf..c6f8ca73b 100644 --- a/docs/concepts/index.rst +++ b/docs/concepts/index.rst @@ -21,5 +21,6 @@ Here you find explanations how and why docs-as-code works the way it does. .. toctree:: :maxdepth: 1 + agentic_software_development bidirectional_traceability docs_deps diff --git a/docs/how-to/agentic_software_development.rst b/docs/how-to/agentic_software_development.rst new file mode 100644 index 000000000..1cef4c739 --- /dev/null +++ b/docs/how-to/agentic_software_development.rst @@ -0,0 +1,33 @@ +.. + # ******************************************************************************* + # Copyright (c) 2026 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. + # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ******************************************************************************* + +Agentic Software Development +============================ + +Docs-as-code supports both manual and agent-assisted change workflows. +In both cases, merge-relevant decisions remain anchored in deterministic Lane A +checks. 
+ +What this means in practice: + +- manual changes and agent-generated changes are subject to the same Lane A gate. +- Lane A is deterministic Python and open-source tooling; it decides pass/fail. +- Lane B is optional and can use a coding agent to propose or improve harness candidates. +- structured traces make failures inspectable and reusable instead of relying on raw command logs. + +If you are new to this workflow, start with the concept overview at +:doc:`../concepts/agentic_software_development`. + +If you maintain the harness implementation itself, continue with +:doc:`../internals/agent_harness`. diff --git a/docs/how-to/get_started.rst b/docs/how-to/get_started.rst index 0567894e8..a5f2b8b56 100644 --- a/docs/how-to/get_started.rst +++ b/docs/how-to/get_started.rst @@ -27,3 +27,11 @@ In a new S-CORE repository, see :ref:`setup`. After the initial setup, continue with :doc:`dashboards_and_quality_gates` to build a repository dashboard and enforce CI quality gates. + +Agentic software development +---------------------------- + +Docs-as-code also supports an agent-assisted development workflow. Manual and +agent-generated changes still converge on the same deterministic Lane A checks. + +For an overview of that workflow, see :doc:`agentic_software_development`. diff --git a/docs/how-to/index.rst b/docs/how-to/index.rst index 7cae52036..dc1dc4a98 100644 --- a/docs/how-to/index.rst +++ b/docs/how-to/index.rst @@ -23,6 +23,7 @@ Here you find practical guides on how to use docs-as-code. :maxdepth: 1 get_started + agentic_software_development setup write_docs faq diff --git a/docs/internals/agent_harness.rst b/docs/internals/agent_harness.rst new file mode 100644 index 000000000..2f1314c02 --- /dev/null +++ b/docs/internals/agent_harness.rst @@ -0,0 +1,84 @@ +.. 
+ # ******************************************************************************* + # Copyright (c) 2026 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. + # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ******************************************************************************* + +Agent Harness +============= + +The docs-as-code harness implementation lives under ``score_harness/``. It is a +maintainer-facing subsystem for evaluating harness candidates against +machine-readable change scenarios. + +Subsystem map +------------- + +The current subsystem layout is: + +- ``score_harness/spec/``: task specifications used as evaluation units +- ``score_harness/harness/``: harness candidates, one Python file per candidate +- ``score_harness/outer_loop.py``: deterministic evaluation runner +- ``score_harness/validate_candidate.py``: cheap pre-benchmark validation +- ``score_harness/query_runs.py``: summary-first query helpers for prior runs +- ``score_harness/consistency_rules.yaml``: public rule catalog used by tasks and candidates +- ``score_harness/runs/``: append-only execution history and distilled traces + +Execution flow +-------------- + +The execution contract is intentionally narrow: + +1. Validate the candidate cheaply against one runnable task specification. +2. Load the candidate and task corpus. +3. Run the deterministic Lane A traceability gate for each active task. +4. Distill task-level trace artifacts into small JSON outputs. +5. Append a run-level summary entry to ``evolution_summary.jsonl``. + +The outer loop is deterministic Python. No LLM is required in Lane A. 
+ +Artifacts +--------- + +A successful run writes: + +- ``runs///score.json`` +- ``runs///traces//gate_output.json`` +- ``runs///traces//impacted_elements.json`` +- ``runs///traces//score.json`` +- ``runs/evolution_summary.jsonl`` + +If cheap validation fails, structured failure entries can also be appended to +``score_harness/validation_failures.jsonl`` so later iterations can avoid +repeating the same mistakes. + +Manual and agent-assisted changes +--------------------------------- + +The harness is not limited to agent-generated changes. The important split is +not human versus agent, but deterministic versus optional. + +- Lane A applies equally to manual changes and agent-assisted changes. +- Lane B is the optional agentic workflow for proposing and improving harness candidates. +- Merge eligibility remains tied to deterministic checks, not to the proposer. + +Current CI status +----------------- + +The harness is already covered indirectly by repository CI: + +- linting covers harness files through repository-wide ``pre-commit`` execution +- Bazel test execution includes harness tests through ``bazel test //...`` + +What is not yet present is a dedicated harness workflow job that runs the outer +loop itself as a named CI check and uploads harness run artifacts. + +That dedicated CI integration remains planned work. diff --git a/docs/internals/index.rst b/docs/internals/index.rst index f0e913836..333be01cb 100644 --- a/docs/internals/index.rst +++ b/docs/internals/index.rst @@ -21,6 +21,7 @@ This section is not relevant for users of docs-as-code but for developers extend .. 
toctree:: :maxdepth: 1 + agent_harness extensions/index benchmark_results decisions/index diff --git a/prepare_commit.sh b/prepare_commit.sh new file mode 100755 index 000000000..a12ee7f85 --- /dev/null +++ b/prepare_commit.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Script to properly stage and commit the harness implementation + +cd /workspaces/docs-as-code + +# Update .gitignore to exclude sensitive/temporary files +cat >> .gitignore << 'EOF' + +# Harness execution history (local only) +score_harness/runs/ + +# Internal OEM documentation (confidential) +docs/internals/requirements/oem_internal_workstreams.md + +# Temporary issue drafts (already created in GitHub) +.tmp_issue_updates/ +EOF + +# Stage the harness implementation +git add AGENTS.md +git add score_harness/ + +# Show what will be committed +echo "=========================================" +echo "Files to be committed:" +echo "=========================================" +git status --short + +echo "" +echo "=========================================" +echo "Excluded files (not staged):" +echo "=========================================" +git status --short | grep "^??" | grep -E "(oem_internal|\.tmp_issue|runs/)" + +echo "" +echo "=========================================" +echo "Ready to commit! 
Use:" +echo "=========================================" +echo 'git commit -m "feat(harness): Add pilot foundation for docs-as-code assurance harness + +- Add outer loop with both metrics_json and needs_json task modes +- Add lightweight candidate validation and query tooling +- Add baseline + rule-retrieval harness candidates +- Add provenance metadata and responsibility model for audit compliance +- Add tool safety restrictions and compliance documentation +- Create executable seed corpus (4 tasks) + +Addresses #518, #524 +Part of #520"' diff --git a/score_harness/README.md b/score_harness/README.md index 91b5a0982..d5bc312b8 100644 --- a/score_harness/README.md +++ b/score_harness/README.md @@ -2,8 +2,8 @@ Agent harness infrastructure for Eclipse S-CORE docs-as-code. -This directory is the integration gate between agent-generated changes and the -Lane A traceability gate (`scripts_bazel/traceability_gate.py`). +This directory is the integration gate between docs-as-code change workflows +and the Lane A traceability gate (`scripts_bazel/traceability_gate.py`). Treat this file as the entry map for the harness area. Keep it short. Put deeper detail in the structured files below so agents can navigate selectively. @@ -40,6 +40,10 @@ Every harness candidate is evaluated against the same Lane A gate: No LLM is required in Lane A. The outer loop is deterministic Python. +Lane A applies equally to manual and agent-assisted change workflows. Agentic +behavior only changes how candidates are proposed and improved, not how merge +eligibility is decided. + Note: `traceability_coverage.py` no longer exists as a separate script—coverage extraction is integrated into the Sphinx build via the score_metamodel extension. ## Queryability rules @@ -60,24 +64,33 @@ determines merge eligibility. ```bash # Validate a candidate cheaply before full evaluation -python3 score_harness/validate_candidate.py \ +PYTHONPATH=. 
python3 score_harness/validate_candidate.py \ --candidate score_harness/harness/base_harness.py \ --task-spec score_harness/spec/task_002_threshold_fail.json # Run the seeded gate-fixture corpus against the baseline harness -python3 score_harness/outer_loop.py \ +PYTHONPATH=. python3 score_harness/outer_loop.py \ --candidate score_harness/harness/base_harness.py \ --tasks score_harness/spec/ # Query prior runs, failed tasks, and candidate deltas -python3 score_harness/query_runs.py \ +PYTHONPATH=. python3 score_harness/query_runs.py \ --runs-dir score_harness/runs \ --failed-tasks \ --diff-candidates base_harness candidate_x ``` +## Enforcement and bypass resistance + +- Local runs are for fast feedback and can always be bypassed by intent. +- Merge protection must come from required CI checks. +- `validate_candidate.py --skip-external-checks` is blocked unless `SCORE_HARNESS_ALLOW_SKIP_EXTERNAL_CHECKS=1`. +- `outer_loop.py --skip-validation` is blocked unless `SCORE_HARNESS_ALLOW_SKIP_VALIDATION=1`. +- CI must run both validation and outer loop without skip flags. +- Configure branch protection so the harness CI workflow is required before merge. + ## Next implementation steps 1. Grow the seeded corpus beyond gate metrics fixtures to full docs build snapshots using the needs_json task path. 2. Add more candidate harnesses so run-to-run diffs show meaningful behavioral deltas. -3. Integrate the outer loop into CI as a non-blocking pre-gate pilot. +3. Keep the harness CI gate required on pull requests and evolve the task corpus over time. 
diff --git a/score_harness/outer_loop.py b/score_harness/outer_loop.py index cdeaa2f18..ef1255ffa 100644 --- a/score_harness/outer_loop.py +++ b/score_harness/outer_loop.py @@ -242,7 +242,10 @@ def main() -> None: # noqa: C901 parser.add_argument( "--skip-validation", action="store_true", - help="Skip the cheap pre-benchmark candidate validation step.", + help=( + "Skip cheap pre-benchmark candidate validation " + "(dev-only; requires SCORE_HARNESS_ALLOW_SKIP_VALIDATION=1)." + ), ) args = parser.parse_args() args.gate_script = resolve_support_path(args.gate_script) @@ -258,6 +261,13 @@ def main() -> None: # noqa: C901 print(f"No task specs found in {args.tasks}. Create spec/*.json files first.") sys.exit(1) + if args.skip_validation and os.getenv("SCORE_HARNESS_ALLOW_SKIP_VALIDATION") != "1": + print( + "Refusing --skip-validation: set SCORE_HARNESS_ALLOW_SKIP_VALIDATION=1 " + "for local debugging only." + ) + sys.exit(2) + if not args.skip_validation: validation_task_spec = None for candidate_task_spec in sorted(args.tasks.glob("*.json")): diff --git a/score_harness/validate_candidate.py b/score_harness/validate_candidate.py index 37606384f..f6c369d7c 100644 --- a/score_harness/validate_candidate.py +++ b/score_harness/validate_candidate.py @@ -46,6 +46,7 @@ import argparse import json +import os import subprocess import sys from pathlib import Path @@ -206,6 +207,16 @@ def main() -> None: ) args = parser.parse_args() + if ( + args.skip_external_checks + and os.getenv("SCORE_HARNESS_ALLOW_SKIP_EXTERNAL_CHECKS") != "1" + ): + print( + "Refusing --skip-external-checks: set " + "SCORE_HARNESS_ALLOW_SKIP_EXTERNAL_CHECKS=1 for local debugging only." 
+ ) + sys.exit(2) + result = validate_candidate( args.candidate, args.task_spec, args.skip_external_checks ) From bdd43c9475fcdb650c3a4c2bc851d2031a48fb3c Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Fri, 15 May 2026 14:44:01 +0000 Subject: [PATCH 26/27] chore: bootstrap SCORE copier overlay in docs-as-code --- .github/copilot-instructions.md | 7 + .github/references/agent-card.schema.json | 155 +++++++++++++++++++ .github/references/repo-manifest.schema.json | 152 ++++++++++++++++++ .github/score/.copier-answers.yml | 9 ++ .github/score/repo-manifest.json | 23 +++ copier.yml | 64 ++++++++ 6 files changed, 410 insertions(+) create mode 100644 .github/copilot-instructions.md create mode 100644 .github/references/agent-card.schema.json create mode 100644 .github/references/repo-manifest.schema.json create mode 100644 .github/score/.copier-answers.yml create mode 100644 .github/score/repo-manifest.json create mode 100644 copier.yml diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 000000000..1741d5ca3 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,7 @@ +# SCORE Copilot Overlay + +- Follow `AGENTS.md` as the primary behavior contract for planning and execution. +- Keep changes minimal, scoped, and traceable to the active issue. +- Place generated evidence and intermediate artifacts under `.stage/ISSUE-/...`. +- Prefer reproducible commands and include validation steps for touched files. +- When uncertain, prioritize repository conventions and existing automation. 
diff --git a/.github/references/agent-card.schema.json b/.github/references/agent-card.schema.json new file mode 100644 index 000000000..a62fd1074 --- /dev/null +++ b/.github/references/agent-card.schema.json @@ -0,0 +1,155 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://score.dev/schemas/agent-card.schema.json", + "title": "SCORE Agent Card", + "description": "Structured handoff artifact exchanged between agents and tools for one issue-scoped work item.", + "type": "object", + "additionalProperties": false, + "required": [ + "version", + "issue_id", + "repository", + "goal", + "status", + "summary", + "validation", + "next_action" + ], + "properties": { + "version": { + "const": 1 + }, + "issue_id": { + "type": "string", + "pattern": "^(ISSUE-[0-9]+|POC-[0-9]{8}-[0-9]{4})$" + }, + "repository": { + "type": "string", + "minLength": 1 + }, + "branch": { + "type": "string", + "minLength": 1 + }, + "goal": { + "type": "string", + "minLength": 1 + }, + "status": { + "type": "string", + "enum": [ + "in_progress", + "blocked", + "ready_for_handoff", + "completed" + ] + }, + "summary": { + "type": "string", + "minLength": 1 + }, + "findings": { + "type": "array", + "items": { + "$ref": "#/$defs/note" + }, + "default": [] + }, + "open_questions": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "default": [] + }, + "touched_files": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "uniqueItems": true, + "default": [] + }, + "validation": { + "type": "object", + "additionalProperties": false, + "required": [ + "status" + ], + "properties": { + "status": { + "type": "string", + "enum": [ + "not_run", + "passed", + "failed" + ] + }, + "commands": { + "type": "array", + "items": { + "$ref": "#/$defs/commandResult" + }, + "default": [] + } + } + }, + "trajectory": { + "type": "array", + "items": { + "$ref": "#/$defs/note" + }, + "default": [] + }, + "next_action": { + "type": "string", 
+ "minLength": 1 + } + }, + "$defs": { + "note": { + "type": "object", + "additionalProperties": false, + "required": [ + "title", + "detail" + ], + "properties": { + "title": { + "type": "string", + "minLength": 1 + }, + "detail": { + "type": "string", + "minLength": 1 + } + } + }, + "commandResult": { + "type": "object", + "additionalProperties": false, + "required": [ + "command", + "status" + ], + "properties": { + "command": { + "type": "string", + "minLength": 1 + }, + "status": { + "type": "string", + "enum": [ + "passed", + "failed" + ] + }, + "detail": { + "type": "string" + } + } + } + } +} \ No newline at end of file diff --git a/.github/references/repo-manifest.schema.json b/.github/references/repo-manifest.schema.json new file mode 100644 index 000000000..ac9859637 --- /dev/null +++ b/.github/references/repo-manifest.schema.json @@ -0,0 +1,152 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://score.dev/schemas/repo-manifest.schema.json", + "title": "SCORE Repo Manifest", + "description": "Minimal federated harness contract for a SCORE repository.", + "type": "object", + "additionalProperties": false, + "required": [ + "version", + "repository", + "bootstrap", + "execution", + "mcp" + ], + "properties": { + "version": { + "const": 1 + }, + "repository": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "language", + "visibility" + ], + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "language": { + "type": "string", + "enum": [ + "python", + "go", + "rust", + "cpp", + "typescript", + "mixed", + "other" + ] + }, + "visibility": { + "type": "string", + "enum": [ + "public", + "internal", + "private" + ] + }, + "tags": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "uniqueItems": true, + "default": [] + } + } + }, + "bootstrap": { + "type": "object", + "additionalProperties": false, + "required": [ + "contract_version" + ], + 
"properties": { + "contract_version": { + "type": "string", + "pattern": "^v[0-9]+\\.[0-9]+\\.[0-9]+$" + }, + "template_version": { + "type": "string", + "pattern": "^v[0-9]+\\.[0-9]+\\.[0-9]+$" + } + } + }, + "execution": { + "type": "object", + "additionalProperties": false, + "required": [ + "build", + "test", + "lint" + ], + "properties": { + "build": { + "$ref": "#/$defs/commandSpec" + }, + "test": { + "$ref": "#/$defs/commandSpec" + }, + "lint": { + "$ref": "#/$defs/commandSpec" + }, + "typecheck": { + "$ref": "#/$defs/commandSpec" + } + } + }, + "mcp": { + "type": "object", + "additionalProperties": false, + "required": [ + "server_name", + "tools" + ], + "properties": { + "server_name": { + "type": "string", + "minLength": 1 + }, + "tools": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "build", + "test", + "lint", + "typecheck", + "search" + ] + }, + "uniqueItems": true, + "minItems": 1 + } + } + } + }, + "$defs": { + "commandSpec": { + "type": "object", + "additionalProperties": false, + "required": [ + "command" + ], + "properties": { + "command": { + "type": "string", + "minLength": 1 + }, + "working_directory": { + "type": "string", + "minLength": 1 + } + } + } + } +} \ No newline at end of file diff --git a/.github/score/.copier-answers.yml b/.github/score/.copier-answers.yml new file mode 100644 index 000000000..d08527138 --- /dev/null +++ b/.github/score/.copier-answers.yml @@ -0,0 +1,9 @@ +_src_path: gh:eclipse-score/.github +_commit: cccecdc7c915d4d3df2e747b7fee97accc1ba812 +repo_name: docs-as-code +repo_language: Python +repo_visibility: public +build_command: bazel build //... +test_command: bazel test //... +lint_command: bazel test //... 
+assistant_instructions_file: copilot-instructions.md diff --git a/.github/score/repo-manifest.json b/.github/score/repo-manifest.json new file mode 100644 index 000000000..7887d166f --- /dev/null +++ b/.github/score/repo-manifest.json @@ -0,0 +1,23 @@ +{ + "version": 1, + "repository": { + "name": "docs-as-code", + "language": "python", + "visibility": "public", + "tags": ["docs", "traceability", "iso26262"] + }, + "bootstrap": { + "contract_version": "v0.1.0", + "template_version": "v0.1.0" + }, + "execution": { + "build": { "command": "bazel build //..." }, + "test": { "command": "bazel test //..." }, + "lint": { "command": "bazel test //..." }, + "typecheck": { "command": "bazel test //..." } + }, + "mcp": { + "server_name": "score-repo-tools", + "tools": ["build", "test", "lint", "typecheck"] + } +} diff --git a/copier.yml b/copier.yml new file mode 100644 index 000000000..9df2761d2 --- /dev/null +++ b/copier.yml @@ -0,0 +1,64 @@ +# SCORE governance overlay — Copier template configuration +# Adopter repos run: +# copier copy gh:eclipse-score/.github path/to/repo +# copier update (from inside the adopter repo to pull latest SCORE changes) + +_subdirectory: template +_answers_file: .github/score/.copier-answers.yml + +_message_after_copy: | + SCORE governance overlay applied. + + Next steps: + 1. Review AGENTS.md as the canonical cross-assistant policy. + 2. If needed, add assistant-specific notes in CLAUDE.md or .github/. + 3. Review .github/score/repo-manifest.json — update build/test/lint commands. + 4. Commit the generated files. + 5. Run: python3 scripts/check_markdown_hygiene.py --root . --include .github + 6. To update SCORE governance in future: copier update + +# ── Questions ──────────────────────────────────────────────────────────────── + +repo_name: + type: str + help: Repository name (used in repo-manifest.json, e.g. 
score-baselibc) + +repo_language: + type: str + help: Primary language + choices: + - C++ + - Rust + - Python + - Go + - Other + +repo_visibility: + type: str + help: Repository visibility + default: public + choices: + - public + - private + +build_command: + type: str + help: Build command (e.g. "bazel build //...") + default: "bazel build //..." + +test_command: + type: str + help: Test command (e.g. "bazel test //...") + default: "bazel test //..." + +lint_command: + type: str + help: Lint command (e.g. "bazel run //:lint") + default: "bazel test //..." + +assistant_instructions_file: + type: str + help: > + Filename your AI assistant loads as instructions + (Copilot commonly uses copilot-instructions.md; other runtimes may use a different name) + default: copilot-instructions.md From 4bc0fbfc83938cd9fa036d31f9f885f6ccde18c9 Mon Sep 17 00:00:00 2001 From: Frank Scholter Peres Date: Fri, 15 May 2026 15:29:54 +0000 Subject: [PATCH 27/27] feat: add score harness adapter contract v0.1 draft --- score_harness/README.md | 2 + .../contract/adapter_contract_v0_1.md | 127 ++++++++++++++++++ .../adapter_contract_v0_1.schema.json | 54 ++++++++ 3 files changed, 183 insertions(+) create mode 100644 score_harness/contract/adapter_contract_v0_1.md create mode 100644 score_harness/contract/adapter_contract_v0_1.schema.json diff --git a/score_harness/README.md b/score_harness/README.md index d5bc312b8..8957fd2be 100644 --- a/score_harness/README.md +++ b/score_harness/README.md @@ -14,6 +14,7 @@ detail in the structured files below so agents can navigate selectively. 
score_harness/ spec/ Task specs (small, structured change scenarios) harness/ Harness candidates (one Python file per candidate) + contract/ Adapter contract and schema (v0.1) runs/ Execution history (append-only, per iteration/candidate/task) consistency_rules.yaml Public docs-as-code rule catalog SKILL.md Domain skill for the outer loop proposer @@ -24,6 +25,7 @@ score_harness/ - Start here for the overall contract and command sequence - Read `spec/` for task units and expected verdicts +- Read `contract/` for adapter interface and machine-readable schema - Read `consistency_rules.yaml` for rule IDs and impact semantics - Read `outer_loop.py` for evaluation, distillation, and filesystem layout - Read `SKILL.md` only when working on Lane B candidate evolution diff --git a/score_harness/contract/adapter_contract_v0_1.md b/score_harness/contract/adapter_contract_v0_1.md new file mode 100644 index 000000000..d6371f0e7 --- /dev/null +++ b/score_harness/contract/adapter_contract_v0_1.md @@ -0,0 +1,127 @@ +# Adapter Contract v0.1 + +This contract defines the minimal integration interface between docs-as-code +consumers and the current harness implementation. + +Scope: one backend only (`docs-as-code` harness). No framework switching is +required in v0.1. + +## Design goals + +- Keep the surface minimal and stable +- Keep execution deterministic and replayable +- Keep artifact locations machine-readable +- Keep governance linkage explicit (`issue_id` + run artifacts) + +## Operations + +Three operations are required: + +1. `validate` +2. `run` +3. 
`report` + +## Common request envelope + +```json +{ + "contract_version": "v0.1.0", + "operation": "validate", + "issue_id": 1234, + "task_spec": "score_harness/spec/task_001.json", + "candidate_path": "score_harness/harness/base_harness.py", + "artifacts_dir": ".stage/ISSUE-1234/harness", + "profile": "iso26262", + "strict": true +} +``` + +Field notes: + +- `contract_version`: must be `v0.1.0` +- `operation`: one of `validate`, `run`, `report` +- `issue_id`: positive integer for issue-first traceability +- `task_spec`: path to one task file or task directory +- `candidate_path`: harness candidate entry file +- `artifacts_dir`: root directory for generated artifacts +- `profile`: currently fixed to `iso26262` +- `strict`: when true, fail on any contract or validation violation + +## Common response envelope + +```json +{ + "contract_version": "v0.1.0", + "operation": "validate", + "status": "pass", + "error_code": null, + "summary": "candidate validation completed", + "artifacts": [ + { + "path": ".stage/ISSUE-1234/harness/validation.json", + "type": "validation_result" + } + ], + "traceability": { + "issue_id": 1234, + "task_id": "task_001", + "run_id": "20260515T101500Z_base_harness" + } +} +``` + +## Status and errors + +`status` must be one of: + +- `pass`: operation completed and checks passed +- `fail`: operation completed but checks failed +- `error`: operation could not complete due to runtime/config/input issues + +`error_code` must be null on `pass`; otherwise one of: + +- `E_INPUT_INVALID` +- `E_CONTRACT_VERSION` +- `E_PROFILE_UNSUPPORTED` +- `E_CANDIDATE_INVALID` +- `E_TASK_SPEC_INVALID` +- `E_RUNTIME_FAILURE` +- `E_ARTIFACT_WRITE` + +## Operation semantics + +### validate + +Expected behavior: + +- Run cheap candidate validation (`validate_candidate.py`) +- Validate task spec readability/shape +- Emit validation artifact + +### run + +Expected behavior: + +- Execute deterministic outer loop (`outer_loop.py`) +- Run Lane A gate per task +- Emit 
per-task trace artifacts and evolution summary artifacts + +### report + +Expected behavior: + +- Read existing run artifacts only +- Return compact summary view +- No mutation of candidate code or task specs + +## Artifact rules + +- Prefer `.stage/ISSUE-<issue_id>/...` for issue-scoped outputs +- Keep JSON artifacts small and grep-friendly +- Keep artifact names stable across runs + +## Compatibility policy + +- v0.1 is additive only +- New optional fields are allowed +- Required-field removals or semantic changes require v0.2+ diff --git a/score_harness/contract/adapter_contract_v0_1.schema.json b/score_harness/contract/adapter_contract_v0_1.schema.json new file mode 100644 index 000000000..77482257c --- /dev/null +++ b/score_harness/contract/adapter_contract_v0_1.schema.json @@ -0,0 +1,54 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://score.dev/schemas/score-harness/adapter-contract-v0.1.schema.json", + "title": "Score Harness Adapter Contract v0.1", + "type": "object", + "additionalProperties": false, + "required": [ + "contract_version", + "operation", + "issue_id", + "task_spec", + "candidate_path", + "artifacts_dir", + "profile", + "strict" + ], + "properties": { + "contract_version": { + "type": "string", + "const": "v0.1.0" + }, + "operation": { + "type": "string", + "enum": [ + "validate", + "run", + "report" + ] + }, + "issue_id": { + "type": "integer", + "minimum": 1 + }, + "task_spec": { + "type": "string", + "minLength": 1 + }, + "candidate_path": { + "type": "string", + "minLength": 1 + }, + "artifacts_dir": { + "type": "string", + "minLength": 1 + }, + "profile": { + "type": "string", + "const": "iso26262" + }, + "strict": { + "type": "boolean" + } + } +}