diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 90cf9e677..588f9604b 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,8 @@ { "name": "eclipse-s-core", "image": "ghcr.io/eclipse-score/devcontainer:v1.2.0", - "updateContentCommand": "bazel run //:ide_support" + "updateContentCommand": "bazel run //:ide_support", + "mounts": [ + "source=${localEnv:HOME}/.agents,target=/root/.agents,type=bind,consistency=cached" + ] } diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 000000000..1741d5ca3 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,7 @@ +# SCORE Copilot Overlay + +- Follow `AGENTS.md` as the primary behavior contract for planning and execution. +- Keep changes minimal, scoped, and traceable to the active issue. +- Place generated evidence and intermediate artifacts under `.stage/ISSUE-<number>/...`. +- Prefer reproducible commands and include validation steps for touched files. +- When uncertain, prioritize repository conventions and existing automation. 
diff --git a/.github/references/agent-card.schema.json b/.github/references/agent-card.schema.json new file mode 100644 index 000000000..a62fd1074 --- /dev/null +++ b/.github/references/agent-card.schema.json @@ -0,0 +1,155 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://score.dev/schemas/agent-card.schema.json", + "title": "SCORE Agent Card", + "description": "Structured handoff artifact exchanged between agents and tools for one issue-scoped work item.", + "type": "object", + "additionalProperties": false, + "required": [ + "version", + "issue_id", + "repository", + "goal", + "status", + "summary", + "validation", + "next_action" + ], + "properties": { + "version": { + "const": 1 + }, + "issue_id": { + "type": "string", + "pattern": "^(ISSUE-[0-9]+|POC-[0-9]{8}-[0-9]{4})$" + }, + "repository": { + "type": "string", + "minLength": 1 + }, + "branch": { + "type": "string", + "minLength": 1 + }, + "goal": { + "type": "string", + "minLength": 1 + }, + "status": { + "type": "string", + "enum": [ + "in_progress", + "blocked", + "ready_for_handoff", + "completed" + ] + }, + "summary": { + "type": "string", + "minLength": 1 + }, + "findings": { + "type": "array", + "items": { + "$ref": "#/$defs/note" + }, + "default": [] + }, + "open_questions": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "default": [] + }, + "touched_files": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "uniqueItems": true, + "default": [] + }, + "validation": { + "type": "object", + "additionalProperties": false, + "required": [ + "status" + ], + "properties": { + "status": { + "type": "string", + "enum": [ + "not_run", + "passed", + "failed" + ] + }, + "commands": { + "type": "array", + "items": { + "$ref": "#/$defs/commandResult" + }, + "default": [] + } + } + }, + "trajectory": { + "type": "array", + "items": { + "$ref": "#/$defs/note" + }, + "default": [] + }, + "next_action": { + "type": "string", 
+ "minLength": 1 + } + }, + "$defs": { + "note": { + "type": "object", + "additionalProperties": false, + "required": [ + "title", + "detail" + ], + "properties": { + "title": { + "type": "string", + "minLength": 1 + }, + "detail": { + "type": "string", + "minLength": 1 + } + } + }, + "commandResult": { + "type": "object", + "additionalProperties": false, + "required": [ + "command", + "status" + ], + "properties": { + "command": { + "type": "string", + "minLength": 1 + }, + "status": { + "type": "string", + "enum": [ + "passed", + "failed" + ] + }, + "detail": { + "type": "string" + } + } + } + } +} \ No newline at end of file diff --git a/.github/references/repo-manifest.schema.json b/.github/references/repo-manifest.schema.json new file mode 100644 index 000000000..ac9859637 --- /dev/null +++ b/.github/references/repo-manifest.schema.json @@ -0,0 +1,152 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://score.dev/schemas/repo-manifest.schema.json", + "title": "SCORE Repo Manifest", + "description": "Minimal federated harness contract for a SCORE repository.", + "type": "object", + "additionalProperties": false, + "required": [ + "version", + "repository", + "bootstrap", + "execution", + "mcp" + ], + "properties": { + "version": { + "const": 1 + }, + "repository": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "language", + "visibility" + ], + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "language": { + "type": "string", + "enum": [ + "python", + "go", + "rust", + "cpp", + "typescript", + "mixed", + "other" + ] + }, + "visibility": { + "type": "string", + "enum": [ + "public", + "internal", + "private" + ] + }, + "tags": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "uniqueItems": true, + "default": [] + } + } + }, + "bootstrap": { + "type": "object", + "additionalProperties": false, + "required": [ + "contract_version" + ], + 
"properties": { + "contract_version": { + "type": "string", + "pattern": "^v[0-9]+\\.[0-9]+\\.[0-9]+$" + }, + "template_version": { + "type": "string", + "pattern": "^v[0-9]+\\.[0-9]+\\.[0-9]+$" + } + } + }, + "execution": { + "type": "object", + "additionalProperties": false, + "required": [ + "build", + "test", + "lint" + ], + "properties": { + "build": { + "$ref": "#/$defs/commandSpec" + }, + "test": { + "$ref": "#/$defs/commandSpec" + }, + "lint": { + "$ref": "#/$defs/commandSpec" + }, + "typecheck": { + "$ref": "#/$defs/commandSpec" + } + } + }, + "mcp": { + "type": "object", + "additionalProperties": false, + "required": [ + "server_name", + "tools" + ], + "properties": { + "server_name": { + "type": "string", + "minLength": 1 + }, + "tools": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "build", + "test", + "lint", + "typecheck", + "search" + ] + }, + "uniqueItems": true, + "minItems": 1 + } + } + } + }, + "$defs": { + "commandSpec": { + "type": "object", + "additionalProperties": false, + "required": [ + "command" + ], + "properties": { + "command": { + "type": "string", + "minLength": 1 + }, + "working_directory": { + "type": "string", + "minLength": 1 + } + } + } + } +} \ No newline at end of file diff --git a/.github/score/.copier-answers.yml b/.github/score/.copier-answers.yml new file mode 100644 index 000000000..d08527138 --- /dev/null +++ b/.github/score/.copier-answers.yml @@ -0,0 +1,9 @@ +_src_path: gh:eclipse-score/.github +_commit: cccecdc7c915d4d3df2e747b7fee97accc1ba812 +repo_name: docs-as-code +repo_language: Python +repo_visibility: public +build_command: bazel build //... +test_command: bazel test //... +lint_command: bazel test //... 
+assistant_instructions_file: copilot-instructions.md diff --git a/.github/score/repo-manifest.json b/.github/score/repo-manifest.json new file mode 100644 index 000000000..7887d166f --- /dev/null +++ b/.github/score/repo-manifest.json @@ -0,0 +1,23 @@ +{ + "version": 1, + "repository": { + "name": "docs-as-code", + "language": "python", + "visibility": "public", + "tags": ["docs", "traceability", "iso26262"] + }, + "bootstrap": { + "contract_version": "v0.1.0", + "template_version": "v0.1.0" + }, + "execution": { + "build": { "command": "bazel build //..." }, + "test": { "command": "bazel test //..." }, + "lint": { "command": "bazel test //..." }, + "typecheck": { "command": "bazel test //..." } + }, + "mcp": { + "server_name": "score-repo-tools", + "tools": ["build", "test", "lint", "typecheck"] + } +} diff --git a/.gitignore b/.gitignore index 2900b6074..e4adda9a2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ # Commonly used for local settings and secrets +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ .env # Bazel @@ -26,3 +29,10 @@ __pycache__/ # bug: This file is created in repo root on test discovery. /consumer_test.log +.clwb + +# Harness execution history (local only) +score_harness/runs/ + +# Temporary issue drafts (already created in GitHub) +.tmp_issue_updates/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..a8f0082fb --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,48 @@ +# Agent Entry Map + +This file is the repository entrypoint for coding agents. It is intentionally +short. Treat it as a map, not an encyclopedia. 
+ +## What This Repo Does + +- Builds and validates docs-as-code workflows for Eclipse S-CORE +- Maintains a Sphinx-needs metamodel and traceability tooling +- Provides Lane A validation for requirements, code links, and test links + +## Where To Look First + +- `README.md` - repository overview and main build/test flow +- `docs/` - user and internal documentation +- `scripts_bazel/traceability_coverage.py` - metrics extraction +- `scripts_bazel/traceability_gate.py` - deterministic Lane A gate +- `score_harness/README.md` - harness subsystem map and execution flow + +## Core Commands + +- Build docs / generate needs data: use the existing repo build flow from `README.md` +- Run traceability metrics: `python scripts_bazel/traceability_coverage.py --json-output <metrics.json>` +- Run traceability gate: `python scripts_bazel/traceability_gate.py --metrics-json <metrics.json>` +- Run Python tests: use the existing repo test command from `README.md` + +## Harness Area + +The harness subsystem lives under `score_harness/`. + +- `score_harness/spec/` - task specs (small, structured change scenarios) +- `score_harness/harness/` - candidate harness code +- `score_harness/runs/` - append-only execution history +- `score_harness/outer_loop.py` - deterministic evaluation and trace distillation +- `score_harness/consistency_rules.yaml` - public rule catalog for docs-as-code + +## Working Rules + +- Keep Lane A deterministic and OSS-only +- Keep required artifacts machine-readable and grep-able +- Put recurring rules into tooling when possible, not only prose +- Keep top-level instruction files short; move detail into indexed files nearby +- Prefer additive, low-risk harness changes over broad prompt rewrites + +## Public vs Internal Boundary + +- Public issues: tooling, schemas, validators, workflows, public task corpora +- Internal work: release authority, confidential benchmarks, waivers, supplier/legal obligations diff --git a/checkable_safety_arguments.md b/checkable_safety_arguments.md new file mode 100644 index 
000000000..e69de29bb diff --git a/copier.yml b/copier.yml new file mode 100644 index 000000000..9df2761d2 --- /dev/null +++ b/copier.yml @@ -0,0 +1,64 @@ +# SCORE governance overlay — Copier template configuration +# Adopter repos run: +# copier copy gh:eclipse-score/.github path/to/repo +# copier update (from inside the adopter repo to pull latest SCORE changes) + +_subdirectory: template +_answers_file: .github/score/.copier-answers.yml + +_message_after_copy: | + SCORE governance overlay applied. + + Next steps: + 1. Review AGENTS.md as the canonical cross-assistant policy. + 2. If needed, add assistant-specific notes in CLAUDE.md or .github/. + 3. Review .github/score/repo-manifest.json — update build/test/lint commands. + 4. Commit the generated files. + 5. Run: python3 scripts/check_markdown_hygiene.py --root . --include .github + 6. To update SCORE governance in future: copier update + +# ── Questions ──────────────────────────────────────────────────────────────── + +repo_name: + type: str + help: Repository name (used in repo-manifest.json, e.g. score-baselibc) + +repo_language: + type: str + help: Primary language + choices: + - C++ + - Rust + - Python + - Go + - Other + +repo_visibility: + type: str + help: Repository visibility + default: public + choices: + - public + - private + +build_command: + type: str + help: Build command (e.g. "bazel build //...") + default: "bazel build //..." + +test_command: + type: str + help: Test command (e.g. "bazel test //...") + default: "bazel test //..." + +lint_command: + type: str + help: Lint command (e.g. "bazel run //:lint") + default: "bazel test //..." 
+ +assistant_instructions_file: + type: str + help: > + Filename your AI assistant loads as instructions + (Copilot commonly uses copilot-instructions.md; other runtimes may use a different name) + default: copilot-instructions.md diff --git a/docs.bzl b/docs.bzl index 699c2370f..bb1a4899b 100644 --- a/docs.bzl +++ b/docs.bzl @@ -293,10 +293,12 @@ def docs(source_dir = "docs", data = [], deps = [], scan_code = [], known_good = "--jobs", "auto", "--define=external_needs_source=" + str(data), + "--define=score_sourcelinks_json=$(location :sourcelinks_json)", + "--define=score_source_code_linker_plain_links=1", ], formats = ["needs"], sphinx = ":sphinx_build", - tools = data, + tools = data + [":sourcelinks_json"], visibility = ["//visibility:public"], # Persistent workers cause stale symlinks after dependency version # changes, corrupting the Bazel cache. diff --git a/docs/concepts/agentic_software_development.rst b/docs/concepts/agentic_software_development.rst new file mode 100644 index 000000000..d19a14b8e --- /dev/null +++ b/docs/concepts/agentic_software_development.rst @@ -0,0 +1,152 @@ +.. + # ******************************************************************************* + # Copyright (c) 2026 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. + # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ******************************************************************************* + +.. _docs_agentic_software_development: + +Agentic Software Development +============================ + +Docs-as-code can be developed with conventional manual changes or with +agent-assisted workflows. 
The key design rule is that agent assistance changes +how changes are proposed and analyzed, not how merge eligibility is decided. + +Lane A and Lane B +----------------- + +Lane A is the deterministic path. It validates candidate behavior and evaluates +change scenarios through the existing traceability gate. Lane A does not require +an LLM. + +Lane B is optional. A coding agent may inspect prior structured traces and +propose improved harness candidates. Lane B can accelerate iteration, but it +never overrides a Lane A denial. + +This means: + +- manual and agent-assisted changes both converge on the same deterministic gate. +- optional agent reasoning is kept outside the merge-critical decision path. +- structured run artifacts are part of the design, not an afterthought. + +Old approach vs harness-based approach +-------------------------------------- + +Without the harness, repository checks can still validate a change, but the +feedback surface is coarse: pass/fail results, scattered logs, and limited +structured evidence for iterative improvement. + +With the harness, changes are evaluated through a staged workflow: + +- cheap candidate validation first +- deterministic task execution through the Lane A gate +- distilled JSON trace artifacts for each task +- append-only summary history for future comparison and learning + +Old approach flow +~~~~~~~~~~~~~~~~~ + +.. 
plantuml:: + + @startuml + title Old Approach: Repo Checks Without Harness Context + + hide footbox + autonumber + skinparam shadowing false + + actor "Developer / Agent" as DEV + participant "Requirement / Docs Change" as REQ + participant "Generic Repo Checks" as REPO + participant "Review + CI Gate" as GATE + + DEV -> REQ : Edit requirement/docs/links + REQ -> REPO : Trigger repo checks + REPO -> REPO : Run lint/tests/traceability gate + alt Checks fail + REPO --> DEV : Coarse failure logs + DEV -> REQ : Patch and retry + else Checks pass + REPO --> GATE : Pass/fail status + logs + GATE --> DEV : Merge decision + end + + @enduml + +Harness-based flow +~~~~~~~~~~~~~~~~~~ + +.. plantuml:: + + @startuml + title Harness-Based Approach: Candidate + Smoke Validation + Structured Outputs + + hide footbox + autonumber + skinparam shadowing false + + actor "Developer / Agent" as DEV + participant "Requirement Change" as REQ + participant "Candidate\n(score_harness/harness/*.py)" as CAND + collections "Agent Instructions\n(AGENTS.md)" as AGENTFILE + collections "Skill Prompts\n(.github/instructions, SKILL.md)" as SKILLFILE + collections "Consistency Rules\n(score_harness/consistency_rules.yaml)" as RULES + collections "Task Specs\n(score_harness/spec/*.yaml)" as SPECS + participant "Smoke Validation\n(validate_candidate.py)" as SMOKE + participant "Outer Loop\n(outer_loop.py)" as LOOP + participant "Lane A Gate\n(traceability_gate.py)" as GATE + participant "Review + CI Gate" as CIGATE + database "Run Artifacts\n(score_harness/runs/...)" as ART + + DEV -> REQ : Start from requirement update + DEV -> CAND : Propose or edit candidate + CAND -> AGENTFILE : Read operating constraints + CAND -> SKILLFILE : Read workflow skills/instructions + CAND -> RULES : Load deterministic consistency checks + LOOP -> SPECS : Load seeded task corpus + + LOOP -> SMOKE : Validate candidate before expensive runs + alt Smoke validation fails + SMOKE -> ART : Append validation_failures.jsonl + 
SMOKE --> DEV : Early fail with actionable reason + else Smoke validation passes + LOOP -> GATE : Execute deterministic Lane A per task + GATE --> LOOP : Verdict + metrics + LOOP -> ART : Write traces/<task>/... artifacts + LOOP -> ART : Append evolution_summary.jsonl + LOOP -> CIGATE : Submit evidence bundle + verdict + CIGATE --> DEV : Merge decision (same gate authority) + end + + note over ART + Append-only summaries and traces enable + comparison, replay, and self-healing loops. + end note + + @enduml + +Why the harness exists +---------------------- + +The harness adds a repeatable evaluation layer around docs-as-code change +scenarios. It exists to make change quality visible in machine-readable form and +make iterative improvement queryable over time. + +The design goals are: + +- deterministic merge-critical checks +- cheap rejection of malformed candidates +- structured traces rather than raw stdout as the primary evidence format +- selective navigation of prior runs through summary-first artifacts +- optional agent-assisted improvement without introducing an LLM dependency into Lane A + +For the implementation details, see :doc:`../internals/agent_harness`. diff --git a/docs/concepts/index.rst b/docs/concepts/index.rst index 123c01dcf..c6f8ca73b 100644 --- a/docs/concepts/index.rst +++ b/docs/concepts/index.rst @@ -21,5 +21,6 @@ Here you find explanations how and why docs-as-code works the way it does. .. toctree:: :maxdepth: 1 + agentic_software_development bidirectional_traceability docs_deps diff --git a/docs/how-to/agentic_software_development.rst b/docs/how-to/agentic_software_development.rst new file mode 100644 index 000000000..1cef4c739 --- /dev/null +++ b/docs/how-to/agentic_software_development.rst @@ -0,0 +1,33 @@ +.. 
+ # ******************************************************************************* + # Copyright (c) 2026 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. + # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ******************************************************************************* + +Agentic Software Development +============================ + +Docs-as-code supports both manual and agent-assisted change workflows. +In both cases, merge-relevant decisions remain anchored in deterministic Lane A +checks. + +What this means in practice: + +- manual changes and agent-generated changes are subject to the same Lane A gate. +- Lane A is deterministic Python and open-source tooling; it decides pass/fail. +- Lane B is optional and can use a coding agent to propose or improve harness candidates. +- structured traces make failures inspectable and reusable instead of relying on raw command logs. + +If you are new to this workflow, start with the concept overview at +:doc:`../concepts/agentic_software_development`. + +If you maintain the harness implementation itself, continue with +:doc:`../internals/agent_harness`. diff --git a/docs/how-to/dashboards_and_quality_gates.rst b/docs/how-to/dashboards_and_quality_gates.rst new file mode 100644 index 000000000..35df229e0 --- /dev/null +++ b/docs/how-to/dashboards_and_quality_gates.rst @@ -0,0 +1,183 @@ +.. + # ******************************************************************************* + # Copyright (c) 2026 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. 
+ # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ******************************************************************************* + +Build Dashboards and Quality Gates +================================== + +This guide is for repositories that *consume* docs-as-code as a Bazel +dependency. Examples are module repositories and integration repositories that +want to: + +1. publish their own traceability dashboards, +2. export ``metrics.json`` during documentation builds, and +3. enforce quality gates in CI. + +The docs-as-code repository itself documents tooling coverage. Consumer +repositories use the same extensions to document *their own* requirements, +architecture, source-code links, and verification evidence. + +What You Get +------------ + +When a consumer repository integrates docs-as-code correctly, it can: + +- build an HTML dashboard from its own Sphinx needs, +- include external needs from other repositories when desired, +- export ``needs.json`` and ``metrics.json`` for machine-readable reporting, +- gate CI on traceability thresholds via ``traceability_gate``. + +Typical Setup +------------- + +1. Add docs-as-code as a Bazel dependency as described in :ref:`setup`. +2. Define the documentation target via the ``docs(...)`` macro. +3. Provide process or upstream needs via the ``data`` argument when cross-repo + traceability is required. +4. Provide implementation sources via ``scan_code`` so ``source_code_link`` can + be generated. +5. Add test metadata so ``testlink`` and testcase needs can be generated. + +Minimal Consumer Example +------------------------ + +In ``BUILD``: + +.. 
code-block:: starlark + + load("@score_docs_as_code//:docs.bzl", "docs") + + filegroup( + name = "module_sources", + srcs = glob([ + "src/**/*.py", + "src/**/*.cpp", + "src/**/*.h", + "src/**/*.rs", + ]), + ) + + docs( + source_dir = "docs", + data = [ + "@score_process//:needs_json", + ], + scan_code = [":module_sources"], + ) + +In ``docs/conf.py``: + +.. code-block:: python + + score_metamodel_requirement_types = "feat_req,comp_req,aou_req" + score_metamodel_include_external_needs = False + +Use ``score_metamodel_include_external_needs = True`` only in repositories that +intentionally aggregate traceability across dependencies, such as integration +repositories. + +Building the Dashboard +---------------------- + +Run: + +.. code-block:: bash + + bazel run //:docs + +This generates HTML output under ``_build/``. + +Run: + +.. code-block:: bash + + bazel build //:needs_json + +This generates machine-readable output under: + +- ``bazel-bin/needs_json/_build/needs/needs.json`` +- ``bazel-bin/needs_json/_build/needs/metrics.json`` + +The HTML dashboard and the exported ``metrics.json`` are backed by the same +traceability metric implementation, so the charts and the CI gate evaluate the +same data. + +Inputs for Linkage Metrics +-------------------------- + +To get meaningful dashboard and gate values, consumer repositories typically +need three inputs: + +1. Requirement and architecture needs in the documentation itself. +2. Source code references via :doc:`source_to_doc_links`. +3. Test metadata via :doc:`test_to_doc_links`. + +If one of those inputs is missing, the related chart or gate metric will remain +empty or low. + +Choosing Local vs Aggregated Views +---------------------------------- + +There are two common modes: + +**Module repository** + +- Set ``score_metamodel_include_external_needs = False``. +- Gate only on the needs owned by the repository itself. +- Use this for per-module implementation progress and traceability. 
+ +**Integration repository** + +- Set ``score_metamodel_include_external_needs = True``. +- Aggregate requirements across module dependencies when that is the intended + repository purpose. +- Use this for system or integration-level dashboards. + +CI Quality Gate +--------------- + +After building ``//:needs_json``, run the gate on the exported metrics: + +.. code-block:: bash + + bazel run @score_docs_as_code//scripts_bazel:traceability_gate -- \ + --metrics-json bazel-bin/needs_json/_build/needs/metrics.json \ + --min-req-code 70 \ + --min-req-test 70 \ + --min-req-fully-linked 60 \ + --min-tests-linked 70 + +Useful flags: + +- ``--require-all-links`` for strict 100 percent gating +- ``--fail-on-broken-test-refs`` to fail when testcase references point to + unknown requirement IDs + +Recommended Rollout +------------------- + +For a new consumer repository: + +1. Start with local-only metrics. +2. Enable ``scan_code`` and verify ``source_code_link`` coverage first. +3. Add test metadata and verify ``testlink`` coverage. +4. Introduce modest thresholds in CI. +5. Raise thresholds over time as the repository matures. + +Related Guides +-------------- + +- :ref:`setup` +- :doc:`other_modules` +- :doc:`source_to_doc_links` +- :doc:`test_to_doc_links` diff --git a/docs/how-to/get_started.rst b/docs/how-to/get_started.rst index c469c37f2..a5f2b8b56 100644 --- a/docs/how-to/get_started.rst +++ b/docs/how-to/get_started.rst @@ -24,3 +24,14 @@ In an existing S-CORE repository, you can build the documentation using Bazel: Open the generated site at ``_build/index.html`` in your browser. In a new S-CORE repository, see :ref:`setup`. + +After the initial setup, continue with :doc:`dashboards_and_quality_gates` to +build a repository dashboard and enforce CI quality gates. + +Agentic software development +---------------------------- + +Docs-as-code also supports an agent-assisted development workflow. 
Manual and +agent-generated changes still converge on the same deterministic Lane A checks. + +For an overview of that workflow, see :doc:`agentic_software_development`. diff --git a/docs/how-to/index.rst b/docs/how-to/index.rst index 4e3f17e8c..dc1dc4a98 100644 --- a/docs/how-to/index.rst +++ b/docs/how-to/index.rst @@ -23,10 +23,12 @@ Here you find practical guides on how to use docs-as-code. :maxdepth: 1 get_started + agentic_software_development setup write_docs faq other_modules + dashboards_and_quality_gates source_to_doc_links test_to_doc_links add_extensions diff --git a/docs/how-to/setup.md b/docs/how-to/setup.md index 253f88116..bdd4d612d 100644 --- a/docs/how-to/setup.md +++ b/docs/how-to/setup.md @@ -86,3 +86,9 @@ bazel run //:docs #### 6. Access your documentation at `/_build/index.html` + +## Next Step + +After basic setup, see {doc}`dashboards_and_quality_gates` to configure +traceability dashboards, export `metrics.json`, and enforce CI quality gates in +consumer repositories. diff --git a/docs/how-to/test_to_doc_links.rst b/docs/how-to/test_to_doc_links.rst index d7c3677f3..7da9842cb 100644 --- a/docs/how-to/test_to_doc_links.rst +++ b/docs/how-to/test_to_doc_links.rst @@ -12,6 +12,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* + # ╓ ╖ + # ║ Some portions generated by Github Copilot ║ + # ╙ ╜ + Reference Docs in Tests ======================= @@ -53,3 +57,117 @@ Limitations - Partial properties will lead to no Testlink creation. If you want a test to be linked, please ensure all requirement properties are provided. - Tests must be executed by Bazel first so `test.xml` files exist. + + +CI/CD Gate for Linkage Percentage +--------------------------------- + +The traceability tooling uses a **two-step architecture**: + +1. 
The **Sphinx build** computes metrics via the ``score_metamodel`` extension and + writes a machine-readable ``metrics.json`` (schema v1) to the build output + directory alongside ``needs.json``. +2. ``traceability_gate`` reads that ``metrics.json`` and enforces configurable + coverage thresholds. + +Separating computation (Sphinx extension, during docs build) from gating (thin +CLI, in CI) keeps the gate decoupled from the Sphinx/Bazel build: the gate never +parses ``needs.json`` itself, while the extension has direct access to all +sphinx-needs data. + +.. note:: + + ``metrics.json`` is the **single source of truth** for traceability data. + It is written by the Sphinx docs build (via the ``score_metamodel`` extension) + to ``<build-output-dir>/metrics.json``. The same computation that powers the dashboard + pie charts produces this file, so the gate and the dashboard always show + the same numbers. + +.. plantuml:: + + @startuml + skinparam componentStyle rectangle + skinparam defaultTextAlignment center + + rectangle "docs build (Sphinx + score_metamodel extension)" { + component "calc metrics\n(Sphinx extension\nbuild-finished hook)" as coverage + } + + usecase "test" as test + database "needs.json\n(sphinx-needs)" as needsjson + database "metrics.json\n(v1: metrics per needs type,\ne.g. tool_req)" as metricsjson + component "gate\n(traceability_gate)" as gate + + test --> coverage : xml + needsjson --> coverage : sphinx-needs data\n(already loaded) + coverage --> metricsjson + metricsjson --> gate + gate --> (Pretty output) + + @enduml + +Current workflow: + +1. Run tests. +2. Build docs (``score_metamodel`` extension writes ``metrics.json`` automatically). +3. Run the gate against the exported metrics. + +.. code-block:: bash + + bazel test //... 
+ bazel build //:needs_json + + bazel run //scripts_bazel:traceability_gate -- \ + --metrics-json bazel-bin/needs_json/_build/needs/metrics.json \ + --min-req-code 100 \ + --min-req-test 100 \ + --min-req-fully-linked 100 \ + --min-tests-linked 100 \ + --fail-on-broken-test-refs + +In repository CI, wire the gate target to depend on the test-report and +``//:needs_json`` targets so Bazel handles the build order automatically. + +The ``--require-all-links`` shortcut is equivalent to setting all ``--min-*`` +flags to 100 and enabling ``--fail-on-broken-test-refs``. + +The gate reports: + +- Percentage of requirements with ``source_code_link`` +- Percentage of requirements with ``testlink`` +- Percentage of requirements with both links (fully linked) +- Percentage of testcases linked to at least one requirement +- Broken testcase references (testcases referencing an unknown requirement ID) + +.. note:: + + Testcase-based metrics depend on testcase needs being present in the + exported ``needs.json``. Testcases are currently generated as external + needs, so values such as testcase linkage percentage or broken testcase + references are only meaningful if those external testcase needs are also + included in the exported dataset. + +To restrict which need types are treated as requirements when computing metrics, +set ``score_metamodel_requirement_types`` in your Sphinx ``conf.py`` +(default: ``tool_req``): + +.. code-block:: python + + score_metamodel_requirement_types = "tool_req,comp_req" + +By default, dashboard and gate use only needs defined in the current repository +(``is_external == False``). This supports per-repo CI gates. +For integration repositories that intentionally aggregate across dependencies, +you can include external needs in both dashboard and gate by setting: + +.. code-block:: python + + score_metamodel_include_external_needs = True + +You can also override dashboard behaviour per pie chart via filter args: + +.. code-block:: rst + + .. 
needpie:: Requirements with Codelinks + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_with_code_links(tool_req,true) + +Use lower thresholds during rollout and tighten towards 100% over time. diff --git a/docs/internals/agent_harness.rst b/docs/internals/agent_harness.rst new file mode 100644 index 000000000..2f1314c02 --- /dev/null +++ b/docs/internals/agent_harness.rst @@ -0,0 +1,84 @@ +.. + # ******************************************************************************* + # Copyright (c) 2026 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. + # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ******************************************************************************* + +Agent Harness +============= + +The docs-as-code harness implementation lives under ``score_harness/``. It is a +maintainer-facing subsystem for evaluating harness candidates against +machine-readable change scenarios. + +Subsystem map +------------- + +The current subsystem layout is: + +- ``score_harness/spec/``: task specifications used as evaluation units +- ``score_harness/harness/``: harness candidates, one Python file per candidate +- ``score_harness/outer_loop.py``: deterministic evaluation runner +- ``score_harness/validate_candidate.py``: cheap pre-benchmark validation +- ``score_harness/query_runs.py``: summary-first query helpers for prior runs +- ``score_harness/consistency_rules.yaml``: public rule catalog used by tasks and candidates +- ``score_harness/runs/``: append-only execution history and distilled traces + +Execution flow +-------------- + +The execution contract is intentionally narrow: + +1. 
+ Validate the candidate cheaply against one runnable task specification. +2. Load the candidate and task corpus. +3. Run the deterministic Lane A traceability gate for each active task. +4. Distill task-level trace artifacts into small JSON outputs. +5. Append a run-level summary entry to ``evolution_summary.jsonl``. + +The outer loop is deterministic Python. No LLM is required in Lane A. + +Artifacts +--------- + +A successful run writes (paths relative to ``score_harness/``): + +- ``runs/<iteration>/<candidate>/score.json`` +- ``runs/<iteration>/<candidate>/traces/<task>/gate_output.json`` +- ``runs/<iteration>/<candidate>/traces/<task>/impacted_elements.json`` +- ``runs/<iteration>/<candidate>/traces/<task>/score.json`` +- ``runs/evolution_summary.jsonl`` + +If cheap validation fails, structured failure entries can also be appended to +``score_harness/validation_failures.jsonl`` so later iterations can avoid +repeating the same mistakes. + +Manual and agent-assisted changes +--------------------------------- + +The harness is not limited to agent-generated changes. The important split is +not human versus agent, but deterministic versus optional. + +- Lane A applies equally to manual changes and agent-assisted changes. +- Lane B is the optional agentic workflow for proposing and improving harness candidates. +- Merge eligibility remains tied to deterministic checks, not to the proposer. + +Current CI status +----------------- + +The harness is already covered indirectly by repository CI: + +- linting covers harness files through repository-wide ``pre-commit`` execution +- Bazel test execution includes harness tests through ``bazel test //...`` + +What is not yet present is a dedicated harness workflow job that runs the outer +loop itself as a named CI check and uploads harness run artifacts. + +That dedicated CI integration remains planned work. diff --git a/docs/internals/index.rst b/docs/internals/index.rst index f0e913836..333be01cb 100644 --- a/docs/internals/index.rst +++ b/docs/internals/index.rst @@ -21,6 +21,7 @@ This section is not relevant for users of docs-as-code but for developers extend .. 
toctree:: :maxdepth: 1 + agent_harness extensions/index benchmark_results decisions/index diff --git a/docs/internals/requirements/implementation_state.rst b/docs/internals/requirements/implementation_state.rst index 580e090fc..ea7840eb1 100644 --- a/docs/internals/requirements/implementation_state.rst +++ b/docs/internals/requirements/implementation_state.rst @@ -11,21 +11,60 @@ # # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* + + # ╓ ╖ + # ║ Some portions generated by Github Copilot ║ + # ╙ ╜ .. _docs_statistics: -Implementation State Statistics -================================ +Tooling Coverage +================ + +This page shows how the docs-as-code tooling covers process and tool +requirements. It focuses on tooling capabilities offered to downstream +repositories rather than on product-specific traceability inside those +repositories. Overview -------- .. needpie:: Requirements Status - :labels: not implemented, implemented but not tested, implemented and tested + :labels: not implemented, implemented but incomplete traceability, fully linked :colors: red,yellow, green + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_status(tool_req) + +Jump to evidence tables: + +- :ref:`Tool Requirement Implementation and Links table <tooling_coverage_table_impl_links>` +- :ref:`Process Requirement to Tool Requirement mapping table <tooling_coverage_table_process_mapping>` + +How To Read These Levels +------------------------ + +The overview pie combines implementation state and traceability evidence: + +- ``not implemented``: + requirement has ``implemented == NO``. +- ``implemented but incomplete traceability``: + requirement has ``implemented == YES`` or ``implemented == PARTIAL``, + but is missing at least one traceability link (code link and/or test link). +- ``fully linked``: + requirement has both ``source_code_link`` and ``testlink``. 
- type == 'tool_req' and implemented == 'NO' - type == 'tool_req' and testlink == '' and (implemented == 'YES' or implemented == 'PARTIAL') - type == 'tool_req' and testlink != '' and (implemented == 'YES' or implemented == 'PARTIAL') +Implementation labels used on this page: + +- ``NO``: requirement is not implemented. +- ``PARTIAL``: requirement is partly implemented. +- ``YES``: requirement is implemented. + +Why multiple pies are shown: + +- ``Requirements with Codelinks`` shows implementation-to-source traceability. +- ``Requirements with linked tests`` shows implementation-to-verification traceability. +- ``Requirements fully linked`` is the strict roll-up (both links present). + +These are intentionally separate because they answer different diagnostics: +missing code links, missing test links, or both. In Detail --------- @@ -48,78 +87,43 @@ In Detail .. needpie:: Requirements with Codelinks :labels: no codelink, with codelink :colors: red, green - - type == 'tool_req' and source_code_link == '' - type == 'tool_req' and source_code_link != '' + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_with_code_links(tool_req) .. grid-item-card:: - .. needpie:: Test Results - :labels: passed, failed, skipped - :colors: green, red, orange - - type == 'testcase' and result == 'passed' - type == 'testcase' and result == 'failed' - type == 'testcase' and result == 'skipped' - -.. grid:: 2 + .. needpie:: Requirements with linked tests + :labels: no test link, with test link + :colors: red, green + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_with_test_links(tool_req) .. grid-item-card:: - Failed Tests - - *Hint: this table is empty by definition, as PRs with failing tests are not allowed to be merged in docs-as-code repo.* - - .. 
needtable:: FAILED TESTS - :filter: result == "failed" - :tags: TEST - :columns: name as "testcase";result;fully_verifies;partially_verifies;test_type;derivation_technique;id as "link" + .. needpie:: Requirements fully linked (code + tests) + :labels: not fully linked, fully linked + :colors: orange, green + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_requirements_fully_linked(tool_req) .. grid-item-card:: - Skipped / Disabled Tests - - *Hint: this table is empty by definition, as we do not allow skipped or disabled tests in docs-as-code repo.* - - .. needtable:: SKIPPED/DISABLED TESTS - :filter: result != "failed" and result != "passed" - :tags: TEST - :columns: name as "testcase";result;fully_verifies;partially_verifies;test_type;derivation_technique;id as "link" - - - - -All passed Tests ------------------ - -.. needtable:: SUCCESSFUL TESTS - :filter: result == "passed" - :tags: TEST - :columns: name as "testcase";result;fully_verifies;partially_verifies;test_type;derivation_technique;id as "link" - + .. needpie:: Process requirements linked by tool requirements + :labels: not linked, linked + :colors: red, green + :filter-func: src.extensions.score_metamodel.checks.traceability_dashboard.pie_process_requirements_linked(tool_req,true) -Details About Testcases ------------------------- -*Data is not filled out yet within the test cases.* -.. needpie:: Test Types Used In Testcases - :labels: fault-injection, interface-test, requirements-based, resource-usage - :legend: +Process-to-Tool Mapping +----------------------- - type == 'testcase' and test_type == 'fault-injection' - type == 'testcase' and test_type == 'interface-test' - type == 'testcase' and test_type == 'requirements-based' - type == 'testcase' and test_type == 'resource-usage' +.. _tooling_coverage_table_process_mapping: +.. 
needtable:: Process requirement -> tool requirement mapping + :types: tool_req + :columns: satisfies as "Process Requirement";id as "Tool Requirement" + :style: table -.. needpie:: Derivation Techniques Used In Testcases - :labels: requirements-analysis, design-analysis, boundary-values, equivalence-classes, fuzz-testing, error-guessing, explorative-testing - :legend: +.. _tooling_coverage_table_impl_links: - type == 'testcase' and derivation_technique == 'requirements-analysis' - type == 'testcase' and derivation_technique == 'design-analysis' - type == 'testcase' and derivation_technique == 'boundary-values' - type == 'testcase' and derivation_technique == 'equivalence-classes' - type == 'testcase' and derivation_technique == 'fuzz-testing' - type == 'testcase' and derivation_technique == 'error-guessing' - type == 'testcase' and derivation_technique == 'explorative-testing' +.. needtable:: Tool requirement implementation and links + :types: tool_req + :columns: id as "Tool Requirement";implemented;source_code_link;testlink + :style: table diff --git a/docs/internals/requirements/index.rst b/docs/internals/requirements/index.rst index b63c2a056..31a5f5b3d 100644 --- a/docs/internals/requirements/index.rst +++ b/docs/internals/requirements/index.rst @@ -15,11 +15,29 @@ Requirements ============ +This repository provides the docs-as-code tooling used by other SCORE +repositories. The pages in this section therefore focus on two questions: + +1. Which process and tool requirements are covered by the docs-as-code tooling? +2. How is the tooling itself verified and qualified for downstream use? + +Actual product and module traceability is expected to live in consuming +repositories, such as module repositories and integration repositories that use +docs-as-code as a Bazel dependency. + +Pages +----- + +- ``implementation_state`` describes tooling coverage: implemented capabilities, + source-code links, test links, full linkage, and process-to-tool mapping. 
+- ``tooling_verification`` describes verification evidence for the tooling + itself, including test results and testcase metadata. + .. toctree:: :maxdepth: 1 capabilities process_overview - tool_req_overview requirements implementation_state + tooling_verification diff --git a/docs/internals/requirements/tool_req_overview.rst b/docs/internals/requirements/tool_req_overview.rst index 0990fc68e..2ae20c6bb 100644 --- a/docs/internals/requirements/tool_req_overview.rst +++ b/docs/internals/requirements/tool_req_overview.rst @@ -12,15 +12,17 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* -=============================== -Tool Requirements Overview -=============================== +.. _docs_tool_req_overview: -Here are all our tool requirements -tersely packed in a table -with some hopefully useful meta information. +:orphan: -.. needtable:: - :types: tool_req - :columns: satisfies as "Process Requirement" ;id as "Tool Requirement";implemented;source_code_link - :style: table +Tool Requirements Overview (Deprecated) +======================================= + +This page is deprecated. + +Use the canonical requirements dashboard: +:doc:`implementation_state` + +The process-to-tool mapping tables previously shown here are now part of +the implementation state dashboard. diff --git a/docs/internals/requirements/tooling_verification.rst b/docs/internals/requirements/tooling_verification.rst new file mode 100644 index 000000000..5b477043d --- /dev/null +++ b/docs/internals/requirements/tooling_verification.rst @@ -0,0 +1,100 @@ +.. + # ******************************************************************************* + # Copyright (c) 2026 Contributors to the Eclipse Foundation + # + # See the NOTICE file(s) distributed with this work for additional + # information regarding copyright ownership. 
+ # + # This program and the accompanying materials are made available under the + # terms of the Apache License Version 2.0 which is available at + # https://www.apache.org/licenses/LICENSE-2.0 + # + # SPDX-License-Identifier: Apache-2.0 + # ******************************************************************************* + +.. _docs_tooling_verification: + +Tooling Verification +==================== + +This page describes verification evidence for the docs-as-code tooling itself. +It is intentionally separate from tooling coverage so downstream quality signals +such as unit tests, future static analysis, and other verification evidence can +evolve independently from traceability support. + +Overview +-------- + +.. needpie:: Test Results + :labels: passed, failed, skipped + :colors: green, red, orange + + type == 'testcase' and result == 'passed' + type == 'testcase' and result == 'failed' + type == 'testcase' and result == 'skipped' + +.. grid:: 2 + :class-container: score-grid + + .. grid-item-card:: + + Failed Tests + + *Hint: no table is rendered here, because PRs with failing tests are not allowed to be merged in the docs-as-code repo.* + + No failing tests are expected in the current dataset. + + .. grid-item-card:: + + Skipped / Disabled Tests + + *Hint: no table is rendered here, because we do not allow skipped or disabled tests in the docs-as-code repo.* + + No skipped or disabled tests are expected in the current dataset. + + +Testcase Metadata Overview +-------------------------- + +*Data is not filled out yet within the test cases.* + +.. needpie:: Test Types Used In Testcases + :labels: fault-injection, interface-test, requirements-based, resource-usage + :legend: + + type == 'testcase' and test_type == 'fault-injection' + type == 'testcase' and test_type == 'interface-test' + type == 'testcase' and test_type == 'requirements-based' + type == 'testcase' and test_type == 'resource-usage' + + +.. 
needpie:: Derivation Techniques Used In Testcases + :labels: requirements-analysis, design-analysis, boundary-values, equivalence-classes, fuzz-testing, error-guessing, explorative-testing + :legend: + + type == 'testcase' and derivation_technique == 'requirements-analysis' + type == 'testcase' and derivation_technique == 'design-analysis' + type == 'testcase' and derivation_technique == 'boundary-values' + type == 'testcase' and derivation_technique == 'equivalence-classes' + type == 'testcase' and derivation_technique == 'fuzz-testing' + type == 'testcase' and derivation_technique == 'error-guessing' + type == 'testcase' and derivation_technique == 'explorative-testing' + + +All passed Tests +---------------- + +.. needtable:: SUCCESSFUL TESTS - status and link + :filter: result == "passed" + :tags: TEST + :columns: name as "testcase";result;id as "link" + +.. needtable:: SUCCESSFUL TESTS - verification mapping + :filter: result == "passed" + :tags: TEST + :columns: name as "testcase";fully_verifies;partially_verifies + +.. 
needtable:: SUCCESSFUL TESTS - optional metadata + :filter: result == "passed" + :tags: TEST + :columns: name as "testcase";test_type;derivation_technique diff --git a/docs/reference/commands.md b/docs/reference/commands.md index 01bc8d15d..a7a83a056 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -1,10 +1,17 @@ + + # Commands | Target | What it does | | ---------------------------------------------- | ------------------------------------------------------------------------------------------------- | -| `bazel run //:docs` | Builds documentation | +| `bazel run //:docs` | Builds documentation (also writes `metrics.json` via the score_metamodel extension) | | `bazel run //:docs_check` | Verifies documentation correctness | | `bazel run //:docs_combo` | Builds combined documentation with all external dependencies included | +| `bazel run @score_docs_as_code//scripts_bazel:traceability_gate -- --metrics-json bazel-bin/needs_json/_build/needs/metrics.json --min-req-code 100 --min-req-test 100 --min-req-fully-linked 100 --min-tests-linked 100 --fail-on-broken-test-refs` | Reads the pre-computed metrics.json from the docs build and fails if coverage thresholds are not met | | `bazel run //:live_preview` | Creates a live_preview of the documentation viewable in a local server | | `bazel run //:live_preview_combo_experimental` | Creates a live_preview of the full documentation with all dependencies viewable in a local server | | `bazel run //:ide_support` | Sets up a Python venv for esbonio (Remember to restart VS Code!) 
| diff --git a/prepare_commit.sh b/prepare_commit.sh new file mode 100755 index 000000000..a12ee7f85 --- /dev/null +++ b/prepare_commit.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Script to properly stage and commit the harness implementation + +cd /workspaces/docs-as-code + +# Update .gitignore to exclude sensitive/temporary files +cat >> .gitignore << 'EOF' + +# Harness execution history (local only) +score_harness/runs/ + +# Internal OEM documentation (confidential) +docs/internals/requirements/oem_internal_workstreams.md + +# Temporary issue drafts (already created in GitHub) +.tmp_issue_updates/ +EOF + +# Stage the harness implementation +git add AGENTS.md +git add score_harness/ + +# Show what will be committed +echo "=========================================" +echo "Files to be committed:" +echo "=========================================" +git status --short + +echo "" +echo "=========================================" +echo "Excluded files (not staged):" +echo "=========================================" +git status --short | grep "^??" | grep -E "(oem_internal|\.tmp_issue|runs/)" + +echo "" +echo "=========================================" +echo "Ready to commit! 
Use:" +echo "=========================================" +echo 'git commit -m "feat(harness): Add pilot foundation for docs-as-code assurance harness + +- Add outer loop with both metrics_json and needs_json task modes +- Add lightweight candidate validation and query tooling +- Add baseline + rule-retrieval harness candidates +- Add provenance metadata and responsibility model for audit compliance +- Add tool safety restrictions and compliance documentation +- Create executable seed corpus (4 tasks) + +Addresses #518, #524 +Part of #520"' diff --git a/score_harness/.gitignore b/score_harness/.gitignore new file mode 100644 index 000000000..e7fa2430b --- /dev/null +++ b/score_harness/.gitignore @@ -0,0 +1,2 @@ +runs/ +.tmp_issue_updates/ diff --git a/score_harness/BUILD b/score_harness/BUILD new file mode 100644 index 000000000..574b13d30 --- /dev/null +++ b/score_harness/BUILD @@ -0,0 +1,75 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +load("@aspect_rules_py//py:defs.bzl", "py_binary", "py_library") +load("@docs_as_code_hub_env//:requirements.bzl", "all_requirements") + +filegroup( + name = "sources", + srcs = glob(["**/*.py"]), + visibility = ["//visibility:public"], +) + +filegroup( + name = "spec_files", + srcs = glob(["spec/*.json"]), + visibility = ["//visibility:public"], +) + +filegroup( + name = "fixture_files", + srcs = glob(["fixtures/*.json"]), + visibility = ["//visibility:public"], +) + +py_library( + name = "common", + srcs = ["common.py"], + visibility = ["//score_harness:__subpackages__"], + deps = all_requirements, +) + +py_binary( + name = "outer_loop", + srcs = ["outer_loop.py"], + main = "outer_loop.py", + visibility = ["//visibility:public"], + deps = [ + ":common", + "//score_harness/harness:base_harness", + "//scripts_bazel:traceability_gate", + ] + all_requirements, +) + +py_binary( + name = "validate_candidate", + srcs = ["validate_candidate.py"], + main = "validate_candidate.py", + visibility = ["//visibility:public"], + deps = [ + ":common", + "//score_harness/harness:base_harness", + ] + all_requirements, +) + +py_binary( + name = "query_runs", + srcs = ["query_runs.py"], + main = "query_runs.py", + visibility = ["//visibility:public"], + deps = all_requirements, +) diff --git a/score_harness/README.md b/score_harness/README.md new file mode 100644 index 000000000..8957fd2be --- /dev/null +++ b/score_harness/README.md @@ -0,0 +1,98 @@ +# score-harness + +Agent harness infrastructure for Eclipse S-CORE docs-as-code. 
+ +This directory is the integration gate between docs-as-code change workflows +and the Lane A traceability gate (`scripts_bazel/traceability_gate.py`). + +Treat this file as the entry map for the harness area. Keep it short. Put deeper +detail in the structured files below so agents can navigate selectively. + +## Structure + +``` +score_harness/ + spec/ Task specs (small, structured change scenarios) + harness/ Harness candidates (one Python file per candidate) + contract/ Adapter contract and schema (v0.1) + runs/ Execution history (append-only, per iteration/candidate/task) + consistency_rules.yaml Public docs-as-code rule catalog + SKILL.md Domain skill for the outer loop proposer + outer_loop.py Deterministic outer loop: run harness -> gate -> distill -> log +``` + +## Navigation + +- Start here for the overall contract and command sequence +- Read `spec/` for task units and expected verdicts +- Read `contract/` for adapter interface and machine-readable schema +- Read `consistency_rules.yaml` for rule IDs and impact semantics +- Read `outer_loop.py` for evaluation, distillation, and filesystem layout +- Read `SKILL.md` only when working on Lane B candidate evolution + +## Lane A contract + +Every harness candidate is evaluated against the same Lane A gate: + +1. Run cheap candidate validation against one runnable task spec +2. For metrics_json tasks: load a stable `metrics.json` fixture directly +3. For needs_json tasks: use `metrics.json` from the same build directory as `needs.json` (both produced by Sphinx build) +4. Run `traceability_gate.py` with task-specific arguments to produce pass/fail verdict +5. Distill structured trace artifacts into `runs///traces//` + +No LLM is required in Lane A. The outer loop is deterministic Python. + +Lane A applies equally to manual and agent-assisted change workflows. Agentic +behavior only changes how candidates are proposed and improved, not how merge +eligibility is decided. 
+ +Note: `traceability_coverage.py` no longer exists as a separate script—coverage extraction is integrated into the Sphinx build via the score_metamodel extension. + +## Queryability rules + +- `runs/` is append-only +- trace artifacts must be JSON, small, and consistently named +- the proposer should start from `evolution_summary.jsonl` and then inspect only + the traces it needs +- avoid raw stdout dumps as the primary artifact + +## Lane B (optional) + +A proposer (any coding agent) may read the trace history via `runs/` and +`evolution_summary.jsonl` and propose new harness candidates. Lane B never +determines merge eligibility. + +## Getting started + +```bash +# Validate a candidate cheaply before full evaluation +PYTHONPATH=. python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/base_harness.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json + +# Run the seeded gate-fixture corpus against the baseline harness +PYTHONPATH=. python3 score_harness/outer_loop.py \ + --candidate score_harness/harness/base_harness.py \ + --tasks score_harness/spec/ + +# Query prior runs, failed tasks, and candidate deltas +PYTHONPATH=. python3 score_harness/query_runs.py \ + --runs-dir score_harness/runs \ + --failed-tasks \ + --diff-candidates base_harness candidate_x +``` + +## Enforcement and bypass resistance + +- Local runs are for fast feedback and can always be bypassed by intent. +- Merge protection must come from required CI checks. +- `validate_candidate.py --skip-external-checks` is blocked unless `SCORE_HARNESS_ALLOW_SKIP_EXTERNAL_CHECKS=1`. +- `outer_loop.py --skip-validation` is blocked unless `SCORE_HARNESS_ALLOW_SKIP_VALIDATION=1`. +- CI must run both validation and outer loop without skip flags. +- Configure branch protection so the harness CI workflow is required before merge. + +## Next implementation steps + +1. 
Grow the seeded corpus beyond gate metrics fixtures to full docs build snapshots using the needs_json task path. +2. Add more candidate harnesses so run-to-run diffs show meaningful behavioral deltas. +3. Keep the harness CI gate required on pull requests and evolve the task corpus over time. diff --git a/score_harness/SELF_HEALING.md b/score_harness/SELF_HEALING.md new file mode 100644 index 000000000..c14e3196f --- /dev/null +++ b/score_harness/SELF_HEALING.md @@ -0,0 +1,198 @@ +# Self-Healing Validation System + +This system automatically learns from validation failures and feeds them back to the LLM proposer in future iterations. + +## How It Works + +### 1. Validation Runs Automatically + +When `outer_loop.py` runs, it first validates the candidate harness: + +```bash +python3 score_harness/outer_loop.py \ + --candidate score_harness/harness/my_candidate.py \ + --tasks score_harness/spec/ \ + --iteration 5 +``` + +The validation checks: +- ✅ Harness interface (get_context, post_process) +- ✅ Linting (ruff) +- ✅ Type checking (basedpyright) +- ✅ Import succeeds +- ✅ Basic smoke test + +### 2. Failures Are Logged + +If validation fails (e.g., linting error, type error, missing method), the failure is automatically logged to **`validation_failures.jsonl`**: + +```json +{"iteration": 5, "candidate": "my_candidate", "failure_type": "linting_error", "message": "F401 'json' imported but unused", "fix": "Run: ruff check --fix score_harness/harness/my_candidate.py"} +{"iteration": 6, "candidate": "another_candidate", "failure_type": "type_error", "message": "Type 'str | None' cannot be assigned to 'str'", "fix": "Fix type errors reported by basedpyright"} +``` + +### 3. 
Proposer Learns From Failures + +In **Step 1** of the SKILL.md workflow, the LLM proposer is instructed to: + +> **Read validation_failures.jsonl FIRST** — learn from past mistakes + +Example mistakes the proposer will see: + +| Iteration | Mistake | Fix Applied in Next Iteration | +|-----------|---------|-------------------------------| +| 3 | Forgot to import `Path` | Iteration 4: Add `from pathlib import Path` | +| 5 | Linting error: unused import | Iteration 6: Run `ruff check --fix` before submitting | +| 7 | Type error: `str | None` not handled | Iteration 8: Add null check | +| 9 | Forgot to run validation | Iteration 10: Always run `validate_candidate.py` | + +### 4. System Self-Improves + +Over iterations, the proposer builds a mental model of: +- Common validation failures +- How to prevent them +- Which checks to run before submitting + +This is **learning without explicit training** — the feedback loop teaches the proposer through structured failure logs. + +## Example: Learning From a Linting Mistake + +**Iteration 3**: Proposer submits candidate with unused import +```python +# harness/candidate_3.py +import json # ← unused +from pathlib import Path + +class AssuranceHarness: + def get_context(self, task_spec): + return "context" +``` + +**Validation fails**: +```bash +[validation] FAILED: candidate_3 — 1 failures logged to validation_failures.jsonl + - linting_error: Run: ruff check --fix score_harness/harness/candidate_3.py +``` + +**validation_failures.jsonl** records: +```json +{"iteration": 3, "candidate": "candidate_3", "failure_type": "linting_error", "message": "F401 'json' imported but unused", "fix": "Run: ruff check --fix score_harness/harness/candidate_3.py"} +``` + +**Iteration 4**: Proposer reads validation_failures.jsonl, sees the linting error, and now includes linting in Step 2: +```python +# harness/candidate_4.py +from pathlib import Path # ← fixed: removed unused import + +class AssuranceHarness: + def get_context(self, task_spec): 
+ return "context" +``` + +**Validation passes** ✅ + +## Manual Validation + +You can also run validation manually: + +```bash +# Basic validation (interface checks only) +python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/my_candidate.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json + +# With failure logging +python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/my_candidate.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json \ + --iteration 5 \ + --log-failures + +# Skip external checks (for CI environments without ruff/basedpyright) +python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/my_candidate.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json \ + --skip-external-checks +``` + +## Viewing Past Failures + +```bash +# See all validation failures +cat score_harness/validation_failures.jsonl | jq . + +# See failures from iteration 5 +cat score_harness/validation_failures.jsonl | jq 'select(.iteration == 5)' + +# See all linting errors +cat score_harness/validation_failures.jsonl | jq 'select(.failure_type == "linting_error")' + +# Count failures by type +cat score_harness/validation_failures.jsonl | jq -r '.failure_type' | sort | uniq -c +``` + +## Why This Works Better Than Explicit Instructions + +**Without self-healing**: +```markdown +Step 2: Implement +- Run linting +- Run type checking +- Run tests +- Run validation +``` + +❌ LLM might skip steps +❌ No feedback when steps are forgotten +❌ Instructions get stale + +**With self-healing**: +```json +{"iteration": 3, "failure_type": "linting_error", "fix": "Run: ruff check --fix ..."} +{"iteration": 5, "failure_type": "type_error", "fix": "Fix type errors ..."} +``` + +✅ LLM sees **actual mistakes it made** +✅ Concrete fix instructions for each failure +✅ System learns over time +✅ Validation is enforced, not suggested + +## Integration with CI + +In CI environments, skip 
external checks only if the tools cannot be installed. Note that `--skip-external-checks` is blocked unless `SCORE_HARNESS_ALLOW_SKIP_EXTERNAL_CHECKS=1` is set: + +```yaml +# .github/workflows/harness-validation.yml +- name: Validate harness candidates + env: + SCORE_HARNESS_ALLOW_SKIP_EXTERNAL_CHECKS: "1" # explicit opt-in; the skip flag is rejected without it + run: | + python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/base_harness.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json \ + --skip-external-checks # Skip ruff/basedpyright in CI +``` + +Preferred: install the tools so CI runs the full checks without skip flags (the harness README requires merge-gating CI to run validation without skip flags): + +```yaml +- name: Install validation tools + run: pip install ruff basedpyright + +- name: Validate with full checks + run: | + python3 score_harness/validate_candidate.py \ + --candidate score_harness/harness/base_harness.py \ + --task-spec score_harness/spec/task_002_threshold_fail.json +``` + +## Design Rationale + +This approach is inspired by **Meta-Harness** but goes further: + +| Meta-Harness | score_harness (with self-healing) | +|--------------|----------------------------------| +| Import validation only | Import + linting + type checking + interface validation | +| No failure logging | Failures logged to validation_failures.jsonl | +| LLM sees success/fail only | LLM sees exact failure reason + fix command | +| No learning across iterations | System learns from mistakes | + +The key insight: **validation failures are training data**. By logging failures in a structured format, we enable the proposer to learn without manual instruction updates. diff --git a/score_harness/SKILL.md b/score_harness/SKILL.md new file mode 100644 index 000000000..86015c678 --- /dev/null +++ b/score_harness/SKILL.md @@ -0,0 +1,88 @@ +--- +name: score-harness-assurance +description: Run one iteration of assurance harness evolution for Eclipse S-CORE docs-as-code. +--- + +# Score Harness — Assurance Consistency Domain + +Run ONE iteration of harness evolution. You analyze prior results, propose a new harness candidate, and implement it. The outer loop (`outer_loop.py`) handles evaluation. + +## Critical constraints + +- You MUST produce 1 new harness candidate every iteration. 
+- Do NOT hardcode task-specific knowledge. Harnesses must be general-purpose. +- Do NOT read raw `gate_stdout` fields from trace artifacts. Read only distilled JSON fields. +- One mechanism per candidate. If you are tempted to add "and also..." that is a second candidate. + +## Safety and scope restrictions + +**Harness candidates must comply with these mandatory rules:** + +1. **File scope**: Candidates may only read files declared in the task spec's `input_path` or referenced by `consistency_rules`. +2. **No network access**: Candidates must not make HTTP requests, DNS lookups, or access external services. +3. **No side effects**: `get_context()` must be read-only. Write operations belong in `post_process()` if needed. +4. **Deterministic**: Same task spec + same candidate → same context. No timestamps, random values, or external state in context. +5. **Tool safety**: Candidates may import stdlib and repo-local modules only. No dynamic code execution via `eval()` or `exec()`. + +**Violation consequences:** +- Candidates violating these rules will fail the cheap validation step before evaluation. +- Repeated violations may block future candidate submissions until governance review. + +## Domain context + +The task domain is: maintain ISO 26262 / ASPICE assurance arguments consistent with Sphinx-needs artifacts under change. + +The Lane A evaluation sequence is: +1. `traceability_coverage.py --json-output` → metrics JSON +2. `traceability_gate.py` → pass/fail verdict +3. Structured trace artifacts written to `runs/<iteration>/<candidate>/traces/<task_id>/` + +The harness variable is: what context is provided to the agent before it edits an RST file or needs.json. + +## Key files + +- `harness/base_harness.py` — base class and baseline candidate. Read before proposing. +- **`validation_failures.jsonl`** — append-only log of past validation failures. Read this FIRST to avoid repeating mistakes. 
+- `evolution_summary.jsonl` — one line per prior candidate (read this second) +- `runs/` — trace history. Use grep to find patterns across tasks and iterations. +- `spec/*.json` — task specs defining input, expected verdict, and relevant consistency rules. + +## Workflow + +### Step 1: Analyze + +1. **Read validation_failures.jsonl FIRST** — learn from past mistakes (linting errors, type errors, interface violations). +2. Read `evolution_summary.jsonl` to understand what has been tried. +3. Read `runs/` traces for failed tasks: `impacted_elements.json` and `score.json`. +4. Read prior candidate harness files in `harness/`. +5. Form a falsifiable hypothesis: "Providing X before the agent acts will reduce Y failure class." + +### Step 2: Implement + +1. Copy `harness/base_harness.py` to `harness/<name>.py`. +2. Override `get_context()` with your mechanism. Keep `post_process()` default unless needed. +3. **Validate import**: `python3 -c "from score_harness.harness.<name> import *; print('OK')"`. +4. **Run linting**: `ruff check score_harness/harness/<name>.py` (fix any errors with `ruff check --fix`). +5. **Run type checking**: `basedpyright score_harness/harness/<name>.py` (fix type errors before submitting). +6. **Run cheap validation**: `python3 score_harness/validate_candidate.py --candidate score_harness/harness/<name>.py --task-spec score_harness/spec/task_002_threshold_fail.json`. + +**If validation fails**, the failure is logged to `validation_failures.jsonl` for the next iteration to learn from. 
def load_harness(candidate_path: Path):
    """Dynamically load a harness candidate from a Python file.

    The file is executed as a module named ``candidate_harness``. The module
    must expose ``AssuranceHarness`` (normally imported from ``base_harness``)
    and at least one instantiable subclass of it.

    Classes defined in the candidate file itself are tried before classes that
    were merely imported into it. Without that ordering, a candidate doing
    ``from base_harness import AssuranceHarness, BaselineHarness`` would be
    loaded as ``BaselineHarness`` whenever its own class name sorts later
    alphabetically. Imported subclasses remain a fallback so files that only
    re-export a harness keep loading.

    Args:
        candidate_path: Path to the candidate ``.py`` file.

    Returns:
        A new instance of the selected harness class (constructed with no
        arguments).

    Raises:
        ValueError: If no import spec can be built for the file, or if the
            module has no instantiable ``AssuranceHarness`` subclass.
    """
    spec = importlib.util.spec_from_file_location("candidate_harness", candidate_path)
    if spec is None or spec.loader is None:
        raise ValueError(f"Cannot load module spec from {candidate_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    base = getattr(module, "AssuranceHarness", None)
    if isinstance(base, type):
        # dir() yields names alphabetically, which keeps the choice deterministic.
        harness_classes = []
        for name in dir(module):
            obj = getattr(module, name)
            if isinstance(obj, type) and obj is not base and issubclass(obj, base):
                harness_classes.append(obj)
        # Stable sort: locally defined classes first, alphabetical order kept
        # inside each group.
        harness_classes.sort(key=lambda cls: cls.__module__ != module.__name__)

        for cls in harness_classes:
            try:
                return cls()
            except (TypeError, AttributeError):
                # Still-abstract classes and constructors that need arguments
                # are skipped, exactly as before.
                continue
    raise ValueError(f"No AssuranceHarness subclass found in {candidate_path}")


def load_task_specs(tasks_dir: Path) -> list[dict]:
    """Load all task spec JSON files from the tasks directory.

    Files matching ``*.json`` directly inside ``tasks_dir`` are read in sorted
    path order. Each spec gets an extra ``_task_spec_path`` key holding the
    resolved absolute path of its file.

    Args:
        tasks_dir: Directory containing the task spec files.

    Returns:
        The parsed specs, one dict per file; empty if no file matches.

    Raises:
        ValueError: If a file is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError``) or its top-level value is not an object.
    """
    specs = []
    for path in sorted(tasks_dir.glob("*.json")):
        # JSON interchange is UTF-8; do not depend on the platform locale.
        spec = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(spec, dict):
            raise ValueError(f"Task spec {path} must contain a JSON object")
        spec["_task_spec_path"] = str(path.resolve())
        specs.append(spec)
    return specs
+ trigger: + artifact_type: requirement + change: type_changed + impacted_element_type: guideline + impact_class: indirect_propagation + action_required: review compliance argument chain + + - id: CR-003 + description: > + If a test reference is broken (linked test ID does not exist in needs.json), + the linked requirement loses its test coverage evidence. + trigger: + artifact_type: test_link + change: reference_broken + impacted_element_type: requirement + impact_class: revision_required + action_required: restore or replace test reference before gate can pass + + - id: CR-004 + description: > + If a std_req changes content, all gd_guidl elements that comply with it + require re-review of the compliance argument. + trigger: + artifact_type: std_req + change: content_changed + impacted_element_type: gd_guidl + impact_class: indirect_propagation + action_required: re-review compliance argument + + - id: CR-005 + description: > + If coverage drops below the configured threshold for any requirement type, + the gate verdict changes from pass to fail. + trigger: + artifact_type: metrics + change: coverage_below_threshold + impacted_element_type: gate_verdict + impact_class: direct_recheck + action_required: restore coverage above threshold or request waiver diff --git a/score_harness/contract/adapter_contract_v0_1.md b/score_harness/contract/adapter_contract_v0_1.md new file mode 100644 index 000000000..d6371f0e7 --- /dev/null +++ b/score_harness/contract/adapter_contract_v0_1.md @@ -0,0 +1,127 @@ +# Adapter Contract v0.1 + +This contract defines the minimal integration interface between docs-as-code +consumers and the current harness implementation. + +Scope: one backend only (`docs-as-code` harness). No framework switching is +required in v0.1. 
+ +## Design goals + +- Keep the surface minimal and stable +- Keep execution deterministic and replayable +- Keep artifact locations machine-readable +- Keep governance linkage explicit (`issue_id` + run artifacts) + +## Operations + +Three operations are required: + +1. `validate` +2. `run` +3. `report` + +## Common request envelope + +```json +{ + "contract_version": "v0.1.0", + "operation": "validate", + "issue_id": 1234, + "task_spec": "score_harness/spec/task_001.json", + "candidate_path": "score_harness/harness/base_harness.py", + "artifacts_dir": ".stage/ISSUE-1234/harness", + "profile": "iso26262", + "strict": true +} +``` + +Field notes: + +- `contract_version`: must be `v0.1.0` +- `operation`: one of `validate`, `run`, `report` +- `issue_id`: positive integer for issue-first traceability +- `task_spec`: path to one task file or task directory +- `candidate_path`: harness candidate entry file +- `artifacts_dir`: root directory for generated artifacts +- `profile`: currently fixed to `iso26262` +- `strict`: when true, fail on any contract or validation violation + +## Common response envelope + +```json +{ + "contract_version": "v0.1.0", + "operation": "validate", + "status": "pass", + "error_code": null, + "summary": "candidate validation completed", + "artifacts": [ + { + "path": ".stage/ISSUE-1234/harness/validation.json", + "type": "validation_result" + } + ], + "traceability": { + "issue_id": 1234, + "task_id": "task_001", + "run_id": "20260515T101500Z_base_harness" + } +} +``` + +## Status and errors + +`status` must be one of: + +- `pass`: operation completed and checks passed +- `fail`: operation completed but checks failed +- `error`: operation could not complete due to runtime/config/input issues + +`error_code` must be null on `pass`; otherwise one of: + +- `E_INPUT_INVALID` +- `E_CONTRACT_VERSION` +- `E_PROFILE_UNSUPPORTED` +- `E_CANDIDATE_INVALID` +- `E_TASK_SPEC_INVALID` +- `E_RUNTIME_FAILURE` +- `E_ARTIFACT_WRITE` + +## Operation semantics + 
+### validate + +Expected behavior: + +- Run cheap candidate validation (`validate_candidate.py`) +- Validate task spec readability/shape +- Emit validation artifact + +### run + +Expected behavior: + +- Execute deterministic outer loop (`outer_loop.py`) +- Run Lane A gate per task +- Emit per-task trace artifacts and evolution summary artifacts + +### report + +Expected behavior: + +- Read existing run artifacts only +- Return compact summary view +- No mutation of candidate code or task specs + +## Artifact rules + +- Prefer `.stage/ISSUE-<id>/...` for issue-scoped outputs +- Keep JSON artifacts small and grep-friendly +- Keep artifact names stable across runs + +## Compatibility policy + +- v0.1 is additive only +- New optional fields are allowed +- Required-field removals or semantic changes require v0.2+ diff --git a/score_harness/contract/adapter_contract_v0_1.schema.json b/score_harness/contract/adapter_contract_v0_1.schema.json new file mode 100644 index 000000000..77482257c --- /dev/null +++ b/score_harness/contract/adapter_contract_v0_1.schema.json @@ -0,0 +1,54 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://score.dev/schemas/score-harness/adapter-contract-v0.1.schema.json", + "title": "Score Harness Adapter Contract v0.1", + "type": "object", + "additionalProperties": false, + "required": [ + "contract_version", + "operation", + "issue_id", + "task_spec", + "candidate_path", + "artifacts_dir", + "profile", + "strict" + ], + "properties": { + "contract_version": { + "type": "string", + "const": "v0.1.0" + }, + "operation": { + "type": "string", + "enum": [ + "validate", + "run", + "report" + ] + }, + "issue_id": { + "type": "integer", + "minimum": 1 + }, + "task_spec": { + "type": "string", + "minLength": 1 + }, + "candidate_path": { + "type": "string", + "minLength": 1 + }, + "artifacts_dir": { + "type": "string", + "minLength": 1 + }, + "profile": { + "type": "string", + "const": "iso26262" + }, + "strict": { + "type": 
"boolean" + } + } +} diff --git a/score_harness/fixtures/metrics_broken_refs.json b/score_harness/fixtures/metrics_broken_refs.json new file mode 100644 index 000000000..48d70bc8f --- /dev/null +++ b/score_harness/fixtures/metrics_broken_refs.json @@ -0,0 +1,33 @@ +{ + "schema_version": "1", + "generated_by": "traceability_gate_test_fixture", + "metrics_by_type": { + "tool_req": { + "include_not_implemented": false, + "requirements": { + "total": 1, + "with_code_link": 1, + "with_test_link": 1, + "fully_linked": 1, + "with_code_link_pct": 100.0, + "with_test_link_pct": 100.0, + "fully_linked_pct": 100.0, + "missing_code_link_ids": [], + "missing_test_link_ids": [], + "not_fully_linked_ids": [] + }, + "tests": { + "total": 2, + "filtered_test_types": [], + "linked_to_requirements": 2, + "linked_to_requirements_pct": 100.0, + "broken_references": [ + { + "testcase": "TC_X", + "missing_need": "REQ_UNKNOWN" + } + ] + } + } + } +} diff --git a/score_harness/fixtures/metrics_threshold_fail.json b/score_harness/fixtures/metrics_threshold_fail.json new file mode 100644 index 000000000..b35575163 --- /dev/null +++ b/score_harness/fixtures/metrics_threshold_fail.json @@ -0,0 +1,28 @@ +{ + "schema_version": "1", + "generated_by": "traceability_gate_test_fixture", + "metrics_by_type": { + "tool_req": { + "include_not_implemented": false, + "requirements": { + "total": 4, + "with_code_link": 3, + "with_test_link": 2, + "fully_linked": 2, + "with_code_link_pct": 75.0, + "with_test_link_pct": 50.0, + "fully_linked_pct": 50.0, + "missing_code_link_ids": ["REQ_4"], + "missing_test_link_ids": ["REQ_3", "REQ_4"], + "not_fully_linked_ids": ["REQ_3", "REQ_4"] + }, + "tests": { + "total": 3, + "filtered_test_types": [], + "linked_to_requirements": 2, + "linked_to_requirements_pct": 66.67, + "broken_references": [] + } + } + } +} diff --git a/score_harness/fixtures/metrics_tool_req_scope_pass.json b/score_harness/fixtures/metrics_tool_req_scope_pass.json new file mode 100644 index 
000000000..c551cbbe0 --- /dev/null +++ b/score_harness/fixtures/metrics_tool_req_scope_pass.json @@ -0,0 +1,50 @@ +{ + "schema_version": "1", + "generated_by": "traceability_gate_test_fixture", + "metrics_by_type": { + "tool_req": { + "include_not_implemented": false, + "requirements": { + "total": 2, + "with_code_link": 2, + "with_test_link": 2, + "fully_linked": 2, + "with_code_link_pct": 100.0, + "with_test_link_pct": 100.0, + "fully_linked_pct": 100.0, + "missing_code_link_ids": [], + "missing_test_link_ids": [], + "not_fully_linked_ids": [] + }, + "tests": { + "total": 1, + "filtered_test_types": [], + "linked_to_requirements": 1, + "linked_to_requirements_pct": 100.0, + "broken_references": [] + } + }, + "comp_req": { + "include_not_implemented": false, + "requirements": { + "total": 5, + "with_code_link": 0, + "with_test_link": 0, + "fully_linked": 0, + "with_code_link_pct": 0.0, + "with_test_link_pct": 0.0, + "fully_linked_pct": 0.0, + "missing_code_link_ids": ["C1", "C2", "C3", "C4", "C5"], + "missing_test_link_ids": ["C1", "C2", "C3", "C4", "C5"], + "not_fully_linked_ids": ["C1", "C2", "C3", "C4", "C5"] + }, + "tests": { + "total": 0, + "filtered_test_types": [], + "linked_to_requirements": 0, + "linked_to_requirements_pct": 100.0, + "broken_references": [] + } + } + } +} diff --git a/score_harness/harness/BUILD b/score_harness/harness/BUILD new file mode 100644 index 000000000..f4f75c286 --- /dev/null +++ b/score_harness/harness/BUILD @@ -0,0 +1,35 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +load("@aspect_rules_py//py:defs.bzl", "py_library") +load("@docs_as_code_hub_env//:requirements.bzl", "all_requirements") + +py_library( + name = "base_harness", + srcs = ["base_harness.py"], + visibility = ["//score_harness:__subpackages__"], + deps = all_requirements, +) + +py_library( + name = "rule_retrieval_harness", + srcs = ["rule_retrieval_harness.py"], + visibility = ["//score_harness:__subpackages__"], + deps = [ + ":base_harness", + ] + all_requirements, +) diff --git a/score_harness/harness/base_harness.py b/score_harness/harness/base_harness.py new file mode 100644 index 000000000..f27128f16 --- /dev/null +++ b/score_harness/harness/base_harness.py @@ -0,0 +1,82 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
class AssuranceHarness(ABC):
    """
    Contract between the outer loop and every harness candidate.

    Per task, the outer loop asks the harness for context via get_context()
    before the agent acts, runs the Lane A gate, and then passes the agent's
    result through post_process() before distillation.
    """

    @abstractmethod
    def get_context(self, task_spec: dict) -> str:
        """
        Build the text shown to the agent ahead of the task description.

        task_spec is one parsed task spec. Fields documented for it:
            - input_path: str path to the RST or needs.json file to change
            - change_description: str what the agent should do
            - expected_verdict: str "pass" or "fail" (used only for evaluation)
            - consistency_rules: list[str] rule IDs expected to be relevant
        NOTE(review): this list is carried over from the previous docstring;
        confirm it against the spec files.

        Returns the string that is prepended to the agent's task prompt.
        """
        ...

    def post_process(self, agent_output: str, task_spec: dict) -> dict:
        """
        Optional hook applied to the agent output before the gate runs.

        The default does not alter the text; it only wraps it in a dict under
        the "agent_output" key. task_spec is unused here. Override to add
        pre-gate validation, output normalization, etc.
        """
        wrapped = {"agent_output": agent_output}
        return wrapped


class BaselineHarness(AssuranceHarness):
    """
    Baseline candidate: supplies no pre-context at all.

    The agent receives only the task description, which makes this the
    weakest possible harness and the reference point of the search. Any
    candidate that beats this on the search set is a candidate improvement.
    """

    def get_context(self, task_spec: dict) -> str:
        # Deliberately empty for every task.
        return ""
class RuleRetrievalHarness(AssuranceHarness):
    """
    Harness that retrieves and formats consistency rules for the agent.

    For each rule ID in task_spec["consistency_rules"], this candidate:
    1. Loads the rule definition from consistency_rules.yaml
    2. Formats the trigger, impact class, and required action
    3. Returns the formatted rules as context
    """

    def __init__(self) -> None:
        # Rule catalog indexed by rule id; None until the first successful load.
        self.rules_cache: dict[str, dict] | None = None

    def _load_rules(self) -> dict[str, dict]:
        """Load and index the consistency rule catalog.

        The catalog is ``consistency_rules.yaml`` in the parent of this file's
        directory. A parsed catalog is cached on the instance. A missing file
        yields an empty mapping and is not cached, so it is looked up again on
        the next call.

        An empty or malformed catalog (no top-level mapping, ``rules`` missing
        or null, entries without an ``id``) is tolerated: those parts are
        ignored instead of raising.
        """
        if self.rules_cache is not None:
            return self.rules_cache

        # Resolve rules file relative to this harness file
        rules_path = Path(__file__).parent.parent / "consistency_rules.yaml"
        if not rules_path.exists():
            return {}

        with rules_path.open(encoding="utf-8") as f:
            data = yaml.safe_load(f)

        # safe_load returns None for an empty document, so guard before .get().
        entries = data.get("rules") if isinstance(data, dict) else None
        rules_by_id = {
            rule["id"]: rule
            for rule in entries or []
            if isinstance(rule, dict) and "id" in rule
        }
        self.rules_cache = rules_by_id
        return rules_by_id

    def get_context(self, task_spec: dict) -> str:
        """
        Retrieve and format consistency rules referenced in the task spec.

        Returns a Markdown-formatted string listing each rule's description,
        trigger, impact class, and required action. IDs missing from the
        catalog get a "(rule not found)" line. Returns "" when the spec names
        no rules.
        """
        rule_ids = task_spec.get("consistency_rules", [])
        if not rule_ids:
            return ""

        rules_by_id = self._load_rules()
        context_lines = ["## Relevant Consistency Rules", ""]

        for rule_id in rule_ids:
            rule = rules_by_id.get(rule_id)
            if not rule:
                context_lines.append(f"- {rule_id}: (rule not found)")
                continue

            # "or" fallbacks cover keys that are present but null in the YAML.
            description = (rule.get("description") or "").strip()
            trigger = rule.get("trigger") or {}
            context_lines.extend(
                [
                    f"### {rule_id}: {description}",
                    "",
                    f"**Trigger**: {trigger.get('artifact_type', 'unknown')} — {trigger.get('change', 'unknown')}",
                    f"**Impact class**: {rule.get('impact_class', 'unknown')}",
                    f"**Action required**: {rule.get('action_required', 'unknown')}",
                    "",
                ]
            )

        return "\n".join(context_lines)
def run_gate_from_needs_json(
    needs_json: Path,
    gate_script: Path,
    gate_args: list[str],
) -> dict:
    """
    Run traceability_gate.py using metrics.json from the same build as needs.json.

    In this repo, the docs build (Sphinx + score_metamodel extension) produces
    both needs.json and metrics.json. The gate validates the metrics.json.

    metrics.json is looked up in the directory of ``needs_json``; the
    needs.json file itself is never read. If metrics.json is missing, a failed
    result (returncode 1, error text in ``metrics`` and ``gate_stdout``) is
    returned without running the gate. Otherwise the call is delegated to
    run_gate_from_metrics_json() so both modes share one invocation path.
    """
    metrics_json = needs_json.parent / "metrics.json"
    if not metrics_json.exists():
        return {
            "metrics": {"error": f"metrics.json not found at {metrics_json}"},
            "gate_passed": False,
            "gate_stdout": f"Error: metrics.json not found at {metrics_json}",
            "gate_returncode": 1,
        }

    return run_gate_from_metrics_json(metrics_json, gate_script, gate_args)


def run_gate_from_metrics_json(
    metrics_json: Path,
    gate_script: Path,
    gate_args: list[str],
) -> dict:
    """
    Run traceability_gate.py directly on an existing metrics.json fixture.
    This mode is useful for task specs derived from gate tests and fixtures.

    The gate runs as ``<python> <gate_script> --metrics-json <metrics_json>
    <gate_args...>`` under the current interpreter, with output captured.

    Returns a dict with:
        - metrics: parsed content of ``metrics_json``
        - gate_passed: True iff the gate exited with status 0
        - gate_stdout: captured standard output of the gate
        - gate_returncode: the gate's exit status

    Raises json.JSONDecodeError if ``metrics_json`` is not valid JSON.
    """
    # JSON interchange is UTF-8; do not depend on the platform locale.
    metrics = json.loads(metrics_json.read_text(encoding="utf-8"))
    command = [
        sys.executable,
        str(gate_script),
        "--metrics-json",
        str(metrics_json),
        *gate_args,
    ]
    # A non-zero exit is a normal "gate failed" verdict, so check stays off.
    gate_result = subprocess.run(command, capture_output=True, text=True, check=False)

    return {
        "metrics": metrics,
        "gate_passed": gate_result.returncode == 0,
        "gate_stdout": gate_result.stdout,
        "gate_returncode": gate_result.returncode,
    }
+ """ + metrics = gate_result.get("metrics", {}) + impacted = [] + + # Extract broken complies/test references by type from metrics + for req_type, data in metrics.get("metrics_by_type", {}).items(): + reqs = data.get("requirements", {}) + if reqs.get("with_test_link_pct", 100) < 100: + impacted.append( + { + "artifact_type": req_type, + "impact_class": "direct_recheck", + "reason": "missing_test_link", + } + ) + if reqs.get("with_code_link_pct", 100) < 100: + impacted.append( + { + "artifact_type": req_type, + "impact_class": "direct_recheck", + "reason": "missing_code_link", + } + ) + tests = data.get("tests", {}) + broken_references = tests.get("broken_references", []) + if isinstance(broken_references, int): + broken_reference_count = broken_references + else: + broken_reference_count = len(broken_references) + if broken_reference_count > 0: + impacted.append( + { + "artifact_type": req_type, + "impact_class": "revision_required", + "reason": "broken_test_reference", + "count": broken_reference_count, + "references": broken_references, + } + ) + + return { + "gate_passed": gate_result["gate_passed"], + "expected_verdict": task_spec.get("expected_verdict", "unknown"), + "task_mode": "metrics_json" + if task_spec.get("metrics_json_path") + else "needs_json", + "verdict_correct": gate_result["gate_passed"] + == (task_spec.get("expected_verdict") == "pass"), + "impacted_elements": impacted, + "coverage_summary": metrics.get("metrics_by_type", {}), + "responsible_role": task_spec.get("responsible_role", "pr_creator"), + "provenance": { + "execution_timestamp": datetime.now(UTC).isoformat(), + "python_version": sys.version.split()[0], + "environment_hash": hashlib.sha256( + f"{sys.version}{os.environ.get('USER', 'unknown')}".encode() + ).hexdigest()[:16], + "gate_script_version": "1.0", + }, + } + + +def resolve_task_path(path_value: str, task_spec: dict) -> Path: + """Resolve a task artifact path robustly across common invocation directories.""" + path = 
Path(path_value) + if path.is_absolute(): + return path + + candidates = [Path.cwd() / path] + task_spec_path = Path(task_spec.get("_task_spec_path", "")) + if task_spec_path: + task_dir = task_spec_path.parent + harness_root = task_dir.parent + candidates.extend( + [ + task_dir / path, + harness_root / path, + ] + ) + if path.parts and path.parts[0] == harness_root.name: + candidates.append(harness_root.parent / path) + + for candidate in candidates: + if candidate.exists(): + return candidate + + return candidates[0] + + +def resolve_support_path(path_value: Path) -> Path: + """Resolve repo support scripts independent of the current working directory.""" + if path_value.is_absolute(): + return path_value + + repo_root = Path(__file__).resolve().parent.parent + candidates = [Path.cwd() / path_value, repo_root / path_value] + for candidate in candidates: + if candidate.exists(): + return candidate + return repo_root / path_value + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def main() -> None: # noqa: C901 + parser = argparse.ArgumentParser(description="Score harness outer loop") + parser.add_argument("--candidate", required=True, type=Path) + parser.add_argument("--tasks", required=True, type=Path) + parser.add_argument("--iteration", type=int, default=1) + parser.add_argument("--output-dir", type=Path, default=Path("runs")) + parser.add_argument( + "--gate-script", + type=Path, + default=Path("scripts_bazel/traceability_gate.py"), + ) + parser.add_argument( + "--skip-validation", + action="store_true", + help=( + "Skip cheap pre-benchmark candidate validation " + "(dev-only; requires SCORE_HARNESS_ALLOW_SKIP_VALIDATION=1)." 
+ ), + ) + args = parser.parse_args() + args.gate_script = resolve_support_path(args.gate_script) + + candidate_name = args.candidate.stem + run_dir = args.output_dir / f"iteration_{args.iteration:03d}" / candidate_name + run_dir.mkdir(parents=True, exist_ok=True) + + harness = load_harness(args.candidate) + task_specs = load_task_specs(args.tasks) + + if not task_specs: + print(f"No task specs found in {args.tasks}. Create spec/*.json files first.") + sys.exit(1) + + if args.skip_validation and os.getenv("SCORE_HARNESS_ALLOW_SKIP_VALIDATION") != "1": + print( + "Refusing --skip-validation: set SCORE_HARNESS_ALLOW_SKIP_VALIDATION=1 " + "for local debugging only." + ) + sys.exit(2) + + if not args.skip_validation: + validation_task_spec = None + for candidate_task_spec in sorted(args.tasks.glob("*.json")): + task_data = json.loads(candidate_task_spec.read_text()) + if not task_data.get("active", True): + continue + if task_data.get("needs_json_path") or task_data.get("metrics_json_path"): + validation_task_spec = candidate_task_spec + break + + if validation_task_spec is None: + print("No runnable task spec found for validation.") + sys.exit(1) + + validation_result = validate_candidate( + args.candidate, + validation_task_spec, + skip_external_checks=False, + ) + + # Log validation failures for learning + if validation_result["status"] == "failed": + from score_harness.validate_candidate import log_validation_failure + + log_validation_failure( + args.iteration, + validation_result["candidate"], + validation_result["failures"], + ) + print( + f"[validation] FAILED: {validation_result['candidate']} " + f"— {len(validation_result['failures'])} failures logged to validation_failures.jsonl" + ) + for failure in validation_result["failures"]: + print(f" - {failure['failure_type']}: {failure['fix']}") + sys.exit(1) + + print( + f"[validation] candidate={validation_result['candidate']} " + f"task={validation_result['task_id']} status={validation_result['status']}" + ) + + 
task_results = [] + for task_spec in task_specs: + if not task_spec.get("active", True): + print(f" [SKIP] {task_spec.get('id', 'unknown')}: inactive task spec") + continue + + task_id = task_spec.get("id", "unknown") + trace_dir = run_dir / "traces" / task_id + trace_dir.mkdir(parents=True, exist_ok=True) + + # Get harness context (would be passed to agent in a real run) + harness.get_context(task_spec) + + gate_args = task_spec.get("gate_args", []) + + metrics_json_path = task_spec.get("metrics_json_path", "") + if metrics_json_path: + metrics_json = resolve_task_path(metrics_json_path, task_spec) + if not metrics_json.exists(): + print(f" [SKIP] {task_id}: metrics_json not found at {metrics_json}") + continue + gate_result = run_gate_from_metrics_json( + metrics_json, + args.gate_script, + gate_args, + ) + else: + needs_json_path = task_spec.get("needs_json_path", "") + if not needs_json_path: + print(f" [SKIP] {task_id}: no needs_json_path provided") + continue + needs_json = resolve_task_path(needs_json_path, task_spec) + if not needs_json.exists(): + print(f" [SKIP] {task_id}: needs_json not found at {needs_json}") + continue + + gate_result = run_gate_from_needs_json( + needs_json, + args.gate_script, + gate_args, + ) + trace = distill_trace(gate_result, task_spec) + + # Write distilled trace files + (trace_dir / "gate_output.json").write_text( + json.dumps( + { + "gate_passed": gate_result["gate_passed"], + "gate_returncode": gate_result["gate_returncode"], + }, + indent=2, + ) + ) + (trace_dir / "impacted_elements.json").write_text( + json.dumps(trace["impacted_elements"], indent=2) + ) + (trace_dir / "score.json").write_text(json.dumps(trace, indent=2)) + + task_results.append( + { + "task_id": task_id, + "verdict_correct": trace["verdict_correct"], + "gate_passed": trace["gate_passed"], + "task_mode": trace["task_mode"], + } + ) + print(f" [{'+' if trace['verdict_correct'] else 'X'}] {task_id}") + + # Write run-level score + n_correct = sum(1 for r in 
task_results if r["verdict_correct"]) + (run_dir / "score.json").write_text( + json.dumps( + { + "candidate": candidate_name, + "iteration": args.iteration, + "tasks_total": len(task_results), + "tasks_correct": n_correct, + "pass_rate": n_correct / len(task_results) if task_results else 0.0, + "timestamp": datetime.now(UTC).isoformat(), + }, + indent=2, + ) + ) + + # Append to evolution_summary.jsonl + summary_path = args.output_dir / "evolution_summary.jsonl" + with summary_path.open("a") as f: + f.write( + json.dumps( + { + "iteration": args.iteration, + "candidate": candidate_name, + "pass_rate": n_correct / len(task_results) if task_results else 0.0, + "tasks_total": len(task_results), + "timestamp": datetime.now(UTC).isoformat(), + } + ) + + "\n" + ) + + print(f"\n{candidate_name}: {n_correct}/{len(task_results)} correct") + print(f"Traces written to {run_dir}") + print(f"Summary appended to {summary_path}") + + +if __name__ == "__main__": + main() diff --git a/score_harness/query_runs.py b/score_harness/query_runs.py new file mode 100644 index 000000000..2bd367a37 --- /dev/null +++ b/score_harness/query_runs.py @@ -0,0 +1,141 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
def read_summary(summary_path: Path) -> list[dict]:
    """Load all records from a JSON Lines summary file.

    Args:
        summary_path: Location of the ``.jsonl`` file to read.

    Returns:
        One parsed value per non-blank line, in file order. An empty list is
        returned when the file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if summary_path.exists():
        return [
            json.loads(record)
            for record in summary_path.read_text().splitlines()
            # Whitespace-only lines carry no record.
            if record.strip()
        ]
    return []
def print_candidate_diff(runs_dir: Path, left: str, right: str) -> None:
    """Print the tasks on which two candidates differ.

    Scores for both candidates are loaded with ``load_task_scores``. For every
    task id seen for either candidate, one line is printed when

    * only one candidate has a score for the task (``present_in=<candidate>``), or
    * both have a score but their ``verdict_correct`` values differ.

    Tasks with identical ``verdict_correct`` values produce no output.

    Args:
        runs_dir: Root directory of the run history.
        left: Name of the first candidate.
        right: Name of the second candidate.
    """
    left_scores = load_task_scores(runs_dir, left)
    right_scores = load_task_scores(runs_dir, right)

    for task_id in sorted(set(left_scores) | set(right_scores)):
        left_score = left_scores.get(task_id)
        right_score = right_scores.get(task_id)

        if left_score is None or right_score is None:
            # Decide presence with the same "is None" test as the guard above;
            # a truthiness test would report an empty-but-present score ({})
            # as belonging to neither candidate.
            present_in = "".join(
                name
                for name, score in ((left, left_score), (right, right_score))
                if score is not None
            )
            print(f"task={task_id} present_in={present_in}")
            continue

        if left_score.get("verdict_correct") != right_score.get("verdict_correct"):
            print(
                f"task={task_id} {left}={left_score.get('verdict_correct')} "
                f"{right}={right_score.get('verdict_correct')}"
            )
100644 index 000000000..9c2978ced --- /dev/null +++ b/score_harness/spec/task_001_example.json @@ -0,0 +1,12 @@ +{ + "_comment": "Example task spec. Copy and edit per change scenario. See README.md for field descriptions.", + "active": false, + "id": "task_001_complies_link_removed", + "description": "A guideline loses its complies link to a std_req. Gate should fail due to broken reference.", + "input_path": "docs/internals/requirements/requirements.rst", + "change_description": "Remove the complies link from gd_guidl__xyz to std_req__iso26262__001", + "expected_verdict": "fail", + "consistency_rules": ["CR-001"], + "needs_json_path": "", + "_note": "Set needs_json_path to the path of the needs.json produced after the agent applies the change." +} diff --git a/score_harness/spec/task_002_threshold_fail.json b/score_harness/spec/task_002_threshold_fail.json new file mode 100644 index 000000000..3730f9c0a --- /dev/null +++ b/score_harness/spec/task_002_threshold_fail.json @@ -0,0 +1,13 @@ +{ + "id": "task_002_threshold_fail", + "description": "Coverage threshold failure for tool_req when code-link coverage is below 100%.", + "input_path": "score_harness/fixtures/metrics_threshold_fail.json", + "change_description": "Evaluate a metrics fixture where tool_req code-link coverage is 75% against a 100% threshold.", + "expected_verdict": "fail", + "consistency_rules": ["CR-005"], + "metrics_json_path": "score_harness/fixtures/metrics_threshold_fail.json", + "gate_args": ["--min-req-code", "100"], + "responsible_role": "pr_creator", + "escalation_role": "harness_maintainer", + "waiver_authority": "release_approver" +} diff --git a/score_harness/spec/task_003_broken_refs_fail.json b/score_harness/spec/task_003_broken_refs_fail.json new file mode 100644 index 000000000..d9e7f54e9 --- /dev/null +++ b/score_harness/spec/task_003_broken_refs_fail.json @@ -0,0 +1,13 @@ +{ + "id": "task_003_broken_refs_fail", + "description": "Broken testcase reference should fail when 
fail-on-broken-test-refs is enabled.", + "input_path": "score_harness/fixtures/metrics_broken_refs.json", + "change_description": "Evaluate a metrics fixture with one broken testcase reference.", + "expected_verdict": "fail", + "consistency_rules": ["CR-003"], + "metrics_json_path": "score_harness/fixtures/metrics_broken_refs.json", + "gate_args": ["--fail-on-broken-test-refs"], + "responsible_role": "pr_creator", + "escalation_role": "harness_maintainer", + "waiver_authority": "release_approver" +} diff --git a/score_harness/spec/task_004_need_type_scope_pass.json b/score_harness/spec/task_004_need_type_scope_pass.json new file mode 100644 index 000000000..cac83de8b --- /dev/null +++ b/score_harness/spec/task_004_need_type_scope_pass.json @@ -0,0 +1,13 @@ +{ + "id": "task_004_need_type_scope_pass", + "description": "Need-type scoping should pass when only fully-linked tool_req is checked and comp_req is ignored.", + "input_path": "score_harness/fixtures/metrics_tool_req_scope_pass.json", + "change_description": "Evaluate a metrics fixture where tool_req passes and comp_req fails, while gate is scoped to tool_req only.", + "expected_verdict": "pass", + "consistency_rules": ["CR-005"], + "metrics_json_path": "score_harness/fixtures/metrics_tool_req_scope_pass.json", + "gate_args": ["--need-type", "tool_req", "--require-all-links"], + "responsible_role": "pr_creator", + "escalation_role": "harness_maintainer", + "waiver_authority": "release_approver" +} diff --git a/score_harness/spec/task_005_build_output_pass.json b/score_harness/spec/task_005_build_output_pass.json new file mode 100644 index 000000000..81dd3f839 --- /dev/null +++ b/score_harness/spec/task_005_build_output_pass.json @@ -0,0 +1,13 @@ +{ + "id": "task_005_build_output_pass", + "description": "Current docs build output should pass when no thresholds are enforced.", + "input_path": "../_build/needs.json", + "change_description": "Evaluate the current repo docs build output with default gate settings 
(no minimum thresholds).", + "expected_verdict": "pass", + "consistency_rules": ["CR-005"], + "needs_json_path": "../_build/needs.json", + "gate_args": [], + "responsible_role": "pr_creator", + "escalation_role": "harness_maintainer", + "waiver_authority": "release_approver" +} diff --git a/score_harness/tests/BUILD b/score_harness/tests/BUILD new file mode 100644 index 000000000..3943bee6c --- /dev/null +++ b/score_harness/tests/BUILD @@ -0,0 +1,49 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +load("@docs_as_code_hub_env//:requirements.bzl", "all_requirements") +load("//:score_pytest.bzl", "score_pytest") + +score_pytest( + name = "validate_candidate_test", + srcs = ["validate_candidate_test.py"], + deps = [ + "//score_harness:validate_candidate", + "//score_harness:common", + "//score_harness/harness:base_harness", + "//score_harness/harness:rule_retrieval_harness", + ] + all_requirements, + data = [ + "//score_harness:spec_files", + "//score_harness:sources", + ], + pytest_config = "//:pyproject.toml", +) + +score_pytest( + name = "common_test", + srcs = ["common_test.py"], + deps = [ + "//score_harness:common", + "//score_harness/harness:base_harness", + ] + all_requirements, + data = [ + "//score_harness:spec_files", + "//score_harness:sources", + ], + pytest_config = "//:pyproject.toml", +) diff --git a/score_harness/tests/common_test.py 
def test_load_task_specs():
    """Specs loaded from the spec directory expose an id and an existing source path."""
    loaded_specs = common_module.load_task_specs(_MY_PATH.parent / "spec")

    assert len(loaded_specs) > 0, "Should have at least one task spec"

    for spec in loaded_specs:
        # Task specs are keyed by "id" (not "task_id"); "_task_spec_path"
        # is expected on every loaded spec.
        for required_key in ("id", "_task_spec_path"):
            assert required_key in spec
        assert Path(spec["_task_spec_path"]).exists()
def test_validate_harness_baseline():
    """The baseline harness validates cleanly against a small, runnable task spec."""
    package_root = _MY_PATH.parent

    harness_path = package_root / "harness" / "base_harness.py"
    assert harness_path.exists(), f"Harness not found: {harness_path}"

    # task_002 is used as the minimal task spec for validation.
    task_spec_path = package_root / "spec" / "task_002_threshold_fail.json"
    assert task_spec_path.exists(), f"Task spec not found: {task_spec_path}"

    # validate_candidate is expected to report problems in its result
    # rather than raise.
    outcome = validate_candidate_module.validate_candidate(
        harness_path, task_spec_path
    )
    assert isinstance(outcome, dict)
    assert outcome.get("status") == "ok"
    assert "context_length" in outcome
def test_task_spec_schema():
    """Every active task spec carries the required fields and a usable input path.

    Specs with ``"active": false`` are skipped. For all others the test
    requires ``id``, ``description`` and ``responsible_role``, plus a
    non-empty ``metrics_json_path`` or ``needs_json_path``.
    """
    import json

    spec_dir = _MY_PATH.parent / "spec"
    spec_files = sorted(spec_dir.glob("task_*.json"))
    # Without this guard an empty glob would let the test pass vacuously.
    assert spec_files, f"no task_*.json specs found in {spec_dir}"

    for spec_file in spec_files:
        spec = json.loads(spec_file.read_text(encoding="utf-8"))

        # Skip inactive examples
        if spec.get("active") is False:
            continue

        for field in ("id", "description", "responsible_role"):
            assert field in spec, f"{spec_file.name}: missing {field}"

        # Check the value, not just the key: an empty path string is present
        # in the spec but does not name a runnable input.
        has_metrics = bool(spec.get("metrics_json_path"))
        has_needs = bool(spec.get("needs_json_path"))
        assert has_metrics or has_needs, (
            f"{spec_file.name}: must have metrics_json_path or needs_json_path"
        )
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ +Lightweight validation for docs-as-code harness candidates. + +This is the cheap pre-benchmark check. It catches malformed candidates before +they consume the full task set or expensive gate runs. + +Validation covers: +1. candidate module loads +2. candidate class instantiates +3. get_context() returns a string for one tiny task spec +4. post_process() returns a dict +5. expected trace filenames are known and stable +6. task spec provides either needs_json_path or metrics_json_path +7. linting passes (ruff) +8. 
def _run_external_check(
    command: list[str], failure_type: str, fix: str
) -> dict | None:
    """Run one external checker and turn a non-zero exit into a failure record.

    Returns None when the tool exits with status 0 or is not installed.
    """
    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        # Best effort: a missing tool must not fail validation, but report
        # the skipped check instead of dropping it silently.
        print(
            f"[validation] warning: {command[0]} not found; skipping check",
            file=sys.stderr,
        )
        return None

    if result.returncode == 0:
        return None
    return {
        "failure_type": failure_type,
        # Fall back to stderr so the record is not empty when the tool
        # printed nothing on stdout.
        "message": result.stdout.strip() or result.stderr.strip(),
        "fix": fix,
    }


def validate_candidate(
    candidate_path: Path, task_spec_path: Path, skip_external_checks: bool = False
) -> dict:
    """Return a structured validation result for one candidate and one tiny task.

    The candidate is loaded with ``load_harness`` and exercised once:
    ``get_context(task_spec)`` must return ``str`` and
    ``post_process("", task_spec)`` must return ``dict``. The task spec must
    be active and provide a non-empty ``needs_json_path`` or
    ``metrics_json_path``. Unless ``skip_external_checks`` is set, ``ruff
    check`` and ``basedpyright`` are then run on the candidate file; a tool
    that is not installed is skipped with a warning on stderr.

    Args:
        candidate_path: Path to the candidate harness module
        task_spec_path: Path to a task spec for validation
        skip_external_checks: If True, skip linting/type checking (for CI environments)

    Returns:
        On success: ``candidate``, ``task_id``, ``context_length``,
        ``required_trace_filenames`` and ``status="ok"``.
        On failure: ``candidate``, ``task_id``, ``status="failed"`` and
        ``failures``, a list of dicts with ``failure_type``, ``message`` and
        ``fix``.
    """
    failures = []
    context_length = 0

    # Checks 1-6: harness interface and basic functionality.
    try:
        harness = load_harness(candidate_path)
        task_spec = json.loads(task_spec_path.read_text())
        if not task_spec.get("active", True):
            raise ValueError("task spec is inactive; choose a runnable task spec")

        if not task_spec.get("needs_json_path") and not task_spec.get(
            "metrics_json_path"
        ):
            raise ValueError(
                "task spec must provide either needs_json_path or metrics_json_path"
            )

        context = harness.get_context(task_spec)
        if not isinstance(context, str):
            raise TypeError(
                f"get_context() must return str, got {type(context).__name__}"
            )

        post_processed = harness.post_process("", task_spec)
        if not isinstance(post_processed, dict):
            raise TypeError(
                f"post_process() must return dict, got {type(post_processed).__name__}"
            )

        context_length = len(context)

    except Exception as e:
        # Deliberately broad: candidate code is untrusted and any error it
        # raises is recorded as a validation failure rather than propagated.
        failures.append(
            {
                "failure_type": "interface_error",
                "message": str(e),
                "fix": "Ensure candidate implements get_context() and post_process() correctly",
            }
        )

    # Checks 7-8: linting (ruff) and type checking (basedpyright).
    if not skip_external_checks:
        external_checks = (
            (
                ["ruff", "check", str(candidate_path)],
                "linting_error",
                # Point the hint at the file that was actually checked.
                f"Run: ruff check --fix {candidate_path}",
            ),
            (
                ["basedpyright", str(candidate_path)],
                "type_error",
                "Fix type errors reported by basedpyright",
            ),
        )
        for command, failure_type, fix in external_checks:
            failure = _run_external_check(command, failure_type, fix)
            if failure is not None:
                failures.append(failure)

    if failures:
        return {
            "candidate": candidate_path.stem,
            "task_id": task_spec_path.stem,
            "status": "failed",
            "failures": failures,
        }

    return {
        "candidate": candidate_path.stem,
        "task_id": task_spec_path.stem,
        "context_length": context_length,
        "required_trace_filenames": list(REQUIRED_TRACE_FILENAMES),
        "status": "ok",
    }
type=int, default=0) + parser.add_argument( + "--skip-external-checks", + action="store_true", + help="Skip linting/type checking (for CI)", + ) + parser.add_argument( + "--log-failures", + action="store_true", + help="Write validation failures to validation_failures.jsonl", + ) + args = parser.parse_args() + + if ( + args.skip_external_checks + and os.getenv("SCORE_HARNESS_ALLOW_SKIP_EXTERNAL_CHECKS") != "1" + ): + print( + "Refusing --skip-external-checks: set " + "SCORE_HARNESS_ALLOW_SKIP_EXTERNAL_CHECKS=1 for local debugging only." + ) + sys.exit(2) + + result = validate_candidate( + args.candidate, args.task_spec, args.skip_external_checks + ) + + if args.log_failures and result["status"] == "failed": + log_validation_failure( + args.iteration, result["candidate"], result["failures"] + ) + + print(json.dumps(result, indent=2)) + + # Exit with non-zero if validation failed + if result["status"] == "failed": + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/score_harness/validation_failures.jsonl b/score_harness/validation_failures.jsonl new file mode 100644 index 000000000..bd2114b4a --- /dev/null +++ b/score_harness/validation_failures.jsonl @@ -0,0 +1,3 @@ +# Validation failure log (append-only) +# Each line records one validation failure so the proposer can learn from mistakes +# Format: {"iteration": N, "candidate": "name", "failure_type": "...", "message": "...", "fix": "..."} diff --git a/scripts_bazel/BUILD b/scripts_bazel/BUILD index befe51730..58dda85ff 100644 --- a/scripts_bazel/BUILD +++ b/scripts_bazel/BUILD @@ -11,6 +11,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + load("@aspect_rules_py//py:defs.bzl", "py_binary") load("@docs_as_code_hub_env//:requirements.bzl", "all_requirements") @@ -37,3 +41,11 @@ py_binary( main = "merge_sourcelinks.py", visibility = ["//visibility:public"], ) + 
+py_binary( + name = "traceability_gate", + srcs = ["traceability_gate.py"], + main = "traceability_gate.py", + visibility = ["//visibility:public"], + deps = all_requirements, +) diff --git a/scripts_bazel/tests/BUILD b/scripts_bazel/tests/BUILD index 7ff48c428..b5dc6d722 100644 --- a/scripts_bazel/tests/BUILD +++ b/scripts_bazel/tests/BUILD @@ -11,6 +11,10 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + load("@docs_as_code_hub_env//:requirements.bzl", "all_requirements") load("//:score_pytest.bzl", "score_pytest") @@ -32,3 +36,12 @@ score_pytest( ] + all_requirements, pytest_config = "//:pyproject.toml", ) + +score_pytest( + name = "traceability_gate_test", + srcs = ["traceability_gate_test.py"], + deps = [ + "//scripts_bazel:traceability_gate", + ] + all_requirements, + pytest_config = "//:pyproject.toml", +) diff --git a/scripts_bazel/tests/traceability_gate_test.py b/scripts_bazel/tests/traceability_gate_test.py new file mode 100644 index 000000000..9ac29331d --- /dev/null +++ b/scripts_bazel/tests/traceability_gate_test.py @@ -0,0 +1,230 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
_MY_PATH = Path(__file__).parent

_GATE_SCRIPT = _MY_PATH.parent / "traceability_gate.py"


def _pct(part: int, total: int) -> float:
    """Return ``part / total`` as a percentage rounded to two decimals.

    An empty total yields 100.0, matching the convention stated in the
    metrics schema descriptions ("or 100 when total == 0").
    """
    return round(part / total * 100, 2) if total else 100.0


def _type_metrics(
    *,
    req_total: int,
    with_code: int,
    with_test: int,
    fully_linked: int,
    tests_total: int,
    tests_linked: int,
    missing_code: list[str] | None = None,
    missing_test: list[str] | None = None,
    not_fully_linked: list[str] | None = None,
    broken_references: list[dict[str, str]] | None = None,
) -> dict[str, object]:
    """Build one ``metrics_by_type`` entry in the schema-v1 shape.

    Percentages are derived from the counts so a fixture cannot carry a
    percentage that contradicts its own counts.
    """
    return {
        "include_not_implemented": False,
        "requirements": {
            "total": req_total,
            "with_code_link": with_code,
            "with_test_link": with_test,
            "fully_linked": fully_linked,
            "with_code_link_pct": _pct(with_code, req_total),
            "with_test_link_pct": _pct(with_test, req_total),
            "fully_linked_pct": _pct(fully_linked, req_total),
            "missing_code_link_ids": list(missing_code or []),
            "missing_test_link_ids": list(missing_test or []),
            "not_fully_linked_ids": list(not_fully_linked or []),
        },
        "tests": {
            "total": tests_total,
            "filtered_test_types": [],
            "linked_to_requirements": tests_linked,
            "linked_to_requirements_pct": _pct(tests_linked, tests_total),
            "broken_references": list(broken_references or []),
        },
    }


def _write_metrics_json(tmp_path: Path, metrics_by_type: dict | None = None) -> Path:
    """Write a schema-v1 metrics JSON and return its path.

    Without ``metrics_by_type`` a partially linked ``tool_req`` fixture is
    used: 75% code links, 50% test links, 50% fully linked, 66.67% tests linked.
    """
    if metrics_by_type is None:
        metrics_by_type = {
            "tool_req": _type_metrics(
                req_total=4,
                with_code=3,
                with_test=2,
                fully_linked=2,
                tests_total=3,
                tests_linked=2,
                missing_code=["REQ_4"],
                missing_test=["REQ_3", "REQ_4"],
                not_fully_linked=["REQ_3", "REQ_4"],
            )
        }
    payload = {
        "schema_version": "1",
        "generated_by": "sphinx_build",
        "metrics_by_type": metrics_by_type,
    }
    out = tmp_path / "metrics.json"
    out.write_text(json.dumps(payload), encoding="utf-8")
    return out


def _run_gate(
    metrics_json: Path, extra_args: list[str]
) -> subprocess.CompletedProcess[str]:
    """Run the gate script in a child interpreter and capture its text output."""
    command = [
        sys.executable,
        str(_GATE_SCRIPT),
        "--metrics-json",
        str(metrics_json),
        *extra_args,
    ]
    # check=False: a non-zero exit code is the behaviour under test, not an error.
    return subprocess.run(command, capture_output=True, text=True, check=False)


def test_gate_passes_when_thresholds_met(tmp_path: Path) -> None:
    metrics_json = _write_metrics_json(tmp_path)

    result = _run_gate(
        metrics_json,
        ["--min-req-code", "70", "--min-req-test", "50", "--min-tests-linked", "60"],
    )

    assert result.returncode == 0
    assert "Threshold check passed." in result.stdout


def test_gate_fails_when_threshold_not_met(tmp_path: Path) -> None:
    metrics_json = _write_metrics_json(tmp_path)

    result = _run_gate(
        metrics_json,
        ["--min-req-code", "100"],
    )

    assert result.returncode == 2
    assert "Threshold check failed:" in result.stdout
    assert "[tool_req] requirements with code links" in result.stdout


def test_gate_require_all_links_fails(tmp_path: Path) -> None:
    metrics_json = _write_metrics_json(tmp_path)

    result = _run_gate(metrics_json, ["--require-all-links"])

    assert result.returncode == 2
    assert "Threshold check failed:" in result.stdout


def test_gate_fail_on_broken_refs(tmp_path: Path) -> None:
    # Fully linked requirements, so only the broken reference can fail the gate.
    metrics_by_type = {
        "tool_req": _type_metrics(
            req_total=1,
            with_code=1,
            with_test=1,
            fully_linked=1,
            tests_total=2,
            tests_linked=2,
            broken_references=[{"testcase": "TC_X", "missing_need": "REQ_UNKNOWN"}],
        )
    }
    metrics_json = _write_metrics_json(tmp_path, metrics_by_type)

    result = _run_gate(metrics_json, ["--fail-on-broken-test-refs"])

    assert result.returncode == 2
    assert "broken testcase references found:" in result.stdout


def test_gate_specific_need_type(tmp_path: Path) -> None:
    unlinked_ids = ["C1", "C2", "C3", "C4", "C5"]
    metrics_by_type = {
        "tool_req": _type_metrics(
            req_total=2,
            with_code=2,
            with_test=2,
            fully_linked=2,
            tests_total=1,
            tests_linked=1,
        ),
        "comp_req": _type_metrics(
            req_total=5,
            with_code=0,
            with_test=0,
            fully_linked=0,
            tests_total=0,
            tests_linked=0,
            missing_code=unlinked_ids,
            missing_test=unlinked_ids,
            not_fully_linked=unlinked_ids,
        ),
    }
    metrics_json = _write_metrics_json(tmp_path, metrics_by_type)

    # Gate only on tool_req (which is fully linked) — comp_req failures are ignored
    result = _run_gate(
        metrics_json,
        ["--need-type", "tool_req", "--require-all-links"],
    )

    assert result.returncode == 0
    assert "[tool_req]" in result.stdout
    assert "[comp_req]" not in result.stdout


def test_gate_unknown_need_type_fails(tmp_path: Path) -> None:
    metrics_json = _write_metrics_json(tmp_path)

    result = _run_gate(metrics_json, ["--need-type", "nonexistent_req"])

    assert result.returncode == 2
    assert "not found in metrics JSON" in result.stdout


def test_gate_unsupported_schema_version(tmp_path: Path) -> None:
    bad = tmp_path / "bad.json"
    bad.write_text(
        json.dumps({"schema_version": "99", "metrics_by_type": {}}), encoding="utf-8"
    )

    result = _run_gate(bad, [])

    assert result.returncode == 1
    assert "unsupported schema_version" in result.stderr


def test_gate_missing_file_returns_error(tmp_path: Path) -> None:
    result = _run_gate(tmp_path / "does_not_exist.json", [])

    assert result.returncode == 1
    assert "not found" in result.stderr
_SUPPORTED_SCHEMA_VERSION = "1"


def _print_type_summary(need_type: str, metrics: dict[str, Any]) -> None:
    """Print the coverage summary of one need type to stdout.

    Args:
        need_type: Key of the entry inside ``metrics_by_type``; printed as heading.
        metrics: The entry itself; its ``requirements`` and ``tests`` objects
            are read.

    Raises:
        KeyError: If ``metrics`` lacks one of the keys read here.
    """
    req = metrics["requirements"]
    tst = metrics["tests"]
    req_total = req["total"]
    # All values are looked up before anything is printed, so a missing key
    # does not leave a half-written summary behind.
    rows = (
        (
            "Requirements with source links",
            req["with_code_link"],
            req_total,
            req["with_code_link_pct"],
        ),
        (
            "Requirements with test links",
            req["with_test_link"],
            req_total,
            req["with_test_link_pct"],
        ),
        (
            "Requirements fully linked",
            req["fully_linked"],
            req_total,
            req["fully_linked_pct"],
        ),
        (
            "Tests linked to requirements",
            tst["linked_to_requirements"],
            tst["total"],
            tst["linked_to_requirements_pct"],
        ),
    )
    broken_references = tst["broken_references"]

    print(f"[{need_type}]")
    for label, count, total, pct in rows:
        print(f" {label}: {count}/{total} ({pct:.2f}%)")
    print(f" Broken test references: {len(broken_references)}")
    for item in broken_references:
        print(f" - {item['testcase']} -> {item['missing_need']}")


def _check_type_thresholds(
    need_type: str,
    metrics: dict[str, Any],
    min_req_code: float,
    min_req_test: float,
    min_req_fully_linked: float,
    min_tests_linked: float,
    fail_on_broken_test_refs: bool,
) -> list[str]:
    """Compare one need type's percentages against the minimum thresholds.

    Args:
        need_type: Key of the entry inside ``metrics_by_type``; used as the
            ``[need_type]`` prefix of every message.
        metrics: The entry itself (``requirements`` and ``tests`` objects).
        min_req_code: Minimum ``with_code_link_pct``.
        min_req_test: Minimum ``with_test_link_pct``.
        min_req_fully_linked: Minimum ``fully_linked_pct``.
        min_tests_linked: Minimum ``linked_to_requirements_pct``.
        fail_on_broken_test_refs: When true, a non-empty ``broken_references``
            list is reported as a failure.

    Returns:
        One message per violated threshold; empty when everything passes.

    Raises:
        KeyError: If ``metrics`` lacks one of the keys read here.
    """
    req = metrics["requirements"]
    tst = metrics["tests"]
    checks = (
        ("requirements with code links", req["with_code_link_pct"], min_req_code),
        ("requirements with test links", req["with_test_link_pct"], min_req_test),
        (
            "requirements fully linked",
            req["fully_linked_pct"],
            min_req_fully_linked,
        ),
        (
            "tests linked to requirements",
            tst["linked_to_requirements_pct"],
            min_tests_linked,
        ),
    )
    broken_references = tst["broken_references"]
    prefix = f"[{need_type}] "

    failures = [
        f"{prefix}{label} {actual:.2f}% < {minimum:.2f}%"
        for label, actual, minimum in checks
        if actual < minimum
    ]
    if fail_on_broken_test_refs and broken_references:
        failures.append(
            f"{prefix}broken testcase references found: {len(broken_references)}"
        )
    return failures


def main(argv: list[str] | None = None) -> int:
    """Run the traceability gate.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Returns:
        0 when all thresholds are met, 2 when at least one threshold (or a
        requested need type) is not satisfied, 1 when the metrics file is
        missing, unreadable, not valid JSON, has an unsupported schema version
        or does not have the expected structure.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Read a traceability metrics JSON (schema v1) and enforce coverage "
            "thresholds. Exits 0 on pass, 2 on threshold failure, 1 on input error."
        )
    )
    parser.add_argument(
        "--metrics-json",
        required=True,
        help="Path to the metrics JSON produced by the docs build.",
    )
    parser.add_argument(
        "--need-type",
        default=None,
        help=(
            "Check only this need type (e.g. 'tool_req'). "
            "If omitted, all types present in the JSON are checked."
        ),
    )
    parser.add_argument(
        "--min-req-code",
        type=float,
        default=0.0,
        help="Minimum %% of requirements that must have a source_code_link.",
    )
    parser.add_argument(
        "--min-req-test",
        type=float,
        default=0.0,
        help="Minimum %% of requirements that must have a testlink.",
    )
    parser.add_argument(
        "--min-req-fully-linked",
        type=float,
        default=0.0,
        help="Minimum %% of requirements that must have both source_code_link and testlink.",
    )
    parser.add_argument(
        "--min-tests-linked",
        type=float,
        default=0.0,
        help="Minimum %% of testcases that must reference at least one requirement.",
    )
    parser.add_argument(
        "--require-all-links",
        action="store_true",
        help="Shortcut: sets all --min-* to 100 and enables --fail-on-broken-test-refs.",
    )
    parser.add_argument(
        "--fail-on-broken-test-refs",
        action="store_true",
        help="Fail when any testcase references an unknown requirement ID.",
    )

    args = parser.parse_args(argv)

    if args.require_all_links:
        args.min_req_code = 100.0
        args.min_req_test = 100.0
        args.min_req_fully_linked = 100.0
        args.min_tests_linked = 100.0
        args.fail_on_broken_test_refs = True

    metrics_path = Path(args.metrics_json)
    # Relative paths are resolved against BUILD_WORKSPACE_DIRECTORY when that
    # environment variable is set; otherwise against the current directory.
    workspace_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY", "").strip()
    if not metrics_path.is_absolute() and workspace_dir:
        metrics_path = Path(workspace_dir) / metrics_path
    if not metrics_path.exists():
        print(f"Error: metrics JSON not found: {metrics_path}", file=sys.stderr)
        return 1

    try:
        data = json.loads(metrics_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; report them as input errors instead of a traceback.
        print(
            f"Error: cannot read metrics JSON {metrics_path}: {exc}",
            file=sys.stderr,
        )
        return 1
    if not isinstance(data, dict):
        print(
            f"Error: metrics JSON must contain an object at top level: {metrics_path}",
            file=sys.stderr,
        )
        return 1

    schema_version = data.get("schema_version")
    if schema_version != _SUPPORTED_SCHEMA_VERSION:
        print(
            f"Error: unsupported schema_version {schema_version!r} "
            f"(expected {_SUPPORTED_SCHEMA_VERSION!r})",
            file=sys.stderr,
        )
        return 1

    metrics_by_type = data.get("metrics_by_type")
    if not isinstance(metrics_by_type, dict) or not metrics_by_type:
        # An empty or missing mapping would otherwise "pass" without having
        # checked a single need type.
        print(
            "Error: metrics JSON contains no 'metrics_by_type' entries",
            file=sys.stderr,
        )
        return 1

    types_to_check = (
        [args.need_type] if args.need_type else list(metrics_by_type.keys())
    )

    print(f"Traceability gate input: {metrics_path}")
    print("-" * 72)

    failures: list[str] = []
    for need_type in types_to_check:
        if need_type not in metrics_by_type:
            available = list(metrics_by_type.keys())
            failures.append(
                f"need type '{need_type}' not found in metrics JSON "
                f"(available: {available})"
            )
            continue
        try:
            _print_type_summary(need_type, metrics_by_type[need_type])
            type_failures = _check_type_thresholds(
                need_type,
                metrics_by_type[need_type],
                args.min_req_code,
                args.min_req_test,
                args.min_req_fully_linked,
                args.min_tests_linked,
                args.fail_on_broken_test_refs,
            )
        except (KeyError, TypeError, ValueError) as exc:
            # Missing keys or wrongly typed values mean the file does not
            # follow schema v1: an input error, not a threshold failure.
            print(
                f"Error: malformed metrics for need type '{need_type}': {exc!r}",
                file=sys.stderr,
            )
            return 1
        failures.extend(type_failures)

    print("-" * 72)
    if failures:
        print("Threshold check failed:")
        for msg in failures:
            print(f" - {msg}")
        return 2

    print("Threshold check passed.")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
+ }, + "metrics_by_type": { + "type": "object", + "description": "Metrics keyed by need type (e.g. 'tool_req', 'comp_req'). Each key is one call-scope: the requirement type string passed via --requirement-types.", + "minProperties": 1, + "additionalProperties": { + "$ref": "#/$defs/TypeMetrics" + } + } + }, + "$defs": { + "TypeMetrics": { + "type": "object", + "required": ["include_not_implemented", "requirements", "tests"], + "additionalProperties": false, + "properties": { + "include_not_implemented": { + "type": "boolean", + "description": "Whether requirements with implemented==NO were counted in the denominator." + }, + "requirements": { + "$ref": "#/$defs/RequirementMetrics" + }, + "tests": { + "$ref": "#/$defs/TestMetrics" + } + } + }, + "RequirementMetrics": { + "type": "object", + "required": [ + "total", + "with_code_link", + "with_test_link", + "fully_linked", + "with_code_link_pct", + "with_test_link_pct", + "fully_linked_pct", + "missing_code_link_ids", + "missing_test_link_ids", + "not_fully_linked_ids" + ], + "additionalProperties": false, + "properties": { + "total": { + "type": "integer", + "minimum": 0, + "description": "Total number of requirements in scope." + }, + "with_code_link": { + "type": "integer", + "minimum": 0, + "description": "Requirements that have a non-empty source_code_link." + }, + "with_test_link": { + "type": "integer", + "minimum": 0, + "description": "Requirements that have a non-empty testlink." + }, + "fully_linked": { + "type": "integer", + "minimum": 0, + "description": "Requirements that have both source_code_link and testlink." + }, + "with_code_link_pct": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "with_code_link / total * 100, or 100 when total == 0." + }, + "with_test_link_pct": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "with_test_link / total * 100, or 100 when total == 0." 
+ }, + "fully_linked_pct": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "fully_linked / total * 100, or 100 when total == 0." + }, + "missing_code_link_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Sorted IDs of requirements missing source_code_link." + }, + "missing_test_link_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Sorted IDs of requirements missing testlink." + }, + "not_fully_linked_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Sorted IDs of requirements missing either source_code_link or testlink." + } + } + }, + "TestMetrics": { + "type": "object", + "required": [ + "total", + "filtered_test_types", + "linked_to_requirements", + "linked_to_requirements_pct", + "broken_references" + ], + "additionalProperties": false, + "properties": { + "total": { + "type": "integer", + "minimum": 0, + "description": "Total testcase needs considered (after optional test_type filter)." + }, + "filtered_test_types": { + "type": "array", + "items": { "type": "string" }, + "description": "The test_type values used to filter testcases. Empty means all types." + }, + "linked_to_requirements": { + "type": "integer", + "minimum": 0, + "description": "Testcases that reference at least one requirement via partially_verifies or fully_verifies." + }, + "linked_to_requirements_pct": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "linked_to_requirements / total * 100, or 100 when total == 0." + }, + "broken_references": { + "type": "array", + "items": { "$ref": "#/$defs/BrokenReference" }, + "description": "Testcase references that point to unknown requirement IDs." + } + } + }, + "BrokenReference": { + "type": "object", + "required": ["testcase", "missing_need"], + "additionalProperties": false, + "properties": { + "testcase": { + "type": "string", + "description": "ID of the testcase containing the broken reference." 
/* Cards inside .score-grid: thin translucent border, rounded corners, no shadow. */
/* NOTE(review): !important presumably overrides a theme-provided card shadow — confirm. */
.score-grid .sd-card {
    border: 1px solid rgba(45, 25, 66, 0.15);
    box-shadow: none !important;
    border-radius: 0.5rem;
}

/* Card content is centred with a small inner padding. */
.score-grid .sd-card-body {
    text-align: center;
    padding: 0.75rem;
}

/* Images in a card fill the card width up to 420px, keep their aspect ratio
   and are centred horizontally. */
.score-grid .sd-card-body img {
    display: block;
    width: 100%;
    max-width: 420px;
    height: auto;
    margin: 0 auto;
}

/* Filter-warning text is centred in both directions inside a box at least
   280px high. */
.score-grid .needs_filter_warning {
    min-height: 280px;
    display: flex;
    align-items: center;
    justify-content: center;
    text-align: center;
    color: var(--pst-color-text-base);
    margin: 0;
}
def _write_metrics_json(app: Sphinx, exception: Exception | None) -> None:
    """Dump per-requirement-type traceability metrics to ``<outdir>/metrics.json``.

    Connected to Sphinx's ``build-finished`` event. Nothing is written when the
    build ended with an exception. For every configured requirement type one
    summary is computed via ``compute_traceability_summary`` and stored under
    ``metrics_by_type``; the document carries ``schema_version`` "1".

    Args:
        app: The running Sphinx application (config, env and outdir are read).
        exception: Exception raised by the build, or ``None`` on success.
    """
    if exception:
        return

    needs_view = SphinxNeedsData(app.env).get_needs_view()
    all_needs: list[Any] = list(needs_view.values())

    # Comma-separated config value; blank items are dropped and an empty
    # result falls back to the single default type.
    configured_types = str(
        getattr(app.config, "score_metamodel_requirement_types", "tool_req")
    )
    requirement_types = {
        name.strip() for name in configured_types.split(",") if name.strip()
    }
    if not requirement_types:
        requirement_types = {"tool_req"}

    include_external: bool = bool(
        getattr(app.config, "score_metamodel_include_external_needs", False)
    )

    def _metrics_for(req_type: str) -> dict[str, Any]:
        # The metrics file always counts not-yet-implemented requirements and
        # applies no test-type filter.
        summary = compute_traceability_summary(
            all_needs=all_needs,
            requirement_types={req_type},
            include_not_implemented=True,
            filtered_test_types=set(),
            include_external=include_external,
        )
        return {
            "include_not_implemented": summary["include_not_implemented"],
            "include_external": summary["include_external"],
            "requirements": summary["requirements"],
            "tests": summary["tests"],
        }

    # Sorted iteration keeps the key order of the written file stable.
    metrics_by_type: dict[str, Any] = {
        req_type: _metrics_for(req_type) for req_type in sorted(requirement_types)
    }

    document: dict[str, Any] = {
        "schema_version": "1",
        "generated_by": "sphinx_build",
        "metrics_by_type": metrics_by_type,
    }

    out_path = Path(app.outdir) / "metrics.json"
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(document, indent=2), encoding="utf-8")
    logger.info(f"Traceability metrics written to: {out_path}")
+ def _remove_prefix(word: str, prefixes: list[str]) -> str: for prefix in prefixes or []: if isinstance(word, str) and word.startswith(prefix): @@ -264,6 +329,29 @@ def setup(app: Sphinx) -> dict[str, str | bool]: ), ) + app.add_config_value( + "score_metamodel_requirement_types", + "tool_req", + rebuild="env", + description=( + "Comma-separated list of need types treated as requirements for " + "traceability metrics (default: tool_req)." + ), + ) + + app.add_config_value( + "score_metamodel_include_external_needs", + False, + rebuild="env", + description=( + "When True, include external requirements in dashboard and CI metrics. " + "Default is False so each repo gates only its own needs." + ), + ) + + _ = app.connect("config-inited", _configure_traceability_dashboard, priority=498) + + _ = app.connect("build-finished", _write_metrics_json) _ = app.connect("build-finished", _run_checks) return { diff --git a/src/extensions/score_metamodel/checks/standards.py b/src/extensions/score_metamodel/checks/standards.py index 7d27f5bff..a04d176ba 100644 --- a/src/extensions/score_metamodel/checks/standards.py +++ b/src/extensions/score_metamodel/checks/standards.py @@ -10,10 +10,20 @@ # # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ # from sphinx.application import Sphinx from sphinx_needs.need_item import NeedItem +from ..sphinx_filters import ( + generic_pie_items_by_tag, + generic_pie_items_in_relationships, + generic_pie_linked_items, +) + # from score_metamodel import ( # CheckLogger, # graph_check, @@ -186,27 +196,8 @@ def my_pie_linked_standard_requirements( Passed arguments can be accessed via kwargs['arg'] See: https://sphinx-needs.readthedocs.io/en/latest/filter.html#arguments """ - cnt_connected = 0 - cnt_not_connected = 0 - - standard = kwargs["arg1"] - - all_standards_needs = get_standards_needs(needs) - standards_needs = { - 
k: v - for k, v in all_standards_needs.items() - if k.startswith(f"std_req__{standard}__") - } - compliance_req_needs = get_compliance_req_needs(needs) - - for need in standards_needs.values(): - if need["id"] in compliance_req_needs: - cnt_connected += 1 - else: - cnt_not_connected += 1 - - results.append(cnt_connected) - results.append(cnt_not_connected) + standard = str(kwargs["arg1"]) + generic_pie_linked_items(needs, results, arg1=f"std_req__{standard}__", arg2="gd_") def my_pie_linked_standard_requirements_by_tag( @@ -238,23 +229,10 @@ def my_pie_linked_standard_requirements_by_tag( the mutated `results`list, and use this to display/generate the piechart. """ - count_linked = 0 - count_non_linked = 0 - - tag = str(kwargs["arg1"]) assert len(kwargs) == 1, ( "Can only provide one tag to `my_pie_linked_standard_requirements_by_tag`" ) - - compliance_req_needs = get_compliance_req_needs(needs) - for need in needs: - if tag in need["tags"]: - if need["id"] in compliance_req_needs: - count_linked += 1 - else: - count_non_linked += 1 - results.append(count_linked) - results.append(count_non_linked) + generic_pie_items_by_tag(needs, results, arg1=kwargs["arg1"], arg2="gd_") def my_pie_linked_standard_workproducts( @@ -267,28 +245,10 @@ def my_pie_linked_standard_workproducts( Passed arguments can be accessed via kwargs['arg'] See: https://sphinx-needs.readthedocs.io/en/latest/filter.html#arguments """ - cwp_connected = 0 - cwp_not_connected = 0 - - standard = kwargs["arg1"] - - all_standard_workproducts = get_standards_workproducts(needs) - standard_workproducts = { - k: v - for k, v in all_standard_workproducts.items() - if k.startswith(f"std_wp__{standard}__") - } - - compliance_wp_needs = get_compliance_wp_needs(needs) - - for need in standard_workproducts.values(): - if need["id"] in compliance_wp_needs: - cwp_connected += 1 - else: - cwp_not_connected += 1 - - results.append(cwp_connected) - results.append(cwp_not_connected) + standard = str(kwargs["arg1"]) + 
generic_pie_linked_items( + needs, results, arg1=f"std_wp__{standard}__", arg2="workproduct" + ) def my_pie_workproducts_contained_in_exactly_one_workflow( @@ -299,33 +259,6 @@ def my_pie_workproducts_contained_in_exactly_one_workflow( in exactly one workflow, the not connected once and the once that are connected to multiple workflows. """ - all_workflows = get_workflows(needs) - all_workproducts = get_workproducts(needs) - - # Map to track counts for each workproduct and their associated workflows - workproduct_analysis = {wp["id"]: {"count": 0} for wp in all_workproducts.values()} - - # Iterate over workflows and update the counts and workflows - for workflow in all_workflows.values(): - for output in workflow.get("output", []): - # Increment count and add workflow_id if workproduct is in analysis - if output in workproduct_analysis: - workproduct_analysis[output]["count"] += 1 - - not_connected_wp = 0 - nb_wp_connected_to_one_workflow = 0 - nb_wp_connected_to_more_than_one_workflow = 0 - - for analysis in workproduct_analysis.values(): - count = analysis["count"] - - if count == 0: - not_connected_wp += 1 - elif count == 1: - nb_wp_connected_to_one_workflow += 1 - else: - nb_wp_connected_to_more_than_one_workflow += 1 - - results.append(not_connected_wp) - results.append(nb_wp_connected_to_one_workflow) - results.append(nb_wp_connected_to_more_than_one_workflow) + generic_pie_items_in_relationships( + needs, results, arg1="workflow", arg2="output", arg3="workproduct" + ) diff --git a/src/extensions/score_metamodel/checks/traceability_dashboard.py b/src/extensions/score_metamodel/checks/traceability_dashboard.py new file mode 100644 index 000000000..a59087928 --- /dev/null +++ b/src/extensions/score_metamodel/checks/traceability_dashboard.py @@ -0,0 +1,181 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for 
_DEFAULT_INCLUDE_EXTERNAL = False


def set_default_include_external(include_external: bool) -> None:
    """Configure default behaviour for including external requirements."""
    global _DEFAULT_INCLUDE_EXTERNAL
    _DEFAULT_INCLUDE_EXTERNAL = bool(include_external)


def _include_external(kwargs: dict[str, str | int | float]) -> bool:
    """Read include_external override from filter args, else use configured default.

    The override is ``arg2``. When present it is true only for "1", "true",
    "yes" or "on" (case-insensitive, surrounding whitespace ignored); any other
    value, including a blank one, means false.
    """
    raw = kwargs.get("arg2")
    if raw is None:
        return _DEFAULT_INCLUDE_EXTERNAL
    text = str(raw).strip().lower()
    return text in {"1", "true", "yes", "on"}


def _requirement_types(kwargs: dict[str, str | int | float]) -> set[str]:
    """Parse ``arg1`` as a comma-separated set of need types (default: tool_req)."""
    raw = str(kwargs.get("arg1", "tool_req")).strip()
    values = {value.strip() for value in raw.split(",") if value.strip()}
    return values or {"tool_req"}


def _scoped_summary(
    needs: Sequence[NeedItem | dict[str, Any]],
    kwargs: dict[str, str | int | float],
    *,
    include_not_implemented: bool = True,
) -> Any:
    """Compute the traceability summary for the scope given by the filter args.

    No test-type filter is applied.
    """
    return compute_traceability_summary(
        all_needs=needs,
        requirement_types=_requirement_types(kwargs),
        include_not_implemented=include_not_implemented,
        filtered_test_types=set(),
        include_external=_include_external(kwargs),
    )


def _append_rest_and_part(results: list[int], total: Any, part: Any) -> None:
    """Append the two pie slices ``total - part`` and ``part``, in that order."""
    total_count = int(total)
    part_count = int(part)
    results.append(total_count - part_count)
    results.append(part_count)


def pie_requirements_status(
    needs: Sequence[NeedItem | dict[str, Any]],
    results: list[int],
    **kwargs: str | int | float,
) -> None:
    """Dashboard status split: not implemented, implemented/incomplete, fully linked."""
    req_types = _requirement_types(kwargs)
    include_external = _include_external(kwargs)

    all_requirements = filter_requirements(
        needs,
        requirement_types=req_types,
        include_not_implemented=True,
        include_external=include_external,
    )
    implemented_requirements = filter_requirements(
        needs,
        requirement_types=req_types,
        include_not_implemented=False,
        include_external=include_external,
    )
    # Only implemented requirements count towards "fully linked" here.
    summary = _scoped_summary(needs, kwargs, include_not_implemented=False)

    not_implemented = len(all_requirements) - len(implemented_requirements)
    fully_linked = int(summary["requirements"]["fully_linked"])
    implemented_incomplete = len(implemented_requirements) - fully_linked

    results.append(not_implemented)
    results.append(implemented_incomplete)
    results.append(fully_linked)


def pie_requirements_with_code_links(
    needs: Sequence[NeedItem | dict[str, Any]],
    results: list[int],
    **kwargs: str | int | float,
) -> None:
    """Dashboard split: requirements with and without source code links."""
    requirements = _scoped_summary(needs, kwargs)["requirements"]
    _append_rest_and_part(
        results, requirements["total"], requirements["with_code_link"]
    )


def pie_requirements_with_test_links(
    needs: Sequence[NeedItem | dict[str, Any]],
    results: list[int],
    **kwargs: str | int | float,
) -> None:
    """Dashboard split: requirements with and without testcase links."""
    requirements = _scoped_summary(needs, kwargs)["requirements"]
    _append_rest_and_part(
        results, requirements["total"], requirements["with_test_link"]
    )


def pie_requirements_fully_linked(
    needs: Sequence[NeedItem | dict[str, Any]],
    results: list[int],
    **kwargs: str | int | float,
) -> None:
    """Dashboard split: requirements fully linked vs incomplete."""
    requirements = _scoped_summary(needs, kwargs)["requirements"]
    _append_rest_and_part(results, requirements["total"], requirements["fully_linked"])


def pie_process_requirements_linked(
    needs: Sequence[NeedItem | dict[str, Any]],
    results: list[int],
    **kwargs: str | int | float,
) -> None:
    """Dashboard split: process requirements linked vs not linked."""
    process_requirements = _scoped_summary(needs, kwargs)["process_requirements"]
    _append_rest_and_part(
        results, process_requirements["total"], process_requirements["linked"]
    )
regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +"""Generic sphinx-needs filter functions for ``needpie`` directives. + +These functions are fully parameterizable and designed to be called directly +by consumers of docs-as-code (e.g. reference-integration repos) when they +pull in the ``score_docs_as_code`` Bazel module. All functions follow the +sphinx-needs ``filter-func`` signature convention: + +.. code-block:: python + + def func(needs: list[NeedItem], results: list[int], **kwargs) -> None: ... + +Arguments are injected from the ``:filter-func:`` call-site as positional +``arg1``, ``arg2``, … keyword arguments. + +Example usage in RST:: + + .. needpie:: My Requirements Coverage + :labels: Linked, Not Linked + :filter-func: score_metamodel.sphinx_filters.generic_pie_linked_items(std_req__mystandard__, gd_) + +""" + +from __future__ import annotations + +from sphinx_needs.need_item import NeedItem + + +def _matches_source_selector(need: NeedItem, selector: str) -> bool: + """Return whether a need matches a source selector. + + The selector is treated as a prefix and matched against both ``type`` and + ``id`` so filters remain robust when metamodels use explicit id prefixes + that are not coupled to directive names. + """ + need_type = str(need.get("type", "")) + need_id = str(need.get("id", "")) + return need_type.startswith(selector) or need_id.startswith(selector) + + +def generic_pie_linked_items( + needs: list[NeedItem], results: list[int], **kwargs: str | int | float +) -> None: + """Count target IDs by whether they are linked by selected source needs. 
+ + Arguments are passed via ``arg1`` (target ID prefix), ``arg2`` (source + selector prefix, matched against source ``type`` and ``id``), and ``arg3`` + (link field name, default ``complies``). + """ + id_prefix = str(kwargs.get("arg1", "")) + source_selector = str(kwargs.get("arg2", "")) + link_field = str(kwargs.get("arg3", "complies")) + + target_ids = [ + str(n.get("id", "")) + for n in needs + if str(n.get("id", "")).startswith(id_prefix) + ] + + linked_ids: set[str] = { + ref + for n in needs + if _matches_source_selector(n, source_selector) + for ref in n.get(link_field, []) + if ref + } + + connected = sum(1 for item_id in target_ids if item_id in linked_ids) + not_connected = len(target_ids) - connected + + results.append(connected) + results.append(not_connected) + + +def generic_pie_items_by_tag( + needs: list[NeedItem], results: list[int], **kwargs: str | int | float +) -> None: + """Count tagged items split by whether selected source needs link them. + + Arguments are passed via ``arg1`` (tag), ``arg2`` (source selector prefix, + matched against source ``type`` and ``id``), and ``arg3`` (link field + name, default ``complies``). + """ + tag = str(kwargs.get("arg1", "")) + source_selector = str(kwargs.get("arg2", "")) + link_field = str(kwargs.get("arg3", "complies")) + + linked_ids: set[str] = { + ref + for n in needs + if _matches_source_selector(n, source_selector) + for ref in n.get(link_field, []) + if ref + } + + linked = 0 + not_linked = 0 + for n in needs: + if tag in n.get("tags", []): + if str(n.get("id", "")) in linked_ids: + linked += 1 + else: + not_linked += 1 + + results.append(linked) + results.append(not_linked) + + +def generic_pie_items_in_relationships( + needs: list[NeedItem], results: list[int], **kwargs: str | int | float +) -> None: + """Count items of a given type by how many container items reference them. + + For every need of type *arg3*, counts how many needs of type *arg1* + include its id in their *arg2* field. 
Splits the result into three + buckets: not referenced, referenced exactly once, referenced more than + once. + + :filter-func: arguments: + + - ``arg1`` – type of the container needs (e.g. ``workflow``) + - ``arg2`` – field on the container that holds references + (e.g. ``output``) + - ``arg3`` – type of the items to count (e.g. ``workproduct``) + + Appends to *results*: + ``[not_referenced_count, referenced_once_count, referenced_multiple_count]`` + """ + container_type = str(kwargs.get("arg1", "")) + field = str(kwargs.get("arg2", "")) + item_type = str(kwargs.get("arg3", "")) + + containers = [n for n in needs if n.get("type") == container_type] + items = [n for n in needs if n.get("type") == item_type] + + item_counts: dict[str, int] = {str(n.get("id", "")): 0 for n in items} + + for container in containers: + for ref in container.get(field, []): + if ref in item_counts: + item_counts[ref] += 1 + + item_count_values = list(item_counts.values()) + not_referenced = item_count_values.count(0) + referenced_once = item_count_values.count(1) + referenced_multiple = len(item_count_values) - not_referenced - referenced_once + + results.append(not_referenced) + results.append(referenced_once) + results.append(referenced_multiple) diff --git a/src/extensions/score_metamodel/tests/test_sphinx_filters.py b/src/extensions/score_metamodel/tests/test_sphinx_filters.py new file mode 100644 index 000000000..c065a01b7 --- /dev/null +++ b/src/extensions/score_metamodel/tests/test_sphinx_filters.py @@ -0,0 +1,74 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +from typing import cast + +from sphinx_needs.need_item import NeedItem + +from src.extensions.score_metamodel.sphinx_filters import ( + generic_pie_items_by_tag, + generic_pie_linked_items, +) + + +def test_generic_pie_linked_items_matches_source_by_id_prefix() -> None: + needs = cast( + list[NeedItem], + [ + {"id": "std_req__iso26262__001", "type": "std_req"}, + # Type intentionally does not match selector prefix, id does. + { + "id": "gd_guidl__xyz", + "type": "guideline", + "complies": ["std_req__iso26262__001"], + }, + ], + ) + + results: list[int] = [] + generic_pie_linked_items( + needs, + results, + arg1="std_req__iso26262__", + arg2="gd_", + arg3="complies", + ) + + assert results == [1, 0] + + +def test_generic_pie_items_by_tag_matches_source_by_id_prefix() -> None: + needs = cast( + list[NeedItem], + [ + {"id": "REQ_A", "type": "tool_req", "tags": ["aspice40_man5"]}, + {"id": "REQ_B", "type": "tool_req", "tags": ["aspice40_man5"]}, + # Type intentionally does not match selector prefix, id does. 
+ { + "id": "gd_req__abc", + "type": "process_requirement", + "complies": ["REQ_A"], + }, + ], + ) + + results: list[int] = [] + generic_pie_items_by_tag( + needs, + results, + arg1="aspice40_man5", + arg2="gd_", + arg3="complies", + ) + + assert results == [1, 1] diff --git a/src/extensions/score_metamodel/tests/test_traceability_dashboard.py b/src/extensions/score_metamodel/tests/test_traceability_dashboard.py new file mode 100644 index 000000000..2e06b3b7d --- /dev/null +++ b/src/extensions/score_metamodel/tests/test_traceability_dashboard.py @@ -0,0 +1,218 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Tests that dashboard filters follow local/external settings.""" + +from collections.abc import Sequence +from typing import Any + +import pytest + +from src.extensions.score_metamodel.checks import traceability_dashboard +from src.extensions.score_metamodel.checks.traceability_dashboard import ( + pie_process_requirements_linked, + pie_requirements_fully_linked, + pie_requirements_with_code_links, + pie_requirements_with_test_links, + set_default_include_external, +) +from src.extensions.score_metamodel.traceability_metrics import ( + compute_traceability_summary, +) + + +def _needs() -> list[dict[str, object]]: + return [ + { + "id": "LOCAL_REQ", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "", + "testlink": "", + "is_external": False, + }, + { + "id": "LOCAL_SYS_REQ", + "type": "sys_req", + "implemented": 
"YES", + "source_code_link": "", + "testlink": "T_LOCAL", + "is_external": False, + }, + { + "id": "EXT_REQ", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/ext.py:10", + "testlink": "T_EXT", + "is_external": True, + }, + ] + + +def test_dashboard_defaults_to_local_only() -> None: + set_default_include_external(False) + + results: list[int] = [] + pie_requirements_with_code_links(_needs(), results, arg1="tool_req") + + summary = compute_traceability_summary( + all_needs=_needs(), + requirement_types={"tool_req"}, + include_not_implemented=True, + filtered_test_types=set(), + include_external=False, + ) + + assert results == [1, 0] + assert results == [ + summary["requirements"]["total"] - summary["requirements"]["with_code_link"], + summary["requirements"]["with_code_link"], + ] + + +def test_dashboard_can_include_external_via_default_flag() -> None: + set_default_include_external(True) + + results: list[int] = [] + pie_requirements_with_code_links(_needs(), results, arg1="tool_req") + + summary = compute_traceability_summary( + all_needs=_needs(), + requirement_types={"tool_req"}, + include_not_implemented=True, + filtered_test_types=set(), + include_external=True, + ) + + assert results == [1, 1] + assert results == [ + summary["requirements"]["total"] - summary["requirements"]["with_code_link"], + summary["requirements"]["with_code_link"], + ] + + +def test_dashboard_filter_arg_can_override_default() -> None: + set_default_include_external(True) + + results: list[int] = [] + pie_requirements_with_code_links(_needs(), results, arg1="tool_req", arg2="false") + + assert results == [1, 0] + + +def test_requirements_with_test_links_default_local_only() -> None: + set_default_include_external(False) + + results: list[int] = [] + pie_requirements_with_test_links(_needs(), results, arg1="tool_req") + + summary = compute_traceability_summary( + all_needs=_needs(), + requirement_types={"tool_req"}, + include_not_implemented=True, + 
filtered_test_types=set(), + include_external=False, + ) + + assert results == [1, 0] + assert results == [ + summary["requirements"]["total"] - summary["requirements"]["with_test_link"], + summary["requirements"]["with_test_link"], + ] + + +def test_requirements_with_test_links_can_override_include_external() -> None: + set_default_include_external(False) + + results: list[int] = [] + pie_requirements_with_test_links(_needs(), results, arg1="tool_req", arg2="true") + + assert results == [1, 1] + + +def test_requirements_with_test_links_parses_multiple_types() -> None: + set_default_include_external(False) + + results: list[int] = [] + pie_requirements_with_test_links(_needs(), results, arg1="tool_req,sys_req") + + assert results == [1, 1] + + +def test_requirements_fully_linked_uses_shared_summary() -> None: + set_default_include_external(False) + + results: list[int] = [] + pie_requirements_fully_linked(_needs(), results, arg1="tool_req") + + summary = compute_traceability_summary( + all_needs=_needs(), + requirement_types={"tool_req"}, + include_not_implemented=True, + filtered_test_types=set(), + include_external=False, + ) + + assert results == [1, 0] + assert results == [ + summary["requirements"]["total"] - summary["requirements"]["fully_linked"], + summary["requirements"]["fully_linked"], + ] + + +def test_requirements_fully_linked_can_include_external() -> None: + set_default_include_external(True) + + results: list[int] = [] + pie_requirements_fully_linked(_needs(), results, arg1="tool_req") + + assert results == [1, 1] + + +def test_process_requirements_linked_uses_stream_a_process_requirement_totals( + monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: dict[str, object] = {} + + def _fake_summary( + all_needs: Sequence[dict[str, Any]], + requirement_types: set[str], + include_not_implemented: bool, + filtered_test_types: set[str], + include_external: bool, + ) -> dict[str, dict[str, int]]: + captured["all_needs"] = all_needs + 
captured["requirement_types"] = requirement_types + captured["include_not_implemented"] = include_not_implemented + captured["filtered_test_types"] = filtered_test_types + captured["include_external"] = include_external + return { + "requirements": {"total": 99, "linked": 0}, + "process_requirements": {"total": 4, "linked": 3}, + } + + monkeypatch.setattr( + traceability_dashboard, "compute_traceability_summary", _fake_summary + ) + + results: list[int] = [] + pie_process_requirements_linked( + _needs(), results, arg1="tool_req,sys_req", arg2="true" + ) + + assert results == [1, 3] + assert captured["requirement_types"] == {"tool_req", "sys_req"} + assert captured["include_not_implemented"] is True + assert captured["filtered_test_types"] == set() + assert captured["include_external"] is True diff --git a/src/extensions/score_metamodel/tests/test_traceability_metrics.py b/src/extensions/score_metamodel/tests/test_traceability_metrics.py new file mode 100644 index 000000000..850f064f6 --- /dev/null +++ b/src/extensions/score_metamodel/tests/test_traceability_metrics.py @@ -0,0 +1,206 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Unit tests for traceability_metrics include_external handling.""" + +from src.extensions.score_metamodel.traceability_metrics import ( + compute_traceability_summary, + filter_requirements, +) + + +def _needs() -> list[dict[str, object]]: + return [ + { + "id": "LOCAL_REQ", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/local.py:1", + "testlink": "tests/test_local.py::test_ok", + "is_external": False, + }, + { + "id": "EXT_REQ", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/external.py:9", + "testlink": "tests/test_external.py::test_ok", + "is_external": True, + }, + { + "id": "TC_1", + "type": "testcase", + "partially_verifies": "LOCAL_REQ", + "fully_verifies": "", + "is_external": False, + }, + ] + + +def test_filter_requirements_defaults_to_local_only() -> None: + filtered = filter_requirements( + _needs(), + requirement_types={"tool_req"}, + include_not_implemented=True, + ) + + assert [need["id"] for need in filtered] == ["LOCAL_REQ"] + + +def test_filter_requirements_can_include_external_needs() -> None: + filtered = filter_requirements( + _needs(), + requirement_types={"tool_req"}, + include_not_implemented=True, + include_external=True, + ) + + assert sorted(need["id"] for need in filtered) == ["EXT_REQ", "LOCAL_REQ"] + + +def test_compute_traceability_summary_propagates_include_external() -> None: + summary_local = compute_traceability_summary( + all_needs=_needs(), + requirement_types={"tool_req"}, + include_not_implemented=True, + filtered_test_types=set(), + include_external=False, + ) + summary_all = compute_traceability_summary( + all_needs=_needs(), + requirement_types={"tool_req"}, 
+ include_not_implemented=True, + filtered_test_types=set(), + include_external=True, + ) + + assert summary_local["include_external"] is False + assert summary_local["requirements"]["total"] == 1 + assert summary_all["include_external"] is True + assert summary_all["requirements"]["total"] == 2 + + +def test_compute_traceability_summary_process_requirements_summary() -> None: + summary = compute_traceability_summary( + all_needs=[ + { + "id": "TOOL_REQ_1", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/req.py:10", + "testlink": "tests/test_req.py::test_ok", + "satisfies": "PR_LOCAL_1,OTHER_REQ", + "is_external": False, + }, + { + "id": "TOOL_REQ_2", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/req.py:20", + "testlink": "tests/test_req.py::test_ok_2", + "satisfies": ["PR_LOCAL_1", "PR_LOCAL_2"], + "is_external": False, + }, + { + "id": "PR_LOCAL_1", + "type": "process_req", + "is_external": False, + }, + { + "id": "PR_LOCAL_2", + "type": "gd_req", + "is_external": False, + }, + { + "id": "PR_LOCAL_3", + "type": "gd_req", + "is_external": False, + }, + ], + requirement_types={"tool_req"}, + include_not_implemented=True, + filtered_test_types=set(), + include_external=False, + ) + + process_requirements = summary["process_requirements"] + + assert process_requirements["total"] == 3 + assert process_requirements["linked_by_tool_requirements"] == 2 + assert process_requirements["linked_by_tool_requirements_pct"] == (2 / 3) * 100 + assert process_requirements["unlinked_ids"] == ["PR_LOCAL_3"] + + +def test_compute_traceability_summary_process_requirements_respects_include_external() -> ( + None +): + all_needs = [ + { + "id": "TOOL_REQ_LOCAL", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "src/local.py:1", + "testlink": "tests/test_local.py::test_ok", + "satisfies": "PR_LOCAL", + "is_external": False, + }, + { + "id": "TOOL_REQ_EXTERNAL", + "type": "tool_req", + "implemented": "YES", + 
"source_code_link": "src/external.py:1", + "testlink": "tests/test_external.py::test_ok", + "satisfies": "PR_EXTERNAL", + "is_external": True, + }, + { + "id": "PR_LOCAL", + "type": "gd_req", + "is_external": False, + }, + { + "id": "PR_EXTERNAL", + "type": "gd_req", + "is_external": True, + }, + ] + + summary_local = compute_traceability_summary( + all_needs=all_needs, + requirement_types={"tool_req"}, + include_not_implemented=True, + filtered_test_types=set(), + include_external=False, + ) + summary_all = compute_traceability_summary( + all_needs=all_needs, + requirement_types={"tool_req"}, + include_not_implemented=True, + filtered_test_types=set(), + include_external=True, + ) + + assert summary_local["process_requirements"] == { + "total": 1, + "linked": 1, + "linked_by_tool_requirements": 1, + "linked_by_tool_requirements_pct": 100.0, + "unlinked_ids": [], + } + assert summary_all["process_requirements"] == { + "total": 2, + "linked": 2, + "linked_by_tool_requirements": 2, + "linked_by_tool_requirements_pct": 100.0, + "unlinked_ids": [], + } diff --git a/src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py b/src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py new file mode 100644 index 000000000..764659874 --- /dev/null +++ b/src/extensions/score_metamodel/tests/test_traceability_metrics_json_generation.py @@ -0,0 +1,96 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Tests for Sphinx-side metrics.json generation defaults.""" + +import json +from pathlib import Path +from types import SimpleNamespace +from typing import cast + +import pytest +from sphinx.application import Sphinx + +import src.extensions.score_metamodel.__init__ as metamodel_init + + +class _FakeNeedsData: + def __init__(self, env: object): + self._env = env + + def get_needs_view(self) -> dict[str, dict[str, object]]: + return { + "LOCAL_REQ": { + "id": "LOCAL_REQ", + "type": "tool_req", + "implemented": "YES", + "source_code_link": "", + "testlink": "", + "is_external": False, + }, + "EXT_REQ": { + "id": "EXT_REQ", + "type": "tool_req", + "implemented": "NO", + "source_code_link": "src/ext.py:1", + "testlink": "", + "is_external": True, + }, + } + + +def _app(tmp_path: Path, include_external: bool) -> SimpleNamespace: + return SimpleNamespace( + env=object(), + outdir=str(tmp_path), + config=SimpleNamespace( + score_metamodel_requirement_types="tool_req", + score_metamodel_include_external_needs=include_external, + ), + ) + + +def test_write_metrics_json_defaults_to_local_only( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(metamodel_init, "SphinxNeedsData", _FakeNeedsData) + + metamodel_init._write_metrics_json( + cast(Sphinx, _app(tmp_path, include_external=False)), + None, + ) + + payload = json.loads((tmp_path / "metrics.json").read_text(encoding="utf-8")) + metrics = payload["metrics_by_type"]["tool_req"] + + assert payload["schema_version"] == "1" + assert metrics["include_not_implemented"] is True + assert metrics["include_external"] is False + assert metrics["requirements"]["total"] == 1 + + +def 
test_write_metrics_json_can_include_external( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(metamodel_init, "SphinxNeedsData", _FakeNeedsData) + + metamodel_init._write_metrics_json( + cast(Sphinx, _app(tmp_path, include_external=True)), + None, + ) + + payload = json.loads((tmp_path / "metrics.json").read_text(encoding="utf-8")) + metrics = payload["metrics_by_type"]["tool_req"] + + assert metrics["include_external"] is True + assert metrics["requirements"]["total"] == 2 diff --git a/src/extensions/score_metamodel/traceability_metrics.py b/src/extensions/score_metamodel/traceability_metrics.py new file mode 100644 index 000000000..08e1723e9 --- /dev/null +++ b/src/extensions/score_metamodel/traceability_metrics.py @@ -0,0 +1,258 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +# ╓ ╖ +# ║ Some portions generated by Github Copilot ║ +# ╙ ╜ + +"""Shared traceability metric calculations for CI checks and dashboards.""" + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any + + +def is_non_empty(value: Any) -> bool: + """Return True if value should be treated as present for traceability checks.""" + if isinstance(value, str): + return bool(value.strip()) + return bool(value) + + +def parse_need_id_list(value: Any) -> list[str]: + """Normalize need-id lists encoded as CSV strings or string lists.""" + if value is None: + return [] + if isinstance(value, str): + return [item.strip() for item in value.split(",") if item.strip()] + if isinstance(value, list): + out: list[str] = [] + for item in value: + if isinstance(item, str) and item.strip(): + out.append(item.strip()) + return out + return [] + + +def safe_percent(numerator: int, denominator: int) -> float: + """Return percentage in range [0, 100], treating empty denominator as 100%.""" + if denominator == 0: + return 100.0 + return (numerator / denominator) * 100.0 + + +def filter_requirements( + all_needs: Sequence[Any], + requirement_types: set[str], + include_not_implemented: bool, + include_external: bool = False, +) -> list[Any]: + """Extract requirements by type, implementation state, and origin.""" + requirements: list[dict[str, Any]] = [] + for need in all_needs: + need_type = str(need.get("type", "")).strip() + if need_type not in requirement_types: + continue + if not include_external and need.get("is_external", False): + continue + if not include_not_implemented: + implemented = str(need.get("implemented", 
"")).upper().strip() + if implemented not in {"YES", "PARTIAL"}: + continue + requirements.append(need) + return requirements + + +def calculate_requirement_metrics( + requirements: Sequence[Any], +) -> dict[str, Any]: + """Calculate requirement traceability statistics for links and completeness.""" + total = len(requirements) + with_code = sum( + 1 for need in requirements if is_non_empty(need.get("source_code_link")) + ) + with_test = sum(1 for need in requirements if is_non_empty(need.get("testlink"))) + fully_linked = sum( + 1 + for need in requirements + if is_non_empty(need.get("source_code_link")) + and is_non_empty(need.get("testlink")) + ) + + missing_code_ids = [ + str(need.get("id", "")) + for need in requirements + if not is_non_empty(need.get("source_code_link")) and need.get("id") + ] + missing_test_ids = [ + str(need.get("id", "")) + for need in requirements + if not is_non_empty(need.get("testlink")) and need.get("id") + ] + not_fully_linked_ids = [ + str(need.get("id", "")) + for need in requirements + if ( + ( + not is_non_empty(need.get("source_code_link")) + or not is_non_empty(need.get("testlink")) + ) + and need.get("id") + ) + ] + + return { + "total": total, + "with_code_link": with_code, + "with_test_link": with_test, + "fully_linked": fully_linked, + "with_code_link_pct": safe_percent(with_code, total), + "with_test_link_pct": safe_percent(with_test, total), + "fully_linked_pct": safe_percent(fully_linked, total), + "missing_code_link_ids": sorted(missing_code_ids), + "missing_test_link_ids": sorted(missing_test_ids), + "not_fully_linked_ids": sorted(not_fully_linked_ids), + } + + +def calculate_test_metrics( + all_needs: Sequence[Any], + requirement_ids: set[str], + filtered_test_types: set[str], +) -> dict[str, Any]: + """Calculate testcase linkage and broken testcase-reference statistics.""" + testcases = [ + need for need in all_needs if str(need.get("type", "")).strip() == "testcase" + ] + if filtered_test_types: + testcases = [ + 
need + for need in testcases + if str(need.get("test_type", need.get("TestType", ""))).strip() + in filtered_test_types + ] + + tests_total = len(testcases) + tests_linked = 0 + broken_references: list[dict[str, str]] = [] + + for test_need in testcases: + test_id = str(test_need.get("id", "")) + partially = parse_need_id_list( + test_need.get("partially_verifies", test_need.get("PartiallyVerifies")) + ) + fully = parse_need_id_list( + test_need.get("fully_verifies", test_need.get("FullyVerifies")) + ) + refs = partially + fully + if refs: + tests_linked += 1 + for ref in refs: + if ref not in requirement_ids: + broken_references.append({"testcase": test_id, "missing_need": ref}) + + return { + "total": tests_total, + "filtered_test_types": sorted(filtered_test_types), + "linked_to_requirements": tests_linked, + "linked_to_requirements_pct": safe_percent(tests_linked, tests_total), + "broken_references": broken_references, + } + + +def calculate_process_requirement_metrics( + all_needs: Sequence[Any], + include_not_implemented: bool, + include_external: bool, +) -> dict[str, Any]: + """Calculate process-requirement coverage via tool_req ``satisfies`` links.""" + process_requirements = [ + need + for need in all_needs + if str(need.get("type", "")).strip() in {"gd_req", "process_req"} + and (include_external or not need.get("is_external", False)) + ] + process_requirement_ids = { + str(need.get("id", "")).strip() + for need in process_requirements + if need.get("id") + } + + tool_requirements = filter_requirements( + all_needs, + requirement_types={"tool_req"}, + include_not_implemented=include_not_implemented, + include_external=include_external, + ) + + linked_process_requirement_ids: set[str] = set() + for need in tool_requirements: + satisfies_ids = parse_need_id_list(need.get("satisfies", need.get("Satisfies"))) + for ref_id in satisfies_ids: + if ref_id in process_requirement_ids: + linked_process_requirement_ids.add(ref_id) + + total = 
len(process_requirement_ids) + linked_by_tool_requirements = len(linked_process_requirement_ids) + unlinked_ids = sorted(process_requirement_ids - linked_process_requirement_ids) + + return { + "total": total, + "linked": linked_by_tool_requirements, + "linked_by_tool_requirements": linked_by_tool_requirements, + "linked_by_tool_requirements_pct": safe_percent( + linked_by_tool_requirements, total + ), + "unlinked_ids": unlinked_ids, + } + + +def compute_traceability_summary( + all_needs: Sequence[Any], + requirement_types: set[str], + include_not_implemented: bool, + filtered_test_types: set[str], + include_external: bool = False, +) -> dict[str, Any]: + """Return full CI/dashboard summary using one shared metric implementation.""" + requirements = filter_requirements( + all_needs, + requirement_types=requirement_types, + include_not_implemented=include_not_implemented, + include_external=include_external, + ) + requirement_ids = { + str(need.get("id", "")).strip() for need in requirements if need.get("id") + } + + req_metrics = calculate_requirement_metrics(requirements) + test_metrics = calculate_test_metrics( + all_needs, + requirement_ids=requirement_ids, + filtered_test_types=filtered_test_types, + ) + process_requirement_metrics = calculate_process_requirement_metrics( + all_needs, + include_not_implemented=include_not_implemented, + include_external=include_external, + ) + + return { + "requirement_types": sorted(requirement_types), + "include_not_implemented": include_not_implemented, + "include_external": include_external, + "requirements": req_metrics, + "tests": test_metrics, + "process_requirements": process_requirement_metrics, + } diff --git a/src/extensions/score_source_code_linker/__init__.py b/src/extensions/score_source_code_linker/__init__.py index 50a42462e..a668225d7 100644 --- a/src/extensions/score_source_code_linker/__init__.py +++ b/src/extensions/score_source_code_linker/__init__.py @@ -23,7 +23,7 @@ import os from copy import deepcopy 
from pathlib import Path -from typing import cast +from typing import Any, cast from sphinx.application import Sphinx from sphinx.environment import BuildEnvironment @@ -41,15 +41,18 @@ store_source_code_links_combined_json, ) from src.extensions.score_source_code_linker.needlinks import ( + NeedLink, load_source_code_links_json, load_source_code_links_with_metadata_json, ) from src.extensions.score_source_code_linker.repo_source_links import ( + RepoInfo, group_needs_by_repo, load_repo_source_links_json, store_repo_source_links_json, ) from src.extensions.score_source_code_linker.testlink import ( + DataForTestLink, load_data_of_test_case_json, load_test_xml_parsed_json, ) @@ -104,9 +107,15 @@ def build_and_save_combined_file(outdir: Path): source_code_links = load_source_code_links_with_metadata_json( source_code_links_json ) - test_code_links = load_test_xml_parsed_json( - get_cache_filename(outdir, "score_xml_parser_cache.json") - ) + test_cache = get_cache_filename(outdir, "score_xml_parser_cache.json") + if test_cache.exists(): + test_code_links = load_test_xml_parsed_json(test_cache) + else: + LOGGER.debug( + "No score_xml_parser_cache.json found. Continuing without test XML links.", + type="score_source_code_linker", + ) + test_code_links = [] scl_list = group_by_need(source_code_links, test_code_links) store_source_code_links_combined_json( outdir / "score_scl_grouped_cache.json", scl_list @@ -118,7 +127,7 @@ def build_and_save_combined_file(outdir: Path): # ╰──────────────────────────────────────╯ -def setup_source_code_linker(app: Sphinx, ws_root: Path): +def setup_source_code_linker(app: Sphinx, ws_root: Path | None): """ Setting up source_code_linker with all needed options. 
Allows us to only have this run once during live_preview & esbonio @@ -144,11 +153,26 @@ def setup_source_code_linker(app: Sphinx, ws_root: Path): ) score_sourcelinks_json = os.environ.get("SCORE_SOURCELINKS") + if not score_sourcelinks_json: + score_sourcelinks_json = str( + getattr(app.config, "score_sourcelinks_json", "") + ).strip() + if score_sourcelinks_json: + # Reuse existing code paths that expect this env var. + os.environ["SCORE_SOURCELINKS"] = score_sourcelinks_json if score_sourcelinks_json: # No need to generate the JSON file if this env var is set # because it points to an existing file with the needed data. return + if ws_root is None: + LOGGER.info( + "No workspace root found and no SCORE_SOURCELINKS provided. " + "Skipping source-code-link scan.", + type="score_source_code_linker", + ) + return + scl_cache_json = get_cache_filename( app.outdir, "score_source_code_linker_cache.json" ) @@ -277,14 +301,13 @@ def setup_once(app: Sphinx): ) LOGGER.debug(f"DEBUG: Git root is {find_git_root()}") - # Run only for local files! - # ws_root is not set when running on external repositories (dependencies). + # Run for local files if possible. In Bazel sandbox builds, ws_root may be + # unavailable; in that case we can still operate when SCORE_SOURCELINKS + # (or score_sourcelinks_json config) is provided. ws_root = find_ws_root() - if not ws_root: - return - - # When BUILD_WORKSPACE_DIRECTORY is set, we are inside a git repository. - assert find_git_root() + if ws_root: + # When BUILD_WORKSPACE_DIRECTORY is set, we are inside a git repository. + assert find_git_root() # Register & Run (if needed) parsing & saving of JSON caches setup_source_code_linker(app, ws_root) @@ -300,6 +323,13 @@ def setup(app: Sphinx) -> dict[str, str | bool]: # Esbonio will execute setup() on every iteration. # setup_once will only be called once. 
app.add_config_value("KNOWN_GOOD_JSON", default="", rebuild="env", types=str) + app.add_config_value("score_sourcelinks_json", default="", rebuild="env", types=str) + app.add_config_value( + "score_source_code_linker_plain_links", + default=False, + rebuild="env", + types=bool, + ) setup_once(app) return { @@ -316,6 +346,102 @@ def find_need(all_needs: NeedsMutable, id: str) -> NeedItem | None: return all_needs.get(id) +def _log_existing_links(needs: NeedsMutable) -> None: + """Emit debug logs for needs that already contain source/test links.""" + if LOGGER.getEffectiveLevel() < 10: + return + + for need_id, need in needs.items(): + if need.get("source_code_link"): + LOGGER.debug( + f"?? Need {need_id} already has source_code_link: " + f"{need.get('source_code_link')}" + ) + if need.get("testlink"): + LOGGER.debug( + f"?? Need {need_id} already has testlink: {need.get('testlink')}" + ) + + +def _render_code_link(plain_links: bool, metadata: RepoInfo, link: NeedLink) -> str: + if plain_links: + return ( + "https://github.com/placeholder/placeholder/blob/unknown/" + f"{link.file}#L{link.line}<>{link.file}:{link.line}" + ) + try: + base = get_github_link(metadata, link) + except AssertionError: + LOGGER.info( + "Falling back to local code-link format (no git remote available): " + f"{link.file}:{link.line}", + type="score_source_code_linker", + ) + return f"{link.file}:{link.line}" + return f"{base}<>{link.file}:{link.line}" + + +def _render_test_link( + plain_links: bool, + metadata: RepoInfo, + link: DataForTestLink, +) -> str: + if plain_links: + return str(link.name) + try: + base = get_github_link(metadata, link) + except AssertionError: + LOGGER.info( + "Falling back to local test-link format (no git remote available): " + f"{link.name}", + type="score_source_code_linker", + ) + return str(link.name) + return f"{base}<>{link.name}" + + +def _warn_missing_need(source_code_links: object) -> None: + links = cast(Any, source_code_links).links + need_id = cast(Any, 
source_code_links).need + + for code_link in links.CodeLinks: + LOGGER.warning( + f"{code_link.file}:{code_link.line}: Could not find {need_id} " + "in documentation [CODE LINK]", + type="score_source_code_linker", + ) + for test_link in links.TestLinks: + LOGGER.warning( + f"{test_link.file}:{test_link.line}: Could not find {need_id} " + "in documentation [TEST LINK]", + type="score_source_code_linker", + ) + + +def _apply_links_to_need( + needs_data: SphinxNeedsData, + need: NeedItem, + source_code_links: object, + metadata: RepoInfo, + plain_links: bool, +) -> None: + links = cast(Any, source_code_links).links + need_as_dict = cast(dict[str, object], need) + need_as_dict["source_code_link"] = ", ".join( + _render_code_link(plain_links, metadata, code_link) + for code_link in links.CodeLinks + ) + need_as_dict["testlink"] = ", ".join( + _render_test_link(plain_links, metadata, test_link) + for test_link in links.TestLinks + ) + + # NOTE: Removing & adding the need is important to make sure + # the needs gets 're-evaluated'. + needs_data.remove_need(need["id"]) + needs_data.add_need(need) + + # re-qid: gd_req__req__attr_impl def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: """ @@ -327,66 +453,37 @@ def inject_links_into_needs(app: Sphinx, env: BuildEnvironment) -> None: env: Buildenvironment, this is filled automatically app: Sphinx app application, this is filled automatically """ - ws_root = find_ws_root() - assert ws_root - - Needs_Data = SphinxNeedsData(env) - needs = Needs_Data.get_needs_mutable() + needs_data = SphinxNeedsData(env) + needs = needs_data.get_needs_mutable() needs_copy = deepcopy( needs ) # TODO: why do we create a copy? Can we also needs_copy = needs[:]? copy(needs)? - # Enabled automatically for DEBUGGING - if LOGGER.getEffectiveLevel() >= 10: - for id, need in needs.items(): - if need.get("source_code_link"): - LOGGER.debug( - f"?? 
Need {id} already has source_code_link: " - f"{need.get('source_code_link')}" - ) - if need.get("testlink"): - LOGGER.debug( - f"?? Need {id} already has testlink: {need.get('testlink')}" - ) + _log_existing_links(needs) scl_by_module = load_repo_source_links_json( get_cache_filename(app.outdir, "score_repo_grouped_scl_cache.json") ) + plain_links = bool( + getattr(app.config, "score_source_code_linker_plain_links", False) + ) + for module_grouped_needs in scl_by_module: for source_code_links in module_grouped_needs.needs: need = find_need(needs_copy, source_code_links.need) if need is None: # TODO: print github annotations as in https://github.com/eclipse-score/bazel_registry/blob/7423b9996a45dd0a9ec868e06a970330ee71cf4f/tools/verify_semver_compatibility_level.py#L126-L129 - for n in source_code_links.links.CodeLinks: - LOGGER.warning( - f"{n.file}:{n.line}: Could not find {source_code_links.need} " - "in documentation [CODE LINK]", - type="score_source_code_linker", - ) - for n in source_code_links.links.TestLinks: - LOGGER.warning( - f"{n.file}:{n.line}: Could not find {source_code_links.need} " - "in documentation [TEST LINK]", - type="score_source_code_linker", - ) + _warn_missing_need(source_code_links) continue - need_as_dict = cast(dict[str, object], need) - metadata = module_grouped_needs.repo - need_as_dict["source_code_link"] = ", ".join( - f"{get_github_link(metadata, n)}<>{n.file}:{n.line}" - for n in source_code_links.links.CodeLinks - ) - need_as_dict["testlink"] = ", ".join( - f"{get_github_link(metadata, n)}<>{n.name}" - for n in source_code_links.links.TestLinks + _apply_links_to_need( + needs_data=needs_data, + need=need, + source_code_links=source_code_links, + metadata=module_grouped_needs.repo, + plain_links=plain_links, ) - # NOTE: Removing & adding the need is important to make sure - # the needs gets 're-evaluated'. 
- Needs_Data.remove_need(need["id"]) - Needs_Data.add_need(need) - # ╭──────────────────────────────────────╮ # │ WARNING: This somehow screws up the │ diff --git a/src/extensions/score_source_code_linker/tests/test_xml_parser.py b/src/extensions/score_source_code_linker/tests/test_xml_parser.py index 8fde2cec3..c1725d31c 100644 --- a/src/extensions/score_source_code_linker/tests/test_xml_parser.py +++ b/src/extensions/score_source_code_linker/tests/test_xml_parser.py @@ -335,6 +335,45 @@ def test_short_hash_consistency_and_format(): assert len(h1) == 5 +def test_construct_and_add_need_uses_fallback_url_for_missing_repo_metadata( + monkeypatch: pytest.MonkeyPatch, +): + calls: list[dict[str, object]] = [] + + def _fake_add_external_need(**kwargs: object) -> object: + calls.append(kwargs) + return object() + + monkeypatch.setattr(xml_parser, "add_external_need", _fake_add_external_need) + + testcase = DataOfTestCase( + name="tc_missing_meta", + file="tests/foo_test.py", + line="10", + result="passed", + result_text="", + FullyVerifies="REQ_1", + PartiallyVerifies="", + TestType="requirements-based", + DerivationTechnique="analysis", + repo_name=None, + hash=None, + url=None, + ) + + app_stub: Any = object() + xml_parser.construct_and_add_need( + app=app_stub, + tn=testcase, + ) + + # Must not crash and should create an external need using fallback metadata. 
+ assert len(calls) == 1 + assert calls[0]["external_url"] == ( + "https://github.com/placeholder/placeholder/blob/unknown/tests/foo_test.py#L10" + ) + + # ─────────────[ Boilerplate generated by CoPilot ]───────────── diff --git a/src/extensions/score_source_code_linker/xml_parser.py b/src/extensions/score_source_code_linker/xml_parser.py index 9c741d9f4..302d87469 100644 --- a/src/extensions/score_source_code_linker/xml_parser.py +++ b/src/extensions/score_source_code_linker/xml_parser.py @@ -338,11 +338,22 @@ def construct_and_add_need(app: Sphinx, tn: DataOfTestCase): # and either 'Fully' or 'PartiallyVerifies' should not be None here assert tn.file is not None assert tn.name is not None - assert tn.repo_name is not None - assert tn.hash is not None - assert tn.url is not None - # Have to build metadata here for the gh link func - metadata = RepoInfo(name=tn.repo_name, hash=tn.hash, url=tn.url) + external_url = "" + if tn.repo_name is None or tn.hash is None or tn.url is None: + logger.info( + "Creating testcase need with fallback URL due to incomplete repo metadata: " + f"name={tn.name}, file={tn.file}, repo_name={tn.repo_name}, " + f"hash={tn.hash}, url={tn.url}", + type="score_source_code_linker", + ) + line = tn.line if tn.line is not None else 1 + external_url = ( + f"https://github.com/placeholder/placeholder/blob/unknown/{tn.file}#L{line}" + ) + else: + # Have to build metadata here for the gh link func + metadata = RepoInfo(name=tn.repo_name, hash=tn.hash, url=tn.url) + external_url = get_github_link(metadata, tn) # IDK if this is ideal or not with contextlib.suppress(BaseException): _ = add_external_need( @@ -352,7 +363,7 @@ def construct_and_add_need(app: Sphinx, tn: DataOfTestCase): tags="TEST", id=f"testcase__{tn.name}_{short_hash(tn.file + tn.name)}", name=tn.name, - external_url=get_github_link(metadata, tn), + external_url=external_url, fully_verifies=tn.FullyVerifies if tn.FullyVerifies is not None else "", 
partially_verifies=tn.PartiallyVerifies if tn.PartiallyVerifies is not None