From c2353042ffd2afb6dcd08e71c887b296619e36bb Mon Sep 17 00:00:00 2001 From: stacknil Date: Sun, 21 Jun 2026 09:52:12 +0800 Subject: [PATCH] feat(report): add report schema identity --- CHANGELOG.md | 1 + docs/report-artifacts.md | 7 +++++++ src/report.cpp | 2 ++ .../report_contracts/journalctl_short_full/report.json | 2 ++ .../multi_host_journalctl_short_full/report.json | 2 ++ .../multi_host_syslog_legacy/report.json | 2 ++ .../report_contracts/syslog_legacy/report.json | 2 ++ tests/test_report.cpp | 10 ++++++++++ tests/test_report_contracts.cpp | 2 ++ 9 files changed, 30 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 282bf05..f63c4d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ All notable user-visible changes should be recorded here. ### Added - Added sanitized golden `report.md` / `report.json` regression fixtures to lock report contracts. +- Added `schema` and `schema_version` fields to `report.json` so downstream tooling can identify the report artifact contract. - Expanded parser coverage for `Accepted publickey` and selected `pam_faillock` / `pam_sss` variants. - Added compact host-level summaries for multi-host reports. - Added optional CSV export for findings and warnings when explicitly requested. diff --git a/docs/report-artifacts.md b/docs/report-artifacts.md index e2fbc4b..f146b69 100644 --- a/docs/report-artifacts.md +++ b/docs/report-artifacts.md @@ -18,6 +18,8 @@ Without `--csv`, LogLens does not create, overwrite, or delete existing CSV file The JSON report keeps parser observability visible next to findings: - `tool` +- `schema` +- `schema_version` - `input` - `input_mode` - `assume_year` for syslog-style input when a year is supplied @@ -44,6 +46,11 @@ Finding objects contain `rule_id`, `rule`, `subject_kind`, `subject`, `grouping_ Warning objects contain the original `line_number`, parser `category`, and parser `reason`. 
+`schema` and `schema_version` identify the report artifact contract, not the +application release. They are intended for downstream tooling that needs a +stable way to reject incompatible report shapes. The current JSON contract is +`loglens.report.v1` with `schema_version` set to `1`. + Parser failure categories are stable reviewer-facing buckets for unsupported lines: `unknown_timestamp`, `unknown_program`, `known_program_unknown_message`, `malformed_source_ip`, and diff --git a/src/report.cpp b/src/report.cpp index 5d38cbb..35bf41f 100644 --- a/src/report.cpp +++ b/src/report.cpp @@ -637,6 +637,8 @@ std::string render_json_report(const ReportData& data) { output << "{\n"; output << " \"tool\": \"LogLens\",\n"; + output << " \"schema\": \"loglens.report.v1\",\n"; + output << " \"schema_version\": 1,\n"; output << " \"input\": \"" << escape_json(data.input_path.generic_string()) << "\",\n"; output << " \"input_mode\": \"" << to_string(data.parse_metadata.input_mode) << "\",\n"; if (data.parse_metadata.assume_year.has_value()) { diff --git a/tests/fixtures/report_contracts/journalctl_short_full/report.json b/tests/fixtures/report_contracts/journalctl_short_full/report.json index 43a3d75..763052c 100644 --- a/tests/fixtures/report_contracts/journalctl_short_full/report.json +++ b/tests/fixtures/report_contracts/journalctl_short_full/report.json @@ -1,5 +1,7 @@ { "tool": "LogLens", + "schema": "loglens.report.v1", + "schema_version": 1, "input": "tests/fixtures/report_contracts/journalctl_short_full/input.log", "input_mode": "journalctl_short_full", "timezone_present": true, diff --git a/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json b/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json index c9581f0..99c7ce9 100644 --- a/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json +++ b/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json @@ -1,5 +1,7 @@ { "tool": 
"LogLens", + "schema": "loglens.report.v1", + "schema_version": 1, "input": "tests/fixtures/report_contracts/multi_host_journalctl_short_full/input.log", "input_mode": "journalctl_short_full", "timezone_present": true, diff --git a/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json b/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json index 13cc567..9b620f3 100644 --- a/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json +++ b/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json @@ -1,5 +1,7 @@ { "tool": "LogLens", + "schema": "loglens.report.v1", + "schema_version": 1, "input": "tests/fixtures/report_contracts/multi_host_syslog_legacy/input.log", "input_mode": "syslog_legacy", "assume_year": 2026, diff --git a/tests/fixtures/report_contracts/syslog_legacy/report.json b/tests/fixtures/report_contracts/syslog_legacy/report.json index c041831..384843e 100644 --- a/tests/fixtures/report_contracts/syslog_legacy/report.json +++ b/tests/fixtures/report_contracts/syslog_legacy/report.json @@ -1,5 +1,7 @@ { "tool": "LogLens", + "schema": "loglens.report.v1", + "schema_version": 1, "input": "tests/fixtures/report_contracts/syslog_legacy/input.log", "input_mode": "syslog_legacy", "assume_year": 2026, diff --git a/tests/test_report.cpp b/tests/test_report.cpp index 77b7ab2..8c5673f 100644 --- a/tests/test_report.cpp +++ b/tests/test_report.cpp @@ -183,6 +183,15 @@ void test_json_finding_includes_explainability_fields() { "expected json finding to include evidence event ids"); } +void test_json_report_includes_schema_identity() { + const auto json = loglens::render_json_report(make_report_data()); + + expect(json.find("\"schema\": \"loglens.report.v1\"") != std::string::npos, + "expected json report to include schema identifier"); + expect(json.find("\"schema_version\": 1") != std::string::npos, + "expected json report to include schema version"); +} + void test_reports_include_total_input_line_count() { auto 
data = make_report_data(); data.parser_quality.total_lines = 3; @@ -349,6 +358,7 @@ int main() { test_markdown_table_cells_escape_user_controlled_values(); test_json_escapes_generic_control_characters(); test_json_finding_includes_explainability_fields(); + test_json_report_includes_schema_identity(); test_reports_include_total_input_line_count(); test_csv_neutralizes_formula_like_fields(); test_write_reports_fails_when_report_path_is_directory(); diff --git a/tests/test_report_contracts.cpp b/tests/test_report_contracts.cpp index 5413f23..8c7ca6b 100644 --- a/tests/test_report_contracts.cpp +++ b/tests/test_report_contracts.cpp @@ -138,6 +138,8 @@ std::vector<std::string> extract_json_contract_lines(const std::string& json) { } if (starts_with(line, "\"tool\": ") + || starts_with(line, "\"schema\": ") + || starts_with(line, "\"schema_version\": ") || starts_with(line, "\"input\": ") || starts_with(line, "\"input_mode\": ") || starts_with(line, "\"assume_year\": ")