diff --git a/docs/report-artifacts.md b/docs/report-artifacts.md index 831c541..a9dd017 100644 --- a/docs/report-artifacts.md +++ b/docs/report-artifacts.md @@ -37,7 +37,9 @@ The JSON report keeps parser observability visible next to findings: - `findings` - `warnings` -Finding objects contain `rule`, `subject_kind`, `subject`, `event_count`, `window_start`, `window_end`, `usernames`, and `summary`. +Finding objects contain `rule_id`, `rule`, `subject_kind`, `subject`, `grouping_key`, `threshold`, `observed_count`, `event_count`, `window_start`, `window_end`, `evidence_event_ids`, `usernames`, and `summary`. + +`evidence_event_ids` are deterministic local event identifiers derived from the source line number, formatted as `line:<line_number>`. They let reviewers trace a finding back to the normalized input events that satisfied the rule window without implying global event identity. Warning objects contain the original `line_number` and the parser `reason`. diff --git a/docs/rule-catalog.md b/docs/rule-catalog.md index d7b6098..49303e2 100644 --- a/docs/rule-catalog.md +++ b/docs/rule-catalog.md @@ -26,6 +26,19 @@ Metadata equivalent: - Default values below match the built-in detector configuration. - The checked-in `assets/sample_config.json` is a tested default-equivalent fixture. +## Finding Explainability Fields + +JSON findings include both the finding conclusion and the rule context used to reach it: + +- `rule_id`: stable rule identifier +- `grouping_key`: the normalized field used to group evidence +- `threshold`: configured threshold for the rule +- `observed_count`: observed value compared against the threshold +- `window_start` and `window_end`: selected evidence window +- `evidence_event_ids`: deterministic local event IDs in the selected window, formatted as `line:<line_number>` + +For `multi_user_probing`, `observed_count` is the distinct username count, while `event_count` remains the number of attempt-evidence events in the selected window. 
+ ## Brute Force ### Rule name diff --git a/src/detector.cpp b/src/detector.cpp index 6a1b24d..c1c9940 100644 --- a/src/detector.cpp +++ b/src/detector.cpp @@ -19,6 +19,23 @@ std::vector sort_signals_by_time(const std::vector evidence_event_ids_for_window(const std::vector& ordered, + std::size_t start, + std::size_t end) { + std::vector event_ids; + event_ids.reserve(end - start + 1); + + for (std::size_t index = start; index <= end; ++index) { + if (!ordered[index]->event_id.empty()) { + event_ids.push_back(ordered[index]->event_id); + } else { + event_ids.push_back("line:" + std::to_string(ordered[index]->line_number)); + } + } + + return event_ids; +} + SignalGroup group_terminal_auth_failures_by_ip(const std::vector& signals) { SignalGroup grouped; for (const auto& signal : signals) { @@ -54,16 +71,23 @@ SignalGroup group_sudo_burst_evidence_by_user(const std::vector& sig Finding make_brute_force_finding(const std::string& ip, std::size_t count, + std::size_t threshold, std::chrono::sys_seconds first_seen, std::chrono::sys_seconds last_seen, - std::chrono::minutes window) { + std::chrono::minutes window, + std::vector evidence_event_ids) { Finding finding; finding.type = FindingType::BruteForce; + finding.rule_id = to_string(finding.type); finding.subject_kind = "source_ip"; finding.subject = ip; + finding.grouping_key = "source_ip"; + finding.threshold = threshold; + finding.observed_count = count; finding.event_count = count; finding.first_seen = first_seen; finding.last_seen = last_seen; + finding.evidence_event_ids = std::move(evidence_event_ids); finding.summary = std::to_string(count) + " failed SSH attempts from " + ip + " within " + std::to_string(window.count()) + " minutes."; return finding; @@ -71,17 +95,25 @@ Finding make_brute_force_finding(const std::string& ip, Finding make_multi_user_finding(const std::string& ip, std::size_t count, + std::size_t threshold, + std::size_t distinct_username_count, std::chrono::sys_seconds first_seen, 
std::chrono::sys_seconds last_seen, std::vector usernames, - std::chrono::minutes window) { + std::chrono::minutes window, + std::vector evidence_event_ids) { Finding finding; finding.type = FindingType::MultiUserProbing; + finding.rule_id = to_string(finding.type); finding.subject_kind = "source_ip"; finding.subject = ip; + finding.grouping_key = "source_ip"; + finding.threshold = threshold; + finding.observed_count = distinct_username_count; finding.event_count = count; finding.first_seen = first_seen; finding.last_seen = last_seen; + finding.evidence_event_ids = std::move(evidence_event_ids); finding.usernames = std::move(usernames); finding.summary = ip + " targeted " + std::to_string(finding.usernames.size()) + " usernames within " + std::to_string(window.count()) + " minutes."; @@ -90,16 +122,23 @@ Finding make_multi_user_finding(const std::string& ip, Finding make_sudo_finding(const std::string& user, std::size_t count, + std::size_t threshold, std::chrono::sys_seconds first_seen, std::chrono::sys_seconds last_seen, - std::chrono::minutes window) { + std::chrono::minutes window, + std::vector evidence_event_ids) { Finding finding; finding.type = FindingType::SudoBurst; + finding.rule_id = to_string(finding.type); finding.subject_kind = "username"; finding.subject = user; + finding.grouping_key = "username"; + finding.threshold = threshold; + finding.observed_count = count; finding.event_count = count; finding.first_seen = first_seen; finding.last_seen = last_seen; + finding.evidence_event_ids = std::move(evidence_event_ids); finding.summary = user + " ran " + std::to_string(count) + " sudo commands within " + std::to_string(window.count()) + " minutes."; return finding; @@ -134,9 +173,11 @@ std::vector detect_brute_force(const std::vector& signals, findings.push_back(make_brute_force_finding( ip, best_count, + config.brute_force.threshold, ordered[best_start]->timestamp, ordered[best_end]->timestamp, - config.brute_force.window)); + 
config.brute_force.window, + evidence_event_ids_for_window(ordered, best_start, best_end))); } } @@ -198,10 +239,13 @@ std::vector detect_multi_user(const std::vector& signals, c findings.push_back(make_multi_user_finding( ip, best_count, + config.multi_user_probing.threshold, + best_distinct, ordered[best_start]->timestamp, ordered[best_end]->timestamp, best_usernames, - config.multi_user_probing.window)); + config.multi_user_probing.window, + evidence_event_ids_for_window(ordered, best_start, best_end))); } } @@ -237,9 +281,11 @@ std::vector detect_sudo_burst(const std::vector& signals, c findings.push_back(make_sudo_finding( username, best_count, + config.sudo_burst.threshold, ordered[best_start]->timestamp, ordered[best_end]->timestamp, - config.sudo_burst.window)); + config.sudo_burst.window, + evidence_event_ids_for_window(ordered, best_start, best_end))); } } diff --git a/src/detector.hpp b/src/detector.hpp index 2565a78..0acfbc9 100644 --- a/src/detector.hpp +++ b/src/detector.hpp @@ -29,11 +29,16 @@ struct DetectorConfig { struct Finding { FindingType type = FindingType::BruteForce; + std::string rule_id; std::string subject_kind; std::string subject; + std::string grouping_key; + std::size_t threshold = 0; + std::size_t observed_count = 0; std::size_t event_count = 0; std::chrono::sys_seconds first_seen{}; std::chrono::sys_seconds last_seen{}; + std::vector evidence_event_ids; std::vector usernames; std::string summary; }; diff --git a/src/report.cpp b/src/report.cpp index afaaef0..73415a7 100644 --- a/src/report.cpp +++ b/src/report.cpp @@ -291,6 +291,38 @@ std::string usernames_csv_field(const Finding& finding) { return usernames.str(); } +std::string finding_rule_id(const Finding& finding) { + if (!finding.rule_id.empty()) { + return finding.rule_id; + } + return to_string(finding.type); +} + +std::string finding_grouping_key(const Finding& finding) { + if (!finding.grouping_key.empty()) { + return finding.grouping_key; + } + return 
finding.subject_kind; +} + +std::size_t finding_observed_count(const Finding& finding) { + if (finding.observed_count != 0) { + return finding.observed_count; + } + return finding.event_count; +} + +void write_json_string_array(std::ostream& output, const std::vector& values) { + output << '['; + for (std::size_t index = 0; index < values.size(); ++index) { + output << '"' << escape_json(values[index]) << '"'; + if (index + 1 != values.size()) { + output << ", "; + } + } + output << ']'; +} + std::string format_parse_success_rate(double rate) { std::ostringstream output; output << std::fixed << std::setprecision(4) << rate; @@ -651,20 +683,22 @@ std::string render_json_report(const ReportData& data) { for (std::size_t index = 0; index < findings.size(); ++index) { const auto& finding = findings[index]; output << " {\n"; + output << " \"rule_id\": \"" << escape_json(finding_rule_id(finding)) << "\",\n"; output << " \"rule\": \"" << to_string(finding.type) << "\",\n"; output << " \"subject_kind\": \"" << escape_json(finding.subject_kind) << "\",\n"; output << " \"subject\": \"" << escape_json(finding.subject) << "\",\n"; + output << " \"grouping_key\": \"" << escape_json(finding_grouping_key(finding)) << "\",\n"; + output << " \"threshold\": " << finding.threshold << ",\n"; + output << " \"observed_count\": " << finding_observed_count(finding) << ",\n"; output << " \"event_count\": " << finding.event_count << ",\n"; output << " \"window_start\": \"" << format_timestamp(finding.first_seen) << "\",\n"; output << " \"window_end\": \"" << format_timestamp(finding.last_seen) << "\",\n"; - output << " \"usernames\": ["; - for (std::size_t name_index = 0; name_index < finding.usernames.size(); ++name_index) { - output << '"' << escape_json(finding.usernames[name_index]) << '"'; - if (name_index + 1 != finding.usernames.size()) { - output << ", "; - } - } - output << "],\n"; + output << " \"evidence_event_ids\": "; + write_json_string_array(output, 
finding.evidence_event_ids); + output << ",\n"; + output << " \"usernames\": "; + write_json_string_array(output, finding.usernames); + output << ",\n"; output << " \"summary\": \"" << escape_json(finding.summary) << "\"\n"; output << " }"; output << (index + 1 == findings.size() ? "\n" : ",\n"); diff --git a/src/signal.cpp b/src/signal.cpp index 7b027a2..4841f99 100644 --- a/src/signal.cpp +++ b/src/signal.cpp @@ -5,6 +5,10 @@ namespace loglens { namespace { +std::string event_id_for(const Event& event) { + return "line:" + std::to_string(event.line_number); +} + struct SignalMapping { AuthSignalKind signal_kind = AuthSignalKind::Unknown; bool counts_as_attempt_evidence = false; @@ -97,7 +101,8 @@ std::vector build_auth_signals(const std::vector& events, con mapping->counts_as_attempt_evidence, mapping->counts_as_terminal_auth_failure, mapping->counts_as_sudo_burst_evidence, - event.line_number}); + event.line_number, + event_id_for(event)}); } return signals; diff --git a/src/signal.hpp b/src/signal.hpp index 1ca00da..004a6e6 100644 --- a/src/signal.hpp +++ b/src/signal.hpp @@ -43,6 +43,7 @@ struct AuthSignal { bool counts_as_terminal_auth_failure = false; bool counts_as_sudo_burst_evidence = false; std::size_t line_number = 0; + std::string event_id; }; std::vector build_auth_signals(const std::vector& events, const AuthSignalConfig& config); diff --git a/tests/fixtures/report_contracts/journalctl_short_full/report.json b/tests/fixtures/report_contracts/journalctl_short_full/report.json index ccffff9..696a2d2 100644 --- a/tests/fixtures/report_contracts/journalctl_short_full/report.json +++ b/tests/fixtures/report_contracts/journalctl_short_full/report.json @@ -29,32 +29,47 @@ ], "findings": [ { + "rule_id": "brute_force", "rule": "brute_force", "subject_kind": "source_ip", "subject": "203.0.113.10", + "grouping_key": "source_ip", + "threshold": 5, + "observed_count": 5, "event_count": 5, "window_start": "2026-03-10 08:11:22", "window_end": "2026-03-10 
08:18:05", + "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], "usernames": [], "summary": "5 failed SSH attempts from 203.0.113.10 within 10 minutes." }, { + "rule_id": "multi_user_probing", "rule": "multi_user_probing", "subject_kind": "source_ip", "subject": "203.0.113.10", + "grouping_key": "source_ip", + "threshold": 3, + "observed_count": 5, "event_count": 5, "window_start": "2026-03-10 08:11:22", "window_end": "2026-03-10 08:18:05", + "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], "usernames": ["admin", "deploy", "guest", "root", "test"], "summary": "203.0.113.10 targeted 5 usernames within 15 minutes." }, { + "rule_id": "sudo_burst", "rule": "sudo_burst", "subject_kind": "username", "subject": "alice", + "grouping_key": "username", + "threshold": 3, + "observed_count": 3, "event_count": 3, "window_start": "2026-03-10 08:21:00", "window_end": "2026-03-10 08:24:15", + "evidence_event_ids": ["line:7", "line:8", "line:9"], "usernames": [], "summary": "alice ran 3 sudo commands within 5 minutes." } diff --git a/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json b/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json index 0d1a9c5..890a43d 100644 --- a/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json +++ b/tests/fixtures/report_contracts/multi_host_journalctl_short_full/report.json @@ -56,32 +56,47 @@ ], "findings": [ { + "rule_id": "brute_force", "rule": "brute_force", "subject_kind": "source_ip", "subject": "203.0.113.10", + "grouping_key": "source_ip", + "threshold": 5, + "observed_count": 5, "event_count": 5, "window_start": "2026-03-11 09:00:00", "window_end": "2026-03-11 09:04:05", + "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], "usernames": [], "summary": "5 failed SSH attempts from 203.0.113.10 within 10 minutes." 
}, { + "rule_id": "multi_user_probing", "rule": "multi_user_probing", "subject_kind": "source_ip", "subject": "203.0.113.10", + "grouping_key": "source_ip", + "threshold": 3, + "observed_count": 5, "event_count": 5, "window_start": "2026-03-11 09:00:00", "window_end": "2026-03-11 09:04:05", + "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], "usernames": ["admin", "deploy", "guest", "root", "test"], "summary": "203.0.113.10 targeted 5 usernames within 15 minutes." }, { + "rule_id": "sudo_burst", "rule": "sudo_burst", "subject_kind": "username", "subject": "alice", + "grouping_key": "username", + "threshold": 3, + "observed_count": 3, "event_count": 3, "window_start": "2026-03-11 09:11:00", "window_end": "2026-03-11 09:14:15", + "evidence_event_ids": ["line:9", "line:10", "line:13"], "usernames": [], "summary": "alice ran 3 sudo commands within 5 minutes." } diff --git a/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json b/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json index ab7332d..91c7ec4 100644 --- a/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json +++ b/tests/fixtures/report_contracts/multi_host_syslog_legacy/report.json @@ -57,32 +57,47 @@ ], "findings": [ { + "rule_id": "brute_force", "rule": "brute_force", "subject_kind": "source_ip", "subject": "203.0.113.10", + "grouping_key": "source_ip", + "threshold": 5, + "observed_count": 5, "event_count": 5, "window_start": "2026-03-11 09:00:00", "window_end": "2026-03-11 09:04:05", + "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], "usernames": [], "summary": "5 failed SSH attempts from 203.0.113.10 within 10 minutes." 
}, { + "rule_id": "multi_user_probing", "rule": "multi_user_probing", "subject_kind": "source_ip", "subject": "203.0.113.10", + "grouping_key": "source_ip", + "threshold": 3, + "observed_count": 5, "event_count": 5, "window_start": "2026-03-11 09:00:00", "window_end": "2026-03-11 09:04:05", + "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], "usernames": ["admin", "deploy", "guest", "root", "test"], "summary": "203.0.113.10 targeted 5 usernames within 15 minutes." }, { + "rule_id": "sudo_burst", "rule": "sudo_burst", "subject_kind": "username", "subject": "alice", + "grouping_key": "username", + "threshold": 3, + "observed_count": 3, "event_count": 3, "window_start": "2026-03-11 09:11:00", "window_end": "2026-03-11 09:14:15", + "evidence_event_ids": ["line:9", "line:10", "line:13"], "usernames": [], "summary": "alice ran 3 sudo commands within 5 minutes." } diff --git a/tests/fixtures/report_contracts/syslog_legacy/report.json b/tests/fixtures/report_contracts/syslog_legacy/report.json index 222891c..5377ef6 100644 --- a/tests/fixtures/report_contracts/syslog_legacy/report.json +++ b/tests/fixtures/report_contracts/syslog_legacy/report.json @@ -30,32 +30,47 @@ ], "findings": [ { + "rule_id": "brute_force", "rule": "brute_force", "subject_kind": "source_ip", "subject": "203.0.113.10", + "grouping_key": "source_ip", + "threshold": 5, + "observed_count": 5, "event_count": 5, "window_start": "2026-03-10 08:11:22", "window_end": "2026-03-10 08:18:05", + "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], "usernames": [], "summary": "5 failed SSH attempts from 203.0.113.10 within 10 minutes." 
}, { + "rule_id": "multi_user_probing", "rule": "multi_user_probing", "subject_kind": "source_ip", "subject": "203.0.113.10", + "grouping_key": "source_ip", + "threshold": 3, + "observed_count": 5, "event_count": 5, "window_start": "2026-03-10 08:11:22", "window_end": "2026-03-10 08:18:05", + "evidence_event_ids": ["line:1", "line:2", "line:3", "line:4", "line:5"], "usernames": ["admin", "deploy", "guest", "root", "test"], "summary": "203.0.113.10 targeted 5 usernames within 15 minutes." }, { + "rule_id": "sudo_burst", "rule": "sudo_burst", "subject_kind": "username", "subject": "alice", + "grouping_key": "username", + "threshold": 3, + "observed_count": 3, "event_count": 3, "window_start": "2026-03-10 08:21:00", "window_end": "2026-03-10 08:24:15", + "evidence_event_ids": ["line:7", "line:8", "line:9"], "usernames": [], "summary": "alice ran 3 sudo commands within 5 minutes." } diff --git a/tests/test_detector.cpp b/tests/test_detector.cpp index c6cb76b..695e025 100644 --- a/tests/test_detector.cpp +++ b/tests/test_detector.cpp @@ -204,15 +204,36 @@ void test_default_thresholds() { const auto* brute_force = find_finding(findings, loglens::FindingType::BruteForce, "203.0.113.10"); expect(brute_force != nullptr, "expected brute force finding"); + expect(brute_force->rule_id == "brute_force", "expected brute force rule id"); + expect(brute_force->grouping_key == "source_ip", "expected brute force grouping key"); + expect(brute_force->threshold == 5, "expected brute force threshold"); + expect(brute_force->observed_count == 5, "expected brute force observed count"); expect(brute_force->event_count == 5, "expected brute force count"); + expect((brute_force->evidence_event_ids == std::vector{ + "line:1", "line:2", "line:3", "line:4", "line:5"}), + "expected brute force evidence event ids"); const auto* multi_user = find_finding(findings, loglens::FindingType::MultiUserProbing, "203.0.113.10"); expect(multi_user != nullptr, "expected multi-user finding"); + 
expect(multi_user->rule_id == "multi_user_probing", "expected multi-user rule id"); + expect(multi_user->grouping_key == "source_ip", "expected multi-user grouping key"); + expect(multi_user->threshold == 3, "expected multi-user threshold"); + expect(multi_user->observed_count == 5, "expected multi-user observed username count"); + expect(multi_user->event_count == 5, "expected multi-user event count"); + expect((multi_user->evidence_event_ids == std::vector{ + "line:1", "line:2", "line:3", "line:4", "line:5"}), + "expected multi-user evidence event ids"); expect(multi_user->usernames.size() == 5, "expected five usernames"); const auto* sudo = find_finding(findings, loglens::FindingType::SudoBurst, "alice"); expect(sudo != nullptr, "expected sudo finding"); + expect(sudo->rule_id == "sudo_burst", "expected sudo rule id"); + expect(sudo->grouping_key == "username", "expected sudo grouping key"); + expect(sudo->threshold == 3, "expected sudo threshold"); + expect(sudo->observed_count == 3, "expected sudo observed count"); expect(sudo->event_count == 3, "expected sudo count"); + expect((sudo->evidence_event_ids == std::vector{"line:6", "line:7", "line:8"}), + "expected sudo evidence event ids"); } void test_custom_thresholds() { diff --git a/tests/test_report.cpp b/tests/test_report.cpp index 858645b..69c07ef 100644 --- a/tests/test_report.cpp +++ b/tests/test_report.cpp @@ -149,6 +149,39 @@ void test_json_escapes_generic_control_characters() { "expected json warning reason to use valid escapes"); } +void test_json_finding_includes_explainability_fields() { + auto data = make_report_data(); + + loglens::Finding finding; + finding.type = loglens::FindingType::SudoBurst; + finding.rule_id = "sudo_burst"; + finding.subject_kind = "username"; + finding.subject = "alice"; + finding.grouping_key = "username"; + finding.threshold = 3; + finding.observed_count = 4; + finding.event_count = 4; + finding.first_seen = timestamp_at_minute(21); + finding.last_seen = 
timestamp_at_minute(24); + finding.evidence_event_ids = {"line:6", "line:7", "line:8", "line:9"}; + finding.summary = "alice ran 4 sudo commands within 5 minutes."; + data.findings.push_back(finding); + + const auto json = loglens::render_json_report(data); + + expect(json.find("\"rule_id\": \"sudo_burst\"") != std::string::npos, + "expected json finding to include rule id"); + expect(json.find("\"grouping_key\": \"username\"") != std::string::npos, + "expected json finding to include grouping key"); + expect(json.find("\"threshold\": 3") != std::string::npos, + "expected json finding to include threshold"); + expect(json.find("\"observed_count\": 4") != std::string::npos, + "expected json finding to include observed count"); + expect(json.find("\"evidence_event_ids\": [\"line:6\", \"line:7\", \"line:8\", \"line:9\"]") + != std::string::npos, + "expected json finding to include evidence event ids"); +} + void test_reports_include_total_input_line_count() { auto data = make_report_data(); data.parser_quality.total_lines = 3; @@ -314,6 +347,7 @@ int main() { test_noisy_auth_report_json_keeps_unsupported_lines_visible(); test_markdown_table_cells_escape_user_controlled_values(); test_json_escapes_generic_control_characters(); + test_json_finding_includes_explainability_fields(); test_reports_include_total_input_line_count(); test_csv_neutralizes_formula_like_fields(); test_write_reports_fails_when_report_path_is_directory(); diff --git a/tests/test_report_contracts.cpp b/tests/test_report_contracts.cpp index d93b114..481bc97 100644 --- a/tests/test_report_contracts.cpp +++ b/tests/test_report_contracts.cpp @@ -155,12 +155,17 @@ std::vector extract_json_contract_lines(const std::string& json) { || starts_with(line, "\"hostname\": ") || starts_with(line, "{\"pattern\": ") || starts_with(line, "{\"event_type\": ") + || starts_with(line, "\"rule_id\": ") || starts_with(line, "\"rule\": ") || starts_with(line, "\"subject_kind\": ") || starts_with(line, "\"subject\": ") + 
|| starts_with(line, "\"grouping_key\": ") + || starts_with(line, "\"threshold\": ") + || starts_with(line, "\"observed_count\": ") || starts_with(line, "\"event_count\": ") || starts_with(line, "\"window_start\": ") || starts_with(line, "\"window_end\": ") + || starts_with(line, "\"evidence_event_ids\": ") || starts_with(line, "\"usernames\": ") || starts_with(line, "\"summary\": ") || starts_with(line, "{\"line_number\": ")) {