diff --git a/Cargo.lock b/Cargo.lock index e79eb41..ab20718 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2446,6 +2446,7 @@ dependencies = [ "flate2", "fs2", "hex", + "libc", "once_cell", "qbsdiff", "regex", diff --git a/Cargo.toml b/Cargo.toml index 6821912..245a6b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ tar = "=0.4.46" flate2 = "=1.1.9" zip = { version = "=8.6.0", default-features = false, features = ["deflate"] } fs2 = "=0.4.3" +libc = "=0.2.182" wiremock = "=0.6.5" portable-pty = "=0.9.0" testcontainers = "=0.27.3" diff --git a/crates/socket-patch-cli/src/args.rs b/crates/socket-patch-cli/src/args.rs index 4d2dbc5..784b0fe 100644 --- a/crates/socket-patch-cli/src/args.rs +++ b/crates/socket-patch-cli/src/args.rs @@ -217,10 +217,10 @@ pub struct GlobalArgs { #[arg(long = "lock-timeout", env = "SOCKET_LOCK_TIMEOUT")] pub lock_timeout: Option, - /// Force-remove `<.socket>/apply.lock` before attempting - /// acquisition. Use when you are certain no other socket-patch - /// process is running (e.g. a previous run crashed in a way that - /// stripped the OS lock but left the file). Emits a + /// Reclaim a stale `<.socket>/apply.lock` left behind by a + /// crashed run (the file is never deleted — deleting a lock file + /// defeats mutual exclusion). Refuses with `lock_held` if a live + /// socket-patch process still holds the lock. Emits a /// `lock_broken` warning event in the JSON envelope so the /// action is auditable. Only meaningful for mutating /// subcommands; other commands accept it silently. @@ -282,10 +282,21 @@ impl GlobalArgs { } /// Apply CLI-flag toggles for env-driven knobs by mirroring them into env -/// vars. This is how `--debug` / `--no-telemetry` reach core code that -/// reads `SOCKET_DEBUG` / `SOCKET_TELEMETRY_DISABLED` directly. Idempotent -/// and a no-op when the flags are off. +/// vars. 
This is how `--offline` / `--debug` / `--no-telemetry` reach core +/// code that reads `SOCKET_OFFLINE` / `SOCKET_DEBUG` / +/// `SOCKET_TELEMETRY_DISABLED` directly. Idempotent and a no-op when the +/// flags are off. +/// +/// `offline` matters most: the telemetry kill-switch +/// (`socket_patch_core::utils::telemetry::is_telemetry_disabled`) honors the +/// strict-airgap contract by reading `SOCKET_OFFLINE` from the env, so +/// without this mirror a bare `--offline` flag (or a truthy spelling like +/// `SOCKET_OFFLINE=yes` that core's `"1" | "true"` match doesn't recognize) +/// still let telemetry fire a network request. pub fn apply_env_toggles(common: &GlobalArgs) { + if common.offline { + std::env::set_var("SOCKET_OFFLINE", "1"); + } if common.debug { std::env::set_var("SOCKET_DEBUG", "1"); } @@ -294,6 +305,53 @@ pub fn apply_env_toggles(common: &GlobalArgs) { } } +/// Every env var `GlobalArgs` binds (one per `env = "..."` attribute above). +/// Single source of truth for [`scrub_empty_global_env_vars`] and the +/// clean-environment test harnesses. +pub const GLOBAL_ARG_ENV_VARS: &[&str] = &[ + "SOCKET_CWD", + "SOCKET_MANIFEST_PATH", + "SOCKET_API_URL", + "SOCKET_API_TOKEN", + "SOCKET_ORG_SLUG", + "SOCKET_PROXY_URL", + "SOCKET_ECOSYSTEMS", + "SOCKET_DOWNLOAD_MODE", + "SOCKET_OFFLINE", + "SOCKET_GLOBAL", + "SOCKET_GLOBAL_PREFIX", + "SOCKET_JSON", + "SOCKET_VERBOSE", + "SOCKET_SILENT", + "SOCKET_DRY_RUN", + "SOCKET_YES", + "SOCKET_LOCK_TIMEOUT", + "SOCKET_BREAK_LOCK", + "SOCKET_DEBUG", + "SOCKET_TELEMETRY_DISABLED", +]; + +/// Remove exported-but-**empty** `GlobalArgs` env vars before clap parses. +/// +/// `SOCKET_CWD=` — the conventional shell/CI idiom for blanking a variable +/// without unsetting it — must mean "unset, fall back to the default", not +/// abort the command. 
[`parse_bool_flag`] already gives the bool flags that +/// semantic, but clap rejects an empty `SOCKET_CWD` / `SOCKET_GLOBAL_PREFIX` +/// ("a value is required"), `SOCKET_LOCK_TIMEOUT` ("cannot parse integer +/// from empty string") and `SOCKET_ECOSYSTEMS` (the per-token validator) +/// outright — a single stray blank var crashed every subcommand — and an +/// empty `SOCKET_DOWNLOAD_MODE` / `SOCKET_MANIFEST_PATH` leaked `""` past +/// the documented defaults. Called from `main` after legacy-name promotion +/// and before clap runs. Only exactly-empty values are scrubbed; whitespace +/// is significant in paths, so it is left for the parsers to judge. +pub fn scrub_empty_global_env_vars() { + for &var in GLOBAL_ARG_ENV_VARS { + if matches!(std::env::var(var).as_deref(), Ok("")) { + std::env::remove_var(var); + } + } +} + impl Default for GlobalArgs { /// Defaults intended for **test struct literals** (e.g. `..GlobalArgs::default()`). /// @@ -350,28 +408,8 @@ mod tests { /// Full list of env vars `GlobalArgs` reads, so each clap-parse test starts /// from a known-clean environment (no ambient `SOCKET_*` bleed-through). - const SOCKET_ENV_VARS: &[&str] = &[ - "SOCKET_CWD", - "SOCKET_MANIFEST_PATH", - "SOCKET_API_URL", - "SOCKET_API_TOKEN", - "SOCKET_ORG_SLUG", - "SOCKET_PROXY_URL", - "SOCKET_ECOSYSTEMS", - "SOCKET_DOWNLOAD_MODE", - "SOCKET_OFFLINE", - "SOCKET_GLOBAL", - "SOCKET_GLOBAL_PREFIX", - "SOCKET_JSON", - "SOCKET_VERBOSE", - "SOCKET_SILENT", - "SOCKET_DRY_RUN", - "SOCKET_YES", - "SOCKET_LOCK_TIMEOUT", - "SOCKET_BREAK_LOCK", - "SOCKET_DEBUG", - "SOCKET_TELEMETRY_DISABLED", - ]; + /// Aliases the production list so the scrub and the harness can't drift. + const SOCKET_ENV_VARS: &[&str] = GLOBAL_ARG_ENV_VARS; /// Snapshot/clear every `SOCKET_*` var, run `f`, then restore. Keeps the /// env-mutating clap tests hermetic and reversible. 
@@ -392,6 +430,118 @@ mod tests { } } + /// Clear the extra env the core telemetry gate reads beyond the + /// `SOCKET_*` set (`is_telemetry_disabled` also consults `VITEST` and the + /// legacy `SOCKET_PATCH_TELEMETRY_DISABLED` name), so the airgap tests + /// below can't pass or fail vacuously. Restores afterwards. + fn with_clean_telemetry_env(f: impl FnOnce()) { + const EXTRA: &[&str] = &["VITEST", "SOCKET_PATCH_TELEMETRY_DISABLED"]; + let saved: Vec<(&str, Option<String>)> = + EXTRA.iter().map(|&k| (k, std::env::var(k).ok())).collect(); + for &k in EXTRA { + std::env::remove_var(k); + } + f(); + for (k, v) in saved { + match v { + Some(v) => std::env::set_var(k, v), + None => std::env::remove_var(k), + } + } + } + + /// `--offline` promises "never contact the network", but the telemetry + /// kill-switch (`socket_patch_core::utils::telemetry::is_telemetry_disabled`) + /// reads the `SOCKET_OFFLINE` env var directly — it never sees the parsed + /// flag. `apply_env_toggles` must therefore mirror `--offline` into the + /// env exactly like `--debug` / `--no-telemetry`, or an airgapped + /// `socket-patch apply --offline` still fires a telemetry HTTP request. + #[test] + #[serial_test::serial] + fn apply_env_toggles_mirrors_offline_into_env_for_airgap() { + with_clean_socket_env(|| { + with_clean_telemetry_env(|| { + let args = GlobalArgs { + offline: true, + ..GlobalArgs::default() + }; + apply_env_toggles(&args); + assert_eq!(std::env::var("SOCKET_OFFLINE").as_deref(), Ok("1")); + assert!( + socket_patch_core::utils::telemetry::is_telemetry_disabled(), + "--offline must disable telemetry (strict airgap: never contact the network)", + ); + }); + }); + } + + /// The full `SOCKET_OFFLINE` vocabulary must reach the telemetry gate. + /// clap (via `parse_bool_flag`) accepts `yes`/`on`/`y`/`t` as true, but + /// core's direct env read matches only `"1" | "true"` — so the toggle + /// mirror has to re-export the parsed flag in normalized form. 
+ #[test] + #[serial_test::serial] + fn truthy_offline_env_vocabulary_reaches_telemetry_gate() { + with_clean_socket_env(|| { + with_clean_telemetry_env(|| { + std::env::set_var("SOCKET_OFFLINE", "yes"); + let cli = TestCli::try_parse_from(["socket-patch"]).unwrap(); + assert!(cli.common.offline, "SOCKET_OFFLINE=yes parses as offline"); + apply_env_toggles(&cli.common); + assert!( + socket_patch_core::utils::telemetry::is_telemetry_disabled(), + "SOCKET_OFFLINE=yes must disable telemetry like SOCKET_OFFLINE=1", + ); + }); + }); + } + + /// `scrub_empty_global_env_vars` removes exactly-empty `SOCKET_*` globals + /// (the `VAR=` blank-without-unsetting idiom) and nothing else: set, + /// non-empty values — even whitespace-only ones, which are significant in + /// paths — survive, and the previously-crashing parse then sees plain + /// defaults. + #[test] + #[serial_test::serial] + fn scrub_empty_global_env_vars_unsets_only_empties() { + with_clean_socket_env(|| { + std::env::set_var("SOCKET_CWD", ""); + std::env::set_var("SOCKET_LOCK_TIMEOUT", ""); + std::env::set_var("SOCKET_GLOBAL_PREFIX", ""); + std::env::set_var("SOCKET_ECOSYSTEMS", ""); + std::env::set_var("SOCKET_DOWNLOAD_MODE", ""); + std::env::set_var("SOCKET_MANIFEST_PATH", "keep.json"); + std::env::set_var("SOCKET_ORG_SLUG", " "); + + scrub_empty_global_env_vars(); + + assert!( + std::env::var("SOCKET_CWD").is_err(), + "empty var is scrubbed" + ); + assert!(std::env::var("SOCKET_LOCK_TIMEOUT").is_err()); + assert_eq!( + std::env::var("SOCKET_MANIFEST_PATH").as_deref(), + Ok("keep.json"), + "non-empty values must survive the scrub", + ); + assert_eq!( + std::env::var("SOCKET_ORG_SLUG").as_deref(), + Ok(" "), + "whitespace-only values are left for the parsers to judge", + ); + + let cli = TestCli::try_parse_from(["socket-patch"]) + .expect("blank env vars must mean 'unset', not a parse abort"); + assert_eq!(cli.common.cwd, PathBuf::from(".")); + assert_eq!(cli.common.lock_timeout, None); + 
assert!(cli.common.global_prefix.is_none()); + assert!(cli.common.ecosystems.is_none()); + assert_eq!(cli.common.download_mode, "diff"); + assert_eq!(cli.common.manifest_path, "keep.json"); + }); + } + /// `parse_bool_flag` accepts the same vocabulary as clap's /// `BoolishValueParser`, case-insensitively and with surrounding whitespace /// trimmed. diff --git a/crates/socket-patch-cli/src/commands/apply.rs b/crates/socket-patch-cli/src/commands/apply.rs index 08aca97..07d11db 100644 --- a/crates/socket-patch-cli/src/commands/apply.rs +++ b/crates/socket-patch-cli/src/commands/apply.rs @@ -772,9 +772,31 @@ async fn apply_patches_inner( // Resolve patch sources (read `.socket/` directly, or stage an overlay // tempdir + download the gap). Shared with `vendor` via fetch_stage. let socket_dir = manifest_path.parent().unwrap(); + // Partition manifest PURLs by ecosystem up front. The source probes, + // the offline guard, and the download planner in `fetch_stage` must only + // consider patches this run can actually apply — the `--ecosystems` + // filter plus the ecosystems compiled into this build. An out-of-scope + // patch with no local source must not fail (or trigger fetches for) a + // run that will never apply it. + let manifest_purls: Vec<String> = manifest.patches.keys().cloned().collect(); + let partitioned = partition_purls(&manifest_purls, args.common.ecosystems.as_deref()); + + let target_manifest_purls: HashSet<String> = partitioned + .values() + .flat_map(|purls| purls.iter().cloned()) + .collect(); + + // In-scope view of the manifest for source probing and fetching. The + // apply loop keeps using the full `manifest` for per-PURL lookups — + // those are already scoped by `partitioned`. + let mut scoped_manifest = manifest.clone(); + scoped_manifest + .patches + .retain(|purl, _| target_manifest_purls.contains(purl)); + let staged = match crate::commands::fetch_stage::stage_patch_sources( &args.common, - &manifest, + &scoped_manifest, socket_dir, ) .await? 
@@ -788,15 +810,6 @@ async fn apply_patches_inner( let diffs_path = staged.diffs.clone(); let packages_path = staged.packages.clone(); - // Partition manifest PURLs by ecosystem - let manifest_purls: Vec<String> = manifest.patches.keys().cloned().collect(); - let partitioned = partition_purls(&manifest_purls, args.common.ecosystems.as_deref()); - - let target_manifest_purls: HashSet<String> = partitioned - .values() - .flat_map(|purls| purls.iter().cloned()) - .collect(); - // Vendor ownership wins for EVERY ecosystem: a purl recorded in // `.socket/vendor/state.json` is managed by the explicit `vendor` // action — apply must not re-patch its installed tree (or repoint a diff --git a/crates/socket-patch-cli/src/commands/get.rs b/crates/socket-patch-cli/src/commands/get.rs index ac9861b..e7359aa 100644 --- a/crates/socket-patch-cli/src/commands/get.rs +++ b/crates/socket-patch-cli/src/commands/get.rs @@ -249,6 +249,15 @@ fn report_error(json: bool, message: impl std::fmt::Display) { } } +/// A blob hash must be a SHA-256 hex string — the same shape `fetch_blob` +/// enforces before splicing a hash into a URL. Enforced here because the +/// hash comes from an untrusted API response and is used as a filesystem +/// path component: anything else (`../../x`, an absolute path) would +/// escape the blobs directory via `Path::join`. +fn is_valid_blob_hash(hash: &str) -> bool { + hash.len() == 64 && hash.bytes().all(|b| b.is_ascii_hexdigit()) +} + /// Decode a base64 string and write it to `blobs_dir/hash`. Returns a /// formatted error string referencing `file_path` and `label` on failure. 
async fn write_blob_entry( @@ -258,6 +267,11 @@ async fn write_blob_entry( file_path: &str, label: &str, ) -> Result<(), String> { + if !is_valid_blob_hash(hash) { + return Err(format!( + "Refusing to write {label} for {file_path}: invalid blob hash {hash:?} (expected 64 hex chars)" + )); + } let decoded = base64_decode(b64).map_err(|e| format!("Failed to decode {label} for {file_path}: {e}"))?; tokio::fs::write(blobs_dir.join(hash), &decoded) @@ -1249,6 +1263,9 @@ pub async fn run(args: GetArgs) -> i32 { } apply_env_toggles(&args.common); + // `--silent` is "errors only" (CLI_CONTRACT.md): every informational + // print below is gated on this; errors and JSON envelopes are not. + let quiet = args.common.json || args.common.silent; let overrides = args.common.api_client_overrides(); let (mut api_client, mut use_public_proxy) = get_api_client_with_overrides(overrides.clone()).await; @@ -1277,7 +1294,7 @@ pub async fn run(args: GetArgs) -> i32 { match detect_identifier_type(&args.identifier) { Some(t) => t, None => { - if !args.common.json { + if !quiet { println!("Treating \"{}\" as a package name search", args.identifier); } IdentifierType::Package @@ -1287,7 +1304,7 @@ pub async fn run(args: GetArgs) -> i32 { // Handle UUID: fetch and download directly if id_type == IdentifierType::Uuid { - if !args.common.json { + if !quiet { println!("Fetching patch by UUID: {}", args.identifier); } let mut fetch_result = api_client @@ -1335,7 +1352,7 @@ pub async fn run(args: GetArgs) -> i32 { "tier": "paid", }], })); - } else { + } else if !args.common.silent { println!("\nThis patch requires a paid subscription to download."); println!("\n Patch: {}", patch.purl); println!(" Tier: paid"); @@ -1359,9 +1376,10 @@ pub async fn run(args: GetArgs) -> i32 { telemetry_org.as_deref(), ) .await; - // Save to manifest - return save_and_apply_patch(&args, &patch.purl, &patch.uuid, effective_org_slug) - .await; + // Save to manifest. 
Pass the fetched patch through so the + // save step reuses this (possibly proxy-fallback) result + // instead of re-fetching with a fresh client. + return save_and_apply_patch(&args, &patch).await; } Ok(None) => { track_patch_fetch_failed( @@ -1374,7 +1392,7 @@ pub async fn run(args: GetArgs) -> i32 { .await; if args.common.json { print_json(&empty_result_json("not_found")); - } else { + } else if !args.common.silent { println!("No patch found with UUID: {}", args.identifier); } return 0; @@ -1398,7 +1416,7 @@ pub async fn run(args: GetArgs) -> i32 { // the matching endpoint, and surface errors via `report_fetch_failure`. let search_response: SearchResponse = match id_type { IdentifierType::Cve | IdentifierType::Ghsa | IdentifierType::Purl => { - if !args.common.json { + if !quiet { let label = match id_type { IdentifierType::Cve => "CVE", IdentifierType::Ghsa => "GHSA", @@ -1441,7 +1459,7 @@ pub async fn run(args: GetArgs) -> i32 { } } IdentifierType::Package => { - if !args.common.json { + if !quiet { println!("Enumerating packages..."); } let crawler_options = CrawlerOptions { @@ -1455,25 +1473,27 @@ pub async fn run(args: GetArgs) -> i32 { if all_packages.is_empty() { if args.common.json { print_json(&empty_result_json("no_packages")); - } else if args.common.global { - println!("No global packages found."); - } else { - #[allow(unused_mut)] - let mut install_cmds = String::from("npm/yarn/pnpm/pip"); - #[cfg(feature = "cargo")] - install_cmds.push_str("/cargo"); - #[cfg(feature = "golang")] - install_cmds.push_str("/go"); - #[cfg(feature = "maven")] - install_cmds.push_str("/mvn"); - #[cfg(feature = "composer")] - install_cmds.push_str("/composer"); - println!("No packages found. 
Run {install_cmds} install first."); + } else if !args.common.silent { + if args.common.global { + println!("No global packages found."); + } else { + #[allow(unused_mut)] + let mut install_cmds = String::from("npm/yarn/pnpm/pip"); + #[cfg(feature = "cargo")] + install_cmds.push_str("/cargo"); + #[cfg(feature = "golang")] + install_cmds.push_str("/go"); + #[cfg(feature = "maven")] + install_cmds.push_str("/mvn"); + #[cfg(feature = "composer")] + install_cmds.push_str("/composer"); + println!("No packages found. Run {install_cmds} install first."); + } } return 0; } - if !args.common.json { + if !quiet { println!("Found {} packages", all_packages.len()); } @@ -1482,13 +1502,13 @@ pub async fn run(args: GetArgs) -> i32 { if matches.is_empty() { if args.common.json { print_json(&empty_result_json("no_match")); - } else { + } else if !args.common.silent { println!("No packages matching \"{}\" found.", args.identifier); } return 0; } - if !args.common.json { + if !quiet { println!( "Found {} matching package(s), checking for available patches...", matches.len() @@ -1521,13 +1541,13 @@ pub async fn run(args: GetArgs) -> i32 { if search_response.patches.is_empty() { if args.common.json { print_json(&empty_result_json("not_found")); - } else { + } else if !args.common.silent { println!("No patches found for {}: {}", id_type, args.identifier); } return 0; } - if !args.common.json { + if !quiet { display_search_results( &search_response.patches, search_response.can_access_paid_patches, @@ -1555,7 +1575,7 @@ pub async fn run(args: GetArgs) -> i32 { "tier": p.tier, })).collect::<Vec<_>>(), })); - } else { + } else if !args.common.silent { println!("\nAll available patches require a paid subscription."); println!("\n Upgrade at: https://socket.dev/pricing\n"); } @@ -1573,7 +1593,7 @@ pub async fn run(args: GetArgs) -> i32 { }; if selected.is_empty() { - if !args.common.json { + if !quiet { println!("No patches selected."); } return 0; @@ -1582,7 +1602,7 @@ pub async fn run(args: 
GetArgs) -> i32 { // Confirm before downloading (default YES) let prompt = format!("Download {} patch(es)?", selected.len()); if !confirm(&prompt, true, args.common.yes, args.common.json) { - if !args.common.json { + if !quiet { println!("Download cancelled."); } return 0; @@ -1597,7 +1617,7 @@ pub async fn run(args: GetArgs) -> i32 { global: args.common.global, global_prefix: args.common.global_prefix.clone(), json: args.common.json, - silent: false, + silent: args.common.silent, download_mode: args.common.download_mode.clone(), api_overrides: args.common.api_client_overrides(), all_releases: args.all_releases, @@ -1654,32 +1674,16 @@ fn display_search_results(patches: &[PatchSearchResult], can_access_paid: bool) } } -async fn save_and_apply_patch( - args: &GetArgs, - _purl: &str, - uuid: &str, - _org_slug: Option<&str>, -) -> i32 { - // For UUID mode, fetch and save - let (api_client, _) = get_api_client_with_overrides(args.common.api_client_overrides()).await; - let effective_org: Option<&str> = None; // org slug is already stored in the client - - let patch = match api_client.fetch_patch(effective_org, uuid).await { - Ok(Some(p)) => p, - Ok(None) => { - if args.common.json { - print_json(&empty_result_json("not_found")); - } else { - println!("No patch found with UUID: {uuid}"); - } - return 0; - } - Err(e) => { - report_error(args.common.json, e); - return 1; - } - }; - +/// Save an already-fetched patch to the manifest and (unless +/// `--save-only`) apply it. Takes the `PatchResponse` the caller fetched +/// rather than re-fetching by UUID: the caller's client may have fallen +/// back to the public proxy after a 401/403, and a fresh client built +/// here would hit the same auth failure again, breaking the fallback +/// end to end. +async fn save_and_apply_patch(args: &GetArgs, patch: &PatchResponse) -> i32 { + // Same "errors only" gate as `run` — informational prints respect + // `--silent`; errors and the JSON envelope do not. 
+ let quiet = args.common.json || args.common.silent; let socket_dir = args.common.cwd.join(".socket"); let blobs_dir = socket_dir.join("blobs"); let manifest_path = socket_dir.join("manifest.json"); @@ -1714,7 +1718,7 @@ async fn save_and_apply_patch( } } - if write_all_patch_blobs(&blobs_dir, &patch, args.common.json) + if write_all_patch_blobs(&blobs_dir, patch, args.common.json) .await .is_err() { @@ -1748,7 +1752,7 @@ async fn save_and_apply_patch( manifest .patches - .insert(patch.purl.clone(), build_patch_record(&patch, files)); + .insert(patch.purl.clone(), build_patch_record(patch, files)); if let Err(e) = write_manifest(&manifest_path, &manifest).await { report_error(args.common.json, format!("Error writing manifest: {e}")); @@ -1776,7 +1780,7 @@ async fn save_and_apply_patch( `socket-patch vendor` to refresh the committed artifact", patch.purl, entry.uuid, patch.uuid ); - if !args.common.json { + if !quiet { eprintln!(" [note] {w}"); } warnings.push(w); @@ -1784,7 +1788,7 @@ async fn save_and_apply_patch( } } - if !args.common.json { + if !quiet { println!("\nPatch saved to {}", manifest_path.display()); if added { println!(" Added: 1"); @@ -1795,7 +1799,7 @@ async fn save_and_apply_patch( let mut apply_succeeded = false; if !args.save_only && added { - if !args.common.json { + if !quiet { println!("\nApplying patches..."); } let apply_args = super::apply::ApplyArgs { @@ -1804,7 +1808,7 @@ async fn save_and_apply_patch( manifest_path: manifest_path.display().to_string(), global: args.common.global, global_prefix: args.common.global_prefix.clone(), - silent: args.common.json, + silent: quiet, download_mode: args.common.download_mode.clone(), ..crate::args::GlobalArgs::default() }, @@ -1817,7 +1821,7 @@ async fn save_and_apply_patch( }; let code = super::apply::run(apply_args).await; apply_succeeded = code == 0; - if code != 0 && !args.common.json { + if code != 0 && !quiet { eprintln!("\nSome patches could not be applied."); } } @@ -1840,7 +1844,7 @@ 
async fn save_and_apply_patch( if added { // Only enrich when the patch was actually added — a `skipped` // record means the consumer already saw the metadata last time. - merge_metadata(&mut patch_record, patch_event_metadata(&patch)); + merge_metadata(&mut patch_record, patch_event_metadata(patch)); } let mut result_json = serde_json::json!({ "status": status, @@ -2464,6 +2468,79 @@ mod tests { assert_eq!(out, format!("{}...", "é".repeat(77))); } + // --- write_blob_entry ------------------------------------------------ + // Blob hashes come straight from the API response and are used as + // filesystem path components (`blobs_dir.join(hash)`). A hostile or + // compromised API/proxy returning `afterHash: "../../x"` must not be + // able to write outside the blobs directory. + + // "patched\n" in base64 — a valid payload so only the hash is at fault. + const BLOB_B64: &str = "cGF0Y2hlZAo="; + + #[tokio::test] + async fn write_blob_entry_rejects_relative_traversal_hash() { + let tmp = tempfile::tempdir().unwrap(); + let blobs_dir = tmp.path().join("blobs"); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + let res = write_blob_entry( + &blobs_dir, + BLOB_B64, + "../escaped", + "package/index.js", + "blob", + ) + .await; + assert!( + res.is_err(), + "a traversal hash must be rejected, got {res:?}" + ); + assert!( + !tmp.path().join("escaped").exists(), + "traversal hash must not write outside the blobs dir" + ); + } + + #[tokio::test] + async fn write_blob_entry_rejects_absolute_path_hash() { + let tmp = tempfile::tempdir().unwrap(); + let blobs_dir = tmp.path().join("blobs"); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + // An absolute "hash" makes Path::join discard blobs_dir entirely. 
+ let target = tmp.path().join("abs_escape"); + let res = write_blob_entry( + &blobs_dir, + BLOB_B64, + target.to_str().unwrap(), + "package/index.js", + "blob", + ) + .await; + assert!( + res.is_err(), + "an absolute-path hash must be rejected, got {res:?}" + ); + assert!( + !target.exists(), + "absolute-path hash must not write outside the blobs dir" + ); + } + + #[tokio::test] + async fn write_blob_entry_accepts_valid_sha256_hash() { + let tmp = tempfile::tempdir().unwrap(); + let blobs_dir = tmp.path().join("blobs"); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + let hash = "1111111111111111111111111111111111111111111111111111111111111111"; + write_blob_entry(&blobs_dir, BLOB_B64, hash, "package/index.js", "blob") + .await + .expect("a canonical 64-hex hash must be accepted"); + let written = std::fs::read(blobs_dir.join(hash)).unwrap(); + assert_eq!(written, b"patched\n"); + } + // --- short_uuid ------------------------------------------------------ // The `[update]` log line prints the first 8 chars of the manifest's // existing UUID. A naive `&uuid[..8]` panics on a short or non-ASCII diff --git a/crates/socket-patch-cli/src/commands/list.rs b/crates/socket-patch-cli/src/commands/list.rs index f1a0cdd..abbb258 100644 --- a/crates/socket-patch-cli/src/commands/list.rs +++ b/crates/socket-patch-cli/src/commands/list.rs @@ -120,6 +120,10 @@ pub async fn run(args: ListArgs) -> i32 { if args.common.json { println!("{}", build_list_envelope(&manifest).to_pretty_json()); + } else if args.common.silent { + // `--silent` is "errors only" (CLI_CONTRACT.md): suppress the + // entire human-readable listing, mirroring `get`/`repair`. + // The exit code still distinguishes the manifest states. 
} else if patch_entries.is_empty() { println!("No patches found in manifest."); } else { diff --git a/crates/socket-patch-cli/src/commands/lock_cli.rs b/crates/socket-patch-cli/src/commands/lock_cli.rs index d2d5b87..82f6823 100644 --- a/crates/socket-patch-cli/src/commands/lock_cli.rs +++ b/crates/socket-patch-cli/src/commands/lock_cli.rs @@ -19,9 +19,9 @@ use socket_patch_core::patch::apply_lock::{acquire, LockError, LockGuard}; use crate::json_envelope::{Command, Envelope, EnvelopeError, PatchAction, PatchEvent}; /// Stable `errorCode` tag emitted as a `Skipped` warning event when -/// `--break-lock` actually deletes a pre-existing lock file. Exposed -/// for downstream consumers and integration tests that pattern-match -/// on it. +/// `--break-lock` actually reclaims a stale pre-existing lock file. +/// Exposed for downstream consumers and integration tests that +/// pattern-match on it. pub const LOCK_BROKEN_CODE: &str = "lock_broken"; /// Outcome of a successful lock acquisition. Callers attach a @@ -33,8 +33,8 @@ pub const LOCK_BROKEN_CODE: &str = "lock_broken"; #[derive(Debug)] pub struct LockAcquired { pub guard: LockGuard, - /// True iff `--break-lock` was set AND the helper actually - /// removed a pre-existing `apply.lock` file before acquiring. + /// True iff `--break-lock` was set AND a pre-existing + /// `apply.lock` file (with no live holder) was reclaimed. /// False when the file didn't exist (nothing to break) — the /// flag was a no-op in that case so no warning is warranted. pub broke_lock: bool, @@ -53,15 +53,18 @@ pub struct LockAcquired { /// try-once shape. Positive values wait with a 100 ms backoff — /// see `socket_patch_core::patch::apply_lock::acquire`. /// -/// `break_lock = true` clears a *stale* `/apply.lock` -/// before the acquire attempt. The motivating case is a crashed prior -/// run that left the file behind. 
It is **not** a force-steal: a -/// non-blocking probe runs first, and if a live holder still owns the -/// lock the helper refuses with `lock_held` rather than removing the -/// file out from under it (which would defeat mutual exclusion — the -/// unlink recreates a fresh inode the holder's advisory lock no longer -/// guards). When a pre-existing file is removed with no live holder the -/// return value's `broke_lock` is true and the caller should attach a +/// `break_lock = true` reclaims a *stale* `<socket_dir>/apply.lock` +/// via a non-blocking acquire. The motivating case is a crashed prior +/// run that left the file behind — harmless to a fresh acquire (the +/// kernel already released the dead holder's advisory lock), but worth +/// an audit event. It is **not** a force-steal: if a live holder still +/// owns the lock the helper refuses with `lock_held` rather than +/// stealing. The file is deliberately never unlinked — an unlink +/// defeats mutual exclusion, because a competitor (live holder or +/// mid-acquire racer) can keep or take an advisory lock on the +/// orphaned inode while a fresh acquire locks its replacement. When a +/// pre-existing file is reclaimed with no live holder the return +/// value's `broke_lock` is true and the caller should attach a /// `lock_broken` warning event to their envelope. pub fn acquire_or_emit( socket_dir: &Path, @@ -72,7 +75,6 @@ pub fn acquire_or_emit( timeout: Duration, break_lock: bool, ) -> Result<LockAcquired, i32> { - let mut broke_lock = false; if break_lock { let path = socket_dir.join("apply.lock"); @@ -80,27 +82,50 @@ pub fn acquire_or_emit( // anything. The probe `acquire` below opens with `create(true)`, // so afterwards the file always exists — even on a clean tree. // We only want to report `broke_lock` (and emit the warning - // event) when a *pre-existing* leftover was actually removed, + // event) when a *pre-existing* leftover was actually reclaimed, // mirroring `unlock`'s `lock_existed` source-of-truth pattern. 
let lock_existed = path.exists(); - // CRITICAL: never steal a lock from a *live* holder. A bare - // `remove_file` + re-`acquire` is unsafe — on Unix the unlink - // recreates a fresh inode, so a still-running holder keeps its - // `flock(2)` on the old (unlinked) inode while we lock the new - // one. Both processes then believe they hold the lock and race - // on every file write, which is exactly what the lock exists to - // prevent. So probe with a non-blocking acquire first: - // * Ok => no live holder (a crashed run's flock is already - // auto-released by the kernel), so breaking the - // leftover file is safe. Drop our probe handle and - // fall through to remove + re-acquire below. + // CRITICAL: never steal a lock from a *live* holder, and never + // unlink the lock file at all. Removal defeats mutual exclusion + // in two ways: + // * unlinking a *held* file leaves the holder flocking the + // old (unlinked) inode while we lock a fresh one — both + // processes then believe they hold the lock and race on + // every file write, which is exactly what the lock exists + // to prevent; + // * even when a probe proves no holder at unlink time, a + // competitor mid-`acquire` that opened the file *before* + // the unlink can flock the orphaned inode *after* it, with + // the same double-hold result. + // A leftover file from a crashed run needs no removal anyway: + // the kernel released the dead process's advisory lock along + // with its file handle, so a non-blocking acquire simply + // reclaims the file in place. + // * Ok => no live holder — the probe guard IS the lock. + // Return it; `broke_lock` reports whether a + // pre-existing leftover was reclaimed so callers + // attach the warning event. // * Held => a live holder exists. Refuse rather than steal — // this is the case `--break-lock` was *wrongly* // relied on for, and the only one where it mattered. // * Io => surface the real fault. 
match acquire(socket_dir, Duration::ZERO) { - Ok(guard) => drop(guard), + Ok(guard) => { + // Only a *pre-existing* leftover counts as "broke a + // lock". If the probe itself just created the file on a + // clean tree, the flag was a no-op for the warning + // surface — `broke_lock` stays false so callers don't + // emit a spurious event. + let broke_lock = lock_existed; + if broke_lock && !silent && !json { + eprintln!( + "Warning: --break-lock reclaimed stale {} (no live holder).", + path.display() + ); + } + return Ok(LockAcquired { guard, broke_lock }); + } Err(LockError::Held) => { // The probe above is a *non-blocking* try-once // (`Duration::ZERO`), so report a zero wait. Threading @@ -110,6 +135,9 @@ pub fn acquire_or_emit( // `break_probe_held_message` takes no timeout precisely so // the wrong value can't be passed back in. let msg = break_probe_held_message(); + // The user already passed --break-lock and it was refused: + // advising it again would be self-defeating. Only the + // inspect path remains actionable. emit( command, json, @@ -117,59 +145,23 @@ pub fn acquire_or_emit( dry_run, "lock_held", &msg, - Some(socket_dir), + Hint::UnlockOnly, ); return Err(1); } Err(LockError::Io { path, source }) => { let msg = format!("failed to open lock file at {}: {}", path.display(), source); - emit(command, json, silent, dry_run, "lock_io", &msg, None); - return Err(1); - } - } - - match std::fs::remove_file(&path) { - Ok(()) => { - // Only a *pre-existing* leftover counts as "broke a - // lock". If the probe itself just created the file on a - // clean tree, removing it is a no-op for the warning - // surface — `broke_lock` stays false so callers don't - // emit a spurious event. 
- broke_lock = lock_existed; - if broke_lock && !silent && !json { - eprintln!( - "Warning: --break-lock removed {} before acquisition.", - path.display() - ); - } - } - Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - // No file to break — silently proceed to the normal - // acquire path. Documented as a no-op so scripts can - // pass --break-lock unconditionally on retry. - } - Err(source) => { - let msg = format!( - "failed to remove lock file at {}: {}", - path.display(), - source - ); - emit( - command, - json, - silent, - dry_run, - "lock_break_failed", - &msg, - None, - ); + emit(command, json, silent, dry_run, "lock_io", &msg, Hint::None); return Err(1); } } } match acquire(socket_dir, timeout) { - Ok(guard) => Ok(LockAcquired { guard, broke_lock }), + Ok(guard) => Ok(LockAcquired { + guard, + broke_lock: false, + }), Err(LockError::Held) => { let msg = held_message(timeout); emit( @@ -179,13 +171,13 @@ pub fn acquire_or_emit( dry_run, "lock_held", &msg, - Some(socket_dir), + Hint::UnlockOrBreakLock, ); Err(1) } Err(LockError::Io { path, source }) => { let msg = format!("failed to open lock file at {}: {}", path.display(), source); - emit(command, json, silent, dry_run, "lock_io", &msg, None); + emit(command, json, silent, dry_run, "lock_io", &msg, Hint::None); Err(1) } } @@ -199,7 +191,7 @@ pub fn lock_broken_event(socket_dir: &Path) -> PatchEvent { PatchEvent::artifact(PatchAction::Skipped).with_reason( LOCK_BROKEN_CODE, format!( - "--break-lock removed {}/apply.lock before acquisition", + "--break-lock reclaimed stale {}/apply.lock (no live holder)", socket_dir.display() ), ) @@ -259,6 +251,16 @@ fn error_envelope(command: Command, dry_run: bool, code: &str, message: &str) -> env } +/// Remediation hint appended under the human-mode error line. 
The +/// `--break-lock` advice is only valid when the caller hasn't already +/// tried it — a refused `--break-lock` (live holder) must not advise +/// rerunning with `--break-lock`, which is exactly what just failed. +enum Hint { + None, + UnlockOnly, + UnlockOrBreakLock, +} + fn emit( command: Command, json: bool, @@ -266,7 +268,7 @@ fn emit( dry_run: bool, code: &str, message: &str, - hint_dir: Option<&Path>, + hint: Hint, ) { if json { println!( @@ -275,10 +277,16 @@ fn emit( ); } else if !silent { eprintln!("Error: {message}."); - if hint_dir.is_some() { - eprintln!( - " Run `socket-patch unlock` to inspect, or rerun with --break-lock if you're sure no holder exists." - ); + match hint { + Hint::None => {} + Hint::UnlockOnly => { + eprintln!(" Run `socket-patch unlock` to inspect."); + } + Hint::UnlockOrBreakLock => { + eprintln!( + " Run `socket-patch unlock` to inspect, or rerun with --break-lock if you're sure no holder exists." + ); + } } } } @@ -384,10 +392,10 @@ mod tests { } /// `break_lock=true` against a pre-existing lock file with no - /// holder removes the file and acquires fresh. `broke_lock` flag - /// surfaces so callers can attach the warning event. + /// holder reclaims the file in place and acquires. `broke_lock` + /// flag surfaces so callers can attach the warning event. #[test] - fn acquire_or_emit_break_lock_removes_and_acquires() { + fn acquire_or_emit_break_lock_reclaims_stale_file_and_acquires() { let dir = tempfile::tempdir().unwrap(); // Pre-stage a lock file with no holder — simulates the // post-crash leftover scenario. @@ -405,9 +413,9 @@ mod tests { .unwrap(); assert!( acquired.broke_lock, - "broke_lock should be true when a lock file existed and was removed" + "broke_lock should be true when a stale lock file existed and was reclaimed" ); - // Lock file has been re-created by `acquire` and we hold it. + // The lock file persists (never unlinked) and we hold it. 
assert!(dir.path().join("apply.lock").is_file()); } @@ -496,6 +504,87 @@ mod tests { )); } + /// Regression: the break-lock sequence must not open a window in + /// which a competitor can be robbed of a lock it legitimately + /// acquired. The buggy shape probed, then `remove_file`d the lock + /// file, then re-acquired: a competitor that flocked (or had merely + /// *opened*) the file before the unlink kept a valid lock on the + /// orphaned inode while the re-acquire locked a fresh one — two + /// live holders at once, the exact double-hold the probe exists to + /// prevent. The fixed shape never unlinks: the probe guard is the + /// lock. + /// + /// The competitor thread increments a shared holder count only + /// while it genuinely holds the OS lock, as does the main thread + /// for the guard `acquire_or_emit` hands back. With real mutual + /// exclusion the count can never exceed 1, so the test is + /// deterministic-green on correct code; under the buggy window the + /// hammer lands in the gap within a handful of iterations. + #[test] + fn break_lock_window_cannot_defeat_mutual_exclusion() { + use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + use std::sync::Arc; + + let dir = tempfile::tempdir().unwrap(); + let lock_dir = dir.path().to_path_buf(); + let holders = Arc::new(AtomicUsize::new(0)); + let violated = Arc::new(AtomicBool::new(false)); + let stop = Arc::new(AtomicBool::new(false)); + + // Competitor: grabs the lock the instant it is free, holds it + // briefly, releases, retries. Mirrors a concurrent + // `socket-patch apply` racing a `--break-lock` invocation. 
+ let hammer = { + let lock_dir = lock_dir.clone(); + let holders = Arc::clone(&holders); + let violated = Arc::clone(&violated); + let stop = Arc::clone(&stop); + std::thread::spawn(move || { + while !stop.load(Ordering::SeqCst) { + if let Ok(guard) = acquire(&lock_dir, Duration::ZERO) { + if holders.fetch_add(1, Ordering::SeqCst) != 0 { + violated.store(true, Ordering::SeqCst); + } + std::thread::sleep(Duration::from_micros(500)); + holders.fetch_sub(1, Ordering::SeqCst); + drop(guard); + } + } + }) + }; + + for _ in 0..2000 { + if violated.load(Ordering::SeqCst) { + break; + } + // silent=true so refused iterations stay quiet; refusal + // (the hammer currently holds) is a correct outcome here. + if let Ok(acquired) = acquire_or_emit( + &lock_dir, + Command::Apply, + false, + true, + false, + Duration::ZERO, + true, // break_lock + ) { + if holders.fetch_add(1, Ordering::SeqCst) != 0 { + violated.store(true, Ordering::SeqCst); + } + holders.fetch_sub(1, Ordering::SeqCst); + drop(acquired); + } + } + stop.store(true, Ordering::SeqCst); + hammer.join().unwrap(); + + assert!( + !violated.load(Ordering::SeqCst), + "--break-lock let two processes hold the apply lock at once: \ + the lock file must never be unlinked" + ); + } + /// Whole-second budgets read naturally in the contention message. 
#[test] fn held_message_reports_whole_seconds() { diff --git a/crates/socket-patch-cli/src/commands/remove.rs b/crates/socket-patch-cli/src/commands/remove.rs index bba182b..460f9e4 100644 --- a/crates/socket-patch-cli/src/commands/remove.rs +++ b/crates/socket-patch-cli/src/commands/remove.rs @@ -95,7 +95,7 @@ pub async fn run(args: RemoveArgs) -> i32 { socket_dir, Command::Remove, args.common.json, - false, // remove has no --silent on its own; use false + args.common.silent, false, // remove has no --dry-run Duration::from_secs(args.common.lock_timeout.unwrap_or(0)), args.common.break_lock, @@ -182,7 +182,7 @@ pub async fn run(args: RemoveArgs) -> i32 { // to multiple manifest entries (PyPI release variants), make the // blast radius explicit so the user understands why a single // `remove pkg:pypi/foo@1.0` is removing several variants. - if !args.common.json { + if !args.common.json && !args.common.silent { if args.identifier.starts_with("pkg:") && !args.identifier.contains('?') && matching.len() > 1 @@ -211,7 +211,7 @@ pub async fn run(args: RemoveArgs) -> i32 { let prompt = format!("Remove {} patch(es) and rollback files?", matching.len()); if !confirm(&prompt, true, args.common.yes, args.common.json) { - if !args.common.json { + if !args.common.json && !args.common.silent { println!("Removal cancelled."); } return 0; @@ -220,7 +220,7 @@ pub async fn run(args: RemoveArgs) -> i32 { // First, rollback the patch if not skipped let mut rollback_count = 0; if !args.skip_rollback { - if !args.common.json { + if !args.common.json && !args.common.silent { println!("Rolling back patch before removal..."); } match rollback_patches( @@ -228,7 +228,7 @@ pub async fn run(args: RemoveArgs) -> i32 { &manifest_path, Some(&args.identifier), false, - args.common.json, // silent when JSON + args.common.json || args.common.silent, args.common.offline, args.common.global, args.common.global_prefix.clone(), @@ -267,7 +267,7 @@ pub async fn run(args: RemoveArgs) -> i32 { 
.filter(|r| r.success && all_files_already_original(r)) .count(); - if !args.common.json { + if !args.common.json && !args.common.silent { if rollback_count > 0 { println!("Rolled back {rollback_count} package(s)"); } @@ -406,7 +406,7 @@ pub async fn run(args: RemoveArgs) -> i32 { return 1; } - if !args.common.json { + if !args.common.json && !args.common.silent { println!("Removed {} patch(es) from manifest:", removed.len()); for purl in &removed { println!(" - {purl}"); @@ -420,7 +420,7 @@ pub async fn run(args: RemoveArgs) -> i32 { let mut blobs_removed = 0; if let Ok(cleanup_result) = cleanup_unused_blobs(&manifest, &blobs_path, false).await { blobs_removed = cleanup_result.blobs_removed; - if !args.common.json && cleanup_result.blobs_removed > 0 { + if !args.common.json && !args.common.silent && cleanup_result.blobs_removed > 0 { println!("\n{}", format_cleanup_result(&cleanup_result, false)); } } diff --git a/crates/socket-patch-cli/src/commands/repair.rs b/crates/socket-patch-cli/src/commands/repair.rs index 329b573..f83b9fb 100644 --- a/crates/socket-patch-cli/src/commands/repair.rs +++ b/crates/socket-patch-cli/src/commands/repair.rs @@ -13,7 +13,7 @@ use socket_patch_core::utils::telemetry::{track_patch_repair_failed, track_patch use std::path::Path; use std::time::Duration; -use crate::args::{apply_env_toggles, GlobalArgs}; +use crate::args::{apply_env_toggles, parse_bool_flag, GlobalArgs}; use crate::commands::lock_cli::{acquire_or_emit, lock_broken_event}; use crate::json_envelope::{Command, Envelope, EnvelopeError, PatchAction, PatchEvent, Status}; @@ -24,10 +24,18 @@ pub struct RepairArgs { /// Only download missing artifacts; skip the cleanup phase. /// Incompatible with `--offline`. 
+ /// + /// `value_parser = parse_bool_flag` matches the `GlobalArgs` bool flags: + /// clap's default bool parser accepts only the literal strings + /// `true`/`false` from the env binding, so `SOCKET_DOWNLOAD_ONLY=1` (or + /// an exported-but-empty `SOCKET_DOWNLOAD_ONLY=`) aborted every `repair` + /// invocation. This flag is also outside `GLOBAL_ARG_ENV_VARS`, so + /// `main`'s empty-var scrub never rescues it. #[arg( long = "download-only", env = "SOCKET_DOWNLOAD_ONLY", - default_value_t = false + default_value_t = false, + value_parser = parse_bool_flag, )] pub download_only: bool, } diff --git a/crates/socket-patch-cli/src/commands/rollback.rs b/crates/socket-patch-cli/src/commands/rollback.rs index 169a8d6..c6bf203 100644 --- a/crates/socket-patch-cli/src/commands/rollback.rs +++ b/crates/socket-patch-cli/src/commands/rollback.rs @@ -14,7 +14,7 @@ use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; use std::time::Duration; -use crate::args::{apply_env_toggles, GlobalArgs}; +use crate::args::{apply_env_toggles, parse_bool_flag, GlobalArgs}; use crate::commands::lock_cli::{acquire_or_emit, LOCK_BROKEN_CODE}; use crate::ecosystem_dispatch::{find_packages_for_rollback, partition_purls}; use crate::json_envelope::Command as EnvelopeCommand; @@ -28,7 +28,19 @@ pub struct RollbackArgs { pub common: GlobalArgs, /// Rollback a patch by fetching beforeHash blobs from API (no manifest required). - #[arg(long = "one-off", env = "SOCKET_ONE_OFF", default_value_t = false)] + /// + /// `value_parser = parse_bool_flag` matches the `GlobalArgs` bool flags: + /// clap's default bool parser accepts only the literal strings + /// `true`/`false` from the env binding, so `SOCKET_ONE_OFF=1` (or an + /// exported-but-empty `SOCKET_ONE_OFF=`) aborted every `rollback` + /// invocation. This flag is also outside `GLOBAL_ARG_ENV_VARS`, so + /// `main`'s empty-var scrub never rescues it. 
+ #[arg( + long = "one-off", + env = "SOCKET_ONE_OFF", + default_value_t = false, + value_parser = parse_bool_flag, + )] pub one_off: bool, } @@ -103,8 +115,18 @@ async fn try_rollback_local_go( package_path: pkg_path.display().to_string(), success: true, files_verified: Vec::new(), - files_rolled_back: patch.files.keys().cloned().collect(), + // The engine leaves `files_rolled_back` empty on dry-run (verify + // only); match it so the JSON `rolledBack` count never claims a dry + // run mutated anything. + files_rolled_back: if common.dry_run { + Vec::new() + } else { + patch.files.keys().cloned().collect() + }, error: None, + // The go redirect leaves the module cache pristine — no in-place + // bytes changed, so there is no sidecar state to resync. + sidecar: None, }; if let Err(e) = remove_go_redirect( purl, @@ -177,7 +199,12 @@ fn get_before_hash_blobs(manifest: &PatchManifest) -> HashSet { let mut blobs = HashSet::new(); for patch in manifest.patches.values() { for file_info in patch.files.values() { - blobs.insert(file_info.before_hash.clone()); + // An empty beforeHash is the "file created by the patch" sentinel, + // not a blob: rollback deletes the file instead of restoring + // content, so there is nothing to download or gate on. + if !file_info.before_hash.is_empty() { + blobs.insert(file_info.before_hash.clone()); + } } } blobs @@ -244,6 +271,11 @@ fn result_to_json(result: &RollbackResult) -> serde_json::Value { "success": result.success, "error": result.error, "filesRolledBack": result.files_rolled_back, + // Rollback-side sidecar resync record (e.g. cargo's + // `.cargo-checksum.json` rewritten back to original hashes), or + // an error-severity advisory when the resync failed. Null when + // no sidecar applied — same serialization as `error` above. 
+ "sidecar": result.sidecar, "filesVerified": result.files_verified.iter().map(|f| { serde_json::json!({ "file": f.file, @@ -294,7 +326,7 @@ pub async fn run(args: RollbackArgs) -> i32 { .unwrap() ); } else { - eprintln!("One-off rollback mode: fetching patch data..."); + eprintln!("Error: One-off rollback mode is not yet implemented"); } return 1; } @@ -360,7 +392,7 @@ pub async fn run(args: RollbackArgs) -> i32 { warnings.push(serde_json::json!({ "code": LOCK_BROKEN_CODE, "message": format!( - "--break-lock removed {}/apply.lock before acquisition", + "--break-lock reclaimed stale {}/apply.lock (no live holder)", socket_dir.display() ), })); @@ -580,11 +612,28 @@ async fn rollback_patches_inner( setup: None, }; + // Partition PURLs by ecosystem up front. The before-blob gate and the + // download below must only consider patches this run can actually roll + // back — the `--ecosystems` filter plus the ecosystems compiled into this + // build. An out-of-scope patch with an absent before-blob must not abort + // (or trigger fetches for) a run that will never restore it. Mirrors + // apply's `scoped_manifest`. + let rollback_purls: Vec<String> = patches_to_rollback.iter().map(|p| p.purl.clone()).collect(); + let partitioned = partition_purls(&rollback_purls, args.common.ecosystems.as_deref()); + let in_scope: HashSet<String> = partitioned + .values() + .flat_map(|purls| purls.iter().cloned()) + .collect(); + let mut scoped_manifest = filtered_manifest.clone(); + scoped_manifest + .patches + .retain(|purl, _| in_scope.contains(purl)); + // Check for missing beforeHash blobs. Local-redirect PURLs (local-mode go) // are excluded: their rollback just drops the project-local redirect + copy // and reads no blobs, so a missing before-blob must not block an offline // redirect rollback.
- let gate_manifest = exclude_local_redirects(&filtered_manifest, &args.common); + let gate_manifest = exclude_local_redirects(&scoped_manifest, &args.common); let missing_blobs = get_missing_before_blobs(&gate_manifest, &blobs_path).await; if !missing_blobs.is_empty() { if args.common.offline { @@ -626,10 +675,6 @@ async fn rollback_patches_inner( } } - // Partition PURLs by ecosystem - let rollback_purls: Vec<String> = patches_to_rollback.iter().map(|p| p.purl.clone()).collect(); - let partitioned = partition_purls(&rollback_purls, args.common.ecosystems.as_deref()); - let crawler_options = CrawlerOptions { cwd: args.common.cwd.clone(), global: args.common.global, @@ -936,6 +981,7 @@ mod tests { files_verified, files_rolled_back: rolled_back.iter().map(|s| s.to_string()).collect(), error: None, + sidecar: None, } } @@ -1022,11 +1068,6 @@ mod tests { // manifest re-introduces local-go before-hashes that were never // downloaded, spuriously aborting a mixed rollback. - #[cfg(any(feature = "cargo", feature = "golang"))] - use socket_patch_core::manifest::schema::PatchFileInfo; - - // Only the cargo/golang-gated before-blob gate tests use this helper. - #[cfg(any(feature = "cargo", feature = "golang"))] fn record_with_file(uuid: &str, path: &str, before_hash: &str) -> PatchRecord { let mut rec = make_record(uuid); let mut files = HashMap::new(); @@ -1041,6 +1082,37 @@ mod tests { rec } + /// Regression: an empty `beforeHash` (the "file created by the patch" + /// sentinel) is not a blob. The missing-before-blob gate must ignore it: + /// `blobs_path.join("")` resolves to the blobs directory itself, so when + /// the blobs dir does not exist yet (fresh checkout of a committed + /// manifest, or a cache that was cleaned) the phantom "" counted as a + /// missing blob -- an `--offline` rollback of a new-file-only patch + /// aborted with "1 blob(s) are missing" even though it needs zero blobs, + /// and an online rollback fired a pointless download of blob "".
+ #[tokio::test] + async fn missing_before_blobs_ignores_new_file_sentinel() { + let mut patches = HashMap::new(); + patches.insert( + "pkg:npm/foo@1.0.0".to_string(), + record_with_file("uuid-npm", "created.js", ""), + ); + let manifest = PatchManifest { + patches, + setup: None, + }; + + // Blobs dir does NOT exist (nothing ever downloaded). + let tmp = tempfile::tempdir().unwrap(); + let blobs = tmp.path().join("blobs"); + + let missing = get_missing_before_blobs(&manifest, &blobs).await; + assert!( + missing.is_empty(), + "a new-file-only patch needs no before-blobs, got {missing:?}" + ); + } + /// Cargo now patches in place (vendored or registry cache) and rolls back /// by restoring from before-blobs — exactly like npm/pypi. So a cargo PURL /// must NOT be excluded by the before-blob gate: a missing cargo before-blob @@ -1239,6 +1311,74 @@ mod tests { ); } + /// Regression: a dry-run local-go rollback must not CLAIM files were + /// rolled back. The engine leaves `files_rolled_back` empty on dry-run + /// (verify only — `rollback_package_patch` pushes into it only on the + /// mutating path), and the JSON envelope counts `rolledBack` from a + /// non-empty `files_rolled_back`. Before the fix the go backend populated + /// it unconditionally, so `rollback --dry-run --json` reported + /// `rolledBack: 1` (with the files listed in `filesRolledBack`) for a run + /// that mutated nothing. 
+ #[cfg(feature = "golang")] + #[tokio::test] + async fn try_rollback_local_go_dry_run_reports_no_files_rolled_back() { + use socket_patch_core::patch::go_mod_edit::{ + ensure_replace_entry, read_replace_entries, GO_PATCHES_DIR, + }; + + const MODULE: &str = "github.com/foo/bar"; + const VERSION: &str = "v1.4.2"; + const PURL: &str = "pkg:golang/github.com/foo/bar@v1.4.2"; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + tokio::fs::write( + root.join("go.mod"), + "module myproj\n\ngo 1.21\n\nrequire github.com/foo/bar v1.4.2\n", + ) + .await + .unwrap(); + assert!( + ensure_replace_entry(root, MODULE, VERSION, GO_PATCHES_DIR, false) + .await + .unwrap() + ); + let copy_dir = root.join(".socket/go-patches/github.com/foo/bar@v1.4.2"); + tokio::fs::create_dir_all(&copy_dir).await.unwrap(); + + let patch = record_with_file("uuid-go", "errors.go", "go_before"); + let common = crate::args::GlobalArgs { + cwd: root.to_path_buf(), + dry_run: true, + ..crate::args::GlobalArgs::default() + }; + let result = try_rollback_local_go(PURL, root, &patch, &common) + .await + .expect("go PURL in local mode must be handled by the go backend"); + + assert!( + result.success, + "dry-run rollback failed: {:?}", + result.error + ); + assert!( + result.files_rolled_back.is_empty(), + "dry-run must not claim files were rolled back (the JSON \ + `rolledBack` count is derived from this), got {:?}", + result.files_rolled_back + ); + // And dry-run must not have mutated anything: the redirect and the + // patched copy both survive. + assert!( + read_replace_entries(root) + .await + .iter() + .any(|e| e.module == MODULE && e.socket_owned()), + "dry-run must leave the replace directive in place" + ); + assert!(copy_dir.exists(), "dry-run must leave the patched copy"); + } + /// A go PURL under `--global` is an in-place module-cache rollback, NOT a /// redirect — `try_rollback_local_go` must decline it so the caller falls /// through to `rollback_package_patch`.
@@ -1262,4 +1402,117 @@ mod tests { "global go must not use the redirect backend" ); } + + // --- Before-blob gate `--ecosystems` scoping -------------------------- + // + // Twin of apply's (fixed) "offline guard unscoped" bug: the gate must + // only consider patches this run can actually roll back — the + // `--ecosystems` filter plus the ecosystems compiled into this build. + + /// Regression: an out-of-scope patch's missing before-blob must not abort + /// an `--ecosystems`-scoped rollback. Before the fix the gate ran on the + /// identifier-filtered manifest BEFORE `partition_purls`, so + /// `rollback --ecosystems npm --offline` aborted the whole run because a + /// pypi patch — which this run would never touch — was missing its + /// before-blob (and online, the gate triggered needless downloads for it). + #[tokio::test] + async fn before_blob_gate_ignores_ecosystem_filtered_patches() { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let socket = root.join(".socket"); + let blobs = socket.join("blobs"); + tokio::fs::create_dir_all(&blobs).await.unwrap(); + + // npm patch (in scope): before-blob present. + // pypi patch (filtered out by `--ecosystems npm`): before-blob ABSENT. + let mut patches = HashMap::new(); + patches.insert( + "pkg:npm/foo@1.0.0".to_string(), + record_with_file("uuid-npm", "package/index.js", "npm_before_hash"), + ); + patches.insert( + "pkg:pypi/six@1.16.0".to_string(), + record_with_file("uuid-pypi", "six.py", "pypi_before_hash"), + ); + let manifest = PatchManifest { + patches, + setup: None, + }; + let manifest_path = socket.join("manifest.json"); + tokio::fs::write(&manifest_path, serde_json::to_string(&manifest).unwrap()) + .await + .unwrap(); + tokio::fs::write(blobs.join("npm_before_hash"), b"x") + .await + .unwrap(); + + // With no npm package installed under the tempdir the run finds + // nothing to do — but it must get past the gate and report success, + // not abort over a blob it would never read. 
+ let (success, results, _vendored_skipped) = rollback_patches( + root, + &manifest_path, + None, + false, // dry_run + true, // silent + true, // offline + false, // global + None, + Some(vec!["npm".to_string()]), + ) + .await + .expect("rollback must not error"); + assert!(results.is_empty(), "nothing installed, nothing rolled back"); + assert!( + success, + "an out-of-scope patch's missing before-blob must not abort an \ + --ecosystems-scoped offline rollback" + ); + } + + /// The scoped gate still protects in-scope patches: with no + /// `--ecosystems` filter, a missing before-blob for an in-scope npm patch + /// must abort the offline run exactly as before. + #[tokio::test] + async fn before_blob_gate_still_blocks_in_scope_missing_blob() { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let socket = root.join(".socket"); + let blobs = socket.join("blobs"); + tokio::fs::create_dir_all(&blobs).await.unwrap(); + + let mut patches = HashMap::new(); + patches.insert( + "pkg:npm/foo@1.0.0".to_string(), + record_with_file("uuid-npm", "package/index.js", "npm_before_hash"), + ); + let manifest = PatchManifest { + patches, + setup: None, + }; + let manifest_path = socket.join("manifest.json"); + tokio::fs::write(&manifest_path, serde_json::to_string(&manifest).unwrap()) + .await + .unwrap(); + // The npm before-blob is deliberately absent. 
+ + let (success, results, _vendored_skipped) = rollback_patches( + root, + &manifest_path, + None, + false, // dry_run + true, // silent + true, // offline + false, // global + None, + None, // no ecosystem filter — the npm patch is in scope + ) + .await + .expect("rollback must not error"); + assert!(results.is_empty()); + assert!( + !success, + "an in-scope missing before-blob must still abort the offline run" + ); + } } diff --git a/crates/socket-patch-cli/src/commands/scan.rs b/crates/socket-patch-cli/src/commands/scan.rs index 73c9709..f395763 100644 --- a/crates/socket-patch-cli/src/commands/scan.rs +++ b/crates/socket-patch-cli/src/commands/scan.rs @@ -1052,7 +1052,11 @@ pub async fn run(args: ScanArgs) -> i32 { "packages" }; - let show_progress = !args.common.json && stderr_is_tty(); + // `--silent` is "errors only" (CLI_CONTRACT.md): progress, the crawl + // summary, the results table, and the per-patch listing are all + // suppressed below, mirroring `list`/`get`/`repair`/`remove`. Errors + // and the JSON envelope are unaffected. + let show_progress = !args.common.json && !args.common.silent && stderr_is_tty(); if show_progress { eprint!("Scanning {scan_target}..."); @@ -1137,6 +1141,8 @@ pub async fn run(args: ScanArgs) -> i32 { embed_vex_into_json(&args.common, &args.vex, &manifest_path, 0, &mut result).await; println!("{}", serde_json::to_string_pretty(&result).unwrap()); return code; + } else if args.common.silent { + // Errors only: the empty-scan hint is informational. 
} else if args.common.global || args.common.global_prefix.is_some() { println!("No global packages found."); } else { @@ -1177,7 +1183,7 @@ pub async fn run(args: ScanArgs) -> i32 { format!(" ({})", eco_parts.join(", ")) }; - if !args.common.json { + if !args.common.json && !args.common.silent { if show_progress { eprintln!("\rFound {package_count} packages{eco_summary}"); } else { @@ -1297,7 +1303,7 @@ pub async fn run(args: ScanArgs) -> i32 { .map(|p| p.patches.len()) .sum(); - if !args.common.json { + if !args.common.json && !args.common.silent { if total_patches_found > 0 { if show_progress { eprintln!( @@ -1578,141 +1584,148 @@ pub async fn run(args: ScanArgs) -> i32 { let use_color = stdout_is_tty(); if all_packages_with_patches.is_empty() { - println!("\nNo patches available for installed packages."); + if !args.common.silent { + println!("\nNo patches available for installed packages."); + } return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await; } - let mut updates_available = 0usize; + // The whole table + summary section is presentational only (nothing + // computed inside is consumed downstream), so `--silent` skips it + // wholesale. + if !args.common.silent { + let mut updates_available = 0usize; + + // Canonical set of PURLs with a newer patch available, computed once via + // `detect_updates` (the same source the JSON `updates` array uses). The + // table path MUST agree with the JSON path, so reuse that result rather + // than re-deriving it: comparing against *any* batch patch (instead of the + // first/candidate one `select_patches` would resolve to) over-reports + // updates whenever the manifest already holds the newest patch but older + // patches also appear in the batch. + let update_purls: HashSet<&str> = updates.iter().map(|u| u.purl.as_str()).collect(); - // Canonical set of PURLs with a newer patch available, computed once via - // `detect_updates` (the same source the JSON `updates` array uses). 
The - // table path MUST agree with the JSON path, so reuse that result rather - // than re-deriving it: comparing against *any* batch patch (instead of the - // first/candidate one `select_patches` would resolve to) over-reports - // updates whenever the manifest already holds the newest patch but older - // patches also appear in the batch. - let update_purls: HashSet<&str> = updates.iter().map(|u| u.purl.as_str()).collect(); + // Print table + println!("\n{}", "=".repeat(100)); + println!( + "{} {} {} VULNERABILITIES", + "PACKAGE".to_string() + &" ".repeat(33), + "PATCHES".to_string() + " ", + "SEVERITY".to_string() + &" ".repeat(8), + ); + println!("{}", "=".repeat(100)); - // Print table - println!("\n{}", "=".repeat(100)); - println!( - "{} {} {} VULNERABILITIES", - "PACKAGE".to_string() + &" ".repeat(33), - "PATCHES".to_string() + " ", - "SEVERITY".to_string() + &" ".repeat(8), - ); - println!("{}", "=".repeat(100)); + for pkg in &all_packages_with_patches { + // Char-safe truncation: a byte slice (`&pkg.purl[..37]`) panics + // when the cut lands mid-codepoint. PURLs can carry non-ASCII + // names/qualifiers, so route through the shared helper. + let display_purl = truncate_with_ellipsis(&pkg.purl, 40); - for pkg in &all_packages_with_patches { - // Char-safe truncation: a byte slice (`&pkg.purl[..37]`) panics - // when the cut lands mid-codepoint. PURLs can carry non-ASCII - // names/qualifiers, so route through the shared helper. 
- let display_purl = truncate_with_ellipsis(&pkg.purl, 40); + let pkg_free = pkg.patches.iter().filter(|p| p.tier == "free").count(); + let pkg_paid = pkg.patches.iter().filter(|p| p.tier == "paid").count(); - let pkg_free = pkg.patches.iter().filter(|p| p.tier == "free").count(); - let pkg_paid = pkg.patches.iter().filter(|p| p.tier == "paid").count(); + let count_str = if pkg_paid > 0 { + if can_access_paid_patches { + format!("{}+{}", pkg_free, pkg_paid) + } else { + format!( + "{}+{}", + pkg_free, + color(&pkg_paid.to_string(), "33", use_color) + ) + } + } else { + format!("{}", pkg_free) + }; - let count_str = if pkg_paid > 0 { - if can_access_paid_patches { - format!("{}+{}", pkg_free, pkg_paid) + // Get highest severity + let severity = pkg + .patches + .iter() + .filter_map(|p| p.severity.as_deref()) + .min_by_key(|s| severity_order(s)) + .unwrap_or("unknown"); + + // Collect vuln IDs (deterministic: deduped, CVEs then GHSAs, + // each group sorted — see collect_vuln_ids). + let vuln_ids = collect_vuln_ids(pkg); + let vuln_str = if vuln_ids.len() > 2 { + format!("{} (+{})", vuln_ids[..2].join(", "), vuln_ids.len() - 2) + } else if vuln_ids.is_empty() { + "-".to_string() } else { - format!( - "{}+{}", - pkg_free, - color(&pkg_paid.to_string(), "33", use_color) - ) + vuln_ids.join(", ") + }; + + // Check for updates — consult the canonical `detect_updates` result + // (mirrored into `update_purls`) so the human table and JSON `updates` + // array never disagree. + let has_update = update_purls.contains(pkg.purl.as_str()); + if has_update { + updates_available += 1; } - } else { - format!("{}", pkg_free) - }; - // Get highest severity - let severity = pkg - .patches - .iter() - .filter_map(|p| p.severity.as_deref()) - .min_by_key(|s| severity_order(s)) - .unwrap_or("unknown"); - - // Collect vuln IDs (deterministic: deduped, CVEs then GHSAs, - // each group sorted — see collect_vuln_ids). 
- let vuln_ids = collect_vuln_ids(pkg); - let vuln_str = if vuln_ids.len() > 2 { - format!("{} (+{})", vuln_ids[..2].join(", "), vuln_ids.len() - 2) - } else if vuln_ids.is_empty() { - "-".to_string() - } else { - vuln_ids.join(", ") - }; + let update_marker = if has_update { + color(" [UPDATE]", "33", use_color) + } else { + String::new() + }; - // Check for updates — consult the canonical `detect_updates` result - // (mirrored into `update_purls`) so the human table and JSON `updates` - // array never disagree. - let has_update = update_purls.contains(pkg.purl.as_str()); - if has_update { - updates_available += 1; + println!( + "{:<40} {:>8} {:<16} {}{}", + display_purl, + count_str, + format_severity(severity, use_color), + vuln_str, + update_marker, + ); } - let update_marker = if has_update { - color(" [UPDATE]", "33", use_color) - } else { - String::new() - }; + println!("{}", "=".repeat(100)); - println!( - "{:<40} {:>8} {:<16} {}{}", - display_purl, - count_str, - format_severity(severity, use_color), - vuln_str, - update_marker, - ); - } - - println!("{}", "=".repeat(100)); + // Summary + if can_access_paid_patches { + println!( + "\nSummary: {} package(s) with {} available patch(es)", + all_packages_with_patches.len(), + total_patches, + ); + } else { + println!( + "\nSummary: {} package(s) with {} free patch(es)", + all_packages_with_patches.len(), + free_patches, + ); + if paid_patches > 0 { + println!( + "{}", + color( + &format!( + " + {} additional patch(es) available with paid subscription", + paid_patches + ), + "33", + use_color, + ), + ); + println!( + "\nUpgrade to Socket's paid plan to access all patches: https://socket.dev/pricing" + ); + } + } - // Summary - if can_access_paid_patches { - println!( - "\nSummary: {} package(s) with {} available patch(es)", - all_packages_with_patches.len(), - total_patches, - ); - } else { - println!( - "\nSummary: {} package(s) with {} free patch(es)", - all_packages_with_patches.len(), - free_patches, - ); - 
if paid_patches > 0 { + if updates_available > 0 { println!( - "{}", + "\n{}", color( - &format!( - " + {} additional patch(es) available with paid subscription", - paid_patches - ), + &format!("{updates_available} package(s) have newer patches available."), "33", use_color, ), ); - println!( - "\nUpgrade to Socket's paid plan to access all patches: https://socket.dev/pricing" - ); } } - if updates_available > 0 { - println!( - "\n{}", - color( - &format!("{updates_available} package(s) have newer patches available."), - "33", - use_color, - ), - ); - } - // Count downloadable patches let downloadable_count = if can_access_paid_patches { all_packages_with_patches.len() @@ -1724,7 +1737,9 @@ pub async fn run(args: ScanArgs) -> i32 { }; if downloadable_count == 0 { - println!("\nNo downloadable patches (paid subscription required)."); + if !args.common.silent { + println!("\nNo downloadable patches (paid subscription required)."); + } return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await; } @@ -1750,7 +1765,9 @@ pub async fn run(args: ScanArgs) -> i32 { all_search_results.extend(response.patches); } Err(e) => { - eprintln!("\n Warning: could not fetch details for {}: {e}", pkg.purl); + if !args.common.silent { + eprintln!("\n Warning: could not fetch details for {}: {e}", pkg.purl); + } } } } @@ -1783,76 +1800,83 @@ pub async fn run(args: ScanArgs) -> i32 { } else { selected.into_iter().partition(|p| is_vendored(&p.purl)) }; - for p in &vendored_selected { - println!( - " [skip] {} (vendored — run scan --vendor to update)", - p.purl - ); + if !args.common.silent { + for p in &vendored_selected { + println!( + " [skip] {} (vendored — run scan --vendor to update)", + p.purl + ); + } } if selected.is_empty() && !args.vendor { - println!("No patches selected."); + if !args.common.silent { + println!("No patches selected."); + } return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await; } // Display detailed summary of selected patches 
before confirming - if args.vendor { - println!("\nPatches to vendor:\n"); - } else { - println!("\nPatches to apply:\n"); - } - for patch in &selected { - // Collect CVE/GHSA IDs and highest severity from vulnerabilities - let mut vuln_ids: Vec = Vec::new(); - let mut highest_severity: Option<&str> = None; - for (id, vuln) in &patch.vulnerabilities { - if vuln.cves.is_empty() { - vuln_ids.push(id.clone()); - } else { - for cve in &vuln.cves { - vuln_ids.push(cve.clone()); + // (presentational only — skipped wholesale under --silent). + if !args.common.silent { + if args.vendor { + println!("\nPatches to vendor:\n"); + } else { + println!("\nPatches to apply:\n"); + } + for patch in &selected { + // Collect CVE/GHSA IDs and highest severity from vulnerabilities + let mut vuln_ids: Vec = Vec::new(); + let mut highest_severity: Option<&str> = None; + for (id, vuln) in &patch.vulnerabilities { + if vuln.cves.is_empty() { + vuln_ids.push(id.clone()); + } else { + for cve in &vuln.cves { + vuln_ids.push(cve.clone()); + } + } + let sev = vuln.severity.as_str(); + if highest_severity.is_none_or(|cur| severity_order(sev) < severity_order(cur)) { + highest_severity = Some(sev); } } - let sev = vuln.severity.as_str(); - if highest_severity.is_none_or(|cur| severity_order(sev) < severity_order(cur)) { - highest_severity = Some(sev); - } - } - let sev_display = highest_severity.unwrap_or("unknown"); - let sev_colored = format_severity(sev_display, use_color); + let sev_display = highest_severity.unwrap_or("unknown"); + let sev_colored = format_severity(sev_display, use_color); - // Char-safe: descriptions come straight from the API and routinely - // contain non-ASCII text; a `&desc[..69]` byte slice would panic. - let desc = truncate_with_ellipsis(&patch.description, 72); + // Char-safe: descriptions come straight from the API and routinely + // contain non-ASCII text; a `&desc[..69]` byte slice would panic. 
+ let desc = truncate_with_ellipsis(&patch.description, 72); - println!( - " {} [{}] {}", - patch.purl, - patch.tier.to_uppercase(), - sev_colored, - ); - if !vuln_ids.is_empty() { - println!(" Fixes: {}", vuln_ids.join(", ")); - } - // Show per-vulnerability summaries - for vuln in patch.vulnerabilities.values() { - if !vuln.summary.is_empty() { - // Char-safe: vulnerability summaries are API-sourced free - // text; a `&summary[..73]` byte slice would panic mid-codepoint. - let summary = truncate_with_ellipsis(&vuln.summary, 76); - let cve_label = if vuln.cves.is_empty() { - String::new() - } else { - format!("{}: ", vuln.cves.join(", ")) - }; - println!(" - {cve_label}{summary}"); + println!( + " {} [{}] {}", + patch.purl, + patch.tier.to_uppercase(), + sev_colored, + ); + if !vuln_ids.is_empty() { + println!(" Fixes: {}", vuln_ids.join(", ")); } + // Show per-vulnerability summaries + for vuln in patch.vulnerabilities.values() { + if !vuln.summary.is_empty() { + // Char-safe: vulnerability summaries are API-sourced free + // text; a `&summary[..73]` byte slice would panic mid-codepoint. + let summary = truncate_with_ellipsis(&vuln.summary, 76); + let cve_label = if vuln.cves.is_empty() { + String::new() + } else { + format!("{}: ", vuln.cves.join(", ")) + }; + println!(" - {cve_label}{summary}"); + } + } + if !desc.is_empty() { + println!(" {desc}"); + } + println!(); } - if !desc.is_empty() { - println!(" {desc}"); - } - println!(); } // `--dry-run` is a non-mutating preview (see the global flag's doc and @@ -1861,15 +1885,17 @@ pub async fn run(args: ScanArgs) -> i32 { // before the confirm prompt, the download/apply, and the prune GC — all // of which mutate the manifest and `.socket/` on disk. if args.common.dry_run { - let action = if args.vendor { - "download and vendor" - } else { - "download and apply" - }; - println!( - "\n[dry-run] Would {action} {} patch(es). 
No changes made.", - selected.len() - ); + if !args.common.silent { + let action = if args.vendor { + "download and vendor" + } else { + "download and apply" + }; + println!( + "\n[dry-run] Would {action} {} patch(es). No changes made.", + selected.len() + ); + } return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await; } @@ -1877,9 +1903,11 @@ pub async fn run(args: ScanArgs) -> i32 { let verb = if args.vendor { "vendor" } else { "apply" }; let prompt = format!("Download and {verb} {} patch(es)?", selected.len()); if !confirm(&prompt, true, args.common.yes, args.common.json) { - println!("\nTo apply a patch, run:"); - println!(" socket-patch get "); - println!(" socket-patch get "); + if !args.common.silent { + println!("\nTo apply a patch, run:"); + println!(" socket-patch get "); + println!(" socket-patch get "); + } return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await; } @@ -1893,7 +1921,7 @@ pub async fn run(args: ScanArgs) -> i32 { global: args.common.global, global_prefix: args.common.global_prefix.clone(), json: false, - silent: false, + silent: args.common.silent, download_mode: args.common.download_mode.clone(), api_overrides: args.common.api_client_overrides(), all_releases: args.all_releases, @@ -1928,7 +1956,7 @@ pub async fn run(args: ScanArgs) -> i32 { if prune && !args.vendor { let gc = run_apply_gc(&manifest_path, &socket_dir, &scanned_purls, &vendored_purls).await; let total = gc.blobs.blobs_removed + gc.diffs.blobs_removed + gc.packages.blobs_removed; - if !gc.pruned.is_empty() || total > 0 { + if !args.common.silent && (!gc.pruned.is_empty() || total > 0) { println!( "\nGC: pruned {} manifest entr{} and removed {} orphan file{} ({}).", gc.pruned.len(), diff --git a/crates/socket-patch-cli/src/commands/setup.rs b/crates/socket-patch-cli/src/commands/setup.rs index c450134..c11899f 100644 --- a/crates/socket-patch-cli/src/commands/setup.rs +++ b/crates/socket-patch-cli/src/commands/setup.rs @@ -154,7 +154,7 @@ 
fn report_no_files(args: &SetupArgs, status: &str, counts: &[(&str, i64)]) -> i3 "{}", serde_json::to_string_pretty(&serde_json::Value::Object(map)).unwrap() ); - } else { + } else if !args.common.silent { println!("No package.json, Python, Bundler, or Composer project found"); } 0 @@ -828,8 +828,11 @@ async fn append_patch_consistency_entries( global_prefix: common.global_prefix.clone(), batch_size: 0, // unused for find_packages_for_rollback }; + // `--json` reserves stdout for the check report: silence the dispatch's + // human chrome ("Using at: ...") like apply/rollback do. let package_paths = - find_packages_for_rollback(&partitioned, &crawler_options, common.silent).await; + find_packages_for_rollback(&partitioned, &crawler_options, common.silent || common.json) + .await; let outcome = applied_patches(&manifest, &package_paths).await; for failed in &outcome.failed { @@ -875,7 +878,10 @@ enum CheckState { /// (so `--dry-run` is a harmless no-op here). Exits 0 only when all are /// configured and none failed to parse. async fn run_check(args: &SetupArgs) -> i32 { - if !args.common.json { + // `--silent` is "errors only" (CLI_CONTRACT.md): suppress the entire + // human-readable report, mirroring `list`/`repair`/`get`/`remove`/`scan`. + // The exit code still distinguishes the configuration states. + if !args.common.json && !args.common.silent { println!("Searching for package.json / Python / Bundler / Composer manifests..."); } @@ -987,7 +993,7 @@ async fn run_check(args: &SetupArgs) -> i32 { })) .unwrap() ); - } else { + } else if !args.common.silent { println!("\nConfiguration status:\n"); for (_, path, state, err) in &entries { let rel = pathdiff(path, &args.common.cwd); @@ -1032,7 +1038,11 @@ fn render_removed(new: &Option) -> String { /// Python `socket-patch-hook` dependency). Honors `--dry-run`, `--yes`, `--json`. 
async fn run_remove(args: &SetupArgs) -> i32 { let common = &args.common; - if !common.json { + // `--silent` is "errors only" (CLI_CONTRACT.md): mute the human-readable + // chatter just like `--json` does; the mutation and exit code are + // unaffected, and prompting follows the shared `confirm()` semantics. + let quiet = common.json || common.silent; + if !quiet { println!("Searching for package.json / Python / Bundler / Composer manifests..."); } @@ -1067,7 +1077,7 @@ async fn run_remove(args: &SetupArgs) -> i32 { None => Vec::new(), }; - if !common.json { + if !quiet { print_remove_preview(&npm_preview, &py_preview, &extra_preview, common); } @@ -1104,10 +1114,12 @@ async fn run_remove(args: &SetupArgs) -> i32 { &extra_preview, &[], ); - } else if preview_errs > 0 { - println!("Nothing removed; {preview_errs} item(s) could not be processed (see errors above)."); - } else { - println!("No socket-patch install hooks found to remove."); + } else if !common.silent { + if preview_errs > 0 { + println!("Nothing removed; {preview_errs} item(s) could not be processed (see errors above)."); + } else { + println!("No socket-patch install hooks found to remove."); + } } return if preview_errs > 0 { 1 } else { 0 }; } @@ -1116,7 +1128,7 @@ async fn run_remove(args: &SetupArgs) -> i32 { if common.dry_run { if common.json { print_remove_envelope("dry_run", &npm_preview, &py_preview, &extra_preview, &[]); - } else { + } else if !common.silent { println!("\nSummary:"); println!(" {n_remove} item(s) would have socket-patch removed"); } @@ -1140,7 +1152,7 @@ async fn run_remove(args: &SetupArgs) -> i32 { } } - if !common.json { + if !quiet { println!("\nRemoving changes..."); } let mut npm_results = Vec::new(); @@ -1182,7 +1194,7 @@ async fn run_remove(args: &SetupArgs) -> i32 { &extra_results, &warnings, ); - } else { + } else if !common.silent { let removed = npm_results .iter() .filter(|r| r.status == RemoveStatus::Removed) @@ -1379,7 +1391,11 @@ fn print_remove_envelope( 
async fn run_setup(args: &SetupArgs) -> i32 { let common = &args.common; - if !common.json { + // `--silent` is "errors only" (CLI_CONTRACT.md): mute the human-readable + // chatter just like `--json` does; the mutation and exit code are + // unaffected, and prompting follows the shared `confirm()` semantics. + let quiet = common.json || common.silent; + if !quiet { println!("Configuring socket-patch install hooks..."); } @@ -1414,7 +1430,7 @@ async fn run_setup(args: &SetupArgs) -> i32 { })) .unwrap() ); - } else { + } else if !common.silent { println!("No package.json, Python, Bundler, or Composer project found"); } return 0; @@ -1450,7 +1466,7 @@ async fn run_setup(args: &SetupArgs) -> i32 { None => Vec::new(), }; - if !common.json { + if !quiet { print_setup_preview(&npm_preview, &py_preview, &extra_preview, common); } @@ -1488,10 +1504,12 @@ async fn run_setup(args: &SetupArgs) -> i32 { py_plan.as_ref(), &[], ); - } else if preview_errors > 0 { - println!("No hooks were changed; {preview_errors} item(s) could not be processed (see errors above)."); - } else { - println!("All install hooks are already configured with socket-patch!"); + } else if !common.silent { + if preview_errors > 0 { + println!("No hooks were changed; {preview_errors} item(s) could not be processed (see errors above)."); + } else { + println!("All install hooks are already configured with socket-patch!"); + } } return if preview_errors > 0 { 1 } else { 0 }; } @@ -1507,7 +1525,7 @@ async fn run_setup(args: &SetupArgs) -> i32 { py_plan.as_ref(), &[], ); - } else { + } else if !common.silent { println!("\nSummary (dry run):"); println!(" {n_changes} item(s) would be updated"); } @@ -1530,7 +1548,7 @@ async fn run_setup(args: &SetupArgs) -> i32 { } } - if !common.json { + if !quiet { println!("\nApplying changes..."); } @@ -1581,7 +1599,7 @@ async fn run_setup(args: &SetupArgs) -> i32 { py_plan.as_ref(), &warnings, ); - } else { + } else if !common.silent { let updated = npm_results .iter() 
.filter(|r| r.status == UpdateStatus::Updated) diff --git a/crates/socket-patch-cli/src/commands/unlock.rs b/crates/socket-patch-cli/src/commands/unlock.rs index f911ead..77d4926 100644 --- a/crates/socket-patch-cli/src/commands/unlock.rs +++ b/crates/socket-patch-cli/src/commands/unlock.rs @@ -23,7 +23,7 @@ use clap::Args; use socket_patch_core::patch::apply_lock::{acquire, LockError}; use socket_patch_core::utils::telemetry::{track_patch_unlock_failed, track_patch_unlocked}; -use crate::args::{apply_env_toggles, GlobalArgs}; +use crate::args::{apply_env_toggles, parse_bool_flag, GlobalArgs}; use crate::json_envelope::{Command, Envelope, EnvelopeError}; #[derive(Args)] @@ -34,10 +34,19 @@ pub struct UnlockArgs { /// When the lock is free, also delete the lock file. Refused if /// the lock is currently held — use `--break-lock` on the /// mutating subcommand instead for that scenario. + /// + /// `value_parser = parse_bool_flag` matches the `GlobalArgs` bool + /// flags: clap's default bool parser accepts only the literal + /// strings `true`/`false` from the env binding, so + /// `SOCKET_UNLOCK_RELEASE=1` (or an exported-but-empty + /// `SOCKET_UNLOCK_RELEASE=`) aborted every `unlock` invocation. + /// This flag is also outside `GLOBAL_ARG_ENV_VARS`, so `main`'s + /// empty-var scrub never rescues it. #[arg( long = "release", env = "SOCKET_UNLOCK_RELEASE", - default_value_t = false + default_value_t = false, + value_parser = parse_bool_flag, )] pub release: bool, } @@ -45,7 +54,13 @@ pub struct UnlockArgs { pub async fn run(args: UnlockArgs) -> i32 { apply_env_toggles(&args.common); - let socket_dir = args.common.cwd.join(".socket"); + // Derive the lock directory exactly like the mutating subcommands + // do (`manifest_path.parent()`) — they're the processes whose lock + // this command exists to observe. Hardcoding `/.socket` here + // would probe a directory nobody locks whenever `--manifest-path` + // points elsewhere. 
+ let manifest_path = args.common.resolved_manifest_path(); + let socket_dir = manifest_path.parent().unwrap_or(Path::new(".")); let lock_file = socket_dir.join("apply.lock"); let api_token = args.common.api_token.clone(); let org_slug = args.common.org.clone(); @@ -59,7 +74,13 @@ pub async fn run(args: UnlockArgs) -> i32 { // user passed --release. Telemetry and the emitted envelope // must agree on this. track_patch_unlocked(false, false, api_token.as_deref(), org_slug.as_deref()).await; - return emit_free(args.common.json, &lock_file, false, args.release); + return emit_free( + args.common.json, + args.common.silent, + &lock_file, + false, + args.release, + ); } // Snapshot whether a lock file already exists *before* acquiring. @@ -70,7 +91,7 @@ pub async fn run(args: UnlockArgs) -> i32 { // created), we have to capture this now. let lock_existed = lock_file.exists(); - match acquire(&socket_dir, Duration::ZERO) { + match acquire(socket_dir, Duration::ZERO) { Ok(guard) => { // We successfully claimed the lock — nobody else holds // it. Release our handle before deleting the file so the @@ -95,7 +116,13 @@ pub async fn run(args: UnlockArgs) -> i32 { org_slug.as_deref(), ) .await; - emit_free(args.common.json, &lock_file, lock_existed, true) + emit_free( + args.common.json, + args.common.silent, + &lock_file, + lock_existed, + true, + ) } Err(e) if e.kind() == std::io::ErrorKind::NotFound => { // The file was never created (e.g. 
socket @@ -108,7 +135,13 @@ pub async fn run(args: UnlockArgs) -> i32 { org_slug.as_deref(), ) .await; - emit_free(args.common.json, &lock_file, false, true) + emit_free( + args.common.json, + args.common.silent, + &lock_file, + false, + true, + ) } Err(e) => { let msg = format!( @@ -124,7 +157,13 @@ pub async fn run(args: UnlockArgs) -> i32 { } } else { track_patch_unlocked(false, false, api_token.as_deref(), org_slug.as_deref()).await; - emit_free(args.common.json, &lock_file, false, false) + emit_free( + args.common.json, + args.common.silent, + &lock_file, + false, + false, + ) } } Err(LockError::Held) => { @@ -173,7 +212,10 @@ pub async fn run(args: UnlockArgs) -> i32 { /// Print the "free" success envelope and return exit code 0. /// `removed` is true when `--release` actually deleted the file /// (vs. the no-op case where the file didn't exist). -fn emit_free(json: bool, lock_file: &Path, removed: bool, release: bool) -> i32 { +/// `silent` suppresses the human-readable lines (the JSON envelope is +/// machine output and always prints) — same `--silent` contract as the +/// sibling subcommands. +fn emit_free(json: bool, silent: bool, lock_file: &Path, removed: bool, release: bool) -> i32 { if json { // Build the success body by hand rather than re-using the // shared `Envelope` shape — the `events`/`summary` fields @@ -188,6 +230,8 @@ fn emit_free(json: bool, lock_file: &Path, removed: bool, release: bool) -> i32 "released": removed, }); println!("{}", serde_json::to_string_pretty(&body).unwrap()); + } else if silent { + // Suppress the informational lines; the exit code carries the verdict. } else if release && removed { println!("Lock is free. 
Removed {}.", lock_file.display()); } else if release { diff --git a/crates/socket-patch-cli/src/commands/vex.rs b/crates/socket-patch-cli/src/commands/vex.rs index 8761032..727c28e 100644 --- a/crates/socket-patch-cli/src/commands/vex.rs +++ b/crates/socket-patch-cli/src/commands/vex.rs @@ -26,7 +26,7 @@ use socket_patch_core::vex::{ VendorContext, VerifyOutcome, }; -use crate::args::{apply_env_toggles, GlobalArgs}; +use crate::args::{apply_env_toggles, parse_bool_flag, GlobalArgs}; use crate::ecosystem_dispatch::{find_packages_for_rollback, partition_purls}; use crate::json_envelope::{Command, Envelope, EnvelopeError, PatchAction, PatchEvent}; @@ -55,10 +55,18 @@ pub struct VexArgs { /// emitted; this flag flips that off — useful when generating a /// VEX doc on a build machine that doesn't have the patched files /// laid out yet. + /// + /// `value_parser = parse_bool_flag` matches the `GlobalArgs` bool flags: + /// clap's default bool parser accepts only the literal strings + /// `true`/`false` from the env binding, so `SOCKET_VEX_NO_VERIFY=1` (or + /// an exported-but-empty `SOCKET_VEX_NO_VERIFY=`) aborted the parse. + /// This var is also outside `GLOBAL_ARG_ENV_VARS`, so `main`'s empty-var + /// scrub never rescues it. #[arg( long = "no-verify", env = "SOCKET_VEX_NO_VERIFY", - default_value_t = false + default_value_t = false, + value_parser = parse_bool_flag, )] pub no_verify: bool, @@ -69,7 +77,12 @@ pub struct VexArgs { pub doc_id: Option, /// Emit compact JSON instead of pretty-printed. - #[arg(long = "compact", env = "SOCKET_VEX_COMPACT", default_value_t = false)] + #[arg( + long = "compact", + env = "SOCKET_VEX_COMPACT", + default_value_t = false, + value_parser = parse_bool_flag, + )] pub compact: bool, } @@ -96,10 +109,16 @@ pub struct VexEmbedArgs { /// Skip the on-disk file-hash check when building the VEX document and /// trust the manifest. See `socket-patch vex --no-verify`. 
+ /// + /// `value_parser = parse_bool_flag`: these embedded flags share their + /// env vars with the standalone `vex` flags, so without it an ambient + /// `SOCKET_VEX_NO_VERIFY=1` (or `=`) aborted every host command parse — + /// including `apply` running from a postinstall hook. #[arg( long = "vex-no-verify", env = "SOCKET_VEX_NO_VERIFY", - default_value_t = false + default_value_t = false, + value_parser = parse_bool_flag, )] pub vex_no_verify: bool, @@ -111,7 +130,8 @@ pub struct VexEmbedArgs { #[arg( long = "vex-compact", env = "SOCKET_VEX_COMPACT", - default_value_t = false + default_value_t = false, + value_parser = parse_bool_flag, )] pub vex_compact: bool, } @@ -314,7 +334,12 @@ pub(crate) async fn generate_vex( ..Default::default() } } else { - let package_paths = resolve_package_paths(common, manifest).await; + // stdout belongs to machine output here: the envelope in `--json` + // mode, or the VEX document itself when `output` is None. Silence + // the dispatch's human chrome ("Using at: ...") in both, + // mirroring apply/rollback's `silent || json` gating. + let quiet = common.silent || common.json || params.output.is_none(); + let package_paths = resolve_package_paths(common, manifest, quiet).await; let vendor = load_vendor_context(common, manifest).await; socket_patch_core::vex::applied_patches_with_vendor( manifest, @@ -660,6 +685,7 @@ fn are_safe_go_redirect_coords(module: &str, version: &str) -> bool { async fn resolve_package_paths( common: &GlobalArgs, manifest: &PatchManifest, + quiet: bool, ) -> HashMap { let purls: Vec = manifest.patches.keys().cloned().collect(); let partitioned = partition_purls(&purls, common.ecosystems.as_deref()); @@ -680,7 +706,7 @@ async fn resolve_package_paths( // as `package_not_found`. The rollback variant fans each base path // back out to every qualified manifest PURL — the same mapping the // manifest was written with (`get` uses the same resolver). 
- find_packages_for_rollback(&partitioned, &crawler_options, common.silent).await + find_packages_for_rollback(&partitioned, &crawler_options, quiet).await } fn emit_envelope_error(args: &VexArgs, code: &str, message: &str) { diff --git a/crates/socket-patch-cli/src/json_envelope.rs b/crates/socket-patch-cli/src/json_envelope.rs index 5a0919b..9d88fde 100644 --- a/crates/socket-patch-cli/src/json_envelope.rs +++ b/crates/socket-patch-cli/src/json_envelope.rs @@ -69,9 +69,10 @@ pub struct Envelope { /// JOIN against `events[]`. /// /// Empty (and omitted from JSON via `skip_serializing_if`) for - /// commands that don't produce sidecar work — `rollback`, - /// `repair`, `list`, etc. — and for apply runs against ecosystems - /// with no sidecar contract (e.g. npm). + /// commands that don't surface sidecar records here — `rollback` + /// reports its sidecar *resync* per-result in its own envelope, + /// `repair`/`list` produce no sidecar work — and for apply runs + /// against ecosystems with no sidecar contract (e.g. npm). #[serde(skip_serializing_if = "Vec::is_empty")] pub sidecars: Vec, /// Present only when `--vex ` was passed to `apply`/`scan` and diff --git a/crates/socket-patch-cli/src/main.rs b/crates/socket-patch-cli/src/main.rs index bceaa27..995a726 100644 --- a/crates/socket-patch-cli/src/main.rs +++ b/crates/socket-patch-cli/src/main.rs @@ -8,7 +8,25 @@ async fn main() { // names. A one-shot deprecation warning fires per legacy name set. promote_legacy_env_vars(); - let argv: Vec = std::env::args().collect(); + // Then drop exported-but-empty SOCKET_* globals (`SOCKET_CWD=` means + // "unset", not "crash the parse"). Must run after the promotion so a + // blanked legacy name is scrubbed too. + socket_patch_cli::args::scrub_empty_global_env_vars(); + + // The parser surface is `String`-typed, but argv is raw bytes on Unix — + // `std::env::args()` would *panic* on a non-Unicode argument. 
Collect + // `args_os` instead and turn a bad argument into the contract's clap + // usage error (stderr + exit 2) rather than a crash. + let argv: Vec = match std::env::args_os() + .map(std::ffi::OsString::into_string) + .collect::>() + { + Ok(argv) => argv, + Err(bad_arg) => { + eprintln!("error: invalid UTF-8 was detected in one or more arguments: {bad_arg:?}"); + std::process::exit(2); + } + }; let cli = match parse_with_uuid_fallback(argv) { Ok(cli) => cli, Err(err) => err.exit(), diff --git a/crates/socket-patch-cli/src/output.rs b/crates/socket-patch-cli/src/output.rs index 1c84219..d00a94c 100644 --- a/crates/socket-patch-cli/src/output.rs +++ b/crates/socket-patch-cli/src/output.rs @@ -23,7 +23,8 @@ pub fn format_severity(s: &str, use_color: bool) -> String { match s.to_lowercase().as_str() { "critical" => format!("\x1b[91m{s}\x1b[0m"), "high" => format!("\x1b[31m{s}\x1b[0m"), - "medium" => format!("\x1b[33m{s}\x1b[0m"), + // GHSA emits `moderate`; same tier as medium (see get.rs severity_rank). + "medium" | "moderate" => format!("\x1b[33m{s}\x1b[0m"), "low" => format!("\x1b[36m{s}\x1b[0m"), _ => s.to_string(), } @@ -50,7 +51,8 @@ pub enum SelectError { /// /// - `skip_prompt` (from `-y` flag) or `is_json`: return `default_yes` immediately. /// - Non-TTY stdin: return `default_yes` with a stderr warning. -/// - Interactive: print prompt to stderr, read line; empty = `default_yes`. +/// - Interactive: print prompt to stderr, read line; empty = `default_yes`; +/// unreadable input (e.g. non-UTF-8 bytes) = no. 
pub fn confirm(prompt: &str, default_yes: bool, skip_prompt: bool, is_json: bool) -> bool { if skip_prompt || is_json { return default_yes; @@ -63,7 +65,12 @@ pub fn confirm(prompt: &str, default_yes: bool, skip_prompt: bool, is_json: bool eprint!("{prompt} {hint} "); io::stderr().flush().unwrap(); let mut answer = String::new(); - io::stdin().read_line(&mut answer).unwrap(); + if io::stdin().read_line(&mut answer).is_err() { + // Terminals can deliver non-UTF-8 bytes (e.g. a Latin-1 paste); + // `read_line` reports those as InvalidData. Treat any read + // failure like an unrecognized answer (decline), not a panic. + return false; + } let answer = answer.trim().to_lowercase(); if answer.is_empty() { return default_yes; @@ -226,6 +233,18 @@ mod tests { assert_eq!(format_severity("low", true), "\x1b[36mlow\x1b[0m"); } + #[test] + fn format_severity_moderate_is_medium_tier_yellow() { + // Regression: GHSA emits `moderate` for the medium tier (see + // get.rs `severity_rank`), and both scan.rs call sites pass raw + // API severities straight through. Dropping `moderate` into the + // unknown arm rendered a medium-tier vuln with no color at all — + // less prominent than `low` (cyan). 
+ assert_eq!(format_severity("moderate", true), "\x1b[33mmoderate\x1b[0m"); + assert_eq!(format_severity("MODERATE", true), "\x1b[33mMODERATE\x1b[0m"); + assert_eq!(format_severity("moderate", false), "moderate"); + } + #[test] fn format_severity_critical_is_more_prominent_than_high() { // Regression: `critical` is the worst severity and must render at diff --git a/crates/socket-patch-cli/tests/apply_invariants.rs b/crates/socket-patch-cli/tests/apply_invariants.rs index 8131edb..b7ea3aa 100644 --- a/crates/socket-patch-cli/tests/apply_invariants.rs +++ b/crates/socket-patch-cli/tests/apply_invariants.rs @@ -150,6 +150,147 @@ fn assert_summary_all_zero(summary: &serde_json::Value) { } } +const SCOPED_NPM_PURL: &str = "pkg:npm/scopedpkg@1.0.0"; +const SCOPED_ORIGINAL: &[u8] = b"module.exports = function vulnerable() { return 'pwn'; };\n"; +const SCOPED_PATCHED: &[u8] = b"module.exports = function safe() { return 'ok'; };\n"; + +/// Git SHA-256: `SHA256("blob \0" ++ content)`. Computed +/// independently here so the manifest hashes are NOT derived from the +/// code under test (no circular oracle). +fn git_sha256(content: &[u8]) -> String { + let header = format!("blob {}\0", content.len()); + let mut hasher = Sha256::new(); + hasher.update(header.as_bytes()); + hasher.update(content); + hex::encode(hasher.finalize()) +} + +/// Lay down a project with TWO manifest patches: +/// - an npm patch that is fully applicable offline (package installed, +/// patched blob present in `.socket/blobs/`), and +/// - a pypi patch whose blob is missing from `.socket/` entirely. +/// +/// Used to prove the offline no-local-source guard is scoped to the +/// patches the run can actually apply (`--ecosystems` filter). 
+fn write_mixed_scope_project(root: &Path) { + let before = git_sha256(SCOPED_ORIGINAL); + let after = git_sha256(SCOPED_PATCHED); + + std::fs::write( + root.join("package.json"), + r#"{"name":"scope-test","version":"0.0.0"}"#, + ) + .expect("write package.json"); + + let pkg = root.join("node_modules").join("scopedpkg"); + std::fs::create_dir_all(&pkg).expect("create package dir"); + std::fs::write( + pkg.join("package.json"), + r#"{"name":"scopedpkg","version":"1.0.0"}"#, + ) + .expect("write pkg package.json"); + std::fs::write(pkg.join("index.js"), SCOPED_ORIGINAL).expect("write index.js"); + + let socket = root.join(".socket"); + std::fs::create_dir_all(socket.join("blobs")).expect("create blobs"); + std::fs::write(socket.join("blobs").join(&after), SCOPED_PATCHED).expect("write blob"); + let manifest = format!( + r#"{{ + "patches": {{ + "{SCOPED_NPM_PURL}": {{ + "uuid": "33333333-3333-4333-8333-333333333333", + "exportedAt": "2024-01-01T00:00:00Z", + "files": {{ + "package/index.js": {{ "beforeHash": "{before}", "afterHash": "{after}" }} + }}, + "vulnerabilities": {{}}, + "description": "in-scope npm patch with local sources", + "license": "MIT", + "tier": "free" + }}, + "pkg:pypi/__ghost_pkg__@9.9.9": {{ + "uuid": "44444444-4444-4444-8444-444444444444", + "exportedAt": "2024-01-01T00:00:00Z", + "files": {{ + "ghost.py": {{ + "beforeHash": "2222222222222222222222222222222222222222222222222222222222222222", + "afterHash": "3333333333333333333333333333333333333333333333333333333333333333" + }} + }}, + "vulnerabilities": {{}}, + "description": "out-of-scope pypi patch with NO local source", + "license": "MIT", + "tier": "free" + }} + }} +}}"# + ); + std::fs::write(socket.join("manifest.json"), manifest).expect("write manifest"); +} + +/// Regression: the `--offline` no-local-source guard (and the download +/// planner feeding it) must only consider patches that are in scope for +/// THIS run. 
A patch filtered out by `--ecosystems` — or belonging to an +/// ecosystem this build can't apply at all — will never be applied, so +/// its missing `.socket/` sources must not fail a run whose in-scope +/// patches are all locally applicable. +/// +/// Before the fix, the guard scanned the WHOLE manifest: here the +/// out-of-scope pypi patch (no blob on disk) tripped the offline bail and +/// the fully-applicable npm patch was never applied (exit 1, no events). +#[test] +fn offline_ecosystems_filter_ignores_out_of_scope_missing_source() { + let tmp = tempfile::tempdir().expect("tempdir"); + write_mixed_scope_project(tmp.path()); + + let (code, stdout) = run_apply( + tmp.path(), + &["--offline", "--silent", "--ecosystems", "npm"], + ); + let v: serde_json::Value = + serde_json::from_str(&stdout).expect("apply --json must emit valid JSON"); + assert_eq!( + code, 0, + "all in-scope (npm) patches have local sources; the out-of-scope pypi \ + patch must not trip the offline bail. envelope:\n{v}" + ); + assert_eq!(v["status"], "success", "expected a clean apply, got {v}"); + let events = v["events"].as_array().expect("events array"); + assert!( + events + .iter() + .any(|e| e["action"] == "applied" && e["purl"] == SCOPED_NPM_PURL), + "the in-scope npm patch must actually be applied; got {events:?}" + ); + // The patched bytes really landed on disk. + assert_eq!( + std::fs::read( + tmp.path() + .join("node_modules") + .join("scopedpkg") + .join("index.js") + ) + .expect("read patched file"), + SCOPED_PATCHED, + "in-scope npm patch must be written to disk" + ); + + // CONTROL: the same fixture WITHOUT the `--ecosystems` filter puts the + // sourceless pypi patch in scope, so the documented offline bail must + // still fire — the fix scopes the guard, it does not disable it. 
+ let tmp2 = tempfile::tempdir().expect("tempdir"); + write_mixed_scope_project(tmp2.path()); + let (code2, stdout2) = run_apply(tmp2.path(), &["--offline", "--silent"]); + assert_eq!( + code2, 1, + "with no ecosystem filter the sourceless pypi patch is in scope and \ + must still trip the offline bail; stdout=\n{stdout2}" + ); + let v2: serde_json::Value = + serde_json::from_str(&stdout2).expect("apply --json must emit valid JSON"); + assert_eq!(v2["status"], "partialFailure", "{v2}"); +} + #[test] fn offline_with_missing_source_emits_partial_failure() { let tmp = tempfile::tempdir().expect("tempdir"); diff --git a/crates/socket-patch-cli/tests/apply_network.rs b/crates/socket-patch-cli/tests/apply_network.rs index 3dfd19a..d9bb628 100644 --- a/crates/socket-patch-cli/tests/apply_network.rs +++ b/crates/socket-patch-cli/tests/apply_network.rs @@ -243,18 +243,22 @@ async fn apply_with_ecosystem_filter_excluding_npm_skips_all_npm_patches() { write_manifest_with_patch(&socket, purl, uuid, &before_hash, &after_hash); let (code, stdout, stderr) = run_apply(tmp.path(), &mock.uri(), &["--ecosystems", "pypi"]); - // Filtering out npm leaves nothing in scope: apply reports this as a - // partial-failure (exit 1, status "partialFailure", all-zero summary). - // Pin the exact contract — a disjoint `0 || 1` accept would let a - // regression that flipped the exit code (or started "succeeding" while - // silently doing nothing) slip through. + // Filtering out npm leaves nothing in scope: there is genuinely no + // work this run can do, so apply is a clean no-op SUCCESS (exit 0) — + // the same documented contract as an empty manifest (npm `postinstall` + // runs `apply` on every install). 
This test previously pinned exit + // 1/partialFailure, but that outcome was an artifact of a scoping bug: + // the excluded npm patch's missing artifacts were fetched (and failed, + // against this route-less mock) BEFORE the `--ecosystems` filter was + // applied, so the run never reached the no-in-scope success path. The + // filter now scopes the source probes and download planner up front. assert_eq!( - code, 1, - "ecosystem filter with nothing in scope must exit 1; stdout={stdout}; stderr={stderr}" + code, 0, + "ecosystem filter with nothing in scope is a clean no-op success; stdout={stdout}; stderr={stderr}" ); let v: serde_json::Value = serde_json::from_str(stdout.trim()).expect("valid JSON"); assert_eq!(v["command"], "apply"); - assert_eq!(v["status"], "partialFailure"); + assert_eq!(v["status"], "success"); assert_eq!(v["summary"]["applied"], 0); // Nothing in the npm ecosystem may even be discovered/downloaded once // it's filtered out — guards against the filter being applied only at @@ -271,6 +275,18 @@ async fn apply_with_ecosystem_filter_excluding_npm_skips_all_npm_patches() { v["summary"]["failed"], 0, "skipping out-of-scope is not a failure" ); + // The excluded patch's artifacts must not be fetched AT ALL — the + // filter scopes the download planner itself, not just the write step. + // (Only artifact endpoints are checked; telemetry may ping the API.) + let requests = mock.received_requests().await.unwrap_or_default(); + let artifact_requests: Vec<_> = requests + .iter() + .filter(|r| r.url.path().contains("/patches/")) + .collect(); + assert!( + artifact_requests.is_empty(), + "no patch artifacts may be fetched for a filtered-out ecosystem; got {artifact_requests:?}" + ); // The excluded npm patch must not appear as an applied/patched event — // an empty `events` array or one without our purl is fine, but a // "patched" event for the skipped purl would mean the filter leaked. 
diff --git a/crates/socket-patch-cli/tests/cli_argv_non_utf8.rs b/crates/socket-patch-cli/tests/cli_argv_non_utf8.rs new file mode 100644 index 0000000..1e77389 --- /dev/null +++ b/crates/socket-patch-cli/tests/cli_argv_non_utf8.rs @@ -0,0 +1,95 @@ +//! Regression tests: non-UTF-8 bytes in argv must be a clean usage error. +//! +//! On Unix, argv is raw bytes — a junk-byte filename (or a path typed in a +//! non-UTF-8 locale) is a perfectly legal process argument. `main.rs` used to +//! collect argv via `std::env::args()`, which *panics* on the first +//! non-Unicode argument: the binary died with a Rust panic message and exit +//! code 101 ("please report this bug" territory) before clap ever saw the +//! command line. The contract treats malformed invocations as clap usage +//! errors (exit `2`, message on stderr) — see `setup --check --remove` in +//! `CLI_CONTRACT.md` — so a bad byte in argv must take that path too. +//! +//! These tests run the compiled binary as a subprocess because the bug lives +//! in `main.rs` itself (the argv collection step), upstream of everything the +//! in-process parser tests can reach. + +#![cfg(unix)] + +use std::ffi::OsStr; +use std::os::unix::ffi::OsStrExt; +use std::process::Command; + +const BINARY: &str = env!("CARGO_BIN_EXE_socket-patch"); + +/// An argument that is valid on the OS level but not valid UTF-8. +fn non_utf8_arg() -> &'static OsStr { + OsStr::from_bytes(b"\xff\xfe") +} + +/// Run the binary with the given args in a hermetic env and capture output. +fn run(args: &[&OsStr]) -> (Option<i32>, String, String) { + let mut cmd = Command::new(BINARY); + for a in args { + cmd.arg(a); + } + // Scrub the global env-var surface so ambient SOCKET_* vars can never + // perturb where the invocation fails (the assertion is about the argv + // path, not env handling).
+ for var in socket_patch_cli::args::GLOBAL_ARG_ENV_VARS { + cmd.env_remove(var); + } + let out = cmd.output().expect("spawn socket-patch"); + ( + out.status.code(), + String::from_utf8_lossy(&out.stdout).into_owned(), + String::from_utf8_lossy(&out.stderr).into_owned(), + ) +} + +/// Shared assertions: a clean clap-style usage error, not a panic. +fn assert_clean_usage_error(code: Option<i32>, stdout: &str, stderr: &str) { + // Not killed by a signal, and not the panic runtime's exit 101 — the + // contract's usage-error code is 2. + assert_eq!( + code, + Some(2), + "non-UTF-8 argv must exit with the usage-error code 2; stderr was:\n{stderr}" + ); + assert!( + !stderr.contains("panicked"), + "non-UTF-8 argv must not crash with a Rust panic; stderr was:\n{stderr}" + ); + assert!( + stderr.to_lowercase().contains("invalid utf-8"), + "stderr must explain the invalid UTF-8 argument; stderr was:\n{stderr}" + ); + // Diagnostics belong on stderr; stdout must stay clean (machine-readable + // consumers pipe stdout). + assert!( + stdout.is_empty(), + "usage error must not write to stdout; stdout was:\n{stdout}" + ); +} + +#[test] +fn non_utf8_arg_after_subcommand_is_clean_usage_error() { + let (code, stdout, stderr) = run(&[OsStr::new("list"), non_utf8_arg()]); + assert_clean_usage_error(code, &stdout, &stderr); +} + +#[test] +fn non_utf8_bare_first_arg_is_clean_usage_error() { + // First positional slot — the position `parse_with_uuid_fallback` probes + // for the bare-UUID rewrite. The argv collection must fail cleanly before + // any of that machinery runs. + let (code, stdout, stderr) = run(&[non_utf8_arg()]); + assert_clean_usage_error(code, &stdout, &stderr); +} + +#[test] +fn non_utf8_cwd_value_is_clean_usage_error() { + // A non-UTF-8 *path* handed to `--cwd` is the realistic way users hit + // this: shell tab-completion of a junk-byte directory name.
+ let (code, stdout, stderr) = run(&[OsStr::new("list"), OsStr::new("--cwd"), non_utf8_arg()]); + assert_clean_usage_error(code, &stdout, &stderr); +} diff --git a/crates/socket-patch-cli/tests/cli_get_silent.rs b/crates/socket-patch-cli/tests/cli_get_silent.rs new file mode 100644 index 0000000..4552d78 --- /dev/null +++ b/crates/socket-patch-cli/tests/cli_get_silent.rs @@ -0,0 +1,65 @@ +//! `get --silent` contract test. +//! +//! CLI_CONTRACT.md defines `--silent` as "Errors only". Regression +//! guard: `get` gated all of its human-readable chatter on `!json` alone +//! and hardcoded `silent: false` into the `DownloadParams` it builds, so +//! `get --silent` printed everything anyway. Runs fully offline: a bare +//! package-name identifier in an empty project dir takes the +//! crawl → "No packages found" path and exits 0 before any API call. + +use std::path::{Path, PathBuf}; +use std::process::Command; + +use socket_patch_cli::args::GLOBAL_ARG_ENV_VARS; + +fn binary() -> PathBuf { + env!("CARGO_BIN_EXE_socket-patch").into() +} + +/// Run `socket-patch get` in `cwd` with a scrubbed SOCKET_* environment +/// so ambient developer/CI configuration (tokens, org slugs, silent +/// toggles) can't change the branch under test. 
+fn run_get(cwd: &Path, args: &[&str]) -> (i32, String) { + let mut cmd = Command::new(binary()); + cmd.arg("get").args(args).current_dir(cwd); + for var in GLOBAL_ARG_ENV_VARS { + cmd.env_remove(var); + } + for var in [ + "SOCKET_SAVE_ONLY", + "SOCKET_ONE_OFF", + "SOCKET_ALL_RELEASES", + "SOCKET_PATCH_API_URL", + "SOCKET_PATCH_API_TOKEN", + "SOCKET_PATCH_PROXY_URL", + ] { + cmd.env_remove(var); + } + cmd.env("SOCKET_TELEMETRY_DISABLED", "1"); + let out = cmd.output().expect("run socket-patch get"); + ( + out.status.code().unwrap_or(-1), + String::from_utf8_lossy(&out.stdout).to_string(), + ) +} + +#[test] +fn get_silent_produces_no_stdout() { + let tmp = tempfile::tempdir().expect("tempdir"); + let (code, stdout) = run_get(tmp.path(), &["--silent", "no-such-package-zzz"]); + assert_eq!(code, 0, "no-packages path must exit 0; stdout={stdout:?}"); + assert!( + stdout.trim().is_empty(), + "--silent must produce no stdout; got {stdout:?}" + ); + + // Control run: the same scenario WITHOUT --silent must print the + // human messages — otherwise the assertion above passes vacuously. + let tmp2 = tempfile::tempdir().expect("tempdir"); + let (loud_code, loud_stdout) = run_get(tmp2.path(), &["no-such-package-zzz"]); + assert_eq!(loud_code, 0); + assert!( + loud_stdout.contains("No packages found"), + "non-silent run must print the no-packages message; got {loud_stdout:?}" + ); +} diff --git a/crates/socket-patch-cli/tests/cli_global_args.rs b/crates/socket-patch-cli/tests/cli_global_args.rs index 3edfcf6..bbcb84b 100644 --- a/crates/socket-patch-cli/tests/cli_global_args.rs +++ b/crates/socket-patch-cli/tests/cli_global_args.rs @@ -641,6 +641,67 @@ const GLOBAL_ENV_VARS: &[&str] = &[ "SOCKET_TELEMETRY_DISABLED", ]; +/// An exported-but-**empty** non-bool env var must mean "unset", not crash. 
+/// +/// `parse_bool_flag` gave the *bool* globals the empty-means-false semantic, +/// but `SOCKET_CWD=`, `SOCKET_GLOBAL_PREFIX=`, `SOCKET_LOCK_TIMEOUT=` and +/// `SOCKET_ECOSYSTEMS=` (the same blank-without-unsetting shell/CI idiom) +/// still aborted every subcommand at clap-parse time ("a value is required" / +/// "cannot parse integer from empty string"), and empty +/// `SOCKET_DOWNLOAD_MODE=` / `SOCKET_MANIFEST_PATH=` leaked `""` past the +/// documented defaults. The binary now scrubs empty `GlobalArgs` env vars +/// before clap parses (`args::scrub_empty_global_env_vars` in `main`), +/// restoring the documented CLI > env > default precedence for blank vars. +/// This spawns the real binary because the scrub is `main` wiring. +#[test] +#[serial_test::serial] +fn empty_nonbool_env_vars_do_not_crash_the_binary() { + let tmp = tempfile::tempdir().expect("tempdir"); + let mut cmd = std::process::Command::new(env!("CARGO_BIN_EXE_socket-patch")); + cmd.current_dir(tmp.path()); + // Start from a clean slate (no ambient SOCKET_* bleed into the child)… + for var in GLOBAL_ENV_VARS { + cmd.env_remove(var); + } + // …then export every non-bool global blank, the way `VAR=` does. + for var in [ + "SOCKET_CWD", + "SOCKET_MANIFEST_PATH", + "SOCKET_GLOBAL_PREFIX", + "SOCKET_LOCK_TIMEOUT", + "SOCKET_ECOSYSTEMS", + "SOCKET_DOWNLOAD_MODE", + ] { + cmd.env(var, ""); + } + // Keep the spawned process from attempting telemetry network calls. 
+ cmd.env("SOCKET_TELEMETRY_DISABLED", "1"); + + let out = cmd + .args(["list", "--json"]) + .output() + .expect("spawn socket-patch"); + let stdout = String::from_utf8_lossy(&out.stdout); + let stderr = String::from_utf8_lossy(&out.stderr); + + assert_ne!( + out.status.code(), + Some(2), + "blank env vars must not abort the clap parse.\nstderr: {stderr}", + ); + // The command must reach normal execution: with the blanks treated as + // unset, `list --json` in an empty temp dir resolves the default manifest + // path and emits the manifest_not_found envelope (exit 1). + let envelope: serde_json::Value = serde_json::from_str(stdout.trim()).unwrap_or_else(|e| { + panic!("expected a JSON envelope on stdout, got {e}.\nstdout: {stdout}\nstderr: {stderr}") + }); + assert_eq!( + envelope["error"]["code"], "manifest_not_found", + "blank env vars must fall back to defaults: {envelope}", + ); + assert_eq!(out.status.code(), Some(1), "manifest_not_found exits 1"); +} + fn save_and_clear_global_env() -> Vec<(&'static str, Option)> { let saved: Vec<(&'static str, Option)> = GLOBAL_ENV_VARS .iter() diff --git a/crates/socket-patch-cli/tests/cli_parse_list.rs b/crates/socket-patch-cli/tests/cli_parse_list.rs index ca5b11f..4b4099c 100644 --- a/crates/socket-patch-cli/tests/cli_parse_list.rs +++ b/crates/socket-patch-cli/tests/cli_parse_list.rs @@ -860,3 +860,98 @@ fn absolute_manifest_path_content_wins_over_cwd_via_binary() { "cwd decoy manifest must NOT be listed when absolute path is given: {stdout}" ); } + +// --------------------------------------------------------------------------- +// `--silent` contract — CLI_CONTRACT.md defines `--silent` as "Errors only". +// Regression guard: `run()` gated the human-readable listing on `!json` +// alone, so `list --silent` still printed the full patch table (and the +// "No patches found in manifest." line for an empty manifest). Mirrors the +// `get --silent` / `repair --silent` regressions fixed earlier. 
+// --------------------------------------------------------------------------- + +/// Like [`run_list_binary`] but with every `GlobalArgs` env var scrubbed, +/// so ambient developer/CI configuration (SOCKET_SILENT, SOCKET_JSON, +/// tokens…) can't change the branch under test, and telemetry disabled so +/// the test stays offline. +fn run_list_binary_scrubbed(cwd: &Path, extra: &[&str]) -> std::process::Output { + let mut cmd = Command::new(env!("CARGO_BIN_EXE_socket-patch")); + cmd.arg("list").arg("--cwd").arg(cwd).args(extra); + for var in socket_patch_cli::args::GLOBAL_ARG_ENV_VARS { + cmd.env_remove(var); + } + cmd.env("SOCKET_TELEMETRY_DISABLED", "1"); + cmd.output().expect("failed to execute socket-patch binary") +} + +#[test] +fn silent_suppresses_human_listing_via_binary() { + let tmp = tempfile::tempdir().unwrap(); + write_manifest_in(tmp.path(), &populated_manifest()); + + let out = run_list_binary_scrubbed(tmp.path(), &["--silent"]); + let stdout = String::from_utf8_lossy(&out.stdout); + assert_eq!( + out.status.code(), + Some(0), + "list --silent must still exit 0" + ); + assert!( + stdout.trim().is_empty(), + "--silent must produce no stdout for a populated manifest; got {stdout:?}" + ); + + // Control run: the same manifest WITHOUT --silent must print the table — + // otherwise the assertion above passes vacuously. 
+ let loud = run_list_binary_scrubbed(tmp.path(), &[]); + assert_eq!(loud.status.code(), Some(0)); + assert!( + String::from_utf8_lossy(&loud.stdout).contains("Package: pkg:npm/test-pkg@1.0.0"), + "non-silent run must print the listing" + ); +} + +#[test] +fn silent_suppresses_no_patches_message_via_binary() { + let tmp = tempfile::tempdir().unwrap(); + write_manifest_in(tmp.path(), &PatchManifest::new()); + + let out = run_list_binary_scrubbed(tmp.path(), &["--silent"]); + let stdout = String::from_utf8_lossy(&out.stdout); + assert_eq!( + out.status.code(), + Some(0), + "empty list --silent must exit 0" + ); + assert!( + stdout.trim().is_empty(), + "--silent must suppress the no-patches message; got {stdout:?}" + ); +} + +#[test] +fn silent_does_not_mute_json_envelope_via_binary() { + // `--json` output is the machine-readable result, not human chatter: + // `--silent --json` must still emit the envelope (matching `get`/`repair`). + let tmp = tempfile::tempdir().unwrap(); + write_manifest_in(tmp.path(), &populated_manifest()); + + let out = run_list_binary_scrubbed(tmp.path(), &["--silent", "--json"]); + assert_eq!(out.status.code(), Some(0)); + let v: serde_json::Value = serde_json::from_str(String::from_utf8_lossy(&out.stdout).trim()) + .expect("--silent --json must still print the JSON envelope"); + assert_eq!(v["command"], "list"); + assert_eq!(v["summary"]["discovered"], 1); +} + +#[test] +fn silent_keeps_missing_manifest_error_on_stderr_via_binary() { + // "Errors only": the missing-manifest diagnostic must survive --silent. 
+ let tmp = tempfile::tempdir().unwrap(); + + let out = run_list_binary_scrubbed(tmp.path(), &["--silent"]); + assert_eq!(out.status.code(), Some(1), "missing manifest must exit 1"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("Manifest not found"), + "error output must NOT be muted by --silent" + ); +} diff --git a/crates/socket-patch-cli/tests/cli_parse_repair.rs b/crates/socket-patch-cli/tests/cli_parse_repair.rs index ebb2fc5..ac87853 100644 --- a/crates/socket-patch-cli/tests/cli_parse_repair.rs +++ b/crates/socket-patch-cli/tests/cli_parse_repair.rs @@ -380,6 +380,46 @@ fn repair_gc_alias_accepts_flags() { assert_eq!(snapshot(&args), expected); } +/// Regression: an exported-but-empty `SOCKET_DOWNLOAD_ONLY=` — the shell/CI +/// idiom for blanking a variable without unsetting it — must mean "unset, +/// fall back to the default (false)", not abort every `repair` invocation +/// with a ValueValidation error. The flattened `GlobalArgs` bool flags +/// already have this semantic via `parse_bool_flag`; `repair`'s own +/// `--download-only` env binding must match (it is also outside +/// `GLOBAL_ARG_ENV_VARS`, so `main`'s empty-var scrub never rescues it). +#[test] +#[serial_test::serial] +fn empty_download_only_env_var_parses_as_false_not_crash() { + let _scrub = EnvScrub::new(); + std::env::set_var("SOCKET_DOWNLOAD_ONLY", ""); + let parsed = Cli::try_parse_from(["socket-patch", "repair"]); + std::env::remove_var("SOCKET_DOWNLOAD_ONLY"); + let cli = parsed.expect("empty SOCKET_DOWNLOAD_ONLY must not abort the parse"); + match cli.command { + Commands::Repair(a) => assert!( + !a.download_only, + "empty SOCKET_DOWNLOAD_ONLY must resolve to false" + ), + _ => panic!("expected Repair"), + } +} + +/// The truthy env spellings keep working through the empty-string fix: +/// `SOCKET_DOWNLOAD_ONLY=1` must set the flag exactly like `--download-only`. 
+#[test] +#[serial_test::serial] +fn truthy_download_only_env_var_sets_flag() { + let _scrub = EnvScrub::new(); + std::env::set_var("SOCKET_DOWNLOAD_ONLY", "1"); + let parsed = Cli::try_parse_from(["socket-patch", "repair"]); + std::env::remove_var("SOCKET_DOWNLOAD_ONLY"); + let cli = parsed.expect("SOCKET_DOWNLOAD_ONLY=1 must parse"); + match cli.command { + Commands::Repair(a) => assert!(a.download_only), + _ => panic!("expected Repair"), + } +} + #[test] #[serial_test::serial] fn repair_unknown_flag_is_unknown_argument_error() { diff --git a/crates/socket-patch-cli/tests/cli_parse_unlock.rs b/crates/socket-patch-cli/tests/cli_parse_unlock.rs new file mode 100644 index 0000000..ebfb033 --- /dev/null +++ b/crates/socket-patch-cli/tests/cli_parse_unlock.rs @@ -0,0 +1,138 @@ +//! CLI contract tests for the `unlock` subcommand's parse surface. +//! +//! Focus: the `--release` / `SOCKET_UNLOCK_RELEASE` env binding. +//! Regression guard: the flag shipped without `value_parser = +//! parse_bool_flag`, so clap's default bool parser accepted only the +//! literal strings `true`/`false` from the env — `SOCKET_UNLOCK_RELEASE=1` +//! (or an exported-but-empty `SOCKET_UNLOCK_RELEASE=`) aborted every +//! `unlock` invocation with a ValueValidation error. The flag is also +//! outside `GLOBAL_ARG_ENV_VARS`, so `main`'s empty-var scrub never +//! rescues it. Same bug class previously fixed on `repair +//! --download-only` and `rollback --one-off`. +//! +//! ## Hermeticity +//! +//! Mirrors `cli_parse_repair.rs`: every parse runs with the full +//! `SOCKET_*` surface scrubbed (see [`EnvScrub`]) and every test is +//! `#[serial_test::serial]` so the process-global env mutation can't +//! race a concurrent parse. + +use clap::Parser; +use socket_patch_cli::{Cli, Commands}; + +/// Every `SOCKET_*` env var that clap consults while parsing `unlock` +/// (its own `--release` flag plus the flattened `GlobalArgs`). 
+const SOCKET_ENV_VARS: &[&str] = &[ + // GlobalArgs + "SOCKET_CWD", + "SOCKET_MANIFEST_PATH", + "SOCKET_API_URL", + "SOCKET_API_TOKEN", + "SOCKET_ORG_SLUG", + "SOCKET_PROXY_URL", + "SOCKET_ECOSYSTEMS", + "SOCKET_DOWNLOAD_MODE", + "SOCKET_OFFLINE", + "SOCKET_GLOBAL", + "SOCKET_GLOBAL_PREFIX", + "SOCKET_JSON", + "SOCKET_VERBOSE", + "SOCKET_SILENT", + "SOCKET_DRY_RUN", + "SOCKET_YES", + "SOCKET_LOCK_TIMEOUT", + "SOCKET_BREAK_LOCK", + "SOCKET_DEBUG", + "SOCKET_TELEMETRY_DISABLED", + // UnlockArgs-specific + "SOCKET_UNLOCK_RELEASE", +]; + +/// RAII guard that removes every [`SOCKET_ENV_VARS`] entry on +/// construction and restores the prior value on drop. Pair with +/// `#[serial_test::serial]` so the global env mutation never races +/// another test. +struct EnvScrub(Vec<(&'static str, Option<String>)>); + +impl EnvScrub { + fn new() -> Self { + let saved = SOCKET_ENV_VARS + .iter() + .map(|&k| { + let prev = std::env::var(k).ok(); + std::env::remove_var(k); + (k, prev) + }) + .collect(); + EnvScrub(saved) + } +} + +impl Drop for EnvScrub { + fn drop(&mut self) { + for (k, v) in &self.0 { + match v { + Some(val) => std::env::set_var(k, val), + None => std::env::remove_var(k), + } + } + } +} + +fn release_of(cli: Cli) -> bool { + match cli.command { + Commands::Unlock(a) => a.release, + _ => panic!("expected Unlock"), + } +} + +#[test] +#[serial_test::serial] +fn unlock_release_defaults_to_false() { + let _scrub = EnvScrub::new(); + let cli = Cli::try_parse_from(["socket-patch", "unlock"]).expect("parse"); + assert!( + !release_of(cli), + "bare `unlock` must default release to false" + ); +} + +#[test] +#[serial_test::serial] +fn unlock_release_long_flag_sets_true() { + let _scrub = EnvScrub::new(); + let cli = Cli::try_parse_from(["socket-patch", "unlock", "--release"]).expect("parse"); + assert!(release_of(cli), "`--release` must set the flag"); +} + +/// Regression: an exported-but-empty `SOCKET_UNLOCK_RELEASE=` — the +/// shell/CI idiom for blanking a variable without
unsetting it — must +/// mean "unset, fall back to the default (false)", not abort every +/// `unlock` invocation with a ValueValidation error. +#[test] +#[serial_test::serial] +fn empty_unlock_release_env_var_parses_as_false_not_crash() { + let _scrub = EnvScrub::new(); + std::env::set_var("SOCKET_UNLOCK_RELEASE", ""); + let parsed = Cli::try_parse_from(["socket-patch", "unlock"]); + std::env::remove_var("SOCKET_UNLOCK_RELEASE"); + let cli = parsed.expect("empty SOCKET_UNLOCK_RELEASE must not abort the parse"); + assert!( + !release_of(cli), + "empty SOCKET_UNLOCK_RELEASE must resolve to false" + ); +} + +/// Regression: the truthy env spellings must work — +/// `SOCKET_UNLOCK_RELEASE=1` must behave exactly like `--release` +/// instead of aborting the parse. +#[test] +#[serial_test::serial] +fn truthy_unlock_release_env_var_sets_flag() { + let _scrub = EnvScrub::new(); + std::env::set_var("SOCKET_UNLOCK_RELEASE", "1"); + let parsed = Cli::try_parse_from(["socket-patch", "unlock"]); + std::env::remove_var("SOCKET_UNLOCK_RELEASE"); + let cli = parsed.expect("SOCKET_UNLOCK_RELEASE=1 must parse"); + assert!(release_of(cli), "SOCKET_UNLOCK_RELEASE=1 must set release"); +} diff --git a/crates/socket-patch-cli/tests/cli_parse_vex.rs b/crates/socket-patch-cli/tests/cli_parse_vex.rs new file mode 100644 index 0000000..ca757f3 --- /dev/null +++ b/crates/socket-patch-cli/tests/cli_parse_vex.rs @@ -0,0 +1,194 @@ +//! CLI contract tests for the `vex` subcommand's env-bound bool flags, plus +//! the `VexEmbedArgs` twins flattened into `apply` and `scan`. +//! +//! Regression target: `--no-verify` / `--compact` (and `--vex-no-verify` / +//! `--vex-compact`) are env-bound bools (`SOCKET_VEX_NO_VERIFY` / +//! `SOCKET_VEX_COMPACT`). With clap's default bool value parser those env +//! bindings accept only the literal strings `true`/`false`, so the common +//! CI spellings (`SOCKET_VEX_NO_VERIFY=1`) — and the exported-but-empty +//! 
idiom (`SOCKET_VEX_NO_VERIFY=`) — aborted the parse with a +//! ValueValidation error. Because `VexEmbedArgs` is flattened into `apply` +//! and `scan`, the ambient env var broke those commands too (including +//! `apply` running from a postinstall hook). The fix wires +//! `value_parser = parse_bool_flag`, matching the `GlobalArgs` bool flags +//! and `repair --download-only` / `unlock --release`. These vars are also +//! outside `GLOBAL_ARG_ENV_VARS`, so `main`'s empty-var scrub never rescues +//! them. +//! +//! ## Hermeticity +//! +//! Every parse runs with the full set of `SOCKET_*` vars scrubbed (see +//! [`EnvScrub`]) and each test is `#[serial_test::serial]` because the +//! process environment is global. This mirrors `cli_parse_repair.rs`. + +use clap::Parser; +use socket_patch_cli::{Cli, Commands}; + +/// Every `SOCKET_*` env var clap consults while parsing `vex`, `apply`, or +/// `scan` (their own flags plus the flattened `GlobalArgs` and +/// `VexEmbedArgs`). Scrubbed around each parse so ambient shell/CI values +/// can't mask or fabricate a result. +const SOCKET_ENV_VARS: &[&str] = &[ + // GlobalArgs + "SOCKET_CWD", + "SOCKET_MANIFEST_PATH", + "SOCKET_API_URL", + "SOCKET_API_TOKEN", + "SOCKET_ORG_SLUG", + "SOCKET_PROXY_URL", + "SOCKET_ECOSYSTEMS", + "SOCKET_DOWNLOAD_MODE", + "SOCKET_OFFLINE", + "SOCKET_GLOBAL", + "SOCKET_GLOBAL_PREFIX", + "SOCKET_JSON", + "SOCKET_VERBOSE", + "SOCKET_SILENT", + "SOCKET_DRY_RUN", + "SOCKET_YES", + "SOCKET_LOCK_TIMEOUT", + "SOCKET_BREAK_LOCK", + "SOCKET_DEBUG", + "SOCKET_TELEMETRY_DISABLED", + // VexArgs / VexEmbedArgs + "SOCKET_VEX", + "SOCKET_VEX_OUTPUT", + "SOCKET_VEX_PRODUCT", + "SOCKET_VEX_NO_VERIFY", + "SOCKET_VEX_DOC_ID", + "SOCKET_VEX_COMPACT", + // ApplyArgs-specific + "SOCKET_FORCE", + // ScanArgs-specific + "SOCKET_BATCH_SIZE", + "SOCKET_ALL_RELEASES", +]; + +/// RAII guard that removes every [`SOCKET_ENV_VARS`] entry on construction and +/// restores the prior value on drop. 
Holding one of these around a clap parse +/// guarantees the parse sees only what's on the argv (plus whatever the test +/// itself sets), not the developer's shell. Pair with `#[serial_test::serial]` +/// so the global env mutation never races another test. +struct EnvScrub(Vec<(&'static str, Option<String>)>); + +impl EnvScrub { + fn new() -> Self { + let saved = SOCKET_ENV_VARS + .iter() + .map(|&k| { + let prev = std::env::var(k).ok(); + std::env::remove_var(k); + (k, prev) + }) + .collect(); + EnvScrub(saved) + } +} + +impl Drop for EnvScrub { + fn drop(&mut self) { + for (k, v) in &self.0 { + match v { + Some(val) => std::env::set_var(k, val), + None => std::env::remove_var(k), + } + } + } +} + +/// Scrub the env, set `var=value`, parse `argv`, restore. Returns the parse +/// result so callers can assert on success or failure. +fn parse_with_env(var: &str, value: &str, argv: &[&str]) -> Result<Cli, clap::Error> { + let _scrub = EnvScrub::new(); + std::env::set_var(var, value); + let parsed = Cli::try_parse_from(argv); + std::env::remove_var(var); + parsed +} + +/// The truthy env spellings must work: `SOCKET_VEX_NO_VERIFY=1` must set +/// `vex --no-verify` exactly like the flag, not abort the parse. +#[test] +#[serial_test::serial] +fn truthy_vex_no_verify_env_sets_flag_on_vex() { + let cli = parse_with_env("SOCKET_VEX_NO_VERIFY", "1", &["socket-patch", "vex"]) + .expect("SOCKET_VEX_NO_VERIFY=1 must parse, not abort"); + match cli.command { + Commands::Vex(a) => assert!(a.no_verify, "SOCKET_VEX_NO_VERIFY=1 must set --no-verify"), + _ => panic!("expected Vex"), + } +} + +/// An exported-but-empty `SOCKET_VEX_NO_VERIFY=` — the shell/CI idiom for +/// blanking a variable without unsetting it — must mean "unset, fall back +/// to the default (false)", not abort every `vex` invocation.
+#[test] +#[serial_test::serial] +fn empty_vex_no_verify_env_parses_as_false_on_vex() { + let cli = parse_with_env("SOCKET_VEX_NO_VERIFY", "", &["socket-patch", "vex"]) + .expect("empty SOCKET_VEX_NO_VERIFY must not abort the parse"); + match cli.command { + Commands::Vex(a) => assert!(!a.no_verify, "empty SOCKET_VEX_NO_VERIFY must be false"), + _ => panic!("expected Vex"), + } +} + +/// `SOCKET_VEX_COMPACT=1` must set `vex --compact`. +#[test] +#[serial_test::serial] +fn truthy_vex_compact_env_sets_flag_on_vex() { + let cli = parse_with_env("SOCKET_VEX_COMPACT", "1", &["socket-patch", "vex"]) + .expect("SOCKET_VEX_COMPACT=1 must parse, not abort"); + match cli.command { + Commands::Vex(a) => assert!(a.compact, "SOCKET_VEX_COMPACT=1 must set --compact"), + _ => panic!("expected Vex"), + } +} + +/// `VexEmbedArgs` shares the env var names with the standalone flags, so an +/// ambient `SOCKET_VEX_NO_VERIFY=1` must also parse (and set +/// `--vex-no-verify`) on `apply` — this is the postinstall-hook blast +/// radius: before the fix the env var aborted every `apply` run. +#[test] +#[serial_test::serial] +fn truthy_vex_no_verify_env_sets_embedded_flag_on_apply() { + let cli = parse_with_env("SOCKET_VEX_NO_VERIFY", "1", &["socket-patch", "apply"]) + .expect("SOCKET_VEX_NO_VERIFY=1 must not abort `apply`"); + match cli.command { + Commands::Apply(a) => assert!( + a.vex.vex_no_verify, + "SOCKET_VEX_NO_VERIFY=1 must set apply's --vex-no-verify" + ), + _ => panic!("expected Apply"), + } +} + +/// The empty-var idiom must likewise not abort `scan` (the other +/// `VexEmbedArgs` host), and must leave the embedded flag at its default. 
+#[test] +#[serial_test::serial] +fn empty_vex_compact_env_parses_as_false_on_scan() { + let cli = parse_with_env("SOCKET_VEX_COMPACT", "", &["socket-patch", "scan"]) + .expect("empty SOCKET_VEX_COMPACT must not abort `scan`"); + match cli.command { + Commands::Scan(a) => assert!(!a.vex.vex_compact, "empty SOCKET_VEX_COMPACT must be false"), + _ => panic!("expected Scan"), + } +} + +/// The explicit CLI flags keep working through the env fix (the custom +/// value parser must not change flag-only usage). +#[test] +#[serial_test::serial] +fn bare_flags_still_parse_without_env() { + let _scrub = EnvScrub::new(); + let cli = Cli::try_parse_from(["socket-patch", "vex", "--no-verify", "--compact"]) + .expect("bare flags must parse"); + match cli.command { + Commands::Vex(a) => { + assert!(a.no_verify); + assert!(a.compact); + } + _ => panic!("expected Vex"), + } +} diff --git a/crates/socket-patch-cli/tests/cli_remove_silent.rs b/crates/socket-patch-cli/tests/cli_remove_silent.rs new file mode 100644 index 0000000..a3f6823 --- /dev/null +++ b/crates/socket-patch-cli/tests/cli_remove_silent.rs @@ -0,0 +1,195 @@ +//! `remove --silent` contract tests. +//! +//! CLI_CONTRACT.md defines `--silent` as "Errors only". Regression +//! guard: `remove` gated all of its human-readable chatter on `!json` +//! alone, hardcoded `silent: false` into `acquire_or_emit` (so the +//! `--break-lock` stale-lock warning printed anyway), and passed only +//! `json` as `rollback_patches`' silent param — so `remove --silent` +//! printed everything. Same bug class previously fixed in `list`, +//! `repair`, and `get`. Runs fully offline: the patch record has no +//! files (so rollback fetches no blobs) and the project dir has no +//! installed packages, so the internal rollback takes the +//! "not installed" path and the manifest mutation needs no network. +//! +//! Stderr assertions ignore the "No SOCKET_API_TOKEN set" client +//! warning: it's printed unconditionally by +//! 
`get_api_client_with_overrides` in core for every command and is +//! out of scope for `remove`'s `--silent` gating. + +use std::path::{Path, PathBuf}; +use std::process::Command; + +use socket_patch_cli::args::GLOBAL_ARG_ENV_VARS; + +fn binary() -> PathBuf { + env!("CARGO_BIN_EXE_socket-patch").into() +} + +const ONE_PATCH_MANIFEST: &str = r#"{ + "patches": { + "pkg:npm/__remove_silent_test__@1.0.0": { + "uuid": "33333333-3333-4333-8333-333333333333", + "exportedAt": "2024-01-01T00:00:00Z", + "files": {}, + "vulnerabilities": {}, + "description": "synthetic remove --silent test patch", + "license": "MIT", + "tier": "free" + } + } +}"#; + +fn make_socket_dir(root: &Path) -> PathBuf { + let socket = root.join(".socket"); + std::fs::create_dir_all(&socket).expect("create .socket"); + std::fs::write(socket.join("manifest.json"), ONE_PATCH_MANIFEST).expect("write manifest"); + socket +} + +/// Run `socket-patch remove` in `cwd` with a scrubbed SOCKET_* environment +/// so ambient developer/CI configuration (tokens, silent toggles) can't +/// change the branch under test. +fn run_remove(cwd: &Path, args: &[&str]) -> (i32, String, String) { + let mut cmd = Command::new(binary()); + cmd.arg("remove").args(args).current_dir(cwd); + for var in GLOBAL_ARG_ENV_VARS { + cmd.env_remove(var); + } + cmd.env_remove("SOCKET_SKIP_ROLLBACK"); + cmd.env("SOCKET_TELEMETRY_DISABLED", "1"); + let out = cmd.output().expect("run socket-patch remove"); + ( + out.status.code().unwrap_or(-1), + String::from_utf8_lossy(&out.stdout).to_string(), + String::from_utf8_lossy(&out.stderr).to_string(), + ) +} + +/// A successful `remove --silent --yes` (rollback included — the package +/// is simply not installed) must produce no output on either stream: +/// no "will be removed" listing, no "Rolling back" / "No packages found +/// to rollback" progress, no "Removed N patch(es)" summary. 
+#[test] +fn remove_silent_produces_no_output_on_success() { + let tmp = tempfile::tempdir().expect("tempdir"); + let socket = make_socket_dir(tmp.path()); + + let (code, stdout, stderr) = run_remove( + tmp.path(), + &["pkg:npm/__remove_silent_test__@1.0.0", "--silent", "--yes"], + ); + assert_eq!( + code, 0, + "remove must succeed; stdout={stdout:?} stderr={stderr:?}" + ); + assert!( + stdout.trim().is_empty(), + "--silent must produce no stdout; got {stdout:?}" + ); + let stderr_rest: Vec<&str> = stderr + .lines() + .filter(|l| !l.contains("SOCKET_API_TOKEN") && !l.trim().is_empty()) + .collect(); + assert!( + stderr_rest.is_empty(), + "--silent must produce no stderr chatter on success; got {stderr_rest:?}" + ); + + // The removal must still have happened — silent suppresses output, + // not the mutation. + let body = std::fs::read_to_string(socket.join("manifest.json")).expect("read manifest"); + let v: serde_json::Value = serde_json::from_str(&body).expect("parse manifest"); + assert!( + v["patches"].as_object().expect("patches object").is_empty(), + "patch entry must be removed from the manifest" + ); + + // Control run: the same scenario WITHOUT --silent must print the + // human messages — otherwise the assertions above pass vacuously. 
+ let tmp2 = tempfile::tempdir().expect("tempdir"); + make_socket_dir(tmp2.path()); + let (loud_code, loud_stdout, loud_stderr) = run_remove( + tmp2.path(), + &["pkg:npm/__remove_silent_test__@1.0.0", "--yes"], + ); + assert_eq!(loud_code, 0); + assert!( + loud_stdout.contains("Rolling back patch before removal"), + "non-silent run must print rollback progress; got {loud_stdout:?}" + ); + assert!( + loud_stdout.contains("Removed 1 patch(es) from manifest"), + "non-silent run must print the removal summary; got {loud_stdout:?}" + ); + assert!( + loud_stderr.contains("will be removed"), + "non-silent run must print the pre-removal listing; got {loud_stderr:?}" + ); +} + +/// `--silent` must also reach the lock helper: reclaiming a stale +/// `apply.lock` via `--break-lock` prints a stderr warning that +/// `acquire_or_emit` gates on its `silent` param — which `remove` +/// hardcoded to `false`. +#[test] +fn remove_silent_suppresses_break_lock_warning() { + let tmp = tempfile::tempdir().expect("tempdir"); + let socket = make_socket_dir(tmp.path()); + std::fs::write(socket.join("apply.lock"), b"").expect("write stale lock"); + + let (code, stdout, stderr) = run_remove( + tmp.path(), + &[ + "pkg:npm/__remove_silent_test__@1.0.0", + "--silent", + "--yes", + "--break-lock", + "--skip-rollback", + ], + ); + assert_eq!( + code, 0, + "remove must succeed; stdout={stdout:?} stderr={stderr:?}" + ); + assert!( + !stderr.contains("reclaimed stale"), + "--silent must suppress the stale-lock warning; got {stderr:?}" + ); + + // Control run: without --silent the warning must appear. 
+ let tmp2 = tempfile::tempdir().expect("tempdir"); + let socket2 = make_socket_dir(tmp2.path()); + std::fs::write(socket2.join("apply.lock"), b"").expect("write stale lock"); + let (loud_code, _loud_stdout, loud_stderr) = run_remove( + tmp2.path(), + &[ + "pkg:npm/__remove_silent_test__@1.0.0", + "--yes", + "--break-lock", + "--skip-rollback", + ], + ); + assert_eq!(loud_code, 0); + assert!( + loud_stderr.contains("reclaimed stale"), + "non-silent --break-lock must print the stale-lock warning; got {loud_stderr:?}" + ); +} + +/// Errors must still print under `--silent` ("errors only", not "nothing"): +/// an unknown identifier keeps its stderr message and exit 1. +#[test] +fn remove_silent_keeps_error_output() { + let tmp = tempfile::tempdir().expect("tempdir"); + make_socket_dir(tmp.path()); + + let (code, _stdout, stderr) = run_remove( + tmp.path(), + &["pkg:npm/__no_such_package__@9.9.9", "--silent", "--yes"], + ); + assert_eq!(code, 1, "unknown identifier must exit 1"); + assert!( + stderr.contains("No patch found matching identifier"), + "--silent must NOT suppress error output; got {stderr:?}" + ); +} diff --git a/crates/socket-patch-cli/tests/cli_scan_silent.rs b/crates/socket-patch-cli/tests/cli_scan_silent.rs new file mode 100644 index 0000000..bd38ccb --- /dev/null +++ b/crates/socket-patch-cli/tests/cli_scan_silent.rs @@ -0,0 +1,347 @@ +//! `scan --silent` contract tests. +//! +//! CLI_CONTRACT.md defines `--silent` as "Errors only". Regression +//! guard: `scan` gated all of its human-readable output on `!json` +//! alone — the "No packages found" hint, the "Found N packages" / +//! "Found N patches" stderr chatter, the results table, the summary, +//! the "Patches to apply" listing, and the post-apply GC line all +//! printed under `--silent` — and the human download path hardcoded +//! `silent: false` into `DownloadParams`, so the nested apply step's +//! progress printed too. Same bug class previously fixed in `list`, +//! 
`repair`, `get`, and `remove`. +//! +//! The apply-flow test runs against a wiremock API (same fixture shape +//! as `scan_sync_e2e.rs`) so the full human-mode scan→select→download→ +//! apply pipeline is exercised without the network. +//! +//! Stderr assertions ignore the "No SOCKET_API_TOKEN set" client +//! warning: it's printed unconditionally by +//! `get_api_client_with_overrides` in core for every command and is +//! out of scope for `scan`'s `--silent` gating. + +use std::path::{Path, PathBuf}; +use std::process::Command; + +use sha2::{Digest, Sha256}; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +use socket_patch_cli::args::GLOBAL_ARG_ENV_VARS; + +fn binary() -> PathBuf { + env!("CARGO_BIN_EXE_socket-patch").into() +} + +const ORG_SLUG: &str = "test-org"; +const UUID: &str = "11111111-1111-4111-8111-111111111111"; + +fn git_sha256(content: &[u8]) -> String { + let header = format!("blob {}\0", content.len()); + let mut hasher = Sha256::new(); + hasher.update(header.as_bytes()); + hasher.update(content); + hex::encode(hasher.finalize()) +} + +fn write_root(root: &Path) { + std::fs::write( + root.join("package.json"), + r#"{ "name": "scan-silent-test", "version": "0.0.0" }"#, + ) + .unwrap(); +} + +fn write_npm_package(root: &Path, name: &str, version: &str, content: &[u8]) { + let pkg_dir = root.join("node_modules").join(name); + std::fs::create_dir_all(&pkg_dir).unwrap(); + std::fs::write( + pkg_dir.join("package.json"), + format!(r#"{{ "name": "{name}", "version": "{version}" }}"#), + ) + .unwrap(); + std::fs::write(pkg_dir.join("index.js"), content).unwrap(); +} + +/// Run `socket-patch scan` in `cwd` with a scrubbed SOCKET_* environment +/// so ambient developer/CI configuration (tokens, silent toggles) can't +/// change the branch under test. 
+fn run_scan(cwd: &Path, args: &[&str]) -> (i32, String, String) { + let mut cmd = Command::new(binary()); + cmd.arg("scan").args(args).current_dir(cwd); + for var in GLOBAL_ARG_ENV_VARS { + cmd.env_remove(var); + } + cmd.env_remove("SOCKET_BATCH_SIZE"); + cmd.env_remove("SOCKET_ALL_RELEASES"); + cmd.env("SOCKET_TELEMETRY_DISABLED", "1"); + let out = cmd.output().expect("run socket-patch scan"); + ( + out.status.code().unwrap_or(-1), + String::from_utf8_lossy(&out.stdout).to_string(), + String::from_utf8_lossy(&out.stderr).to_string(), + ) +} + +/// Non-error stderr lines: drop the unconditional core API-token warning +/// (both its lead line and its "Got: ... Continuing anyway" continuation) +/// and blank lines, keep everything else. +fn stderr_chatter(stderr: &str) -> Vec { + stderr + .lines() + .filter(|l| { + !l.contains("SOCKET_API_TOKEN") + && !l.contains("Continuing anyway") + && !l.trim().is_empty() + }) + .map(|l| l.to_string()) + .collect() +} + +/// Mount the three endpoints the human-mode apply flow hits: batch +/// discovery, per-package search, and the full patch view (inline blob). +/// Fixture shape mirrors `scan_sync_e2e.rs`. 
+async fn mount_one_patch_api(mock: &MockServer, purl: &str, before: &[u8]) { + let before_hash = git_sha256(before); + let after_hash = git_sha256(b"after\n"); + let encoded = purl + .replace(':', "%3A") + .replace('/', "%2F") + .replace('@', "%40"); + + Mock::given(method("POST")) + .and(path(format!("/v0/orgs/{ORG_SLUG}/patches/batch"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "packages": [{ + "purl": purl, + "patches": [{ + "uuid": UUID, + "purl": purl, + "tier": "free", + "cveIds": [], + "ghsaIds": [], + "severity": "high", + "title": "silent test patch" + }] + }], + "canAccessPaidPatches": false, + }))) + .mount(mock) + .await; + + Mock::given(method("GET")) + .and(path(format!( + "/v0/orgs/{ORG_SLUG}/patches/by-package/{encoded}" + ))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "patches": [{ + "uuid": UUID, + "purl": purl, + "publishedAt": "2024-01-01T00:00:00Z", + "description": "Silent test patch", + "license": "MIT", + "tier": "free", + "vulnerabilities": {} + }], + "canAccessPaidPatches": false, + }))) + .mount(mock) + .await; + + // base64 of "after\n" — inline so the apply step needs no blob endpoint. + Mock::given(method("GET")) + .and(path(format!("/v0/orgs/{ORG_SLUG}/patches/view/{UUID}"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "uuid": UUID, + "purl": purl, + "publishedAt": "2024-01-01T00:00:00Z", + "files": { + "package/index.js": { + "beforeHash": before_hash, + "afterHash": after_hash, + "blobContent": "YWZ0ZXIK", + } + }, + "vulnerabilities": {}, + "description": "Silent test patch", + "license": "MIT", + "tier": "free", + }))) + .mount(mock) + .await; +} + +/// `scan --silent` in a project with no installed packages must produce +/// no output at all (the "No packages found. Run ... install first." +/// hint is informational, not an error — the scan itself succeeded). 
+/// Fully offline: the crawl finds nothing, so the API is never queried. +#[test] +fn scan_silent_no_packages_produces_no_output() { + let tmp = tempfile::tempdir().expect("tempdir"); + write_root(tmp.path()); + + let (code, stdout, stderr) = run_scan(tmp.path(), &["--silent"]); + assert_eq!( + code, 0, + "empty scan must succeed; stdout={stdout:?} stderr={stderr:?}" + ); + assert!( + stdout.trim().is_empty(), + "--silent must produce no stdout; got {stdout:?}" + ); + let chatter = stderr_chatter(&stderr); + assert!( + chatter.is_empty(), + "--silent must produce no stderr chatter on success; got {chatter:?}" + ); + + // Control run: the same scenario WITHOUT --silent must print the + // hint — otherwise the assertions above pass vacuously. + let (loud_code, loud_stdout, _) = run_scan(tmp.path(), &[]); + assert_eq!(loud_code, 0); + assert!( + loud_stdout.contains("No packages found"), + "non-silent empty scan must print the install hint; got {loud_stdout:?}" + ); +} + +/// The full human-mode apply flow under `--silent --yes` must stay +/// quiet end to end: no "Found N packages" / "Found N patches" stderr +/// chatter, no results table, no "Patches to apply" listing, and no +/// download/apply progress from the nested `download_and_apply_patches` +/// call (which `scan` configured with a hardcoded `silent: false`). +/// The mutation itself must still happen. 
+#[tokio::test] +async fn scan_silent_apply_flow_produces_no_output_but_still_applies() { + let purl = "pkg:npm/silent-target@1.0.0"; + let before = b"before\n"; + + let mock = MockServer::start().await; + mount_one_patch_api(&mock, purl, before).await; + + let tmp = tempfile::tempdir().expect("tempdir"); + write_root(tmp.path()); + write_npm_package(tmp.path(), "silent-target", "1.0.0", before); + + let (code, stdout, stderr) = run_scan( + tmp.path(), + &[ + "--silent", + "--yes", + "--api-url", + &mock.uri(), + "--api-token", + "fake-token", + "--org", + ORG_SLUG, + ], + ); + assert_eq!( + code, 0, + "scan apply must succeed; stdout={stdout:?} stderr={stderr:?}" + ); + assert!( + stdout.trim().is_empty(), + "--silent must produce no stdout; got {stdout:?}" + ); + let chatter = stderr_chatter(&stderr); + assert!( + chatter.is_empty(), + "--silent must produce no stderr chatter on success; got {chatter:?}" + ); + + // Silent suppresses output, not the mutation: the patch must have + // been applied to disk and recorded in the manifest. + let patched = + std::fs::read(tmp.path().join("node_modules/silent-target/index.js")).expect("read file"); + assert_eq!( + patched, b"after\n", + "the patch must still be applied under --silent" + ); + let manifest = + std::fs::read_to_string(tmp.path().join(".socket/manifest.json")).expect("read manifest"); + let v: serde_json::Value = serde_json::from_str(&manifest).expect("parse manifest"); + assert_eq!( + v["patches"][purl]["uuid"], UUID, + "the manifest must still record the patch under --silent" + ); + + // Control run: the same flow WITHOUT --silent must print the table + // and the pre-apply listing — otherwise the assertions above pass + // vacuously. 
+ let tmp2 = tempfile::tempdir().expect("tempdir"); + write_root(tmp2.path()); + write_npm_package(tmp2.path(), "silent-target", "1.0.0", before); + let (loud_code, loud_stdout, loud_stderr) = run_scan( + tmp2.path(), + &[ + "--yes", + "--api-url", + &mock.uri(), + "--api-token", + "fake-token", + "--org", + ORG_SLUG, + ], + ); + assert_eq!( + loud_code, 0, + "control run must succeed; stderr={loud_stderr:?}" + ); + assert!( + loud_stdout.contains("PACKAGE"), + "non-silent scan must print the results table; got {loud_stdout:?}" + ); + assert!( + loud_stdout.contains("Patches to apply:"), + "non-silent scan must print the pre-apply listing; got {loud_stdout:?}" + ); + assert!( + loud_stderr.contains("Found 1 packages"), + "non-silent scan must print the crawl summary on stderr; got {loud_stderr:?}" + ); +} + +/// Errors must still print under `--silent` ("errors only", not +/// "nothing"): when every API batch fails, the failure message keeps +/// its stderr output and exit 1 — but the informational "Found N +/// packages" line that precedes it must still be suppressed. 
+#[tokio::test] +async fn scan_silent_keeps_error_output() { + let mock = MockServer::start().await; + Mock::given(method("POST")) + .and(path(format!("/v0/orgs/{ORG_SLUG}/patches/batch"))) + .respond_with(ResponseTemplate::new(500)) + .mount(&mock) + .await; + + let tmp = tempfile::tempdir().expect("tempdir"); + write_root(tmp.path()); + write_npm_package(tmp.path(), "silent-target", "1.0.0", b"before\n"); + + let (code, _stdout, stderr) = run_scan( + tmp.path(), + &[ + "--silent", + "--yes", + "--api-url", + &mock.uri(), + "--api-token", + "fake-token", + "--org", + ORG_SLUG, + ], + ); + assert_eq!( + code, 1, + "all-batches-failed scan must exit 1; stderr={stderr:?}" + ); + assert!( + stderr.contains("API batch queries failed"), + "--silent must NOT suppress error output; got {stderr:?}" + ); + assert!( + !stderr.contains("Found 1 packages"), + "--silent must suppress the informational crawl summary even on \ + the error path; got {stderr:?}" + ); +} diff --git a/crates/socket-patch-cli/tests/cli_setup_silent.rs b/crates/socket-patch-cli/tests/cli_setup_silent.rs new file mode 100644 index 0000000..0e159d6 --- /dev/null +++ b/crates/socket-patch-cli/tests/cli_setup_silent.rs @@ -0,0 +1,247 @@ +//! `setup --silent` contract tests. +//! +//! CLI_CONTRACT.md defines `--silent` as "Errors only". Regression +//! guard: `setup` (and its `--check` / `--remove` modes) gated all of +//! its human-readable output on `!json` alone — the "Configuring..." / +//! "Searching..." headers, the previews, the summaries, the +//! configuration-status report, and the commit hints all printed under +//! `--silent`. Same bug class previously fixed in `list`, `repair`, +//! `get`, `remove`, and `scan`. +//! +//! `--silent` suppresses informational output only: the mutation still +//! happens, exit codes still distinguish states, and (matching the +//! shared `confirm()` helper) prompting is unaffected — these tests +//! pass `--yes` like the scan/remove silent suites. 
Runs fully offline: +//! npm-only fixtures, no API calls. + +use std::path::{Path, PathBuf}; +use std::process::Command; + +use socket_patch_cli::args::GLOBAL_ARG_ENV_VARS; + +fn binary() -> PathBuf { + env!("CARGO_BIN_EXE_socket-patch").into() +} + +const UNCONFIGURED_PACKAGE_JSON: &str = r#"{ + "name": "setup-silent-test", + "version": "0.0.0" +}"#; + +fn write_root(root: &Path) { + std::fs::write(root.join("package.json"), UNCONFIGURED_PACKAGE_JSON).unwrap(); +} + +/// Run `socket-patch setup` in `cwd` with a scrubbed SOCKET_* environment +/// so ambient developer/CI configuration (tokens, silent toggles) can't +/// change the branch under test. +fn run_setup(cwd: &Path, args: &[&str]) -> (i32, String, String) { + let mut cmd = Command::new(binary()); + cmd.arg("setup").args(args).current_dir(cwd); + for var in GLOBAL_ARG_ENV_VARS { + cmd.env_remove(var); + } + cmd.env_remove("SOCKET_SETUP_EXCLUDE"); + cmd.env("SOCKET_TELEMETRY_DISABLED", "1"); + let out = cmd.output().expect("run socket-patch setup"); + ( + out.status.code().unwrap_or(-1), + String::from_utf8_lossy(&out.stdout).to_string(), + String::from_utf8_lossy(&out.stderr).to_string(), + ) +} + +/// Non-error stderr lines: drop the unconditional core API-token warning +/// (printed by shared client/telemetry plumbing, out of scope for +/// `setup`'s `--silent` gating) and blank lines, keep everything else. +fn stderr_chatter(stderr: &str) -> Vec { + stderr + .lines() + .filter(|l| { + !l.contains("SOCKET_API_TOKEN") + && !l.contains("Continuing anyway") + && !l.trim().is_empty() + }) + .map(|l| l.to_string()) + .collect() +} + +/// `setup --silent --yes` must wire the postinstall hook without printing +/// anything: no "Configuring socket-patch install hooks..." header, no +/// preview, no summary, no commit hints. 
+#[test] +fn setup_silent_configures_but_prints_nothing() { + let tmp = tempfile::tempdir().expect("tempdir"); + write_root(tmp.path()); + + let (code, stdout, stderr) = run_setup(tmp.path(), &["--silent", "--yes"]); + assert_eq!( + code, 0, + "setup must succeed; stdout={stdout:?} stderr={stderr:?}" + ); + assert!( + stdout.trim().is_empty(), + "--silent must produce no stdout; got {stdout:?}" + ); + let chatter = stderr_chatter(&stderr); + assert!( + chatter.is_empty(), + "--silent must produce no stderr chatter on success; got {chatter:?}" + ); + + // Silent suppresses output, not the mutation: the hook must be wired. + let pkg = std::fs::read_to_string(tmp.path().join("package.json")).expect("read package.json"); + assert!( + pkg.contains("socket-patch"), + "the postinstall hook must still be wired under --silent; got {pkg:?}" + ); + + // Control run: the same scenario WITHOUT --silent must print the + // header and summary — otherwise the assertions above pass vacuously. + let tmp2 = tempfile::tempdir().expect("tempdir"); + write_root(tmp2.path()); + let (loud_code, loud_stdout, _) = run_setup(tmp2.path(), &["--yes"]); + assert_eq!(loud_code, 0); + assert!( + loud_stdout.contains("Configuring socket-patch install hooks"), + "non-silent setup must print the header; got {loud_stdout:?}" + ); + assert!( + loud_stdout.contains("item(s) updated"), + "non-silent setup must print the summary; got {loud_stdout:?}" + ); +} + +/// `setup --check --silent` must print nothing in both states; the exit +/// code alone distinguishes configured (0) from needs-configuration (1), +/// mirroring the `list --silent` fix. +#[test] +fn setup_check_silent_prints_nothing_in_both_states() { + // Unconfigured: exit 1, no output. 
+ let tmp = tempfile::tempdir().expect("tempdir"); + write_root(tmp.path()); + let (code, stdout, stderr) = run_setup(tmp.path(), &["--check", "--silent"]); + assert_eq!( + code, 1, + "unconfigured --check must exit 1; stdout={stdout:?}" + ); + assert!( + stdout.trim().is_empty(), + "--check --silent must produce no stdout; got {stdout:?}" + ); + let chatter = stderr_chatter(&stderr); + assert!( + chatter.is_empty(), + "--check --silent must produce no stderr chatter; got {chatter:?}" + ); + + // Configured (after a real setup): exit 0, no output. + let (setup_code, _, _) = run_setup(tmp.path(), &["--silent", "--yes"]); + assert_eq!( + setup_code, 0, + "setup must succeed before the configured check" + ); + let (code2, stdout2, _) = run_setup(tmp.path(), &["--check", "--silent"]); + assert_eq!( + code2, 0, + "configured --check must exit 0; stdout={stdout2:?}" + ); + assert!( + stdout2.trim().is_empty(), + "configured --check --silent must produce no stdout; got {stdout2:?}" + ); + + // Control run: without --silent the status report must print. + let (loud_code, loud_stdout, _) = run_setup(tmp.path(), &["--check"]); + assert_eq!(loud_code, 0); + assert!( + loud_stdout.contains("Configuration status"), + "non-silent --check must print the status report; got {loud_stdout:?}" + ); +} + +/// `setup --remove --silent --yes` must revert the hook without printing +/// anything: no "Searching..." header, no proposed-changes preview, no +/// summary, no pip-uninstall hint. 
+#[test] +fn setup_remove_silent_prints_nothing_but_removes() { + let tmp = tempfile::tempdir().expect("tempdir"); + write_root(tmp.path()); + let (setup_code, _, _) = run_setup(tmp.path(), &["--silent", "--yes"]); + assert_eq!(setup_code, 0, "setup must succeed before remove"); + + let (code, stdout, stderr) = run_setup(tmp.path(), &["--remove", "--silent", "--yes"]); + assert_eq!( + code, 0, + "remove must succeed; stdout={stdout:?} stderr={stderr:?}" + ); + assert!( + stdout.trim().is_empty(), + "--remove --silent must produce no stdout; got {stdout:?}" + ); + let chatter = stderr_chatter(&stderr); + assert!( + chatter.is_empty(), + "--remove --silent must produce no stderr chatter on success; got {chatter:?}" + ); + + // Silent suppresses output, not the mutation: the hook must be gone. + let pkg = std::fs::read_to_string(tmp.path().join("package.json")).expect("read package.json"); + assert!( + !pkg.contains("socket-patch"), + "the postinstall hook must still be removed under --silent; got {pkg:?}" + ); + + // Control run: a non-silent remove on a configured repo must print + // the preview and summary — otherwise the assertions above pass + // vacuously. + let tmp2 = tempfile::tempdir().expect("tempdir"); + write_root(tmp2.path()); + let (_, _, _) = run_setup(tmp2.path(), &["--silent", "--yes"]); + let (loud_code, loud_stdout, _) = run_setup(tmp2.path(), &["--remove", "--yes"]); + assert_eq!(loud_code, 0); + assert!( + loud_stdout.contains("Proposed changes"), + "non-silent remove must print the preview; got {loud_stdout:?}" + ); + assert!( + loud_stdout.contains("item(s) had socket-patch removed"), + "non-silent remove must print the summary; got {loud_stdout:?}" + ); +} + +/// The `no_files` path (no project found at all) is informational, not an +/// error: under `--silent` it must print nothing and exit 0. Covers both +/// the plain-setup inline branch and the shared `report_no_files` helper +/// that `--check` / `--remove` use. 
+#[test] +fn setup_silent_no_files_prints_nothing() { + let tmp = tempfile::tempdir().expect("tempdir"); + + for mode in [&[][..], &["--check"][..], &["--remove"][..]] { + let mut args: Vec<&str> = mode.to_vec(); + args.push("--silent"); + let (code, stdout, stderr) = run_setup(tmp.path(), &args); + assert_eq!( + code, 0, + "no_files must exit 0 for {mode:?}; stderr={stderr:?}" + ); + assert!( + stdout.trim().is_empty(), + "--silent no_files must produce no stdout for {mode:?}; got {stdout:?}" + ); + let chatter = stderr_chatter(&stderr); + assert!( + chatter.is_empty(), + "--silent no_files must produce no stderr chatter for {mode:?}; got {chatter:?}" + ); + } + + // Control run: without --silent the hint must print. + let (loud_code, loud_stdout, _) = run_setup(tmp.path(), &[]); + assert_eq!(loud_code, 0); + assert!( + loud_stdout.contains("No package.json, Python, Bundler, or Composer project found"), + "non-silent no_files must print the hint; got {loud_stdout:?}" + ); +} diff --git a/crates/socket-patch-cli/tests/e2e_safety_cargo_build.rs b/crates/socket-patch-cli/tests/e2e_safety_cargo_build.rs index ebf496f..24ad8db 100644 --- a/crates/socket-patch-cli/tests/e2e_safety_cargo_build.rs +++ b/crates/socket-patch-cli/tests/e2e_safety_cargo_build.rs @@ -368,6 +368,83 @@ fn apply_then_cargo_check_succeeds() { let _ = after; } +/// Rollback twin of the headline test: after apply rewrote both the +/// source and `.cargo-checksum.json`, `socket-patch rollback` must +/// restore BOTH — the original bytes AND the original checksum entry. +/// Before the rollback-side sidecar resync, rollback restored only the +/// bytes, leaving the patched hash in the checksum file — and the +/// negative control above proves cargo then refuses to build the +/// rolled-back crate ("checksum ... has changed"). 
+#[test] +#[ignore] +fn rollback_after_apply_then_cargo_check_succeeds() { + if !has_command("cargo") { + eprintln!("SKIP: cargo not on PATH"); + return; + } + let root = tempfile::tempdir().unwrap(); + let consumer = stage_consumer(root.path()); + let cargo_home = root.path().join(".cargo-home"); + generate_lockfile(&consumer, &cargo_home); + + // Baseline must build. + assert!(cargo_check(&consumer, &cargo_home).status.success()); + + let (before, _after) = stage_socket_manifest(&consumer); + // Rollback restores from the before-hash blob; stage it alongside + // the after-blob exactly as apply's snapshot would have left it. + write_blob( + &consumer.join(".socket"), + &before, + ORIGINAL_LIB_RS.as_bytes(), + ); + + let (_stdout, _stderr) = assert_run_ok( + &consumer, + &["apply", "--cwd", consumer.to_str().unwrap()], + "socket-patch apply", + ); + assert_eq!( + std::fs::read_to_string(consumer.join("vendor/safety-fixture/src/lib.rs")).unwrap(), + PATCHED_LIB_RS, + "apply must land the patched content first" + ); + + let (_stdout, _stderr) = assert_run_ok( + &consumer, + &["rollback", "--cwd", consumer.to_str().unwrap()], + "socket-patch rollback", + ); + + // Bytes are back to the original... + assert_eq!( + std::fs::read_to_string(consumer.join("vendor/safety-fixture/src/lib.rs")).unwrap(), + ORIGINAL_LIB_RS, + "rollback must restore the original source" + ); + // ...and the checksum entry was resynced to the original hash. + let post_checksum: serde_json::Value = serde_json::from_str( + &std::fs::read_to_string(consumer.join("vendor/safety-fixture/.cargo-checksum.json")) + .unwrap(), + ) + .unwrap(); + let expected_lib_hash = sha256_hex(ORIGINAL_LIB_RS.as_bytes()); + assert_eq!( + post_checksum["files"]["src/lib.rs"].as_str(), + Some(expected_lib_hash.as_str()), + "rollback must resync .cargo-checksum.json to the original SHA256.\npost: {post_checksum}" + ); + + // The whole point: the rolled-back vendored crate still builds. 
+ let out = cargo_check(&consumer, &cargo_home); + assert!( + out.status.success(), + "cargo check should succeed after rollback resynced the sidecar.\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr), + ); +} + /// JSON envelope sanity check on the same scenario: assert apply /// reports the cargo sidecar in the new top-level `envelope.sidecars[]` /// list with the structured shape. diff --git a/crates/socket-patch-cli/tests/e2e_safety_lock.rs b/crates/socket-patch-cli/tests/e2e_safety_lock.rs index 61539ca..1b37fa3 100644 --- a/crates/socket-patch-cli/tests/e2e_safety_lock.rs +++ b/crates/socket-patch-cli/tests/e2e_safety_lock.rs @@ -329,16 +329,18 @@ fn helper_lock_is_actually_exclusive() { } /// `apply --break-lock` against a pre-staged lock file (no live -/// holder) removes the file before acquisition and proceeds with -/// the apply pass. The JSON envelope must surface the -/// `lock_broken` warning event so the action is auditable. +/// holder) reclaims the file and proceeds with the apply pass. The +/// JSON envelope must surface the `lock_broken` warning event so the +/// action is auditable. /// /// Setup mirrors the OS-level scenario: a previous run crashed and /// left `apply.lock` behind, but the OS-level flock was released /// (so a fresh acquire would succeed even without --break-lock). -/// The --break-lock path is the safe-by-design version of `rm`. +/// The --break-lock path is the safe-by-design version of `rm` — +/// it never actually unlinks (that would defeat mutual exclusion), +/// it verifies no live holder and records the audit event. 
#[test] -fn break_lock_removes_stale_file_and_records_warning() { +fn break_lock_reclaims_stale_file_and_records_warning() { let dir = tempfile::tempdir().unwrap(); let socket_dir = dir.path().join(".socket"); setup_socket_dir(&socket_dir); @@ -403,7 +405,46 @@ fn break_lock_removes_stale_file_and_records_warning() { // The inode is kept for subsequent acquires. assert!( socket_dir.join("apply.lock").is_file(), - "apply.lock should be re-created after --break-lock acquires" + "apply.lock should still exist after --break-lock acquires" + ); +} + +/// Regression: when `--break-lock` itself is refused because a LIVE +/// holder owns the lock, the stderr hint must not advise rerunning +/// with `--break-lock` — the user just did exactly that, and the +/// probe refused precisely because a holder exists, so the advice +/// can only loop. (The plain-contention hint, with no --break-lock +/// passed, rightly keeps suggesting the flag — see +/// `lock_held_human_mode_mentions_other_process`.) +#[test] +fn break_lock_refusal_does_not_advise_break_lock_again() { + let dir = tempfile::tempdir().unwrap(); + let socket_dir = dir.path().join(".socket"); + setup_socket_dir(&socket_dir); + let _external = take_external_lock(&socket_dir); + + let (code, stdout, stderr) = run(dir.path(), &["apply", "--break-lock"]); + assert_eq!( + code, 1, + "--break-lock against a live holder must refuse with exit 1.\nstderr:\n{stderr}" + ); + assert!( + stdout.trim().is_empty(), + "human mode must not print a JSON envelope to stdout, got:\n{stdout}" + ); + assert!( + stderr.contains("Error: another socket-patch process is operating in this directory"), + "stderr should carry the lock_held error line, got:\n{stderr}" + ); + // Still actionable: the inspect path remains valid. + assert!( + stderr.contains("socket-patch unlock"), + "stderr should still point at `socket-patch unlock`, got:\n{stderr}" + ); + // The regression: no self-defeating "rerun with --break-lock". 
+ assert!( + !stderr.contains("rerun with --break-lock"), + "a refused --break-lock must not advise rerunning with --break-lock, got:\n{stderr}" ); } diff --git a/crates/socket-patch-cli/tests/e2e_safety_unlock.rs b/crates/socket-patch-cli/tests/e2e_safety_unlock.rs index d8e751e..cfd78e1 100644 --- a/crates/socket-patch-cli/tests/e2e_safety_unlock.rs +++ b/crates/socket-patch-cli/tests/e2e_safety_unlock.rs @@ -413,6 +413,144 @@ fn unlock_human_mode_release_reports_noop_when_no_leftover() { ); } +/// `--silent` ("Suppress non-error output") must blank the human-mode +/// free line. Regression guard: `emit_free` gated its human output on +/// `!json` alone — `unlock --silent` printed "Lock is free." to stdout +/// while the rest of the file (held branch, `emit_error`) honored the +/// flag. Same bug class previously fixed in `list`, `repair`, `get`, +/// `remove`, `scan`, and `setup`. +#[test] +fn unlock_silent_suppresses_free_output() { + let dir = tempfile::tempdir().unwrap(); + let socket_dir = dir.path().join(".socket"); + std::fs::create_dir_all(&socket_dir).unwrap(); + + let (code, stdout, stderr) = run(dir.path(), &["unlock", "--silent"]); + assert_eq!(code, 0, "stdout={stdout}\nstderr={stderr}"); + assert!( + stdout.trim().is_empty(), + "--silent must produce no stdout on a free probe, got:\n{stdout}" + ); + + // Control run: the same probe WITHOUT --silent must print the free + // line — otherwise the assertion above passes vacuously. + let (loud_code, loud_stdout, _) = run(dir.path(), &["unlock"]); + assert_eq!(loud_code, 0); + assert!( + loud_stdout.contains("Lock is free."), + "non-silent free probe must print the free line, got:\n{loud_stdout}" + ); +} + +/// `--silent --release` suppresses the output, not the mutation: the +/// leftover lock file must still be deleted, with nothing on stdout. 
+#[test] +fn unlock_silent_release_still_deletes_but_stays_quiet() { + let dir = tempfile::tempdir().unwrap(); + let socket_dir = dir.path().join(".socket"); + std::fs::create_dir_all(&socket_dir).unwrap(); + let lock_file = socket_dir.join("apply.lock"); + std::fs::write(&lock_file, b"").unwrap(); + + let (code, stdout, stderr) = run(dir.path(), &["unlock", "--silent", "--release"]); + assert_eq!(code, 0, "stdout={stdout}\nstderr={stderr}"); + assert!( + stdout.trim().is_empty(), + "--silent --release must produce no stdout, got:\n{stdout}" + ); + assert!( + !lock_file.exists(), + "--silent must not suppress the release itself" + ); +} + +/// `--silent` must NOT blank the JSON envelope — `--json --silent` is +/// the standard scripting combination and the machine output is the +/// whole point of it. +#[test] +fn unlock_silent_keeps_json_output() { + let dir = tempfile::tempdir().unwrap(); + let socket_dir = dir.path().join(".socket"); + std::fs::create_dir_all(&socket_dir).unwrap(); + + let (code, stdout, stderr) = run(dir.path(), &["unlock", "--json", "--silent"]); + assert_eq!(code, 0, "stdout={stdout}\nstderr={stderr}"); + let env = parse_json_envelope(&stdout); + assert_eq!( + json_string(&env, "status"), + Some("free"), + "--silent must not suppress the JSON envelope: {stdout}" + ); +} + +/// `unlock` must probe the SAME lock the mutating subcommands take. +/// Every mutating command derives the lock directory from +/// `--manifest-path` (`resolved_manifest_path().parent()`); `unlock` +/// hardcoded `/.socket` instead, so with a custom manifest path it +/// probed a directory nobody locks — reporting `free` (exit 0) while +/// `apply`/`remove` held their lock. For the CI-gating use case this +/// command exists for, that's the worst possible wrong answer. 
+#[test] +fn unlock_honors_manifest_path_when_probing() { + let dir = tempfile::tempdir().unwrap(); + let custom_dir = dir.path().join("custom"); + let _external = take_external_lock(&custom_dir); + + let (code, stdout, stderr) = run( + dir.path(), + &[ + "unlock", + "--json", + "--manifest-path", + "custom/manifest.json", + ], + ); + assert_eq!( + code, 1, + "a held custom-manifest-path lock must read as held; stdout={stdout}\nstderr={stderr}" + ); + let env = parse_json_envelope(&stdout); + let code_field = env + .get("error") + .and_then(|e| e.get("code")) + .and_then(|c| c.as_str()); + assert_eq!(code_field, Some("lock_held"), "envelope: {stdout}"); +} + +/// Companion free-side guard: `--release` with a custom +/// `--manifest-path` must remove the leftover next to THAT manifest, +/// not silently no-op because `/.socket` doesn't exist. +#[test] +fn unlock_release_honors_manifest_path() { + let dir = tempfile::tempdir().unwrap(); + let custom_dir = dir.path().join("custom"); + std::fs::create_dir_all(&custom_dir).unwrap(); + let lock_file = custom_dir.join("apply.lock"); + std::fs::write(&lock_file, b"crashed-run-leftover").unwrap(); + + let (code, stdout, stderr) = run( + dir.path(), + &[ + "unlock", + "--json", + "--release", + "--manifest-path", + "custom/manifest.json", + ], + ); + assert_eq!(code, 0, "stdout={stdout}\nstderr={stderr}"); + let env = parse_json_envelope(&stdout); + assert_eq!( + env.get("released").and_then(|v| v.as_bool()), + Some(true), + "the custom-path leftover was removed, so released must be true: {stdout}" + ); + assert!( + !lock_file.exists(), + "--release must delete the leftover next to the resolved manifest path" + ); +} + /// Human-mode (`unlock` without `--json`) emits a stderr hint /// pointing the user at `--break-lock` when the lock is held. 
/// Pinned at the substring level so the helpful guidance survives diff --git a/crates/socket-patch-cli/tests/ecosystem_dispatch_e2e.rs b/crates/socket-patch-cli/tests/ecosystem_dispatch_e2e.rs index aa30c82..0d844ac 100644 --- a/crates/socket-patch-cli/tests/ecosystem_dispatch_e2e.rs +++ b/crates/socket-patch-cli/tests/ecosystem_dispatch_e2e.rs @@ -810,6 +810,169 @@ fn rollback_dispatch_branch_composer() { assert_rollback_restored(root, "composer", &fixture); } +// --------------------------------------------------------------------------- +// Machine-output purity at dispatch call sites. +// +// The scan macro in `ecosystem_dispatch` prints "Using at: " to +// STDOUT whenever the crawl is global (`--global` / `--global-prefix`) and +// the caller did not pass `silent = true`. `apply` and `rollback` pass +// `silent || json`, but the `vex` and `setup --check` call sites passed only +// `silent`, so in `--json` mode (envelope on stdout) — and in vex's +// doc-to-stdout mode — the chrome line corrupted the machine stream. +// `--global-prefix` makes the leak deterministic: the npm crawler returns +// the prefix verbatim as a node_modules root, so `paths` is never empty. +// --------------------------------------------------------------------------- + +use socket_patch_cli::args::GLOBAL_ARG_ENV_VARS; + +/// Run the binary with a scrubbed SOCKET_* environment so ambient +/// developer/CI configuration (tokens, silent/json toggles, vex modes) +/// can't change the branch under test. 
+fn run_scrubbed(cwd: &Path, args: &[&str]) -> (i32, String, String) { + let mut cmd = Command::new(binary()); + cmd.args(args).current_dir(cwd); + for var in GLOBAL_ARG_ENV_VARS { + cmd.env_remove(var); + } + for var in [ + "SOCKET_VEX", + "SOCKET_VEX_OUTPUT", + "SOCKET_VEX_PRODUCT", + "SOCKET_VEX_NO_VERIFY", + "SOCKET_VEX_DOC_ID", + "SOCKET_VEX_COMPACT", + "SOCKET_SETUP_EXCLUDE", + ] { + cmd.env_remove(var); + } + cmd.env("SOCKET_TELEMETRY_DISABLED", "1"); + let out = cmd.output().expect("run socket-patch"); + ( + out.status.code().unwrap_or(-1), + String::from_utf8_lossy(&out.stdout).to_string(), + String::from_utf8_lossy(&out.stderr).to_string(), + ) +} + +/// `vex --json` reserves stdout for the envelope (`--output` is mandatory +/// in that mode for exactly that reason). A global-prefixed npm crawl must +/// not leak the dispatch's "Using at:" line into the stream. +#[test] +fn vex_json_global_prefix_stdout_is_pure_json() { + let tmp = tempfile::tempdir().unwrap(); + write_root_package_json(tmp.path()); + write_manifest(tmp.path(), "pkg:npm/__dispatch_test__@1.0.0"); + let gp = tmp.path().join("gprefix"); + std::fs::create_dir_all(&gp).unwrap(); + let out_file = tmp.path().join("vex.json"); + + let (code, stdout, stderr) = run_scrubbed( + tmp.path(), + &[ + "vex", + "--json", + "--output", + out_file.to_str().unwrap(), + "--product", + "pkg:npm/__product__@1.0.0", + "--global-prefix", + gp.to_str().unwrap(), + ], + ); + + let env: Value = serde_json::from_str(stdout.trim()).unwrap_or_else(|e| { + panic!( + "vex --json stdout must be exactly the JSON envelope — the dispatch's \ + 'Using at:' chrome must not leak onto stdout ({e}); \ + stdout={stdout:?} stderr={stderr:?}" + ) + }); + // Prove the run got PAST the package crawl (a bail-out before + // `resolve_package_paths` would make the purity assertion vacuous): the + // file-less patch fails verification, so the envelope must be the + // post-crawl `no_applicable_patches` error with its soft exit 1. 
+ assert_eq!(env["command"], "vex", "stdout={stdout:?}"); + assert_eq!( + env["error"]["code"], "no_applicable_patches", + "expected the post-crawl verification error (proves the crawl ran); stdout={stdout:?}" + ); + assert_eq!(code, 1, "stdout={stdout:?} stderr={stderr:?}"); +} + +/// Standalone `vex` with no `--output` writes the VEX document itself to +/// stdout; every other vex line deliberately goes to stderr. The dispatch +/// chrome must not be the one exception. +#[test] +fn vex_doc_to_stdout_global_prefix_emits_no_chrome_on_stdout() { + let tmp = tempfile::tempdir().unwrap(); + write_root_package_json(tmp.path()); + write_manifest(tmp.path(), "pkg:npm/__dispatch_test__@1.0.0"); + let gp = tmp.path().join("gprefix"); + std::fs::create_dir_all(&gp).unwrap(); + + let (code, stdout, stderr) = run_scrubbed( + tmp.path(), + &[ + "vex", + "--product", + "pkg:npm/__product__@1.0.0", + "--global-prefix", + gp.to_str().unwrap(), + ], + ); + + // The file-less fixture fails verification after the crawl, so no doc + // is emitted: the no-applicable error goes to stderr with exit 1 and + // stdout must be completely empty. + assert_eq!( + code, 1, + "expected the no_applicable_patches soft failure; stdout={stdout:?} stderr={stderr:?}" + ); + assert!( + stderr.contains("No applied patches"), + "expected the post-crawl no-applicable error on stderr (proves the crawl ran); \ + stderr={stderr:?}" + ); + assert!( + stdout.trim().is_empty(), + "vex doc-to-stdout mode must keep stdout empty when no document is emitted — \ + the dispatch's 'Using at:' chrome leaked: {stdout:?}" + ); +} + +/// `setup --check --json` prints its JSON report to stdout after the patch +/// consistency pass, which crawls via the dispatch. The chrome line must +/// not precede (and corrupt) the report. 
+#[test] +fn setup_check_json_global_prefix_stdout_is_pure_json() { + let tmp = tempfile::tempdir().unwrap(); + write_root_package_json(tmp.path()); + write_manifest(tmp.path(), "pkg:npm/__dispatch_test__@1.0.0"); + let gp = tmp.path().join("gprefix"); + std::fs::create_dir_all(&gp).unwrap(); + + let (_code, stdout, stderr) = run_scrubbed( + tmp.path(), + &[ + "setup", + "--check", + "--json", + "--global-prefix", + gp.to_str().unwrap(), + ], + ); + + let report: Value = serde_json::from_str(stdout.trim()).unwrap_or_else(|e| { + panic!( + "setup --check --json stdout must be exactly the JSON report — the \ + dispatch's 'Using at:' chrome must not leak onto stdout ({e}); \ + stdout={stdout:?} stderr={stderr:?}" + ) + }); + assert!(report["status"].is_string(), "stdout={stdout:?}"); + assert!(report["files"].is_array(), "stdout={stdout:?}"); +} + #[cfg(feature = "nuget")] #[test] // Experimental ecosystem (nuget), kept OFF the blocking CI suite — see the diff --git a/crates/socket-patch-cli/tests/in_process_get_uuid_fallback.rs b/crates/socket-patch-cli/tests/in_process_get_uuid_fallback.rs new file mode 100644 index 0000000..ea7ac3a --- /dev/null +++ b/crates/socket-patch-cli/tests/in_process_get_uuid_fallback.rs @@ -0,0 +1,96 @@ +//! In-process regression test for the `get ` auth→proxy fallback. +//! +//! Regression guard: `run()` correctly retried a 401/403 from the +//! authenticated patch-view endpoint against the public proxy — but then +//! `save_and_apply_patch` RE-FETCHED the patch with a freshly-built +//! authenticated client, hitting the same 401 and exiting 1. The +//! already-fetched `PatchResponse` must be carried through to the save +//! step so a stale token still yields free patches end to end (the whole +//! point of the fallback). 
+ +use serial_test::serial; +use socket_patch_cli::args::GlobalArgs; +use socket_patch_cli::commands::get::{run, GetArgs}; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +const ORG: &str = "test-org"; +const UUID: &str = "22222222-2222-4222-8222-222222222222"; +const PURL: &str = "pkg:npm/fallback-pkg@1.0.0"; +const AFTER_HASH: &str = "1111111111111111111111111111111111111111111111111111111111111111"; + +#[tokio::test] +#[serial] +async fn stale_token_uuid_get_falls_back_to_proxy_end_to_end() { + let server = MockServer::start().await; + + // The authenticated endpoint rejects the stale token. + Mock::given(method("GET")) + .and(path(format!("/v0/orgs/{ORG}/patches/view/{UUID}"))) + .respond_with(ResponseTemplate::new(401)) + .mount(&server) + .await; + + // The public proxy serves the free patch. + Mock::given(method("GET")) + .and(path(format!("/patch/view/{UUID}"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "uuid": UUID, + "purl": PURL, + "publishedAt": "2024-06-01T00:00:00Z", + "files": { + "package/index.js": { + "beforeHash": "0000000000000000000000000000000000000000000000000000000000000000", + "afterHash": AFTER_HASH, + "blobContent": "cGF0Y2hlZAo=", + "beforeBlobContent": "b3JpZ2luYWwK", + } + }, + "vulnerabilities": {}, + "description": "fallback test patch", "license": "MIT", "tier": "free", + }))) + .mount(&server) + .await; + + let tmp = tempfile::tempdir().unwrap(); + let args = GetArgs { + identifier: UUID.to_string(), + common: GlobalArgs { + cwd: tmp.path().to_path_buf(), + api_url: server.uri(), + api_token: Some("stale-token".to_string()), + org: Some(ORG.to_string()), + proxy_url: server.uri(), + json: true, + no_telemetry: true, + ..GlobalArgs::default() + }, + id: true, + cve: false, + ghsa: false, + package: false, + // save_only isolates the fallback/save path from the apply step. 
+ save_only: true, + one_off: false, + all_releases: false, + }; + + let code = run(args).await; + assert_eq!( + code, 0, + "a stale token must fall back to the proxy and still save the free patch" + ); + + // The patch made it into the manifest... + let body = std::fs::read_to_string(tmp.path().join(".socket/manifest.json")).expect("manifest"); + let manifest: serde_json::Value = serde_json::from_str(&body).unwrap(); + assert_eq!( + manifest["patches"][PURL]["uuid"], UUID, + "manifest must carry the proxy-fetched patch; manifest={manifest}" + ); + // ...and its blob was written. + assert!( + tmp.path().join(".socket/blobs").join(AFTER_HASH).exists(), + "after-blob must be written to .socket/blobs" + ); +} diff --git a/crates/socket-patch-cli/tests/interactive_prompts_e2e.rs b/crates/socket-patch-cli/tests/interactive_prompts_e2e.rs index 77ccb3a..377f917 100644 --- a/crates/socket-patch-cli/tests/interactive_prompts_e2e.rs +++ b/crates/socket-patch-cli/tests/interactive_prompts_e2e.rs @@ -39,6 +39,12 @@ fn binary() -> PathBuf { /// before sending input — the PTY buffers the input until the /// child reads it, so timing-coupling isn't needed. fn run_in_pty(args: &[&str], cwd: &Path, input: &str, timeout: Duration) -> (i32, String) { + run_in_pty_bytes(args, cwd, input.as_bytes(), timeout) +} + +/// Byte-level variant of [`run_in_pty`] for input that is not valid +/// UTF-8 (e.g. a Latin-1 paste at an interactive prompt). +fn run_in_pty_bytes(args: &[&str], cwd: &Path, input: &[u8], timeout: Duration) -> (i32, String) { let pty_system = native_pty_system(); let pair = pty_system .openpty(PtySize { @@ -92,7 +98,7 @@ fn run_in_pty(args: &[&str], cwd: &Path, input: &str, timeout: Duration) -> (i32 // no pre-sleep is needed — dialoguer/rustyline will read it when // their prompt loop polls stdin. 
let mut writer = pair.master.take_writer().expect("take writer"); - let _ = writer.write_all(input.as_bytes()); + let _ = writer.write_all(input); let _ = writer.flush(); drop(writer); @@ -347,6 +353,58 @@ fn remove_interactive_n_cancels() { ); } +#[test] +fn remove_interactive_non_utf8_answer_declines_without_panic() { + let tmp = tempfile::tempdir().unwrap(); + write_remove_manifest(tmp.path()); + + // A terminal can deliver non-UTF-8 bytes at the prompt (e.g. a + // Latin-1 paste: `é` = 0xE9); `read_line` reports them as an + // InvalidData error. Regression: `confirm()` unwrapped that error + // and panicked (exit 101) instead of treating the garbage like any + // other unrecognized answer (decline). + let (code, output) = run_in_pty_bytes( + &[ + "remove", + "pkg:npm/__interactive_remove__@1.0.0", + "--skip-rollback", + ], + tmp.path(), + b"\xE9\n", + Duration::from_secs(15), + ); + assert!( + !output.contains("panicked"), + "non-UTF-8 answer must not panic the CLI; got: {output}" + ); + assert_eq!( + code, 0, + "non-UTF-8 answer must decline cleanly, not crash; got: {output}" + ); + // The interactive confirm MUST have run (same vacuity guard as the + // y/n tests above), and the unreadable answer must land on "no". + assert!( + output.contains("Remove 1 patch(es) and rollback files?"), + "remove must have shown the interactive confirm prompt; got: {output}" + ); + assert!( + !output.contains("Non-interactive mode"), + "remove must NOT have taken the non-interactive branch in a PTY; got: {output}" + ); + assert!( + output.contains("Removal cancelled"), + "non-UTF-8 answer must be treated as 'no'; got: {output}" + ); + // Declined: the manifest entry must be intact. 
+ let body = std::fs::read_to_string(tmp.path().join(".socket/manifest.json")).unwrap(); + let manifest: serde_json::Value = serde_json::from_str(&body).unwrap(); + let original: serde_json::Value = serde_json::from_str(REMOVE_MANIFEST).unwrap(); + assert_eq!( + manifest, original, + "declined remove must not mutate the manifest" + ); +} + // --------------------------------------------------------------------------- // Apply non-JSON without --yes also exercises confirm() flow, // even though apply auto-proceeds in non-interactive contexts. diff --git a/crates/socket-patch-cli/tests/rollback_invariants.rs b/crates/socket-patch-cli/tests/rollback_invariants.rs index e165060..5fa0e72 100644 --- a/crates/socket-patch-cli/tests/rollback_invariants.rs +++ b/crates/socket-patch-cli/tests/rollback_invariants.rs @@ -167,6 +167,92 @@ fn rollback_one_off_with_identifier_reports_not_implemented() { ); } +/// Regression: `SOCKET_ONE_OFF=1` must set `--one-off` exactly like the flag. +/// clap's default bool parser accepts only the literal strings `true`/`false` +/// from an env binding, so any other truthy spelling aborted every `rollback` +/// invocation with a clap usage error (exit 2) before it could do any work. +/// `value_parser = parse_bool_flag` gives the flag the same env vocabulary as +/// the `GlobalArgs` bools. Reaching the one-off stub's "not yet implemented" +/// envelope proves the env var landed as `true`. 
+#[test] +fn truthy_one_off_env_var_sets_flag() { + let tmp = tempfile::tempdir().expect("tempdir"); + let out = rollback_cmd(tmp.path()) + .env("SOCKET_ONE_OFF", "1") + .args(["--json", "33333333-3333-4333-8333-333333333333"]) + .output() + .expect("run socket-patch"); + assert_eq!( + out.status.code(), + Some(1), + "SOCKET_ONE_OFF=1 must parse, not abort with a usage error; stderr=\n{}", + String::from_utf8_lossy(&out.stderr) + ); + let stdout = String::from_utf8_lossy(&out.stdout); + let v: serde_json::Value = serde_json::from_str(&stdout) + .expect("JSON envelope (a clap usage error means the env var aborted the parse)"); + assert_eq!(v["status"], "error"); + let err = v["error"].as_str().expect("error message string"); + assert!( + err.contains("not yet implemented"), + "expected the one-off stub (proving one_off=true), got: {err}" + ); +} + +/// Regression: an exported-but-empty `SOCKET_ONE_OFF=` — the shell/CI idiom +/// for blanking a variable without unsetting it — must mean "unset, fall back +/// to false", not abort the run. (This flag is outside `GLOBAL_ARG_ENV_VARS`, +/// so `main`'s empty-var scrub never rescues it; the parser itself must +/// tolerate the empty string.) With one-off correctly off, a manifest-less +/// rollback reaches the normal "Manifest not found" error. 
+#[test] +fn empty_one_off_env_var_parses_as_false_not_crash() { + let tmp = tempfile::tempdir().expect("tempdir"); + let out = rollback_cmd(tmp.path()) + .env("SOCKET_ONE_OFF", "") + .args(["--json"]) + .output() + .expect("run socket-patch"); + assert_eq!( + out.status.code(), + Some(1), + "empty SOCKET_ONE_OFF must parse, not abort with a usage error; stderr=\n{}", + String::from_utf8_lossy(&out.stderr) + ); + let stdout = String::from_utf8_lossy(&out.stdout); + let v: serde_json::Value = serde_json::from_str(&stdout) + .expect("JSON envelope (a clap usage error means the env var aborted the parse)"); + assert_eq!(v["status"], "error"); + let err = v["error"].as_str().expect("error message string"); + assert!( + err.contains("Manifest not found"), + "empty SOCKET_ONE_OFF must resolve to false (normal rollback path), got: {err}" + ); +} + +/// Human (non-JSON) one-off must surface the same not-implemented error the +/// JSON envelope carries. Before the fix the human branch printed a +/// misleading "One-off rollback mode: fetching patch data..." progress line +/// — for work that never happens — and exited 1 with no error at all. 
+#[test] +fn rollback_one_off_human_reports_not_implemented_error() { + let tmp = tempfile::tempdir().expect("tempdir"); + let out = rollback_cmd(tmp.path()) + .args(["--one-off", "33333333-3333-4333-8333-333333333333"]) + .output() + .expect("run socket-patch"); + assert_eq!(out.status.code(), Some(1), "one-off mode must exit 1 today"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("not yet implemented"), + "human one-off must state the not-implemented error; stderr=\n{stderr}" + ); + assert!( + !stderr.contains("fetching patch data"), + "must not print a progress line for work that never happens; stderr=\n{stderr}" + ); +} + #[test] fn rollback_unknown_identifier_emits_error() { let tmp = tempfile::tempdir().expect("tempdir"); diff --git a/crates/socket-patch-cli/tests/setup_matrix_pypi.rs b/crates/socket-patch-cli/tests/setup_matrix_pypi.rs index cff2a90..9ab5121 100644 --- a/crates/socket-patch-cli/tests/setup_matrix_pypi.rs +++ b/crates/socket-patch-cli/tests/setup_matrix_pypi.rs @@ -389,6 +389,51 @@ mod host_guard { ); } + /// Regression: a commented-out hook line is NOT a configured project. + /// + /// pip never installs a `# socket-patch[hook]` comment, and plain `setup` + /// (whose `requirements_add` strips comments before probing) would still + /// append the hook — but the `--check` probe read the raw file and saw the + /// marker inside the comment, reporting `configured` (exit 0) for a + /// project with no hook at all. Check and setup must agree on the same + /// bytes. 
+ #[test] + fn pypi_check_ignores_commented_out_hook_host() { + const REQ_COMMENTED: &str = "requests==2.31.0\n# socket-patch[hook]\n"; + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let root_s = root.to_str().unwrap(); + std::fs::write(root.join("requirements.txt"), REQ_COMMENTED).unwrap(); + assert!( + !has_hook_line(REQ_COMMENTED), + "fixture: the commented-out line must not count as a hook line" + ); + + let (code, out, err) = run(root, &["setup", "--check", "--cwd", root_s, "--json"]); + assert_eq!( + code, 1, + "setup --check must FAIL (exit 1): a commented-out hook dep is not \ + configured.\nstdout:\n{out}\nstderr:\n{err}" + ); + let v = parse_json(&out, "check (commented-out)"); + assert_eq!( + json_str(&v, "status", "check (commented-out)"), + "needs_configuration", + "a commented-out hook line must report needs_configuration:\n{v}" + ); + assert_eq!( + json_str( + &pth_entry(&v, "check (commented-out)"), + "status", + "check (commented-out) pth" + ), + "needs_configuration", + "the requirements.txt pth entry must read needs_configuration:\n{v}" + ); + // --check must NEVER write. + assert_requirements(root, REQ_COMMENTED, "after check (commented-out)"); + } + /// Regression: classic-Poetry projects. /// /// `setup` writes the hook into a Poetry manifest as the *structural* diff --git a/crates/socket-patch-core/Cargo.toml b/crates/socket-patch-core/Cargo.toml index 795b88f..155852f 100644 --- a/crates/socket-patch-core/Cargo.toml +++ b/crates/socket-patch-core/Cargo.toml @@ -29,6 +29,9 @@ tempfile = { workspace = true } zip = { workspace = true } base64 = { workspace = true } +[target.'cfg(unix)'.dependencies] +libc = { workspace = true } + [features] # `cargo` and `golang` are default features (npm + PyPI + Ruby gems are # unconditional). 
Mirror the CLI's defaults so a plain `cargo build` of the diff --git a/crates/socket-patch-core/src/api/blob_fetcher.rs b/crates/socket-patch-core/src/api/blob_fetcher.rs index daa5f79..544ddda 100644 --- a/crates/socket-patch-core/src/api/blob_fetcher.rs +++ b/crates/socket-patch-core/src/api/blob_fetcher.rs @@ -409,11 +409,10 @@ pub fn format_fetch_result(result: &FetchMissingBlobsResult) -> String { result.results.iter().filter(|r| !r.success).collect(); for r in failed_results.iter().take(5) { - let short_hash = if r.hash.len() >= 12 { - &r.hash[..12] - } else { - &r.hash - }; + // Truncate by characters, not bytes: the hash field carries + // arbitrary manifest strings, and a byte slice panics when index + // 12 lands inside a multibyte char. + let short_hash: String = r.hash.chars().take(12).collect(); let err = r.error.as_deref().unwrap_or("unknown error"); lines.push(format!(" - {}...: {}", short_hash, err)); } @@ -777,6 +776,34 @@ mod tests { assert!(output.contains("abc...")); } + #[test] + fn test_format_multibyte_hash_does_not_panic() { + // Regression: the failed-blob detail line truncated `hash` with a + // byte slice (`&r.hash[..12]`). The hash field carries arbitrary + // manifest strings (afterHash / patch uuid); when byte 12 falls + // inside a multibyte char the slice panicked ("byte index 12 is not + // a char boundary"), crashing apply/repair/rollback human output + // instead of reporting the failed download. 
+ let hash = format!("{}→tail-of-corrupted-hash", "a".repeat(11)); + let result = FetchMissingBlobsResult { + total: 1, + downloaded: 0, + failed: 1, + skipped: 0, + results: vec![BlobFetchResult { + hash, + success: false, + error: Some("Invalid hash format".into()), + }], + }; + let output = format_fetch_result(&result); + assert!(output.contains("Failed to download 1 blob(s)")); + assert!( + output.contains("aaaaaaaaaaa→..."), + "12-char prefix expected: {output:?}" + ); + } + #[test] fn test_format_error_none() { let result = FetchMissingBlobsResult { diff --git a/crates/socket-patch-core/src/api/client.rs b/crates/socket-patch-core/src/api/client.rs index 6f60148..5a8df6f 100644 --- a/crates/socket-patch-core/src/api/client.rs +++ b/crates/socket-patch-core/src/api/client.rs @@ -31,7 +31,8 @@ fn get_severity_order(severity: Option<&str>) -> u8 { match severity.map(|s| s.to_lowercase()).as_deref() { Some("critical") => 0, Some("high") => 1, - Some("medium") => 2, + // GHSA emits `moderate` for the medium tier. + Some("medium") | Some("moderate") => 2, Some("low") => 3, _ => 4, } @@ -1164,6 +1165,24 @@ mod tests { ); } + #[test] + fn test_severity_order_moderate_is_medium_tier() { + // Regression: GHSA emits `moderate` for the medium tier (the same + // convention output.rs `format_severity` and get.rs `severity_rank` + // already follow). The moderate-blind ordering lumped it in with + // "unknown" (rank 4), ranking it *below* low. + assert_eq!( + get_severity_order(Some("moderate")), + get_severity_order(Some("medium")) + ); + assert!(get_severity_order(Some("moderate")) < get_severity_order(Some("low"))); + // Case-insensitive like every other tier. 
+ assert_eq!( + get_severity_order(Some("MODERATE")), + get_severity_order(Some("medium")) + ); + } + #[test] fn test_convert_search_result_to_batch_info() { let mut vulns = HashMap::new(); @@ -1286,6 +1305,46 @@ mod tests { assert_eq!(info.severity, Some("critical".into())); } + #[test] + fn test_convert_all_moderate_vulns_report_moderate_severity() { + // Regression: a patch whose vulns are all GHSA-`moderate` reported + // `severity: None` — the moderate-blind order gave it rank 4, equal + // to the `None` starting point, so the highest-severity tracker + // never fired. Tokenless `scan` (public-proxy batch fallback) then + // showed these patches with no severity at all. + let mut vulns = HashMap::new(); + vulns.insert( + "GHSA-1111".into(), + make_vuln("Moderate vuln", "MODERATE", vec!["CVE-2024-0001"]), + ); + let patch = make_patch(vulns, "desc"); + let info = convert_search_result_to_batch_info(patch); + assert_eq!( + info.severity, + Some("MODERATE".into()), + "all-moderate patch must report moderate, not None" + ); + } + + #[test] + fn test_convert_moderate_outranks_low() { + // Regression: `moderate` (GHSA medium tier) used to rank below + // `low`, so a moderate+low patch reported `low` as its highest + // severity. 
+ let mut vulns = HashMap::new(); + vulns.insert( + "GHSA-1111".into(), + make_vuln("Low vuln", "low", vec!["CVE-2024-0001"]), + ); + vulns.insert( + "GHSA-2222".into(), + make_vuln("Moderate vuln", "moderate", vec!["CVE-2024-0002"]), + ); + let patch = make_patch(vulns, "desc"); + let info = convert_search_result_to_batch_info(patch); + assert_eq!(info.severity, Some("moderate".into())); + } + #[test] fn test_convert_duplicate_cves_deduplicated() { let mut vulns = HashMap::new(); diff --git a/crates/socket-patch-core/src/composer_setup/mod.rs b/crates/socket-patch-core/src/composer_setup/mod.rs index 030add7..9445ffa 100644 --- a/crates/socket-patch-core/src/composer_setup/mod.rs +++ b/crates/socket-patch-core/src/composer_setup/mod.rs @@ -212,8 +212,13 @@ fn ensure_scripts_object(root: &mut Map) -> &mut Map, event: &str) -> bool { + if event_contains_marker(scripts.get(event)) { + return false; + } let cmd = Value::String(APPLY_COMMAND.to_string()); match scripts.get_mut(event) { None => { @@ -221,21 +226,13 @@ fn add_command_to_event(scripts: &mut Map, event: &str) -> bool { true } Some(Value::String(s)) => { - if s == APPLY_COMMAND { - false - } else { - let existing = Value::String(s.clone()); - scripts.insert(event.to_string(), Value::Array(vec![existing, cmd])); - true - } + let existing = Value::String(s.clone()); + scripts.insert(event.to_string(), Value::Array(vec![existing, cmd])); + true } Some(Value::Array(arr)) => { - if arr.iter().any(|v| v.as_str() == Some(APPLY_COMMAND)) { - false - } else { - arr.push(cmd); - true - } + arr.push(cmd); + true } // A non-string/array script value is user data we won't clobber. Some(_) => false, @@ -267,6 +264,57 @@ fn remove_command_from_event(scripts: &mut Map, event: &str) -> b // ── async wrappers ─────────────────────────────────────────────────────────── +/// Atomically write `content` to `path`. 
+/// +/// A bare `fs::write` truncates the target before writing, so a crash, power +/// loss, or interrupted process mid-write would leave the user's committed +/// `composer.json` truncated or empty — destroying the file we only meant to +/// append two script events to. Instead we write to a sibling stage file, +/// fsync it, then rename over the target (rename is atomic on the same +/// filesystem) so a reader ever sees either the old bytes or the complete new +/// bytes. Mirrors the hardened writer in `package_json/update.rs`. +async fn atomic_write(path: &Path, content: &str) -> std::io::Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let stem = path + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "composer.json".to_string()); + let stage = parent.join(format!(".socket-stage-{}-{}", stem, uuid::Uuid::new_v4())); + + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&stage) + .await?; + + use tokio::io::AsyncWriteExt; + if let Err(e) = file.write_all(content.as_bytes()).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + if let Err(e) = file.sync_all().await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + drop(file); + + if let Err(e) = tokio::fs::rename(&stage, path).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + + // The rename only updated the parent directory entry; fsync the directory + // so the rename itself survives a crash. Best-effort, Unix only. + #[cfg(unix)] + { + if let Ok(dir) = tokio::fs::File::open(parent).await { + let _ = dir.sync_all().await; + } + } + + Ok(()) +} + /// Wire the project: append our command to the composer script events. 
pub async fn add_hook(project: &ComposerProject, dry_run: bool) -> ComposerEditResult { edit(&project.composer_json, dry_run, composer_add).await @@ -293,7 +341,7 @@ async fn edit( None => Ok(false), Some(new) => { if !dry_run { - fs::write(composer_json, &new) + atomic_write(composer_json, &new) .await .map_err(|e| e.to_string())?; } @@ -580,6 +628,78 @@ mod tests { } } + #[test] + fn test_add_noops_on_flag_variant_hook() { + // Regression: `setup --check` (is_hook_present) treats any + // `socket-patch apply` variant as configured, and `setup` must agree — + // appending the stock command next to a user-customized flag set would + // run the hook twice on every install. Mirrors the npm backend's + // `script_is_configured` contract (loose marker on both sides). + let customized = "{\"scripts\":{\ + \"post-install-cmd\":[\"socket-patch apply --offline --ecosystems composer\"],\ + \"post-update-cmd\":\"socket-patch apply --offline --ecosystems composer\"}}"; + assert!(is_hook_present(customized), "variant reads as configured"); + assert!( + composer_add(customized).unwrap().is_none(), + "add must not duplicate a hook --check already reports as configured" + ); + } + + // ── atomic-write contract (no truncation / no stage litter) ────── + // + // The edit must go through stage+fsync+rename, never a bare truncating + // write, so a crash can't leave the user's committed composer.json empty. + + #[cfg(unix)] + #[tokio::test] + async fn test_add_replaces_readonly_manifest_atomically() { + use std::os::unix::fs::PermissionsExt; + // Oracle for the truncating-write bug: rename needs only directory + // write permission, while a bare `fs::write` must open the target + // itself for writing — so a read-only composer.json distinguishes the + // two (EACCES under truncate, clean replace under stage+rename, same + // as the npm/pypi/cargo/go manifest writers). 
+ let dir = tempfile::tempdir().unwrap(); + let cj = dir.path().join("composer.json"); + fs::write(&cj, BASIC).await.unwrap(); + std::fs::set_permissions(&cj, std::fs::Permissions::from_mode(0o444)).unwrap(); + + let project = discover_composer_project(dir.path()).await.unwrap(); + let res = add_hook(&project, false).await; + assert_eq!( + res.status, + ComposerSetupStatus::Updated, + "err: {:?}", + res.error + ); + assert!(is_hook_present(&fs::read_to_string(&cj).await.unwrap())); + } + + #[tokio::test] + async fn test_edit_leaves_no_stage_litter() { + let dir = tempfile::tempdir().unwrap(); + let cj = dir.path().join("composer.json"); + fs::write(&cj, BASIC).await.unwrap(); + let project = discover_composer_project(dir.path()).await.unwrap(); + + assert_eq!( + add_hook(&project, false).await.status, + ComposerSetupStatus::Updated + ); + assert_eq!( + remove_hook(&project, false).await.status, + ComposerSetupStatus::Updated + ); + assert_eq!(fs::read_to_string(&cj).await.unwrap(), BASIC); + + // No half-written `.socket-stage-*` sibling left behind. + let mut rd = fs::read_dir(dir.path()).await.unwrap(); + while let Some(entry) = rd.next_entry().await.unwrap() { + let name = entry.file_name().to_string_lossy().into_owned(); + assert!(!name.starts_with(".socket-stage-"), "stage litter: {name}"); + } + } + #[test] fn test_add_then_check_consistency() { // For every input where add reports a change, is_hook_present must be true. diff --git a/crates/socket-patch-core/src/crawlers/deno_crawler.rs b/crates/socket-patch-core/src/crawlers/deno_crawler.rs index 493a92a..5a12c2d 100644 --- a/crates/socket-patch-core/src/crawlers/deno_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/deno_crawler.rs @@ -116,6 +116,18 @@ impl DenoCrawler { let Some(((scope, name), version)) = crate::utils::purl::parse_jsr_purl(purl) else { continue; }; + // SECURITY: scope/name/version come straight from the (untrusted) + // manifest PURL and are joined onto the cache root below. 
A real + // JSR coordinate is a single path segment, so reject any that + // could traverse out of the cache (`..`/`.`, a separator, NUL). + // Unlike the cargo/npm crawlers there is no content check to catch + // a bogus path, and jsr patches in place — so fail closed here. + if !(is_safe_jsr_component(scope) + && is_safe_jsr_component(name) + && is_safe_jsr_component(version)) + { + continue; + } // Cache layout: //// let pkg_dir = jsr_cache_path.join(scope).join(name).join(version); if !is_dir(&pkg_dir).await { @@ -188,6 +200,21 @@ async fn scan_jsr_cache(root: &Path, seen: &mut HashSet, out: &mut Vec bool { + !component.is_empty() + && component != "." + && component != ".." + && !component.contains('/') + && !component.contains('\\') + && !component.contains('\0') +} + /// Returns true if `cwd` looks like a Deno project. /// /// Markers checked: `deno.json`, `deno.jsonc`, `deno.lock`. None are @@ -499,6 +526,68 @@ mod tests { assert!(crawler.crawl_all(&opts).await.is_empty()); } + #[tokio::test] + async fn find_by_purls_rejects_traversal_in_version() { + // SECURITY: `find_by_purls` joins the PURL's scope/name/version + // straight onto the cache root and (unlike the cargo/npm crawlers) + // does NO content verification — only an `is_dir` check — and jsr + // has no redirect backend, so the resolved dir is patched in place. + // A tampered manifest PURL whose version walks `..` must therefore + // be refused: otherwise it resolves to a real directory OUTSIDE the + // cache and `apply` writes into it. + let tmp = tempfile::tempdir().unwrap(); + let cache = tmp.path().join("cache"); + // Real intermediate dirs so the OS resolves the `..` segments — + // path resolution requires every prefix component to exist. + tokio::fs::create_dir_all(cache.join("@x").join("y")) + .await + .unwrap(); + // The escape target lives OUTSIDE the cache root. 
+ let outside = tmp.path().join("outside").join("leak"); + tokio::fs::create_dir_all(&outside).await.unwrap(); + + // version = `../../../outside/leak` walks cache/@x/y -> tmp, then + // back down into outside/leak. + let purl = "pkg:jsr/@x/y@../../../outside/leak"; + let crawler = DenoCrawler; + let result = crawler + .find_by_purls(&cache, &[purl.to_string()]) + .await + .unwrap(); + + assert!( + result.is_empty(), + "a traversing version must not resolve outside the cache, got {result:?}" + ); + } + + #[tokio::test] + async fn find_by_purls_rejects_traversal_in_name() { + // Twin of the version case: the package name is also untrusted and + // joined directly. A name containing `..`/separators must be + // refused before any disk access. + let tmp = tempfile::tempdir().unwrap(); + // Nest the cache two levels down so the `..` escape lands on a real + // dir we control rather than walking above the tempdir. + let cache = tmp.path().join("a").join("b").join("cache"); + tokio::fs::create_dir_all(cache.join("@x")).await.unwrap(); + let outside = tmp.path().join("a").join("leak").join("1.0.0"); + tokio::fs::create_dir_all(&outside).await.unwrap(); + + // name = `../../../leak` walks cache/@x -> a, then into leak/1.0.0. 
+ let purl = "pkg:jsr/@x/../../../leak@1.0.0"; + let crawler = DenoCrawler; + let result = crawler + .find_by_purls(&cache, &[purl.to_string()]) + .await + .unwrap(); + + assert!( + result.is_empty(), + "a traversing name must not resolve outside the cache, got {result:?}" + ); + } + #[tokio::test] async fn find_by_purls_skips_absent_version_keeps_present() { let tmp = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/crawlers/go_crawler.rs b/crates/socket-patch-core/src/crawlers/go_crawler.rs index 8765ccc..a818374 100644 --- a/crates/socket-patch-core/src/crawlers/go_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/go_crawler.rs @@ -179,6 +179,17 @@ impl GoCrawler { for purl in purls { if let Some((module_path, version)) = crate::utils::purl::parse_golang_purl(purl) { + // SECURITY: `module_path`/`version` come straight from the + // (untrusted) manifest PURL and are joined onto the cache root + // below. In global mode the resolved directory is patched IN + // PLACE (no `replace`-redirect backend stands between the + // crawler and disk), so a tampered PURL with a `..` segment + // must not be able to escape the cache. Reject fail-closed + // before the `is_dir` probe — the twin of the deno crawler's + // `is_safe_jsr_component` gate. + if !is_safe_module_coordinate(module_path, version) { + continue; + } // Encode the module path AND the version for the filesystem. // Go case-escapes both halves of the directory name, so a // version like `v1.0.0-RC1` must be looked up as @@ -370,6 +381,33 @@ fn split_module_path(module_path: &str) -> (&str, &str) { } } +/// Whether a `(module_path, version)` pair parsed from an untrusted PURL is +/// safe to join onto the module-cache root in [`GoCrawler::find_by_purls`]. 
+/// +/// A Go module path legitimately contains `/` separators +/// (`github.com/foo/bar`), so the path is validated **per segment** rather +/// than rejecting all separators — but a real path never has an empty, `.`, +/// or `..` segment (a leading `/` yields an empty first segment, so absolute +/// paths are rejected here too). A version is a single segment with no +/// separator. Backslashes and NULs are rejected outright. This mirrors the +/// `go_redirect` coordinate guard and fails closed so a tampered manifest PURL +/// cannot traverse out of the cache. +fn is_safe_module_coordinate(module_path: &str, version: &str) -> bool { + let module_ok = !module_path.is_empty() + && !module_path.contains('\\') + && !module_path.contains('\0') + && module_path + .split('/') + .all(|seg| !seg.is_empty() && seg != "." && seg != ".."); + let version_ok = !version.is_empty() + && version != "." + && version != ".." + && !version.contains('/') + && !version.contains('\\') + && !version.contains('\0'); + module_ok && version_ok +} + /// Check whether a path is a directory. async fn is_dir(path: &Path) -> bool { tokio::fs::metadata(path) @@ -924,6 +962,35 @@ mod tests { assert_eq!(result[&qualified].name, "gin"); } + #[tokio::test] + async fn test_find_by_purls_rejects_module_path_traversal() { + // SECURITY: `module_path`/`version` come straight from the (untrusted) + // manifest PURL and are joined onto the module-cache root. In global + // mode the resolved directory is patched IN PLACE (no `replace` + // redirect backend guards it), so a `..` segment must be rejected + // fail-closed — otherwise a tampered PURL escapes the cache. Twin of + // the deno crawler's `is_safe_jsr_component` gate. + let parent = tempfile::tempdir().unwrap(); + let cache = parent.path().join("cache"); + tokio::fs::create_dir_all(&cache).await.unwrap(); + + // A real directory one level ABOVE the cache root. 
With no guard, + // `cache.join("../outside/evil@v1.0.0")` resolves straight to it, and + // every intermediate component exists so the `is_dir` probe succeeds. + let outside = parent.path().join("outside").join("evil@v1.0.0"); + tokio::fs::create_dir_all(&outside).await.unwrap(); + + let crawler = GoCrawler::new(); + let purls = vec!["pkg:golang/../outside/evil@v1.0.0".to_string()]; + let result = crawler.find_by_purls(&cache, &purls).await.unwrap(); + + assert!( + result.is_empty(), + "a `..` segment in the module path must be rejected, not resolved \ + to a directory outside the cache root" + ); + } + #[tokio::test] async fn test_find_by_purls_absent_returns_empty_ok() { // No matching directory on disk → Ok(empty map), never an Err. diff --git a/crates/socket-patch-core/src/crawlers/maven_crawler.rs b/crates/socket-patch-core/src/crawlers/maven_crawler.rs index 0879244..2afcd35 100644 --- a/crates/socket-patch-core/src/crawlers/maven_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/maven_crawler.rs @@ -273,6 +273,41 @@ fn parse_path_coordinates( Some((group_id, artifact_id, version)) } +/// Whether the PURL-derived Maven coordinates are safe to join onto the +/// repository root in [`MavenCrawler::find_by_purls`]. +/// +/// The coordinates come straight from the (untrusted) manifest PURL and are +/// joined onto the repo root, after which the resolved directory is patched IN +/// PLACE (Maven has no `replace`-redirect backend). A tampered PURL must not be +/// able to traverse out of the repository. `verify_maven_at_path` only checks +/// for a `.pom` file, so it is no defense — this gate is. Fails closed. +/// +/// - `artifact_id` and `version` are each a single path segment, so a real one +/// never contains a separator, a `.`/`..` segment, a backslash, or a NUL. +/// - `group_id` is dot-separated and run through [`group_id_to_path`] (each +/// `.` becomes `/`). 
Requiring every dot-split segment to be non-empty +/// rejects the forms that would convert to an absolute or `..`-bearing path +/// (`.` -> `/`, `.a` -> `/a`, `a..b` -> `a//b`). +/// +/// Mirrors the `go_crawler` / `deno_crawler` coordinate guards. +fn is_safe_maven_coordinate(group_id: &str, artifact_id: &str, version: &str) -> bool { + let safe_segment = |s: &str| { + !s.is_empty() + && s != "." + && s != ".." + && !s.contains('/') + && !s.contains('\\') + && !s.contains('\0') + }; + let group_ok = !group_id.is_empty() + && !group_id.contains('\\') + && !group_id.contains('\0') + && group_id + .split('.') + .all(|seg| !seg.is_empty() && seg != "." && seg != ".."); + group_ok && safe_segment(artifact_id) && safe_segment(version) +} + // --------------------------------------------------------------------------- // MavenCrawler // --------------------------------------------------------------------------- @@ -375,6 +410,14 @@ impl MavenCrawler { if let Some((group_id, artifact_id, version)) = crate::utils::purl::parse_maven_purl(purl) { + // SECURITY: the coordinates are untrusted manifest input joined + // onto the repo root and then patched IN PLACE. Reject anything + // that could traverse out of the repository before touching the + // filesystem — the `.pom` check below is no defense. + if !is_safe_maven_coordinate(group_id, artifact_id, version) { + continue; + } + let expected_path = src_path .join(group_id_to_path(group_id)) .join(artifact_id) @@ -1076,6 +1119,77 @@ mod tests { assert_eq!(pkg.namespace, Some("org.apache.commons".to_string())); } + #[tokio::test] + async fn test_find_by_purls_rejects_traversal_coordinate() { + // SECURITY: a tampered manifest PURL whose coordinates carry a `..` + // segment (here the artifactId `../../escaped`) must NOT resolve to a + // directory outside the Maven repo root. 
Maven patches are applied IN + // PLACE at the directory the crawler returns (no redirect backend + // stands between resolution and disk), so an escape means an + // arbitrary out-of-tree write. `verify_maven_at_path` only checks for + // a `.pom` file, which does nothing to stop traversal — hence the + // fail-closed coordinate guard. Twin of the go/deno crawler guards. + let root = tempfile::tempdir().unwrap(); + let repo = root.path().join("repo"); + // The intermediate group dir must exist for the OS to resolve the + // `..` segments — real `~/.m2/repository` trees are full of them. + tokio::fs::create_dir_all(repo.join("g")).await.unwrap(); + + // An out-of-tree directory that DOES contain a `.pom`, so the only + // thing standing between the attacker and a match is the guard. + let escaped = root.path().join("escaped").join("1.0.0"); + tokio::fs::create_dir_all(&escaped).await.unwrap(); + tokio::fs::write(escaped.join("evil.pom"), "") + .await + .unwrap(); + + // repo/g/../../escaped/1.0.0 == root/escaped/1.0.0 + let purls = vec!["pkg:maven/g/../../escaped@1.0.0".to_string()]; + + let crawler = MavenCrawler::new(); + let result = crawler.find_by_purls(&repo, &purls).await.unwrap(); + assert!( + result.is_empty(), + "traversal PURL must not resolve to an out-of-tree directory, got {result:?}" + ); + } + + #[test] + fn test_is_safe_maven_coordinate() { + // Legit coordinates pass. + assert!(is_safe_maven_coordinate( + "org.apache.commons", + "commons-lang3", + "3.12.0" + )); + assert!(is_safe_maven_coordinate( + "com.google.guava", + "guava", + "32.1.3-jre" + )); + // `..` in any single-segment coordinate is rejected. + assert!(!is_safe_maven_coordinate("g", "..", "1.0.0")); + assert!(!is_safe_maven_coordinate("g", "../../escaped", "1.0.0")); + assert!(!is_safe_maven_coordinate("g", "a", "..")); + // A `/` in the artifactId/version (never legitimate) is rejected. 
+ assert!(!is_safe_maven_coordinate("g", "a/b", "1.0.0")); + assert!(!is_safe_maven_coordinate("g", "a", "1/0")); + // groupId forms that convert to an absolute or empty-segment path + // (`.` -> `/`, `.a` -> `/a`) are rejected. + assert!(!is_safe_maven_coordinate(".", "a", "1.0.0")); + assert!(!is_safe_maven_coordinate("..", "a", "1.0.0")); + assert!(!is_safe_maven_coordinate(".org", "a", "1.0.0")); + assert!(!is_safe_maven_coordinate("org.", "a", "1.0.0")); + assert!(!is_safe_maven_coordinate("a..b", "a", "1.0.0")); + // Backslash / NUL anywhere is rejected. + assert!(!is_safe_maven_coordinate("g", "a\\b", "1.0.0")); + assert!(!is_safe_maven_coordinate("g\0x", "a", "1.0.0")); + // Empty coordinates are rejected. + assert!(!is_safe_maven_coordinate("", "a", "1.0.0")); + assert!(!is_safe_maven_coordinate("g", "", "1.0.0")); + assert!(!is_safe_maven_coordinate("g", "a", "")); + } + // ---- crawl_all tests ---- #[tokio::test] diff --git a/crates/socket-patch-core/src/crawlers/npm_crawler.rs b/crates/socket-patch-core/src/crawlers/npm_crawler.rs index b827240..91cb624 100644 --- a/crates/socket-patch-core/src/crawlers/npm_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/npm_crawler.rs @@ -341,6 +341,16 @@ impl NpmCrawler { for purl in purls { if let Some((ns, name, version)) = Self::parse_purl_components(purl) { + // SECURITY: `ns`/`name` come straight from the (untrusted) + // manifest PURL and are joined onto `node_modules_path` below, + // then patched in place. A real npm scope/name is a single + // path segment, so reject any that could traverse out of the + // tree (`pkg:npm/../../evil@1.0.0`). Fail closed — twin of the + // deno/go/maven coordinate gates. 
+ let ns_safe = ns.as_deref().map(is_safe_npm_component).unwrap_or(true); + if !ns_safe || !is_safe_npm_component(&name) { + continue; + } let dir_key = match &ns { Some(ns_str) => format!("{ns_str}/{name}"), None => name.clone(), @@ -723,6 +733,24 @@ async fn is_dir(path: &Path) -> bool { .unwrap_or(false) } +/// Whether a PURL-derived path component is safe to join onto the +/// `node_modules` root. An npm package's scope (`@types`) and bare name +/// (`node`) are each a single path segment, so a real one never contains a +/// separator, a `.`/`..` segment, a backslash, or a NUL. `find_by_purls` +/// joins these straight from the (untrusted) manifest PURL onto the +/// `node_modules` root and then patches the resolved package in place, so a +/// tampered PURL like `pkg:npm/../../evil@1.0.0` would otherwise read (and +/// later write) out of tree. Reject those fail-closed. Twin of the deno +/// (`is_safe_jsr_component`), go, and maven coordinate gates. +fn is_safe_npm_component(component: &str) -> bool { + !component.is_empty() + && component != "." + && component != ".." + && !component.contains('/') + && !component.contains('\\') + && !component.contains('\0') +} + #[cfg(test)] mod tests { use super::*; @@ -1084,6 +1112,95 @@ mod tests { assert_eq!(result.get(&q2).unwrap().path, foo_dir); } + /// SECURITY regression: a tampered manifest PURL whose *name* carries a + /// `..` traversal must not let `find_by_purls` resolve a package outside + /// the `node_modules` root. The crawler joins the PURL-derived directory + /// key straight onto `node_modules_path` and the resolved path is then + /// patched in place, so an unguarded join would read (and later write) + /// out of tree. Twin of the deno/go/maven `is_safe_*_coordinate` gates. 
+ #[tokio::test] + async fn test_find_by_purls_rejects_traversal_in_name() { + let root = tempfile::tempdir().unwrap(); + let nm = root.path().join("node_modules"); + tokio::fs::create_dir_all(&nm).await.unwrap(); + + // A victim package living OUTSIDE node_modules, reachable only via + // `..`. `node_modules/../evil` == `/evil`. + let evil_dir = root.path().join("evil"); + tokio::fs::create_dir_all(&evil_dir).await.unwrap(); + tokio::fs::write( + evil_dir.join("package.json"), + r#"{"name": "evil", "version": "1.0.0"}"#, + ) + .await + .unwrap(); + + let crawler = NpmCrawler::new(); + let traversal = "pkg:npm/../evil@1.0.0".to_string(); + let result = crawler + .find_by_purls(&nm, std::slice::from_ref(&traversal)) + .await + .unwrap(); + + assert!( + result.is_empty(), + "a `..` in the PURL name must not escape node_modules; got {result:?}" + ); + } + + /// SECURITY regression: a `..` smuggled through the *name* half of a + /// scoped PURL must also be rejected. `@x/../../evil` parses to scope + /// `@x` + name `../../evil`; with a real `@x` dir on disk for the kernel + /// to walk, the join climbs clean out of node_modules to `/evil`. + #[tokio::test] + async fn test_find_by_purls_rejects_traversal_via_scope() { + let root = tempfile::tempdir().unwrap(); + let nm = root.path().join("node_modules"); + // A real scope dir so the kernel can resolve the leading `@x` before + // the `..` segments climb — otherwise the walk would ENOENT and the + // test would pass vacuously. 
+ tokio::fs::create_dir_all(nm.join("@x")).await.unwrap(); + + let evil_dir = root.path().join("evil"); + tokio::fs::create_dir_all(&evil_dir).await.unwrap(); + tokio::fs::write( + evil_dir.join("package.json"), + r#"{"name": "evil", "version": "1.0.0"}"#, + ) + .await + .unwrap(); + + let crawler = NpmCrawler::new(); + let traversal = "pkg:npm/@x/../../evil@1.0.0".to_string(); + let result = crawler + .find_by_purls(&nm, std::slice::from_ref(&traversal)) + .await + .unwrap(); + + assert!( + result.is_empty(), + "a `..` smuggled through the scope must not escape node_modules; got {result:?}" + ); + } + + #[test] + fn test_is_safe_npm_component() { + // Legitimate components. + assert!(is_safe_npm_component("lodash")); + assert!(is_safe_npm_component("@types")); + assert!(is_safe_npm_component("node")); + assert!(is_safe_npm_component("some.pkg")); + + // Traversal / separator / NUL / empty. + assert!(!is_safe_npm_component("")); + assert!(!is_safe_npm_component(".")); + assert!(!is_safe_npm_component("..")); + assert!(!is_safe_npm_component("../evil")); + assert!(!is_safe_npm_component("a/b")); + assert!(!is_safe_npm_component("a\\b")); + assert!(!is_safe_npm_component("a\0b")); + } + /// A PURL whose version is not the one on disk must be skipped, while a /// sibling PURL for the installed version is kept. #[tokio::test] diff --git a/crates/socket-patch-core/src/crawlers/nuget_crawler.rs b/crates/socket-patch-core/src/crawlers/nuget_crawler.rs index bfb91d1..31a88bf 100644 --- a/crates/socket-patch-core/src/crawlers/nuget_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/nuget_crawler.rs @@ -110,6 +110,16 @@ impl NuGetCrawler { for purl in purls { if let Some((name, version)) = crate::utils::purl::parse_nuget_purl(purl) { + // SECURITY: the coordinates are untrusted manifest input + // joined onto the package root and then patched IN PLACE + // (NuGet has no redirect backend). 
Reject anything that + // could traverse out of the root before touching the + // filesystem — `verify_nuget_package` only checks for + // `lib/` or a `.nuspec`, so it is no defense. + if !is_safe_nuget_coordinate(name, version) { + continue; + } + // Try global cache layout: //. // NuGet lowercases BOTH the id and the version when it lays // out the global packages folder, so a prerelease tag like @@ -337,12 +347,40 @@ impl Default for NuGetCrawler { } } +/// Whether the PURL-derived NuGet coordinates are safe to join onto the +/// package root in [`NuGetCrawler::find_by_purls`]. +/// +/// The name and version come straight from the (untrusted) manifest PURL. +/// Each is used as a single path segment in the global-cache layout and as +/// part of the `.` directory name in the legacy layout, after +/// which the resolved directory is patched IN PLACE (NuGet has no redirect +/// backend) — so a tampered PURL must not be able to traverse out of the +/// root. A real NuGet id/version never contains a separator, a `.`/`..` +/// segment, a backslash, or a NUL. Fails closed. Mirrors the +/// maven/go/deno/npm crawler coordinate guards. +fn is_safe_nuget_coordinate(name: &str, version: &str) -> bool { + let safe_segment = |s: &str| { + !s.is_empty() + && s != "." + && s != ".." + && !s.contains('/') + && !s.contains('\\') + && !s.contains('\0') + }; + safe_segment(name) && safe_segment(version) +} + /// Get the NuGet global packages folder. /// /// Checks `NUGET_PACKAGES` env var, falls back to `~/.nuget/packages/`. fn nuget_home() -> PathBuf { + // NuGet itself treats an empty NUGET_PACKAGES as unset and falls back + // to the default folder; honoring "" here would make global discovery + // probe `is_dir("")` and silently scan nothing. 
if let Ok(custom) = std::env::var("NUGET_PACKAGES") { - return PathBuf::from(custom); + if !custom.is_empty() { + return PathBuf::from(custom); + } } let home = std::env::var("HOME") @@ -911,6 +949,7 @@ mod tests { } #[tokio::test] + #[serial_test::serial] async fn test_nuget_home_env_var() { // Test that NUGET_PACKAGES env var is respected let custom = "/tmp/test-nuget-packages"; @@ -920,6 +959,86 @@ mod tests { std::env::remove_var("NUGET_PACKAGES"); } + /// Regression: NuGet itself treats an empty `NUGET_PACKAGES` as unset + /// and falls back to `~/.nuget/packages` (its settings layer checks + /// IsNullOrEmpty). Honoring the empty string here produced + /// `PathBuf::from("")`, which fails the `is_dir` probe — so global-mode + /// discovery silently scanned nothing instead of the real cache. + #[tokio::test] + #[serial_test::serial] + async fn test_nuget_home_empty_env_var_falls_back_to_default() { + let prev = std::env::var("NUGET_PACKAGES").ok(); + std::env::set_var("NUGET_PACKAGES", ""); + let home = nuget_home(); + match prev { + Some(v) => std::env::set_var("NUGET_PACKAGES", v), + None => std::env::remove_var("NUGET_PACKAGES"), + } + assert!( + home.ends_with(Path::new(".nuget").join("packages")), + "empty NUGET_PACKAGES must fall back to ~/.nuget/packages, got {home:?}" + ); + } + + #[test] + fn test_is_safe_nuget_coordinate() { + // Real coordinates pass, including dotted ids and prerelease tags. + assert!(is_safe_nuget_coordinate("Newtonsoft.Json", "13.0.3")); + assert!(is_safe_nuget_coordinate("Contoso.Widgets", "2.0.0-RC1")); + assert!(is_safe_nuget_coordinate("xunit", "2.6.2+build.5")); + + // Traversal / separator smuggling fails closed. 
+ assert!(!is_safe_nuget_coordinate("..", "1.0.0")); + assert!(!is_safe_nuget_coordinate("../escaped", "1.0.0")); + assert!(!is_safe_nuget_coordinate("a/b", "1.0.0")); + assert!(!is_safe_nuget_coordinate("a\\b", "1.0.0")); + assert!(!is_safe_nuget_coordinate("a\0b", "1.0.0")); + assert!(!is_safe_nuget_coordinate("a", "..")); + assert!(!is_safe_nuget_coordinate("a", "../../escaped/1.0.0")); + assert!(!is_safe_nuget_coordinate("a", "1/0")); + assert!(!is_safe_nuget_coordinate("a", ".")); + assert!(!is_safe_nuget_coordinate("", "1.0.0")); + assert!(!is_safe_nuget_coordinate("a", "")); + } + + /// SECURITY regression: a tampered manifest PURL whose name or version + /// carries a `..`/separator must NOT resolve to a directory outside the + /// scanned package root. NuGet patches are applied IN PLACE at the + /// directory the crawler returns (no redirect backend stands between + /// resolution and disk), so an escape means an arbitrary out-of-tree + /// write. `verify_nuget_package` only checks for `lib/` or a `.nuspec`, + /// which does nothing to stop traversal — hence the fail-closed + /// coordinate guard. Twin of the maven/go/deno/npm crawler guards. + #[tokio::test] + async fn test_find_by_purls_rejects_traversal_coordinate() { + let root = tempfile::tempdir().unwrap(); + let cache = root.path().join("cache"); + // The intermediate name dir must exist for the OS to resolve the + // `..` in the version-traversal probe below. + tokio::fs::create_dir_all(cache.join("foo")).await.unwrap(); + + // An out-of-tree directory that DOES verify (has `lib/`), so the + // only thing standing between the attacker and a match is the guard. 
+ let escaped = root.path().join("escaped").join("1.0.0"); + tokio::fs::create_dir_all(escaped.join("lib")) + .await + .unwrap(); + + let purls = vec![ + // name traversal: cache/../escaped/1.0.0 == root/escaped/1.0.0 + "pkg:nuget/../escaped@1.0.0".to_string(), + // version traversal: cache/foo/../../escaped/1.0.0 + "pkg:nuget/foo@../../escaped/1.0.0".to_string(), + ]; + + let crawler = NuGetCrawler::new(); + let result = crawler.find_by_purls(&cache, &purls).await.unwrap(); + assert!( + result.is_empty(), + "traversal PURL must not resolve to an out-of-tree directory, got {result:?}" + ); + } + /// `".1.0.0"` — first match-index of `.` is `i=0` (followed by /// `1`), `i+1 < dir_name.len()` is true, split_idx = Some(0). /// The name slice ends up empty; the defensive guard at the diff --git a/crates/socket-patch-core/src/crawlers/python_crawler.rs b/crates/socket-patch-core/src/crawlers/python_crawler.rs index 1868aea..e400a13 100644 --- a/crates/socket-patch-core/src/crawlers/python_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/python_crawler.rs @@ -702,13 +702,12 @@ impl PythonCrawler { } /// Parse a PyPI PURL string to extract name and version. - /// Strips qualifiers before parsing. + /// Strips qualifiers and subpath before parsing. fn parse_pypi_purl(purl: &str) -> Option<(String, String)> { - // Strip qualifiers - let base = match purl.find('?') { - Some(idx) => &purl[..idx], - None => purl, - }; + // A `#subpath` can appear without a preceding `?qualifier`, so the + // shared helper cuts at whichever comes first — a `?`-only cut would + // leak the subpath into the version and miss the installed package. + let base = crate::utils::purl::strip_purl_qualifiers(purl); let rest = base.strip_prefix("pkg:pypi/")?; let at_idx = rest.rfind('@')?; @@ -783,6 +782,24 @@ mod tests { assert_eq!(ver, "2.28.0"); } + /// The PURL grammar is `pkg:type/ns/name@version?qualifiers#subpath`; + /// a subpath can appear WITHOUT a preceding qualifier. 
Cutting only at + /// `?` lets a bare `#subpath` leak into the version (`2.28.0#src/...`), + /// silently failing the installed-package match. + #[test] + fn test_parse_pypi_purl_with_subpath() { + let (name, ver) = + PythonCrawler::parse_pypi_purl("pkg:pypi/requests@2.28.0#src/requests").unwrap(); + assert_eq!(name, "requests"); + assert_eq!(ver, "2.28.0"); + + // Qualifier + subpath together (subpath follows qualifiers). + let (name, ver) = + PythonCrawler::parse_pypi_purl("pkg:pypi/requests@2.28.0?artifact_id=abc#src").unwrap(); + assert_eq!(name, "requests"); + assert_eq!(ver, "2.28.0"); + } + #[test] fn test_parse_pypi_purl_invalid() { assert!(PythonCrawler::parse_pypi_purl("pkg:npm/lodash@4.17.21").is_none()); diff --git a/crates/socket-patch-core/src/crawlers/ruby_crawler.rs b/crates/socket-patch-core/src/crawlers/ruby_crawler.rs index 50f2c33..e8526db 100644 --- a/crates/socket-patch-core/src/crawlers/ruby_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/ruby_crawler.rs @@ -95,6 +95,17 @@ impl RubyCrawler { for purl in purls { if let Some((name, version)) = crate::utils::purl::parse_gem_purl(purl) { + // SECURITY: name/version come straight from the (untrusted) + // manifest PURL and are formatted into a `{name}-{version}` + // dir name joined onto `gem_path` below. A real gem + // coordinate is a single path segment, so reject any that + // could traverse out of the gem root (`..`/`.`, a separator, + // an absolute path, NUL). `verify_gem_at_path` only checks + // for `lib/`/`.gemspec` and gems patch in place, so fail + // closed here — same as the deno/go/maven/npm/nuget guards. + if !is_safe_gem_coordinate(name, version) { + continue; + } // The purl is the base PURL (qualifiers stripped upstream). // Resolve it to the installed gem dir, which may carry a // `-{platform}` suffix for platform gems. @@ -393,6 +404,26 @@ fn gem_homes_to_gems_dirs(gempath: &str) -> Vec { .collect() } +/// Whether a PURL-derived gem coordinate is safe to join onto the gem root.
+/// SECURITY: `find_by_purls` formats name/version into a `{name}-{version}` +/// directory name joined onto `gem_path`, and a real gem name/version is +/// dash/dot/word characters only — never a separator, NUL, or bare dot +/// segment. `verify_gem_at_path` only checks for `lib/`/`.gemspec` and gems +/// are patched in place, so a tampered manifest PURL (`pkg:gem/../x@1.0`, +/// an absolute name, a `/`-bearing version) must be rejected here, fail +/// closed. Mirrors the deno/go/maven/npm/nuget crawler coordinate guards. +fn is_safe_gem_coordinate(name: &str, version: &str) -> bool { + let safe = |s: &str| { + !s.is_empty() + && s != "." + && s != ".." + && !s.contains('/') + && !s.contains('\\') + && !s.contains('\0') + }; + safe(name) && safe(version) +} + /// Check whether a path is a directory. async fn is_dir(path: &Path) -> bool { tokio::fs::metadata(path) @@ -855,6 +886,101 @@ mod tests { ); } + // ── PURL coordinate traversal (untrusted manifest input) ────── + + /// A tampered manifest PURL whose name carries `..` must not resolve + /// to a directory outside the gem root. `locate_gem_dir` joins + /// `{name}-{version}` straight onto `gem_path`, and + /// `verify_gem_at_path` only checks for `lib/`/`.gemspec`, so without + /// a coordinate gate `pkg:gem/../outside@1.0.0` escapes the gem store + /// and the patch applies in place out of tree. + #[tokio::test] + async fn find_by_purls_rejects_traversal_coordinates() { + let dir = tempfile::tempdir().unwrap(); + let gems = dir.path().join("gems"); + tokio::fs::create_dir_all(&gems).await.unwrap(); + // A verifying "gem" OUTSIDE the gem root that `..` escapes to.
+ tokio::fs::create_dir_all(dir.path().join("outside-1.0.0").join("lib")) + .await + .unwrap(); + + let crawler = RubyCrawler::new(); + let purls = vec!["pkg:gem/../outside@1.0.0".to_string()]; + let result = crawler.find_by_purls(&gems, &purls).await.unwrap(); + assert!( + result.is_empty(), + "`..` name must not escape the gem root: {result:?}" + ); + } + + /// An absolute path smuggled in as the gem name replaces the gem root + /// wholesale in `Path::join` — must be rejected fail-closed. + #[tokio::test] + async fn find_by_purls_rejects_absolute_coordinates() { + let dir = tempfile::tempdir().unwrap(); + let gems = dir.path().join("gems"); + tokio::fs::create_dir_all(&gems).await.unwrap(); + let outside = dir.path().join("abs"); + tokio::fs::create_dir_all(outside.join("evil-1.0.0").join("lib")) + .await + .unwrap(); + + let crawler = RubyCrawler::new(); + let purl = format!("pkg:gem/{}@1.0.0", outside.join("evil").display()); + let result = crawler.find_by_purls(&gems, &[purl]).await.unwrap(); + assert!( + result.is_empty(), + "absolute name must not replace the gem root: {result:?}" + ); + } + + /// A separator smuggled into the *version* half of the coordinate is + /// just as dangerous as one in the name — both halves are formatted + /// into the joined `{name}-{version}` segment. + #[tokio::test] + async fn find_by_purls_rejects_separator_in_version() { + let dir = tempfile::tempdir().unwrap(); + let gems = dir.path().join("gems"); + tokio::fs::create_dir_all(&gems).await.unwrap(); + // `foo-1.0/../../outside-1.0.0` needs `foo-1.0` to traverse through.
+ tokio::fs::create_dir_all(gems.join("foo-1.0")) + .await + .unwrap(); + tokio::fs::create_dir_all(dir.path().join("outside-1.0.0").join("lib")) + .await + .unwrap(); + + let crawler = RubyCrawler::new(); + let purls = vec!["pkg:gem/foo@1.0/../../outside-1.0.0".to_string()]; + let result = crawler.find_by_purls(&gems, &purls).await.unwrap(); + assert!( + result.is_empty(), + "version with separators must not escape the gem root: {result:?}" + ); + } + + /// Unit contract for the coordinate gate: real gem names/versions pass, + /// anything with a separator, NUL, or bare dot segment fails closed. + #[test] + fn test_is_safe_gem_coordinate() { + assert!(is_safe_gem_coordinate("rails", "7.1.0")); + assert!(is_safe_gem_coordinate("aws-sdk-s3", "1.143.0")); + assert!(is_safe_gem_coordinate("ruby2_keywords", "0.0.5")); + assert!(is_safe_gem_coordinate("nokogiri", "1.16.5.pre.rc1")); + + assert!(!is_safe_gem_coordinate("", "1.0.0")); + assert!(!is_safe_gem_coordinate("rails", "")); + assert!(!is_safe_gem_coordinate("..", "1.0.0")); + assert!(!is_safe_gem_coordinate(".", "1.0.0")); + assert!(!is_safe_gem_coordinate("rails", "..")); + assert!(!is_safe_gem_coordinate("../outside", "1.0.0")); + assert!(!is_safe_gem_coordinate("a/b", "1.0.0")); + assert!(!is_safe_gem_coordinate("rails", "1.0/../../x")); + assert!(!is_safe_gem_coordinate("a\\b", "1.0.0")); + assert!(!is_safe_gem_coordinate("a\0b", "1.0.0")); + assert!(!is_safe_gem_coordinate("/abs/evil", "1.0.0")); + } + /// Gem names with embedded underscores/digits and multi-dash names /// must keep their full name; the version starts at the first /// dash-then-digit boundary. diff --git a/crates/socket-patch-core/src/gem_setup/mod.rs b/crates/socket-patch-core/src/gem_setup/mod.rs index e966bab..44dc933 100644 --- a/crates/socket-patch-core/src/gem_setup/mod.rs +++ b/crates/socket-patch-core/src/gem_setup/mod.rs @@ -33,7 +33,7 @@ pub use update::{ /// The in-tree plugin directory, relative to the project root. 
pub const PLUGIN_DIR: &str = ".socket/bundler-plugin"; /// First line of every generated plugin file — the ownership signal for removal -/// (we never delete a directory whose `plugins.rb` lacks it). +/// (we never delete a file that lacks it). pub const GENERATED_MARKER: &str = "# Code generated by `socket-patch setup`. DO NOT EDIT."; /// The generated `plugins.rb` body (the two-trigger idempotent applier). @@ -138,22 +138,44 @@ pub async fn add_plugin_files(root: &Path, dry_run: bool) -> GemEditResult { GemEditResult::from_result("gem_plugin", dir.display().to_string(), result) } -/// Remove the generated plugin directory — but only when its `plugins.rb` -/// carries our [`GENERATED_MARKER`], so a user-authored file at that path is -/// never deleted. Idempotent: `AlreadyConfigured` when nothing of ours is there. +/// Whether the file at `path` carries our [`GENERATED_MARKER`] as its first +/// line — the per-file ownership test for removal. +async fn is_generated(path: &Path) -> bool { + match fs::read_to_string(path).await { + Ok(content) => content.starts_with(GENERATED_MARKER), + Err(_) => false, + } +} + +/// Delete a generated file, tolerating it already being gone but surfacing +/// any other failure (a swallowed error here would report a false "removed"). +async fn remove_generated(path: &Path) -> Result<(), String> { + match fs::remove_file(path).await { + Ok(()) => Ok(()), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(format!("remove {}: {e}", path.display())), + } +} + +/// Remove the generated plugin files — each only when it carries our +/// [`GENERATED_MARKER`], so a user-authored file at either path is never +/// deleted (and an orphaned generated file is still cleaned up). Idempotent: +/// `AlreadyConfigured` when nothing of ours is there. 
pub async fn remove_plugin_files(root: &Path, dry_run: bool) -> GemEditResult { let dir = plugin_dir(root); let result = async { - let ours = match fs::read_to_string(plugins_rb_path(root)).await { - Ok(content) => content.starts_with(GENERATED_MARKER), - Err(_) => false, - }; - if !ours { + let rb_ours = is_generated(&plugins_rb_path(root)).await; + let spec_ours = is_generated(&gemspec_path(root)).await; + if !rb_ours && !spec_ours { return Ok(false); } if !dry_run { - let _ = fs::remove_file(plugins_rb_path(root)).await; - let _ = fs::remove_file(gemspec_path(root)).await; + if rb_ours { + remove_generated(&plugins_rb_path(root)).await?; + } + if spec_ours { + remove_generated(&gemspec_path(root)).await?; + } // Prune the now-empty plugin dir (leave .socket/ — apply uses it). let _ = fs::remove_dir(&dir).await; } @@ -348,6 +370,87 @@ mod tests { ); } + #[cfg(unix)] + #[tokio::test] + async fn test_remove_surfaces_removal_failure_as_error() { + // Deleting a file requires write permission on its directory. With the + // plugin dir read-only, the removes fail — that failure must surface as + // `Error`, not be swallowed into a false "removed" success while the + // files are in fact still there. + use std::os::unix::fs::PermissionsExt; + + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + add_plugin_files(root, false).await; + fs::set_permissions(&plugin_dir(root), std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + + let r = remove_plugin_files(root, false).await; + + // Restore so the tempdir can be cleaned up regardless of the outcome. 
+ fs::set_permissions(&plugin_dir(root), std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + + assert!( + plugin_files_present(root).await, + "the read-only dir means nothing was actually removed" + ); + assert_eq!( + r.status, + GemSetupStatus::Error, + "a failed removal must report Error, not a false success" + ); + assert!( + r.error.is_some(), + "the failure carries the io error message" + ); + } + + #[tokio::test] + async fn test_remove_cleans_orphaned_gemspec() { + // plugins.rb gone (crash between the two removes, or a manual delete) + // but our generated gemspec is still there. Remove must clean the + // orphan — not report not_configured forever while a generated file + // lingers in the repo. + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write(&gemspec_path(root), GEMSPEC).await; + + let r = remove_plugin_files(root, false).await; + assert_eq!( + r.status, + GemSetupStatus::Updated, + "an orphaned generated gemspec is ours to remove" + ); + assert!(!gemspec_path(root).exists(), "orphan gemspec removed"); + assert!(!plugin_dir(root).exists(), "emptied plugin dir pruned"); + } + + #[tokio::test] + async fn test_remove_spares_user_authored_gemspec() { + // Ownership is per file: our plugins.rb is removed, but a user-authored + // (marker-less) file at the gemspec path must never be deleted on the + // strength of plugins.rb's marker alone. 
+ let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write(&plugins_rb_path(root), PLUGINS_RB).await; + write(&gemspec_path(root), "# my own gemspec\n").await; + + let r = remove_plugin_files(root, false).await; + assert_eq!(r.status, GemSetupStatus::Updated); + assert!(!plugins_rb_path(root).exists(), "our plugins.rb removed"); + assert!( + gemspec_path(root).exists(), + "marker-less user file at the gemspec path must be left alone" + ); + assert_eq!( + fs::read_to_string(gemspec_path(root)).await.unwrap(), + "# my own gemspec\n" + ); + } + #[tokio::test] async fn test_add_plugin_files_writes_each_template_to_its_own_path() { // Guards the path↔content mapping: plugins.rb must get PLUGINS_RB and the diff --git a/crates/socket-patch-core/src/gem_setup/update.rs b/crates/socket-patch-core/src/gem_setup/update.rs index b7ea96b..9fa9e53 100644 --- a/crates/socket-patch-core/src/gem_setup/update.rs +++ b/crates/socket-patch-core/src/gem_setup/update.rs @@ -59,6 +59,57 @@ impl GemEditResult { } } +/// Atomically write `content` to `path`. +/// +/// A bare `fs::write` truncates the target before writing, so a crash, power +/// loss, or interrupted process mid-write would leave the user's committed +/// `Gemfile` truncated or empty — destroying the file we only meant to +/// append a three-line block to. Instead we write to a sibling stage file, +/// fsync it, then rename over the target (rename is atomic on the same +/// filesystem) so a reader only ever sees either the old bytes or the complete new +/// bytes. Mirrors the hardened writer in `composer_setup` / `package_json`.
+async fn atomic_write(path: &Path, content: &str) -> std::io::Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let stem = path + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "Gemfile".to_string()); + let stage = parent.join(format!(".socket-stage-{}-{}", stem, uuid::Uuid::new_v4())); + + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&stage) + .await?; + + use tokio::io::AsyncWriteExt; + if let Err(e) = file.write_all(content.as_bytes()).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + if let Err(e) = file.sync_all().await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + drop(file); + + if let Err(e) = tokio::fs::rename(&stage, path).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + + // The rename only updated the parent directory entry; fsync the directory + // so the rename itself survives a crash. Best-effort, Unix only. + #[cfg(unix)] + { + if let Ok(dir) = tokio::fs::File::open(parent).await { + let _ = dir.sync_all().await; + } + } + + Ok(()) +} + /// Stable substring identifying our managed block — `setup --check` and the /// add/remove edits all key on it, so a user-authored `plugin` line is never /// mistaken for ours. 
@@ -128,7 +179,9 @@ async fn edit_gemfile_add(gemfile: &Path, dry_run: bool) -> GemEditResult { None => Ok(false), Some(new) => { if !dry_run { - fs::write(gemfile, &new).await.map_err(|e| e.to_string())?; + atomic_write(gemfile, &new) + .await + .map_err(|e| e.to_string())?; } Ok(true) } @@ -151,7 +204,9 @@ async fn edit_gemfile_remove(gemfile: &Path, dry_run: bool) -> GemEditResult { None => Ok(false), Some(new) => { if !dry_run { - fs::write(gemfile, &new).await.map_err(|e| e.to_string())?; + atomic_write(gemfile, &new) + .await + .map_err(|e| e.to_string())?; } Ok(true) } @@ -370,6 +425,77 @@ mod tests { ); } + // ── atomic-write contract (no truncation / no stage litter) ────── + // + // The Gemfile edit must go through stage+fsync+rename, never a bare + // truncating write, so a crash can't leave the user's committed Gemfile + // truncated or empty. + + #[cfg(unix)] + #[tokio::test] + async fn test_add_replaces_readonly_gemfile_atomically() { + use std::os::unix::fs::PermissionsExt; + // Oracle for the truncating-write bug: rename needs only directory + // write permission, while a bare `fs::write` must open the target + // itself for writing — so a read-only Gemfile distinguishes the two + // (EACCES under truncate, clean replace under stage+rename, same as + // the composer/npm/pypi/cargo/go manifest writers). 
+ let dir = tempfile::tempdir().unwrap(); + let gemfile = dir.path().join("Gemfile"); + fs::write(&gemfile, GEMFILE).await.unwrap(); + std::fs::set_permissions(&gemfile, std::fs::Permissions::from_mode(0o444)).unwrap(); + + let res = edit_gemfile_add(&gemfile, false).await; + assert_eq!(res.status, GemSetupStatus::Updated, "err: {:?}", res.error); + assert!(is_plugin_directive_present( + &fs::read_to_string(&gemfile).await.unwrap() + )); + } + + #[cfg(unix)] + #[tokio::test] + async fn test_remove_replaces_readonly_gemfile_atomically() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let gemfile = dir.path().join("Gemfile"); + fs::write(&gemfile, gemfile_add(GEMFILE).unwrap()) + .await + .unwrap(); + std::fs::set_permissions(&gemfile, std::fs::Permissions::from_mode(0o444)).unwrap(); + + let res = edit_gemfile_remove(&gemfile, false).await; + assert_eq!(res.status, GemSetupStatus::Updated, "err: {:?}", res.error); + assert_eq!( + fs::read_to_string(&gemfile).await.unwrap(), + GEMFILE, + "read-only Gemfile restored byte-for-byte via stage+rename" + ); + } + + #[tokio::test] + async fn test_edit_leaves_no_stage_litter() { + let dir = tempfile::tempdir().unwrap(); + let gemfile = dir.path().join("Gemfile"); + fs::write(&gemfile, GEMFILE).await.unwrap(); + + assert_eq!( + edit_gemfile_add(&gemfile, false).await.status, + GemSetupStatus::Updated + ); + assert_eq!( + edit_gemfile_remove(&gemfile, false).await.status, + GemSetupStatus::Updated + ); + assert_eq!(fs::read_to_string(&gemfile).await.unwrap(), GEMFILE); + + // No half-written `.socket-stage-*` sibling left behind. 
+ let mut rd = fs::read_dir(dir.path()).await.unwrap(); + while let Some(entry) = rd.next_entry().await.unwrap() { + let name = entry.file_name().to_string_lossy().into_owned(); + assert!(!name.starts_with(".socket-stage-"), "stage litter: {name}"); + } + } + #[tokio::test] async fn test_full_roundtrip_via_project() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/manifest/operations.rs b/crates/socket-patch-core/src/manifest/operations.rs index 8ec9d19..8d6b04c 100644 --- a/crates/socket-patch-core/src/manifest/operations.rs +++ b/crates/socket-patch-core/src/manifest/operations.rs @@ -31,12 +31,18 @@ pub fn get_after_hash_blobs(manifest: &PatchManifest) -> HashSet { /// Get only beforeHash blobs referenced by a manifest. /// Used for rollback operations -- we need the original file content to restore. +/// +/// An empty `beforeHash` is the "file created by the patch" sentinel, not a +/// blob reference (rollback deletes the file instead of restoring content), +/// so it is excluded from the set. pub fn get_before_hash_blobs(manifest: &PatchManifest) -> HashSet { let mut blobs = HashSet::new(); for record in manifest.patches.values() { for file_info in record.files.values() { - blobs.insert(file_info.before_hash.clone()); + if !file_info.before_hash.is_empty() { + blobs.insert(file_info.before_hash.clone()); + } } } @@ -260,6 +266,37 @@ mod tests { assert_eq!(blobs.len(), 0); } + // Regression: an empty `beforeHash` is the documented "file created by the + // patch" sentinel (get records it, apply/rollback branch on it) -- it is + // valid manifest data, not a blob reference. The before-blob set must skip + // it: a caller that treats every entry as fetchable would try to download + // blob "", and an existence probe via `blobs_path.join("")` resolves to + // the blobs directory itself, turning "is this blob on disk" into "does + // the directory exist". 
+ #[test] + fn test_get_before_hash_blobs_skips_new_file_sentinel() { + let mut manifest = create_test_manifest(); + let record = manifest.patches.get_mut("pkg:npm/pkg-a@1.0.0").unwrap(); + record.files.insert( + "package/created-by-patch.js".to_string(), + PatchFileInfo { + before_hash: String::new(), // new-file sentinel + after_hash: AFTER_HASH_1.to_string(), + }, + ); + + let blobs = get_before_hash_blobs(&manifest); + assert!( + !blobs.contains(""), + "the empty new-file sentinel is not a blob and must not be in the set" + ); + // The real before-hashes all survive. + assert_eq!(blobs.len(), 3); + for b in [BEFORE_HASH_1, BEFORE_HASH_2, BEFORE_HASH_3] { + assert!(blobs.contains(b)); + } + } + #[test] fn test_validate_manifest_valid() { let json = serde_json::json!({ diff --git a/crates/socket-patch-core/src/package_json/detect.rs b/crates/socket-patch-core/src/package_json/detect.rs index 8a671fb..569f118 100644 --- a/crates/socket-patch-core/src/package_json/detect.rs +++ b/crates/socket-patch-core/src/package_json/detect.rs @@ -67,9 +67,17 @@ pub fn is_setup_configured(package_json: &serde_json::Value) -> ScriptSetupStatu } } +/// Strip a leading UTF-8 BOM. npm and Node tolerate (and strip) a BOM in +/// package.json — files saved by Windows editors commonly carry one — but +/// serde_json rejects it, so every parse of user-supplied package.json content +/// must go through this first or npm-valid manifests error out. +fn strip_bom(content: &str) -> &str { + content.strip_prefix('\u{feff}').unwrap_or(content) +} + /// Check if a package.json content string is properly configured. 
pub fn is_setup_configured_str(content: &str) -> ScriptSetupStatus { - match serde_json::from_str::(content) { + match serde_json::from_str::(strip_bom(content)) { Ok(val) => is_setup_configured(&val), Err(_) => ScriptSetupStatus { postinstall_configured: false, @@ -303,8 +311,8 @@ pub fn remove_package_json_object(package_json: &mut serde_json::Value) -> Scrip pub fn remove_package_json_content( content: &str, ) -> Result<(bool, String, ScriptRemoveStatus), String> { - let mut package_json: serde_json::Value = - serde_json::from_str(content).map_err(|e| format!("Invalid package.json: {e}"))?; + let mut package_json: serde_json::Value = serde_json::from_str(strip_bom(content)) + .map_err(|e| format!("Invalid package.json: {e}"))?; if !package_json.is_object() { return Err("Invalid package.json: root is not a JSON object".to_string()); @@ -334,8 +342,8 @@ pub fn update_package_json_content( content: &str, pm: PackageManager, ) -> Result<(bool, String, String, String, String, String), String> { - let mut package_json: serde_json::Value = - serde_json::from_str(content).map_err(|e| format!("Invalid package.json: {e}"))?; + let mut package_json: serde_json::Value = serde_json::from_str(strip_bom(content)) + .map_err(|e| format!("Invalid package.json: {e}"))?; // A package.json must be a JSON object; otherwise there is nowhere to add // lifecycle scripts. @@ -472,6 +480,18 @@ mod tests { assert!(status.needs_update); } + #[test] + fn test_configured_str_utf8_bom() { + // npm strips a leading BOM when reading package.json; a BOM'd, + // configured manifest must read as configured, not as unparseable + // (which would mis-report it as needing setup). 
+ let content = "\u{feff}{\"scripts\":{\"postinstall\":\"npx @socketsecurity/socket-patch apply --silent --ecosystems npm\",\"dependencies\":\"npx @socketsecurity/socket-patch apply --silent --ecosystems npm\"}}"; + let status = is_setup_configured_str(content); + assert!(status.postinstall_configured); + assert!(status.dependencies_configured); + assert!(!status.needs_update); + } + #[test] fn test_configured_str_legacy_npx_pattern() { let content = diff --git a/crates/socket-patch-core/src/package_json/find.rs b/crates/socket-patch-core/src/package_json/find.rs index d015bfb..dd7c944 100644 --- a/crates/socket-patch-core/src/package_json/find.rs +++ b/crates/socket-patch-core/src/package_json/find.rs @@ -163,7 +163,16 @@ fn parse_pnpm_workspace_patterns(yaml_content: &str) -> Vec { for line in yaml_content.lines() { let trimmed = line.trim(); - if trimmed == "packages:" { + // The header may carry an inline comment (`packages: # globs`); a `#` + // opens a comment only when preceded by whitespace. + let is_packages_header = match trimmed.strip_prefix("packages:") { + Some("") => true, + Some(rest) => { + rest.starts_with(|c: char| c.is_whitespace()) && rest.trim_start().starts_with('#') + } + None => false, + }; + if is_packages_header { in_packages = true; continue; } @@ -247,6 +256,15 @@ async fn collect_workspace_members( return; } for pattern in &config.patterns { + // npm (`@npmcli/map-workspaces`), yarn, and pnpm all support + // `!`-prefixed exclusion patterns, processed in order: a negation + // removes whatever earlier patterns matched. Resolve the negated + // pattern with the same matcher and drop those members. 
+ if let Some(negated) = pattern.strip_prefix('!') { + let excluded = find_packages_matching_pattern(root_path, negated).await; + results.retain(|loc| !excluded.contains(&loc.path)); + continue; + } let packages = find_packages_matching_pattern(root_path, pattern).await; for p in packages { let member_dir = p.parent().map(Path::to_path_buf); @@ -296,6 +314,16 @@ async fn find_packages_matching_pattern(root_path: &Path, pattern: &str) -> Vec< if last == "*" { search_one_level(&search_path, &mut results).await; } else { + // Globstar matches zero segments too — npm/pnpm glob + // `/**/package.json`, which matches the prefix dir's + // own `package.json` — so the prefix directory itself is a + // candidate member, not just its descendants. (For a bare + // `**` this re-finds the root manifest; the caller's de-dup + // keeps the root entry.) + let own_pkg = search_path.join("package.json"); + if fs::metadata(&own_pkg).await.is_ok() { + results.push(own_pkg); + } search_recursive(&search_path, &mut results).await; } } @@ -1039,6 +1067,123 @@ mod tests { ); } + #[tokio::test] + async fn test_find_workspace_negation_excludes_member() { + // npm (`@npmcli/map-workspaces`), yarn, and pnpm all support + // `!`-prefixed exclusion patterns: a member matched by an earlier + // pattern and then negated is NOT a workspace member. Previously the + // `!pattern` was treated as a literal directory named `!packages`, so + // the exclusion was silently ignored and `setup` edited a package.json + // the user had explicitly excluded. 
+ let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("package.json"), + r#"{"workspaces": ["packages/*", "!packages/private"]}"#, + ) + .await + .unwrap(); + for member in ["a", "private"] { + let m = dir.path().join("packages").join(member); + fs::create_dir_all(&m).await.unwrap(); + fs::write(m.join("package.json"), r#"{"name":"m"}"#) + .await + .unwrap(); + } + let result = find_package_json_files(dir.path()).await; + let members: Vec<_> = result.files.iter().filter(|f| f.is_workspace).collect(); + assert_eq!( + members.len(), + 1, + "negated member must be excluded: {:?}", + result.files.iter().map(|f| &f.path).collect::>() + ); + assert!(members[0].path.ends_with("packages/a/package.json")); + } + + #[tokio::test] + async fn test_find_workspace_glob_negation_excludes_subtree() { + // A negation can itself be a glob (pnpm's docs show `!**/test/**`); + // `!legacy/**` must remove every member an earlier positive pattern + // picked up under legacy/. + let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("package.json"), + r#"{"workspaces": ["**", "!legacy/**"]}"#, + ) + .await + .unwrap(); + let app = dir.path().join("app"); + fs::create_dir_all(&app).await.unwrap(); + fs::write(app.join("package.json"), r#"{"name":"app"}"#) + .await + .unwrap(); + let old = dir.path().join("legacy").join("old"); + fs::create_dir_all(&old).await.unwrap(); + fs::write(old.join("package.json"), r#"{"name":"old"}"#) + .await + .unwrap(); + let result = find_package_json_files(dir.path()).await; + let members: Vec<_> = result.files.iter().filter(|f| f.is_workspace).collect(); + assert_eq!( + members.len(), + 1, + "legacy subtree must be excluded: {:?}", + result.files.iter().map(|f| &f.path).collect::>() + ); + assert!(members[0].path.ends_with("app/package.json")); + } + + #[tokio::test] + async fn test_find_double_glob_matches_prefix_dir_itself() { + // Globstar matches zero segments: npm/pnpm resolve members by globbing + // 
`apps/**/package.json`, which matches `apps/package.json` itself. A + // package living at the pattern's prefix directory is a workspace + // member too, not just its descendants — previously it was silently + // skipped and never configured. + let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("package.json"), + r#"{"workspaces": ["apps/**"]}"#, + ) + .await + .unwrap(); + let apps = dir.path().join("apps"); + fs::create_dir_all(&apps).await.unwrap(); + fs::write(apps.join("package.json"), r#"{"name":"apps"}"#) + .await + .unwrap(); + let web = apps.join("web"); + fs::create_dir_all(&web).await.unwrap(); + fs::write(web.join("package.json"), r#"{"name":"web"}"#) + .await + .unwrap(); + let result = find_package_json_files(dir.path()).await; + assert!( + result + .files + .iter() + .any(|f| f.is_workspace && f.path.ends_with("apps/package.json")), + "prefix dir's own package.json must be a member: {:?}", + result.files.iter().map(|f| &f.path).collect::>() + ); + assert!( + result + .files + .iter() + .any(|f| f.is_workspace && f.path.ends_with("apps/web/package.json")), + "descendant member must still be found" + ); + } + + #[test] + fn test_parse_pnpm_packages_key_inline_comment() { + // The section header itself may carry an inline comment + // (`packages: # workspace globs`); the exact-equality compare missed + // it and silently dropped the whole section. 
+ let yaml = "packages: # workspace globs\n - packages/*"; + assert_eq!(parse_pnpm_workspace_patterns(yaml), vec!["packages/*"]); + } + // ── detect_package_manager ────────────────────────────────────── #[tokio::test] diff --git a/crates/socket-patch-core/src/package_json/update.rs b/crates/socket-patch-core/src/package_json/update.rs index b445b38..74f917c 100644 --- a/crates/socket-patch-core/src/package_json/update.rs +++ b/crates/socket-patch-core/src/package_json/update.rs @@ -468,6 +468,47 @@ mod tests { assert_eq!(fs::read_to_string(&pkg).await.unwrap(), original); } + /// npm and Node tolerate (and strip) a UTF-8 BOM in package.json — files + /// saved by Windows editors commonly carry one. serde_json does not, so + /// without stripping it a perfectly npm-valid manifest errors out with + /// "Invalid package.json" instead of being configured. + #[tokio::test] + async fn test_update_tolerates_utf8_bom() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + fs::write( + &pkg, + "\u{feff}{\"name\":\"x\",\"scripts\":{\"build\":\"tsc\"}}", + ) + .await + .unwrap(); + let result = update_package_json(&pkg, false, PackageManager::Npm).await; + assert_eq!( + result.status, + UpdateStatus::Updated, + "BOM'd package.json is valid for npm and must be updatable, got error: {:?}", + result.error + ); + let content = fs::read_to_string(&pkg).await.unwrap(); + let parsed: serde_json::Value = serde_json::from_str(&content).unwrap(); + assert!(parsed["scripts"]["postinstall"].is_string()); + assert!(parsed["scripts"]["dependencies"].is_string()); + assert_eq!(parsed["scripts"]["build"], "tsc"); + } + + /// A BOM'd file that is already fully configured must report + /// `AlreadyConfigured` (and stay untouched), not `Error`. 
+ #[tokio::test] + async fn test_update_bom_already_configured() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + let original = "\u{feff}{\"scripts\":{\"postinstall\":\"npx @socketsecurity/socket-patch apply --silent --ecosystems npm\",\"dependencies\":\"npx @socketsecurity/socket-patch apply --silent --ecosystems npm\"}}"; + fs::write(&pkg, original).await.unwrap(); + let result = update_package_json(&pkg, false, PackageManager::Npm).await; + assert_eq!(result.status, UpdateStatus::AlreadyConfigured); + assert_eq!(fs::read_to_string(&pkg).await.unwrap(), original); + } + /// An empty file is invalid JSON and must error without writing. #[tokio::test] async fn test_update_empty_file_errors() { @@ -630,6 +671,32 @@ mod tests { assert_eq!(r2.status, RemoveStatus::NotConfigured); } + /// Remove must tolerate a UTF-8 BOM the same way npm does: a BOM'd, + /// configured package.json must be cleanly reverted, not rejected as + /// invalid JSON. + #[tokio::test] + async fn test_remove_tolerates_utf8_bom() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + fs::write( + &pkg, + "\u{feff}{\"name\":\"x\",\"scripts\":{\"build\":\"tsc\",\"postinstall\":\"npx @socketsecurity/socket-patch apply --silent --ecosystems npm\"}}", + ) + .await + .unwrap(); + let result = remove_package_json(&pkg, false).await; + assert_eq!( + result.status, + RemoveStatus::Removed, + "BOM'd package.json is valid for npm and must be removable, got error: {:?}", + result.error + ); + let content = fs::read_to_string(&pkg).await.unwrap(); + assert!(!content.contains("socket-patch")); + let parsed: serde_json::Value = serde_json::from_str(&content).unwrap(); + assert_eq!(parsed["scripts"]["build"], "tsc"); + } + #[tokio::test] async fn test_remove_invalid_json_errors_and_leaves_file() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/patch/apply.rs 
b/crates/socket-patch-core/src/patch/apply.rs index 3e379a3..e880707 100644 --- a/crates/socket-patch-core/src/patch/apply.rs +++ b/crates/socket-patch-core/src/patch/apply.rs @@ -254,23 +254,28 @@ pub async fn verify_file_patch( /// A package@version may resolve to several patch variants (PyPI /// `?artifact_id=...` releases, one per wheel/sdist). Only one /// distribution is ever installed in a given environment, so only one -/// variant can apply. This mirrors the first-file hash check the apply -/// pipeline uses: a variant matches when its first patched file is not -/// in a [`VerifyStatus::HashMismatch`] state against the on-disk -/// package. A variant with no files (nothing to verify) is treated as a -/// match. +/// variant can apply. This mirrors the representative-file hash check +/// the apply pipeline uses: a variant matches when its representative +/// patched file is not in a [`VerifyStatus::HashMismatch`] state +/// against the on-disk package. A variant with no files (nothing to +/// verify) is treated as a match. /// /// `variants` maps a variant key (typically a qualified PURL) to that /// variant's patched files. Returns the indices of **every** variant -/// whose first patched file is in a [`VerifyStatus::Ready`] or +/// whose representative patched file is in a [`VerifyStatus::Ready`] or /// [`VerifyStatus::AlreadyPatched`] state — i.e. its `beforeHash` (or -/// `afterHash`, if already applied) matches the installed bytes. +/// `afterHash`, if already applied) matches the installed bytes. The +/// representative is the lexicographically smallest file with a +/// non-empty `beforeHash`: only a file that modifies existing content +/// can discriminate between distributions (a new file verifies Ready +/// everywhere), and the deterministic pick keeps selection stable +/// across runs (`HashMap` iteration order is randomized). 
/// /// A [`VerifyStatus::NotFound`] (a missing pre-existing file) or /// [`VerifyStatus::HashMismatch`] does **not** count as a match: those /// signal the variant describes a distribution that is *not* present on -/// disk. A variant with no files (nothing to verify) is treated as a -/// match. +/// disk. A variant with no discriminating file (no files at all, or +/// only new files — nothing to verify) is treated as a match. /// /// Returning all matches (not just the first) is what lets ecosystems /// whose variants *coexist* on disk work — e.g. Maven, where several @@ -286,8 +291,19 @@ pub async fn select_installed_variants( ) -> Vec { let mut matched = Vec::new(); for (idx, (_key, files)) in variants.iter().enumerate() { - // No files to verify — nothing to disqualify the variant. - let Some((file_name, file_info)) = files.iter().next() else { + // Representative file: only a file that modifies existing content + // (non-empty `beforeHash`) can discriminate between distributions — + // a NEW file (empty `beforeHash`) verifies Ready against any + // environment, so it can neither identify nor disqualify a variant. + // Take the lexicographically smallest such key so the choice is + // deterministic (`HashMap` iteration order is randomized per + // instance). No discriminating file (no files at all, or only new + // files) — nothing to disqualify the variant. 
+ let representative = files + .iter() + .filter(|(_, info)| !info.before_hash.is_empty()) + .min_by(|(a, _), (b, _)| a.cmp(b)); + let Some((file_name, file_info)) = representative else { matched.push(idx); continue; }; @@ -869,7 +885,26 @@ pub async fn apply_package_patch( use crate::patch::sidecars::{ dispatch_fixup, SidecarAdvisory, SidecarAdvisoryCode, SidecarRecord, SidecarSeverity, }; - match dispatch_fixup(package_key, pkg_path, &result.files_patched, files).await { + // Include files verified `AlreadyPatched` alongside the ones + // written this run: a previous apply that failed partway left + // them patched on disk but returned before this boundary, so + // their sidecar entries (e.g. `.cargo-checksum.json` hashes) + // are still pre-patch — and this retry is the only chance to + // resync them. They exist at their after-hash, so rehashing is + // a no-op rewrite in the common already-synced case. + let fixup_files: Vec = result + .files_patched + .iter() + .cloned() + .chain( + result + .files_verified + .iter() + .filter(|v| v.status == VerifyStatus::AlreadyPatched) + .map(|v| v.file.clone()), + ) + .collect(); + match dispatch_fixup(package_key, pkg_path, &fixup_files, files).await { Ok(Some(record)) => result.sidecar = Some(record), Ok(None) => {} Err(e) => { @@ -2338,6 +2373,97 @@ mod tests { .unwrap(); } + /// Variant selection must be driven by an on-disk `beforeHash` match + /// against a file that can actually discriminate between + /// distributions. A NEW file (empty `beforeHash`) verifies Ready + /// against ANY environment, so it must never be the basis for + /// selecting a variant. Regression: the representative file was taken + /// via `HashMap::iter().next()`, whose order is randomized per map + /// instance — whenever the new file came up first, a variant + /// describing a different, NOT-installed distribution matched, and + /// the result flipped between runs (wrong-variant rollback attempts, + /// wrong variants kept by `get`). 
The loop re-builds the maps each + /// round so the randomized iteration order is exercised. + #[tokio::test] + async fn test_select_installed_variants_new_file_never_drives_selection() { + let dir = tempfile::tempdir().unwrap(); + let installed = b"installed wheel bytes"; + tokio::fs::write(dir.path().join("mod.py"), installed) + .await + .unwrap(); + let installed_hash = compute_git_sha256_from_bytes(installed); + let other_hash = compute_git_sha256_from_bytes(b"other wheel bytes"); + + for round in 0..64 { + // Variant A: matches the installed distribution. + let mut variant_a = HashMap::new(); + variant_a.insert( + "mod.py".to_string(), + PatchFileInfo { + before_hash: installed_hash.clone(), + after_hash: "a".repeat(64), + }, + ); + variant_a.insert( + "zz_new_shim.py".to_string(), + PatchFileInfo { + before_hash: String::new(), // new file + after_hash: "b".repeat(64), + }, + ); + // Variant B: a different distribution (mod.py bytes differ), + // but it adds the same new file. + let mut variant_b = HashMap::new(); + variant_b.insert( + "mod.py".to_string(), + PatchFileInfo { + before_hash: other_hash.clone(), + after_hash: "c".repeat(64), + }, + ); + variant_b.insert( + "zz_new_shim.py".to_string(), + PatchFileInfo { + before_hash: String::new(), // new file + after_hash: "d".repeat(64), + }, + ); + + let variants: Vec<(&str, &HashMap)> = vec![ + ("pkg:pypi/x@1.0.0?artifact_id=installed", &variant_a), + ("pkg:pypi/x@1.0.0?artifact_id=other", &variant_b), + ]; + let matched = select_installed_variants(dir.path(), &variants).await; + assert_eq!( + matched, + vec![0], + "round {round}: only the installed variant may match — a new \ + file (empty beforeHash) must never drive selection" + ); + } + } + + /// A variant whose files are ALL new (no `beforeHash` anywhere) has + /// nothing that can disqualify it against the installed bytes — it + /// must keep matching, consistent with the documented no-files + /// behavior. 
+ #[tokio::test] + async fn test_select_installed_variants_all_new_files_variant_matches() { + let dir = tempfile::tempdir().unwrap(); + let mut variant = HashMap::new(); + variant.insert( + "shim.py".to_string(), + PatchFileInfo { + before_hash: String::new(), + after_hash: "a".repeat(64), + }, + ); + let variants: Vec<(&str, &HashMap)> = + vec![("pkg:pypi/x@1.0.0?artifact_id=only", &variant)]; + let matched = select_installed_variants(dir.path(), &variants).await; + assert_eq!(matched, vec![0]); + } + #[test] fn test_applied_via_as_tag() { assert_eq!(AppliedVia::Package.as_tag(), "package"); @@ -2352,4 +2478,104 @@ mod tests { assert!(sources.packages_path.is_none()); assert!(sources.diffs_path.is_none()); } + + /// Regression (retried partial apply wedges cargo): a previous apply + /// that failed partway (e.g. a missing blob for the second file) left + /// the first file PATCHED on disk but returned before the sidecar + /// boundary, so `.cargo-checksum.json` still carries that file's + /// ORIGINAL hash. On the retry the file verifies `AlreadyPatched` and + /// is skipped by the patch loop — but it must still be included in the + /// sidecar fixup, or its checksum entry stays stale forever and + /// `cargo build` refuses the crate even though the retry reported + /// success. + #[cfg(feature = "cargo")] + #[tokio::test] + async fn test_apply_retry_resyncs_already_patched_checksum_entries() { + fn plain_sha256(b: &[u8]) -> String { + use sha2::{Digest, Sha256}; + let mut h = Sha256::new(); + h.update(b); + format!("{:x}", h.finalize()) + } + + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + let pkg = pkg_dir.path(); + + // State left by the interrupted run: a.rs already patched, b.rs + // still original, checksum entries both at ORIGINAL hashes. 
+ tokio::fs::write(pkg.join("a.rs"), b"patched a") + .await + .unwrap(); + tokio::fs::write(pkg.join("b.rs"), b"original b") + .await + .unwrap(); + let checksum = serde_json::json!({ + "files": { + "a.rs": plain_sha256(b"original a"), + "b.rs": plain_sha256(b"original b"), + }, + "package": "x", + }); + tokio::fs::write( + pkg.join(".cargo-checksum.json"), + serde_json::to_string_pretty(&checksum).unwrap(), + ) + .await + .unwrap(); + + // The retry has b's blob available. + let b_after = compute_git_sha256_from_bytes(b"patched b"); + tokio::fs::write(blobs_dir.path().join(&b_after), b"patched b") + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "a.rs".to_string(), + PatchFileInfo { + before_hash: compute_git_sha256_from_bytes(b"original a"), + after_hash: compute_git_sha256_from_bytes(b"patched a"), + }, + ); + files.insert( + "b.rs".to_string(), + PatchFileInfo { + before_hash: compute_git_sha256_from_bytes(b"original b"), + after_hash: b_after, + }, + ); + + let result = apply_package_patch( + "pkg:cargo/mycrate@1.0.0", + pkg, + &files, + &PatchSources::blobs_only(blobs_dir.path()), + None, + false, + false, + ) + .await; + + assert!(result.success, "retry must succeed: {:?}", result.error); + assert_eq!(result.files_patched, vec!["b.rs".to_string()]); + + let post: serde_json::Value = serde_json::from_str( + &tokio::fs::read_to_string(pkg.join(".cargo-checksum.json")) + .await + .unwrap(), + ) + .unwrap(); + assert_eq!( + post["files"]["b.rs"].as_str().unwrap(), + plain_sha256(b"patched b"), + "the freshly patched file's entry must be rewritten" + ); + assert_eq!( + post["files"]["a.rs"].as_str().unwrap(), + plain_sha256(b"patched a"), + "an AlreadyPatched file from the interrupted run must be resynced \ + too — a stale original-hash entry wedges cargo build" + ); + } } diff --git a/crates/socket-patch-core/src/patch/copy_tree.rs b/crates/socket-patch-core/src/patch/copy_tree.rs index 707f268..26eed36 100644 --- 
a/crates/socket-patch-core/src/patch/copy_tree.rs +++ b/crates/socket-patch-core/src/patch/copy_tree.rs @@ -73,7 +73,15 @@ pub(crate) fn force_remove_dir_all(dir: &Path) -> std::io::Result<()> { #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; - for entry in walkdir::WalkDir::new(dir).into_iter().flatten() { + // `follow_root_links` defaults to true: a symlink *at* `dir` + // would otherwise be followed and the external target tree + // chmod'd. Disabled, a symlink root is yielded as a symlink + // and hits the skip below. + for entry in walkdir::WalkDir::new(dir) + .follow_root_links(false) + .into_iter() + .flatten() + { let ft = entry.file_type(); // Never chmod a symlink: `set_permissions` follows the link // and would mutate its *target's* mode — which may live @@ -296,4 +304,51 @@ mod tests { ); assert!(outside.exists()); } + + /// Regression: the perm-relax retry must not traverse *through* a + /// symlinked root either. walkdir follows root symlinks by default + /// (`follow_root_links`), so if the tree path itself is a symlink and the + /// first remove fails (e.g. its parent dir is unwritable), the relax loop + /// would descend into the external target and chmod everything in it to + /// 0o755/0o644 — mutating a tree entirely outside `.socket/`. + #[cfg(unix)] + #[tokio::test] + async fn relax_loop_must_not_traverse_symlinked_root() { + let base = tempfile::tempdir().unwrap(); + // External target tree with restrictive perms. + let target = base.path().join("target"); + fs::create_dir_all(&target).unwrap(); + fs::write(target.join("secret.txt"), b"secret").unwrap(); + fs::set_permissions(target.join("secret.txt"), fs::Permissions::from_mode(0o600)).unwrap(); + + // Symlink at the tree path; read-only parent so the first + // remove_dir_all (an unlink of the symlink) fails and the relax + // retry path runs. 
+ let parent = base.path().join("parent"); + fs::create_dir_all(&parent).unwrap(); + let root = parent.join("tree"); + std::os::unix::fs::symlink(&target, &root).unwrap(); + fs::set_permissions(&parent, fs::Permissions::from_mode(0o555)).unwrap(); + + let result = remove_tree(&root).await; + + // Restore parent so tempdir cleanup works. + fs::set_permissions(&parent, fs::Permissions::from_mode(0o755)).unwrap(); + + assert!( + result.is_err(), + "removal cannot succeed under a read-only parent" + ); + let mode = fs::metadata(target.join("secret.txt")) + .unwrap() + .permissions() + .mode() + & 0o777; + assert_eq!( + mode, 0o600, + "file behind symlinked root was chmod'd to {:o}", + mode + ); + assert_eq!(fs::read(target.join("secret.txt")).unwrap(), b"secret"); + } } diff --git a/crates/socket-patch-core/src/patch/cow.rs b/crates/socket-patch-core/src/patch/cow.rs index 4bdefc5..605683f 100644 --- a/crates/socket-patch-core/src/patch/cow.rs +++ b/crates/socket-patch-core/src/patch/cow.rs @@ -84,11 +84,15 @@ pub async fn break_hardlink_if_needed(path: &Path) -> std::io::Result return Ok(CowAction::BrokeSymlink); } - // Regular file. Hardlink defense is Unix-only — see module docs. + // Hardlink defense is Unix-only — see module docs. The break only + // makes sense for regular files: a directory always has nlink >= 2 + // (read() would fail EISDIR), and read() on a hardlinked FIFO blocks + // forever waiting for a writer. Non-regular inodes are not cow's + // problem — leave them untouched. #[cfg(unix)] { use std::os::unix::fs::MetadataExt; - if lstat.nlink() > 1 { + if lstat.is_file() && lstat.nlink() > 1 { // Atomic-rename-over-self pattern: copy our content into // a fresh inode, then rename over the original. 
The other // links keep pointing at the original inode (which now @@ -350,6 +354,53 @@ mod tests { assert_eq!(leftover_stage_count(dir.path()), 0); } + /// Non-regular inodes must never be routed into the hardlink + /// break: `read()` on a FIFO blocks forever waiting for a writer, + /// so a hardlinked FIFO (`nlink == 2`) at a patched path would + /// hang the whole apply. It must come back promptly as + /// `AlreadyPrivate` — content-copying only makes sense for + /// regular files. + #[cfg(unix)] + #[tokio::test] + async fn hardlinked_fifo_is_not_routed_into_hardlink_break() { + let dir = tempfile::tempdir().unwrap(); + let fifo = dir.path().join("pipe"); + let status = std::process::Command::new("mkfifo") + .arg(&fifo) + .status() + .unwrap(); + assert!(status.success()); + let link = dir.path().join("pipe-link"); + tokio::fs::hard_link(&fifo, &link).await.unwrap(); + + let action = tokio::time::timeout( + std::time::Duration::from_secs(2), + break_hardlink_if_needed(&link), + ) + .await + .expect("must not block reading the FIFO") + .unwrap(); + assert_eq!(action, CowAction::AlreadyPrivate); + assert_eq!(leftover_stage_count(dir.path()), 0); + } + + /// A directory always has `nlink >= 2` on Unix, which a bare + /// `nlink > 1` check misreads as a hardlinked file — `read()` then + /// fails EISDIR instead of the documented no-op. Directories are + /// not cow's problem; report `AlreadyPrivate` and leave them + /// untouched. 
+ #[tokio::test] + async fn directory_is_not_routed_into_hardlink_break() { + let dir = tempfile::tempdir().unwrap(); + let d = dir.path().join("pkg-subdir"); + tokio::fs::create_dir(&d).await.unwrap(); + tokio::fs::create_dir(d.join("child")).await.unwrap(); + + let action = break_hardlink_if_needed(&d).await.unwrap(); + assert_eq!(action, CowAction::AlreadyPrivate); + assert!(tokio::fs::metadata(&d).await.unwrap().is_dir()); + } + /// Idempotency: calling twice in a row on a regular file is fine /// and reports `AlreadyPrivate` both times. #[tokio::test] diff --git a/crates/socket-patch-core/src/patch/file_hash.rs b/crates/socket-patch-core/src/patch/file_hash.rs index 1597731..ac563ce 100644 --- a/crates/socket-patch-core/src/patch/file_hash.rs +++ b/crates/socket-patch-core/src/patch/file_hash.rs @@ -22,10 +22,23 @@ use crate::hash::git_sha256::compute_git_sha256_from_reader; /// special file (FIFO, device, …) and hashing it is never meaningful here, and /// on some platforms a directory can read as zero bytes — which would otherwise /// be silently reported as the empty-blob hash. +/// +/// On Unix the open itself is non-blocking (`O_NONBLOCK`): a plain `open(2)` +/// of a FIFO with `O_RDONLY` waits for a writer that may never come, which +/// would hang the patch engine forever *before* the regular-file guard below +/// ever runs. With `O_NONBLOCK` the open returns immediately (it has no effect +/// on reads of regular files) and the guard rejects the FIFO with an error. pub async fn compute_file_git_sha256(filepath: impl AsRef) -> Result { let filepath = filepath.as_ref(); // Open the file once; everything below operates on this single descriptor. 
+ #[cfg(unix)] + let file = tokio::fs::OpenOptions::new() + .read(true) + .custom_flags(libc::O_NONBLOCK) + .open(filepath) + .await?; + #[cfg(not(unix))] let file = tokio::fs::File::open(filepath).await?; // Size comes from the open handle (fstat), so it and the bytes we hash are @@ -165,6 +178,42 @@ mod tests { assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); } + /// A FIFO at the hashed path must be rejected promptly with an error, not + /// block forever. A plain `open(2)` with `O_RDONLY` on a FIFO waits for a + /// writer that never comes, so without a non-blocking open the `is_file` + /// guard is unreachable for exactly the special-file case it documents — + /// a FIFO planted at a manifest-listed path would hang apply/rollback + /// verification indefinitely. + #[cfg(unix)] + #[tokio::test] + async fn test_compute_file_git_sha256_rejects_fifo_without_hanging() { + let dir = tempfile::tempdir().unwrap(); + let fifo = dir.path().join("pipe"); + + let status = std::process::Command::new("mkfifo") + .arg(&fifo) + .status() + .expect("mkfifo must be runnable"); + assert!(status.success(), "mkfifo failed"); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(5), + compute_file_git_sha256(&fifo), + ) + .await; + + let Ok(result) = result else { + // The open is wedged in a `spawn_blocking` thread that the runtime + // waits for on shutdown; connect a writer to release it so this + // test can FAIL instead of hanging the whole suite. + let _ = std::fs::OpenOptions::new().write(true).open(&fifo); + panic!("hashing a FIFO must error promptly, not hang"); + }; + + let err = result.expect_err("FIFO must be rejected, never hashed"); + assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); + } + /// A broken symlink (dangling target) must surface the open error rather /// than panicking or returning a hash. 
#[cfg(unix)] diff --git a/crates/socket-patch-core/src/patch/go_redirect.rs b/crates/socket-patch-core/src/patch/go_redirect.rs index 33d351a..34e5778 100644 --- a/crates/socket-patch-core/src/patch/go_redirect.rs +++ b/crates/socket-patch-core/src/patch/go_redirect.rs @@ -30,7 +30,7 @@ use crate::patch::apply::{ apply_package_patch, normalize_file_path, ApplyResult, PatchSources, VerifyResult, VerifyStatus, }; use crate::patch::file_hash::compute_file_git_sha256; -use crate::utils::purl::{build_golang_purl, parse_golang_purl}; +use crate::utils::purl::{build_golang_purl, parse_golang_purl, strip_purl_qualifiers}; use super::copy_tree::{fresh_copy, remove_tree}; use super::go_mod_edit::{ @@ -336,9 +336,13 @@ pub async fn reconcile_go_redirects( } // (b) Orphan copy dirs not referenced by a desired PURL (catches copies left - // behind by a hand-deleted directive or a version bump). + // behind by a hand-deleted directive or a version bump). A desired manifest + // key may carry `?qualifiers`/`#subpath` (raw API PURL), while the PURL + // reconstructed from the copy dir is the canonical base — compare bases, or + // a qualified key's freshly applied copy is pruned as an orphan. 
+ let desired_bases: HashSet<&str> = desired.iter().map(|p| strip_purl_qualifiers(p)).collect(); for (purl, dir) in collect_copy_modules(&project_root.join(GO_PATCHES_DIR)).await { - if !desired.contains(&purl) { + if !desired_bases.contains(purl.as_str()) { if !dry_run { let _ = remove_tree(&dir).await; } @@ -1400,6 +1404,73 @@ mod tests { assert!(!are_safe_redirect_coords("github.com/foo/bar", "")); } + /// SECURITY regression: a leading drive-letter segment (`C:/evil`) passes + /// the per-segment checks (it is not `.`/`..`, has no `\` and no leading + /// `/`), but on Windows `Path::join` REPLACES the base path when handed an + /// absolute path — so a tampered `pkg:golang/C:/evil@v1.0.0` would resolve + /// the copy dir to `C:\evil@v1.0.0` and `fresh_copy`/`remove_tree` would + /// write/delete there, outside `.socket/go-patches/`. A real Go module + /// path element / version never contains `:` (letters, digits, `-._~` + /// only), so rejecting it is fail-closed on every platform. + #[test] + fn test_safe_redirect_coords_reject_windows_drive() { + assert!(!are_safe_redirect_coords("C:/evil", "v1.0.0")); + assert!(!are_safe_redirect_coords("c:/evil", "v1.0.0")); + assert!(!are_safe_redirect_coords("C:", "v1.0.0")); + assert!(!are_safe_redirect_coords("github.com/foo/bar", "C:evil")); + } + + /// A manifest key may carry `?qualifiers` / `#subpath` (the keys are raw + /// API PURLs; `parse_golang_purl` strips both, which is why apply and + /// verify tolerate them). Reconcile must compare desired PURLs by their + /// canonical base — not raw string equality — or the just-applied copy of + /// a qualified key is "pruned" as an orphan while its socket-owned + /// `replace` survives (the module is still desired), leaving go.mod + /// pointing at a deleted directory. 
+ #[tokio::test] + async fn test_reconcile_keeps_qualified_desired_purl() { + let (dir, blobs, pristine, files, _after) = fixture().await; + let root = dir.path(); + let sources = PatchSources::blobs_only(&blobs); + let qualified = "pkg:golang/github.com/foo/bar@v1.4.2?type=module"; + // The CLI keys the copy off the parsed (qualifier-stripped) coords. + let (module, version) = parse_golang_purl(qualified).unwrap(); + let result = apply_go_redirect( + qualified, + module, + version, + &pristine, + root, + GO_PATCHES_DIR, + &files, + &sources, + None, + false, + false, + ) + .await; + assert!(result.success, "apply failed: {:?}", result.error); + + let desired: HashSet = [qualified.to_string()].into_iter().collect(); + let removed = reconcile_go_redirects(root, &desired, false).await; + assert!( + removed.is_empty(), + "a desired (qualified) redirect must not be pruned: {removed:?}" + ); + assert!( + root.join(".socket/go-patches/github.com/foo/bar@v1.4.2") + .exists(), + "copy of a desired patch must survive reconcile" + ); + assert!( + read_replace_entries(root) + .await + .iter() + .any(|e| e.module == MODULE && e.socket_owned()), + "socket-owned replace must survive" + ); + } + /// SECURITY regression: a tampered manifest PURL with `..` in the module path /// must NOT let `apply` copy + write the patched tree outside /// `.socket/go-patches/`. Without the guard `copy_dir_for` would resolve to diff --git a/crates/socket-patch-core/src/patch/package.rs b/crates/socket-patch-core/src/patch/package.rs index 7113df7..53803fe 100644 --- a/crates/socket-patch-core/src/patch/package.rs +++ b/crates/socket-patch-core/src/patch/package.rs @@ -68,7 +68,29 @@ fn normalize_entry_path(path: &str) -> &str { /// extraction step itself — the on-disk write site is the single, /// hash-verified path inside `apply_file_patch`. pub fn read_archive_to_map(archive_path: &Path) -> Result>, ArchiveError> { + // Open non-blockingly and require a regular file. 
A plain `open(2)` of a + // FIFO planted at the archive path waits for a writer that may never + // come — wedging the whole apply run before any parsing happens (the + // caller only stats the path first, which a FIFO passes). `O_NONBLOCK` + // has no effect on regular-file reads; the handle-based `is_file` guard + // then rejects FIFOs/devices outright (mirrors + // `compute_file_git_sha256` in file_hash.rs). + #[cfg(unix)] + let file = { + use std::os::unix::fs::OpenOptionsExt; + std::fs::OpenOptions::new() + .read(true) + .custom_flags(libc::O_NONBLOCK) + .open(archive_path)? + }; + #[cfg(not(unix))] let file = std::fs::File::open(archive_path)?; + if !file.metadata()?.is_file() { + return Err(ArchiveError::Io(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("archive {} is not a regular file", archive_path.display()), + ))); + } // Hard-cap decompressed bytes to defuse gzip / tar bombs. Reads // beyond the limit yield EOF, which the tar parser surfaces as a // truncated-archive error. @@ -537,6 +559,51 @@ mod tests { assert_eq!(map.values().next().map(|v| v.as_slice()), Some(&b"ok"[..])); } + /// A FIFO planted at the archive path must be rejected promptly with an + /// error, not block forever. A plain `open(2)` with `O_RDONLY` on a FIFO + /// waits for a writer that may never come, and the caller + /// (`load_archive_if_present`) only stats the path before calling — a stat + /// a FIFO passes — so without a non-blocking open the whole apply run + /// wedges before any parsing or validation runs. 
+ #[cfg(unix)] + #[test] + fn test_read_archive_rejects_fifo_without_hanging() { + let dir = tempfile::tempdir().unwrap(); + let fifo = dir.path().join("arc.tar.gz"); + + let status = std::process::Command::new("mkfifo") + .arg(&fifo) + .status() + .expect("mkfifo must be runnable"); + assert!(status.success(), "mkfifo failed"); + + let (tx, rx) = std::sync::mpsc::channel(); + let fifo_for_thread = fifo.clone(); + std::thread::spawn(move || { + let _ = tx.send(read_archive_to_map(&fifo_for_thread)); + }); + + match rx.recv_timeout(std::time::Duration::from_secs(5)) { + Ok(result) => { + let err = result.expect_err("FIFO archive must be rejected, never parsed"); + assert!( + matches!( + &err, + ArchiveError::Io(e) if e.kind() == std::io::ErrorKind::InvalidInput + ), + "expected InvalidInput for FIFO archive, got {err:?}" + ); + } + Err(_) => { + // The open is wedged in the spawned thread; connect a writer + // to release it so this test can FAIL instead of hanging the + // whole suite. + let _ = std::fs::OpenOptions::new().write(true).open(&fifo); + panic!("reading a FIFO archive must error promptly, not hang"); + } + } + } + #[test] fn test_read_archive_skips_non_regular_entries() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/patch/path_safety.rs b/crates/socket-patch-core/src/patch/path_safety.rs index eeb801e..56c0eff 100644 --- a/crates/socket-patch-core/src/patch/path_safety.rs +++ b/crates/socket-patch-core/src/patch/path_safety.rs @@ -5,26 +5,33 @@ //! directories (`.socket/go-patches/…`, `.socket/vendor/…`) and the //! lockfile/config entries that point at them. Those files are committed and //! tamper-able, so every coordinate must be validated **fail-closed before any -//! disk access**: a `..`/`.` segment, an absolute path, a backslash, or a NUL -//! would otherwise let a poisoned manifest copy, write, or delete a tree at an -//! arbitrary filesystem location outside the project. +//! 
disk access**: a `..`/`.` segment, an absolute path, a backslash, a colon, +//! or a NUL would otherwise let a poisoned manifest copy, write, or delete a +//! tree at an arbitrary filesystem location outside the project. +//! +//! Colons are rejected because a leading `C:` makes the coordinate an +//! absolute Windows path that `Path::join` substitutes wholesale for the +//! base; no legitimate package name, version, or Go module path contains one. /// A single path segment (cargo crate name, version string, gem name, …): -/// no separators, not `.`/`..`, no backslash/NUL, non-empty. +/// no separators, not `.`/`..`, no backslash/colon/NUL, non-empty. pub(crate) fn is_safe_single_segment(s: &str) -> bool { !s.is_empty() && s != "." && s != ".." && !s.contains('/') && !s.contains('\\') + && !s.contains(':') && !s.contains('\0') } /// A multi-segment relative path (Go module path `github.com/foo/bar`, npm /// scoped name `@scope/name`, composer `vendor/name`): `/`-separated segments, -/// each non-empty and not `.`/`..`; no leading `/`, no backslash, no NUL. +/// each non-empty and not `.`/`..`; no leading `/`, no backslash, no colon, +/// no NUL. pub(crate) fn is_safe_multi_segment(s: &str) -> bool { - if s.is_empty() || s.starts_with('/') || s.contains('\\') || s.contains('\0') { + if s.is_empty() || s.starts_with('/') || s.contains('\\') || s.contains(':') || s.contains('\0') + { return false; } s.split('/') @@ -79,6 +86,9 @@ mod tests { assert!(!is_safe_single_segment("a/b")); assert!(!is_safe_single_segment("a\\b")); assert!(!is_safe_single_segment("a\0b")); + // A leading `C:` is an absolute Windows path under `Path::join`. + assert!(!is_safe_single_segment("C:evil")); + assert!(!is_safe_single_segment("c:")); } #[test] @@ -100,6 +110,10 @@ mod tests { assert!(!is_safe_multi_segment("foo/./bar")); assert!(!is_safe_multi_segment("foo\\bar")); assert!(!is_safe_multi_segment("foo\0bar")); + // Windows drive-letter escapes: `C:/…` joins as an absolute path. 
+ assert!(!is_safe_multi_segment("C:/evil")); + assert!(!is_safe_multi_segment("c:/evil")); + assert!(!is_safe_multi_segment("C:")); } #[test] diff --git a/crates/socket-patch-core/src/patch/rollback.rs b/crates/socket-patch-core/src/patch/rollback.rs index 12b5c31..20ac998 100644 --- a/crates/socket-patch-core/src/patch/rollback.rs +++ b/crates/socket-patch-core/src/patch/rollback.rs @@ -39,6 +39,14 @@ pub struct RollbackResult { pub files_verified: Vec, pub files_rolled_back: Vec, pub error: Option, + /// Ecosystem sidecar resync outcome — the rollback-side twin of + /// [`ApplyResult::sidecar`](crate::patch::apply::ApplyResult::sidecar). + /// `Some` when the ecosystem's integrity sidecar was resynced after + /// the restore (today: cargo's `.cargo-checksum.json`) or when that + /// resync failed (an `Error`-severity advisory; the files themselves + /// are still rolled back). `None` when no sidecar applied or no + /// files were rolled back (dry run, already original). + pub sidecar: Option, } /// Normalize file path by removing the "package/" prefix if present. @@ -90,16 +98,36 @@ pub async fn verify_file_rollback( // For new files (empty beforeHash), rollback means deleting the file. if is_new_file { - if tokio::fs::metadata(&filepath).await.is_err() { - // File already doesn't exist — already rolled back. - return VerifyRollbackResult { - file: file_name.to_string(), - status: VerifyRollbackStatus::AlreadyOriginal, - message: None, - current_hash: None, - expected_hash: None, - target_hash: None, - }; + // Probe the directory ENTRY (`symlink_metadata`), not the symlink + // target: a dangling symlink left where the patch-added file was + // makes `metadata` report ENOENT, which mis-classified the entry + // as already rolled back — the package rollback claimed success + // while silently leaving the stray entry behind. Only a true + // NotFound means already-gone; any other stat error (ELOOP, + // EACCES) is an unverifiable state and must fail closed. 
+ match tokio::fs::symlink_metadata(&filepath).await { + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + // File already doesn't exist — already rolled back. + return VerifyRollbackResult { + file: file_name.to_string(), + status: VerifyRollbackStatus::AlreadyOriginal, + message: None, + current_hash: None, + expected_hash: None, + target_hash: None, + }; + } + Err(e) => { + return VerifyRollbackResult { + file: file_name.to_string(), + status: VerifyRollbackStatus::NotFound, + message: Some(format!("Failed to stat file: {}", e)), + current_hash: None, + expected_hash: None, + target_hash: None, + }; + } + Ok(_) => {} } let current_hash = compute_file_git_sha256(&filepath).await.unwrap_or_default(); if current_hash == file_info.after_hash { @@ -167,6 +195,28 @@ pub async fn verify_file_rollback( }; } + // SECURITY: `beforeHash` comes from the same untrusted manifest as the + // file keys, but is used as a path component under the blobs directory. + // `Path::join` discards the base on an absolute "hash" and `..` walks + // out, so an unvalidated value would turn the blob probe — and the + // rollback loop's blob read — into an out-of-tree existence oracle, a + // content-hash leak via the mismatch error, or an unbounded-read DoS + // (`/dev/zero`, FIFO hang). Real blob hashes are plain hex and always + // pass; anything path-unsafe is refused fail-closed. 
+ if !crate::patch::apply::is_safe_relative_subpath(&file_info.before_hash) { + return VerifyRollbackResult { + file: file_name.to_string(), + status: VerifyRollbackStatus::MissingBlob, + message: Some(format!( + "Unsafe before-blob hash (escapes blobs directory): {}", + file_info.before_hash + )), + current_hash: Some(current_hash), + expected_hash: None, + target_hash: None, + }; + } + // Check if before blob exists (required to actually restore the file) let before_blob_path = blobs_path.join(&file_info.before_hash); if tokio::fs::metadata(&before_blob_path).await.is_err() { @@ -267,6 +317,7 @@ pub async fn rollback_package_patch( files_verified: Vec::new(), files_rolled_back: Vec::new(), error: None, + sidecar: None, }; // First, verify all files @@ -349,6 +400,18 @@ pub async fn rollback_package_patch( continue; } + // SECURITY: defense-in-depth twin of the verify-time guard — never + // join an unvalidated manifest hash onto the blobs directory at the + // read syscall either (mirrors the delete branch above). Verify + // already blocks unsafe hashes, but this read must not depend on it. + if !crate::patch::apply::is_safe_relative_subpath(&file_info.before_hash) { + result.error = Some(format!( + "Unsafe before-blob hash (escapes blobs directory): {}", + file_info.before_hash + )); + return result; + } + // Read original content from blobs let blob_path = blobs_path.join(&file_info.before_hash); let original_content = match tokio::fs::read(&blob_path).await { @@ -378,6 +441,61 @@ pub async fn rollback_package_patch( result.files_rolled_back.push(file_name.clone()); } + // Ecosystem sidecar resync — the rollback-side twin of apply's + // `dispatch_fixup` boundary. Apply rewrote integrity sidecars to the + // patched hashes; with the original bytes now restored those hashes + // are stale in the other direction (cargo refuses to build a vendored + // crate whose `.cargo-checksum.json` disagrees with its sources). 
+ // Best-effort, exactly like apply: a failing resync does NOT undo the + // rollback — the restored bytes are already committed — it surfaces + // as an `Error`-severity `sidecar_fixup_failed` advisory instead. + if !result.files_rolled_back.is_empty() { + use crate::patch::sidecars::{ + dispatch_rollback_fixup, SidecarAdvisory, SidecarAdvisoryCode, SidecarRecord, + SidecarSeverity, + }; + // Include files verified `AlreadyOriginal` alongside the ones + // restored this run: a previous rollback that failed partway + // restored them but returned before this boundary, so their + // sidecar entries still carry the PATCHED hashes apply's fixup + // wrote — and this retry is the only chance to resync them. + // They exist at their before-hash (or, for patch-added files, + // are already deleted, which the resync handles by dropping the + // entry), so the rehash is a no-op rewrite in the common + // already-synced case. + let resync_files: Vec = result + .files_rolled_back + .iter() + .cloned() + .chain( + result + .files_verified + .iter() + .filter(|v| v.status == VerifyRollbackStatus::AlreadyOriginal) + .map(|v| v.file.clone()), + ) + .collect(); + match dispatch_rollback_fixup(package_key, pkg_path, &resync_files).await { + Ok(Some(record)) => result.sidecar = Some(record), + Ok(None) => {} + Err(e) => { + let ecosystem = crate::crawlers::Ecosystem::from_purl(package_key) + .map(|eco| eco.cli_name().to_string()) + .unwrap_or_else(|| "unknown".to_string()); + result.sidecar = Some(SidecarRecord { + purl: package_key.to_string(), + ecosystem, + files: Vec::new(), + advisory: Some(SidecarAdvisory { + code: SidecarAdvisoryCode::SidecarFixupFailed, + severity: SidecarSeverity::Error, + message: format!("sidecar resync failed (files still rolled back): {}", e), + }), + }); + } + } + } + result.success = true; result } @@ -1199,4 +1317,430 @@ mod tests { .await .unwrap(); } + + /// SECURITY (before-blob hash path-escape at verify): `beforeHash` + /// comes from the same 
untrusted manifest as the file keys, but is + /// joined onto the blobs directory as a path component. A traversal + /// (`../x`) or absolute "hash" must be refused at verification — + /// `Path::join` discards the base on an absolute string and `..` + /// walks out, so an escaping hash that resolved to any existing file + /// verified `Ready` and the rollback loop then read an arbitrary + /// out-of-tree path (existence oracle, unbounded read of `/dev/zero`, + /// FIFO hang). + #[tokio::test] + async fn test_verify_file_rollback_rejects_blob_hash_escape() { + let root = tempfile::tempdir().unwrap(); + let pkg_dir = root.path().join("pkg"); + let blobs_dir = root.path().join("blobs"); + tokio::fs::create_dir_all(&pkg_dir).await.unwrap(); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + // An out-of-tree file the escaping "hash" resolves to. + let secret = root.path().join("secret.txt"); + tokio::fs::write(&secret, b"out of tree").await.unwrap(); + + let patched = b"patched content"; + tokio::fs::write(pkg_dir.join("index.js"), patched) + .await + .unwrap(); + + let escapes = [ + "../secret.txt".to_string(), + // Absolute path: Path::join discards the blobs-dir base entirely. + secret.to_string_lossy().into_owned(), + ]; + for before_hash in escapes { + let file_info = PatchFileInfo { + before_hash: before_hash.clone(), + after_hash: compute_git_sha256_from_bytes(patched), + }; + let result = verify_file_rollback(&pkg_dir, "index.js", &file_info, &blobs_dir).await; + assert_ne!( + result.status, + VerifyRollbackStatus::Ready, + "hash: {before_hash}" + ); + assert_ne!( + result.status, + VerifyRollbackStatus::AlreadyOriginal, + "hash: {before_hash}" + ); + } + } + + /// SECURITY (before-blob escape at the read site): a poisoned manifest + /// whose `beforeHash` escapes the blobs directory must fail the + /// package rollback with the path-safety error. 
Regression: the + /// unguarded code read the out-of-tree file and leaked its git-sha256 + /// into the error message ("Got: ") — an existence + + /// content-hash oracle over any host file readable by the user. + #[tokio::test] + async fn test_rollback_package_patch_blob_hash_escape_blocked() { + let root = tempfile::tempdir().unwrap(); + let pkg_dir = root.path().join("pkg"); + let blobs_dir = root.path().join("blobs"); + tokio::fs::create_dir_all(&pkg_dir).await.unwrap(); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + let secret_content = b"top secret contents\n"; + tokio::fs::write(root.path().join("secret.txt"), secret_content) + .await + .unwrap(); + + let patched = b"patched content"; + tokio::fs::write(pkg_dir.join("index.js"), patched) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "index.js".to_string(), + PatchFileInfo { + before_hash: "../secret.txt".to_string(), + after_hash: compute_git_sha256_from_bytes(patched), + }, + ); + + let result = + rollback_package_patch("pkg:npm/test@1.0.0", &pkg_dir, &files, &blobs_dir, false).await; + + assert!(!result.success, "escaping blob hash must be refused"); + assert!(result.files_rolled_back.is_empty()); + let err = result.error.unwrap(); + let secret_hash = compute_git_sha256_from_bytes(secret_content); + assert!( + !err.contains(&secret_hash), + "error must not leak the out-of-tree file's content hash: {err}" + ); + assert!( + err.contains("Unsafe before-blob hash"), + "unexpected error: {err}" + ); + // The patched file must be untouched. + assert_eq!( + tokio::fs::read(pkg_dir.join("index.js")).await.unwrap(), + patched + ); + } + + /// Regression (new-file dangling symlink): `metadata()` follows + /// symlinks, so a dangling symlink left where the patch-added file + /// was reported ENOENT → `AlreadyOriginal`, and the package rollback + /// claimed success while silently leaving the stray entry behind. 
+ /// The entry probe must be `symlink_metadata`: a path occupied by + /// something that is neither the added file nor absent is a modified + /// state and must fail closed, like every other modified state. + #[cfg(unix)] + #[tokio::test] + async fn test_rollback_package_patch_new_file_dangling_symlink_blocks() { + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + + let path = pkg_dir.path().join("added.js"); + std::os::unix::fs::symlink("does-not-exist", &path).unwrap(); + + let mut files = HashMap::new(); + files.insert( + "added.js".to_string(), + PatchFileInfo { + before_hash: String::new(), + after_hash: compute_git_sha256_from_bytes(b"added by patch\n"), + }, + ); + + let result = rollback_package_patch( + "pkg:npm/test@1.0.0", + pkg_dir.path(), + &files, + blobs_dir.path(), + false, + ) + .await; + + assert!( + !result.success, + "a dangling symlink at the added path is a modified state and must block" + ); + assert!(result.files_rolled_back.is_empty()); + // The stray entry is still there — it must not be silently ignored. + assert!(tokio::fs::symlink_metadata(&path).await.is_ok()); + } + + /// Regression (cargo sidecar resync): apply rewrites + /// `.cargo-checksum.json` to the *patched* SHA256s (and inserts + /// entries for patch-added files). Rolling the package back restores + /// the original bytes but used to leave the checksum file untouched — + /// original sources verified against patched hashes, so the very next + /// `cargo build` of the vendored crate refused with "checksum ... + /// has changed" (proven by `cargo_check_fails_without_sidecar_fixup` + /// in the cargo-build e2e). Rollback must resync the sidecar: + /// restored files get their original hash back, and the entry for a + /// patch-added (now deleted) file is removed entirely. 
+ #[cfg(feature = "cargo")] + #[tokio::test] + async fn test_rollback_package_patch_cargo_resyncs_checksum_sidecar() { + use sha2::{Digest, Sha256}; + fn sha256_hex(bytes: &[u8]) -> String { + let mut h = Sha256::new(); + h.update(bytes); + format!("{:x}", h.finalize()) + } + + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + let pkg = pkg_dir.path(); + + let original = b"pub fn hello() {}\n"; + let patched = b"pub fn hello() { /* patched */ }\n"; + let added = b"pub fn added() {}\n"; + let before_hash = compute_git_sha256_from_bytes(original); + let after_hash = compute_git_sha256_from_bytes(patched); + + // On-disk state is post-apply: patched source + patch-added file. + tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); + tokio::fs::write(pkg.join("src/lib.rs"), patched) + .await + .unwrap(); + tokio::fs::write(pkg.join("src/new.rs"), added) + .await + .unwrap(); + tokio::fs::write(blobs_dir.path().join(&before_hash), original) + .await + .unwrap(); + + // `.cargo-checksum.json` as apply's sidecar fixup left it: patched + // hashes for the patched file, a fresh entry for the added file, + // untouched entries and the `package` field preserved. 
+ let checksum_path = pkg.join(".cargo-checksum.json"); + let post_apply_checksum = serde_json::json!({ + "files": { + "src/lib.rs": sha256_hex(patched), + "src/new.rs": sha256_hex(added), + "Cargo.toml": "ff".repeat(32), + }, + "package": "tarball-hash-preserved", + }); + tokio::fs::write( + &checksum_path, + serde_json::to_string_pretty(&post_apply_checksum).unwrap(), + ) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "src/lib.rs".to_string(), + PatchFileInfo { + before_hash: before_hash.clone(), + after_hash, + }, + ); + files.insert( + "src/new.rs".to_string(), + PatchFileInfo { + before_hash: String::new(), + after_hash: compute_git_sha256_from_bytes(added), + }, + ); + + let result = + rollback_package_patch("pkg:cargo/demo@1.0.0", pkg, &files, blobs_dir.path(), false) + .await; + + assert!(result.success, "rollback failed: {:?}", result.error); + assert_eq!(result.files_rolled_back.len(), 2); + assert_eq!( + tokio::fs::read(pkg.join("src/lib.rs")).await.unwrap(), + original + ); + assert!(tokio::fs::metadata(pkg.join("src/new.rs")).await.is_err()); + + // The sidecar must reflect the rolled-back (original) state. + let post: serde_json::Value = + serde_json::from_str(&tokio::fs::read_to_string(&checksum_path).await.unwrap()) + .unwrap(); + let entries = post["files"].as_object().unwrap(); + assert_eq!( + entries["src/lib.rs"].as_str().unwrap(), + sha256_hex(original), + "rollback must restore the original hash in .cargo-checksum.json \ + or cargo refuses to build the rolled-back crate" + ); + assert!( + entries.get("src/new.rs").is_none(), + "the entry apply added for the patch-added file must be removed \ + once rollback deletes that file" + ); + // Untouched entries and the package field survive the resync. 
+ assert_eq!(entries["Cargo.toml"].as_str().unwrap(), "ff".repeat(32)); + assert_eq!(post["package"].as_str().unwrap(), "tarball-hash-preserved"); + + // And the result reports the resync as a sidecar record, the + // rollback-side twin of `ApplyResult::sidecar`. + let sidecar = result + .sidecar + .expect("cargo rollback must report a sidecar resync"); + assert_eq!(sidecar.ecosystem, "cargo"); + assert_eq!(sidecar.purl, "pkg:cargo/demo@1.0.0"); + assert_eq!(sidecar.files.len(), 1); + assert_eq!(sidecar.files[0].path, ".cargo-checksum.json"); + assert!(sidecar.advisory.is_none()); + } + + /// Best-effort boundary: a malformed `.cargo-checksum.json` must not + /// fail the rollback (the bytes are already restored) — it surfaces + /// as an `Error`-severity `sidecar_fixup_failed` advisory, mirroring + /// apply's boundary in `apply_package_patch`. + #[cfg(feature = "cargo")] + #[tokio::test] + async fn test_rollback_package_patch_cargo_sidecar_failure_is_best_effort() { + use crate::patch::sidecars::{SidecarAdvisoryCode, SidecarSeverity}; + + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + let pkg = pkg_dir.path(); + + let original = b"original content"; + let patched = b"patched content"; + let before_hash = compute_git_sha256_from_bytes(original); + + tokio::fs::write(pkg.join("lib.rs"), patched).await.unwrap(); + tokio::fs::write(blobs_dir.path().join(&before_hash), original) + .await + .unwrap(); + tokio::fs::write(pkg.join(".cargo-checksum.json"), b"not json") + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "lib.rs".to_string(), + PatchFileInfo { + before_hash: before_hash.clone(), + after_hash: compute_git_sha256_from_bytes(patched), + }, + ); + + let result = + rollback_package_patch("pkg:cargo/demo@1.0.0", pkg, &files, blobs_dir.path(), false) + .await; + + assert!( + result.success, + "sidecar resync failure must not fail the rollback" + ); + assert_eq!( + 
tokio::fs::read(pkg.join("lib.rs")).await.unwrap(), + original, + "the file restore itself must have happened" + ); + let sidecar = result + .sidecar + .expect("failure must surface as a sidecar record"); + let advisory = sidecar + .advisory + .expect("failure record carries an advisory"); + assert_eq!(advisory.code, SidecarAdvisoryCode::SidecarFixupFailed); + assert_eq!(advisory.severity, SidecarSeverity::Error); + } + + /// Regression (retried partial rollback wedges cargo): a previous + /// rollback that failed partway restored a.rs to its ORIGINAL bytes + /// but returned before the resync boundary, leaving a.rs's + /// `.cargo-checksum.json` entry at the PATCHED hash apply's fixup + /// wrote. On the retry a.rs verifies `AlreadyOriginal` and is skipped + /// by the restore loop — but it must still be included in the sidecar + /// resync, or its entry stays patched-hash over original bytes and + /// `cargo build` refuses the crate even though the retry reported + /// success. + #[cfg(feature = "cargo")] + #[tokio::test] + async fn test_rollback_retry_resyncs_already_original_checksum_entries() { + fn plain_sha256(b: &[u8]) -> String { + use sha2::{Digest, Sha256}; + let mut h = Sha256::new(); + h.update(b); + format!("{:x}", h.finalize()) + } + + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + let pkg = pkg_dir.path(); + + // State left by the interrupted run: a.rs already restored to its + // original bytes (no before-blob needed — AlreadyOriginal + // short-circuits), b.rs still patched. The checksum carries the + // PATCHED hashes apply's fixup wrote for both. 
+ tokio::fs::write(pkg.join("a.rs"), b"original a") + .await + .unwrap(); + tokio::fs::write(pkg.join("b.rs"), b"patched b") + .await + .unwrap(); + let checksum = serde_json::json!({ + "files": { + "a.rs": plain_sha256(b"patched a"), + "b.rs": plain_sha256(b"patched b"), + }, + "package": "x", + }); + tokio::fs::write( + pkg.join(".cargo-checksum.json"), + serde_json::to_string_pretty(&checksum).unwrap(), + ) + .await + .unwrap(); + + // The retry has b's before-blob available. + let b_before = compute_git_sha256_from_bytes(b"original b"); + tokio::fs::write(blobs_dir.path().join(&b_before), b"original b") + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "a.rs".to_string(), + PatchFileInfo { + before_hash: compute_git_sha256_from_bytes(b"original a"), + after_hash: compute_git_sha256_from_bytes(b"patched a"), + }, + ); + files.insert( + "b.rs".to_string(), + PatchFileInfo { + before_hash: b_before, + after_hash: compute_git_sha256_from_bytes(b"patched b"), + }, + ); + + let result = rollback_package_patch( + "pkg:cargo/mycrate@1.0.0", + pkg, + &files, + blobs_dir.path(), + false, + ) + .await; + + assert!(result.success, "retry must succeed: {:?}", result.error); + assert_eq!(result.files_rolled_back, vec!["b.rs".to_string()]); + + let post: serde_json::Value = serde_json::from_str( + &tokio::fs::read_to_string(pkg.join(".cargo-checksum.json")) + .await + .unwrap(), + ) + .unwrap(); + assert_eq!( + post["files"]["b.rs"].as_str().unwrap(), + plain_sha256(b"original b"), + "the freshly restored file's entry must be resynced" + ); + assert_eq!( + post["files"]["a.rs"].as_str().unwrap(), + plain_sha256(b"original a"), + "an AlreadyOriginal file from the interrupted run must be \ + resynced too — a stale patched-hash entry wedges cargo build" + ); + } } diff --git a/crates/socket-patch-core/src/patch/sidecars/cargo.rs b/crates/socket-patch-core/src/patch/sidecars/cargo.rs index f95219c..a1d96ac 100644 --- 
a/crates/socket-patch-core/src/patch/sidecars/cargo.rs +++ b/crates/socket-patch-core/src/patch/sidecars/cargo.rs @@ -50,11 +50,41 @@ const CHECKSUM_FILE: &str = ".cargo-checksum.json"; pub(crate) async fn fixup( pkg_path: &Path, patched: &[String], +) -> Result, SidecarError> { + sync_checksum(pkg_path, patched, false).await +} + +/// Resync `/.cargo-checksum.json` after a rollback restored +/// the listed files to their original bytes (and deleted patch-added +/// ones). Apply's [`fixup`] rewrote the checksum to the *patched* +/// hashes, so without this resync the rolled-back (original) sources +/// verify against patched hashes and the next `cargo build` of the +/// crate refuses with "checksum ... has changed". +/// +/// Same contract as [`fixup`], except a listed file that no longer +/// exists on disk has its checksum entry *removed* — rollback deletes +/// patch-added files, and a stale entry for a missing file is exactly +/// as build-breaking as a wrong hash. +pub(crate) async fn resync_after_rollback( + pkg_path: &Path, + rolled_back: &[String], +) -> Result, SidecarError> { + sync_checksum(pkg_path, rolled_back, true).await +} + +/// Shared driver for [`fixup`] / [`resync_after_rollback`] — see their +/// docs for the `Ok(None)` / `Err` contract. `remove_missing` selects +/// the rollback semantics for files absent on disk (remove the entry) +/// over apply's (fail — apply just wrote the file, so absence is a bug). +async fn sync_checksum( + pkg_path: &Path, + patched: &[String], + remove_missing: bool, ) -> Result, SidecarError> { let checksum_path = pkg_path.join(CHECKSUM_FILE); // Read the existing file. NotFound is fine — no checksums to update. 
- let raw = match tokio::fs::read_to_string(&checksum_path).await { + let raw = match read_regular_file(&checksum_path).await { Ok(s) => s, Err(e) if e.kind() == std::io::ErrorKind::NotFound => { return Ok(None); @@ -67,7 +97,7 @@ pub(crate) async fn fixup( } }; - let mut json: Value = serde_json::from_str(&raw).map_err(|e| SidecarError::Malformed { + let mut json: Value = serde_json::from_slice(&raw).map_err(|e| SidecarError::Malformed { path: checksum_path.display().to_string(), detail: e.to_string(), })?; @@ -80,7 +110,7 @@ pub(crate) async fn fixup( detail: "missing or non-object `files` field".to_string(), })?; - update_entries(files, pkg_path, patched).await?; + update_entries(files, pkg_path, patched, remove_missing).await?; // Pretty-print with two-space indent — matches what cargo // itself writes. Not strictly required (cargo accepts any @@ -145,11 +175,14 @@ pub(crate) async fn fixup( /// Entries in the patch list may include the `package/` prefix used /// by the API; the on-disk file lives at `pkg_path.join(normalized)`, /// and the cargo-checksum key is the same `normalized` path. New -/// files added by a patch get a fresh entry. +/// files added by a patch get a fresh entry. With `remove_missing` +/// (the rollback resync), a listed file absent on disk has its entry +/// removed instead of failing — rollback deletes patch-added files. 
async fn update_entries( files: &mut Map, pkg_path: &Path, patched: &[String], + remove_missing: bool, ) -> Result<(), SidecarError> { for file_name in patched { let normalized = normalize_file_path(file_name).to_string(); @@ -176,12 +209,23 @@ async fn update_entries( } let on_disk = pkg_path.join(&normalized); - let hash = sha256_file(&on_disk) - .await - .map_err(|source| SidecarError::Io { - path: on_disk.display().to_string(), - source, - })?; + let hash = match sha256_file(&on_disk).await { + Ok(hash) => hash, + Err(e) if remove_missing && e.kind() == std::io::ErrorKind::NotFound => { + // Rollback deleted this patch-added file; drop the entry + // apply's fixup inserted for it. Only NotFound qualifies — + // any other failure (EACCES, a FIFO, …) is an unverifiable + // state and must still fail closed. + files.remove(&normalized); + continue; + } + Err(source) => { + return Err(SidecarError::Io { + path: on_disk.display().to_string(), + source, + }); + } + }; files.insert(normalized, Value::String(hash)); } Ok(()) @@ -192,17 +236,52 @@ async fn update_entries( /// Loads the whole file into memory and hashes in one go. /// Cargo source files are bounded (the registry rejects crates /// whose `.crate` tarball exceeds ~10MB unpacked), so a single -/// `read()` is cheaper than the streaming-loop dance and -/// collapses the open + read into one `?` arm — which the +/// read is cheaper than the streaming-loop dance — and the open +/// error passes through untouched, which the /// `dispatch_fixup_cargo_sha256_file_failure_arm` integration /// test drives via a non-existent path. async fn sha256_file(path: &Path) -> std::io::Result { - let bytes = tokio::fs::read(path).await?; + let bytes = read_regular_file(path).await?; let mut hasher = Sha256::new(); hasher.update(&bytes); Ok(format!("{:x}", hasher.finalize())) } +/// Read a whole file, refusing anything that isn't a regular file. +/// +/// Both call sites read paths inside the (untrusted) package tree. 
A +/// plain `open(2)` with `O_RDONLY` on a FIFO planted at one of those +/// paths waits for a writer that may never come — hanging the patch +/// engine forever before any guard runs. As in +/// [`compute_file_git_sha256`](crate::patch::file_hash::compute_file_git_sha256), +/// the open is non-blocking on Unix (a no-op for regular files) and the +/// `is_file` check on the open handle rejects FIFOs/devices/directories +/// instead of reading them. +async fn read_regular_file(path: &Path) -> std::io::Result> { + use tokio::io::AsyncReadExt; + + #[cfg(unix)] + let mut file = tokio::fs::OpenOptions::new() + .read(true) + .custom_flags(libc::O_NONBLOCK) + .open(path) + .await?; + #[cfg(not(unix))] + let mut file = tokio::fs::File::open(path).await?; + + let metadata = file.metadata().await?; + if !metadata.is_file() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("{} is not a regular file", path.display()), + )); + } + + let mut bytes = Vec::with_capacity(metadata.len() as usize); + file.read_to_end(&mut bytes).await?; + Ok(bytes) +} + #[cfg(test)] mod tests { use super::*; @@ -588,6 +667,192 @@ mod tests { } } + /// Rollback resync: restored files get their on-disk (original) + /// hash back, and the entry for a patch-added file that rollback + /// deleted is removed — a stale entry for a missing file is as + /// build-breaking as a wrong hash. Untouched entries survive. + #[tokio::test] + async fn resync_after_rollback_updates_and_removes_entries() { + let d = tempfile::tempdir().unwrap(); + let pkg = d.path(); + tokio::fs::create_dir_all(pkg.join("src")).await.unwrap(); + // src/lib.rs is back to its original bytes; src/new.rs (added by + // the patch) was deleted by rollback and does not exist. 
+ tokio::fs::write(pkg.join("src/lib.rs"), b"original lib") + .await + .unwrap(); + + let starting = serde_json::json!({ + "files": { + "src/lib.rs": expected_sha256(b"patched lib"), + "src/new.rs": expected_sha256(b"brand new"), + "Cargo.toml": "11".repeat(32), + }, + "package": "preserved", + }); + tokio::fs::write( + pkg.join(CHECKSUM_FILE), + serde_json::to_string_pretty(&starting).unwrap(), + ) + .await + .unwrap(); + + let out = resync_after_rollback(pkg, &["src/lib.rs".to_string(), "src/new.rs".to_string()]) + .await + .unwrap(); + let payload = out.expect("checksum file existed, resync should return a payload"); + assert_eq!(payload.files.len(), 1); + assert_eq!(payload.files[0].path, CHECKSUM_FILE); + assert_eq!(payload.files[0].action, SidecarFileAction::Rewritten); + + let post: serde_json::Value = serde_json::from_str( + &tokio::fs::read_to_string(pkg.join(CHECKSUM_FILE)) + .await + .unwrap(), + ) + .unwrap(); + let files = post["files"].as_object().unwrap(); + assert_eq!( + files["src/lib.rs"].as_str().unwrap(), + expected_sha256(b"original lib") + ); + assert!(files.get("src/new.rs").is_none()); + assert_eq!(files["Cargo.toml"].as_str().unwrap(), "11".repeat(32)); + assert_eq!(post["package"].as_str().unwrap(), "preserved"); + } + + /// The remove-missing leniency is strictly rollback-side: apply's + /// `fixup` must still fail on a listed file that is absent on disk + /// (apply just wrote it — absence is a bug, not a deletion). + #[tokio::test] + async fn fixup_still_errors_on_missing_file() { + let d = tempfile::tempdir().unwrap(); + let starting = serde_json::json!({ + "files": { "src/lib.rs": "00".repeat(32) }, + "package": "x", + }); + tokio::fs::write( + d.path().join(CHECKSUM_FILE), + serde_json::to_string_pretty(&starting).unwrap(), + ) + .await + .unwrap(); + + let err = fixup(d.path(), &["src/lib.rs".to_string()]) + .await + .unwrap_err(); + assert!(matches!( + err, + SidecarError::Io { source, .. 
} if source.kind() == std::io::ErrorKind::NotFound + )); + } + + /// The resync shares apply's fail-closed path guard: an escaping + /// rolled-back key must be refused, never hashed or removed. + #[tokio::test] + async fn resync_refuses_dotdot_escape_path() { + let d = tempfile::tempdir().unwrap(); + let pkg = d.path().join("pkg"); + tokio::fs::create_dir_all(&pkg).await.unwrap(); + let starting = serde_json::json!({ + "files": { "Cargo.toml": "ff".repeat(32) }, + "package": "x", + }); + let original = serde_json::to_string_pretty(&starting).unwrap(); + tokio::fs::write(pkg.join(CHECKSUM_FILE), &original) + .await + .unwrap(); + + let err = resync_after_rollback(&pkg, &["../secret.txt".to_string()]) + .await + .unwrap_err(); + assert!(matches!( + err, + SidecarError::Io { source, .. } if source.kind() == std::io::ErrorKind::InvalidData + )); + assert_eq!( + tokio::fs::read_to_string(pkg.join(CHECKSUM_FILE)) + .await + .unwrap(), + original, + "checksum must not be rewritten on refusal" + ); + } + + /// DoS regression (FIFO checksum file): the checksum file is read + /// straight out of the (untrusted) package tree on every cargo + /// apply. A FIFO planted at `.cargo-checksum.json` made the plain + /// `open(2)` wait for a writer that never comes — wedging apply + /// forever *after* the patch bytes were committed. Same DoS class + /// already fixed in `file_hash.rs` and `package.rs`: the open must + /// be non-blocking and non-regular files must be rejected. 
+ #[cfg(unix)] + #[tokio::test] + async fn fifo_checksum_file_errors_promptly() { + let d = tempfile::tempdir().unwrap(); + let fifo = d.path().join(CHECKSUM_FILE); + let status = std::process::Command::new("mkfifo") + .arg(&fifo) + .status() + .expect("mkfifo must be runnable"); + assert!(status.success(), "mkfifo failed"); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(5), + fixup(d.path(), &["src/lib.rs".to_string()]), + ) + .await; + + let Ok(result) = result else { + // The open is wedged in a `spawn_blocking` thread that the + // runtime waits for on shutdown; connect a writer to release + // it so this test can FAIL instead of hanging the suite. + let _ = std::fs::OpenOptions::new().write(true).open(&fifo); + panic!("a FIFO checksum file must error promptly, not hang apply"); + }; + assert!(matches!(result, Err(SidecarError::Io { .. }))); + } + + /// DoS regression (FIFO patched-file target): `update_entries` + /// hashes each patched path from disk; a FIFO at that path hung the + /// rehash the same way. Must error promptly instead. 
+ #[cfg(unix)] + #[tokio::test] + async fn fifo_patched_file_errors_promptly() { + let d = tempfile::tempdir().unwrap(); + let starting = serde_json::json!({ + "files": { "src/lib.rs": "00".repeat(32) }, + "package": "x", + }); + tokio::fs::write( + d.path().join(CHECKSUM_FILE), + serde_json::to_string_pretty(&starting).unwrap(), + ) + .await + .unwrap(); + tokio::fs::create_dir_all(d.path().join("src")) + .await + .unwrap(); + let fifo = d.path().join("src/lib.rs"); + let status = std::process::Command::new("mkfifo") + .arg(&fifo) + .status() + .expect("mkfifo must be runnable"); + assert!(status.success(), "mkfifo failed"); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(5), + fixup(d.path(), &["src/lib.rs".to_string()]), + ) + .await; + + let Ok(result) = result else { + let _ = std::fs::OpenOptions::new().write(true).open(&fifo); + panic!("a FIFO patched file must error promptly, not hang the rehash"); + }; + assert!(matches!(result, Err(SidecarError::Io { .. }))); + } + /// Copy-on-write safety: when `.cargo-checksum.json` is hardlinked /// into a shared store (a vendored tree shared between projects), /// the rewrite must give us a private inode and leave the sibling diff --git a/crates/socket-patch-core/src/patch/sidecars/mod.rs b/crates/socket-patch-core/src/patch/sidecars/mod.rs index 19ceb05..878c3ee 100644 --- a/crates/socket-patch-core/src/patch/sidecars/mod.rs +++ b/crates/socket-patch-core/src/patch/sidecars/mod.rs @@ -96,9 +96,13 @@ pub(crate) fn advisory_only_payload( /// the error case into an `Error`-severity record. /// /// `package_key` is the PURL. `pkg_path` is the package directory -/// on disk. `patched` lists the patch-file keys that were actually -/// written (same convention as `apply_package_patch.files_patched`). -/// `files` is reserved for future use (currently unread). +/// on disk. `patched` lists the patch-file keys now at their patched +/// content: the ones written this run (`apply_package_patch. 
+/// files_patched`) plus any verified `AlreadyPatched` — an earlier +/// apply that failed partway wrote those but never reached this +/// boundary, so their sidecar entries are still stale and the retry +/// must resync them. `files` is reserved for future use (currently +/// unread). #[allow(unused_variables)] // `pkg_path` is feature-gated below pub async fn dispatch_fixup( package_key: &str, @@ -151,6 +155,50 @@ pub async fn dispatch_fixup( })) } +/// Run the post-*rollback* integrity resync for the package's ecosystem. +/// +/// Apply's [`dispatch_fixup`] rewrote ecosystem sidecars to match the +/// patched bytes; once rollback restores the original bytes those +/// sidecars are stale in the other direction (e.g. `.cargo-checksum.json` +/// carrying patched hashes over original sources wedges `cargo build`). +/// Cargo is the only ecosystem with reversible sidecar state today: +/// NuGet's `.nupkg.metadata` was *deleted* by apply and its +/// `contentHash` cannot be recomputed without the original `.nupkg`, +/// and the PyPI / gem / Go advisories are apply-oriented — a completed +/// rollback needs none. `rolled_back` lists the patch-file keys now at +/// their original state: the ones restored this run plus any verified +/// `AlreadyOriginal` (restored by an earlier partial rollback that +/// never reached this boundary). Same return contract as +/// [`dispatch_fixup`]. 
+#[allow(unused_variables)] // `pkg_path` is feature-gated below +pub async fn dispatch_rollback_fixup( + package_key: &str, + pkg_path: &Path, + rolled_back: &[String], +) -> Result, SidecarError> { + if rolled_back.is_empty() { + return Ok(None); + } + + let ecosystem = match Ecosystem::from_purl(package_key) { + Some(eco) => eco, + None => return Ok(None), + }; + + let payload: Option = match ecosystem { + #[cfg(feature = "cargo")] + Ecosystem::Cargo => cargo::resync_after_rollback(pkg_path, rolled_back).await?, + _ => None, + }; + + Ok(payload.map(|p| SidecarRecord { + purl: package_key.to_string(), + ecosystem: ecosystem.cli_name().to_string(), + files: p.files, + advisory: p.advisory, + })) +} + #[cfg(test)] mod tests { use super::*; @@ -411,6 +459,82 @@ mod tests { assert_eq!(advisory.severity, SidecarSeverity::Warning); } + /// Rollback dispatcher: a cargo PURL routes to the checksum resync + /// and the record carries the rewritten-file entry; a deleted + /// (patch-added) file's entry is dropped from the map. 
+ #[cfg(feature = "cargo")] + #[tokio::test] + async fn cargo_rollback_dispatch_resyncs_checksum() { + let d = tempfile::tempdir().unwrap(); + let pkg = d.path(); + tokio::fs::write(pkg.join("lib.rs"), b"original") + .await + .unwrap(); + let starting = serde_json::json!({ + "files": { + "lib.rs": "00".repeat(32), + "added.rs": "22".repeat(32), + }, + "package": "x", + }); + tokio::fs::write( + pkg.join(".cargo-checksum.json"), + serde_json::to_string_pretty(&starting).unwrap(), + ) + .await + .unwrap(); + + let out = dispatch_rollback_fixup( + "pkg:cargo/mycrate@1.0.0", + pkg, + &["lib.rs".to_string(), "added.rs".to_string()], + ) + .await + .unwrap(); + + let record = out.expect("cargo rollback dispatch must produce a record"); + assert_eq!(record.ecosystem, "cargo"); + assert_eq!(record.purl, "pkg:cargo/mycrate@1.0.0"); + assert_eq!(record.files.len(), 1); + assert_eq!(record.files[0].path, ".cargo-checksum.json"); + assert_eq!(record.files[0].action, SidecarFileAction::Rewritten); + + let post: serde_json::Value = serde_json::from_str( + &tokio::fs::read_to_string(pkg.join(".cargo-checksum.json")) + .await + .unwrap(), + ) + .unwrap(); + assert!(post["files"]["lib.rs"].is_string()); + assert_ne!(post["files"]["lib.rs"].as_str().unwrap(), "00".repeat(32)); + assert!(post["files"].get("added.rs").is_none()); + } + + /// Rollback dispatcher: advisory-only ecosystems have nothing to + /// resync — no record, no spurious apply-oriented advisory. + #[tokio::test] + async fn pypi_rollback_dispatch_returns_none() { + let d = tempfile::tempdir().unwrap(); + let out = dispatch_rollback_fixup( + "pkg:pypi/requests@2.28.0", + d.path(), + &["package/foo.py".to_string()], + ) + .await + .unwrap(); + assert!(out.is_none()); + } + + /// Rollback dispatcher: empty rolled-back list short-circuits. 
+ #[tokio::test] + async fn empty_rolled_back_returns_none() { + let d = tempfile::tempdir().unwrap(); + let out = dispatch_rollback_fixup("pkg:cargo/mycrate@1.0.0", d.path(), &[]) + .await + .unwrap(); + assert!(out.is_none()); + } + /// When the `cargo` feature is disabled, a `pkg:cargo/` PURL is /// unrecognized by `Ecosystem::from_purl` and `dispatch_fixup` /// returns `None` rather than attempting (or panicking on) a fixup. diff --git a/crates/socket-patch-core/src/pth_hook/detect.rs b/crates/socket-patch-core/src/pth_hook/detect.rs index 8819165..cee2d1e 100644 --- a/crates/socket-patch-core/src/pth_hook/detect.rs +++ b/crates/socket-patch-core/src/pth_hook/detect.rs @@ -120,7 +120,18 @@ pub fn deps_contain_hook(text: &str) -> bool { // `setup`'s state probe), turning a trailing `socket-patch` plus a following // `[hook]` into a phantom marker — a false positive. text.lines().any(|line| { - let normalized: String = line + // Drop a `#` comment first (requirements.txt and TOML both comment + // with `#`): a commented-out `# socket-patch[hook]` declares nothing — + // pip never installs it — and a marker mentioned inside a trailing + // comment must not read as configured. Same first-`#` rule as + // `edit::strip_requirement_comment`, so the `setup --check` / state + // probes (which call this on raw file content) agree with the editors + // (which pre-strip) on identical bytes. + let spec = match line.find('#') { + Some(i) => &line[..i], + None => line, + }; + let normalized: String = spec .to_lowercase() .chars() .filter(|c| !c.is_whitespace()) @@ -179,6 +190,26 @@ mod tests { assert!(deps_contain_hook(pyproject)); } + #[test] + fn test_deps_contain_hook_commented_out_is_not_declared() { + // A commented-out spec declares nothing: pip never installs it, and + // the edit path (`requirements_add` strips comments before probing) + // would still add the hook — so the state probe / `setup --check` + // must not read it as configured. 
+ assert!(!deps_contain_hook( + "# socket-patch[hook]\nrequests==2.31.0\n" + )); + // A marker mentioned inside another dep's trailing comment is not a + // declaration either. + assert!(!deps_contain_hook( + "requests==2.31.0 # TODO: add socket-patch[hook]\n" + )); + // But a real spec WITH a trailing comment is still declared. + assert!(deps_contain_hook( + "socket-patch[hook] # the .pth carrier\n" + )); + } + #[test] fn test_has_table() { let toml = "[tool.poetry]\nname='x'\n[tool.poetry.dependencies]\n"; diff --git a/crates/socket-patch-core/src/pth_hook/edit.rs b/crates/socket-patch-core/src/pth_hook/edit.rs index 4626877..dfeb834 100644 --- a/crates/socket-patch-core/src/pth_hook/edit.rs +++ b/crates/socket-patch-core/src/pth_hook/edit.rs @@ -265,8 +265,19 @@ fn pyproject_add(content: &str) -> Result, String> { let changed = if has_poetry && !real_pep621 { poetry_add(&mut doc)? - } else { + } else if real_pep621 { pep621_add(&mut doc)? + } else { + // Neither surface exists (e.g. a `[build-system]`-only or tool-config-only + // pyproject.toml of a setup.py/setup.cfg project). Synthesizing a + // `[project]` table with only `dependencies` would make the manifest + // invalid — PEP 621 requires `name` and forbids declaring it dynamic — so + // pip/setuptools/uv would refuse to build. Fail closed instead. 
+ return Err( + "pyproject.toml has no `[project]` or `[tool.poetry]` table to host the hook \ + dependency; declare project dependencies (or use requirements.txt) first" + .to_string(), + ); }; Ok(if changed { Some(doc.to_string()) } else { None }) } @@ -362,12 +373,16 @@ fn poetry_add(doc: &mut DocumentMut) -> Result { .unwrap_or_default(); extras.push("hook"); tbl.insert("extras", Item::Value(Value::Array(extras))); - } else { - let version = item - .as_str() - .map(str::to_string) - .unwrap_or_else(|| "*".to_string()); + } else if let Some(version) = item.as_str().map(str::to_string) { deps.insert("socket-patch", Item::Value(hook_inline_table(&version))); + } else { + // Any other shape (e.g. Poetry's multiple-constraints array of + // tables) carries spec data a blanket replacement would destroy. + return Err( + "`tool.poetry.dependencies.socket-patch` has an unsupported shape; \ + add the `hook` extra to it manually" + .to_string(), + ); } return Ok(true); } @@ -660,6 +675,40 @@ mod tests { assert!(pyproject_add("this is = = not toml [[[").is_err()); } + #[test] + fn test_pyproject_add_without_dep_surface_refuses() { + // A pyproject.toml with neither `[project]` nor `[tool.poetry]` (the + // classic setup.py/setup.cfg project that only carries `[build-system]` + // or tool config) has no dependency surface to host the hook. + // Synthesizing a `[project]` table with only `dependencies` makes the + // manifest invalid — PEP 621 requires `name` and forbids making it + // dynamic — so pip/setuptools/uv would refuse to build afterwards. + // The edit must error, not break the user's build. 
+ let build_only = + "[build-system]\nrequires = [\"setuptools\"]\nbuild-backend = \"setuptools.build_meta\"\n"; + assert!( + pyproject_add(build_only).is_err(), + "must not synthesize a name-less [project] table" + ); + let tool_only = "[tool.black]\nline-length = 100\n"; + assert!(pyproject_add(tool_only).is_err()); + } + + #[test] + fn test_poetry_add_multiconstraint_dep_not_clobbered() { + // Poetry's multiple-constraints form declares one dep as an ARRAY of + // constraint tables. That item is neither table-like nor a string, so + // the replace-fallback would silently overwrite the user's whole + // constraint set with `{version = "*", extras = ["hook"]}` — destroying + // their version pins and python markers. Refuse instead. + let toml = "[tool.poetry]\nname = \"x\"\n\n[tool.poetry.dependencies]\n\ + socket-patch = [{version = \"^1.0\", python = \"^2.7\"}, {version = \"^2.0\", python = \"^3.7\"}]\n"; + assert!( + pyproject_add(toml).is_err(), + "a multi-constraint socket-patch dep must not be silently replaced" + ); + } + #[test] fn test_classic_poetry_with_project_urls_routes_to_poetry() { // `[project.urls]` conjures an implicit `[project]` table; a Poetry 1.x diff --git a/crates/socket-patch-core/src/utils/cleanup_blobs.rs b/crates/socket-patch-core/src/utils/cleanup_blobs.rs index 9c30a55..30cb9f2 100644 --- a/crates/socket-patch-core/src/utils/cleanup_blobs.rs +++ b/crates/socket-patch-core/src/utils/cleanup_blobs.rs @@ -40,7 +40,10 @@ async fn cleanup_dir bool>( if file_name_str.starts_with('.') { continue; } - let path = dir.join(&file_name_str); + // Use the entry's real path: joining the lossy display name back onto + // `dir` breaks for names that are not valid UTF-8 (the mangled path + // does not exist on disk), silently exempting such files from cleanup. 
+ let path = entry.path(); // Use symlink_metadata (lstat) rather than metadata (stat) so we never // follow symlinks: a symlink is not a real socket-patch blob, and a // dangling symlink would otherwise return an error. Tolerate any stat @@ -711,6 +714,41 @@ mod tests { assert!(tokio::fs::metadata(&outside).await.is_ok()); } + // Linux-only: APFS/HFS+ (macOS) and NTFS reject file names that are not + // valid Unicode, so the scenario can only arise on byte-string + // filesystems like ext4. + #[cfg(target_os = "linux")] + #[tokio::test] + async fn test_cleanup_removes_non_utf8_named_orphan() { + // Regression: a stray file whose name is not valid UTF-8 must still + // be considered and removed as an orphan. Joining the *lossy* + // display name back onto the directory produced a path that does not + // exist on disk, so the stat failed and the file was silently + // skipped -- leaked forever despite the "any regular non-hidden file + // is considered for removal" contract. + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + let dir = tempfile::tempdir().unwrap(); + let blobs_dir = dir.path().join("blobs"); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + let manifest = create_test_manifest(); + + // 0xFF can never appear in valid UTF-8, so to_string_lossy() mangles + // this name into something that does not exist on disk. 
+ let bad_path = blobs_dir.join(OsStr::from_bytes(b"orphan-\xff\xfe")); + tokio::fs::write(&bad_path, "junk").await.unwrap(); + + let result = cleanup_unused_blobs(&manifest, &blobs_dir, false) + .await + .unwrap(); + + assert_eq!(result.blobs_checked, 1); + assert_eq!(result.blobs_removed, 1); + assert!(tokio::fs::symlink_metadata(&bad_path).await.is_err()); + } + #[test] fn test_format_cleanup_result_dry_run_lists_blobs() { let result = CleanupResult { diff --git a/crates/socket-patch-core/src/utils/telemetry.rs b/crates/socket-patch-core/src/utils/telemetry.rs index 4063819..5fa8088 100644 --- a/crates/socket-patch-core/src/utils/telemetry.rs +++ b/crates/socket-patch-core/src/utils/telemetry.rs @@ -293,12 +293,16 @@ fn resolve_telemetry_endpoint(api_token: Option<&str>, org_slug: Option<&str>) - .ok() .filter(|u| !u.is_empty()) .unwrap_or_else(|| DEFAULT_SOCKET_API_URL.to_string()); + // Trim trailing slashes like `ApiClient::new` does, so a base URL + // of `https://host/` doesn't produce a malformed `//v0/...` path. + let api_url = api_url.trim_end_matches('/'); (format!("{api_url}/v0/orgs/{slug}/telemetry"), true) } _ => { let proxy_url = read_env_with_legacy("SOCKET_PROXY_URL", "SOCKET_PATCH_PROXY_URL") .filter(|u| !u.is_empty()) .unwrap_or_else(|| DEFAULT_PATCH_API_PROXY_URL.to_string()); + let proxy_url = proxy_url.trim_end_matches('/'); (format!("{proxy_url}/patch/telemetry"), false) } } @@ -1251,6 +1255,37 @@ mod tests { assert!(!auth); } + /// Regression: a trailing slash on `SOCKET_API_URL` / `SOCKET_PROXY_URL` + /// must not yield a double-slash telemetry path. `ApiClient::new` + /// normalizes its base with `trim_end_matches('/')`, so the same user + /// config works for every API call — telemetry must match, or the + /// fire-and-forget POST silently lands on a malformed `//v0/...` / + /// `//patch/...` path (same malformed-URL class as `/v0/orgs//telemetry`). 
+ #[test] + fn test_resolve_telemetry_endpoint_trims_trailing_slash() { + let orig_api = std::env::var("SOCKET_API_URL").ok(); + let orig_proxy = std::env::var("SOCKET_PROXY_URL").ok(); + + std::env::set_var("SOCKET_API_URL", "https://api.example.test/sub/"); + let (url, auth) = resolve_telemetry_endpoint(Some("tok"), Some("acme")); + assert!(auth); + assert_eq!(url, "https://api.example.test/sub/v0/orgs/acme/telemetry"); + + std::env::set_var("SOCKET_PROXY_URL", "https://proxy.example.test/sub/"); + let (url, auth) = resolve_telemetry_endpoint(None, None); + assert!(!auth); + assert_eq!(url, "https://proxy.example.test/sub/patch/telemetry"); + + match orig_api { + Some(v) => std::env::set_var("SOCKET_API_URL", v), + None => std::env::remove_var("SOCKET_API_URL"), + } + match orig_proxy { + Some(v) => std::env::set_var("SOCKET_PROXY_URL", v), + None => std::env::remove_var("SOCKET_PROXY_URL"), + } + } + /// Regression: an empty-string token or slug must be treated as absent, /// not spliced into the URL/header. Guards the `/v0/orgs//telemetry` /// malformed-URL class that bit the API client. diff --git a/crates/socket-patch-core/src/vex/product.rs b/crates/socket-patch-core/src/vex/product.rs index 0735c41..d39d23d 100644 --- a/crates/socket-patch-core/src/vex/product.rs +++ b/crates/socket-patch-core/src/vex/product.rs @@ -100,9 +100,17 @@ pub async fn detect_product(cwd: &Path) -> DetectResult { result } +/// Strip a leading UTF-8 BOM. npm/Node strip one from package.json and +/// cargo accepts one in Cargo.toml, but serde_json and the line scanner +/// both reject it — without this, manifests the user's own toolchain +/// accepts yield no PURL. Mirrors `package_json/detect.rs`. 
+fn strip_bom(content: &str) -> &str { + content.strip_prefix('\u{feff}').unwrap_or(content) +} + async fn read_package_json(path: &Path) -> Option { let content = tokio::fs::read_to_string(path).await.ok()?; - let v: serde_json::Value = serde_json::from_str(&content).ok()?; + let v: serde_json::Value = serde_json::from_str(strip_bom(&content)).ok()?; let name = v.get("name")?.as_str()?; let version = v.get("version")?.as_str()?; if name.is_empty() || version.is_empty() { @@ -115,6 +123,9 @@ async fn read_package_json(path: &Path) -> Option { async fn read_pyproject(path: &Path) -> Option { let content = tokio::fs::read_to_string(path).await.ok()?; + // No BOM strip here, unlike npm/cargo: tomllib (and pip's vendored + // tomli) reject a BOM'd pyproject.toml outright, so such a file is + // not a buildable Python project and must keep yielding None. // PEP 621 `[project]` takes precedence (newer projects favor it), // then fall back to Poetry's `[tool.poetry]` for legacy layouts. let (name, version) = scan_toml_section(&content, "project") @@ -124,7 +135,7 @@ async fn read_pyproject(path: &Path) -> Option { async fn read_cargo_toml(path: &Path) -> Option { let content = tokio::fs::read_to_string(path).await.ok()?; - let (name, version) = scan_toml_section(&content, "package")?; + let (name, version) = scan_toml_section(strip_bom(&content), "package")?; Some(format!("pkg:cargo/{name}@{version}")) } @@ -150,7 +161,17 @@ fn scan_toml_section(content: &str, section: &str) -> Option<(String, String)> { continue; } if line.starts_with('[') { - in_section = line == header; + // A header may carry a trailing comment (`[package] # x`) — + // valid TOML that cargo and tomllib both accept. Anything + // else after the closing bracket means a different (or + // malformed) section. 
+ in_section = match line.strip_prefix(header.as_str()) { + Some(rest) => { + let rest = rest.trim_start(); + rest.is_empty() || rest.starts_with('#') + } + None => false, + }; continue; } if !in_section { @@ -221,9 +242,20 @@ fn scan_remote_origin_url(content: &str) -> Option { let mut in_section = false; for raw in content.lines() { let line = raw.trim(); - if line.starts_with('[') && line.ends_with(']') { - in_section = line == "[remote \"origin\"]"; - continue; + if line.starts_with('[') { + // git permits a `;`/`#` comment after the closing bracket; + // such a line is still a section header. Recognizing it + // matters BOTH ways: a commented `[remote "origin"]` must + // open the section, and a commented foreign header must + // CLOSE it — otherwise the next remote's url is + // misattributed to origin. + if let Some(close) = line.find(']') { + let rest = line[close + 1..].trim_start(); + if rest.is_empty() || rest.starts_with('#') || rest.starts_with(';') { + in_section = &line[..=close] == "[remote \"origin\"]"; + continue; + } + } } if !in_section { continue; @@ -1231,6 +1263,138 @@ mod tests { assert!(parse_toml_string_kv("version = 42", "version").is_none()); } + // ── Regression: UTF-8 BOM tolerance ─────────────────────────── + // npm/Node strip a leading BOM from package.json and cargo accepts + // one in Cargo.toml (verified against `npm pkg get` and + // `cargo metadata`), but serde_json and the line scanner both choke + // on it — so a manifest its own toolchain accepts yielded no PURL. 
+ + #[tokio::test] + async fn detect_package_json_with_bom() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("package.json"), + "\u{feff}{\"name\":\"bom-app\",\"version\":\"1.0.0\"}", + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:npm/bom-app@1.0.0")); + } + + #[tokio::test] + async fn detect_cargo_toml_with_bom() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("Cargo.toml"), + "\u{feff}[package]\nname = \"bom-rust\"\nversion = \"1.0.0\"\n", + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:cargo/bom-rust@1.0.0")); + } + + /// pyproject.toml is deliberately NOT BOM-stripped: tomllib (and + /// pip's vendored tomli) reject a BOM outright, so a BOM'd + /// pyproject.toml is not a buildable Python project and detection + /// must keep returning None for it. + #[tokio::test] + async fn detect_pyproject_with_bom_stays_none() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("pyproject.toml"), + "\u{feff}[project]\nname = \"bom-py\"\nversion = \"1.0.0\"\n", + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert!(r.purl.is_none()); + } + + // ── Regression: trailing comments on TOML table headers ─────── + // `[package] # comment` is valid TOML (cargo and tomllib both + // accept it), but the exact `line == header` match treated the + // commented header as a foreign section, so name/version were + // never read. + + #[test] + fn scan_toml_section_header_with_trailing_comment() { + let toml = "[package] # package metadata\nname = \"x\"\nversion = \"1.0\"\n"; + let (n, v) = scan_toml_section(toml, "package").unwrap(); + assert_eq!(n, "x"); + assert_eq!(v, "1.0"); + } + + /// A commented header for a DIFFERENT section must still close the + /// current one — `version` below belongs to `[dependencies]`, not + /// `[package]`. 
+ #[test] + fn scan_toml_commented_foreign_header_still_closes_section() { + let toml = "[package]\nname = \"x\"\n[dependencies] # noted\nversion = \"9.9\"\n"; + assert!(scan_toml_section(toml, "package").is_none()); + } + + /// The prefix match must not over-match: `[packages]` and + /// `[package.metadata]` are different sections, comment or not. + #[test] + fn scan_toml_header_prefix_lookalikes_do_not_match() { + let toml = "[packages] # close but no\nname = \"a\"\nversion = \"1\"\n[package.metadata] # also no\nname = \"b\"\nversion = \"2\"\n"; + assert!(scan_toml_section(toml, "package").is_none()); + } + + #[tokio::test] + async fn detect_cargo_toml_header_with_trailing_comment() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("Cargo.toml"), + "[package] # the crate\nname = \"cmt-rust\"\nversion = \"1.0.0\"\n", + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:cargo/cmt-rust@1.0.0")); + } + + #[tokio::test] + async fn detect_pyproject_header_with_trailing_comment() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("pyproject.toml"), + "[project] # PEP 621\nname = \"cmt-py\"\nversion = \"0.4.0\"\n", + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:pypi/cmt-py@0.4.0")); + } + + // ── Regression: trailing comments on git config headers ─────── + // git permits `;`/`#` comments after a section header (verified + // with `git config -f`), but the `ends_with(']')` check refused to + // recognize such a line as a header at all. Two failure modes: + // a commented `[remote "origin"]` header was skipped (URL missed), + // and a commented FOREIGN header failed to close an open origin + // section, misattributing the next remote's url to origin. 
+ + #[test] + fn scan_origin_url_header_with_trailing_comment() { + let cfg = "[remote \"origin\"] ; my main remote\n\turl = git@github.com:me/repo.git\n"; + assert_eq!( + scan_remote_origin_url(cfg).as_deref(), + Some("git@github.com:me/repo.git") + ); + } + + /// git resolves this config to NO origin url (the url belongs to + /// `upstream`); returning upstream's url as origin's is a wrong + /// product identity, not just a missed one. + #[test] + fn scan_origin_url_commented_foreign_header_closes_section() { + let cfg = "[remote \"origin\"]\n[remote \"upstream\"] # backup\n\turl = git@github.com:other/repo.git\n"; + assert!(scan_remote_origin_url(cfg).is_none()); + } + #[tokio::test] async fn multi_manifest_all_unparseable_emits_no_warning() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/vex/schema.rs b/crates/socket-patch-core/src/vex/schema.rs index 8257745..b9172d7 100644 --- a/crates/socket-patch-core/src/vex/schema.rs +++ b/crates/socket-patch-core/src/vex/schema.rs @@ -12,7 +12,10 @@ //! literal `@`-prefixed keys. //! * Optional fields use `Option` + `skip_serializing_if = "Option::is_none"` //! so the emitted JSON omits them rather than emitting `null`. Matches -//! the Go implementation's `omitempty` behavior. +//! the Go implementation's `omitempty` behavior. Two exceptions keep +//! the Go zero value instead of `Option`: `products` (empty `Vec` = +//! absent, like `aliases`/`subcomponents`) and the component `@id` +//! (empty `String` = absent), both optional per spec. //! * `version` is the OpenVEX document revision counter (integer, //! starts at 1). NOT the schema version. //! * `Vec` is always present (the spec allows it to be empty @@ -69,6 +72,13 @@ pub struct Statement { /// RFC 3339 timestamp of the most recent revision of this statement. #[serde(skip_serializing_if = "Option::is_none", default)] pub last_updated: Option, + /// Products the statement applies to. 
Optional per spec — like + /// `timestamp` above it cascades down from the encapsulating + /// document when omitted (see OpenVEX inheritance rules). Our + /// builder always emits at least one, but the type must accept its + /// absence on parse; an empty list omits the key, matching the Go + /// implementation's `products,omitempty`. + #[serde(skip_serializing_if = "Vec::is_empty", default)] pub products: Vec, pub status: Status, /// Optional supplier IRI overriding the document-level author for @@ -102,7 +112,11 @@ pub struct Vulnerability { /// subcomponent list pinpoints the vulnerable transitive dep. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Product { - #[serde(rename = "@id")] + /// Optional IRI per spec — a component may instead be addressed + /// via `identifiers`/`hashes`. Stays a plain `String` (not + /// `Option`) mirroring go-vex's `@id,omitempty` zero value: absent + /// parses as `""`, and `""` is omitted on serialize. + #[serde(rename = "@id", default, skip_serializing_if = "String::is_empty")] pub id: String, /// Optional auxiliary identifiers (PURL, CPE 2.2, CPE 2.3, etc.). /// Keys are the identifier type (e.g. `"purl"`, `"cpe23"`), @@ -121,7 +135,9 @@ pub struct Product { /// the patch covers. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Subcomponent { - #[serde(rename = "@id")] + /// Optional IRI per spec; same zero-value `omitempty` handling as + /// [`Product::id`]. 
+ #[serde(rename = "@id", default, skip_serializing_if = "String::is_empty")] pub id: String, #[serde(skip_serializing_if = "Option::is_none", default)] pub identifiers: Option>, @@ -660,6 +676,132 @@ mod tests { ); } + // ── Statement products is optional/inheritable per spec ──────── + + /// Regression: `products` is OPTIONAL in OpenVEX 0.2.0 — "While a + /// product is required to have a complete statement, this field is + /// optional as it can cascade down from the encapsulating + /// document" (spec, Statement Fields; go-vex tags it + /// `products,omitempty`). A spec-valid statement that omits it + /// MUST parse, not error with "missing field `products`" — the + /// same inheritance rule that made `timestamp` optional above. + #[test] + fn statement_without_products_parses_and_leaves_it_empty() { + let doc_json = r#"{ + "@context": "https://openvex.dev/ns/v0.2.0", + "@id": "urn:uuid:1", + "author": "Socket", + "timestamp": "2024-01-01T00:00:00Z", + "version": 1, + "statements": [ + { + "vulnerability": {"name": "CVE-2014-123456"}, + "timestamp": "2024-01-01T00:00:00Z", + "status": "under_investigation" + } + ] + }"#; + let doc: Document = + serde_json::from_str(doc_json).expect("statement may omit products (inherited)"); + assert!( + doc.statements[0].products.is_empty(), + "omitted products must deserialize to an empty list, not error" + ); + } + + /// Empty `products` serializes by omitting the key, matching the + /// Go implementation's `products,omitempty` (no `"products": []`). 
+ #[test] + fn statement_with_empty_products_omits_key() { + let mut s = minimal_statement(); + s.products = Vec::new(); + let v = serde_json::to_value(&s).unwrap(); + assert!( + v.as_object().unwrap().get("products").is_none(), + "empty products must omit the key (Go omitempty parity)" + ); + } + + // ── Product/Subcomponent `@id` is optional per spec ──────────── + + /// Regression: the component `@id` is OPTIONAL in OpenVEX 0.2.0 — + /// "Optional IRI identifying the component to make it externally + /// referenceable" — a product may instead be addressed via its + /// `identifiers`/`hashes` maps (go-vex tags it `@id,omitempty`). + /// A spec-valid product identified only by `identifiers` MUST + /// parse, not error with "missing field `@id`". + #[test] + fn product_without_at_id_parses_via_identifiers() { + let doc_json = r#"{ + "@context": "https://openvex.dev/ns/v0.2.0", + "@id": "urn:uuid:1", + "author": "Socket", + "timestamp": "2024-01-01T00:00:00Z", + "version": 1, + "statements": [ + { + "vulnerability": {"name": "GHSA-x"}, + "timestamp": "2024-01-01T00:00:00Z", + "products": [{ + "identifiers": {"purl": "pkg:apk/wolfi/git@2.39.0-r1?arch=armv7"}, + "subcomponents": [{"hashes": {"sha256": "abc123"}}] + }], + "status": "not_affected", + "justification": "component_not_present" + } + ] + }"#; + let doc: Document = + serde_json::from_str(doc_json).expect("product may omit @id (identifiers address it)"); + let p = &doc.statements[0].products[0]; + assert_eq!(p.id, "", "absent product @id must default, not error"); + assert_eq!( + p.identifiers.as_ref().unwrap()["purl"], + "pkg:apk/wolfi/git@2.39.0-r1?arch=armv7" + ); + assert_eq!( + p.subcomponents[0].id, "", + "absent subcomponent @id must default" + ); + assert_eq!( + p.subcomponents[0].hashes.as_ref().unwrap()["sha256"], + "abc123" + ); + } + + /// An empty component `@id` is omitted on serialize (Go `omitempty` + /// zero-value parity), so an identifiers-only product round-trips + /// without gaining a 
bogus `"@id": ""`. + #[test] + fn product_with_empty_id_omits_at_id_key() { + let p = Product { + id: String::new(), + identifiers: Some(BTreeMap::from([( + "purl".to_string(), + "pkg:npm/app@1.0.0".to_string(), + )])), + hashes: None, + subcomponents: vec![Subcomponent { + id: String::new(), + identifiers: None, + hashes: Some(BTreeMap::from([("sha256".to_string(), "abc".to_string())])), + }], + }; + let v = serde_json::to_value(&p).unwrap(); + assert!( + v.as_object().unwrap().get("@id").is_none(), + "empty product @id must be omitted" + ); + assert!( + v["subcomponents"][0] + .as_object() + .unwrap() + .get("@id") + .is_none(), + "empty subcomponent @id must be omitted" + ); + } + // ── Forward-compat: unmodeled spec fields are tolerated ──────── /// OpenVEX 0.2.0 carries fields we intentionally don't model diff --git a/crates/socket-patch-core/tests/crawler_python_e2e.rs b/crates/socket-patch-core/tests/crawler_python_e2e.rs index e58c99b..867f3ad 100644 --- a/crates/socket-patch-core/tests/crawler_python_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_python_e2e.rs @@ -776,6 +776,34 @@ async fn find_by_purls_strips_qualifiers() { assert_eq!(pkg.path, tmp.path()); } +/// A bare `#subpath` (no `?qualifier`) is valid PURL grammar and must be +/// stripped the same way qualifiers are — cutting only at `?` leaks the +/// subpath into the version (`2.28.0#src/requests`), so the installed +/// package silently fails to match. Twin of the strip_purl_qualifiers +/// subpath fix in utils::purl. +#[tokio::test] +async fn find_by_purls_strips_subpath() { + let tmp = tempfile::tempdir().unwrap(); + stage_dist_info(tmp.path(), "requests", "2.28.0").await; + + let crawler = PythonCrawler; + let result = crawler + .find_by_purls( + tmp.path(), + &["pkg:pypi/requests@2.28.0#src/requests".to_string()], + ) + .await + .unwrap(); + assert_eq!(result.len(), 1, "subpath must be stripped before lookup"); + // Same keying contract as the qualifier test: the original PURL. 
+ let pkg = result + .get("pkg:pypi/requests@2.28.0#src/requests") + .expect("result must be keyed by the original subpath PURL"); + assert_eq!(pkg.name, "requests"); + assert_eq!(pkg.version, "2.28.0"); + assert_eq!(pkg.path, tmp.path()); +} + #[tokio::test] async fn find_by_purls_empty_purls_returns_empty() { let tmp = tempfile::tempdir().unwrap(); diff --git a/scripts/fix-bugs.config.example.ts b/scripts/fix-bugs.config.example.ts index 7998321..460006b 100644 --- a/scripts/fix-bugs.config.example.ts +++ b/scripts/fix-bugs.config.example.ts @@ -1,13 +1,14 @@ /** - * Example prompt module for scripts/study-crates.ts. + * Bug-fixing sweep prompt module for scripts/study-crates.ts. * * Pass it with: - * npx tsx scripts/study-crates.ts --prompt-file scripts/study-crates.config.example.ts + * npx tsx scripts/study-crates.ts --prompt-file scripts/fix-bugs.config.example.ts * * The module's default export is a function `(ctx: FileCtx) => string` that * returns the prompt for one file. This gives you full programmatic control: * branch on the crate, the path, the file name, inject extra instructions for - * specific subsystems, etc. + * specific subsystems, etc. The `model` export pins the model for the sweep; + * an explicit --model flag overrides it. * * FileCtx fields available: * file repo-relative POSIX path, e.g. "crates/socket-patch-core/src/lib.rs" @@ -20,15 +21,21 @@ import type { FileCtx } from "./study-crates.ts"; +export const model = "claude-fable-5"; + export default function render(ctx: FileCtx): string { const base = [ - `There are bugs in ${ctx.file} in the ${ctx.crate} crate.`, - `Carefully read the code line by line and fix all of the bugs. Add additional tests to prevent regressions.`, - `If you can't find any problems, it's ok to quit.` + `Review ${ctx.file} in the ${ctx.crate} crate for real production bugs.`, + `Read it line by line. 
Treat every suspected bug as unconfirmed until you`, + `write a regression test that fails on the current code; then apply the`, + `minimal fix and make the test pass.`, + `Do not refactor, clean up, or restructure beyond what each fix requires,`, + `and never weaken an existing test to get green.`, + `If the file turns out to be clean, say so plainly and stop — do not invent findings.`, ]; - // Example of path-specific emphasis: be extra careful around the patch engine - // and crawlers, which carry the most invariants. + // Path-specific emphasis: the patch engine and crawlers carry the most + // invariants. if (ctx.relInCrate.startsWith("patch/")) { base.push( `This file is part of the patch engine — pay special attention to`, @@ -41,6 +48,9 @@ export default function render(ctx: FileCtx): string { ); } - base.push(`End with a concise 3-6 bullet summary of the most important takeaways.`); + base.push( + `Finish by running the affected tests, then end with a concise 3-6 bullet`, + `summary: bugs found (or "clean"), fixes applied, and test results.`, + ); return base.join(" "); } diff --git a/scripts/simplify.config.ts b/scripts/simplify.config.ts new file mode 100644 index 0000000..427fa57 --- /dev/null +++ b/scripts/simplify.config.ts @@ -0,0 +1,123 @@ +/** + * simplify.config.ts — duplication + dead-code cleanup sweep for study-crates.ts. + * + * Runs one session per source file: + * + * npx tsx scripts/study-crates.ts --prompt-file scripts/simplify.config.ts + * + * IMPORTANT: keep the default --concurrency 1. Unlike the bug sweep, sessions + * routinely edit files OTHER than the one under review (rewriting a duplicating + * file to import from this one, extracting shared helpers), so parallel + * sessions would race on the same files. Sequential sessions compose: each one + * sees the previous sessions' consolidations, so a duplicate pair is resolved + * once and the later file's session finds it already clean. 
+ * + * Start from a clean git tree and review/commit incrementally — the per-file + * raw logs under --out make it easy to attribute each change to its session. + * + * What each session does, given one file: + * 1. Duplication: find functionality this file shares with the rest of the + * workspace and consolidate it — move it to an existing common module, + * import the better implementation from elsewhere, or rewrite the other + * file(s) to use this one. + * 2. Simplification: remove unnecessary abstractions, dead code, and unused + * methods; narrow over-wide interfaces. + * All changes must be strictly behavior-preserving and test-verified. + * + * FileCtx fields available (see study-crates.ts): + * file repo-relative POSIX path, e.g. "crates/socket-patch-core/src/lib.rs" + * abspath absolute path on disk + * crate crate dir name, e.g. "socket-patch-core" + * name basename, e.g. "lib.rs" + * stem basename without extension, e.g. "lib" + * relInCrate path within the crate's src dir, e.g. "api/client.rs" + */ + +import type { FileCtx } from "./study-crates.ts"; + +export const model = "claude-fable-5"; + +export default function render(ctx: FileCtx): string { + const sections: string[] = []; + + sections.push( + `You are simplifying ${ctx.file} in the ${ctx.crate} crate.`, + `The goal is strictly behavior-preserving cleanup: less code, fewer`, + `abstractions, smaller interfaces, no functional change.`, + ``, + `Work through, in order:`, + ``, + `1. Duplication. Read the file, then search the rest of the workspace for`, + `code that overlaps with it — similar helpers, parallel parsing or`, + `validation logic, copy-pasted blocks that have started to diverge. For`, + `each real overlap, first confirm the two sites genuinely share semantics`, + `(near-duplicates in this codebase sometimes differ deliberately), then`, + `pick ONE resolution:`, + ` - if a shared home already exists (e.g. 
a utils module), keep the single`, + ` best implementation there and update all callers;`, + ` - if another file already has the better implementation, rewrite this`, + ` file to use it;`, + ` - if this file has the better implementation, rewrite the other file(s)`, + ` to import from here, promoting the code to a common module only if`, + ` crate or module boundaries require it.`, + `Prefer the smallest move that removes the duplicate; do not invent a new`, + `common module for a single trivial helper.`, + ``, + `2. Local simplification. Within this file: delete dead code and unused`, + `methods, fields, and parameters; collapse single-use indirections (a trait`, + `with one impl, a wrapper that only forwards, a helper called once whose`, + `body is clearer inline); narrow interfaces to what callers actually use;`, + `and reduce visibility (pub -> pub(crate) or private) when nothing outside`, + `the module uses an item.`, + ``, + `Hard rules:`, + `- Behavior-preserving only: no new features, no semantic changes, and no`, + ` new abstractions — the diff should shrink total code and interface`, + ` surface, not trade one structure for another.`, + `- Before deleting anything as "unused", search the whole workspace,`, + ` including tests, the CLI crate, and feature-gated code (#[cfg(...)]).`, + ` An item unreferenced under default features may be used under another`, + ` feature combination or by an integration test.`, + `- Defensive code is not mess. Fail-closed guards, path-traversal and`, + ` symlink checks, atomic write/rename patterns, and permission handling in`, + ` this codebase are deliberate — do not simplify them away even where they`, + ` look redundant.`, + `- Never delete or weaken a test to make a cleanup possible. 
Update tests`, + ` only mechanically, when an interface they exercise moved or was renamed.`, + `- If the file is already clean and has no real duplication, say so plainly`, + ` and stop — do not restructure for its own sake.`, + ); + + // Path-specific emphasis: the patch engine and crawlers carry the most + // invariants. + if (ctx.relInCrate.startsWith("patch/")) { + sections.push( + ``, + `This file is part of the patch engine. Apply, rollback, and sidecar`, + `paths intentionally share some shapes while differing in semantics —`, + `consolidate only after verifying both call sites need identical`, + `behavior, and never relax filesystem safety, atomicity, or rollback`, + `correctness while doing so.`, + ); + } else if (ctx.relInCrate.startsWith("crawlers/")) { + sections.push( + ``, + `This is a package-manager crawler. Crawlers for different ecosystems`, + `look similar but encode different on-disk layout rules — only extract`, + `shared helpers where the semantics are truly ecosystem-independent.`, + ); + } + + sections.push( + ``, + `Before finishing, verify: the workspace builds warning-free and the test`, + `suites of every crate you touched pass. If you removed feature-gated or`, + `pub code, check the other feature combinations build too.`, + ``, + `End with a concise 3-6 bullet summary: duplicates consolidated (and in`, + `which direction), abstractions and dead code removed, net line delta, and`, + `test results.`, + ); + + return sections.join("\n"); +} diff --git a/scripts/study-crates.ts b/scripts/study-crates.ts index e43690c..226a25a 100644 --- a/scripts/study-crates.ts +++ b/scripts/study-crates.ts @@ -41,9 +41,11 @@ * {abspath}, {crate}, {name}, {stem}, {relInCrate}. * --prompt-file TS/JS module whose default export is * (ctx: FileCtx) => string (or { render(ctx) }). - * Takes precedence over --prompt. + * Takes precedence over --prompt. May also + * `export const model = "..."` to pin a model. * --out Output dir (default: study-output). 
- * --model Model passed to claude --model. + * --model Model passed to claude --model. Overrides a + * prompt-file's `model` export. * --filter Only files whose repo-relative path matches. * --crate Limit to a single crate dir name. * --target Which files to study (default: src). `tests` @@ -51,7 +53,8 @@ * harnesses, shared setup modules); `all` does both. * --tests Shorthand for --target tests. * --concurrency Parallel sessions (default: 1 = sequential). - * --timeout Per-file timeout in seconds (default: 1800). + * --timeout Per-file timeout in seconds (default: none — + * sessions run until completion). * --dry-run List files + rendered prompts; run nothing. * -h, --help Show this help. * @@ -166,7 +169,7 @@ function parseArgs(argv: string[]): Args { out: "study-output", target: "src", concurrency: 1, - timeoutSec: 1800, + timeoutSec: Infinity, offset: 0, dryRun: false, help: false, @@ -213,9 +216,14 @@ function parseArgs(argv: string[]): Args { case "--concurrency": a.concurrency = Math.max(1, parseInt(next(), 10) || 1); break; - case "--timeout": - a.timeoutSec = Math.max(1, parseInt(next(), 10) || 1800); + case "--timeout": { + const v = parseInt(next(), 10); + if (!Number.isFinite(v) || v < 1) { + fail(`--timeout must be a positive number of seconds`); + } + a.timeoutSec = v; break; + } case "--offset": a.offset = Math.max(0, parseInt(next(), 10) || 0); break; @@ -245,8 +253,10 @@ Usage: npx tsx scripts/study-crates.ts [options] -p, --prompt