From 42e95bee0c4df13b172d49e8c996aef734635360 Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Thu, 11 Jun 2026 15:05:04 -0400 Subject: [PATCH 1/5] fix(purl): percent-decode purl components from the API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patches API serves scoped purls percent-encoded (pkg:npm/%40scope/name@1.0.0) and scan stores them verbatim as manifest keys, but neither the npm crawler nor the vendor coordinate parser decoded them — so apply/vendor reported scoped packages as 'package not installed', and detect_prunable saw every encoded entry as prunable. - utils/purl.rs: percent_decode_purl_component (strict, all-or-nothing, fail-safe passthrough), normalize_purl + purl_eq (compare/display only, never path construction) - npm_crawler parse_purl_components, vendor parse_npm_purl (NpmCoords now owns decoded name/version; base_purl stays verbatim for ledger/ manifest key parity), parse_jsr_purl: decode AFTER /-and-@ splits, BEFORE the is_safe_* guards — %2e%2e/%2f cannot smuggle traversal - detect_prunable + purl_matches_identifier compare normalized forms - human output shows the decoded purl; JSON keeps verbatim keys Co-Authored-By: Claude Fable 5 --- crates/socket-patch-cli/src/commands/get.rs | 4 +- crates/socket-patch-cli/src/commands/scan.rs | 42 +++- .../socket-patch-cli/src/commands/vendor.rs | 8 +- .../src/crawlers/deno_crawler.rs | 13 +- .../src/crawlers/npm_crawler.rs | 125 +++++++++- .../src/patch/vendor/bun_lock.rs | 2 +- .../src/patch/vendor/npm_common.rs | 91 ++++++-- .../src/patch/vendor/npm_lock.rs | 6 +- .../src/patch/vendor/pnpm_lock.rs | 2 +- .../src/patch/vendor/yarn_berry_lock.rs | 2 +- .../src/patch/vendor/yarn_classic_lock.rs | 2 +- crates/socket-patch-core/src/utils/purl.rs | 221 ++++++++++++++++-- 12 files changed, 447 insertions(+), 71 deletions(-) diff --git a/crates/socket-patch-cli/src/commands/get.rs b/crates/socket-patch-cli/src/commands/get.rs index e7359aa..0f520f6 100644 --- a/crates/socket-patch-cli/src/commands/get.rs +++ b/crates/socket-patch-cli/src/commands/get.rs @@ -13,7 +13,7 @@ use socket_patch_core::manifest::schema::{ }; use socket_patch_core::patch::apply::select_installed_variants; use socket_patch_core::utils::fuzzy_match::fuzzy_match_packages; -use socket_patch_core::utils::purl::{is_purl, strip_purl_qualifiers}; +use socket_patch_core::utils::purl::{is_purl, normalize_purl, strip_purl_qualifiers}; use socket_patch_core::utils::telemetry::{track_patch_fetch_failed, track_patch_fetched}; use std::collections::HashMap; use std::fmt; @@ -1030,7 +1030,7 @@ pub async fn download_and_apply_patches( let action = decide_patch_action(&manifest, &patch.purl, &patch.uuid); if let PatchAction::Skipped = action { if !params.json && !params.silent { - eprintln!(" [skip] {} (already in manifest)", patch.purl); + eprintln!(" [skip] {} (already in manifest)", normalize_purl(&patch.purl)); } downloaded_patches.push(serde_json::json!({ "purl": patch.purl, diff --git a/crates/socket-patch-cli/src/commands/scan.rs b/crates/socket-patch-cli/src/commands/scan.rs index f395763..00b2fe9 100644 --- a/crates/socket-patch-cli/src/commands/scan.rs +++ b/crates/socket-patch-cli/src/commands/scan.rs @@ -10,7 +10,7 @@ use socket_patch_core::patch::apply_lock; use socket_patch_core::utils::cleanup_blobs::{ cleanup_unused_archives, cleanup_unused_blobs, CleanupResult, }; -use socket_patch_core::utils::purl::strip_purl_qualifiers; +use socket_patch_core::utils::purl::{normalize_purl, strip_purl_qualifiers}; use socket_patch_core::utils::telemetry::{ track_patch_scan_failed, track_patch_scanned, track_patch_vendor_failed, track_patch_vendored, }; @@ -197,23 +197,29 @@ async fn preview_apply_gc( /// copy is its NORMAL state, not "no longer installed". Without this, a /// wiped node_modules would prune the manifest entry — and the next /// `vendor` run would then reconcile-revert the vendoring itself. +/// +/// Both sides are compared in percent-DECODED form (`normalize_purl`): +/// manifest keys come from the API encoded (`pkg:npm/%40scope/x@1`) while +/// crawler purls carry the literal `@scope` — comparing the raw strings +/// would make every encoded scoped entry look prunable and `--prune`/ +/// `--sync` would GC the very patch it just downloaded. pub(crate) fn detect_prunable( manifest: &PatchManifest, scanned_purls: &HashSet, vendored: &HashSet, ) -> Vec { - let scanned_bases: HashSet<&str> = scanned_purls + let scanned_bases: HashSet = scanned_purls .iter() - .map(|p| strip_purl_qualifiers(p)) + .map(|p| normalize_purl(strip_purl_qualifiers(p)).into_owned()) .collect(); manifest .patches .keys() .filter(|p| { - let base = strip_purl_qualifiers(p); - !scanned_bases.contains(base) + let base = normalize_purl(strip_purl_qualifiers(p)); + !scanned_bases.contains(base.as_ref()) && !vendored.contains(p.as_str()) - && !vendored.contains(base) + && !vendored.contains(strip_purl_qualifiers(p)) }) .cloned() .collect() @@ -1804,7 +1810,7 @@ pub async fn run(args: ScanArgs) -> i32 { for p in &vendored_selected { println!( " [skip] {} (vendored — run scan --vendor to update)", - p.purl + normalize_purl(&p.purl) ); } } @@ -1851,7 +1857,10 @@ pub async fn run(args: ScanArgs) -> i32 { println!( " {} [{}] {}", - patch.purl, + // Human display only: show the decoded form of an + // API-encoded purl (`%40scope` → `@scope`). JSON output + // keeps the verbatim key. + normalize_purl(&patch.purl), patch.tier.to_uppercase(), sev_colored, ); @@ -2244,6 +2253,23 @@ mod tests { ); } + #[test] + fn detect_prunable_encoded_manifest_key_not_pruned() { + // The API serves scoped purls percent-encoded and they land in the + // manifest verbatim; the crawler reports the literal `@scope` form. + // Comparing raw strings would make every encoded scoped entry look + // prunable — `scan --prune` would GC the patch it just downloaded. + let m = manifest_with(&[("pkg:npm/%40scope/x@1.0.0", "uuid-a")]); + let s = scanned(&["pkg:npm/@scope/x@1.0.0"]); + assert!( + detect_prunable(&m, &s, &no_vendored()).is_empty(), + "encoded manifest key must match the decoded scanned purl" + ); + // A genuinely-gone encoded entry still prunes. + let out = detect_prunable(&m, &scanned(&[]), &no_vendored()); + assert_eq!(out, vec!["pkg:npm/%40scope/x@1.0.0".to_string()]); + } + #[test] fn detect_prunable_exempts_qualified_variant_of_vendored_base() { // The ledger key set carries qualifier-stripped bases (see diff --git a/crates/socket-patch-cli/src/commands/vendor.rs b/crates/socket-patch-cli/src/commands/vendor.rs index ac7f9c6..f306f1d 100644 --- a/crates/socket-patch-cli/src/commands/vendor.rs +++ b/crates/socket-patch-cli/src/commands/vendor.rs @@ -27,7 +27,7 @@ use socket_patch_core::patch::vendor::{ self, ecosystem_dir_for_purl, load_state, save_state, RevertOutcome, VendorEntry, VendorOutcome, VendorWarning, }; -use socket_patch_core::utils::purl::strip_purl_qualifiers; +use socket_patch_core::utils::purl::{normalize_purl, strip_purl_qualifiers}; use socket_patch_core::utils::telemetry::{track_patch_vendor_failed, track_patch_vendored}; use socket_patch_core::vex::time::now_rfc3339; use std::collections::{HashMap, HashSet}; @@ -566,7 +566,7 @@ pub(crate) async fn vendor_records( ); } if !common.silent && !common.json { - eprintln!("Cannot vendor {candidate}: {detail}"); + eprintln!("Cannot vendor {}: {detail}", normalize_purl(candidate)); } } Some(VendorOutcome::Done { @@ -579,7 +579,7 @@ pub(crate) async fn vendor_records( if !common.silent && !common.json { eprintln!( "Failed to vendor {}: {}", - candidate, + normalize_purl(candidate), result.error.as_deref().unwrap_or("unknown error") ); } @@ -702,7 +702,7 @@ pub(crate) async fn vendor_records( .with_reason("package_not_installed", "no installed package found"), ); if !common.silent && !common.json { - eprintln!("Cannot vendor {purl}: package not installed"); + eprintln!("Cannot vendor {}: package not installed", normalize_purl(purl)); } } } diff --git a/crates/socket-patch-core/src/crawlers/deno_crawler.rs b/crates/socket-patch-core/src/crawlers/deno_crawler.rs index 5a12c2d..150cc27 100644 --- a/crates/socket-patch-core/src/crawlers/deno_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/deno_crawler.rs @@ -120,16 +120,21 @@ impl DenoCrawler { // manifest PURL and are joined onto the cache root below. A real // JSR coordinate is a single path segment, so reject any that // could traverse out of the cache (`..`/`.`, a separator, NUL). + // The parser percent-decodes components, so these guards see the + // decoded form — `%2e%2e` cannot smuggle a traversal past them. // Unlike the cargo/npm crawlers there is no content check to catch // a bogus path, and jsr patches in place — so fail closed here. - if !(is_safe_jsr_component(scope) - && is_safe_jsr_component(name) - && is_safe_jsr_component(version)) + if !(is_safe_jsr_component(&scope) + && is_safe_jsr_component(&name) + && is_safe_jsr_component(&version)) { continue; } // Cache layout: //// - let pkg_dir = jsr_cache_path.join(scope).join(name).join(version); + let pkg_dir = jsr_cache_path + .join(&*scope) + .join(&*name) + .join(&*version); if !is_dir(&pkg_dir).await { continue; } diff --git a/crates/socket-patch-core/src/crawlers/npm_crawler.rs b/crates/socket-patch-core/src/crawlers/npm_crawler.rs index 91cb624..2d9f8d3 100644 --- a/crates/socket-patch-core/src/crawlers/npm_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/npm_crawler.rs @@ -4,6 +4,7 @@ use std::path::{Path, PathBuf}; use serde::Deserialize; use super::types::{CrawledPackage, CrawlerOptions}; +use crate::utils::purl::{percent_decode_purl_component, strip_purl_qualifiers}; /// Default batch size for crawling. #[cfg(test)] @@ -686,11 +687,7 @@ impl NpmCrawler { /// Parse a PURL string to extract namespace, name, and version. fn parse_purl_components(purl: &str) -> Option<(Option, String, String)> { - // Strip qualifiers - let base = match purl.find('?') { - Some(idx) => &purl[..idx], - None => purl, - }; + let base = strip_purl_qualifiers(purl); let rest = base.strip_prefix("pkg:npm/")?; let at_idx = rest.rfind('@')?; @@ -701,16 +698,33 @@ impl NpmCrawler { return None; } - if name_part.starts_with('@') { - let slash_idx = name_part.find('/')?; - let namespace = name_part[..slash_idx].to_string(); - let name = name_part[slash_idx + 1..].to_string(); - if name.is_empty() { + // SECURITY: components are percent-decoded AFTER the `/`/`@` splits + // above (so an encoded `%2f` cannot create a new path segment here) + // and BEFORE the `is_safe_npm_component` guards in `find_by_purls` + // (so `%2e%2e` cannot smuggle a traversal past them). The API serves + // scoped purls as `pkg:npm/%40scope/name@version`, which must match + // the literal `node_modules/@scope/name` install. + let version = percent_decode_purl_component(version); + + if let Some(slash_idx) = name_part.find('/') { + let namespace = percent_decode_purl_component(&name_part[..slash_idx]); + let name = percent_decode_purl_component(&name_part[slash_idx + 1..]); + // An npm namespace is always an `@scope` (checked post-decode). + if name.is_empty() || !namespace.starts_with('@') { return None; } - Some((Some(namespace), name, version.to_string())) + Some(( + Some(namespace.into_owned()), + name.into_owned(), + version.into_owned(), + )) } else { - Some((None, name_part.to_string(), version.to_string())) + let name = percent_decode_purl_component(name_part); + // A bare `@scope` with no `/name` is not a package name. + if name.starts_with('@') { + return None; + } + Some((None, name.into_owned(), version.into_owned())) } } } @@ -1031,6 +1045,93 @@ mod tests { assert!(!result.contains_key("pkg:npm/not-installed@0.0.1")); } + /// Regression: the patches API serves scoped purls percent-encoded + /// (`pkg:npm/%40scope/name@version`) and `scan` stores them verbatim as + /// manifest keys. `find_by_purls` must decode the components to match + /// the literal `node_modules/@scope/name` install — while keeping the + /// result keyed by the *verbatim* encoded input (downstream contract). + #[test] + fn test_parse_purl_components_percent_encoded_scope() { + let (ns, name, ver) = + NpmCrawler::parse_purl_components("pkg:npm/%40modelcontextprotocol/sdk@1.12.0") + .unwrap(); + assert_eq!(ns.as_deref(), Some("@modelcontextprotocol")); + assert_eq!(name, "sdk"); + assert_eq!(ver, "1.12.0"); + // An encoded bare scope with no `/name` is still not a package. + assert!(NpmCrawler::parse_purl_components("pkg:npm/%40scope@1.0.0").is_none()); + // A `#subpath` without a qualifier must not bleed into the version. + let (_, name, ver) = + NpmCrawler::parse_purl_components("pkg:npm/foo@1.0.0#lib/util").unwrap(); + assert_eq!(name, "foo"); + assert_eq!(ver, "1.0.0"); + } + + #[tokio::test] + async fn test_find_by_purls_percent_encoded_scope_resolves() { + let dir = tempfile::tempdir().unwrap(); + let nm = dir.path().join("node_modules"); + + let sdk_dir = nm.join("@modelcontextprotocol").join("sdk"); + tokio::fs::create_dir_all(&sdk_dir).await.unwrap(); + tokio::fs::write( + sdk_dir.join("package.json"), + r#"{"name": "@modelcontextprotocol/sdk", "version": "1.12.0"}"#, + ) + .await + .unwrap(); + + let crawler = NpmCrawler::new(); + let encoded = "pkg:npm/%40modelcontextprotocol/sdk@1.12.0".to_string(); + let result = crawler + .find_by_purls(&nm, std::slice::from_ref(&encoded)) + .await + .unwrap(); + + assert_eq!(result.len(), 1, "encoded scope must resolve: {result:?}"); + let pkg = result + .get(&encoded) + .expect("result keyed by the verbatim encoded input purl"); + assert_eq!(pkg.path, sdk_dir); + assert_eq!(pkg.name, "sdk"); + assert_eq!(pkg.namespace.as_deref(), Some("@modelcontextprotocol")); + } + + /// SECURITY regression: percent-encoded traversal sequences must be + /// rejected by the post-decode guards — `%2e%2e` decodes to `..` and + /// `%2f` to `/`, so guarding the *encoded* form would be a bypass. + #[tokio::test] + async fn test_find_by_purls_rejects_encoded_traversal() { + let root = tempfile::tempdir().unwrap(); + let nm = root.path().join("node_modules"); + // A real scope dir so a scoped traversal's kernel walk could resolve. + tokio::fs::create_dir_all(nm.join("@x")).await.unwrap(); + + // A victim package OUTSIDE node_modules, reachable only via `..`. + let evil_dir = root.path().join("evil"); + tokio::fs::create_dir_all(&evil_dir).await.unwrap(); + tokio::fs::write( + evil_dir.join("package.json"), + r#"{"name": "evil", "version": "1.0.0"}"#, + ) + .await + .unwrap(); + + let crawler = NpmCrawler::new(); + let purls = vec![ + "pkg:npm/%2e%2e/evil@1.0.0".to_string(), + "pkg:npm/@x/%2e%2e@1.0.0".to_string(), + "pkg:npm/@x/%2e%2e%2f%2e%2e%2fevil@1.0.0".to_string(), + "pkg:npm/..%2fevil@1.0.0".to_string(), + ]; + let result = crawler.find_by_purls(&nm, &purls).await.unwrap(); + + assert!( + result.is_empty(), + "encoded traversal must not escape node_modules; got {result:?}" + ); + } + /// Regression: a qualified PURL (carrying `?qualifiers`) must resolve and /// be keyed by the *verbatim* input PURL — not a reconstructed, stripped /// form. The dispatcher drives npm with `passthrough_purls` + diff --git a/crates/socket-patch-core/src/patch/vendor/bun_lock.rs b/crates/socket-patch-core/src/patch/vendor/bun_lock.rs index 8199bc6..1565d01 100644 --- a/crates/socket-patch-core/src/patch/vendor/bun_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/bun_lock.rs @@ -82,7 +82,7 @@ pub async fn vendor_bun( Ok(coords) => coords, Err(outcome) => return *outcome, }; - let (name, version) = (coords.name, coords.version); + let (name, version) = (coords.name.as_str(), coords.version.as_str()); // BN3 spelling: BARE project-relative path, no `file:`/`./` prefix. let rel_tgz = format!("{}/{}", coords.uuid_dir_rel, tgz_rel_leaf(name, version)); diff --git a/crates/socket-patch-core/src/patch/vendor/npm_common.rs b/crates/socket-patch-core/src/patch/vendor/npm_common.rs index 1b26cc4..1cfc62d 100644 --- a/crates/socket-patch-core/src/patch/vendor/npm_common.rs +++ b/crates/socket-patch-core/src/patch/vendor/npm_common.rs @@ -21,21 +21,25 @@ use crate::manifest::schema::PatchRecord; use crate::patch::apply::{apply_package_patch, normalize_file_path, ApplyResult, PatchSources}; use crate::patch::copy_tree::{fresh_copy, remove_tree}; use crate::patch::path_safety; -use crate::utils::purl::strip_purl_qualifiers; +use crate::utils::purl::{percent_decode_purl_component, strip_purl_qualifiers}; use super::npm_pack::{pack_deterministic, PackedTarball}; use super::path::vendor_uuid_dir_rel; use super::VendorOutcome; /// Validated npm vendoring coordinates (the output of -/// [`guard_coordinates`]). `name`/`version` borrow from the purl. +/// [`guard_coordinates`]). `name`/`version` are the percent-DECODED purl +/// components (the API serves scoped purls as `%40scope/name`; the +/// lockfile and node_modules carry the literal `@scope/name`). #[derive(Debug)] -pub(super) struct NpmCoords<'a> { - pub name: &'a str, - pub version: &'a str, +pub(super) struct NpmCoords { + pub name: String, + pub version: String, /// `.socket/vendor/npm/` (validated, forward slashes). pub uuid_dir_rel: String, - /// Qualifier-free base PURL. + /// Qualifier-free base PURL — VERBATIM (still encoded when the API + /// encoded it): the ledger's `base_purl`/entry keys must keep + /// matching the manifest keys, which store the purl as-served. pub base_purl: String, } @@ -49,17 +53,17 @@ pub(super) struct NpmCoords<'a> { /// vendor, arbitrary delete on revert) — reject fail-closed before any disk /// access. `Err` carries a ready [`VendorOutcome::Refused`] to bubble /// verbatim. -pub(super) fn guard_coordinates<'a>( - purl: &'a str, +pub(super) fn guard_coordinates( + purl: &str, record: &PatchRecord, -) -> Result, Box> { +) -> Result> { let Some((name, version)) = parse_npm_purl(purl) else { return Err(Box::new(refused( "unsafe_coordinates", format!("cannot parse an npm name@version out of `{purl}`"), ))); }; - if !is_safe_npm_name(name) || !path_safety::is_safe_single_segment(version) { + if !is_safe_npm_name(&name) || !path_safety::is_safe_single_segment(&version) { return Err(Box::new(refused( "unsafe_coordinates", format!( @@ -199,7 +203,7 @@ pub(super) async fn stage_patch_pack( let rel_tgz = format!( "{}/{}", coords.uuid_dir_rel, - tgz_rel_leaf(coords.name, coords.version) + tgz_rel_leaf(&coords.name, &coords.version) ); let dest = project_root.join(&rel_tgz); if let Some(parent) = dest.parent() { @@ -236,8 +240,8 @@ pub(super) async fn stage_patch_pack( Ok(( Some(NpmStagedPack { - name: coords.name.to_string(), - version: coords.version.to_string(), + name: coords.name, + version: coords.version, rel_tgz, packed, staged_pkg_json, @@ -251,14 +255,27 @@ pub(super) async fn stage_patch_pack( /// `pkg:npm/[@scope/]name@version` → `(name, version)`; scoped names keep /// the `@scope/` prefix. The LAST `@` separates the version (a leading /// scope-`@` is at index 0 and never the last `@` of a versioned purl). -pub(super) fn parse_npm_purl(purl: &str) -> Option<(&str, &str)> { +/// +/// Components are percent-DECODED (the API serves `pkg:npm/%40scope/...`). +/// SECURITY: each segment decodes independently AFTER the `/`/`@` splits, +/// and the post-decode `is_safe_npm_name`/`is_safe_single_segment` gates in +/// [`guard_coordinates`] reject any separator or traversal sequence a +/// decode may have surfaced (`%2e%2e`, `%2f`, ...) — decoding never runs +/// after the guards. +pub(super) fn parse_npm_purl(purl: &str) -> Option<(String, String)> { let base = strip_purl_qualifiers(purl); let rest = base.strip_prefix("pkg:npm/")?; let at = rest.rfind('@').filter(|&i| i > 0)?; - let (name, version) = (&rest[..at], &rest[at + 1..]); - if name.is_empty() || version.is_empty() { + let (name_raw, version_raw) = (&rest[..at], &rest[at + 1..]); + if name_raw.is_empty() || version_raw.is_empty() { return None; } + let name = name_raw + .split('/') + .map(percent_decode_purl_component) + .collect::>() + .join("/"); + let version = percent_decode_purl_component(version_raw).into_owned(); Some((name, version)) } @@ -369,18 +386,42 @@ mod tests { fn guard_coordinates_accepts_plain_and_scoped_names() { let record = record_with_uuid(UUID); let coords = guard_coordinates("pkg:npm/left-pad@1.3.0", &record).unwrap(); - assert_eq!((coords.name, coords.version), ("left-pad", "1.3.0")); + assert_eq!((coords.name.as_str(), coords.version.as_str()), ("left-pad", "1.3.0")); assert_eq!(coords.uuid_dir_rel, format!(".socket/vendor/npm/{UUID}")); assert_eq!(coords.base_purl, "pkg:npm/left-pad@1.3.0"); let coords = guard_coordinates("pkg:npm/@scope/pkg@1.0.0?artifact_id=x", &record).unwrap(); - assert_eq!((coords.name, coords.version), ("@scope/pkg", "1.0.0")); + assert_eq!((coords.name.as_str(), coords.version.as_str()), ("@scope/pkg", "1.0.0")); assert_eq!( coords.base_purl, "pkg:npm/@scope/pkg@1.0.0", "qualifiers stripped" ); } + /// The API serves scoped purls percent-encoded; the coordinates must + /// decode to the literal `@scope/name` (which keys the lockfile and + /// the artifact path), while `base_purl` stays verbatim — the ledger + /// must keep matching the manifest key as-served. + #[test] + fn guard_coordinates_decodes_percent_encoded_scope() { + let record = record_with_uuid(UUID); + let coords = + guard_coordinates("pkg:npm/%40modelcontextprotocol/sdk@1.12.0", &record).unwrap(); + assert_eq!( + (coords.name.as_str(), coords.version.as_str()), + ("@modelcontextprotocol/sdk", "1.12.0") + ); + assert_eq!( + coords.base_purl, "pkg:npm/%40modelcontextprotocol/sdk@1.12.0", + "base_purl stays verbatim-encoded (manifest/ledger key parity)" + ); + assert_eq!( + tgz_rel_leaf(&coords.name, &coords.version), + "@modelcontextprotocol/sdk-1.12.0.tgz", + "artifact leaf is built from the decoded name" + ); + } + #[test] fn guard_coordinates_refuses_fail_closed() { let record = record_with_uuid(UUID); @@ -399,6 +440,20 @@ mod tests { guard_coordinates("pkg:npm/x@../1.0.0", &record).unwrap_err(), "unsafe_coordinates", ); + // SECURITY: percent-encoded traversal must be rejected POST-decode — + // guarding the encoded form would be a bypass (`%2e%2e` → `..`). + expect_refusal( + guard_coordinates("pkg:npm/%2e%2e/escape@1.0.0", &record).unwrap_err(), + "unsafe_coordinates", + ); + expect_refusal( + guard_coordinates("pkg:npm/@scope/%2e%2e%2f%2e%2e@1.0.0", &record).unwrap_err(), + "unsafe_coordinates", + ); + expect_refusal( + guard_coordinates("pkg:npm/x@%2e%2e%2f1.0.0", &record).unwrap_err(), + "unsafe_coordinates", + ); // Tampered uuid. let record = record_with_uuid("../../x"); expect_refusal( diff --git a/crates/socket-patch-core/src/patch/vendor/npm_lock.rs b/crates/socket-patch-core/src/patch/vendor/npm_lock.rs index 2be4c7a..bed244b 100644 --- a/crates/socket-patch-core/src/patch/vendor/npm_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/npm_lock.rs @@ -91,7 +91,7 @@ pub async fn vendor_npm( Ok(coords) => coords, Err(outcome) => return *outcome, }; - let (name, version) = (coords.name, coords.version); + let (name, version) = (coords.name.as_str(), coords.version.as_str()); let uuid_dir_rel = coords.uuid_dir_rel; let base_purl = coords.base_purl; @@ -1633,11 +1633,11 @@ mod tests { fn purl_and_name_helpers() { assert_eq!( parse_npm_purl("pkg:npm/left-pad@1.3.0"), - Some(("left-pad", "1.3.0")) + Some(("left-pad".into(), "1.3.0".into())) ); assert_eq!( parse_npm_purl("pkg:npm/@scope/pkg@1.0.0?foo=bar"), - Some(("@scope/pkg", "1.0.0")) + Some(("@scope/pkg".into(), "1.0.0".into())) ); assert_eq!(parse_npm_purl("pkg:npm/@scope/pkg"), None, "no version"); assert_eq!( diff --git a/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs b/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs index cac16e9..48ec98c 100644 --- a/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs @@ -93,7 +93,7 @@ pub async fn vendor_pnpm( Ok(coords) => coords, Err(outcome) => return *outcome, }; - let (name, version) = (coords.name, coords.version); + let (name, version) = (coords.name.as_str(), coords.version.as_str()); let rel_tgz = format!("{}/{}", coords.uuid_dir_rel, tgz_rel_leaf(name, version)); // pnpm spells the override target `file:` with NO // `./` (spike P1 fixtures, verbatim). diff --git a/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs b/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs index 613b503..d24c7c8 100644 --- a/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs @@ -83,7 +83,7 @@ pub async fn vendor_yarn_berry( Ok(coords) => coords, Err(outcome) => return *outcome, }; - let (name, version) = (coords.name, coords.version); + let (name, version) = (coords.name.as_str(), coords.version.as_str()); let uuid_dir_rel = coords.uuid_dir_rel.clone(); let base_purl = coords.base_purl.clone(); let rel_tgz = format!("{}/{}", coords.uuid_dir_rel, tgz_rel_leaf(name, version)); diff --git a/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs b/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs index ac1ff84..8d82a00 100644 --- a/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs @@ -68,7 +68,7 @@ pub async fn vendor_yarn_classic( Ok(coords) => coords, Err(outcome) => return *outcome, }; - let (name, version) = (coords.name, coords.version); + let (name, version) = (coords.name.as_str(), coords.version.as_str()); let uuid_dir_rel = coords.uuid_dir_rel; let base_purl = coords.base_purl; diff --git a/crates/socket-patch-core/src/utils/purl.rs b/crates/socket-patch-core/src/utils/purl.rs index 6ea0e80..393873d 100644 --- a/crates/socket-patch-core/src/utils/purl.rs +++ b/crates/socket-patch-core/src/utils/purl.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + /// Strip the trailing `?qualifiers` and `#subpath` components from a PURL, /// leaving the canonical `pkg:type/namespace/name@version` base. /// @@ -18,6 +20,94 @@ pub fn strip_purl_qualifiers(purl: &str) -> &str { } } +/// Strictly percent-decode ONE purl path component (a scope, namespace +/// segment, name, or version) AFTER it has been split out of the purl. +/// +/// The patches API serves purls in canonical percent-encoded form +/// (`pkg:npm/%40scope/name@1.0.0`), while crawlers build purls from the +/// literal on-disk names (`pkg:npm/@scope/name@1.0.0`). Parsers must +/// decode the API form to find installed packages. +/// +/// SECURITY: this must only ever be called on a component AFTER the purl +/// has been split on `/` and the version `@` — so an encoded separator +/// (`%2f`) cannot create new path segments at parse time; it surfaces as +/// a literal `/` *inside* one component — and BEFORE the path-safety +/// guards run, so `%2e%2e`, `%2f`, `%5c`, `%00` are rejected post-decode +/// by the same `is_safe_*` gates that reject their literal forms. +/// Guarding the encoded form instead would be a traversal bypass. +/// +/// Decoding is all-or-nothing: an invalid escape (`%G1`, trailing `%4`) +/// or a non-UTF8 decode returns the input unchanged (fail-safe — the +/// undecoded form contains no separators, and `%` is not a legal +/// character in any real package name). Zero-alloc when no `%`. +pub fn percent_decode_purl_component(component: &str) -> Cow<'_, str> { + if !component.contains('%') { + return Cow::Borrowed(component); + } + fn hex_val(b: u8) -> Option { + match b { + b'0'..=b'9' => Some(b - b'0'), + b'a'..=b'f' => Some(b - b'a' + 10), + b'A'..=b'F' => Some(b - b'A' + 10), + _ => None, + } + } + let bytes = component.as_bytes(); + let mut out: Vec = Vec::with_capacity(bytes.len()); + let mut i = 0; + while i < bytes.len() { + if bytes[i] == b'%' { + let (Some(hi), Some(lo)) = ( + bytes.get(i + 1).copied().and_then(hex_val), + bytes.get(i + 2).copied().and_then(hex_val), + ) else { + // Invalid escape: leave the whole component verbatim. + return Cow::Borrowed(component); + }; + out.push(hi * 16 + lo); + i += 3; + } else { + out.push(bytes[i]); + i += 1; + } + } + match String::from_utf8(out) { + Ok(s) => Cow::Owned(s), + // Decoded bytes are not UTF-8: leave the component verbatim. + Err(_) => Cow::Borrowed(component), + } +} + +/// Canonical string form for purl-to-purl comparison and display: +/// percent-decode each `/`-separated component of the +/// `pkg:type/...@version` base; qualifiers/subpath are appended verbatim. +/// +/// Used ONLY for string equality (`purl_eq`) and human output — never to +/// build filesystem paths (a `%2f` decoding into a name can at worst make +/// two distinct purls compare equal, not change a write location). +pub fn normalize_purl(purl: &str) -> Cow<'_, str> { + if !purl.contains('%') { + return Cow::Borrowed(purl); + } + let split = purl.find(['?', '#']).unwrap_or(purl.len()); + let (base, suffix) = purl.split_at(split); + let mut out = String::with_capacity(purl.len()); + for (i, seg) in base.split('/').enumerate() { + if i > 0 { + out.push('/'); + } + out.push_str(&percent_decode_purl_component(seg)); + } + out.push_str(suffix); + Cow::Owned(out) +} + +/// Purl equality up to percent-encoding of the base components +/// (`pkg:npm/%40scope/x@1` ≡ `pkg:npm/@scope/x@1`). +pub fn purl_eq(a: &str, b: &str) -> bool { + normalize_purl(a) == normalize_purl(b) +} + /// Parse a PyPI PURL to extract name and version. /// /// e.g., `"pkg:pypi/requests@2.28.0?artifact_id=abc"` -> `Some(("requests", "2.28.0"))` @@ -155,7 +245,7 @@ pub fn build_composer_purl(namespace: &str, name: &str, version: &str) -> String /// have a `/` namespace structure. The leading `@` on /// the scope is preserved (matching npm's `@scope/name` convention). #[cfg(feature = "deno")] -pub fn parse_jsr_purl(purl: &str) -> Option<((&str, &str), &str)> { +pub fn parse_jsr_purl(purl: &str) -> Option<((Cow<'_, str>, Cow<'_, str>), Cow<'_, str>)> { let base = strip_purl_qualifiers(purl); let rest = base.strip_prefix("pkg:jsr/")?; let at_idx = rest.rfind('@')?; @@ -167,8 +257,12 @@ pub fn parse_jsr_purl(purl: &str) -> Option<((&str, &str), &str)> { } let slash_idx = name_part.find('/')?; - let scope = &name_part[..slash_idx]; - let name = &name_part[slash_idx + 1..]; + // Decode AFTER splitting on `/`/`@` and BEFORE the shape checks below + // (and the caller's `is_safe_jsr_component` gate) — see + // `percent_decode_purl_component`. The API serves `%40scope`. + let scope = percent_decode_purl_component(&name_part[..slash_idx]); + let name = percent_decode_purl_component(&name_part[slash_idx + 1..]); + let version = percent_decode_purl_component(version); // Scope must be `@`. The bare `@` (length 1) is // invalid — there's no actual scope after the marker. @@ -248,15 +342,22 @@ pub fn is_purl(s: &str) -> bool { /// /// Non-PyPI keys never carry a `?`, so for them this reduces to plain /// equality. +/// +/// Comparison is encoding-tolerant (`purl_eq`): manifest keys come from +/// the API in percent-encoded form (`pkg:npm/%40scope/x@1`) while users +/// type the literal form — both spellings must match either way around. pub fn purl_matches_identifier(manifest_key: &str, identifier: &str) -> bool { if identifier.contains('?') { - manifest_key == identifier + purl_eq(manifest_key, identifier) } else { // Base identifier: compare bases. Strip both sides so a subpath // (`#...`) carried by either the key or the identifier doesn't // defeat the match — `strip_purl_qualifiers(identifier)` is a no-op // for a plain base PURL, so existing behaviour is unchanged. - strip_purl_qualifiers(manifest_key) == strip_purl_qualifiers(identifier) + purl_eq( + strip_purl_qualifiers(manifest_key), + strip_purl_qualifiers(identifier), + ) } } @@ -504,25 +605,31 @@ mod tests { ); } + #[cfg(feature = "deno")] + fn jsr_parts(purl: &str) -> Option<(String, String, String)> { + parse_jsr_purl(purl) + .map(|((s, n), v)| (s.into_owned(), n.into_owned(), v.into_owned())) + } + #[cfg(feature = "deno")] #[test] fn test_parse_jsr_purl() { assert_eq!( - parse_jsr_purl("pkg:jsr/@std/path@0.220.0"), - Some((("@std", "path"), "0.220.0")) + jsr_parts("pkg:jsr/@std/path@0.220.0"), + Some(("@std".into(), "path".into(), "0.220.0".into())) ); assert_eq!( - parse_jsr_purl("pkg:jsr/@luca/flag@1.0.0"), - Some((("@luca", "flag"), "1.0.0")) + jsr_parts("pkg:jsr/@luca/flag@1.0.0"), + Some(("@luca".into(), "flag".into(), "1.0.0".into())) ); // Scope must start with `@`. - assert_eq!(parse_jsr_purl("pkg:jsr/std/path@0.220.0"), None); + assert_eq!(jsr_parts("pkg:jsr/std/path@0.220.0"), None); // Empty pieces. - assert_eq!(parse_jsr_purl("pkg:jsr/@/path@0.220.0"), None); - assert_eq!(parse_jsr_purl("pkg:jsr/@std/@0.220.0"), None); - assert_eq!(parse_jsr_purl("pkg:jsr/@std/path@"), None); + assert_eq!(jsr_parts("pkg:jsr/@/path@0.220.0"), None); + assert_eq!(jsr_parts("pkg:jsr/@std/@0.220.0"), None); + assert_eq!(jsr_parts("pkg:jsr/@std/path@"), None); // Wrong scheme. - assert_eq!(parse_jsr_purl("pkg:npm/@std/path@0.220.0"), None); + assert_eq!(jsr_parts("pkg:npm/@std/path@0.220.0"), None); } #[cfg(feature = "deno")] @@ -661,8 +768,8 @@ mod tests { // Scope `@` + version `@` + qualifier `@` all coexist; only the // version `@` should be honored. assert_eq!( - parse_jsr_purl("pkg:jsr/@std/path@0.220.0?download_url=x@y"), - Some((("@std", "path"), "0.220.0")) + jsr_parts("pkg:jsr/@std/path@0.220.0?download_url=x@y"), + Some(("@std".into(), "path".into(), "0.220.0".into())) ); } @@ -748,6 +855,88 @@ mod tests { )); } + // --- Percent-decoding: API purls carry %-encoded components -------------- + + #[test] + fn test_percent_decode_purl_component() { + // The canonical case: an encoded npm scope marker. + assert_eq!( + percent_decode_purl_component("%40modelcontextprotocol"), + "@modelcontextprotocol" + ); + // Traversal sequences decode — the post-decode safety guards are + // what reject them, not this helper. + assert_eq!(percent_decode_purl_component("%2e%2e"), ".."); + assert_eq!(percent_decode_purl_component("a%2fb"), "a/b"); + assert_eq!(percent_decode_purl_component("%00"), "\0"); + // Invalid escapes leave the WHOLE component verbatim (all-or-nothing). + assert_eq!(percent_decode_purl_component("%G1abc"), "%G1abc"); + assert_eq!(percent_decode_purl_component("abc%4"), "abc%4"); + assert_eq!(percent_decode_purl_component("abc%"), "abc%"); + // Non-UTF8 decode (lone continuation byte) leaves it verbatim. + assert_eq!(percent_decode_purl_component("%FF"), "%FF"); + // No '%' is zero-alloc (borrowed). + assert!(matches!( + percent_decode_purl_component("plain-name"), + Cow::Borrowed(_) + )); + } + + #[test] + fn test_normalize_purl_and_purl_eq() { + assert_eq!( + normalize_purl("pkg:npm/%40modelcontextprotocol/sdk@1.12.0"), + "pkg:npm/@modelcontextprotocol/sdk@1.12.0" + ); + assert!(purl_eq( + "pkg:npm/%40scope/x@1.0.0", + "pkg:npm/@scope/x@1.0.0" + )); + assert!(purl_eq( + "pkg:npm/@scope/x@1.0.0", + "pkg:npm/%40scope/x@1.0.0" + )); + assert!(!purl_eq("pkg:npm/%40scope/x@1.0.0", "pkg:npm/@scope/x@2.0.0")); + // Qualifiers/subpath are preserved verbatim (not decoded). + assert_eq!( + normalize_purl("pkg:npm/%40s/x@1?artifact_id=a%2Fb"), + "pkg:npm/@s/x@1?artifact_id=a%2Fb" + ); + // Unencoded input is unchanged (and borrowed). + assert!(matches!( + normalize_purl("pkg:npm/lodash@4.17.21"), + Cow::Borrowed(_) + )); + } + + #[test] + fn test_purl_matches_identifier_decodes_encoded_key() { + // Encoded manifest key vs literal identifier — and vice versa. + assert!(purl_matches_identifier( + "pkg:npm/%40scope/x@1.0.0", + "pkg:npm/@scope/x@1.0.0" + )); + assert!(purl_matches_identifier( + "pkg:npm/@scope/x@1.0.0", + "pkg:npm/%40scope/x@1.0.0" + )); + assert!(!purl_matches_identifier( + "pkg:npm/%40scope/x@1.0.0", + "pkg:npm/@scope/y@1.0.0" + )); + } + + #[cfg(feature = "deno")] + #[test] + fn test_parse_jsr_purl_percent_encoded_scope() { + let ((scope, name), version) = parse_jsr_purl("pkg:jsr/%40std/path@0.220.0").unwrap(); + assert_eq!(scope, "@std"); + assert_eq!(name, "path"); + assert_eq!(version, "0.220.0"); + // The encoded bare `@` is still rejected post-decode. + assert_eq!(jsr_parts("pkg:jsr/%40/path@0.220.0"), None); + } + // --- Regression: name must not absorb the version separator ------------- #[test] From c3c012f25a52a468b57d3597018c997bb967c962 Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Thu, 11 Jun 2026 15:32:14 -0400 Subject: [PATCH 2/5] feat(vendor): auto-force staging on content mismatch + correct already-applied events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vendor stage is a private copy and every apply write path is hash-gated to exactly afterHash, so a beforeHash mismatch (a patch built against different bytes than the installed artifact, or a package already patched in place by apply) no longer fails the vendor: the stage is overwritten with the verified patched content and the overwrite surfaces as a vendor_content_mismatch_overwritten warning event. Missing patch-target files still fail closed without --force (force's silent NotFound skip would pack an artifact without the fix). - shared force_apply_staged / missing_existing_patch_files / mismatch_overwrite_warnings policy helpers in vendor/mod.rs, used by all npm flavors (via stage_patch_pack) + cargo/composer/gem/pypi/ golang backends; dry runs predict the same outcome - vendor.rs: gate the already_vendored rewrite on entry.is_none() — the first vendor of an in-place-applied package now emits Applied (it packed + rewired this run) instead of a miscounted skip - scan --vendor: pre-prompt baseline check annotates mismatched packages before the confirm prompt (best-effort, read-only) - --force narrowed to missing-file tolerance + variant-probe bypass; CLI_CONTRACT.md documents the new warning code Co-Authored-By: Claude Fable 5 --- crates/socket-patch-cli/CLI_CONTRACT.md | 3 +- crates/socket-patch-cli/src/commands/scan.rs | 70 ++++++ .../socket-patch-cli/src/commands/vendor.rs | 44 +++- .../tests/in_process_vendor.rs | 178 +++++++++++++++ .../src/patch/vendor/bun_lock.rs | 1 + .../src/patch/vendor/cargo.rs | 38 ++-- .../src/patch/vendor/composer_lock.rs | 29 +-- .../socket-patch-core/src/patch/vendor/gem.rs | 29 +-- .../src/patch/vendor/golang.rs | 36 +++- .../socket-patch-core/src/patch/vendor/mod.rs | 204 +++++++++++++++++- .../src/patch/vendor/npm_common.rs | 29 ++- .../src/patch/vendor/npm_lock.rs | 171 +++++++++++++++ .../src/patch/vendor/pnpm_lock.rs | 1 + .../src/patch/vendor/pypi.rs | 1 + .../src/patch/vendor/pypi_wheel.rs | 83 ++++++- .../src/patch/vendor/yarn_berry_lock.rs | 1 + .../src/patch/vendor/yarn_classic_lock.rs | 1 + 17 files changed, 840 insertions(+), 79 deletions(-) diff --git a/crates/socket-patch-cli/CLI_CONTRACT.md b/crates/socket-patch-cli/CLI_CONTRACT.md index 7fb1d59..464ef6a 100644 --- a/crates/socket-patch-cli/CLI_CONTRACT.md +++ b/crates/socket-patch-cli/CLI_CONTRACT.md @@ -55,7 +55,7 @@ Beyond the globals above, each subcommand defines a small set of local arguments | Subcommand | Local arg | Env var | Purpose | |---|---|---|---| | `apply` | `--force` / `-f` | `SOCKET_FORCE` | Bypass beforeHash check | -| `vendor` | `--force` / `-f` | `SOCKET_FORCE` | Bypass beforeHash check when staging the vendored copy | +| `vendor` | `--force` / `-f` | `SOCKET_FORCE` | Tolerate missing patch-target files in the stage + bypass the variant probe. A beforeHash mismatch no longer needs it: vendor staging auto-overwrites with the verified patched content (`vendor_content_mismatch_overwritten` warning) | | `vendor` | `--revert` | `SOCKET_VENDOR_REVERT` | Undo vendoring: restore recorded original lockfile fragments + remove `.socket/vendor/` artifacts. Works without a manifest | | `apply`, `scan`, `vendor` | `--vex` | `SOCKET_VEX` | Generate an OpenVEX 0.2.0 document at this path on a successful run; see "embedded VEX" below | | `apply`, `scan`, `vendor` | `--vex-product`, `--vex-no-verify`, `--vex-doc-id`, `--vex-compact` | `SOCKET_VEX_PRODUCT`, `SOCKET_VEX_NO_VERIFY`, `SOCKET_VEX_DOC_ID`, `SOCKET_VEX_COMPACT` | Passthrough to the embedded VEX builder; mirror the standalone `vex` knobs. Inert unless `--vex` is set | @@ -602,6 +602,7 @@ Every `--json` invocation emits a single JSON object that follows the **unified | `vendor_yarn_berry_cache_unsupported` | `failed` | vendor (yarn berry): lock `cacheKey ≠ 10c0` or non-default `.yarnrc.yml` `compressionLevel` — the cache-zip checksum is not reproducible. | | `vendor_override_conflict` | `failed` | vendor (pnpm/yarn-berry): a user-authored override/resolution for the package already exists. | | `vendor_integrity_unverified` | `skipped` (warning) | vendor (pipenv): the lockfile format does not hash-check file entries; the committed wheel bytes are the protection. | +| `vendor_content_mismatch_overwritten` | `skipped` (warning) | vendor: a staged file matched NEITHER beforeHash nor afterHash (patch built against different bytes, or local edits); the stage was overwritten with the verified patched content and the vendor succeeded. | | `vendor_lock_checksums_unsupported` / `vendor_stale_lock_checksum` | `failed` | vendor (gem): an ambiguous/platform CHECKSUMS entry, or a v1-wired lock whose stale token blocks the hot path (run `vendor --revert` + re-vendor). | | `pypi_{poetry,pdm,pipenv}_no_lockfile` | `failed` | vendor (pypi): a lock-less tool marker with no `requirements.txt` fallback — run ` lock`. | | `vendor_*` / `pypi_*` / `gemfile_*` / `lock_*` / `locked_version_mismatch` / `user_authored_*` / `native_extensions_unsupported` / `platform_gem_unsupported` | `failed`/`skipped` | vendor: per-ecosystem refusal + drift vocabulary; see the Vendor command contract section. New tags are additive (MINOR). | diff --git a/crates/socket-patch-cli/src/commands/scan.rs b/crates/socket-patch-cli/src/commands/scan.rs index 00b2fe9..182e10d 100644 --- a/crates/socket-patch-cli/src/commands/scan.rs +++ b/crates/socket-patch-cli/src/commands/scan.rs @@ -225,6 +225,56 @@ pub(crate) fn detect_prunable( .collect() } +/// Vendor-mode pre-prompt check: uuids of selected patches whose installed +/// files match NEITHER beforeHash nor afterHash — the patch was built +/// against different bytes than the installed artifact. Vendoring still +/// succeeds for these (the vendor stage force-applies the verified patched +/// content; see `force_apply_staged`), but the user should learn it BEFORE +/// the confirm prompt, not from a post-hoc warning event. +/// +/// Best-effort and read-only: a detail-fetch failure or an unresolvable +/// installed path just skips the annotation — it never blocks the flow and +/// writes nothing (unlike `download_patch_records`, which stages blobs). +async fn preverify_vendor_baselines( + api_client: &socket_patch_core::api::client::ApiClient, + org_slug: Option<&str>, + selected: &[PatchSearchResult], + crawled: &[socket_patch_core::crawlers::types::CrawledPackage], +) -> HashSet { + use socket_patch_core::manifest::schema::PatchFileInfo; + use socket_patch_core::patch::apply::{verify_file_patch, VerifyStatus}; + use socket_patch_core::utils::purl::purl_eq; + + let mut mismatched: HashSet = HashSet::new(); + for patch in selected { + // API purls come percent-encoded, crawler purls literal — purl_eq + // bridges the two spellings. + let base = strip_purl_qualifiers(&patch.purl); + let Some(pkg) = crawled.iter().find(|c| purl_eq(&c.purl, base)) else { + continue; + }; + let Ok(Some(detail)) = api_client.fetch_patch(org_slug, &patch.uuid).await else { + continue; + }; + for (file, info) in &detail.files { + let info = PatchFileInfo { + before_hash: info.before_hash.clone().unwrap_or_default(), + after_hash: info.after_hash.clone().unwrap_or_default(), + }; + if info.before_hash.is_empty() { + continue; // a new file has no baseline to compare + } + if verify_file_patch(&pkg.path, file, &info).await.status + == VerifyStatus::HashMismatch + { + mismatched.insert(patch.uuid.clone()); + break; + } + } + } + mismatched +} + /// Cross-reference an existing manifest against discovery results to find /// PURLs whose newest available patch UUID differs from the locally-recorded /// one. Used by both the discovery JSON path and the table-print path. @@ -1822,6 +1872,21 @@ pub async fn run(args: ScanArgs) -> i32 { return embed_vex_human(&args.common, &args.vex, &manifest_path, 0).await; } + // Vendor mode: pre-verify baselines so a content mismatch surfaces + // BEFORE the confirm prompt (vendoring still proceeds for these — + // the stage force-applies the verified patched content). + let mismatched_baselines: HashSet = if args.vendor && !args.common.silent { + preverify_vendor_baselines( + &api_client, + effective_org_slug, + &selected, + &filtered_crawled, + ) + .await + } else { + HashSet::new() + }; + // Display detailed summary of selected patches before confirming // (presentational only — skipped wholesale under --silent). if !args.common.silent { @@ -1864,6 +1929,11 @@ pub async fn run(args: ScanArgs) -> i32 { patch.tier.to_uppercase(), sev_colored, ); + if mismatched_baselines.contains(&patch.uuid) { + println!( + " (installed content differs from patch baseline — will vendor patched content)" + ); + } if !vuln_ids.is_empty() { println!(" Fixes: {}", vuln_ids.join(", ")); } diff --git a/crates/socket-patch-cli/src/commands/vendor.rs b/crates/socket-patch-cli/src/commands/vendor.rs index f306f1d..94fd6e7 100644 --- a/crates/socket-patch-cli/src/commands/vendor.rs +++ b/crates/socket-patch-cli/src/commands/vendor.rs @@ -49,8 +49,12 @@ pub struct VendorArgs { #[command(flatten)] pub common: GlobalArgs, - /// Skip pre-vendor hash verification (vendor even if the installed - /// package's files differ from the patch's beforeHash). + /// Tolerate MISSING patch-target files in the staged copy (they are + /// skipped instead of failing the vendor) and bypass the variant + /// probe for multi-release ecosystems. A plain beforeHash mismatch + /// no longer needs this: vendor staging always overwrites mismatched + /// content with the verified patched bytes (surfaced as a + /// `vendor_content_mismatch_overwritten` warning). #[arg( short = 'f', long, @@ -586,16 +590,38 @@ pub(crate) async fn vendor_records( } let mut event = result_to_event(&result, common.dry_run); // The shared translator's in-sync classification reads - // `already_patched`; under `vendor` the contract tag is - // `already_vendored` (artifact + wiring already in sync). + // `already_patched`. Two distinct cases land there: + // + // * `entry` is None — the TRUE in-sync rerun (the backend + // synthesized AlreadyPatched and recorded nothing); + // under `vendor` the contract tag is `already_vendored`. + // * `entry` is Some — the FIRST vendor of a package + // already patched in place by `apply`: every file + // verified AlreadyPatched, but THIS run packed the + // artifact and rewired the lock. That is an Applied + // (`summary.applied` must count it), not a skip. if event.action == PatchAction::Skipped && event.error_code.as_deref() == Some("already_patched") { - event = PatchEvent::new(PatchAction::Skipped, candidate.clone()) - .with_reason( - "already_vendored", - "artifact and lockfile wiring already in sync", - ); + if entry.is_none() { + event = PatchEvent::new(PatchAction::Skipped, candidate.clone()) + .with_reason( + "already_vendored", + "artifact and lockfile wiring already in sync", + ); + } else { + let files = result + .files_verified + .iter() + .map(|f| crate::json_envelope::PatchEventFile { + path: f.file.clone(), + verified: true, + applied_via: None, + }) + .collect(); + event = PatchEvent::new(PatchAction::Applied, candidate.clone()) + .with_files(files); + } } env.record(event); for w in &warnings { diff --git a/crates/socket-patch-cli/tests/in_process_vendor.rs b/crates/socket-patch-cli/tests/in_process_vendor.rs index 0ef740c..791d9f9 100644 --- a/crates/socket-patch-cli/tests/in_process_vendor.rs +++ b/crates/socket-patch-cli/tests/in_process_vendor.rs @@ -1228,3 +1228,181 @@ fn json_envelope_shape() { assert_eq!(env["status"], "noManifest"); assert!(events(&env).is_empty()); } + +// ──────────────── vendor auto-force + already-applied lifecycle ──────────────── + +/// A package already patched IN PLACE by `apply` must vendor cleanly on the +/// first run — and the envelope must report it as `applied` (this run packed +/// the artifact and rewired the lock), NOT `skipped/already_vendored`. The +/// second run is the true in-sync rerun and reports `already_vendored`. +#[test] +fn vendor_after_in_place_apply_emits_applied_event() { + let fx = npm_fixture(); + // Simulate a prior in-place `socket-patch apply`. + std::fs::write(fx.installed_index(), PATCHED_INDEX).unwrap(); + + let (code, env) = vendor_cli(fx.root(), &[]); + assert_eq!(code, 0, "{env:#}"); + let applied = find_event(&env, "applied", None); + assert_eq!(applied["purl"], PURL); + assert_eq!( + env["summary"]["applied"], 1, + "first vendor of an applied package counts as applied: {env:#}" + ); + assert!(fx.tgz_path().exists(), "artifact packed"); + assert!(fx.state_path().exists(), "ledger entry recorded"); + // No mismatch warning: afterHash content is AlreadyPatched, not divergent. + assert!( + !events(&env) + .iter() + .any(|e| e["errorCode"] == "vendor_content_mismatch_overwritten"), + "{env:#}" + ); + + // Second run: artifact + wiring already in sync. + let (code, env) = vendor_cli(fx.root(), &[]); + assert_eq!(code, 0, "{env:#}"); + find_event(&env, "skipped", Some("already_vendored")); + assert_eq!(env["summary"]["applied"], 0); +} + +/// Installed content matching NEITHER hash (a patch built against different +/// bytes than the installed artifact — the flatted@3.3.1 case) still vendors: +/// the stage is overwritten with the verified patched content, the run exits +/// 0 with an `applied` event, and the overwrite surfaces as a +/// `vendor_content_mismatch_overwritten` warning event. +#[test] +fn mismatched_baseline_vendors_with_warning_event() { + let fx = npm_fixture(); + std::fs::write( + fx.installed_index(), + b"module.exports = () => 'divergent';\n", + ) + .unwrap(); + + let (code, env) = vendor_cli(fx.root(), &[]); + assert_eq!(code, 0, "{env:#}"); + let applied = find_event(&env, "applied", None); + assert_eq!(applied["purl"], PURL); + let warning = find_event(&env, "skipped", Some("vendor_content_mismatch_overwritten")); + assert!( + warning["reason"] + .as_str() + .unwrap_or("") + .contains("left-pad@1.3.0"), + "warning names the package: {env:#}" + ); + assert!(fx.tgz_path().exists(), "artifact packed despite the mismatch"); + // The installed tree keeps its divergent bytes (only the stage changed). + assert_eq!( + std::fs::read(fx.installed_index()).unwrap(), + b"module.exports = () => 'divergent';\n" + ); +} + +/// A patch-target file MISSING from the installed package still fails closed +/// (auto-force must not inherit `--force`'s silent NotFound skip — the +/// tarball would ship without the fix); `--force` keeps that tolerance. +#[test] +fn vendor_missing_file_fails_closed_without_force() { + let fx = npm_fixture(); + std::fs::remove_file(fx.installed_index()).unwrap(); + + let (code, env) = vendor_cli(fx.root(), &[]); + assert_ne!(code, 0, "missing patch target must fail: {env:#}"); + let failed = find_event(&env, "failed", None); + assert!( + failed["error"] + .as_str() + .unwrap_or("") + .contains("File not found"), + "{env:#}" + ); + assert_eq!(fx.lock_bytes(), fx.original_lock, "lock byte-untouched"); + assert!(!fx.vendor_dir().exists(), "no artifacts on failure"); + + // --force: the missing file is tolerated (skipped) and the vendor lands. + let fx2 = npm_fixture(); + std::fs::remove_file(fx2.installed_index()).unwrap(); + let (code, env) = vendor_cli(fx2.root(), &["--force"]); + assert_eq!(code, 0, "{env:#}"); +} + +// ──────────────── percent-encoded scoped purls (Fix A integration) ──────────────── + +/// Build a fixture whose installed package is the SCOPED `@scope/left-pad` +/// while the manifest keys the patch by the API's percent-encoded purl +/// (`pkg:npm/%40scope/left-pad@1.3.0`) — exactly what `scan` writes. +fn npm_scoped_fixture() -> NpmFixture { + let fx = npm_fixture_with_purls(&["pkg:npm/%40scope/left-pad@1.3.0"]); + let root = fx.root(); + + // Re-home the installed package under the scope dir. + let scoped = root.join("node_modules/@scope/left-pad"); + std::fs::create_dir_all(scoped.parent().unwrap()).unwrap(); + std::fs::rename(root.join("node_modules/left-pad"), &scoped).unwrap(); + std::fs::write( + scoped.join("package.json"), + br#"{"name":"@scope/left-pad","version":"1.3.0"}"#, + ) + .unwrap(); + + // Re-key the lock entry to the scoped install path. + let mut lock: Value = serde_json::from_slice(&fx.original_lock).unwrap(); + let packages = lock["packages"].as_object_mut().unwrap(); + let entry = packages.remove("node_modules/left-pad").unwrap(); + packages.insert("node_modules/@scope/left-pad".to_string(), entry); + lock["packages"][""]["dependencies"] = json!({ "@scope/left-pad": "^1.3.0" }); + let mut lock_bytes = serde_json::to_vec_pretty(&lock).unwrap(); + lock_bytes.push(b'\n'); + std::fs::write(root.join("package-lock.json"), &lock_bytes).unwrap(); + + fx +} + +/// The API serves scoped purls percent-encoded and `scan` stores them +/// verbatim as manifest keys; vendor must decode them to find the installed +/// `node_modules/@scope/...` package and wire the lock — while the ledger +/// stays keyed by the verbatim encoded purl (manifest parity). +#[test] +fn vendor_resolves_percent_encoded_scope_purl() { + let fx = npm_scoped_fixture(); + + let (code, env) = vendor_cli(fx.root(), &[]); + assert_eq!(code, 0, "{env:#}"); + let applied = find_event(&env, "applied", None); + assert_eq!(applied["purl"], "pkg:npm/%40scope/left-pad@1.3.0"); + + // Artifact lands under the DECODED scope dir. + let tgz = fx + .root() + .join(format!(".socket/vendor/npm/{UUID}/@scope/left-pad-1.3.0.tgz")); + assert!(tgz.exists(), "tarball at the decoded scoped path"); + + // Lock rewired to the vendored artifact. + let lock = fx.lock_value(); + assert_eq!( + lock["packages"]["node_modules/@scope/left-pad"]["resolved"], + json!(format!( + "file:.socket/vendor/npm/{UUID}/@scope/left-pad-1.3.0.tgz" + )) + ); + + // Ledger keyed by the VERBATIM encoded purl (manifest key parity). + let state: Value = + serde_json::from_slice(&std::fs::read(fx.state_path()).unwrap()).unwrap(); + assert!( + state["entries"]["pkg:npm/%40scope/left-pad@1.3.0"].is_object(), + "state keyed by the encoded manifest purl: {state:#}" + ); + + // Round-trip: revert restores the original (scoped) lock bytes. + let (code, env) = vendor_cli(fx.root(), &["--revert"]); + assert_eq!(code, 0, "{env:#}"); + let lock = fx.lock_value(); + assert_eq!( + lock["packages"]["node_modules/@scope/left-pad"]["resolved"], + json!(REG_RESOLVED) + ); + assert!(!fx.vendor_dir().join("npm").exists(), "artifacts removed"); +} diff --git a/crates/socket-patch-core/src/patch/vendor/bun_lock.rs b/crates/socket-patch-core/src/patch/vendor/bun_lock.rs index 1565d01..f1b3c51 100644 --- a/crates/socket-patch-core/src/patch/vendor/bun_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/bun_lock.rs @@ -136,6 +136,7 @@ pub async fn vendor_bun( sources, dry_run, force, + &mut warnings, ) .await { diff --git a/crates/socket-patch-core/src/patch/vendor/cargo.rs b/crates/socket-patch-core/src/patch/vendor/cargo.rs index 85ea5cb..614fd28 100644 --- a/crates/socket-patch-core/src/patch/vendor/cargo.rs +++ b/crates/socket-patch-core/src/patch/vendor/cargo.rs @@ -18,7 +18,7 @@ use std::path::{Path, PathBuf}; use crate::manifest::schema::{PatchFileInfo, PatchRecord}; use crate::patch::apply::{ - apply_package_patch, normalize_file_path, ApplyResult, PatchSources, VerifyResult, VerifyStatus, + normalize_file_path, ApplyResult, PatchSources, VerifyResult, VerifyStatus, }; use crate::patch::copy_tree::{fresh_copy, remove_tree}; use crate::patch::file_hash::compute_file_git_sha256; @@ -269,22 +269,27 @@ pub async fn vendor_cargo_crate( } if dry_run { - // Verify (read-only) against the pristine source — apply_package_patch - // never writes when dry_run — for an accurate "would patch" report, - // without creating the copy or editing config/lock. - let mut result = apply_package_patch( + // Verify (read-only) against the pristine source — the apply + // pipeline never writes when dry_run — for an accurate "would + // patch" report (including the auto-force overwrite warnings the + // real run would emit), without creating the copy or editing + // config/lock. + let mut dry_warnings: Vec = Vec::new(); + let mut result = super::force_apply_staged( purl, pristine_src, - &record.files, + record, sources, - Some(&record.uuid), true, force, + name, + version, + &mut dry_warnings, ) .await; result.package_path = copy_dir.display().to_string(); result.sidecar = None; - return done(result, None, Vec::new()); + return done(result, None, dry_warnings); } // Hot path: already in sync → touch nothing (entry stays with the caller's @@ -333,15 +338,19 @@ pub async fn vendor_cargo_crate( ); } - // Delegate to the hardened pipeline, pointed at the copy. - let mut result = apply_package_patch( + // Delegate to the hardened pipeline (vendor auto-force policy — see + // `force_apply_staged`), pointed at the copy. + let mut warnings: Vec = Vec::new(); + let mut result = super::force_apply_staged( purl, ©_dir, - &record.files, + record, sources, - Some(&record.uuid), false, force, + name, + version, + &mut warnings, ) .await; result.package_path = copy_dir.display().to_string(); @@ -350,7 +359,7 @@ pub async fn vendor_cargo_crate( // Don't leave a half-built copy (or an empty uuid husk) that // verify/sweep would misjudge. let _ = remove_tree(&uuid_dir).await; - return done(result, None, Vec::new()); + return done(result, None, warnings); } // A path-dep copy must never carry a checksum sidecar. The fresh copy @@ -370,10 +379,9 @@ pub async fn vendor_cargo_crate( let _ = remove_tree(&uuid_dir).await; result.success = false; result.error = Some(format!("failed to update .cargo/config.toml: {e}")); - return done(result, None, Vec::new()); + return done(result, None, warnings); } - let mut warnings = Vec::new(); let prior_path = prior_entry.as_ref().and_then(|i| i.path.clone()); if prior_path.as_deref().is_some_and(is_legacy_redirect_path) { warnings.push(VendorWarning::new( diff --git a/crates/socket-patch-core/src/patch/vendor/composer_lock.rs b/crates/socket-patch-core/src/patch/vendor/composer_lock.rs index b2ae9cd..5a533eb 100644 --- a/crates/socket-patch-core/src/patch/vendor/composer_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/composer_lock.rs @@ -35,7 +35,7 @@ use serde_json::{json, Map, Value}; use crate::manifest::schema::{PatchFileInfo, PatchRecord}; use crate::patch::apply::{ - apply_package_patch, is_safe_relative_subpath, normalize_file_path, ApplyResult, PatchSources, + is_safe_relative_subpath, normalize_file_path, ApplyResult, PatchSources, VerifyResult, VerifyStatus, }; use crate::patch::copy_tree::{fresh_copy, remove_tree}; @@ -193,21 +193,24 @@ pub async fn vendor_composer( // ── dry run: verify-only against the installed dir, no writes ──────── if dry_run { - let mut result = apply_package_patch( + let mut dry_warnings: Vec = Vec::new(); + let mut result = super::force_apply_staged( purl, installed_dir, - &record.files, + record, sources, - Some(&record.uuid), true, force, + &pkg, + version, + &mut dry_warnings, ) .await; result.package_path = copy_dir.display().to_string(); return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings: dry_warnings, }; } @@ -225,14 +228,17 @@ pub async fn vendor_composer( warnings: Vec::new(), }; } - let mut result = apply_package_patch( + let mut warnings: Vec = Vec::new(); + let mut result = super::force_apply_staged( purl, ©_dir, - &record.files, + record, sources, - Some(&record.uuid), false, force, + &pkg, + version, + &mut warnings, ) .await; result.package_path = copy_dir.display().to_string(); @@ -242,7 +248,7 @@ pub async fn vendor_composer( return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings, }; } @@ -256,7 +262,7 @@ pub async fn vendor_composer( return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings, }; }; let rewritten = rewrite_lock_entry(original_obj, ©_rel, &record.uuid); @@ -272,12 +278,11 @@ pub async fn vendor_composer( return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings, }; } // ── marker + ledger entry ──────────────────────────────────────────── - let mut warnings = Vec::new(); let base_purl = build_composer_purl(&vendor, &name, version); let mut vulnerabilities: Vec = record.vulnerabilities.keys().cloned().collect(); vulnerabilities.sort(); diff --git a/crates/socket-patch-core/src/patch/vendor/gem.rs b/crates/socket-patch-core/src/patch/vendor/gem.rs index 5ce51a2..0eccace 100644 --- a/crates/socket-patch-core/src/patch/vendor/gem.rs +++ b/crates/socket-patch-core/src/patch/vendor/gem.rs @@ -53,7 +53,7 @@ use serde_json::Value; use crate::manifest::schema::{PatchFileInfo, PatchRecord}; use crate::patch::apply::{ - apply_package_patch, is_safe_relative_subpath, normalize_file_path, ApplyResult, PatchSources, + is_safe_relative_subpath, normalize_file_path, ApplyResult, PatchSources, VerifyResult, VerifyStatus, }; use crate::patch::copy_tree::{fresh_copy, remove_tree}; @@ -282,21 +282,24 @@ pub async fn vendor_gem( // ── dry run: verify-only against the installed dir, no writes ──────── if dry_run { - let mut result = apply_package_patch( + let mut dry_warnings: Vec = Vec::new(); + let mut result = super::force_apply_staged( purl, installed_dir, - &record.files, + record, sources, - Some(&record.uuid), true, force, + name, + version, + &mut dry_warnings, ) .await; result.package_path = copy_dir.display().to_string(); return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings: dry_warnings, }; } @@ -338,14 +341,17 @@ pub async fn vendor_gem( warnings: Vec::new(), }; } - let mut result = apply_package_patch( + let mut warnings: Vec = Vec::new(); + let mut result = super::force_apply_staged( purl, ©_dir, - &record.files, + record, sources, - Some(&record.uuid), false, force, + name, + version, + &mut warnings, ) .await; result.package_path = copy_dir.display().to_string(); @@ -355,7 +361,7 @@ pub async fn vendor_gem( return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings, }; } @@ -368,7 +374,7 @@ pub async fn vendor_gem( return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings, }; } @@ -395,13 +401,12 @@ pub async fn vendor_gem( return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings, }; } }; // ── marker + ledger entry ──────────────────────────────────────────── - let mut warnings = Vec::new(); let base_purl = build_gem_purl(name, version); let mut vulnerabilities: Vec = record.vulnerabilities.keys().cloned().collect(); vulnerabilities.sort(); diff --git a/crates/socket-patch-core/src/patch/vendor/golang.rs b/crates/socket-patch-core/src/patch/vendor/golang.rs index 8961482..066676c 100644 --- a/crates/socket-patch-core/src/patch/vendor/golang.rs +++ b/crates/socket-patch-core/src/patch/vendor/golang.rs @@ -101,6 +101,29 @@ pub async fn vendor_go_module( .is_some_and(|e| e.owner == Some(ReplaceOwner::GoPatches)); let prior_path = prior.as_ref().and_then(|e| e.path.clone()); + // Vendor auto-force policy (the engine's copy is staged from the + // pristine source, never the user's tree — see `force_apply_staged`): + // missing patch targets still fail closed unless the caller's own + // `--force` asked for the skip tolerance, then the engine apply runs + // forced so a beforeHash mismatch (already-applied module, or a patch + // built against different bytes) overwrites with the verified patched + // content. The engine is shared with the in-place `apply` redirect + // path, whose strict semantics stay unchanged. + let mut warnings: Vec = Vec::new(); + if !force { + let missing = super::missing_existing_patch_files(pristine_src, &record.files).await; + if let Some(first) = missing.first() { + return VendorOutcome::Done { + result: super::failed_apply_result( + purl, + format!("Cannot apply patch: {first} - File not found"), + ), + entry: None, + warnings, + }; + } + } + // The engine does the heavy lifting: fresh copy → hardened apply pipeline // → `replace` upsert (which refuses a user-authored same-version pin). let result = apply_go_redirect( @@ -114,15 +137,18 @@ pub async fn vendor_go_module( sources, Some(&record.uuid), dry_run, - force, + /*force=*/ true, ) .await; + if result.success { + warnings.extend(super::mismatch_overwrite_warnings(&result, module, version)); + } if dry_run { return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings, }; } if !result.success { @@ -134,7 +160,7 @@ pub async fn vendor_go_module( return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings, }; } // A patch with no files is a no-op success: the engine wrote no copy and @@ -143,12 +169,10 @@ pub async fn vendor_go_module( return VendorOutcome::Done { result, entry: None, - warnings: Vec::new(), + warnings, }; } - let mut warnings = Vec::new(); - if takeover { // The `replace` line was already atomically repointed by the upsert; // the apply backend's copy is now unreachable — delete it (built from diff --git a/crates/socket-patch-core/src/patch/vendor/mod.rs b/crates/socket-patch-core/src/patch/vendor/mod.rs index 7d60fdc..1aa70cc 100644 --- a/crates/socket-patch-core/src/patch/vendor/mod.rs +++ b/crates/socket-patch-core/src/patch/vendor/mod.rs @@ -75,7 +75,14 @@ pub mod yarn_classic_lock; pub use path::{ecosystem_dir_for_purl, parse_vendor_path, VendorPathParts, VENDOR_DIR}; pub use state::{load_state, save_state, VendorEntry, VendorState, VENDOR_STATE_REL}; -use crate::patch::apply::ApplyResult; +use std::collections::HashMap; +use std::path::Path; + +use crate::manifest::schema::{PatchFileInfo, PatchRecord}; +use crate::patch::apply::{ + apply_package_patch, is_safe_relative_subpath, normalize_file_path, ApplyResult, PatchSources, + VerifyStatus, +}; /// A non-fatal advisory surfaced as a warning event (`code` is a stable /// reason tag from the CLI contract; `detail` is human text). @@ -94,6 +101,141 @@ impl VendorWarning { } } +/// One warning per staged file whose pre-patch content matched NEITHER +/// `beforeHash` nor `afterHash` and was overwritten with the verified +/// patched content (vendor staging always force-applies — the stage is a +/// private copy, and every apply write path is hash-gated to exactly +/// `afterHash`). +/// +/// Detection rides the verify signature `apply_package_patch` leaves +/// behind: a force-promoted file keeps `status: Ready` WITH +/// `expected_hash: Some(..)` and a differing `current_hash`, whereas a +/// cleanly-verified file carries `expected_hash: None` (see +/// `verify_file_patch`). +pub(crate) fn mismatch_overwrite_warnings( + result: &ApplyResult, + name: &str, + version: &str, +) -> Vec { + let mut warnings: Vec = result + .files_verified + .iter() + .filter(|v| { + v.status == VerifyStatus::Ready + && v.expected_hash.is_some() + && v.current_hash != v.expected_hash + }) + .map(|v| { + VendorWarning::new( + "vendor_content_mismatch_overwritten", + format!( + "installed {name}@{version} does not match this patch's expected original \ + ({}); vendored the patched content anyway", + v.file + ), + ) + }) + .collect(); + // HashMap-driven verify order is randomized; keep warning order stable. + warnings.sort_by(|a, b| a.detail.cmp(&b.detail)); + warnings +} + +/// Patch-target files (non-empty `beforeHash`) absent from the staged +/// copy. Vendor staging force-applies (see [`force_apply_staged`]), and +/// force silently SKIPS missing files — which would pack an artifact +/// without the fix. This pre-check restores the strict apply's +/// fail-closed behavior for the non-`--force` path. Unsafe keys are +/// skipped here: the apply pipeline itself rejects them fail-closed. +pub(crate) async fn missing_existing_patch_files( + staged_dir: &Path, + files: &HashMap, +) -> Vec { + let mut missing: Vec = Vec::new(); + for (file_name, info) in files { + if info.before_hash.is_empty() { + continue; // a new file is expected to not exist yet + } + let normalized = normalize_file_path(file_name); + if !is_safe_relative_subpath(normalized) { + continue; + } + if tokio::fs::metadata(staged_dir.join(normalized)).await.is_err() { + missing.push(file_name.clone()); + } + } + missing.sort(); + missing +} + +/// A failed synthesized [`ApplyResult`] in the shape the strict apply +/// pipeline would have produced (success=false, `error` set, no files). +pub(crate) fn failed_apply_result(purl: &str, error: String) -> ApplyResult { + ApplyResult { + package_key: purl.to_string(), + package_path: String::new(), + success: false, + files_verified: Vec::new(), + files_patched: Vec::new(), + applied_via: HashMap::new(), + error: Some(error), + sidecar: None, + } +} + +/// Run the hardened apply pipeline against a vendor stage/copy with the +/// vendor auto-force policy: +/// +/// * Missing patch-target files fail closed unless the caller's own +/// `--force` asked for that skip tolerance. +/// * The apply itself ALWAYS forces: the stage is a private copy (never +/// the user's tree), and every apply write path is hash-gated to +/// exactly `afterHash` (the archive and blob paths verify content +/// BEFORE writing; the diff path self-disables on a base mismatch) — +/// forcing can only produce the verified patched content or fail +/// closed. This is what lets vendor succeed on a package already +/// patched in place by `apply`, or on a patch whose `beforeHash` was +/// built against different bytes than the installed artifact. +/// * Every force-overwritten file (content matched NEITHER hash) emits a +/// `vendor_content_mismatch_overwritten` warning — including on dry +/// runs, so previews predict the real outcome. +#[allow(clippy::too_many_arguments)] +pub(crate) async fn force_apply_staged( + purl: &str, + staged_dir: &Path, + record: &PatchRecord, + sources: &PatchSources<'_>, + dry_run: bool, + force: bool, + name: &str, + version: &str, + warnings: &mut Vec, +) -> ApplyResult { + if !force { + let missing = missing_existing_patch_files(staged_dir, &record.files).await; + if let Some(first) = missing.first() { + return failed_apply_result( + purl, + format!("Cannot apply patch: {first} - File not found"), + ); + } + } + let result = apply_package_patch( + purl, + staged_dir, + &record.files, + sources, + Some(&record.uuid), + dry_run, + /*force=*/ true, + ) + .await; + if result.success { + warnings.extend(mismatch_overwrite_warnings(&result, name, version)); + } + result +} + /// The result of one backend `vendor_*` call. // // `large_enum_variant`: `Done` is much bigger than `Refused` because it carries @@ -187,3 +329,63 @@ pub async fn vendored_purl_keys( Err(_) => std::collections::HashSet::new(), } } + +#[cfg(test)] +mod policy_tests { + use super::*; + use crate::patch::apply::VerifyResult; + + fn verify(status: VerifyStatus, expected: Option<&str>, current: Option<&str>) -> VerifyResult { + VerifyResult { + file: "package/index.js".to_string(), + status, + message: None, + current_hash: current.map(str::to_string), + expected_hash: expected.map(str::to_string), + target_hash: None, + } + } + + fn result_with(files_verified: Vec) -> ApplyResult { + ApplyResult { + package_key: "pkg:npm/x@1.0.0".to_string(), + package_path: String::new(), + success: true, + files_verified, + files_patched: Vec::new(), + applied_via: HashMap::new(), + error: None, + sidecar: None, + } + } + + /// Only the force-promoted signature (`Ready` + `expected_hash: Some` + + /// differing `current_hash`) flags an overwrite; clean verifies and + /// AlreadyPatched files never do. + #[test] + fn mismatch_overwrite_warnings_detects_promoted_ready() { + // Force-promoted mismatch: flagged. + let r = result_with(vec![verify(VerifyStatus::Ready, Some("aa"), Some("bb"))]); + let w = mismatch_overwrite_warnings(&r, "left-pad", "1.3.0"); + assert_eq!(w.len(), 1); + assert_eq!(w[0].code, "vendor_content_mismatch_overwritten"); + assert!(w[0].detail.contains("left-pad@1.3.0")); + assert!(w[0].detail.contains("package/index.js")); + + // Clean Ready (verify matched beforeHash): expected_hash is None. + let r = result_with(vec![verify(VerifyStatus::Ready, None, Some("aa"))]); + assert!(mismatch_overwrite_warnings(&r, "x", "1").is_empty()); + + // AlreadyPatched (afterHash content): not a mismatch. + let r = result_with(vec![verify( + VerifyStatus::AlreadyPatched, + None, + Some("after"), + )]); + assert!(mismatch_overwrite_warnings(&r, "x", "1").is_empty()); + + // NotFound (force-skipped): not an overwrite. + let r = result_with(vec![verify(VerifyStatus::NotFound, None, None)]); + assert!(mismatch_overwrite_warnings(&r, "x", "1").is_empty()); + } +} diff --git a/crates/socket-patch-core/src/patch/vendor/npm_common.rs b/crates/socket-patch-core/src/patch/vendor/npm_common.rs index 1cfc62d..f54a893 100644 --- a/crates/socket-patch-core/src/patch/vendor/npm_common.rs +++ b/crates/socket-patch-core/src/patch/vendor/npm_common.rs @@ -12,20 +12,19 @@ //! the project byte-untouched (a dry run stops after verification and //! creates nothing on disk). -use std::collections::HashMap; use std::path::Path; use serde_json::Value; use crate::manifest::schema::PatchRecord; -use crate::patch::apply::{apply_package_patch, normalize_file_path, ApplyResult, PatchSources}; +use crate::patch::apply::{normalize_file_path, ApplyResult, PatchSources}; use crate::patch::copy_tree::{fresh_copy, remove_tree}; use crate::patch::path_safety; use crate::utils::purl::{percent_decode_purl_component, strip_purl_qualifiers}; use super::npm_pack::{pack_deterministic, PackedTarball}; use super::path::vendor_uuid_dir_rel; -use super::VendorOutcome; +use super::{VendorOutcome, VendorWarning}; /// Validated npm vendoring coordinates (the output of /// [`guard_coordinates`]). `name`/`version` are the percent-DECODED purl @@ -130,6 +129,7 @@ pub(super) async fn stage_patch_pack( sources: &PatchSources<'_>, dry_run: bool, force: bool, + warnings: &mut Vec, ) -> Result<(Option, ApplyResult), Box> { let coords = guard_coordinates(purl, record)?; @@ -179,18 +179,21 @@ pub(super) async fn stage_patch_pack( } } - // Delegate to the hardened apply pipeline, pointed at the stage (which + // Delegate to the hardened apply pipeline (with the vendor auto-force + // policy — see `force_apply_staged`), pointed at the stage (which // plays the role of the installed package dir — manifest npm keys carry // the `package/` prefix and `apply` strips it via `normalize_file_path`, // exactly as it does for an in-place npm apply). - let result = apply_package_patch( + let result = super::force_apply_staged( purl, &stage, - &record.files, + record, sources, - Some(&record.uuid), dry_run, force, + &coords.name, + &coords.version, + warnings, ) .await; // A failed patch never packs (wiring is last — the caller returns with @@ -331,16 +334,7 @@ pub(super) fn refused(code: &'static str, detail: String) -> VendorOutcome { /// results. pub(super) fn done_failure(purl: &str, error: String) -> VendorOutcome { VendorOutcome::Done { - result: ApplyResult { - package_key: purl.to_string(), - package_path: String::new(), - success: false, - files_verified: Vec::new(), - files_patched: Vec::new(), - applied_via: HashMap::new(), - error: Some(error), - sidecar: None, - }, + result: super::failed_apply_result(purl, error), entry: None, warnings: Vec::new(), } @@ -350,6 +344,7 @@ pub(super) fn done_failure(purl: &str, error: String) -> VendorOutcome { mod tests { use super::*; use crate::manifest::schema::PatchFileInfo; + use std::collections::HashMap; const UUID: &str = "9f6b2c4e-1d3a-4f6b-8c2d-7e5a9b1c3d5f"; diff --git a/crates/socket-patch-core/src/patch/vendor/npm_lock.rs b/crates/socket-patch-core/src/patch/vendor/npm_lock.rs index bed244b..7e23591 100644 --- a/crates/socket-patch-core/src/patch/vendor/npm_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/npm_lock.rs @@ -175,6 +175,7 @@ pub async fn vendor_npm( sources, dry_run, force, + &mut warnings, ) .await { @@ -1090,6 +1091,176 @@ mod tests { assert!(found, "package/index.js missing from the tarball"); } + /// Read one member's bytes out of the packed tarball. + fn tgz_member(tgz: &[u8], member: &str) -> Option> { + let mut archive = tar::Archive::new(flate2::read::GzDecoder::new(tgz)); + for e in archive.entries().unwrap() { + let mut e = e.unwrap(); + if e.path().unwrap().to_string_lossy() == member { + let mut data = Vec::new(); + std::io::Read::read_to_end(&mut e, &mut data).unwrap(); + return Some(data); + } + } + None + } + + /// Vendor auto-force policy: installed content matching NEITHER hash + /// (e.g. a patch built against different bytes than the registry + /// artifact) is overwritten in the STAGE with the verified patched + /// content; the run succeeds, wires the lock, and surfaces the + /// overwrite as a `vendor_content_mismatch_overwritten` warning. The + /// installed tree is never touched. + #[tokio::test] + async fn vendor_overwrites_mismatched_content_with_warning() { + let fx = fixture().await; + let divergent: &[u8] = b"module.exports = () => 'divergent';\n"; + tokio::fs::write(fx.installed().join("index.js"), divergent) + .await + .unwrap(); + + let (result, entry, warnings) = expect_done(fx.vendor(false).await); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some(), "first vendor records a ledger entry"); + assert_eq!( + warnings + .iter() + .filter(|w| w.code == "vendor_content_mismatch_overwritten") + .count(), + 1, + "overwrite surfaced exactly once: {warnings:?}" + ); + assert!( + warnings[0].detail.contains("left-pad@1.3.0") + && warnings[0].detail.contains("package/index.js"), + "warning names the package and file: {warnings:?}" + ); + + // The tarball carries the VERIFIED patched bytes, not the divergent + // ones — every apply write path is hash-gated to afterHash. + let tgz = tokio::fs::read(fx.root().join(fx.expected_rel_tgz())) + .await + .unwrap(); + assert_eq!( + tgz_member(&tgz, "package/index.js").unwrap(), + PATCHED_INDEX + ); + + // The installed tree keeps its (divergent) bytes — only the stage + // was overwritten. + assert_eq!( + tokio::fs::read(fx.installed().join("index.js")) + .await + .unwrap(), + divergent + ); + + // The lock was rewired to the vendored artifact. + let lock = fx.read_lock().await; + assert_eq!( + lock["packages"]["node_modules/left-pad"]["resolved"], + json!(format!("file:{}", fx.expected_rel_tgz())) + ); + } + + /// Auto-force must NOT inherit force's silent NotFound skip: a missing + /// patch-target file still fails closed (a tarball without the fix + /// must never be packed), leaving the project byte-untouched. + #[tokio::test] + async fn vendor_missing_patch_file_fails_without_force() { + let fx = fixture().await; + tokio::fs::remove_file(fx.installed().join("index.js")) + .await + .unwrap(); + + let (result, entry, _) = expect_done(fx.vendor(false).await); + assert!(!result.success, "missing file must fail closed"); + assert!( + result + .error + .as_deref() + .unwrap_or("") + .contains("File not found"), + "error names the missing file: {:?}", + result.error + ); + assert!(entry.is_none()); + assert_eq!( + tokio::fs::read(fx.lock_path()).await.unwrap(), + fx.lock_bytes, + "lock byte-untouched on failure" + ); + assert!( + tokio::fs::metadata(fx.root().join(".socket/vendor")) + .await + .is_err(), + "no artifact dir on failure" + ); + } + + /// `vendor --force` keeps its missing-file tolerance (strict superset + /// of the auto-force policy). + #[tokio::test] + async fn vendor_force_still_skips_missing_files() { + let fx = fixture().await; + tokio::fs::remove_file(fx.installed().join("index.js")) + .await + .unwrap(); + + let blobs = fx.root().join(".socket/blobs"); + let sources = PatchSources::blobs_only(&blobs); + let outcome = vendor_npm( + &fx.purl(), + &fx.installed(), + fx.root(), + &fx.record, + &sources, + "2026-06-09T00:00:00Z", + false, + /*force=*/ true, + ) + .await; + let (result, entry, _) = expect_done(outcome); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + } + + /// A package already patched IN PLACE by `apply` vendors cleanly: the + /// staged copy verifies AlreadyPatched (no mismatch warning — the + /// content is exactly the patch's afterHash) and the tarball ships the + /// patched bytes. + #[tokio::test] + async fn vendor_of_already_applied_package_succeeds() { + let fx = fixture().await; + // Simulate a prior in-place `socket-patch apply`. + tokio::fs::write(fx.installed().join("index.js"), PATCHED_INDEX) + .await + .unwrap(); + + let (result, entry, warnings) = expect_done(fx.vendor(false).await); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some(), "first vendor records a ledger entry"); + assert!( + warnings + .iter() + .all(|w| w.code != "vendor_content_mismatch_overwritten"), + "afterHash content is AlreadyPatched, not a mismatch: {warnings:?}" + ); + + let tgz = tokio::fs::read(fx.root().join(fx.expected_rel_tgz())) + .await + .unwrap(); + assert_eq!( + tgz_member(&tgz, "package/index.js").unwrap(), + PATCHED_INDEX + ); + let lock = fx.read_lock().await; + assert_eq!( + lock["packages"]["node_modules/left-pad"]["resolved"], + json!(format!("file:{}", fx.expected_rel_tgz())) + ); + } + #[tokio::test] async fn rerun_is_in_sync_and_byte_stable() { let fx = fixture().await; diff --git a/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs b/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs index 48ec98c..e2ef0af 100644 --- a/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs @@ -163,6 +163,7 @@ pub async fn vendor_pnpm( sources, dry_run, force, + &mut warnings, ) .await { diff --git a/crates/socket-patch-core/src/patch/vendor/pypi.rs b/crates/socket-patch-core/src/patch/vendor/pypi.rs index 553a0dc..8b317f3 100644 --- a/crates/socket-patch-core/src/patch/vendor/pypi.rs +++ b/crates/socket-patch-core/src/patch/vendor/pypi.rs @@ -405,6 +405,7 @@ pub async fn vendor_pypi( &dest, dry_run, force, + &mut warnings, ) .await; let (result, artifact) = match built { diff --git a/crates/socket-patch-core/src/patch/vendor/pypi_wheel.rs b/crates/socket-patch-core/src/patch/vendor/pypi_wheel.rs index 69d4ffb..4b851ff 100644 --- a/crates/socket-patch-core/src/patch/vendor/pypi_wheel.rs +++ b/crates/socket-patch-core/src/patch/vendor/pypi_wheel.rs @@ -20,7 +20,7 @@ use sha2::Digest as _; use crate::crawlers::python_crawler::{canonicalize_pypi_name, read_python_metadata}; use crate::manifest::schema::PatchRecord; use crate::patch::apply::{ - apply_package_patch, is_safe_relative_subpath, normalize_file_path, ApplyResult, PatchSources, + is_safe_relative_subpath, normalize_file_path, ApplyResult, PatchSources, }; use crate::utils::fs::{atomic_write_bytes, list_dir_entries}; @@ -255,6 +255,7 @@ pub async fn build_patched_wheel( dest: &Path, dry_run: bool, force: bool, + warnings: &mut Vec, ) -> Result<(ApplyResult, Option), (&'static str, String)> { // Editable installs (`pip install -e` / uv tool dev mode) point // site-packages at the user's own working tree: the RECORD describes a @@ -371,15 +372,18 @@ pub async fn build_patched_wheel( } // Patch the stage through the shared apply pipeline (same verify/source - // strategy contract as `apply`). The installed tree is never touched. - let mut result = apply_package_patch( + // strategy contract as `apply`, with the vendor auto-force policy — + // see `force_apply_staged`). The installed tree is never touched. + let mut result = super::force_apply_staged( purl, stage.path(), - &record.files, + record, sources, - Some(&record.uuid), dry_run, force, + &dist.dist_name, + &dist.version, + warnings, ) .await; if dry_run || !result.success { @@ -903,6 +907,7 @@ mod tests { &fx.dest, false, false, + &mut Vec::new(), ) .await .unwrap(); @@ -952,6 +957,7 @@ mod tests { &fx.dest, false, false, + &mut Vec::new(), ) .await .unwrap_err(); @@ -977,6 +983,7 @@ mod tests { &fx.dest, false, false, + &mut Vec::new(), ) .await .unwrap(); @@ -994,6 +1001,7 @@ mod tests { &fx.dest, false, false, + &mut Vec::new(), ) .await .unwrap(); @@ -1044,6 +1052,7 @@ mod tests { &fx.dest, false, false, + &mut Vec::new(), ) .await .unwrap(); @@ -1082,6 +1091,7 @@ mod tests { &fx.dest, false, false, + &mut Vec::new(), ) .await .unwrap_err(); @@ -1105,6 +1115,7 @@ mod tests { &fx.dest, true, false, + &mut Vec::new(), ) .await .unwrap(); @@ -1120,8 +1131,12 @@ mod tests { ); } + /// Vendor auto-force policy: installed content matching NEITHER hash is + /// overwritten with the verified patched content in the STAGE (the + /// installed tree is never touched), and the overwrite is surfaced as a + /// `vendor_content_mismatch_overwritten` warning. #[tokio::test] - async fn hash_mismatch_fails_without_touching_install_or_dest() { + async fn hash_mismatch_overwrites_in_stage_with_warning() { let fx = make_fixture("", None).await; // Corrupt the installed six.py so verify sees a HashMismatch. tokio::fs::write(fx.site_packages.join("six.py"), b"tampered") @@ -1132,6 +1147,7 @@ mod tests { .unwrap(); let record = patch_record(&[("six.py", ORIG, PATCHED)]); let sources = PatchSources::blobs_only(&fx.blobs); + let mut warnings = Vec::new(); let (result, artifact) = build_patched_wheel( "pkg:pypi/six@1.16.0", &fx.site_packages, @@ -1141,10 +1157,65 @@ mod tests { &fx.dest, false, false, + &mut warnings, + ) + .await + .unwrap(); + assert!(result.success, "{:?}", result.error); + assert!(artifact.is_some()); + assert!(fx.dest.exists(), "patched wheel must be written"); + assert_eq!( + warnings + .iter() + .filter(|w| w.code == "vendor_content_mismatch_overwritten") + .count(), + 1, + "overwrite surfaced as a warning: {warnings:?}" + ); + // Installed tree untouched — only the stage was overwritten. + assert_eq!( + tokio::fs::read(fx.site_packages.join("six.py")) + .await + .unwrap(), + b"tampered" + ); + } + + /// A patch-target file MISSING from the install still fails closed + /// without `--force` — auto-force must not inherit force's silent + /// NotFound skip (the wheel would ship without the fix). + #[tokio::test] + async fn missing_patch_file_fails_without_force() { + let fx = make_fixture("", None).await; + tokio::fs::remove_file(fx.site_packages.join("six.py")) + .await + .unwrap(); + let dist = locate_installed_dist(&fx.site_packages, "six", "1.16.0") + .await + .unwrap(); + let record = patch_record(&[("six.py", ORIG, PATCHED)]); + let sources = PatchSources::blobs_only(&fx.blobs); + let (result, artifact) = build_patched_wheel( + "pkg:pypi/six@1.16.0", + &fx.site_packages, + &dist, + &record, + &sources, + &fx.dest, + false, + false, + &mut Vec::new(), ) .await .unwrap(); assert!(!result.success); + // The RECORD staging step trips first ("RECORD member ... is + // unreadable") — either way the build fails closed rather than + // packing a wheel without the fix. + assert!( + result.error.is_some(), + "missing file fails closed with an error" + ); assert!(artifact.is_none()); assert!(!fx.dest.exists()); } diff --git a/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs b/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs index d24c7c8..b7fa943 100644 --- a/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs @@ -254,6 +254,7 @@ pub async fn vendor_yarn_berry( sources, dry_run, force, + &mut warnings, ) .await { diff --git a/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs b/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs index 8d82a00..fb25126 100644 --- a/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/yarn_classic_lock.rs @@ -135,6 +135,7 @@ pub async fn vendor_yarn_classic( sources, dry_run, force, + &mut warnings, ) .await { From 7363c65ec3b85f441661ac61517a60761a69ed7d Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Thu, 11 Jun 2026 15:42:34 -0400 Subject: [PATCH 3/5] feat(vendor): take over exact-version override pins (pnpm + yarn berry) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A user-authored override/resolution that pins the package to exactly the version being vendored (Flowise: pnpm.overrides 'tar-fs': '3.1.0') no longer refuses with vendor_override_conflict. The pin's key is kept (its spelling and quoting preserved on both pnpm surfaces — pnpm hard-requires the package.json and lock override maps to agree), its VALUE is rewritten to the file:.socket/vendor/... spec, and the pinned value is recorded as the wiring original so every revert path (--revert, reconcile, remove) restores the user's pin verbatim. - pnpm: classify_pkg_override (Insert / Ours / Takeover) replaces the boolean conflict checks; effective key threads through EditCtx, apply_pkg_override and edit_overrides; revert restores originals in place instead of deleting. Ranges, different versions, parent>child selector chains, and duplicate same-name keys still refuse, now with a hint that exact pins are taken over. - yarn berry: bare-name resolutions pin equal to the version is taken over symmetrically (KIND_RESOLUTION records the original). - npm/yarn-classic/bun wire the lock only (no override surface), so no conflict exists there to take over. Co-Authored-By: Claude Fable 5 --- .../src/patch/vendor/pnpm_lock.rs | 398 ++++++++++++++++-- .../src/patch/vendor/yarn_berry_lock.rs | 103 ++++- 2 files changed, 445 insertions(+), 56 deletions(-) diff --git a/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs b/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs index e2ef0af..433f3b2 100644 --- a/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs @@ -138,10 +138,15 @@ pub async fn vendor_pnpm( let mut lines = split_lines(&lock_text); // ── 3. Pre-flight refusals (override conflicts, entry present) ─────── - if let Err(detail) = check_pkg_override_conflict(&pkg, name, &override_key) { - return refused("vendor_override_conflict", detail); - } - if let Err(detail) = check_lock_override_conflict(&lines, name, &override_key) { + // A user-authored exact-version pin equal to `version` is TAKEN OVER + // (the pin's key is rewritten to our spec on both surfaces and the + // original value recorded for revert); anything else same-name refuses. + let disposition = match classify_pkg_override(&pkg, name, version, &override_key) { + Ok(d) => d, + Err(detail) => return refused("vendor_override_conflict", detail), + }; + let effective_key = disposition.effective_key(&override_key).to_string(); + if let Err(detail) = check_lock_override(&lines, name, version, &effective_key) { return refused("vendor_override_conflict", detail); } if !lock_has_target_package(&lines, name, version) { @@ -201,11 +206,12 @@ pub async fn vendor_pnpm( rel_tgz: &rel_tgz, spec: &spec, integrity: &packed.integrity, + override_key: &effective_key, }; let mut wiring: Vec = Vec::new(); let (pkg_changed, created_pnpm_table, created_overrides_table) = - match apply_pkg_override(&mut pkg, &override_key, &spec, &mut wiring) { + match apply_pkg_override(&mut pkg, &effective_key, &spec, &mut wiring) { Ok(out) => out, Err(e) => return done_failure(purl, e), }; @@ -486,6 +492,11 @@ struct EditCtx<'a> { spec: &'a str, /// `sha512-` of the packed tarball. integrity: &'a str, + /// The override key BOTH surfaces edit (see + /// [`OverrideDisposition::effective_key`]): our canonical + /// `name@version` on a fresh insert, or the user's existing key on a + /// takeover / re-run over a taken-over key. + override_key: &'a str, } impl EditCtx<'_> { @@ -561,46 +572,129 @@ fn override_key_name(key: &str) -> &str { } } -/// Is this (key, value) override pair OURS for the target package — the -/// exact versioned selector pointing into `.socket/vendor/npm/`? -fn override_is_ours(key: &str, value: &str, our_key: &str) -> bool { - key == our_key && parse_vendor_path(value).is_some_and(|p| p.eco == "npm") +/// Does `value` point into `.socket/vendor/npm/` (ours — any uuid)? +fn is_vendor_value(value: &str) -> bool { + parse_vendor_path(value).is_some_and(|p| p.eco == "npm") +} + +/// How the package.json `pnpm.overrides` table relates to the package +/// being vendored. The lock's `overrides:` section must mirror this map +/// key-for-key (pnpm hard-checks the two and fails +/// `ERR_PNPM_LOCKFILE_CONFIG_MISMATCH` on any drift), so whichever key +/// this classification yields is the one BOTH surfaces edit. +#[derive(Debug, Clone, PartialEq, Eq)] +enum OverrideDisposition { + /// No same-name key: insert our canonical `name@version` key. + Insert, + /// A same-name key already points into `.socket/vendor/npm/` — ours + /// (any uuid; possibly a user key an earlier vendor took over). + /// Rewrite that key's value in place; our own value is never + /// recorded as an `original`. + Ours { key: String }, + /// A user-authored exact-version pin equal to the version being + /// vendored (`"tar-fs": "3.1.0"` or `"tar-fs@3.1.0": "3.1.0"`): take + /// the key over — rewrite its VALUE to the `file:` spec (the user's + /// pin already forces every `tar-fs` to this exact version, so + /// redirecting the same key preserves their semantics) and record + /// the pin as the wiring `original` so revert restores it exactly. + Takeover { key: String, original: String }, +} + +impl OverrideDisposition { + /// The override key both surfaces edit: the matched existing key, or + /// our canonical `name@version` on a fresh insert. + fn effective_key<'a>(&'a self, our_key: &'a str) -> &'a str { + match self { + OverrideDisposition::Insert => our_key, + OverrideDisposition::Ours { key } | OverrideDisposition::Takeover { key, .. } => key, + } + } } -/// A user-authored override already steering this package would be -/// silently fought over by ours; refuse instead (fail-closed). -fn check_pkg_override_conflict(pkg: &Value, name: &str, our_key: &str) -> Result<(), String> { +/// Classify the package.json override state for `name` (see +/// [`OverrideDisposition`]). `Err` is a genuine conflict (fail-closed): +/// a range/different-version value, a `parent>child` selector chain +/// (scoped to one dependent — our whole-graph rewrite has different +/// semantics), a non-string value, or several same-name keys. +fn classify_pkg_override( + pkg: &Value, + name: &str, + version: &str, + our_key: &str, +) -> Result { let Some(overrides) = pkg.get("pnpm").and_then(|p| p.get("overrides")) else { - return Ok(()); + return Ok(OverrideDisposition::Insert); }; let Some(map) = overrides.as_object() else { return Err("package.json pnpm.overrides is not an object".to_string()); }; + let mut found: Option = None; for (key, value) in map { if override_key_name(key) != name { continue; } + if found.is_some() { + return Err(format!( + "package.json carries more than one pnpm override for `{name}`; vendoring \ + cannot pick one — remove the extras first" + )); + } let value_str = value.as_str().unwrap_or(""); - if override_is_ours(key, value_str, our_key) { - continue; // ours (possibly a stale uuid) — the edit handles it + let classified = if key.contains('>') { + None + } else if is_vendor_value(value_str) { + Some(OverrideDisposition::Ours { key: key.clone() }) + } else if value_str == version && (key == name || key == our_key) { + Some(OverrideDisposition::Takeover { + key: key.clone(), + original: value_str.to_string(), + }) + } else { + None + }; + match classified { + Some(d) => found = Some(d), + None => { + return Err(format!( + "package.json already carries a pnpm override for `{key}` ({value}); \ + vendoring would fight it — remove the override (or vendor --revert) \ + first (an exact-version pin equal to {version} is taken over \ + automatically)" + )) + } } - return Err(format!( - "package.json already carries a pnpm override for `{key}` ({value}); vendoring \ - would fight it — remove the override (or vendor --revert) first" - )); } - Ok(()) + Ok(found.unwrap_or(OverrideDisposition::Insert)) } -/// Same conflict check against the lock's own `overrides:` section (a -/// desynced lock-side override would be silently clobbered otherwise). -fn check_lock_override_conflict(lines: &[String], name: &str, our_key: &str) -> Result<(), String> { +/// Lock-side mirror check against the effective key. Every same-name key +/// in the lock's `overrides:` section must BE `effective_key` (pnpm +/// requires the lock's override map to equal package.json's — a key-shape +/// drift means the pair is already desynced) with a value the edit can +/// own: ours, the exact pinned `version` (takeover), or already our spec. +/// A missing section/key is fine — the edit inserts it, restoring parity. +fn check_lock_override( + lines: &[String], + name: &str, + version: &str, + effective_key: &str, +) -> Result<(), String> { let Some((start, end)) = section_bounds(lines, "overrides") else { return Ok(()); }; for line in &lines[start + 1..end] { if let Some((key, _repr, rest)) = parse_key_line(line, 2) { - if override_key_name(&key) == name && !override_is_ours(&key, &rest, our_key) { + if override_key_name(&key) != name { + continue; + } + if key != effective_key { + return Err(format!( + "{PNPM_LOCK} carries an override key `{key}` for `{name}` that does not \ + match package.json's `{effective_key}` — the two override maps must \ + agree (run `pnpm install` to re-sync them) before vendoring" + )); + } + if !(is_vendor_value(&rest) || rest == version) { return Err(format!( "{PNPM_LOCK} already carries an override for `{key}` ({rest}); vendoring \ would fight it — remove the override (or vendor --revert) first" @@ -666,20 +760,24 @@ fn apply_pkg_override( if existing == Some(spec) { return Ok((false, false, false)); // in sync, no record } - // The conflict pre-flight guarantees any existing value here is OURS - // (a stale uuid): never record our own edit as the "original". - let was_ours = existing.is_some(); + // The classify pre-flight guarantees an existing value here is either + // OURS (a stale uuid — never recorded as an "original") or the user's + // exact-version pin being TAKEN OVER (recorded so revert restores it). + let was_present = existing.is_some(); + let original = existing + .filter(|v| !is_vendor_value(v)) + .map(|v| Value::String(v.to_string())); overrides.insert(our_key.to_string(), Value::String(spec.to_string())); wiring.push(WiringRecord { file: PACKAGE_JSON.to_string(), kind: KIND_PKG_OVERRIDE.to_string(), - action: if was_ours { + action: if was_present { WiringAction::Rewritten } else { WiringAction::Added }, key: Some(our_key.to_string()), - original: None, // Added has none; Rewritten-over-ours records none by design + original, new: Some(Value::String(spec.to_string())), }); Ok((true, created_pnpm_table, created_overrides_table)) @@ -695,7 +793,7 @@ fn edit_overrides( ctx: &EditCtx<'_>, wiring: &mut Vec, ) -> Result { - let our_key = ctx.reg_key(); + let our_key = ctx.override_key.to_string(); let entry_line = format!(" {}: {}", yaml_key(&our_key), ctx.spec); if let Some((start, end)) = section_bounds(lines, "overrides") { // Immutable scan first: our line's position (if present) + the last @@ -703,29 +801,38 @@ fn edit_overrides( let mut ours = None; let mut last_entry = start; for (i, line) in lines.iter().enumerate().take(end).skip(start + 1) { - if let Some((key, _repr, rest)) = parse_key_line(line, 2) { + if let Some((key, repr, rest)) = parse_key_line(line, 2) { last_entry = i; if key == our_key { - ours = Some((i, rest)); + ours = Some((i, repr, rest)); break; } } } - if let Some((i, rest)) = ours { + if let Some((i, repr, rest)) = ours { if rest == ctx.spec { return Ok(false); // in sync } - // Ours with a stale uuid (conflict pre-flight proved it). - lines[i] = entry_line; + // Ours with a stale uuid (no original), or the user's pinned + // value being TAKEN OVER (recorded as original; the live key + // repr/quoting is preserved so revert is byte-faithful). + let original = (!is_vendor_value(&rest)).then(|| rest.clone()); + lines[i] = format!(" {}: {}", yaml_key_like(&our_key, &repr), ctx.spec); wiring.push(overrides_record( &our_key, ctx.spec, WiringAction::Rewritten, + original, )); return Ok(true); } lines.insert(last_entry + 1, entry_line); - wiring.push(overrides_record(&our_key, ctx.spec, WiringAction::Added)); + wiring.push(overrides_record( + &our_key, + ctx.spec, + WiringAction::Added, + None, + )); return Ok(true); } // No overrides section: insert one right before `importers:` (with the @@ -736,17 +843,29 @@ fn edit_overrides( importers..importers, ["overrides:".to_string(), entry_line, String::new()], ); - wiring.push(overrides_record(&our_key, ctx.spec, WiringAction::Added)); + wiring.push(overrides_record( + &our_key, + ctx.spec, + WiringAction::Added, + None, + )); Ok(true) } -fn overrides_record(key: &str, spec: &str, action: WiringAction) -> WiringRecord { +fn overrides_record( + key: &str, + spec: &str, + action: WiringAction, + original: Option, +) -> WiringRecord { WiringRecord { file: PNPM_LOCK.to_string(), kind: KIND_LOCK_OVERRIDES.to_string(), action, key: Some(key.to_string()), - original: None, // Added, or rewritten-over-ours (never an original) + // `Some` only on a takeover (the user's pinned value); Added and + // rewritten-over-ours never record an original. + original: original.map(Value::String), new: Some(Value::String(spec.to_string())), } } @@ -1063,7 +1182,17 @@ fn revert_pkg_record( ))); return; } - overrides.shift_remove(key); + // A takeover recorded the user's pinned value as `original`: restore + // it in place (the key stays). A plain Added/Rewritten-over-ours + // record has no original — remove the key as before. + match rec.original.as_ref().and_then(Value::as_str) { + Some(orig) => { + overrides.insert(key.to_string(), Value::String(orig.to_string())); + } + None => { + overrides.shift_remove(key); + } + } *dirty = true; } @@ -1113,15 +1242,15 @@ fn revert_overrides_line( let mut ours_at = None; let mut others = 0usize; for (i, line) in lines.iter().enumerate().take(end).skip(start + 1) { - if let Some((k, _repr, rest)) = parse_key_line(line, 2) { + if let Some((k, repr, rest)) = parse_key_line(line, 2) { if k == key && ours_at.is_none() { - ours_at = Some((i, rest)); + ours_at = Some((i, repr, rest)); } else { others += 1; } } } - let Some((idx, rest)) = ours_at else { + let Some((idx, repr, rest)) = ours_at else { warnings.push(drifted(format!("overrides entry `{key}` no longer exists"))); return; }; @@ -1133,6 +1262,13 @@ fn revert_overrides_line( ))); return; } + // A takeover recorded the user's pinned value: restore it in place + // (key + quoting preserved; the section obviously stays). + if let Some(orig) = rec.original.as_ref().and_then(Value::as_str) { + lines[idx] = format!(" {}: {orig}", yaml_key_like(key, &repr)); + *dirty = true; + return; + } lines.remove(idx); *dirty = true; if others == 0 { @@ -2112,6 +2248,182 @@ snapshots: assert!(live_lock.contains("overrides:\n other-pkg: 2.0.0\n\nimporters:")); } + // ── exact-version pin takeover ───────────────────────────────────────── + + /// package.json with a user-authored override pin (`key: value`) plus the + /// matching lock-side `overrides:` mirror line. + fn pin_fixture_inputs(key: &str, value: &str) -> (String, String) { + let pkg = format!( + "{{\n \"name\": \"vendor-spike\",\n \"version\": \"1.0.0\",\n \"private\": true,\n \"dependencies\": {{\n \"consumer\": \"file:./consumer\",\n \"left-pad\": \"1.3.0\",\n \"left-pad-old\": \"npm:left-pad@1.2.0\"\n }},\n \"pnpm\": {{\n \"overrides\": {{\n \"{key}\": \"{value}\"\n }}\n }}\n}}\n" + ); + let lock = P1_BEFORE_LOCK.replace( + "importers:", + &format!("overrides:\n {key}: {value}\n\nimporters:"), + ); + (pkg, lock) + } + + /// A user-authored EXACT-version pin equal to the patched version is + /// taken over: the user's key keeps its spelling on both surfaces, its + /// value moves to our `file:` spec, the wiring records the pin as + /// `original`, and a full revert restores both files byte-identically. + #[tokio::test] + async fn user_exact_pin_bare_key_is_taken_over_and_revert_restores_it() { + let (pkg_before, lock_before) = pin_fixture_inputs("left-pad", "1.3.0"); + let fx = fixture_with(&pkg_before, &lock_before).await; + + let (result, entry, _) = expect_done(fx.vendor(false).await); + assert!(result.success, "{:?}", result.error); + let entry = entry.unwrap(); + + // package.json: the USER'S key (`left-pad`) now carries our spec; + // no `left-pad@1.3.0` key was added; tables pre-existed. + let pkg: Value = serde_json::from_str(&fx.read(PACKAGE_JSON).await).unwrap(); + let overrides = &pkg["pnpm"]["overrides"]; + assert_eq!( + overrides["left-pad"], + Value::String(format!("file:{}", fx.rel_tgz())) + ); + assert!(overrides.get("left-pad@1.3.0").is_none()); + assert_eq!( + entry.pnpm, + Some(PnpmMeta { + created_overrides_table: false, + created_pnpm_table: false + }) + ); + + // Lock: same key, same value (map parity — pnpm hard-checks it). + let live_lock = fx.read(PNPM_LOCK).await; + assert!( + live_lock.contains(&format!("overrides:\n left-pad: file:{}", fx.rel_tgz())), + "{live_lock}" + ); + + // Wiring: both override records carry the user's key, action + // Rewritten, and the pin as `original`. + for kind in [KIND_PKG_OVERRIDE, KIND_LOCK_OVERRIDES] { + let rec = entry + .wiring + .iter() + .find(|r| r.kind == kind) + .unwrap_or_else(|| panic!("no {kind} record: {:?}", entry.wiring)); + assert_eq!(rec.key.as_deref(), Some("left-pad"), "{kind}"); + assert_eq!(rec.action, WiringAction::Rewritten, "{kind}"); + assert_eq!( + rec.original, + Some(Value::String("1.3.0".to_string())), + "{kind}: the user's pin is the original" + ); + } + + // Full revert restores the pin on both surfaces byte-identically. + let outcome = revert_pnpm(&entry, fx.root(), false).await; + assert!(outcome.success, "{:?}", outcome.error); + assert_eq!(fx.read(PACKAGE_JSON).await, pkg_before); + assert_eq!(fx.read(PNPM_LOCK).await, lock_before); + } + + /// The versioned key shape (`left-pad@1.3.0: 1.3.0`) is taken over the + /// same way — the key happens to equal our canonical key. + #[tokio::test] + async fn user_exact_pin_versioned_key_is_taken_over() { + let (pkg_before, lock_before) = pin_fixture_inputs("left-pad@1.3.0", "1.3.0"); + let fx = fixture_with(&pkg_before, &lock_before).await; + + let (result, entry, _) = expect_done(fx.vendor(false).await); + assert!(result.success, "{:?}", result.error); + let entry = entry.unwrap(); + + let pkg: Value = serde_json::from_str(&fx.read(PACKAGE_JSON).await).unwrap(); + assert_eq!( + pkg["pnpm"]["overrides"]["left-pad@1.3.0"], + Value::String(format!("file:{}", fx.rel_tgz())) + ); + let rec = entry + .wiring + .iter() + .find(|r| r.kind == KIND_PKG_OVERRIDE) + .unwrap(); + assert_eq!(rec.original, Some(Value::String("1.3.0".to_string()))); + + // Revert restores the pin. + let outcome = revert_pnpm(&entry, fx.root(), false).await; + assert!(outcome.success, "{:?}", outcome.error); + assert_eq!(fx.read(PACKAGE_JSON).await, pkg_before); + assert_eq!(fx.read(PNPM_LOCK).await, lock_before); + } + + /// A second vendor over a taken-over key is the in-sync hot path: + /// AlreadyPatched, no new ledger entry, bytes stable. (Guards the + /// `Ours` classification accepting the user-keyed vendor value — the + /// old `key == our_key` requirement would refuse its own wiring.) + #[tokio::test] + async fn takeover_rerun_is_in_sync_and_records_nothing() { + let (pkg_before, lock_before) = pin_fixture_inputs("left-pad", "1.3.0"); + let fx = fixture_with(&pkg_before, &lock_before).await; + let (result, entry, _) = expect_done(fx.vendor(false).await); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_some()); + let pkg_after = fx.read(PACKAGE_JSON).await; + let lock_after = fx.read(PNPM_LOCK).await; + + let (result, entry, _) = expect_done(fx.vendor(false).await); + assert!(result.success, "{:?}", result.error); + assert!(entry.is_none(), "in-sync rerun records nothing"); + assert!(result + .files_verified + .iter() + .all(|v| v.status == crate::patch::apply::VerifyStatus::AlreadyPatched)); + assert_eq!(fx.read(PACKAGE_JSON).await, pkg_after, "bytes stable"); + assert_eq!(fx.read(PNPM_LOCK).await, lock_after, "bytes stable"); + } + + /// Selector chains and duplicate same-name keys still refuse — only a + /// plain exact pin is taken over. (Range keys and different-version + /// values are covered by `existing_user_override_for_the_name_is_refused`.) + #[tokio::test] + async fn chain_and_duplicate_override_keys_still_refuse() { + // `parent>child` chain, even with the exact version value. + let (pkg, lock) = pin_fixture_inputs("consumer>left-pad", "1.3.0"); + let fx = fixture_with(&pkg, &lock).await; + let detail = expect_refused(fx.vendor(false).await, "vendor_override_conflict"); + assert!(detail.contains("consumer>left-pad"), "{detail}"); + + // Two same-name keys (one ours-shaped pin + one bare pin). + let pkg = "{\n \"name\": \"x\",\n \"pnpm\": {\n \"overrides\": {\n \"left-pad\": \"1.3.0\",\n \"left-pad@1.3.0\": \"1.3.0\"\n }\n }\n}\n".to_string(); + let fx = fixture_with(&pkg, P1_BEFORE_LOCK).await; + let detail = expect_refused(fx.vendor(false).await, "vendor_override_conflict"); + assert!(detail.contains("more than one"), "{detail}"); + } + + /// pkg↔lock override-key shape drift refuses (pnpm itself would fail + /// `ERR_PNPM_LOCKFILE_CONFIG_MISMATCH`); a pkg-side pin with NO lock + /// mirror is fine — the edit inserts the same key, restoring parity. + #[tokio::test] + async fn takeover_lock_shape_mismatch_refuses_but_missing_section_inserts() { + // Shape drift: pkg keys `left-pad`, lock keys `left-pad@1.3.0`. + let (pkg, _) = pin_fixture_inputs("left-pad", "1.3.0"); + let lock = P1_BEFORE_LOCK.replace( + "importers:", + "overrides:\n left-pad@1.3.0: 1.3.0\n\nimporters:", + ); + let fx = fixture_with(&pkg, &lock).await; + let detail = expect_refused(fx.vendor(false).await, "vendor_override_conflict"); + assert!(detail.contains("must"), "{detail}"); + + // No lock overrides section at all: takeover inserts the pkg key. + let fx = fixture_with(&pkg, P1_BEFORE_LOCK).await; + let (result, entry, _) = expect_done(fx.vendor(false).await); + assert!(result.success, "{:?}", result.error); + let live_lock = fx.read(PNPM_LOCK).await; + assert!( + live_lock.contains(&format!("overrides:\n left-pad: file:{}", fx.rel_tgz())), + "lock key matches the pkg key: {live_lock}" + ); + assert!(entry.is_some()); + } + #[tokio::test] async fn created_tables_bookkeeping_and_revert_prunes_them() { // pnpm table exists (other keys), overrides created by us: revert diff --git a/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs b/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs index b7fa943..f4c7ecc 100644 --- a/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/yarn_berry_lock.rs @@ -195,6 +195,12 @@ pub async fn vendor_yarn_berry( format!("{PACKAGE_JSON} root is not an object"), ); }; + // A user-authored BARE-name pin to the exact version being vendored is + // TAKEN OVER (its value is rewritten to our spec — the pin already + // forced this exact version, so semantics are preserved — and recorded + // as the wiring `original` so revert restores it). Anything else + // same-name still refuses. + let mut takeover_original: Option = None; if let Some(res) = pkg_obj.get("resolutions") { let Some(res_obj) = res.as_object() else { return refused( @@ -210,19 +216,26 @@ pub async fn vendor_yarn_berry( continue; } // Our own (possibly stale-uuid) entry is fine to overwrite; a - // user-authored override is never clobbered. + // user-authored override is never clobbered silently. let ours = value .as_str() .is_some_and(|v| parse_vendor_path(v).is_some_and(|p| p.eco == "npm")); - if !ours { - return refused( - "vendor_override_conflict", - format!( - "{PACKAGE_JSON} already has a resolutions entry for `{selector}` \ - ({value}); vendor will not overwrite a user-authored override" - ), - ); + if ours { + continue; + } + if selector == name && value.as_str() == Some(version) { + takeover_original = Some(version.to_string()); + continue; } + return refused( + "vendor_override_conflict", + format!( + "{PACKAGE_JSON} already has a resolutions entry for `{selector}` \ + ({value}); vendor will not overwrite a user-authored override (an \ + exact-version pin `\"{name}\": \"{version}\"` is taken over \ + automatically)" + ), + ); } } @@ -394,16 +407,17 @@ pub async fn vendor_yarn_berry( WiringRecord { file: PACKAGE_JSON.to_string(), kind: KIND_RESOLUTION.to_string(), - // Rewritten only when replacing our own stale entry — and then - // there is deliberately no `original` (never record our own edit - // as a pre-vendor fragment). + // Rewritten when replacing our own stale entry (no `original` — + // never record our own edit as a pre-vendor fragment) or a + // taken-over user pin (whose value IS the `original`, restored + // verbatim on revert). action: if existing_entry { WiringAction::Rewritten } else { WiringAction::Added }, key: Some(name.to_string()), - original: None, + original: takeover_original.map(Value::String), new: Some(Value::String(spec)), }, WiringRecord { @@ -689,6 +703,13 @@ fn revert_resolution_record( )); return; } + // A takeover recorded the user's pinned value: restore it in place + // (the key and table stay). Otherwise remove our entry as before. + if let Some(orig) = rec.original.as_ref().and_then(Value::as_str) { + res_obj.insert(key.to_string(), Value::String(orig.to_string())); + *changed = true; + return; + } res_obj.shift_remove(key); if res_obj.is_empty() { obj.shift_remove("resolutions"); @@ -1357,6 +1378,62 @@ __metadata: assert_eq!(tokio::fs::read(fx.pkg_path()).await.unwrap(), fx.pkg_bytes); } + /// A user-authored BARE-name pin to the exact version being vendored is + /// taken over: the value moves to our spec, the wiring records the pin + /// as `original`, and revert restores it (table kept). Range-keyed + /// selectors keep refusing. + #[tokio::test] + async fn user_exact_pin_resolution_is_taken_over_and_revert_restores_it() { + let pkg_before = B3_BEFORE_PKG.replace( + " }\n}", + " },\n \"resolutions\": {\n \"left-pad\": \"1.3.0\"\n }\n}", + ); + let fx = fixture_with(&pkg_before, B3_BEFORE_LOCK).await; + + let (result, entry, _) = expect_done(fx.vendor(false).await); + assert!(result.success, "{:?}", result.error); + let entry = entry.unwrap(); + + let pkg: Value = + serde_json::from_slice(&tokio::fs::read(fx.pkg_path()).await.unwrap()).unwrap(); + let val = pkg["resolutions"]["left-pad"].as_str().unwrap(); + assert!( + parse_vendor_path(val).is_some_and(|p| p.eco == "npm"), + "pin value rewritten to our spec: {val}" + ); + + let rec = entry + .wiring + .iter() + .find(|r| r.kind == KIND_RESOLUTION) + .unwrap(); + assert_eq!(rec.action, WiringAction::Rewritten); + assert_eq!( + rec.original, + Some(Value::String("1.3.0".to_string())), + "the user's pin is the original" + ); + + // Revert restores the pin in place (the resolutions table stays). + let outcome = revert_yarn_berry(&entry, fx.root(), false).await; + assert!(outcome.success, "{:?}", outcome.error); + let pkg: Value = + serde_json::from_slice(&tokio::fs::read(fx.pkg_path()).await.unwrap()).unwrap(); + assert_eq!( + pkg["resolutions"]["left-pad"], + Value::String("1.3.0".to_string()), + "pin restored" + ); + + // A range-keyed selector with the same value still refuses. + let pkg = B3_BEFORE_PKG.replace( + " }\n}", + " },\n \"resolutions\": {\n \"left-pad@npm:1.x\": \"1.3.0\"\n }\n}", + ); + let fx = fixture_with(&pkg, B3_BEFORE_LOCK).await; + expect_refused(fx.vendor(false).await, "vendor_override_conflict"); + } + #[tokio::test] async fn missing_entry_and_other_version_guards() { // No left-pad entry at all. From adc517993f80bb1d883cc8b44600c0cbcc2b7d95 Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Thu, 11 Jun 2026 15:56:54 -0400 Subject: [PATCH 4/5] feat(scan): prune lifecycle for vendored packages scan --prune previously blanket-exempted vendored purls, so nothing ever cleaned unused vendored state: dropped patches kept their artifacts and overrides forever, removed dependencies stayed redirected, and orphan uuid dirs were only swept by vendor --revert. The prune pass now runs a vendored-state GC first (under the apply lock; contention degrades to a skip, never a scan failure): (a) entries whose patch is gone from the manifest are reverted (same stale test as the vendor flows' reconcile_dropped); (b) entries whose dependency left the lockfile graph are reverted and their manifest entries dropped, feeding the same pass's blob sweep. Per-flavor in-use probes: pnpm scans packages:/snapshots: blocks for the artifact (the mirrored overrides: declaration alone is not usage); package-lock/yarn/bun probe the lock text for the uuid dir (those flavors wire resolutions into the lock itself). None = cannot determine = keep, fail-safe; detached entries are exempt (lockfile-invisible by design); (c) orphan .socket/vendor// dirs are swept (extracted from run_revert into a shared sweep_orphan_vendor_dirs). JSON gc gains revertedVendoredEntries/removedVendorOrphanDirs (wet) and revertableVendoredEntries/vendorOrphanDirs (preview, which also mirrors the wet pass's manifest drops so blob counts agree); human output gains a GC summary line. CLI_CONTRACT.md updated. Co-Authored-By: Claude Fable 5 --- crates/socket-patch-cli/CLI_CONTRACT.md | 5 +- crates/socket-patch-cli/src/commands/scan.rs | 128 ++++- .../socket-patch-cli/src/commands/vendor.rs | 456 +++++++++++++++++- .../src/patch/vendor/npm_flavor.rs | 129 +++++ .../src/patch/vendor/pnpm_lock.rs | 86 ++++ 5 files changed, 767 insertions(+), 37 deletions(-) diff --git a/crates/socket-patch-cli/CLI_CONTRACT.md b/crates/socket-patch-cli/CLI_CONTRACT.md index 464ef6a..de9ef89 100644 --- a/crates/socket-patch-cli/CLI_CONTRACT.md +++ b/crates/socket-patch-cli/CLI_CONTRACT.md @@ -71,7 +71,7 @@ Beyond the globals above, each subcommand defines a small set of local arguments `scan --apply` opts JSON callers into the full discover → select → apply pipeline. Without it, `scan --json` stays read-only (discovery + `updates` array only). No effect outside `--json` mode — the non-JSON path always prompts the user interactively. -`scan --prune` opts into garbage collection. When set, `scan` removes manifest entries for packages no longer present in the crawl, then deletes orphan blob, diff, and package-archive files from `.socket/`. Off by default (v3.0) so a temporary uninstall doesn't silently destroy manifest state. +`scan --prune` opts into garbage collection. When set, `scan` removes manifest entries for packages no longer present in the crawl, then deletes orphan blob, diff, and package-archive files from `.socket/`. Off by default (v3.0) so a temporary uninstall doesn't silently destroy manifest state. The pass also reconciles vendored state (runs FIRST, under the apply lock — lock contention skips it without failing the scan): vendored entries whose patch is gone from the manifest are reverted, vendored entries whose dependency is no longer in the lockfile graph are reverted AND their manifest entries dropped (detached entries are exempt from both — they are manifest- and lockfile-invisible by design; a missing or undeterminable lockfile keeps the entry, fail-safe), and orphan `.socket/vendor//` dirs with no ledger entry are swept. The JSON `gc` sub-object gains `revertedVendoredEntries` + `removedVendorOrphanDirs` (wet) / `revertableVendoredEntries` + `vendorOrphanDirs` (preview). `scan` queries the patch API in `--batch-size` chunks. Authenticated runs POST `/v0/orgs/{slug}/patches/batch`; token-less runs POST `{proxy}/patch/batch` on the public proxy and degrade to per-package `GET /patch/by-package/:purl` requests in two cases: the deployed proxy predates the batch endpoint (legacy proxies answer the POST with their `400 "Unsupported endpoint"` catch-all), or the all-or-nothing batch validation rejects the chunk (e.g. a crawled PURL type the server doesn't recognize, such as `pkg:jsr/…` — the per-package path tolerates those individually, preserving the pre-batch scan semantics). Rate limits and over-capacity 503s surface instead of silently degrading. @@ -442,7 +442,8 @@ worse, lets a warm cache silently serve unpatched bytes): moved past the vendored uuid (that would break VEX verification with `vendor_uuid_mismatch` until a vendor run). The skip rides `apply.patches[]` as `skipped`/`vendored`; a newer available patch still surfaces in `updates[]` — the signal to run `scan --vendor`. `scan --prune` exempts - vendored purls (an absent installed copy is their NORMAL state, not grounds to prune). An + vendored purls from the crawl-based manifest prune (an absent installed copy is their NORMAL + state) but reconciles vendored state via the lockfile instead — see the `--prune` section. An explicit `get` is allowed to move the manifest past the vendored uuid and warns (`warnings[]` + stderr) that a `vendor` run must refresh the artifact. * **Old-binary skew caveat**: a pre-detached `socket-patch` binary running `vendor` against a diff --git a/crates/socket-patch-cli/src/commands/scan.rs b/crates/socket-patch-cli/src/commands/scan.rs index 182e10d..e9ebd7f 100644 --- a/crates/socket-patch-cli/src/commands/scan.rs +++ b/crates/socket-patch-cli/src/commands/scan.rs @@ -54,6 +54,12 @@ pub(crate) struct GcSummary { pub blobs: CleanupResult, pub diffs: CleanupResult, pub packages: CleanupResult, + /// Vendored entries reverted (or revertable, preview mode) because + /// their patch is gone from the manifest or their dependency left the + /// lockfile graph — see `vendor::run_vendor_gc`. Sorted. + pub vendored_reverted: Vec, + /// Orphan `.socket/vendor//` dirs swept (or sweepable). + pub vendor_orphan_dirs: usize, /// `true` when `--no-prune` was set; the sub-object only carries the /// `skipped: true` field in that case. pub skipped: bool, @@ -64,6 +70,17 @@ impl GcSummary { self.blobs.bytes_freed + self.diffs.bytes_freed + self.packages.bytes_freed } + /// Fold a vendored-state GC pass into this summary. + fn absorb_vendor_gc(&mut self, v: super::vendor::VendorGcSummary) { + self.vendored_reverted = v + .dropped_reverted + .into_iter() + .chain(v.unused_reverted) + .collect(); + self.vendored_reverted.sort(); + self.vendor_orphan_dirs = v.orphan_dirs; + } + /// Serialize for a *mutating* GC pass (post-apply). fn to_apply_json(&self) -> serde_json::Value { if self.skipped { @@ -74,6 +91,8 @@ impl GcSummary { "removedBlobs": self.blobs.blobs_removed, "removedDiffArchives": self.diffs.blobs_removed, "removedPackageArchives": self.packages.blobs_removed, + "revertedVendoredEntries": self.vendored_reverted, + "removedVendorOrphanDirs": self.vendor_orphan_dirs, "bytesFreed": self.total_bytes(), }) } @@ -88,6 +107,8 @@ impl GcSummary { "orphanBlobs": self.blobs.blobs_removed, "orphanDiffArchives": self.diffs.blobs_removed, "orphanPackageArchives": self.packages.blobs_removed, + "revertableVendoredEntries": self.vendored_reverted, + "vendorOrphanDirs": self.vendor_orphan_dirs, "bytesReclaimable": self.total_bytes(), }) } @@ -118,6 +139,7 @@ async fn run_gc( diffs, packages, skipped: false, + ..Default::default() } } @@ -127,16 +149,28 @@ async fn run_gc( /// `prune` flag — when GC isn't requested, simply don't call this function and /// don't emit a `gc` sub-object. async fn run_apply_gc( + common: &crate::args::GlobalArgs, manifest_path: &Path, socket_dir: &Path, scanned_purls: &HashSet, vendored: &HashSet, ) -> GcSummary { + // Vendored-state GC FIRST: it reverts manifest-dropped and + // lockfile-unused vendored entries, dropping the latter's manifest + // entries — so the manifest prune + blob sweep below reclaims their + // blobs in this same pass (and the stale `vendored` exemption set is + // harmless: the entries it would exempt are already gone). + let vendor_gc = super::vendor::run_vendor_gc(common, manifest_path, /*dry_run=*/ false).await; + // Re-read the just-written manifest (the apply step may have added // or updated entries we now want to consider for pruning). let mut manifest = match read_manifest(manifest_path).await { Ok(Some(m)) => m, - _ => return GcSummary::default(), + _ => { + let mut gc = GcSummary::default(); + gc.absorb_vendor_gc(vendor_gc); + return gc; + } }; let prunable = detect_prunable(&manifest, scanned_purls, vendored); for purl in &prunable { @@ -147,22 +181,42 @@ async fn run_apply_gc( // file-level cleanup below still operates on the in-memory copy. let _ = write_manifest(manifest_path, &manifest).await; } - run_gc(&manifest, prunable, socket_dir, /*dry_run=*/ false).await + let mut gc = run_gc(&manifest, prunable, socket_dir, /*dry_run=*/ false).await; + gc.absorb_vendor_gc(vendor_gc); + gc } /// Dry-run preview of the apply-mode GC pass. Same shape as /// [`run_apply_gc`] but emits `prunable*`/`orphan*` field names and /// performs no mutation. async fn preview_apply_gc( + common: &crate::args::GlobalArgs, manifest_path: &Path, socket_dir: &Path, scanned_purls: &HashSet, vendored: &HashSet, ) -> GcSummary { + // Read-only preview of the vendored-state GC (lists, never reverts). + let vendor_gc = super::vendor::run_vendor_gc(common, manifest_path, /*dry_run=*/ true).await; + let mut manifest = match read_manifest(manifest_path).await { Ok(Some(m)) => m, - _ => return GcSummary::default(), + _ => { + let mut gc = GcSummary::default(); + gc.absorb_vendor_gc(vendor_gc); + return gc; + } }; + // Mirror the wet pass: an unused vendored entry's manifest keys are + // dropped before the blob sweep, so drop them from the in-memory copy + // too — otherwise the preview under-reports orphan blobs/bytes + // relative to what the real `--prune` run frees. + for purl in &vendor_gc.unused_reverted { + let base = strip_purl_qualifiers(purl).to_string(); + manifest + .patches + .retain(|k, _| k != purl && strip_purl_qualifiers(k) != base); + } let prunable = detect_prunable(&manifest, scanned_purls, vendored); // Mirror `run_apply_gc`: drop the prunable entries from the manifest // *before* computing orphans (no write — this is the preview). The @@ -174,7 +228,9 @@ async fn preview_apply_gc( for purl in &prunable { manifest.patches.remove(purl); } - run_gc(&manifest, prunable, socket_dir, /*dry_run=*/ true).await + let mut gc = run_gc(&manifest, prunable, socket_dir, /*dry_run=*/ true).await; + gc.absorb_vendor_gc(vendor_gc); + gc } /// PURL strings present in the manifest but absent from `scanned_purls`. @@ -669,7 +725,7 @@ async fn run_vendor_json_path( result["vendor"] = preview_vendor_json(&args.common.cwd, &selected).await; if prune { let gc = - preview_apply_gc(manifest_path, socket_dir, scanned_purls, vendored_purls).await; + preview_apply_gc(&args.common, manifest_path, socket_dir, scanned_purls, vendored_purls).await; result["gc"] = gc.to_preview_json(); } let final_code = @@ -730,7 +786,7 @@ async fn run_vendor_json_path( // package_not_installed; vendored entries are exempt from // the prune itself. if prune { - let gc = run_apply_gc(manifest_path, socket_dir, scanned_purls, vendored_purls).await; + let gc = run_apply_gc(&args.common, manifest_path, socket_dir, scanned_purls, vendored_purls).await; result["gc"] = gc.to_apply_json(); } @@ -816,7 +872,7 @@ async fn run_vendor_interactive_path( // GC before the vendor step (see the JSON path): stale manifest // entries would fail vendoring with package_not_installed. if prune { - let gc = run_apply_gc(manifest_path, socket_dir, scanned_purls, vendored_purls).await; + let gc = run_apply_gc(&args.common, manifest_path, socket_dir, scanned_purls, vendored_purls).await; if !gc.pruned.is_empty() { println!("GC: pruned {} manifest entr{}.", gc.pruned.len(), { if gc.pruned.len() == 1 { @@ -826,6 +882,15 @@ async fn run_vendor_interactive_path( } }); } + if !gc.vendored_reverted.is_empty() || gc.vendor_orphan_dirs > 0 { + println!( + "GC: reverted {} vendored entr{}; swept {} orphan vendor dir{}.", + gc.vendored_reverted.len(), + if gc.vendored_reverted.len() == 1 { "y" } else { "ies" }, + gc.vendor_orphan_dirs, + if gc.vendor_orphan_dirs == 1 { "" } else { "s" }, + ); + } } match boxed_scan_vendor_step( &args.common, @@ -1567,10 +1632,10 @@ pub async fn run(args: ScanArgs) -> i32 { // --- GC (if requested) -------------------------------------- if prune { let gc = if dry { - preview_apply_gc(&manifest_path, &socket_dir, &scanned_purls, &vendored_purls) + preview_apply_gc(&args.common, &manifest_path, &socket_dir, &scanned_purls, &vendored_purls) .await } else { - run_apply_gc(&manifest_path, &socket_dir, &scanned_purls, &vendored_purls).await + run_apply_gc(&args.common, &manifest_path, &socket_dir, &scanned_purls, &vendored_purls).await }; result["gc"] = if dry { gc.to_preview_json() @@ -1620,9 +1685,9 @@ pub async fn run(args: ScanArgs) -> i32 { // --- GC-only path (no --apply, just --prune) -------------------- if prune { let gc = if dry { - preview_apply_gc(&manifest_path, &socket_dir, &scanned_purls, &vendored_purls).await + preview_apply_gc(&args.common, &manifest_path, &socket_dir, &scanned_purls, &vendored_purls).await } else { - run_apply_gc(&manifest_path, &socket_dir, &scanned_purls, &vendored_purls).await + run_apply_gc(&args.common, &manifest_path, &socket_dir, &scanned_purls, &vendored_purls).await }; result["gc"] = if dry { gc.to_preview_json() @@ -2033,7 +2098,7 @@ pub async fn run(args: ScanArgs) -> i32 { // run `socket-patch gc` (or `repair`) explicitly. (Vendor mode // already ran its GC before the vendor step.) if prune && !args.vendor { - let gc = run_apply_gc(&manifest_path, &socket_dir, &scanned_purls, &vendored_purls).await; + let gc = run_apply_gc(&args.common, &manifest_path, &socket_dir, &scanned_purls, &vendored_purls).await; let total = gc.blobs.blobs_removed + gc.diffs.blobs_removed + gc.packages.blobs_removed; if !args.common.silent && (!gc.pruned.is_empty() || total > 0) { println!( @@ -2045,6 +2110,15 @@ pub async fn run(args: ScanArgs) -> i32 { socket_patch_core::utils::cleanup_blobs::format_bytes(gc.total_bytes()), ); } + if !args.common.silent && (!gc.vendored_reverted.is_empty() || gc.vendor_orphan_dirs > 0) { + println!( + "GC: reverted {} vendored entr{}; swept {} orphan vendor dir{}.", + gc.vendored_reverted.len(), + if gc.vendored_reverted.len() == 1 { "y" } else { "ies" }, + gc.vendor_orphan_dirs, + if gc.vendor_orphan_dirs == 1 { "" } else { "s" }, + ); + } } embed_vex_human(&args.common, &args.vex, &manifest_path, code).await @@ -2235,6 +2309,16 @@ mod tests { HashSet::new() } + /// GlobalArgs rooted at the test project dir (the vendored-state GC + /// loads `.socket/vendor/state.json` from `cwd`; these fixtures have + /// none, so the vendor pass is a no-op). + fn gc_common(cwd: &Path) -> crate::args::GlobalArgs { + crate::args::GlobalArgs { + cwd: cwd.to_path_buf(), + ..Default::default() + } + } + #[test] fn detect_prunable_empty_manifest_empty_scanned() { let m = PatchManifest::new(); @@ -2415,7 +2499,14 @@ mod tests { seed_manifest_with_blob(tmp.path(), "pkg:npm/gone@1.0.0", &after_hash); let scanned: HashSet = HashSet::new(); - let preview = preview_apply_gc(&manifest_path, &socket_dir, &scanned, &no_vendored()).await; + let preview = preview_apply_gc( + &gc_common(tmp.path()), + &manifest_path, + &socket_dir, + &scanned, + &no_vendored(), + ) + .await; assert_eq!( preview.pruned, @@ -2452,13 +2543,20 @@ mod tests { let (mp_p, sd_p, blob_p) = seed_manifest_with_blob(tmp_preview.path(), "pkg:npm/gone@1.0.0", &after_hash); let scanned: HashSet = HashSet::new(); - let preview = preview_apply_gc(&mp_p, &sd_p, &scanned, &no_vendored()).await; + let preview = preview_apply_gc( + &gc_common(tmp_preview.path()), + &mp_p, + &sd_p, + &scanned, + &no_vendored(), + ) + .await; assert!(blob_p.exists(), "preview must not mutate"); let tmp_wet = tempfile::tempdir().unwrap(); let (mp_w, sd_w, blob_w) = seed_manifest_with_blob(tmp_wet.path(), "pkg:npm/gone@1.0.0", &after_hash); - let wet = run_apply_gc(&mp_w, &sd_w, &scanned, &no_vendored()).await; + let wet = run_apply_gc(&gc_common(tmp_wet.path()), &mp_w, &sd_w, &scanned, &no_vendored()).await; assert_eq!( preview.blobs.blobs_removed, wet.blobs.blobs_removed, diff --git a/crates/socket-patch-cli/src/commands/vendor.rs b/crates/socket-patch-cli/src/commands/vendor.rs index 94fd6e7..d032439 100644 --- a/crates/socket-patch-cli/src/commands/vendor.rs +++ b/crates/socket-patch-cli/src/commands/vendor.rs @@ -239,6 +239,62 @@ pub(crate) async fn dispatch_revert_one( } } +/// Is this vendored entry still consumed by its project's lockfile +/// dependency graph? `None` = cannot determine — callers must keep the +/// entry (fail-safe): non-npm ecosystems have no in-use probe yet, and a +/// missing/unreadable lockfile proves nothing. +pub(crate) async fn dispatch_in_use_one(entry: &VendorEntry, project_root: &Path) -> Option { + match entry.ecosystem.as_str() { + "npm" => { + socket_patch_core::patch::vendor::npm_flavor::vendored_entry_in_use( + entry, + project_root, + ) + .await + } + _ => None, + } +} + +/// Uuid dirs under `.socket/vendor//` with no owning `(eco, uuid)` +/// ledger entry (a hand-edited state file, or artifacts left by an +/// interrupted run). The lockfile wiring for these is already gone or +/// owned by a recorded entry, so removal is safe; removed unless +/// `dry_run`. Unparseable dirs are never returned (and never deleted). +/// Returns the orphans so callers can emit events / counts. +pub(crate) async fn sweep_orphan_vendor_dirs( + cwd: &Path, + state: &socket_patch_core::patch::vendor::VendorState, + dry_run: bool, +) -> Vec { + let recorded_units: HashSet<(&str, &str)> = state + .entries + .values() + .map(|e| (e.ecosystem.as_str(), e.uuid.as_str())) + .collect(); + let mut orphans = Vec::new(); + for unit in vendor::path::sweep_vendor_dirs(cwd).await { + if recorded_units.contains(&(unit.eco.as_str(), unit.uuid.as_str())) { + continue; + } + if !dry_run { + let _ = remove_tree(&unit.dir).await; + } + orphans.push(unit); + } + orphans +} + +/// Does `eco` fall inside this run's `--ecosystems` scope? +pub(crate) fn ecosystem_in_scope(common: &GlobalArgs, eco: &str) -> bool { + match common.ecosystems.as_deref() { + None => true, + Some(list) => list.iter().any(|e| { + e.eq_ignore_ascii_case(eco) || (eco == "golang" && e.eq_ignore_ascii_case("go")) + }), + } +} + /// Surface a backend warning: stderr line for humans, a Skipped event with /// the stable code for JSON consumers (Skipped never flips the status). fn record_warning(env: &mut Envelope, purl: &str, warning: &VendorWarning, common: &GlobalArgs) { @@ -767,12 +823,6 @@ pub(crate) async fn reconcile_dropped( // Respect this run's --ecosystems scope: a `vendor --ecosystems npm` // invocation must not silently revert a cargo/go entry (restoring its // lockfile and deleting its artifact) as a cross-ecosystem side effect. - let in_scope = |eco: &str| match common.ecosystems.as_deref() { - None => true, - Some(list) => list.iter().any(|e| { - e.eq_ignore_ascii_case(eco) || (eco == "golang" && e.eq_ignore_ascii_case("go")) - }), - }; let stale: Vec = state .entries .iter() @@ -782,7 +832,7 @@ pub(crate) async fn reconcile_dropped( // normal state, not a drop — only `vendor --revert` or // `remove` may undo them. !entry.detached - && in_scope(&entry.ecosystem) + && ecosystem_in_scope(common, &entry.ecosystem) && !manifest.patches.contains_key(*purl) && !manifest.patches.contains_key(&entry.base_purl) }) @@ -875,19 +925,7 @@ async fn run_revert(args: &VendorArgs, env: &mut Envelope) -> i32 { // state file, or artifacts left by an interrupted run). The lockfile // wiring for these is already gone or owned by a recorded entry, so // removal is safe; unparseable dirs are reported, never deleted. - let swept = vendor::path::sweep_vendor_dirs(&common.cwd).await; - let recorded_units: HashSet<(&str, &str)> = state - .entries - .values() - .map(|e| (e.ecosystem.as_str(), e.uuid.as_str())) - .collect(); - for unit in swept { - if recorded_units.contains(&(unit.eco.as_str(), unit.uuid.as_str())) { - continue; - } - if !common.dry_run { - let _ = remove_tree(&unit.dir).await; - } + for unit in sweep_orphan_vendor_dirs(&common.cwd, &state, common.dry_run).await { let label = unit .purls .first() @@ -925,3 +963,381 @@ async fn run_revert(args: &VendorArgs, env: &mut Envelope) -> i32 { 0 } } + +// ───────────────────────── prune-time vendored GC ───────────────────────── + +/// Summary of the vendored-state GC pass `scan --prune` runs (wet or +/// preview). Purls are the state-ledger keys (manifest spelling). +#[derive(Debug, Default)] +pub(crate) struct VendorGcSummary { + /// (a) entries whose patch is gone from the manifest — reverted. + pub dropped_reverted: Vec, + /// (b) entries whose package left the lockfile dependency graph — + /// reverted, and their manifest entries dropped. + pub unused_reverted: Vec, + /// (c) orphan uuid dirs (no owning ledger entry) swept. + pub orphan_dirs: usize, + /// Entries that could not be reverted (kept in the ledger), plus any + /// pass-level skip marker (e.g. lock contention). + pub failed: Vec, +} + +/// The vendored-state GC behind `scan --prune`: +/// +/// (a) revert entries whose patch was dropped from the manifest (same +/// stale test as [`reconcile_dropped`], shared with the vendor flows); +/// (b) revert entries whose dependency is no longer in the lockfile graph +/// ([`dispatch_in_use_one`] == `Some(false)`; `None` keeps, fail-safe) +/// and drop their manifest entries so the caller's manifest prune + +/// blob sweep reclaims the rest in the same pass; +/// (c) sweep orphan uuid dirs. +/// +/// Detached entries are exempt from BOTH (a) (never manifest-tracked) and +/// (b) (lockfile-invisible by design — the probe would always call them +/// unused). A missing/unreadable manifest skips (a) only (a prune must +/// not mass-revert on a deleted manifest — that is `vendor --revert`'s +/// explicit contract). +/// +/// Wet runs take the apply lock (lockfiles + the manifest are rewritten); +/// contention records a skip marker and returns — it never fails the +/// scan. Dry runs are read-only, lock-free, and list-only. +pub(crate) async fn run_vendor_gc( + common: &GlobalArgs, + manifest_path: &Path, + dry_run: bool, +) -> VendorGcSummary { + let mut out = VendorGcSummary::default(); + let mut state = match load_state(&common.cwd).await { + Ok(s) if !s.entries.is_empty() => s, + // No ledger (or unreadable): only the orphan sweep could apply, and + // without a trustworthy ledger it must not delete anything. + _ => return out, + }; + + let socket_dir = manifest_path + .parent() + .map(Path::to_path_buf) + .unwrap_or_else(|| common.cwd.clone()); + let _guard = if dry_run { + None + } else { + match socket_patch_core::patch::apply_lock::acquire(&socket_dir, Duration::from_secs(0)) { + Ok(g) => Some(g), + Err(_) => { + out.failed.push( + "vendor GC skipped: another socket-patch run holds the apply lock".to_string(), + ); + return out; + } + } + }; + + // (a) manifest-dropped entries. + let mut manifest = socket_patch_core::manifest::operations::read_manifest(manifest_path) + .await + .ok() + .flatten(); + if let Some(m) = &manifest { + let stale: Vec = state + .entries + .iter() + .filter(|(purl, entry)| { + !entry.detached + && ecosystem_in_scope(common, &entry.ecosystem) + && !m.patches.contains_key(*purl) + && !m.patches.contains_key(&entry.base_purl) + }) + .map(|(purl, _)| purl.clone()) + .collect(); + for purl in stale { + if dry_run { + out.dropped_reverted.push(purl); + continue; + } + let entry = state.entries.get(&purl).cloned().expect("listed above"); + if dispatch_revert_one(&entry, &common.cwd, false).await.success { + state.entries.remove(&purl); + out.dropped_reverted.push(purl); + } else { + out.failed.push(purl); + } + } + } + + // (b) lockfile-unused entries. + let mut manifest_dirty = false; + let candidates: Vec = state + .entries + .iter() + .filter(|(_, entry)| !entry.detached && ecosystem_in_scope(common, &entry.ecosystem)) + .map(|(purl, _)| purl.clone()) + .collect(); + for purl in candidates { + let entry = state.entries.get(&purl).cloned().expect("listed above"); + if dispatch_in_use_one(&entry, &common.cwd).await != Some(false) { + continue; // in use, or cannot determine — keep + } + if dry_run { + out.unused_reverted.push(purl); + continue; + } + if !dispatch_revert_one(&entry, &common.cwd, false).await.success { + out.failed.push(purl); + continue; + } + state.entries.remove(&purl); + if let Some(m) = manifest.as_mut() { + let base = strip_purl_qualifiers(&entry.base_purl).to_string(); + let dropped: Vec = m + .patches + .keys() + .filter(|k| *k == &purl || strip_purl_qualifiers(k) == base) + .cloned() + .collect(); + for k in dropped { + m.patches.remove(&k); + manifest_dirty = true; + } + } + out.unused_reverted.push(purl); + } + + if !dry_run { + let _ = save_state(&common.cwd, &state).await; + if manifest_dirty { + if let Some(m) = &manifest { + let _ = + socket_patch_core::manifest::operations::write_manifest(manifest_path, m).await; + } + } + } + + // (c) orphan uuid dirs, against the post-removal ledger. + out.orphan_dirs = sweep_orphan_vendor_dirs(&common.cwd, &state, dry_run) + .await + .len(); + out +} + +#[cfg(test)] +mod gc_tests { + use super::*; + use socket_patch_core::manifest::operations::{read_manifest, write_manifest}; + use socket_patch_core::patch::vendor::state::VendorArtifact; + use socket_patch_core::patch::vendor::VendorState; + use std::path::PathBuf; + + const UUID: &str = "9f6b2c4e-1d3a-4f6b-8c2d-7e5a9b1c3d5f"; + const PURL: &str = "pkg:npm/left-pad@1.3.0"; + + fn entry(detached: bool) -> VendorEntry { + VendorEntry { + ecosystem: "npm".into(), + base_purl: PURL.into(), + uuid: UUID.into(), + artifact: VendorArtifact { + path: format!(".socket/vendor/npm/{UUID}/left-pad-1.3.0.tgz"), + sha256: String::new(), + size: None, + platform_locked: None, + }, + wiring: Vec::new(), + lock: None, + took_over_go_patches: false, + detached, + record: None, + flavor: Some("package-lock".into()), + uv: None, + pnpm: None, + poetry: None, + pdm: None, + pipenv: None, + } + } + + /// Tempdir with: a manifest carrying PURL, a ledger with one entry, + /// the artifact on disk, and a package-lock that resolves to it. + async fn gc_fixture(detached: bool) -> (tempfile::TempDir, GlobalArgs, PathBuf) { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let socket = root.join(".socket"); + tokio::fs::create_dir_all(socket.join(format!("vendor/npm/{UUID}"))) + .await + .unwrap(); + tokio::fs::write( + socket.join(format!("vendor/npm/{UUID}/left-pad-1.3.0.tgz")), + b"tgz", + ) + .await + .unwrap(); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + PURL.to_string(), + socket_patch_core::manifest::schema::PatchRecord { + uuid: UUID.to_string(), + exported_at: String::new(), + files: HashMap::new(), + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + let manifest_path = socket.join("manifest.json"); + write_manifest(&manifest_path, &manifest).await.unwrap(); + + let mut state = VendorState::default(); + state.entries.insert(PURL.to_string(), entry(detached)); + save_state(root, &state).await.unwrap(); + + tokio::fs::write( + root.join("package-lock.json"), + format!( + "{{\"packages\":{{\"node_modules/left-pad\":{{\"resolved\":\"file:.socket/vendor/npm/{UUID}/left-pad-1.3.0.tgz\"}}}}}}" + ), + ) + .await + .unwrap(); + + let common = GlobalArgs { + cwd: root.to_path_buf(), + json: true, + silent: true, + ..GlobalArgs::default() + }; + (tmp, common, manifest_path) + } + + /// In-manifest + in-lock: the GC keeps everything. + #[tokio::test] + async fn vendor_gc_keeps_in_use_entries() { + let (tmp, common, manifest_path) = gc_fixture(false).await; + let out = run_vendor_gc(&common, &manifest_path, false).await; + assert!(out.dropped_reverted.is_empty(), "{out:?}"); + assert!(out.unused_reverted.is_empty(), "{out:?}"); + assert_eq!(out.orphan_dirs, 0); + assert!(load_state(tmp.path()).await.unwrap().entries.contains_key(PURL)); + } + + /// (a) the patch is gone from the manifest: revert + drop the entry. + #[tokio::test] + async fn vendor_gc_reverts_manifest_dropped_entry() { + let (tmp, common, manifest_path) = gc_fixture(false).await; + write_manifest(&manifest_path, &PatchManifest::new()) + .await + .unwrap(); + + let out = run_vendor_gc(&common, &manifest_path, false).await; + assert_eq!(out.dropped_reverted, vec![PURL.to_string()], "{out:?}"); + assert!(out.failed.is_empty(), "{out:?}"); + assert!(load_state(tmp.path()).await.unwrap().entries.is_empty()); + assert!( + !tmp.path().join(format!(".socket/vendor/npm/{UUID}")).exists(), + "artifact dir removed by the revert" + ); + } + + /// (b) the dependency left the lockfile graph: revert + drop BOTH the + /// ledger entry and the manifest entry. + #[tokio::test] + async fn vendor_gc_reverts_unused_entry_and_drops_manifest_entry() { + let (tmp, common, manifest_path) = gc_fixture(false).await; + // Re-lock without the dependency (no reference to the artifact). + tokio::fs::write(tmp.path().join("package-lock.json"), "{\"packages\":{}}") + .await + .unwrap(); + + let out = run_vendor_gc(&common, &manifest_path, false).await; + assert_eq!(out.unused_reverted, vec![PURL.to_string()], "{out:?}"); + assert!(load_state(tmp.path()).await.unwrap().entries.is_empty()); + let manifest = read_manifest(&manifest_path).await.unwrap().unwrap(); + assert!( + !manifest.patches.contains_key(PURL), + "the unused entry's manifest record is dropped too" + ); + } + + /// Dry run lists without mutating anything. + #[tokio::test] + async fn vendor_gc_dry_run_is_read_only() { + let (tmp, common, manifest_path) = gc_fixture(false).await; + tokio::fs::write(tmp.path().join("package-lock.json"), "{\"packages\":{}}") + .await + .unwrap(); + let state_before = tokio::fs::read(tmp.path().join(".socket/vendor/state.json")) + .await + .unwrap(); + let manifest_before = tokio::fs::read(&manifest_path).await.unwrap(); + + let out = run_vendor_gc(&common, &manifest_path, true).await; + assert_eq!(out.unused_reverted, vec![PURL.to_string()], "{out:?}"); + assert_eq!( + tokio::fs::read(tmp.path().join(".socket/vendor/state.json")) + .await + .unwrap(), + state_before, + "dry run must not touch the ledger" + ); + assert_eq!( + tokio::fs::read(&manifest_path).await.unwrap(), + manifest_before, + "dry run must not touch the manifest" + ); + assert!( + tmp.path().join(format!(".socket/vendor/npm/{UUID}")).exists(), + "dry run must not remove artifacts" + ); + } + + /// A missing/undeterminable lockfile keeps the entry (fail-safe), and a + /// DETACHED entry is exempt from both (a) and (b). + #[tokio::test] + async fn vendor_gc_keeps_undeterminable_and_detached_entries() { + // Lock removed entirely: probe says None → keep. + let (tmp, common, manifest_path) = gc_fixture(false).await; + tokio::fs::remove_file(tmp.path().join("package-lock.json")) + .await + .unwrap(); + let out = run_vendor_gc(&common, &manifest_path, false).await; + assert!(out.unused_reverted.is_empty(), "{out:?}"); + assert!(load_state(tmp.path()).await.unwrap().entries.contains_key(PURL)); + + // Detached entry: absent from the manifest AND lockfile-invisible — + // exactly its normal state. Never reverted by the GC. + let (tmp, common, manifest_path) = gc_fixture(true).await; + write_manifest(&manifest_path, &PatchManifest::new()) + .await + .unwrap(); + tokio::fs::write(tmp.path().join("package-lock.json"), "{\"packages\":{}}") + .await + .unwrap(); + let out = run_vendor_gc(&common, &manifest_path, false).await; + assert!(out.dropped_reverted.is_empty(), "{out:?}"); + assert!(out.unused_reverted.is_empty(), "{out:?}"); + assert!(load_state(tmp.path()).await.unwrap().entries.contains_key(PURL)); + } + + /// (c) uuid dirs with no owning ledger entry are swept (wet) / counted + /// (dry). + #[tokio::test] + async fn vendor_gc_sweeps_orphan_uuid_dirs() { + let (tmp, common, manifest_path) = gc_fixture(false).await; + let orphan_uuid = "1a2b3c4d-5e6f-4a1b-8c2d-9e0f1a2b3c4d"; + let orphan_dir = tmp.path().join(format!(".socket/vendor/npm/{orphan_uuid}")); + tokio::fs::create_dir_all(&orphan_dir).await.unwrap(); + tokio::fs::write(orphan_dir.join("left-pad-1.3.0.tgz"), b"tgz") + .await + .unwrap(); + + let out = run_vendor_gc(&common, &manifest_path, true).await; + assert_eq!(out.orphan_dirs, 1, "{out:?}"); + assert!(orphan_dir.exists(), "dry run keeps the orphan"); + + let out = run_vendor_gc(&common, &manifest_path, false).await; + assert_eq!(out.orphan_dirs, 1, "{out:?}"); + assert!(!orphan_dir.exists(), "wet run sweeps the orphan"); + // The recorded entry's dir survives the sweep. + assert!(tmp.path().join(format!(".socket/vendor/npm/{UUID}")).exists()); + } +} diff --git a/crates/socket-patch-core/src/patch/vendor/npm_flavor.rs b/crates/socket-patch-core/src/patch/vendor/npm_flavor.rs index eb04e9c..b0efc65 100644 --- a/crates/socket-patch-core/src/patch/vendor/npm_flavor.rs +++ b/crates/socket-patch-core/src/patch/vendor/npm_flavor.rs @@ -366,6 +366,54 @@ pub async fn vendor_npm_any( outcome } +/// Is this npm-vendored entry still consumed by its lockfile's dependency +/// graph? +/// +/// `Some(true)`: the lockfile still resolves something to the entry's +/// artifact. `Some(false)`: the lockfile is present and parses but no +/// resolution references `.socket/vendor/npm//` — the dependency +/// was removed and re-locked, so the vendoring is unused (an override/ +/// resolutions DECLARATION alone does not count: pnpm's mirrored +/// `overrides:` section is excluded by the flavor probe, and the other +/// flavors carry no declaration inside the lock at all). `None`: cannot +/// determine (missing lock, unknown flavor) — callers keep the entry, +/// fail-safe. Detached entries are lockfile-invisible BY DESIGN and must +/// never be routed here (the probe would always call them unused). +pub async fn vendored_entry_in_use(entry: &VendorEntry, project_root: &Path) -> Option { + match entry.flavor.as_deref() { + Some("pnpm") => super::pnpm_lock::pnpm_entry_in_use(entry, project_root).await, + // The remaining flavors wire resolutions into the lock itself + // (resolved URLs / file: ranges / package tuples), so a textual + // probe for the uuid dir is exact: the path appears iff some + // resolution still points at the artifact. shrinkwrap wins over + // package-lock, mirroring the vendor/revert lockfile selection. + None | Some("package-lock") => { + lock_text_mentions_uuid( + project_root, + &["npm-shrinkwrap.json", "package-lock.json"], + &entry.uuid, + ) + .await + } + Some("yarn-classic") | Some("yarn-berry") => { + lock_text_mentions_uuid(project_root, &["yarn.lock"], &entry.uuid).await + } + Some("bun") => lock_text_mentions_uuid(project_root, &["bun.lock"], &entry.uuid).await, + Some(_) => None, // unknown flavor: cannot determine + } +} + +/// First readable lockfile from `names`, probed for the uuid artifact dir. +async fn lock_text_mentions_uuid(project_root: &Path, names: &[&str], uuid: &str) -> Option { + let needle = format!(".socket/vendor/npm/{uuid}/"); + for name in names { + if let Ok(text) = tokio::fs::read_to_string(project_root.join(name)).await { + return Some(text.contains(&needle)); + } + } + None +} + /// Revert one recorded npm vendor entry through the flavor that wired it. /// Entries from before the flavor field existed (`None`) are package-lock /// wirings; an unknown flavor fails CLOSED (an older binary must not guess @@ -773,4 +821,85 @@ mod tests { assert!(outcome.success, "flavor {flavor:?}: {:?}", outcome.error); } } + + /// One minimal entry per flavor for the in-use probe. + fn probe_entry(flavor: Option<&str>) -> VendorEntry { + VendorEntry { + ecosystem: "npm".into(), + base_purl: "pkg:npm/left-pad@1.3.0".into(), + uuid: UUID.into(), + artifact: VendorArtifact { + path: format!(".socket/vendor/npm/{UUID}/left-pad-1.3.0.tgz"), + sha256: String::new(), + size: None, + platform_locked: None, + }, + wiring: Vec::new(), + lock: None, + took_over_go_patches: false, + detached: false, + record: None, + flavor: flavor.map(str::to_string), + uv: None, + pnpm: None, + poetry: None, + pdm: None, + pipenv: None, + } + } + + /// The textual flavors: a resolution pointing at the uuid dir means in + /// use; a clean lock means unused; a missing lock or unknown flavor + /// cannot be determined (keep, fail-safe). + #[tokio::test] + async fn vendored_entry_in_use_textual_flavors() { + let entry = probe_entry(Some("package-lock")); + + // Missing lock: undeterminable. + let tmp = tempfile::tempdir().unwrap(); + assert_eq!(vendored_entry_in_use(&entry, tmp.path()).await, None); + + // Lock resolves to our artifact: in use. + touch( + tmp.path(), + "package-lock.json", + &format!( + "{{\"packages\":{{\"node_modules/left-pad\":{{\"resolved\":\"file:.socket/vendor/npm/{UUID}/left-pad-1.3.0.tgz\"}}}}}}" + ), + ) + .await; + assert_eq!(vendored_entry_in_use(&entry, tmp.path()).await, Some(true)); + + // Dep removed + re-locked (no reference left): unused. + touch(tmp.path(), "package-lock.json", "{\"packages\":{}}").await; + assert_eq!(vendored_entry_in_use(&entry, tmp.path()).await, Some(false)); + + // shrinkwrap wins over package-lock (same precedence as vendoring). + touch( + tmp.path(), + "npm-shrinkwrap.json", + &format!( + "{{\"packages\":{{\"node_modules/left-pad\":{{\"resolved\":\"file:.socket/vendor/npm/{UUID}/left-pad-1.3.0.tgz\"}}}}}}" + ), + ) + .await; + assert_eq!(vendored_entry_in_use(&entry, tmp.path()).await, Some(true)); + + // yarn flavors probe yarn.lock. + let entry = probe_entry(Some("yarn-classic")); + let tmp = tempfile::tempdir().unwrap(); + touch( + tmp.path(), + "yarn.lock", + &format!("left-pad@1.3.0:\n resolved \"file:./.socket/vendor/npm/{UUID}/left-pad-1.3.0.tgz#abc\"\n"), + ) + .await; + assert_eq!(vendored_entry_in_use(&entry, tmp.path()).await, Some(true)); + touch(tmp.path(), "yarn.lock", "# yarn lockfile v1\n").await; + assert_eq!(vendored_entry_in_use(&entry, tmp.path()).await, Some(false)); + + // Unknown flavor: undeterminable, fail-safe keep. + let entry = probe_entry(Some("future-pm")); + assert_eq!(vendored_entry_in_use(&entry, tmp.path()).await, None); + } } diff --git a/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs b/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs index 433f3b2..7b21bac 100644 --- a/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs +++ b/crates/socket-patch-core/src/patch/vendor/pnpm_lock.rs @@ -314,6 +314,46 @@ pub async fn vendor_pnpm( } } +/// Is this pnpm-vendored entry still consumed by the lock's dependency +/// graph? +/// +/// `Some(true)`: a `packages:`/`snapshots:` block resolves to the entry's +/// artifact (`@file:.socket/vendor/npm//...`) — some importer +/// still depends on the package. `Some(false)`: the lock parses cleanly +/// and carries NO such block — the dependency was removed and re-locked +/// (the `overrides:` declaration alone does NOT count as usage: pnpm +/// keeps it mirrored from package.json even when nothing matches it). +/// `None`: cannot determine (missing/unreadable/unsupported lock) — +/// callers must keep the entry, fail-safe. +pub async fn pnpm_entry_in_use(entry: &VendorEntry, project_root: &Path) -> Option { + let text = tokio::fs::read_to_string(project_root.join(PNPM_LOCK)) + .await + .ok()?; + if check_lock_version(&text).is_err() { + return None; + } + let lines = split_lines(&text); + for section in ["packages", "snapshots"] { + let Some((start, end)) = section_bounds(&lines, section) else { + continue; + }; + let mut i = start + 1; + while let Some(block) = next_block(&lines, i, end) { + let resolved_to_ours = block + .key + .find("@file:") + .map(|at| &block.key[at + 1..]) + .and_then(parse_vendor_path) + .is_some_and(|p| p.eco == "npm" && p.uuid == entry.uuid); + if resolved_to_ours { + return Some(true); + } + i = block.end; + } + } + Some(false) +} + /// Undo one pnpm-vendored package: restore the recorded pair fragments and /// remove the artifact dir. Reverse application order; per-record ownership /// is re-checked against the live fragment (drift ⇒ warning, left alone). @@ -2248,6 +2288,52 @@ snapshots: assert!(live_lock.contains("overrides:\n other-pkg: 2.0.0\n\nimporters:")); } + // ── in-use probe ─────────────────────────────────────────────────────── + + /// The prune-time in-use probe: a packages/snapshots block resolving to + /// the artifact means in use; an overrides declaration ALONE (the state + /// pnpm leaves after the dependency is removed and re-locked) does not; + /// a missing or unsupported-version lock is undeterminable (keep). + #[tokio::test] + async fn pnpm_entry_in_use_reflects_lock_graph() { + let fx = fixture_with(P1_BEFORE_PKG, P1_BEFORE_LOCK).await; + let (_, entry, _) = expect_done(fx.vendor(false).await); + let entry = entry.unwrap(); + + // Freshly vendored: the rekeyed file: blocks are in the graph. + assert_eq!(pnpm_entry_in_use(&entry, fx.root()).await, Some(true)); + + // Dep removed + re-locked: pnpm prunes the file: blocks but keeps + // the overrides declaration mirrored from package.json. + let removed_lock = format!( + "lockfileVersion: '9.0'\n\nsettings:\n autoInstallPeers: true\n\ + \noverrides:\n left-pad@1.3.0: file:{}\n\nimporters:\n\n .:\n \ + dependencies:\n consumer:\n specifier: file:./consumer\n \ + version: file:consumer\n\npackages:\n\n consumer@file:consumer:\n \ + resolution: {{directory: consumer, type: directory}}\n\nsnapshots:\n\n \ + consumer@file:consumer: {{}}\n", + fx.rel_tgz() + ); + tokio::fs::write(fx.root().join(PNPM_LOCK), &removed_lock) + .await + .unwrap(); + assert_eq!( + pnpm_entry_in_use(&entry, fx.root()).await, + Some(false), + "the lingering overrides declaration alone is not usage" + ); + + // Unsupported lock version: undeterminable. + tokio::fs::write(fx.root().join(PNPM_LOCK), "lockfileVersion: '6.0'\n") + .await + .unwrap(); + assert_eq!(pnpm_entry_in_use(&entry, fx.root()).await, None); + + // Missing lock: undeterminable. + tokio::fs::remove_file(fx.root().join(PNPM_LOCK)).await.unwrap(); + assert_eq!(pnpm_entry_in_use(&entry, fx.root()).await, None); + } + // ── exact-version pin takeover ───────────────────────────────────────── /// package.json with a user-authored override pin (`key: value`) plus the From 7042cbcb38a23b50af17aa9cf282003c721e447b Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Thu, 11 Jun 2026 16:15:06 -0400 Subject: [PATCH 5/5] test: e2e coverage for encoded scoped purls, mismatch annotation, prune lifecycle - scan_vendor_e2e: full pipeline with the API's percent-encoded scoped purl form (download -> vendor lookup against node_modules/@scope -> lock rewiring -> prune exemption); interactive pre-prompt baseline annotation + auto-force warning; scan --prune reverting an unused vendored entry (ledger + manifest + artifact + lock all reconciled) - clippy: too_many_arguments allow on stage_patch_pack, JsrPurlParts type alias Co-Authored-By: Claude Fable 5 --- .../socket-patch-cli/tests/scan_vendor_e2e.rs | 325 ++++++++++++++++++ .../src/patch/vendor/npm_common.rs | 1 + crates/socket-patch-core/src/utils/purl.rs | 6 +- 3 files changed, 331 insertions(+), 1 deletion(-) diff --git a/crates/socket-patch-cli/tests/scan_vendor_e2e.rs b/crates/socket-patch-cli/tests/scan_vendor_e2e.rs index cc13f29..64f0b28 100644 --- a/crates/socket-patch-cli/tests/scan_vendor_e2e.rs +++ b/crates/socket-patch-cli/tests/scan_vendor_e2e.rs @@ -412,3 +412,328 @@ async fn scan_vendor_flag_conflicts_are_clap_errors() { ); } } + +// ───────────── percent-encoded scoped purls (API canonical form) ───────────── + +const SCOPED_CRAWLER_PURL: &str = "pkg:npm/@scope/left-pad@1.3.0"; +const SCOPED_API_PURL: &str = "pkg:npm/%40scope/left-pad@1.3.0"; + +/// Like `write_fixture`, but the installed package is the SCOPED +/// `@scope/left-pad` (the crawler reports the literal `@scope` form). +fn write_scoped_fixture(root: &Path) { + std::fs::write( + root.join("package.json"), + r#"{ "name": "scan-vendor-test", "version": "0.0.0" }"#, + ) + .unwrap(); + let lock = serde_json::json!({ + "name": "scan-vendor-test", + "version": "0.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "scan-vendor-test", + "version": "0.0.0", + "dependencies": { "@scope/left-pad": "^1.3.0" } + }, + "node_modules/@scope/left-pad": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@scope/left-pad/-/left-pad-1.3.0.tgz", + "integrity": "sha512-orig==", + "license": "WTFPL" + } + } + }); + let mut lock_bytes = serde_json::to_vec_pretty(&lock).unwrap(); + lock_bytes.push(b'\n'); + std::fs::write(root.join("package-lock.json"), lock_bytes).unwrap(); + + let pkg = root.join("node_modules/@scope/left-pad"); + std::fs::create_dir_all(&pkg).unwrap(); + std::fs::write( + pkg.join("package.json"), + br#"{"name":"@scope/left-pad","version":"1.3.0"}"#, + ) + .unwrap(); + std::fs::write(pkg.join("index.js"), BEFORE).unwrap(); +} + +/// Mock API that serves the patch under the percent-ENCODED purl (the +/// canonical form the production patches API returns for scoped packages), +/// while the batch request/response is keyed by the crawler's literal form. +async fn mount_scoped_patch_api(mock: &MockServer, uuid: &str) { + let before_hash = git_sha256(BEFORE); + let after_hash = git_sha256(AFTER); + Mock::given(method("POST")) + .and(path(format!("/v0/orgs/{ORG_SLUG}/patches/batch"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "packages": [{ + "purl": SCOPED_CRAWLER_PURL, + "patches": [{ + "uuid": uuid, + "purl": SCOPED_API_PURL, + "tier": "free", + "cveIds": ["CVE-2026-0001"], + "ghsaIds": [], + "severity": "high", + "title": "vendor target" + }] + }], + "canAccessPaidPatches": false, + }))) + .mount(mock) + .await; + // Per-package search: the crawler purl, urlencoded. + Mock::given(method("GET")) + .and(path(format!( + "/v0/orgs/{ORG_SLUG}/patches/by-package/pkg%3Anpm%2F%40scope%2Fleft-pad%401.3.0" + ))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "patches": [{ + "uuid": uuid, + "purl": SCOPED_API_PURL, + "publishedAt": "2026-01-01T00:00:00Z", + "description": "Vendor patch", + "license": "MIT", + "tier": "free", + "vulnerabilities": {} + }], + "canAccessPaidPatches": false, + }))) + .mount(mock) + .await; + Mock::given(method("GET")) + .and(path(format!("/v0/orgs/{ORG_SLUG}/patches/view/{uuid}"))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "uuid": uuid, + "purl": SCOPED_API_PURL, + "publishedAt": "2026-01-01T00:00:00Z", + "files": { + "package/index.js": { + "beforeHash": before_hash, + "afterHash": after_hash, + "blobContent": AFTER_B64, + } + }, + "vulnerabilities": {}, + "description": "Vendor patch", + "license": "MIT", + "tier": "free", + }))) + .mount(mock) + .await; +} + +/// The production patches API serves scoped purls percent-encoded +/// (`pkg:npm/%40scope/...`) and scan stores them verbatim as manifest keys. +/// The whole pipeline — download, vendor lookup against the literal +/// `node_modules/@scope/...` install, lock rewiring, prune exemption — must +/// bridge the two spellings. (Flowise regression: `%40modelcontextprotocol` +/// failed with `package not installed`.) +#[tokio::test] +async fn scan_vendor_resolves_percent_encoded_scoped_purl() { + let mock = MockServer::start().await; + mount_scoped_patch_api(&mock, UUID).await; + let tmp = tempfile::tempdir().unwrap(); + write_scoped_fixture(tmp.path()); + + // --prune in the same run: the freshly-downloaded ENCODED manifest + // entry must not be GC'd against the literal crawler purl. + let (code, stdout, stderr) = run_scan_vendor(tmp.path(), &mock.uri(), &["--prune"]); + assert_eq!(code, 0, "stdout={stdout}; stderr={stderr}"); + let v: serde_json::Value = serde_json::from_str(stdout.trim()).expect("valid JSON"); + assert_eq!(v["status"], "success", "envelope={v}"); + + // Manifest keyed by the verbatim encoded purl — and NOT pruned. + let manifest: serde_json::Value = serde_json::from_str( + &std::fs::read_to_string(tmp.path().join(".socket/manifest.json")).unwrap(), + ) + .unwrap(); + assert_eq!( + manifest["patches"][SCOPED_API_PURL]["uuid"], UUID, + "manifest={manifest}" + ); + assert_eq!( + v["gc"]["prunedManifestEntries"], + serde_json::json!([]), + "the encoded entry must not look prunable: {v}" + ); + + // Vendored: artifact under the DECODED scope dir, lock rewired. + assert_eq!(v["vendor"]["summary"]["applied"], 1, "envelope={v}"); + let tgz = tmp.path().join(format!( + ".socket/vendor/npm/{UUID}/@scope/left-pad-1.3.0.tgz" + )); + assert!(tgz.is_file(), "tarball at the decoded scoped path"); + let lock = std::fs::read_to_string(tmp.path().join("package-lock.json")).unwrap(); + assert!( + lock.contains(&format!(".socket/vendor/npm/{UUID}/@scope/left-pad-1.3.0.tgz")), + "lock consumes the vendored tarball; lock={lock}" + ); + // Ledger keyed by the verbatim encoded purl. + let state: serde_json::Value = serde_json::from_str( + &std::fs::read_to_string(tmp.path().join(".socket/vendor/state.json")).unwrap(), + ) + .unwrap(); + assert_eq!(state["entries"][SCOPED_API_PURL]["uuid"], UUID, "{state}"); +} + +// ───────────────────── prune reconciles vendored state ───────────────────── + +/// After a dependency is removed and re-locked, `scan --prune` (without +/// `--vendor`) reverts the now-unused vendored entry: lock restored, ledger +/// entry + manifest entry dropped, artifact dir removed. +#[tokio::test] +async fn scan_prune_reverts_unused_vendored_entry() { + let mock = MockServer::start().await; + mount_patch_api(&mock, UUID).await; + let tmp = tempfile::tempdir().unwrap(); + write_fixture(tmp.path()); + + // A second installed package so the later prune run's crawl is + // non-empty (left-pad itself gets removed below). + let other = tmp.path().join("node_modules/keeper"); + std::fs::create_dir_all(&other).unwrap(); + std::fs::write( + other.join("package.json"), + br#"{"name":"keeper","version":"1.0.0"}"#, + ) + .unwrap(); + + let (code, stdout, stderr) = run_scan_vendor(tmp.path(), &mock.uri(), &[]); + assert_eq!(code, 0, "stdout={stdout}; stderr={stderr}"); + + // Simulate `npm uninstall left-pad` + re-lock: drop the dep from the + // lock graph and remove the installed copy. The override-free npm + // wiring leaves nothing else behind. + let lock = serde_json::json!({ + "name": "scan-vendor-test", + "version": "0.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { "name": "scan-vendor-test", "version": "0.0.0" } + } + }); + let mut lock_bytes = serde_json::to_vec_pretty(&lock).unwrap(); + lock_bytes.push(b'\n'); + std::fs::write(tmp.path().join("package-lock.json"), &lock_bytes).unwrap(); + std::fs::remove_dir_all(tmp.path().join("node_modules/left-pad")).unwrap(); + + // Plain prune scan (read-only discovery + GC; no --vendor, no --apply). + let out = Command::new(binary()) + .args([ + "scan", + "--json", + "--prune", + "--yes", + "--api-url", + &mock.uri(), + "--api-token", + "fake-token", + "--org", + ORG_SLUG, + ]) + .current_dir(tmp.path()) + .output() + .expect("run"); + let stdout = String::from_utf8_lossy(&out.stdout); + let code = out.status.code().unwrap_or(-1); + assert_eq!(code, 0, "stdout={stdout}"); + let v: serde_json::Value = serde_json::from_str(stdout.trim()).expect("valid JSON"); + + assert_eq!( + v["gc"]["revertedVendoredEntries"], + serde_json::json!([PURL]), + "gc must report the reverted entry: {v}" + ); + + // Ledger empty (an emptied state file may be removed outright), + // manifest entry dropped, artifact gone. + match std::fs::read_to_string(tmp.path().join(".socket/vendor/state.json")) { + Ok(text) => { + let state: serde_json::Value = serde_json::from_str(&text).unwrap(); + assert!( + state["entries"].as_object().is_none_or(|m| m.is_empty()), + "ledger entry removed: {state}" + ); + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} + Err(e) => panic!("unexpected state.json read error: {e}"), + } + let manifest: serde_json::Value = serde_json::from_str( + &std::fs::read_to_string(tmp.path().join(".socket/manifest.json")).unwrap(), + ) + .unwrap(); + assert!( + manifest["patches"] + .as_object() + .is_none_or(|m| !m.contains_key(PURL)), + "manifest entry dropped: {manifest}" + ); + assert!( + !tmp.path().join(format!(".socket/vendor/npm/{UUID}")).exists(), + "artifact dir removed" + ); + // The (already left-pad-free) lock stays exactly as the user re-locked + // it — the revert had nothing to restore there. + assert_eq!( + std::fs::read(tmp.path().join("package-lock.json")).unwrap(), + lock_bytes + ); +} + +/// Interactive (non-JSON) `scan --vendor` pre-verifies patch baselines: +/// installed content matching NEITHER hash is annotated BEFORE the +/// confirm prompt, and the run still vendors (auto-force) with the +/// `vendor_content_mismatch_overwritten` warning on stderr. +#[tokio::test] +async fn scan_vendor_annotates_mismatched_baseline_and_vendors_anyway() { + let mock = MockServer::start().await; + mount_patch_api(&mock, UUID).await; + let tmp = tempfile::tempdir().unwrap(); + write_fixture(tmp.path()); + // Divergent installed bytes: neither BEFORE nor AFTER. + std::fs::write( + tmp.path().join("node_modules/left-pad/index.js"), + b"divergent\n", + ) + .unwrap(); + + let out = Command::new(binary()) + .args([ + "scan", + "--vendor", + "--yes", + "--api-url", + &mock.uri(), + "--api-token", + "fake-token", + "--org", + ORG_SLUG, + ]) + .current_dir(tmp.path()) + .output() + .expect("run"); + let stdout = String::from_utf8_lossy(&out.stdout); + let stderr = String::from_utf8_lossy(&out.stderr); + assert_eq!( + out.status.code().unwrap_or(-1), + 0, + "stdout={stdout}; stderr={stderr}" + ); + assert!( + stdout.contains("installed content differs from patch baseline"), + "pre-prompt annotation present; stdout={stdout}" + ); + assert!( + stderr.contains("vendor_content_mismatch_overwritten"), + "overwrite warning surfaced; stderr={stderr}" + ); + // Vendored despite the mismatch. + assert!(tmp + .path() + .join(format!(".socket/vendor/npm/{UUID}/left-pad-1.3.0.tgz")) + .is_file()); +} diff --git a/crates/socket-patch-core/src/patch/vendor/npm_common.rs b/crates/socket-patch-core/src/patch/vendor/npm_common.rs index f54a893..e2b4cdb 100644 --- a/crates/socket-patch-core/src/patch/vendor/npm_common.rs +++ b/crates/socket-patch-core/src/patch/vendor/npm_common.rs @@ -121,6 +121,7 @@ pub(super) struct NpmStagedPack { /// verification — no pack, no dirs created). /// * `Ok((Some(staged), result))` — full success: the tarball is on disk at /// `staged.rel_tgz` and the caller proceeds to its lockfile wiring. +#[allow(clippy::too_many_arguments)] pub(super) async fn stage_patch_pack( purl: &str, installed_dir: &Path, diff --git a/crates/socket-patch-core/src/utils/purl.rs b/crates/socket-patch-core/src/utils/purl.rs index 393873d..4fa80ec 100644 --- a/crates/socket-patch-core/src/utils/purl.rs +++ b/crates/socket-patch-core/src/utils/purl.rs @@ -244,8 +244,12 @@ pub fn build_composer_purl(namespace: &str, name: &str, version: &str) -> String /// We follow the same shape as `parse_composer_purl` since both /// have a `/` namespace structure. The leading `@` on /// the scope is preserved (matching npm's `@scope/name` convention). +/// `((scope, name), version)` from a JSR purl, percent-decoded. #[cfg(feature = "deno")] -pub fn parse_jsr_purl(purl: &str) -> Option<((Cow<'_, str>, Cow<'_, str>), Cow<'_, str>)> { +pub type JsrPurlParts<'a> = ((Cow<'a, str>, Cow<'a, str>), Cow<'a, str>); + +#[cfg(feature = "deno")] +pub fn parse_jsr_purl(purl: &str) -> Option> { let base = strip_purl_qualifiers(purl); let rest = base.strip_prefix("pkg:jsr/")?; let at_idx = rest.rfind('@')?;