diff --git a/CHANGELOG.md b/CHANGELOG.md index d83963d..39e686a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # Changelog +## 2.4.10 + +### Added: opt directories back into manifest discovery via `--include-dirs` + +- New `--include-dirs` flag (comma-separated directory names) that re-includes directories + the CLI excludes from manifest discovery by default. The default exclude list + (`node_modules`, `bower_components`, `jspm_packages`, `__pycache__`, `.venv`, `venv`, + `build`, `dist`, `.tox`, `.mypy_cache`, `.pytest_cache`, `*.egg-info`, `vendor`) suits most + projects, but some keep manifest files in directories with those names (e.g. `build/requirements.txt`). + Pass `--include-dirs build,dist` to scan them. Names are matched against any path segment, + mirroring how the default exclude list is applied. +- `--include-module-folders` now functions as documented: it re-includes the JS/TS module + folders (`node_modules`, `bower_components`, `jspm_packages`) as a group. Previously the + flag was accepted but had no effect. 
+ ## 2.4.9 ### Added: opt-in streaming log channel via `--upload-logs` diff --git a/docs/cli-reference.md b/docs/cli-reference.md index adb41ca..7524495 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -148,7 +148,7 @@ socketcli [-h] [--api-token API_TOKEN] [--repo REPO] [--workspace WORKSPACE] [-- [--owner OWNER] [--pr-number PR_NUMBER] [--commit-message COMMIT_MESSAGE] [--commit-sha COMMIT_SHA] [--committers [COMMITTERS ...]] [--target-path TARGET_PATH] [--sbom-file SBOM_FILE] [--license-file-name LICENSE_FILE_NAME] [--save-submitted-files-list SAVE_SUBMITTED_FILES_LIST] [--save-manifest-tar SAVE_MANIFEST_TAR] [--files FILES] [--sub-path SUB_PATH] [--workspace-name WORKSPACE_NAME] - [--excluded-ecosystems EXCLUDED_ECOSYSTEMS] [--exclude-paths EXCLUDE_PATHS] [--default-branch] [--pending-head] [--generate-license] [--enable-debug] + [--excluded-ecosystems EXCLUDED_ECOSYSTEMS] [--exclude-paths EXCLUDE_PATHS] [--include-dirs INCLUDE_DIRS] [--default-branch] [--pending-head] [--generate-license] [--enable-debug] [--enable-json] [--enable-sarif] [--sarif-file ] [--sarif-scope {diff,full}] [--sarif-grouping {instance,alert}] [--sarif-reachability {all,reachable,potentially,reachable-or-potentially}] [--enable-gitlab-security] [--gitlab-security-file ] [--disable-overview] [--exclude-license-details] [--allow-unverified] [--disable-security-issue] [--ignore-commit-files] [--disable-blocking] [--disable-ignore] [--enable-diff] [--scm SCM] [--timeout TIMEOUT] [--include-module-folders] @@ -205,13 +205,14 @@ If you don't want to provide the Socket API Token every time then you can use th | `--workspace-name` | False | | Workspace name suffix to append to repository name (repo-name-workspace_name). Must be used with `--sub-path` | | `--excluded-ecosystems` | False | [] | List of ecosystems to exclude from analysis (JSON array string). 
You can get supported files from the [Supported Files API](https://docs.socket.dev/reference/getsupportedfiles) | | `--exclude-paths` | False | | Comma-separated paths/globs to exclude from **both** manifest discovery (every scan) **and** reachability analysis (e.g. `tests/**,packages/legacy,*.spec.ts`). Patterns are scan-root-relative, case-sensitive globs where `*` does not cross `/` and `**` does. Supersedes `--reach-exclude-paths`. | +| `--include-dirs` | False | | Comma-separated directory **names** that are excluded from manifest discovery by default but should be scanned (e.g. `build,dist`). Names are matched against any path segment, mirroring the default exclude list (`node_modules`, `bower_components`, `jspm_packages`, `__pycache__`, `.venv`, `venv`, `build`, `dist`, `.tox`, `.mypy_cache`, `.pytest_cache`, `*.egg-info`, `vendor`). Use this when manifest files live under a normally-ignored folder, e.g. `build/requirements.txt`. | #### Branch and Scan Configuration | Parameter | Required | Default | Description | |:-------------------------|:---------|:--------|:------------------------------------------------------------------------------------------------------| | `--default-branch` | False | *auto* | Make this branch the default branch (auto-detected from git and CI environment when not specified) | | `--pending-head` | False | *auto* | If true, the new scan will be set as the branch's head scan (automatically synced with default-branch) | -| `--include-module-folders` | False | False | If enabled will include manifest files from folders like node_modules | +| `--include-module-folders` | False | False | If enabled, re-includes the JS/TS module folders (`node_modules`, `bower_components`, `jspm_packages`) in manifest discovery. For other excluded directories, use `--include-dirs`. 
| #### Output Configuration | Parameter | Required | Default | Description | diff --git a/pyproject.toml b/pyproject.toml index acff1fb..357f26e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" [project] name = "socketsecurity" -version = "2.4.9" +version = "2.4.10" requires-python = ">= 3.11" license = {"file" = "LICENSE"} dependencies = [ diff --git a/socketsecurity/__init__.py b/socketsecurity/__init__.py index cc95834..4b86d9b 100644 --- a/socketsecurity/__init__.py +++ b/socketsecurity/__init__.py @@ -1,3 +1,3 @@ __author__ = 'socket.dev' -__version__ = '2.4.9' +__version__ = '2.4.10' USER_AGENT = f'SocketPythonCLI/{__version__}' diff --git a/socketsecurity/config.py b/socketsecurity/config.py index bad7ed6..fdb7005 100644 --- a/socketsecurity/config.py +++ b/socketsecurity/config.py @@ -154,6 +154,7 @@ class CliConfig: repo_is_public: bool = False excluded_ecosystems: list[str] = field(default_factory=lambda: []) exclude_paths: Optional[List[str]] = None + included_dirs: List[str] = field(default_factory=lambda: []) version: str = __version__ jira_plugin: PluginConfig = field(default_factory=PluginConfig) slack_plugin: PluginConfig = field(default_factory=PluginConfig) @@ -314,6 +315,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig': 'reach_ecosystems': args.reach_ecosystems.split(',') if args.reach_ecosystems else None, 'reach_exclude_paths': args.reach_exclude_paths.split(',') if args.reach_exclude_paths else None, 'exclude_paths': normalize_exclude_paths(args.exclude_paths), + 'included_dirs': normalize_exclude_paths(args.include_dirs) or [], 'reach_skip_cache': args.reach_skip_cache, 'reach_min_severity': args.reach_min_severity, 'reach_output_file': args.reach_output_file, @@ -639,6 +641,17 @@ def create_argument_parser() -> argparse.ArgumentParser: "Supersedes --reach-exclude-paths." 
) + path_group.add_argument( + "--include-dirs", + dest="include_dirs", + metavar="", + help="Comma-separated directory names that are excluded from manifest discovery by " + "default but should be scanned (e.g. 'build,dist'). Names are matched against any " + "path segment, mirroring the default exclude list. Defaults excluded: " + "node_modules, bower_components, jspm_packages, __pycache__, .venv, venv, build, " + "dist, .tox, .mypy_cache, .pytest_cache, *.egg-info, vendor." + ) + # Branch and Scan Configuration config_group = parser.add_argument_group('Branch and Scan Configuration') config_group.add_argument( diff --git a/socketsecurity/core/socket_config.py b/socketsecurity/core/socket_config.py index 8ebde8d..acf01a2 100644 --- a/socketsecurity/core/socket_config.py +++ b/socketsecurity/core/socket_config.py @@ -15,6 +15,10 @@ "vendor" } +# Subset of default_exclude_dirs that hold installed JS/TS modules. Re-included as a group +# by --include-module-folders (see CliConfig.include_module_folders). 
+module_folder_dirs = {"node_modules", "bower_components", "jspm_packages"} + @dataclass class SocketConfig: api_key: str diff --git a/socketsecurity/socketcli.py b/socketsecurity/socketcli.py index 7eeabbd..0d8bccc 100644 --- a/socketsecurity/socketcli.py +++ b/socketsecurity/socketcli.py @@ -19,7 +19,7 @@ from socketsecurity.core.logging import initialize_logging, set_debug_mode from socketsecurity.core.messages import Messages from socketsecurity.core.scm_comments import Comments -from socketsecurity.core.socket_config import SocketConfig +from socketsecurity.core.socket_config import SocketConfig, module_folder_dirs from socketsecurity.core.streaming import StreamingLogs from socketsecurity.fossa_compat import build_fossa_attribution_payload from socketsecurity.output import OutputHandler @@ -195,7 +195,19 @@ def main_code(): ) as streaming: core = Core(socket_config, sdk, config) log.debug("loaded core") - + + # Re-include directories that are excluded from manifest discovery by default + # (e.g. build/dist). --include-dirs names them individually; --include-module-folders + # re-includes the JS module folders as a group. Build a new set rather than mutating + # the shared default_exclude_dirs in place. Applied here so it covers every find_files + # call below, including the sub-path manifest pre-check. 
+ dirs_to_include = set(config.included_dirs or []) + if config.include_module_folders: + dirs_to_include |= module_folder_dirs + if dirs_to_include: + core.config.excluded_dirs = set(core.config.excluded_dirs) - dirs_to_include + log.debug(f"Re-including normally-excluded directories in scan: {sorted(dirs_to_include)}") + # Check for required dependencies if reachability analysis is enabled if config.reach: log.info("Reachability analysis enabled, checking for required dependencies...") diff --git a/tests/unit/test_include_dirs.py b/tests/unit/test_include_dirs.py new file mode 100644 index 0000000..7261f69 --- /dev/null +++ b/tests/unit/test_include_dirs.py @@ -0,0 +1,99 @@ +"""Tests for --include-dirs (and the now-functional --include-module-folders). + +Covers config parsing of the comma-separated directory names and that re-including a +normally-excluded directory (e.g. build) lets Core.find_files discover manifests under it. +""" +import types +from unittest.mock import MagicMock + +import pytest + +from socketsecurity.config import CliConfig +from socketsecurity.core import Core +from socketsecurity.core.socket_config import ( + SocketConfig, + default_exclude_dirs, + module_folder_dirs, +) + +BASE_ARGS = ["--api-token", "test-token", "--repo", "test-repo"] + + +# ---- config parsing ------------------------------------------------------ + +def test_include_dirs_parses_to_list(): + config = CliConfig.from_args(BASE_ARGS + ["--include-dirs", "build, dist , vendor"]) + assert config.included_dirs == ["build", "dist", "vendor"] + + +def test_include_dirs_defaults_empty(): + config = CliConfig.from_args(BASE_ARGS) + assert config.included_dirs == [] + + +def test_include_dirs_from_config_file(tmp_path): + import json + cfg = tmp_path / "socketcli.json" + cfg.write_text(json.dumps({"socketcli": {"include_dirs": ["build", "dist"]}}), encoding="utf-8") + config = CliConfig.from_args(BASE_ARGS + ["--config", str(cfg)]) + assert config.included_dirs == ["build", 
"dist"] + + +def test_module_folder_dirs_is_subset_of_defaults(): + assert module_folder_dirs <= default_exclude_dirs + + +# ---- find_files integration ---------------------------------------------- + +def _make_core(excluded_dirs): + core = Core.__new__(Core) + core.config = SocketConfig(api_key="test-key", excluded_dirs=excluded_dirs) + core.cli_config = types.SimpleNamespace(exclude_paths=None) + core.sdk = MagicMock() + return core + + +def _seed_manifests(tmp_path): + for rel in ("requirements.txt", "build/requirements.txt", "dist/requirements.txt"): + p = tmp_path / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text("flask==1.0\n", encoding="utf-8") + + +def test_find_files_excludes_build_by_default(tmp_path, mocker): + _seed_manifests(tmp_path) + core = _make_core(set(default_exclude_dirs)) + mocker.patch.object( + core, "get_supported_patterns", + return_value={"pypi": {"requirements.txt": {"pattern": "requirements.txt"}}}, + ) + + found = core.find_files(str(tmp_path)) + assert not any("/build/" in f for f in found) + assert not any("/dist/" in f for f in found) + assert any(f.endswith("/requirements.txt") for f in found) + + +def test_find_files_includes_build_when_unexcluded(tmp_path, mocker): + """Mirrors socketcli wiring: dropping a name from excluded_dirs re-includes its manifests.""" + _seed_manifests(tmp_path) + core = _make_core(set(default_exclude_dirs) - {"build"}) + mocker.patch.object( + core, "get_supported_patterns", + return_value={"pypi": {"requirements.txt": {"pattern": "requirements.txt"}}}, + ) + + found = core.find_files(str(tmp_path)) + assert any("/build/requirements.txt" in f for f in found) + # dist is still excluded since only build was re-included + assert not any("/dist/" in f for f in found) + + +def test_unexcluding_does_not_mutate_shared_defaults(): + """The socketcli flow builds a new set rather than mutating the module-level default.""" + before = set(default_exclude_dirs) + config = 
SocketConfig(api_key="test-key") + config.excluded_dirs = set(config.excluded_dirs) - {"build"} + assert "build" not in config.excluded_dirs + assert default_exclude_dirs == before + assert "build" in default_exclude_dirs diff --git a/uv.lock b/uv.lock index 86d19ff..b11778b 100644 --- a/uv.lock +++ b/uv.lock @@ -1283,7 +1283,7 @@ wheels = [ [[package]] name = "socketsecurity" -version = "2.4.9" +version = "2.4.10" source = { editable = "." } dependencies = [ { name = "brotli", marker = "platform_python_implementation == 'CPython'" },