diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 4765c71..d68e03c 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -31,7 +31,6 @@ body: options: - "Library API (`inspect.Scan`, `inspect.NewScanner`, etc.)" - "MCP (`inspect_scan`, `inspect_scan_dir` tools)" - - "GitHub Action (`.github/action.yml`)" - "SARIF output" - "Browser-based checks (`browser/` sub-module)" - "CVE / dependency database" diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 43f01ef..31916d8 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -30,7 +30,7 @@ body: - "Browser-based / dynamic check (`browser/`)" - "Output format (JSON, SARIF, JUnit, HTML, terminal)" - "Configuration (`.inspect.toml`, options)" - - "MCP server / GitHub Action / tooling" + - "MCP server / tooling" validations: required: true diff --git a/.github/action.yml b/.github/action.yml deleted file mode 100644 index 1543cbd..0000000 --- a/.github/action.yml +++ /dev/null @@ -1,79 +0,0 @@ -name: 'Inspect Site Audit' -description: 'Crawl and audit a website for broken links, security issues, accessibility violations, SEO problems, and performance concerns' -author: 'GrayCode AI' - -branding: - icon: 'search' - color: 'blue' - -inputs: - url: - description: 'Target URL to audit' - required: true - checks: - description: 'Comma-separated checks to run (links,security,forms,a11y,perf,seo)' - required: false - default: 'links,security,forms,a11y,perf,seo' - depth: - description: 'Maximum crawl depth (0 = unlimited)' - required: false - default: '5' - fail-on: - description: 'Minimum severity to fail the action (info,low,medium,high,critical)' - required: false - default: 'high' - concurrency: - description: 'Number of concurrent crawl workers' - required: false - default: '10' - format: - description: 'Output format 
(terminal,json,junit)' - required: false - default: 'terminal' - timeout: - description: 'Overall scan timeout (e.g., 5m, 10m)' - required: false - default: '5m' - -outputs: - findings: - description: 'Total number of findings' - max-severity: - description: 'Highest severity found' - failed: - description: 'Whether the scan failed based on fail-on threshold' - report: - description: 'Path to the generated report file' - -runs: - using: 'composite' - steps: - - name: Setup Go - uses: actions/setup-go@v5 - with: - go-version: '1.25' - - - name: Install inspect - shell: bash - run: go install github.com/GrayCodeAI/inspect/cmd/inspect-ci@latest - - - name: Run audit - id: audit - shell: bash - run: | - inspect-ci \ - --url "${{ inputs.url }}" \ - --checks "${{ inputs.checks }}" \ - --depth "${{ inputs.depth }}" \ - --fail-on "${{ inputs.fail-on }}" \ - --concurrency "${{ inputs.concurrency }}" \ - --format "${{ inputs.format }}" \ - --timeout "${{ inputs.timeout }}" \ - --output-file "${{ runner.temp }}/inspect-report.xml" - - - name: Upload report - if: always() - uses: actions/upload-artifact@v4 - with: - name: inspect-report - path: ${{ runner.temp }}/inspect-report.xml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 02f260e..4507d86 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: run: git clone --depth=1 https://github.com/GrayCodeAI/hawk.git ../hawk - name: gofumpt diff run: | - go install mvdan.cc/gofumpt@latest + go install mvdan.cc/gofumpt@v0.10.0 out=$(gofumpt -l .) if [ -n "$out" ]; then echo "::error::gofumpt would reformat the following files:" diff --git a/AGENTS.md b/AGENTS.md index 73a0e9e..b8e1cd2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,7 +4,7 @@ Website security auditing and crawling library for Go. 
Crawls sites concurrently ## Design Principles -- **Library + CLI** — importable library with optional `inspect-ci` binary +- **Library** — importable Go library + embeddable MCP server (no CLI binary) - **No LLM dependency** — pure static analysis on crawled pages - **Extensible** — custom checks (Go code) + declarative rules (no code required) @@ -25,7 +25,6 @@ gofumpt -w . # Format - `rule.go` — Declarative rule engine (YAML-based) - `finding.go` — Findings with severity, CWE, and evidence - `report.go` — Report generation (JSON, SARIF, HTML) -- `cmd/inspect-ci/` — Optional CI binary for pipeline integration ## Conventions diff --git a/CHANGELOG.md b/CHANGELOG.md index 8194ccd..3463397 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,8 +40,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm quality impact / SARIF compatibility / SSRF & egress safety / Testing / Checklist. - `.github/ISSUE_TEMPLATE/bug_report.yml` — surface dropdown - (library API / MCP / SARIF / GitHub Action / browser checks / - CVE database) and false-positive textarea. + (library API / MCP / SARIF / browser checks / CVE database) and + false-positive textarea. - `.github/ISSUE_TEMPLATE/feature_request.yml` — `kind` selector covering 8 functional areas (security checks / accessibility / SEO / performance / browser / output formats / config / tooling) diff --git a/README.md b/README.md index b23ef08..7f3526e 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@
- Security vulnerability scanner for code + Live website auditor for accessibility, TLS, cookies, and security headers
@@ -12,52 +12,168 @@ --- -Inspect scans code for security vulnerabilities, anti-patterns, and potential issues. It provides actionable findings with severity ratings and remediation guidance. +## What is inspect -## Features +inspect is a Go library that crawls live **websites** and audits the pages it +finds — broken links, security headers, forms, accessibility, performance, SEO, +TLS, cookies, mixed content, subresource integrity, AI-readiness, and +reachability. It is part of the [hawk](https://github.com/GrayCodeAI/hawk) ecosystem: +hawk wires inspect into its own commands, and inspect also ships an MCP server +so any MCP-compatible agent can run audits. -- **Multi-language support** - Scans Go, Python, JavaScript, TypeScript, and more -- **OWASP coverage** - Detects common vulnerability patterns -- **Custom rules** - Define project-specific security policies -- **CI/CD integration** - Fails builds on critical issues +> **inspect is a Go library (and MCP server), not a CLI.** It ships no `inspect` +> binary of its own — it analyzes running URLs, not source code. Import it +> directly to embed website auditing in your own Go program, or run the MCP +> server to expose it to an agent. -## Quick Start +It crawls concurrently (with rate limiting, robots.txt support, redirect +handling, and SSRF protection), runs each check against the discovered pages, +and returns findings with severity levels. Results can be emitted as SARIF for +the GitHub Security tab. -```bash -go get github.com/GrayCodeAI/inspect -``` +## Quick Start ```go -import "github.com/GrayCodeAI/inspect" +import ( + "context" + "fmt" -scanner := inspect.NewScanner( - inspect.WithRules(inspect.DefaultRules), + "github.com/GrayCodeAI/inspect" ) -report, err := scanner.Scan(ctx, "./path/to/code") +// One-shot scan with the Standard preset. 
+report, err := inspect.Scan(ctx, "https://example.com", inspect.Standard) +if err != nil { + // handle error +} for _, f := range report.Findings { - fmt.Printf("[%s] %s - %s\n", f.Severity, f.Rule, f.Message) + fmt.Printf("[%s] %s: %s\n", f.Severity, f.URL, f.Message) +} +``` + +Requires Go 1.26+. + +For repeated or high-throughput scans, reuse a `Scanner` (safe for concurrent use): + +```go +scanner := inspect.NewScanner(inspect.Standard, inspect.WithDepth(3)) +r1, _ := scanner.Scan(ctx, "https://site-a.com") +r2, _ := scanner.Scan(ctx, "https://site-b.com") +``` + +## Features + +inspect ships nine built-in checks (registered in `check.DefaultRegistry`). The +six marked **(default)** run in the `Standard`, `Deep`, and `CI` presets; the +remaining three are opt-in via `WithChecks`. + +- **Links** *(default)* — crawls and reports broken/unreachable links +- **Security headers** *(default)* — detects missing CSP, HSTS, and related + headers; also audits TLS certificate validity/expiry, cookie `Secure` / + `HttpOnly` / `SameSite` flags, and mixed content on HTTPS pages +- **Forms** *(default)* — form validation checks (CSRF, action URLs) +- **Accessibility (`a11y`)** *(default)* — meta/ARIA checks; optional axe-core + and color-contrast analysis through the `browser` sub-module (headless + Chromium via rod) +- **Performance (`perf`)** *(default)* — resource sizes and render-blocking + resources +- **SEO** *(default)* — meta tags, structured data, and metadata checks +- **SRI** — Subresource Integrity validation +- **AI-ready (`aiready`)** — checks for agent/LLM-friendly metadata +- **Reachability** — host/URL reachability checks +- **Concurrent crawler** — depth limits, rate limiting, robots.txt, redirect + following, and SSRF protection (private IPs blocked by default) +- **SARIF output** — `inspect.GenerateSARIF` emits SARIF 2.1.0 for the GitHub + Security tab +- **MCP server** — expose `inspect_scan` and `inspect_scan_dir` to any agent +- **Extensible** — 
register custom `Checker` implementations or declarative + `RuleCheck` patterns + +## Presets + +The default checks are: `links`, `security`, `forms`, `a11y`, `perf`, `seo`. +Add the opt-in checks (`sri`, `aiready`, `reachability`) with `WithChecks`. + +| Preset | Behavior | +|---|---| +| `Quick` | Shallow crawl (depth 2), `links` only | +| `Standard` | Balanced crawl (depth 5), the six default checks | +| `Deep` | Exhaustive crawl (no depth limit), the six default checks | +| `SecurityOnly` | Security-related checks only | +| `CI` | Default checks, fail on high severity | + +## MCP Server + +inspect ships an MCP server (stdio transport) that exposes website auditing to +any MCP-compatible agent: + +```go +import inspectmcp "github.com/GrayCodeAI/inspect/mcp" + +srv := inspectmcp.New(inspect.Standard) +if err := srv.ServeStdio(); err != nil { + // handle error } ``` +**Tools:** + +- `inspect_scan` — crawl a URL and run the configured checks +- `inspect_scan_dir` — serve and scan a local directory of HTML files + +## Browser-Rendered Analysis + +By default inspect analyzes raw HTTP responses. To analyze JavaScript-rendered +pages and run axe-core accessibility checks, supply a `BrowserEngine` from the +`browser` sub-module: + +```go +import "github.com/GrayCodeAI/inspect/browser" + +engine, err := browser.New() +if err != nil { + // handle error +} +defer engine.Close() + +report, err := inspect.Scan(ctx, "https://example.com", + inspect.Standard, + inspect.WithBrowser(engine), +) +``` + +## Custom Checks + +```go +// Declarative rule — no Go code beyond the struct. +inspect.RegisterRule(inspect.RuleCheck{ + RuleName: "x-frame-options", + RuleSeverity: inspect.SeverityHigh, + HeaderMissing: []string{"X-Frame-Options"}, +}) + +// Full Checker implementation, scoped to a single Scanner. +scanner := inspect.NewScanner(inspect.WithCustomChecks(myCheck)) +``` + ## Examples See the [examples/](examples/) directory for runnable code samples. 
-## Configuration +## Architecture -Create `.inspect.yaml` to customize scanning: +See [docs/architecture.md](docs/architecture.md) for the package layout and data flow. -```yaml -rules: - - name: no-hardcoded-secrets - severity: critical - - name: sql-injection - severity: high -ignore: - - vendor/ - - testdata/ -``` +## Ecosystem + +inspect is part of the hawk ecosystem: + +| Component | Repository | Purpose | +|---|---|---| +| **hawk** | [GrayCodeAI/hawk](https://github.com/GrayCodeAI/hawk) | AI coding agent | +| **eyrie** | [GrayCodeAI/eyrie](https://github.com/GrayCodeAI/eyrie) | LLM provider runtime | +| **yaad** | [GrayCodeAI/yaad](https://github.com/GrayCodeAI/yaad) | Graph-based memory | +| **inspect** | This repo | Website audit library + MCP server | ## Contributing diff --git a/action.yml b/action.yml deleted file mode 100644 index 477a8b5..0000000 --- a/action.yml +++ /dev/null @@ -1,73 +0,0 @@ -name: "Inspect Web Audit" -description: "Run inspect security and accessibility audits against a URL, with SARIF output for the GitHub Security tab." 
- -branding: - icon: "shield" - color: "blue" - -inputs: - url: - description: "Target URL to audit" - required: true - checks: - description: "Comma-separated list of checks to run (links, security, forms, a11y, perf, seo)" - required: false - default: "links,security,forms,a11y,perf,seo" - depth: - description: "Maximum crawl depth" - required: false - default: "5" - format: - description: "Output format: text, json, or sarif" - required: false - default: "sarif" - fail-on: - description: "Minimum severity to fail the action: info, low, medium, high, critical" - required: false - default: "high" - -outputs: - findings: - description: "Total number of findings" - max-severity: - description: "Highest severity found" - failed: - description: "Whether the scan failed the threshold (true/false)" - sarif: - description: "Path to the SARIF results file" - -runs: - using: "composite" - steps: - - name: Install Go - uses: actions/setup-go@v5 - with: - go-version: "stable" - - - name: Build inspect-action - shell: bash - run: go build -o inspect-action ./cmd/inspect-action/ - working-directory: ${{ github.action_path }} - - - name: Run inspect audit - id: audit - shell: bash - run: | - "${{ github.action_path }}/inspect-action" \ - --format "${{ inputs.format }}" \ - --checks "${{ inputs.checks }}" \ - --depth "${{ inputs.depth }}" \ - --fail-on "${{ inputs.fail-on }}" \ - "${{ inputs.url }}" - continue-on-error: true - - - name: Upload SARIF - if: ${{ always() && inputs.format == 'sarif' }} - uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4 - with: - sarif_file: results.sarif - - - name: Fail if threshold exceeded - if: ${{ steps.audit.outcome == 'failure' }} - shell: bash - run: exit 1 diff --git a/api/openapi.yaml b/api/openapi.yaml index 364cd37..1b543e9 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -1,9 +1,10 @@ openapi: "3.1.0" info: - title: inspect — Security Auditor Tool Reference + title: inspect — Website Security 
& Accessibility Auditor Tool Reference description: | - inspect is a website security, accessibility, and SEO auditor. - It operates as a CLI tool, GitHub Action, and MCP server (stdio). + inspect is a website security and accessibility auditor. + It is a Go library with an embeddable MCP server (stdio); the host program + embeds it directly and ships no standalone binary. This document describes the MCP tool surface as a machine-readable reference. inspect does NOT expose an HTTP server — all communication is via stdio MCP @@ -16,6 +17,10 @@ info: url: https://github.com/GrayCodeAI/inspect # No servers section — inspect has no HTTP API. +# inspect ships no standalone binary. The MCP server is embedded by the host +# program, which constructs it from the library and starts a stdio transport: +# srv := mcp.New(opts...) // opts are inspect.Option values +# srv.ServeStdio() // stdio transport # MCP tools are documented below using x-mcp-tool extensions. tags: @@ -26,12 +31,13 @@ tags: x-mcp-server: transport: stdio - binary: inspect-ci - start_command: ["inspect-ci", "mcp"] + package: github.com/GrayCodeAI/inspect/mcp + constructor: mcp.New(opts...) 
# opts are inspect.Option values + serve_stdio: mcp.Server.ServeStdio() x-mcp-tools: inspect_scan: - description: Crawl a URL and run security, accessibility, SEO, and performance checks + description: Crawl a URL and run website security and accessibility checks inputSchema: type: object required: [url] @@ -43,7 +49,7 @@ x-mcp-tools: type: array items: type: string - enum: [security, links, forms, a11y, performance, seo] + enum: [links, security, forms, a11y, perf, seo, sri, aiready, reachability] description: Checks to run (default: all) depth: type: integer diff --git a/deploy/docker/docker-compose.yml b/deploy/docker/docker-compose.yml deleted file mode 100644 index ee70f3e..0000000 --- a/deploy/docker/docker-compose.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: inspect - -services: - inspect: - build: - context: ../../ - dockerfile: Dockerfile - image: ghcr.io/graycodeai/inspect:dev - env_file: - - path: ../../.env.example - required: false - entrypoint: ["inspect-ci"] - command: ["--help"] diff --git a/docs/architecture.md b/docs/architecture.md index b4613af..c58ea54 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -2,7 +2,7 @@ # 🔍 inspect Architecture -**Website Security, Accessibility & SEO Auditor** +**Live Website Accessibility, TLS & Security-Header Auditor** [](https://go.dev/) []() @@ -13,46 +13,52 @@ ## 🎯 Overview -inspect is a website security auditing and crawling library for Go. It crawls sites concurrently, runs **security**, **accessibility**, **SEO**, and **performance** checks, and generates findings with severity levels and **CWE references**. +inspect is a website auditing **library** for Go. It crawls live sites +concurrently, runs **accessibility**, **TLS**, **cookie**, **security-header**, +**mixed-content**, and **meta** checks against the discovered pages, and returns +findings with severity levels. Results can be emitted as SARIF for the GitHub +Security tab. 
-> 💡 Three modes: **Go library**, **CLI binary** (`inspect-ci`), and **GitHub Action**. +> 💡 inspect ships **no CLI binary**. It is consumed two ways: as a **Go library** +> (imported by hawk and other programs) and as an **MCP server** (stdio transport) +> that exposes auditing tools to any MCP-compatible agent. --- -## 🧱 Components +## 🧱 Package Layout ``` inspect/ -├── api/openapi.yaml 📜 MCP tool surface reference -├── cmd/ -│ ├── inspect-ci/main.go 🖥️ CLI binary entry point -│ └── inspect-action/main.go ⚡ GitHub Action entry point ├── inspect.go 📤 Public API: Scan(), Finding, Report, Stats -├── check.go 🛡️ Checker interface, RuleCheck, RegisterCheck() -├── scanner.go 🔄 Crawler orchestration, check execution -├── options.go ⚙️ config, With* functions, presets -├── config.go 📋 .inspect.toml loading -├── sarif.go 📊 SARIF output formatter -├── ci_output.go 🖥️ CI-friendly terminal output -├── llm_scanner.go 🤖 AI-powered scanning -├── api_security.go 🔒 API endpoint security checks -├── dependency_check.go 📦 Dependency vulnerability checks -├── sbom.go 📋 SBOM generation -├── browser.go 🌐 Browser automation entry -├── browser/ 🖥️ Rod-based browser crawling -├── checks/ -│ ├── security.go 🔒 CSP, HSTS, CORS headers -│ ├── accessibility.go ♿ ARIA violations -│ ├── tls.go 🔐 Certificate checks -│ ├── cookies.go 🍪 Cookie security flags -│ ├── headers.go 📋 Missing security headers -│ └── mixed_content.go ⚠️ Mixed content detection +├── scanner.go 🔄 Scanner: crawl orchestration + check execution +├── options.go ⚙️ config, With* options, presets (Quick/Standard/Deep/…) +├── check.go 🛡️ Checker interface, RuleCheck, RegisterCheck/RegisterRule +├── config.go 📋 .inspect config loading +├── severity.go 🎚️ Severity (aliased from hawk/shared/types) +├── sarif.go 📊 GenerateSARIF — SARIF 2.1.0 output +├── browser.go 🌐 BrowserEngine interface + page-data types (no rod import) +├── browser_fetcher.go 🔌 Adapts a BrowserEngine into the crawler's fetcher +├── checks/ ✅ Built-in checks run 
against crawled responses +│ ├── headers.go 📋 Missing security headers (CSP, HSTS, …) +│ ├── cookies.go 🍪 Cookie Secure/HttpOnly/SameSite flags +│ ├── tls.go 🔐 Certificate validity & expiry +│ ├── mixed_content.go ⚠️ Insecure resources on HTTPS pages +│ ├── meta.go 🏷️ Meta-tag / SEO checks +│ └── accessibility.go ♿ Accessibility / ARIA checks +├── browser/ 🖥️ Optional rod-based engine (headless Chromium) +│ ├── rod.go 🚀 New() — launches Chromium, renders pages +│ ├── axe.go ♿ axe-core injection & violation collection +│ ├── contrast.go 🎨 Color-contrast analysis +│ └── options.go ⚙️ Engine options (separate Go module) ├── mcp/ 🔌 MCP server (stdio transport) +│ └── server.go 📡 inspect_scan & inspect_scan_dir tools +├── api/openapi.yaml 📜 MCP tool surface reference +├── examples/ 📚 Runnable code samples └── internal/ - ├── crawler/ 🕷️ URL parsing, sitemap, robots.txt - ├── check/ 🔄 Internal check runner + ├── crawler/ 🕷️ Concurrent crawl, robots.txt, sitemap, rate limit, circuit breaker, dir server + ├── check/ 🔄 Internal check registry & runners (links, forms, a11y, perf, reachability) ├── html/ 📄 HTML parsing utilities - └── report/ 📊 Output format implementations + └── report/ 📊 Output formatters (text, JSON, JUnit, markdown) ``` --- @@ -66,17 +72,21 @@ report, err := inspect.Scan(ctx, "https://example.com", inspect.WithDepth(3), ) -// 🔄 Reusable scanner +// 🔄 Reusable scanner (safe for concurrent use) scanner := inspect.NewScanner(inspect.WithConcurrency(10)) report, err := scanner.Scan(ctx, "https://example.com") +// 📁 Audit local build output before deploy +report, err := scanner.ScanDir(ctx, "./public") + // 🛡️ Custom Go check inspect.RegisterCheck(myCheck) // 📋 Declarative rule (no Go code) inspect.RegisterRule(inspect.RuleCheck{ - Name: "x-frame-options", Severity: inspect.High, - Check: inspect.HeaderMissing{Header: "X-Frame-Options"}, + RuleName: "x-frame-options", + RuleSeverity: inspect.SeverityHigh, + HeaderMissing: []string{"X-Frame-Options"}, }) ``` @@ 
-84,57 +94,84 @@ inspect.RegisterRule(inspect.RuleCheck{ ## ⚡ Presets -| Preset | Checks | Speed | -|--------|--------|:-----:| -| 🏃 `Quick` | links, security headers | Fast | -| 📊 `Standard` | links, security, forms, a11y | Medium | -| 🔬 `Deep` | all checks, depth 10 | Slow | -| 🔒 `SecurityOnly` | security, TLS, cookies, headers | Fast | -| 🤖 `CI` | all checks, fail on Medium+ | Medium | +| Preset | Crawl | Checks | +|--------|-------|--------| +| 🏃 `Quick` | depth 2, concurrency 5 | links | +| 📊 `Standard` | depth 5, concurrency 10 | links, security, forms, a11y, perf, seo | +| 🔬 `Deep` | no depth limit, concurrency 20 | links, security, forms, a11y, perf, seo | +| 🔒 `SecurityOnly` | default crawl | security | +| 🤖 `CI` | depth 5, concurrency 10 | links, security, forms, a11y, perf, seo; fail on high | --- ## 🔌 MCP Server -```bash -inspect-ci mcp # 📡 stdio transport (add to agent MCP config) +Embed the server in a program to expose auditing over stdio: + +```go +import ( + "github.com/GrayCodeAI/inspect" + inspectmcp "github.com/GrayCodeAI/inspect/mcp" +) + +srv := inspectmcp.New(inspect.Standard) +_ = srv.ServeStdio() ``` -**Tools:** `inspect_scan` — crawl URL and run checks · `inspect_scan_dir` — scan local HTML directory +**Tools:** `inspect_scan` — crawl a URL and run checks · `inspect_scan_dir` — serve and scan a local HTML directory. --- -## 🐙 GitHub Action +## 🌐 Browser-Rendered Analysis -```yaml -- uses: GrayCodeAI/inspect@v0.4.0 - with: - url: https://example.com - checks: security,a11y - fail-on: high +The core `inspect` package never imports rod. 
To analyze JavaScript-rendered +pages and run axe-core / contrast checks, supply a `BrowserEngine` from the +`browser` sub-module (a separate Go module so the rod/Chromium dependency stays +opt-in): + +```go +import "github.com/GrayCodeAI/inspect/browser" + +engine, _ := browser.New() +defer engine.Close() + +report, _ := inspect.Scan(ctx, "https://example.com", + inspect.Standard, + inspect.WithBrowser(engine), +) ``` +`browser_fetcher.go` adapts the engine into the crawler's fetcher so rendered +HTML is analyzed instead of the raw HTTP response. + --- ## 🔎 Findings -Each finding includes: +Each finding (`inspect.Finding`) includes: | Field | Description | |-------|-------------| -| `Check` | Which check produced this finding | +| `Check` | Which check produced the finding | | `Severity` | 🟢 Info · 🟡 Low · 🟠 Medium · 🔴 High · 🟥 Critical | | `URL` | Page where the issue was found | +| `Element` | Offending element (optional) | | `Message` | Human-readable description | -| `Evidence` | Snippet of the problematic content | -| `CWE` | CWE reference (required for security findings) | -| `Confidence` | 0.0–1.0 score | +| `Fix` | Suggested remediation (optional) | +| `Evidence` | Snippet of the problematic content (optional) | + +A `Report` aggregates findings plus `Stats` (pages scanned, counts by severity +and check, per-check durations) and a `FailOn` threshold; `Report.Failed()` and +`Report.MaxSeverity()` summarize the run. `GenerateSARIF` converts findings to +SARIF 2.1.0. 
--- -## 🛡️ ReDoS Protection +## 🛡️ Crawler Safeguards -All user-supplied regex patterns go through: -- `compileWithTimeout()` — **1s** compilation limit -- `matchWithTimeout()` — **100ms** match limit -- `checkRegexComplexity()` — rejects nested quantifiers before compilation +- **SSRF protection** — requests to private IP ranges are blocked by default + (`WithAllowPrivateIPs` opts out for internal infrastructure) +- **Rate limiting** — per-host request rate caps +- **Circuit breaker** — stops hitting a host after repeated failures, half-opens after cooldown +- **robots.txt** — respected by default +- **Redirect & page timeouts** — bounded redirect chains and per-page deadlines diff --git a/options.go b/options.go index 9893bfd..bdbb786 100644 --- a/options.go +++ b/options.go @@ -76,7 +76,7 @@ var Quick Option = optFunc(func(c *config) { c.concurrency = 5 }) -// Standard performs a balanced crawl with all checks enabled. +// Standard performs a balanced crawl with the six default checks enabled. var Standard Option = optFunc(func(c *config) { c.depth = 5 c.checks = []string{"links", "security", "forms", "a11y", "perf", "seo"}