From bb0e950cef0c217958e2500da4f1835fe5b92d05 Mon Sep 17 00:00:00 2001 From: Patel230 Date: Tue, 9 Jun 2026 11:56:31 +0530 Subject: [PATCH 1/3] fix: correct product docs to web-audit library (no CLI/Action) inspect is a website security & accessibility audit library + embeddable MCP server, not a "code security scanner CLI". Made all docs/config honest: - README/docs/architecture/AGENTS.md: rewritten to describe the real web-audit library (a11y/TLS/cookies/security-headers/mixed-content/meta + crawler + MCP). Removed all references to a non-existent inspect-ci/inspect-action CLI binary. - Deleted action.yml + .github/action.yml (composite Action built a non-existent ./cmd/inspect-action) and deploy/docker/docker-compose.yml (referenced a non-existent inspect-ci binary). - api/openapi.yaml: corrected the MCP surface to the real constructor (mcp.New(...).ServeStdio()) and replaced the bogus checks enum with the 6 real checks (security-headers, cookie-security, tls, mixed-content, meta-tags, accessibility); dropped SEO/performance overclaims. - Issue templates: removed deleted GitHub Action options. - CI: pinned gofumpt v0.10.0; Go badge -> 1.26+. Verified: go build/vet/test pass; gofumpt clean; golangci-lint 0 issues; zero dangling CLI/Action references. 
--- .github/ISSUE_TEMPLATE/bug_report.yml | 1 - .github/ISSUE_TEMPLATE/feature_request.yml | 2 +- .github/action.yml | 79 ---------- .github/workflows/ci.yml | 2 +- AGENTS.md | 3 +- README.md | 159 +++++++++++++++++---- action.yml | 73 ---------- api/openapi.yaml | 20 ++- deploy/docker/docker-compose.yml | 13 -- docs/architecture.md | 157 ++++++++++++-------- 10 files changed, 243 insertions(+), 266 deletions(-) delete mode 100644 .github/action.yml delete mode 100644 action.yml delete mode 100644 deploy/docker/docker-compose.yml diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 4765c71..d68e03c 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -31,7 +31,6 @@ body: options: - "Library API (`inspect.Scan`, `inspect.NewScanner`, etc.)" - "MCP (`inspect_scan`, `inspect_scan_dir` tools)" - - "GitHub Action (`.github/action.yml`)" - "SARIF output" - "Browser-based checks (`browser/` sub-module)" - "CVE / dependency database" diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 43f01ef..31916d8 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -30,7 +30,7 @@ body: - "Browser-based / dynamic check (`browser/`)" - "Output format (JSON, SARIF, JUnit, HTML, terminal)" - "Configuration (`.inspect.toml`, options)" - - "MCP server / GitHub Action / tooling" + - "MCP server / tooling" validations: required: true diff --git a/.github/action.yml b/.github/action.yml deleted file mode 100644 index 1543cbd..0000000 --- a/.github/action.yml +++ /dev/null @@ -1,79 +0,0 @@ -name: 'Inspect Site Audit' -description: 'Crawl and audit a website for broken links, security issues, accessibility violations, SEO problems, and performance concerns' -author: 'GrayCode AI' - -branding: - icon: 'search' - color: 'blue' - -inputs: - url: - description: 'Target URL to audit' - required: true - 
checks: - description: 'Comma-separated checks to run (links,security,forms,a11y,perf,seo)' - required: false - default: 'links,security,forms,a11y,perf,seo' - depth: - description: 'Maximum crawl depth (0 = unlimited)' - required: false - default: '5' - fail-on: - description: 'Minimum severity to fail the action (info,low,medium,high,critical)' - required: false - default: 'high' - concurrency: - description: 'Number of concurrent crawl workers' - required: false - default: '10' - format: - description: 'Output format (terminal,json,junit)' - required: false - default: 'terminal' - timeout: - description: 'Overall scan timeout (e.g., 5m, 10m)' - required: false - default: '5m' - -outputs: - findings: - description: 'Total number of findings' - max-severity: - description: 'Highest severity found' - failed: - description: 'Whether the scan failed based on fail-on threshold' - report: - description: 'Path to the generated report file' - -runs: - using: 'composite' - steps: - - name: Setup Go - uses: actions/setup-go@v5 - with: - go-version: '1.25' - - - name: Install inspect - shell: bash - run: go install github.com/GrayCodeAI/inspect/cmd/inspect-ci@latest - - - name: Run audit - id: audit - shell: bash - run: | - inspect-ci \ - --url "${{ inputs.url }}" \ - --checks "${{ inputs.checks }}" \ - --depth "${{ inputs.depth }}" \ - --fail-on "${{ inputs.fail-on }}" \ - --concurrency "${{ inputs.concurrency }}" \ - --format "${{ inputs.format }}" \ - --timeout "${{ inputs.timeout }}" \ - --output-file "${{ runner.temp }}/inspect-report.xml" - - - name: Upload report - if: always() - uses: actions/upload-artifact@v4 - with: - name: inspect-report - path: ${{ runner.temp }}/inspect-report.xml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 02f260e..4507d86 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: run: git clone --depth=1 https://github.com/GrayCodeAI/hawk.git ../hawk - name: gofumpt diff run: | - 
go install mvdan.cc/gofumpt@latest + go install mvdan.cc/gofumpt@v0.10.0 out=$(gofumpt -l .) if [ -n "$out" ]; then echo "::error::gofumpt would reformat the following files:" diff --git a/AGENTS.md b/AGENTS.md index 73a0e9e..b8e1cd2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,7 +4,7 @@ Website security auditing and crawling library for Go. Crawls sites concurrently ## Design Principles -- **Library + CLI** — importable library with optional `inspect-ci` binary +- **Library** — importable Go library + embeddable MCP server (no CLI binary) - **No LLM dependency** — pure static analysis on crawled pages - **Extensible** — custom checks (Go code) + declarative rules (no code required) @@ -25,7 +25,6 @@ gofumpt -w . # Format - `rule.go` — Declarative rule engine (YAML-based) - `finding.go` — Findings with severity, CWE, and evidence - `report.go` — Report generation (JSON, SARIF, HTML) -- `cmd/inspect-ci/` — Optional CI binary for pipeline integration ## Conventions diff --git a/README.md b/README.md index b23ef08..5f0c93d 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@

Inspect

- Security vulnerability scanner for code + Live website auditor for accessibility, TLS, cookies, and security headers

- Go + Go License CI

@@ -12,52 +12,153 @@ --- -Inspect scans code for security vulnerabilities, anti-patterns, and potential issues. It provides actionable findings with severity ratings and remediation guidance. +## What is inspect -## Features +inspect is a Go library that crawls live **websites** and audits the pages it +finds — accessibility, TLS, cookies, security headers, mixed content, and meta +tags. It is part of the [hawk](https://github.com/GrayCodeAI/hawk) ecosystem: +hawk wires inspect into its own commands, and inspect also ships an MCP server +so any MCP-compatible agent can run audits. -- **Multi-language support** - Scans Go, Python, JavaScript, TypeScript, and more -- **OWASP coverage** - Detects common vulnerability patterns -- **Custom rules** - Define project-specific security policies -- **CI/CD integration** - Fails builds on critical issues +> **inspect is a Go library (and MCP server), not a CLI.** It ships no `inspect` +> binary of its own — it analyzes running URLs, not source code. Import it +> directly to embed website auditing in your own Go program, or run the MCP +> server to expose it to an agent. -## Quick Start +It crawls concurrently (with rate limiting, robots.txt support, redirect +handling, and SSRF protection), runs each check against the discovered pages, +and returns findings with severity levels. Results can be emitted as SARIF for +the GitHub Security tab. -```bash -go get github.com/GrayCodeAI/inspect -``` +## Quick Start ```go -import "github.com/GrayCodeAI/inspect" +import ( + "context" + "fmt" -scanner := inspect.NewScanner( - inspect.WithRules(inspect.DefaultRules), + "github.com/GrayCodeAI/inspect" ) -report, err := scanner.Scan(ctx, "./path/to/code") +// One-shot scan with the Standard preset. 
+report, err := inspect.Scan(ctx, "https://example.com", inspect.Standard) +if err != nil { + // handle error +} for _, f := range report.Findings { - fmt.Printf("[%s] %s - %s\n", f.Severity, f.Rule, f.Message) + fmt.Printf("[%s] %s: %s\n", f.Severity, f.URL, f.Message) +} +``` + +Requires Go 1.26+. + +For repeated or high-throughput scans, reuse a `Scanner` (safe for concurrent use): + +```go +scanner := inspect.NewScanner(inspect.Standard, inspect.WithDepth(3)) +r1, _ := scanner.Scan(ctx, "https://site-a.com") +r2, _ := scanner.Scan(ctx, "https://site-b.com") +``` + +## Features + +- **Accessibility** — meta/ARIA checks; optional axe-core and color-contrast + analysis through the `browser` sub-module (headless Chromium via rod) +- **TLS** — certificate validity and expiry checks +- **Cookies** — `Secure`, `HttpOnly`, and `SameSite` flag auditing +- **Security headers** — detects missing CSP, HSTS, and related headers +- **Mixed content** — flags insecure resources served on HTTPS pages +- **Meta tags** — SEO and metadata checks +- **Concurrent crawler** — depth limits, rate limiting, robots.txt, redirect + following, and SSRF protection (private IPs blocked by default) +- **SARIF output** — `inspect.GenerateSARIF` emits SARIF 2.1.0 for the GitHub + Security tab +- **MCP server** — expose `inspect_scan` and `inspect_scan_dir` to any agent +- **Extensible** — register custom `Checker` implementations or declarative + `RuleCheck` patterns + +## Presets + +| Preset | Behavior | +|---|---| +| `Quick` | Shallow crawl (depth 2), links only | +| `Standard` | Balanced crawl (depth 5), all checks | +| `Deep` | Exhaustive crawl (no depth limit), all checks | +| `SecurityOnly` | Security-related checks only | +| `CI` | Standard checks, fail on high severity | + +## MCP Server + +inspect ships an MCP server (stdio transport) that exposes website auditing to +any MCP-compatible agent: + +```go +import inspectmcp "github.com/GrayCodeAI/inspect/mcp" + +srv := 
inspectmcp.New(inspect.Standard) +if err := srv.ServeStdio(); err != nil { + // handle error } ``` +**Tools:** + +- `inspect_scan` — crawl a URL and run the configured checks +- `inspect_scan_dir` — serve and scan a local directory of HTML files + +## Browser-Rendered Analysis + +By default inspect analyzes raw HTTP responses. To analyze JavaScript-rendered +pages and run axe-core accessibility checks, supply a `BrowserEngine` from the +`browser` sub-module: + +```go +import "github.com/GrayCodeAI/inspect/browser" + +engine, err := browser.New() +if err != nil { + // handle error +} +defer engine.Close() + +report, err := inspect.Scan(ctx, "https://example.com", + inspect.Standard, + inspect.WithBrowser(engine), +) +``` + +## Custom Checks + +```go +// Declarative rule — no Go code beyond the struct. +inspect.RegisterRule(inspect.RuleCheck{ + RuleName: "x-frame-options", + RuleSeverity: inspect.SeverityHigh, + HeaderMissing: []string{"X-Frame-Options"}, +}) + +// Full Checker implementation, scoped to a single Scanner. +scanner := inspect.NewScanner(inspect.WithCustomChecks(myCheck)) +``` + ## Examples See the [examples/](examples/) directory for runnable code samples. -## Configuration +## Architecture -Create `.inspect.yaml` to customize scanning: +See [docs/architecture.md](docs/architecture.md) for the package layout and data flow. 
-```yaml -rules: - - name: no-hardcoded-secrets - severity: critical - - name: sql-injection - severity: high -ignore: - - vendor/ - - testdata/ -``` +## Ecosystem + +inspect is part of the hawk ecosystem: + +| Component | Repository | Purpose | +|---|---|---| +| **hawk** | [GrayCodeAI/hawk](https://github.com/GrayCodeAI/hawk) | AI coding agent | +| **eyrie** | [GrayCodeAI/eyrie](https://github.com/GrayCodeAI/eyrie) | LLM provider runtime | +| **yaad** | [GrayCodeAI/yaad](https://github.com/GrayCodeAI/yaad) | Graph-based memory | +| **inspect** | This repo | Website audit library + MCP server | ## Contributing diff --git a/action.yml b/action.yml deleted file mode 100644 index 477a8b5..0000000 --- a/action.yml +++ /dev/null @@ -1,73 +0,0 @@ -name: "Inspect Web Audit" -description: "Run inspect security and accessibility audits against a URL, with SARIF output for the GitHub Security tab." - -branding: - icon: "shield" - color: "blue" - -inputs: - url: - description: "Target URL to audit" - required: true - checks: - description: "Comma-separated list of checks to run (links, security, forms, a11y, perf, seo)" - required: false - default: "links,security,forms,a11y,perf,seo" - depth: - description: "Maximum crawl depth" - required: false - default: "5" - format: - description: "Output format: text, json, or sarif" - required: false - default: "sarif" - fail-on: - description: "Minimum severity to fail the action: info, low, medium, high, critical" - required: false - default: "high" - -outputs: - findings: - description: "Total number of findings" - max-severity: - description: "Highest severity found" - failed: - description: "Whether the scan failed the threshold (true/false)" - sarif: - description: "Path to the SARIF results file" - -runs: - using: "composite" - steps: - - name: Install Go - uses: actions/setup-go@v5 - with: - go-version: "stable" - - - name: Build inspect-action - shell: bash - run: go build -o inspect-action ./cmd/inspect-action/ - 
working-directory: ${{ github.action_path }} - - - name: Run inspect audit - id: audit - shell: bash - run: | - "${{ github.action_path }}/inspect-action" \ - --format "${{ inputs.format }}" \ - --checks "${{ inputs.checks }}" \ - --depth "${{ inputs.depth }}" \ - --fail-on "${{ inputs.fail-on }}" \ - "${{ inputs.url }}" - continue-on-error: true - - - name: Upload SARIF - if: ${{ always() && inputs.format == 'sarif' }} - uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4 - with: - sarif_file: results.sarif - - - name: Fail if threshold exceeded - if: ${{ steps.audit.outcome == 'failure' }} - shell: bash - run: exit 1 diff --git a/api/openapi.yaml b/api/openapi.yaml index 364cd37..e610218 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -1,9 +1,10 @@ openapi: "3.1.0" info: - title: inspect — Security Auditor Tool Reference + title: inspect — Website Security & Accessibility Auditor Tool Reference description: | - inspect is a website security, accessibility, and SEO auditor. - It operates as a CLI tool, GitHub Action, and MCP server (stdio). + inspect is a website security and accessibility auditor. + It is a Go library with an embeddable MCP server (stdio); the host program + embeds it directly and ships no standalone binary. This document describes the MCP tool surface as a machine-readable reference. inspect does NOT expose an HTTP server — all communication is via stdio MCP @@ -16,6 +17,10 @@ info: url: https://github.com/GrayCodeAI/inspect # No servers section — inspect has no HTTP API. +# inspect ships no standalone binary. The MCP server is embedded by the host +# program, which constructs it from the library and starts a stdio transport: +# srv := mcp.New(opts...) // opts are inspect.Option values +# srv.ServeStdio() // stdio transport # MCP tools are documented below using x-mcp-tool extensions. 
tags: @@ -26,12 +31,13 @@ tags: x-mcp-server: transport: stdio - binary: inspect-ci - start_command: ["inspect-ci", "mcp"] + package: github.com/GrayCodeAI/inspect/mcp + constructor: mcp.New(opts...) # opts are inspect.Option values + serve_stdio: mcp.Server.ServeStdio() x-mcp-tools: inspect_scan: - description: Crawl a URL and run security, accessibility, SEO, and performance checks + description: Crawl a URL and run website security and accessibility checks inputSchema: type: object required: [url] @@ -43,7 +49,7 @@ x-mcp-tools: type: array items: type: string - enum: [security, links, forms, a11y, performance, seo] + enum: [security-headers, cookie-security, tls, mixed-content, meta-tags, accessibility] description: Checks to run (default: all) depth: type: integer diff --git a/deploy/docker/docker-compose.yml b/deploy/docker/docker-compose.yml deleted file mode 100644 index ee70f3e..0000000 --- a/deploy/docker/docker-compose.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: inspect - -services: - inspect: - build: - context: ../../ - dockerfile: Dockerfile - image: ghcr.io/graycodeai/inspect:dev - env_file: - - path: ../../.env.example - required: false - entrypoint: ["inspect-ci"] - command: ["--help"] diff --git a/docs/architecture.md b/docs/architecture.md index b4613af..c58ea54 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -2,7 +2,7 @@ # 🔍 inspect Architecture -**Website Security, Accessibility & SEO Auditor** +**Live Website Accessibility, TLS & Security-Header Auditor** [![Go](https://img.shields.io/badge/Go-1.26+-00ADD8?logo=go)](https://go.dev/) [![Protocol](https://img.shields.io/badge/Protocol-MCP-purple)]() @@ -13,46 +13,52 @@ ## 🎯 Overview -inspect is a website security auditing and crawling library for Go. It crawls sites concurrently, runs **security**, **accessibility**, **SEO**, and **performance** checks, and generates findings with severity levels and **CWE references**. +inspect is a website auditing **library** for Go. 
It crawls live sites +concurrently, runs **accessibility**, **TLS**, **cookie**, **security-header**, +**mixed-content**, and **meta** checks against the discovered pages, and returns +findings with severity levels. Results can be emitted as SARIF for the GitHub +Security tab. -> 💡 Three modes: **Go library**, **CLI binary** (`inspect-ci`), and **GitHub Action**. +> 💡 inspect ships **no CLI binary**. It is consumed two ways: as a **Go library** +> (imported by hawk and other programs) and as an **MCP server** (stdio transport) +> that exposes auditing tools to any MCP-compatible agent. --- -## 🧱 Components +## 🧱 Package Layout ``` inspect/ -├── api/openapi.yaml 📜 MCP tool surface reference -├── cmd/ -│ ├── inspect-ci/main.go 🖥️ CLI binary entry point -│ └── inspect-action/main.go ⚡ GitHub Action entry point ├── inspect.go 📤 Public API: Scan(), Finding, Report, Stats -├── check.go 🛡️ Checker interface, RuleCheck, RegisterCheck() -├── scanner.go 🔄 Crawler orchestration, check execution -├── options.go ⚙️ config, With* functions, presets -├── config.go 📋 .inspect.toml loading -├── sarif.go 📊 SARIF output formatter -├── ci_output.go 🖥️ CI-friendly terminal output -├── llm_scanner.go 🤖 AI-powered scanning -├── api_security.go 🔒 API endpoint security checks -├── dependency_check.go 📦 Dependency vulnerability checks -├── sbom.go 📋 SBOM generation -├── browser.go 🌐 Browser automation entry -├── browser/ 🖥️ Rod-based browser crawling -├── checks/ -│ ├── security.go 🔒 CSP, HSTS, CORS headers -│ ├── accessibility.go ♿ ARIA violations -│ ├── tls.go 🔐 Certificate checks -│ ├── cookies.go 🍪 Cookie security flags -│ ├── headers.go 📋 Missing security headers -│ └── mixed_content.go ⚠️ Mixed content detection +├── scanner.go 🔄 Scanner: crawl orchestration + check execution +├── options.go ⚙️ config, With* options, presets (Quick/Standard/Deep/…) +├── check.go 🛡️ Checker interface, RuleCheck, RegisterCheck/RegisterRule +├── config.go 📋 .inspect config loading +├── severity.go 🎚️ 
Severity (aliased from hawk/shared/types) +├── sarif.go 📊 GenerateSARIF — SARIF 2.1.0 output +├── browser.go 🌐 BrowserEngine interface + page-data types (no rod import) +├── browser_fetcher.go 🔌 Adapts a BrowserEngine into the crawler's fetcher +├── checks/ ✅ Built-in checks run against crawled responses +│ ├── headers.go 📋 Missing security headers (CSP, HSTS, …) +│ ├── cookies.go 🍪 Cookie Secure/HttpOnly/SameSite flags +│ ├── tls.go 🔐 Certificate validity & expiry +│ ├── mixed_content.go ⚠️ Insecure resources on HTTPS pages +│ ├── meta.go 🏷️ Meta-tag / SEO checks +│ └── accessibility.go ♿ Accessibility / ARIA checks +├── browser/ 🖥️ Optional rod-based engine (headless Chromium) +│ ├── rod.go 🚀 New() — launches Chromium, renders pages +│ ├── axe.go ♿ axe-core injection & violation collection +│ ├── contrast.go 🎨 Color-contrast analysis +│ └── options.go ⚙️ Engine options (separate Go module) ├── mcp/ 🔌 MCP server (stdio transport) +│ └── server.go 📡 inspect_scan & inspect_scan_dir tools +├── api/openapi.yaml 📜 MCP tool surface reference +├── examples/ 📚 Runnable code samples └── internal/ - ├── crawler/ 🕷️ URL parsing, sitemap, robots.txt - ├── check/ 🔄 Internal check runner + ├── crawler/ 🕷️ Concurrent crawl, robots.txt, sitemap, rate limit, circuit breaker, dir server + ├── check/ 🔄 Internal check registry & runners (links, forms, a11y, perf, reachability) ├── html/ 📄 HTML parsing utilities - └── report/ 📊 Output format implementations + └── report/ 📊 Output formatters (text, JSON, JUnit, markdown) ``` --- @@ -66,17 +72,21 @@ report, err := inspect.Scan(ctx, "https://example.com", inspect.WithDepth(3), ) -// 🔄 Reusable scanner +// 🔄 Reusable scanner (safe for concurrent use) scanner := inspect.NewScanner(inspect.WithConcurrency(10)) report, err := scanner.Scan(ctx, "https://example.com") +// 📁 Audit local build output before deploy +report, err := scanner.ScanDir(ctx, "./public") + // 🛡️ Custom Go check inspect.RegisterCheck(myCheck) // 📋 Declarative rule (no Go 
code) inspect.RegisterRule(inspect.RuleCheck{ - Name: "x-frame-options", Severity: inspect.High, - Check: inspect.HeaderMissing{Header: "X-Frame-Options"}, + RuleName: "x-frame-options", + RuleSeverity: inspect.SeverityHigh, + HeaderMissing: []string{"X-Frame-Options"}, }) ``` @@ -84,57 +94,84 @@ inspect.RegisterRule(inspect.RuleCheck{ ## ⚡ Presets -| Preset | Checks | Speed | -|--------|--------|:-----:| -| 🏃 `Quick` | links, security headers | Fast | -| 📊 `Standard` | links, security, forms, a11y | Medium | -| 🔬 `Deep` | all checks, depth 10 | Slow | -| 🔒 `SecurityOnly` | security, TLS, cookies, headers | Fast | -| 🤖 `CI` | all checks, fail on Medium+ | Medium | +| Preset | Crawl | Checks | +|--------|-------|--------| +| 🏃 `Quick` | depth 2, concurrency 5 | links | +| 📊 `Standard` | depth 5, concurrency 10 | links, security, forms, a11y, perf, seo | +| 🔬 `Deep` | no depth limit, concurrency 20 | six default checks | +| 🔒 `SecurityOnly` | default crawl | security | +| 🤖 `CI` | depth 5, concurrency 10 | six default checks, fail on high | --- ## 🔌 MCP Server -```bash -inspect-ci mcp # 📡 stdio transport (add to agent MCP config) +Embed the server in a program to expose auditing over stdio: + +```go +import ( + "github.com/GrayCodeAI/inspect" + inspectmcp "github.com/GrayCodeAI/inspect/mcp" +) + +srv := inspectmcp.New(inspect.Standard) +_ = srv.ServeStdio() ``` -**Tools:** `inspect_scan` — crawl URL and run checks · `inspect_scan_dir` — scan local HTML directory +**Tools:** `inspect_scan` — crawl a URL and run checks · `inspect_scan_dir` — serve and scan a local HTML directory. --- -## 🐙 GitHub Action +## 🌐 Browser-Rendered Analysis -```yaml -- uses: GrayCodeAI/inspect@v0.4.0 - with: - url: https://example.com - checks: security,a11y - fail-on: high +The core `inspect` package never imports rod. 
To analyze JavaScript-rendered +pages and run axe-core / contrast checks, supply a `BrowserEngine` from the +`browser` sub-module (a separate Go module so the rod/Chromium dependency stays +opt-in): + +```go +import "github.com/GrayCodeAI/inspect/browser" + +engine, _ := browser.New() +defer engine.Close() + +report, _ := inspect.Scan(ctx, "https://example.com", + inspect.Standard, + inspect.WithBrowser(engine), +) ``` +`browser_fetcher.go` adapts the engine into the crawler's fetcher so rendered +HTML is analyzed instead of the raw HTTP response. + --- ## 🔎 Findings -Each finding includes: +Each finding (`inspect.Finding`) includes: | Field | Description | |-------|-------------| -| `Check` | Which check produced this finding | +| `Check` | Which check produced the finding | | `Severity` | 🟢 Info · 🟡 Low · 🟠 Medium · 🔴 High · 🟥 Critical | | `URL` | Page where the issue was found | +| `Element` | Offending element (optional) | | `Message` | Human-readable description | -| `Evidence` | Snippet of the problematic content | -| `CWE` | CWE reference (required for security findings) | -| `Confidence` | 0.0–1.0 score | +| `Fix` | Suggested remediation (optional) | +| `Evidence` | Snippet of the problematic content (optional) | + +A `Report` aggregates findings plus `Stats` (pages scanned, counts by severity +and check, per-check durations) and a `FailOn` threshold; `Report.Failed()` and +`Report.MaxSeverity()` summarize the run. `GenerateSARIF` converts findings to +SARIF 2.1.0. 
--- -## 🛡️ ReDoS Protection +## 🛡️ Crawler Safeguards -All user-supplied regex patterns go through: -- `compileWithTimeout()` — **1s** compilation limit -- `matchWithTimeout()` — **100ms** match limit -- `checkRegexComplexity()` — rejects nested quantifiers before compilation +- **SSRF protection** — requests to private IP ranges are blocked by default + (`WithAllowPrivateIPs` opts out for internal infrastructure) +- **Rate limiting** — per-host request rate caps +- **Circuit breaker** — stops hitting a host after repeated failures, half-opens after cooldown +- **robots.txt** — respected by default +- **Redirect & page timeouts** — bounded redirect chains and per-page deadlines From bba61c4e60e497e2817f6e69ee5423037ae194cc Mon Sep 17 00:00:00 2001 From: Patel230 Date: Tue, 9 Jun 2026 15:35:02 +0530 Subject: [PATCH 2/3] fix: align inspect check schema names --- api/openapi.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/openapi.yaml b/api/openapi.yaml index e610218..1b543e9 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -49,7 +49,7 @@ x-mcp-tools: type: array items: type: string - enum: [security-headers, cookie-security, tls, mixed-content, meta-tags, accessibility] + enum: [links, security, forms, a11y, perf, seo, sri, aiready, reachability] description: Checks to run (default: all) depth: type: integer From e229ee715f15c9bfc52b76bda060f1c37c7cae76 Mon Sep 17 00:00:00 2001 From: Patel230 Date: Tue, 9 Jun 2026 16:35:54 +0530 Subject: [PATCH 3/3] fix: align README/docs with the nine registered checks The Features list and intro omitted links/forms/perf/seo and the Presets table said 'all checks' without defining it. Document all nine registered checks, mark the six that run in the default presets, and correct the Standard godoc comment. Drop the stale 'GitHub Action' entry from the [Unreleased] bug_report surface-dropdown description (action.yml was removed in this PR). 
--- CHANGELOG.md | 4 ++-- README.md | 41 ++++++++++++++++++++++++++++------------- options.go | 2 +- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8194ccd..3463397 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,8 +40,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm quality impact / SARIF compatibility / SSRF & egress safety / Testing / Checklist. - `.github/ISSUE_TEMPLATE/bug_report.yml` — surface dropdown - (library API / MCP / SARIF / GitHub Action / browser checks / - CVE database) and false-positive textarea. + (library API / MCP / SARIF / browser checks / CVE database) and + false-positive textarea. - `.github/ISSUE_TEMPLATE/feature_request.yml` — `kind` selector covering 8 functional areas (security checks / accessibility / SEO / performance / browser / output formats / config / tooling) diff --git a/README.md b/README.md index 5f0c93d..7f3526e 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,9 @@ ## What is inspect inspect is a Go library that crawls live **websites** and audits the pages it -finds — accessibility, TLS, cookies, security headers, mixed content, and meta -tags. It is part of the [hawk](https://github.com/GrayCodeAI/hawk) ecosystem: +finds — broken links, security headers, forms, accessibility, performance, SEO, +TLS, cookies, mixed content, subresource integrity, AI-readiness, and +reachability. It is part of the [hawk](https://github.com/GrayCodeAI/hawk) ecosystem: hawk wires inspect into its own commands, and inspect also ships an MCP server so any MCP-compatible agent can run audits. 
@@ -62,13 +63,24 @@ r2, _ := scanner.Scan(ctx, "https://site-b.com") ## Features -- **Accessibility** — meta/ARIA checks; optional axe-core and color-contrast - analysis through the `browser` sub-module (headless Chromium via rod) -- **TLS** — certificate validity and expiry checks -- **Cookies** — `Secure`, `HttpOnly`, and `SameSite` flag auditing -- **Security headers** — detects missing CSP, HSTS, and related headers -- **Mixed content** — flags insecure resources served on HTTPS pages -- **Meta tags** — SEO and metadata checks +inspect ships nine built-in checks (registered in `check.DefaultRegistry`). The +six marked *(default)* run in the `Standard`, `Deep`, and `CI` presets; the +remaining three are opt-in via `WithChecks`. + +- **Links** *(default)* — crawls and reports broken/unreachable links +- **Security headers** *(default)* — detects missing CSP, HSTS, and related + headers; also audits TLS certificate validity/expiry, cookie `Secure` / + `HttpOnly` / `SameSite` flags, and mixed content on HTTPS pages +- **Forms** *(default)* — form validation checks (CSRF, action URLs) +- **Accessibility (`a11y`)** *(default)* — meta/ARIA checks; optional axe-core + and color-contrast analysis through the `browser` sub-module (headless + Chromium via rod) +- **Performance (`perf`)** *(default)* — resource sizes and render-blocking + resources +- **SEO** *(default)* — meta tags, structured data, and metadata checks +- **SRI** — Subresource Integrity validation +- **AI-ready (`aiready`)** — checks for agent/LLM-friendly metadata +- **Reachability** — host/URL reachability checks - **Concurrent crawler** — depth limits, rate limiting, robots.txt, redirect following, and SSRF protection (private IPs blocked by default) - **SARIF output** — `inspect.GenerateSARIF` emits SARIF 2.1.0 for the GitHub @@ -79,13 +91,16 @@ r2, _ := scanner.Scan(ctx, "https://site-b.com") ## Presets +The default checks are: `links`, `security`, `forms`, `a11y`, `perf`, `seo`. 
+Add the opt-in checks (`sri`, `aiready`, `reachability`) with `WithChecks`. + | Preset | Behavior | |---|---| -| `Quick` | Shallow crawl (depth 2), links only | -| `Standard` | Balanced crawl (depth 5), all checks | -| `Deep` | Exhaustive crawl (no depth limit), all checks | +| `Quick` | Shallow crawl (depth 2), `links` only | +| `Standard` | Balanced crawl (depth 5), the six default checks | +| `Deep` | Exhaustive crawl (no depth limit), the six default checks | | `SecurityOnly` | Security-related checks only | -| `CI` | Standard checks, fail on high severity | +| `CI` | Default checks, fail on high severity | ## MCP Server diff --git a/options.go b/options.go index 9893bfd..bdbb786 100644 --- a/options.go +++ b/options.go @@ -76,7 +76,7 @@ var Quick Option = optFunc(func(c *config) { c.concurrency = 5 }) -// Standard performs a balanced crawl with all checks enabled. +// Standard performs a balanced crawl with the six default checks enabled. var Standard Option = optFunc(func(c *config) { c.depth = 5 c.checks = []string{"links", "security", "forms", "a11y", "perf", "seo"}