BREAKING CHANGES: - Remove -cskill suffix from all skill names (use standard kebab-case) - Simplify marketplace.json to only official fields (fixes Issue #5) - SKILL.md body must be <500 lines (progressive disclosure via references/) New features: - Cross-platform support for 8+ platforms (Claude Code, Copilot, Cursor, Windsurf, Cline, Codex CLI, Gemini CLI) - scripts/install-template.sh: Auto-detect platform installer with --dry-run - scripts/validate.py: Spec compliance checker for generated skills - scripts/security_scan.py: Security scanner for hardcoded keys and dangerous patterns - MIGRATION.md: v3.x to v4.0 migration guide - 6 new reference files for progressive disclosure from lean SKILL.md Key changes: - SKILL.md: 4,116 → 272 lines with spec-compliant YAML frontmatter - marketplace.json: Stripped to {name, plugins} only - article-to-prototype-cskill/ → article-to-prototype/ - stock-analyzer-cskill/ → stock-analyzer/ - Export system integrates validation + security scanning - README.md rewritten for all supported platforms - Phase 5 pipeline outputs SKILL.md-first, spec-compliant skills Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
424 lines
13 KiB
Python
424 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Security Scanner for Generated Agent Skills.
|
|
|
|
Scans a skill directory for hardcoded API keys, sensitive files, and dangerous
|
|
Python patterns that could pose security risks.
|
|
|
|
Usage:
|
|
python3 scripts/security_scan.py path/to/skill/
|
|
python3 scripts/security_scan.py path/to/skill/ --json
|
|
|
|
Exit codes:
|
|
0 - Clean (no issues found)
|
|
1 - Issues found (one or more security issues detected)
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
# --- API Key Patterns ---
# Each entry: (pattern_name, compiled_regex, description, severity)
#
# The regexes are unanchored, so they are meant to be used with
# ``regex.search`` and will match a key embedded anywhere in a line.

API_KEY_PATTERNS: list[tuple[str, re.Pattern, str, str]] = [
    (
        "OpenAI API Key",
        # "sk-" followed by at least 20 alphanumeric characters.
        re.compile(r"sk-[a-zA-Z0-9]{20,}"),
        "Hardcoded OpenAI API key detected",
        "high",
    ),
    (
        "AWS Access Key",
        # "AKIA" followed by exactly 16 uppercase letters or digits.
        re.compile(r"AKIA[A-Z0-9]{16}"),
        "Hardcoded AWS access key ID detected",
        "high",
    ),
    (
        "GitHub Personal Access Token",
        # "ghp_" followed by 36 alphanumeric characters.
        re.compile(r"ghp_[a-zA-Z0-9]{36}"),
        "Hardcoded GitHub personal access token detected",
        "high",
    ),
    (
        "GitLab Personal Access Token",
        # "glpat-" followed by a 20-character body. The body may contain
        # "_" as well as "-", so both are accepted; without "_" a token
        # with an underscore in its body would go undetected.
        re.compile(r"glpat-[a-zA-Z0-9_\-]{20}"),
        "Hardcoded GitLab personal access token detected",
        "high",
    ),
    (
        "Slack Token",
        # "xoxb-", "xoxp-", "xoxr-" or "xoxs-" followed by the token body.
        re.compile(r"xox[bprs]-[a-zA-Z0-9\-]+"),
        "Hardcoded Slack token detected",
        "high",
    ),
    (
        "Generic Secret",
        # <keyword> [:=] "<8 or more non-quote characters>", case-insensitive.
        # Medium severity because placeholder values match as well.
        re.compile(
            r"""(api[_\-]?key|secret|token|password)\s*[:=]\s*["'][^"']{8,}["']""",
            re.IGNORECASE,
        ),
        "Possible hardcoded secret (generic key/token/password pattern)",
        "medium",
    ),
]
|
|
|
|
|
|
# --- Sensitive File Names ---
# Maps a file name to the warning text reported when a file with that
# name is present in the scanned skill.

SENSITIVE_FILES: dict[str, str] = {
    # dotenv-style environment file
    ".env": (
        "Environment file may contain secrets"
    ),
    # JSON files whose names suggest stored credentials
    "credentials.json": (
        "Credentials file may contain API keys or passwords"
    ),
    "secrets.json": (
        "Secrets file may contain sensitive data"
    ),
    "api_keys.json": (
        "API keys file may contain hardcoded keys"
    ),
}
|
|
|
|
|
|
# --- Dangerous Python Patterns ---
# Each entry: (pattern_name, compiled_regex, description, severity)
#
# These are single-line textual heuristics, not a parse of the code: a call
# whose arguments span several lines will not be matched by the patterns
# that look inside the parentheses.

PYTHON_DANGER_PATTERNS: list[tuple[str, re.Pattern, str, str]] = [
    (
        "eval() usage",
        # \b keeps names such as literal_eval( from matching.
        re.compile(r"\beval\s*\("),
        "Use of eval() can execute arbitrary code; avoid unless strictly necessary",
        "high",
    ),
    (
        "exec() usage",
        re.compile(r"\bexec\s*\("),
        "Use of exec() can execute arbitrary code; avoid unless strictly necessary",
        "high",
    ),
    (
        "os.system() with concatenation",
        # NOTE(review): the trailing character class also matches a lone
        # quote or the letter "f", so calls that pass a plain string literal
        # are reported as well. Left as-is to avoid losing detections.
        re.compile(r"os\.system\s*\([^)]*[\+f\"']"),
        "os.system() with string concatenation is vulnerable to shell injection",
        "high",
    ),
    (
        "subprocess with shell=True",
        # Covers every subprocess entry point that accepts shell=True, not
        # only subprocess.call().
        re.compile(
            r"subprocess\.(?:call|run|Popen|check_call|check_output)"
            r"\s*\([^)]*shell\s*=\s*True"
        ),
        "subprocess call with shell=True is vulnerable to shell injection",
        "high",
    ),
    (
        "__import__() dynamic import",
        re.compile(r"__import__\s*\("),
        "Dynamic imports via __import__() can load arbitrary modules",
        "medium",
    ),
]
|
|
|
|
|
|
# File extensions (lowercase, with the leading dot) that are always treated
# as text and scanned for content patterns.
TEXT_EXTENSIONS: set[str] = {
    # source code and scripts
    ".py", ".js", ".ts", ".jsx", ".tsx", ".sql",
    ".sh", ".bash", ".zsh",
    # markup, documentation and styling
    ".md", ".rst", ".txt", ".html", ".xml", ".css",
    # configuration and data
    ".json", ".yaml", ".yml", ".toml", ".cfg", ".ini", ".conf", ".env",
    ".csv",
}

# Upper bound on the size of a file whose contents are scanned; larger files
# are skipped to avoid performance issues.
MAX_FILE_SIZE_BYTES = 10 * 1024 ** 2  # 10 MB

# Directory names that are not descended into during scanning.
SKIP_DIRS: set[str] = {
    # version control and tool caches
    ".git", "__pycache__", ".pytest_cache", ".mypy_cache",
    # dependency trees and virtual environments
    "node_modules", ".venv", "venv", "env",
    # build output
    "dist", "build",
}
|
|
|
|
|
|
def _is_text_file(file_path: Path) -> bool:
    """
    Decide whether a file's contents should be scanned as text.

    The decision is made in three steps:

    1. A suffix listed in ``TEXT_EXTENSIONS`` is accepted immediately.
    2. A suffix on the known-binary list below is rejected immediately.
    3. Anything else (including files with no suffix) is sniffed: the
       first 1024 bytes are read and the file counts as text only if
       they contain no NUL byte.

    Args:
        file_path: Path to the file.

    Returns:
        True if the file should be scanned for content patterns. False
        for known-binary suffixes, for files whose sample contains a NUL
        byte, and for files that cannot be opened or read.
    """
    suffix = file_path.suffix.lower()

    if suffix in TEXT_EXTENSIONS:
        return True

    # Suffixes that are rejected without opening the file.
    known_binary = {
        ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg",
        ".pdf", ".zip", ".tar", ".gz", ".bz2", ".xz",
        ".exe", ".dll", ".so", ".dylib", ".whl", ".egg",
        ".pyc", ".pyo", ".class", ".o", ".a",
        ".mp3", ".mp4", ".wav", ".avi", ".mov",
        ".ttf", ".otf", ".woff", ".woff2", ".eot",
        ".sqlite", ".db",
    }
    if suffix in known_binary:
        return False

    # No suffix or an unrecognized one: sample the start of the file.
    try:
        with open(file_path, "rb") as handle:
            sample = handle.read(1024)
    except (OSError, PermissionError):
        return False

    # A NUL byte in the sample is taken as the binary indicator.
    return b"\x00" not in sample
|
|
|
|
|
|
def _scan_file_content(
    file_path: Path,
    skill_dir: Path,
) -> list[dict]:
    """
    Scan one file line by line and report every pattern that matches.

    Every scanned file is checked against ``API_KEY_PATTERNS``; files whose
    suffix is ``.py`` are additionally checked against
    ``PYTHON_DANGER_PATTERNS``. A pattern is reported at most once per line.

    The file is skipped (an empty list is returned) when its size cannot be
    read, when it is larger than ``MAX_FILE_SIZE_BYTES``, when
    ``_is_text_file`` rejects it, or when reading it fails.

    Args:
        file_path: Path to the file; must be located under ``skill_dir``.
        skill_dir: Root directory of the skill (for relative path display).

    Returns:
        List of issue dictionaries with the keys ``severity``, ``file``,
        ``line``, ``pattern`` and ``description``.
    """
    found: list[dict] = []
    display_path = str(file_path.relative_to(skill_dir))

    try:
        too_large = file_path.stat().st_size > MAX_FILE_SIZE_BYTES
    except OSError:
        return found

    if too_large or not _is_text_file(file_path):
        return found

    try:
        # Undecodable bytes are replaced rather than aborting the scan.
        text = file_path.read_text(encoding="utf-8", errors="replace")
    except (OSError, PermissionError):
        return found

    # API key patterns come first so that, for a given line, their issues
    # precede the Python-specific ones.
    active_patterns = list(API_KEY_PATTERNS)
    if file_path.suffix.lower() == ".py":
        active_patterns.extend(PYTHON_DANGER_PATTERNS)

    for line_num, line in enumerate(text.splitlines(), start=1):
        found.extend(
            {
                "severity": severity,
                "file": display_path,
                "line": line_num,
                "pattern": pattern_name,
                "description": description,
            }
            for pattern_name, regex, description, severity in active_patterns
            if regex.search(line)
        )

    return found
|
|
|
|
|
|
def security_scan(skill_path: str) -> dict:
    """
    Perform a security scan on a skill directory.

    Checks for hardcoded API keys, sensitive files, and dangerous code
    patterns. Directories named in ``SKIP_DIRS`` are not descended into.
    The directory tree is walked once; each file is checked both by name
    (against ``SENSITIVE_FILES``) and by content (via
    ``_scan_file_content``).

    Args:
        skill_path: Path to the skill directory to scan.

    Returns:
        Dictionary with keys:
        - ``clean`` (bool): True if no issues were found.
        - ``issues`` (list[dict]): List of issue dictionaries, sorted by
          severity (high, medium, low), then file, then line. Each has:
            - ``severity`` (str): "high", "medium", or "low"
            - ``file`` (str): Relative file path
            - ``line`` (int): Line number (0 for file-level issues)
            - ``pattern`` (str): Pattern name that triggered the issue
            - ``description`` (str): Human-readable description

        If ``skill_path`` does not exist or is not a directory, the result
        is not clean and contains a single high-severity issue whose
        pattern is ``missing_directory`` or ``not_a_directory``.
    """
    skill_dir = Path(skill_path).resolve()

    # --- Check: path exists and is a directory ---
    path_problem = None
    if not skill_dir.exists():
        path_problem = ("missing_directory", f"Path does not exist: {skill_dir}")
    elif not skill_dir.is_dir():
        path_problem = ("not_a_directory", f"Path is not a directory: {skill_dir}")

    if path_problem is not None:
        pattern, description = path_problem
        return {
            "clean": False,
            "issues": [{
                "severity": "high",
                "file": str(skill_dir),
                "line": 0,
                "pattern": pattern,
                "description": description,
            }],
        }

    issues: list[dict] = []

    # --- Check: sensitive files directly in the skill root ---
    for sensitive_name, description in SENSITIVE_FILES.items():
        if (skill_dir / sensitive_name).exists():
            issues.append({
                "severity": "high",
                "file": sensitive_name,
                "line": 0,
                "pattern": "Sensitive file",
                "description": description,
            })

    # --- Single walk: nested sensitive files and file contents ---
    for root, dirs, files in os.walk(skill_dir):
        # Prune in place so os.walk does not descend into excluded directories.
        dirs[:] = [d for d in dirs if d not in SKIP_DIRS]
        root_path = Path(root)

        for filename in files:
            file_path = root_path / filename
            relative = str(file_path.relative_to(skill_dir))

            # Root-level sensitive files were reported above; here only
            # copies in subdirectories are added. Using SENSITIVE_FILES
            # directly keeps the nested check in step with the root check.
            if filename in SENSITIVE_FILES and relative != filename:
                issues.append({
                    "severity": "high",
                    "file": relative,
                    "line": 0,
                    "pattern": "Sensitive file",
                    "description": SENSITIVE_FILES[filename],
                })

            issues.extend(_scan_file_content(file_path, skill_dir))

    # Sort issues: high first, then medium, then low; unknown severities last.
    severity_order = {"high": 0, "medium": 1, "low": 2}
    issues.sort(key=lambda x: (severity_order.get(x["severity"], 3), x["file"], x["line"]))

    return {
        "clean": len(issues) == 0,
        "issues": issues,
    }
|
|
|
|
|
|
def _print_human_readable(result: dict, skill_path: str) -> None:
|
|
"""
|
|
Print security scan results in a human-readable format.
|
|
|
|
Args:
|
|
result: The scan result dictionary.
|
|
skill_path: The path that was scanned (for display).
|
|
"""
|
|
print(f"Security scan: {skill_path}")
|
|
print(f"{'=' * 60}")
|
|
|
|
if result["clean"]:
|
|
print("Status: CLEAN")
|
|
print("\nNo security issues found.")
|
|
else:
|
|
print(f"Status: ISSUES FOUND ({len(result['issues'])})")
|
|
|
|
# Count by severity
|
|
high = sum(1 for i in result["issues"] if i["severity"] == "high")
|
|
medium = sum(1 for i in result["issues"] if i["severity"] == "medium")
|
|
low = sum(1 for i in result["issues"] if i["severity"] == "low")
|
|
print(f"\n High: {high} Medium: {medium} Low: {low}")
|
|
|
|
print()
|
|
for issue in result["issues"]:
|
|
severity_label = issue["severity"].upper().ljust(6)
|
|
location = issue["file"]
|
|
if issue["line"] > 0:
|
|
location += f":{issue['line']}"
|
|
print(f" [{severity_label}] {location}")
|
|
print(f" Pattern: {issue['pattern']}")
|
|
print(f" {issue['description']}")
|
|
print()
|
|
|
|
print(f"{'=' * 60}")
|
|
|
|
|
|
def main() -> None:
    """
    CLI entry point for the security scanner.

    Reads the skill path and the optional ``--json`` flag from ``sys.argv``,
    runs ``security_scan`` and prints the result either as JSON or as a
    human-readable report. ``--json`` is recognized in any position; the
    first remaining argument is taken as the skill path.

    Exits with status 0 when the scan is clean and 1 when issues were found.
    When no skill path is given, the usage text is written to stderr and the
    exit status is 1.
    """
    cli_args = sys.argv[1:]
    use_json = "--json" in cli_args

    # Drop the flag before picking the path, so that
    # ``security_scan.py --json path/to/skill`` scans the path and not a
    # directory literally named "--json".
    positional = [arg for arg in cli_args if arg != "--json"]

    if not positional:
        print(
            "Usage: python3 scripts/security_scan.py <skill-path> [--json]\n"
            "\n"
            "Arguments:\n"
            " skill-path Path to the skill directory to scan\n"
            "\n"
            "Options:\n"
            " --json Output results as JSON to stdout\n"
            "\n"
            "Exit codes:\n"
            " 0 Clean (no issues)\n"
            " 1 Issues found (one or more security issues)\n",
            file=sys.stderr,
        )
        sys.exit(1)

    skill_path = positional[0]

    result = security_scan(skill_path)

    if use_json:
        print(json.dumps(result, indent=2))
    else:
        _print_human_readable(result, skill_path)

    sys.exit(0 if result["clean"] else 1)


if __name__ == "__main__":
    main()
|