pipeline-phases.md Step 7b (new): mandatory harness patterns
- Self-bootstrapping wrappers (bash + PowerShell)
- Input validation with structured JSON errors
- Output sanity checks with _warnings
- --check-prereqs returning JSON health check
- --diagnostics returning skill metadata
- activation: /{skill-name} in frontmatter
- provenance: block (full from cliskill, minimal standalone)
- ## Prerequisites section in SKILL.md body
- Anti-activation in anti-goals
Phase 5 Checklist: 10 new items for harness compliance
validate.py: warnings for missing activation and provenance fields
(warnings not errors — backward compatible with existing skills)
476 lines
16 KiB
Python
476 lines
16 KiB
Python
#!/usr/bin/env python3
"""
Spec Compliance Validator for the Agent Skills Open Standard.

Validates a skill directory against the Agent Skills Open Standard by checking
SKILL.md existence, frontmatter structure, naming conventions, and best practices.

Usage:
    python3 scripts/validate.py path/to/skill/
    python3 scripts/validate.py path/to/skill/ --json

Exit codes:
    0 - Valid (no errors, may have warnings)
    1 - Invalid (one or more errors found)
"""
|
|
|
|
import json
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
# --- Constants ---

# Size limits: name/description lengths are enforced as errors, the body
# line count only produces a warning.
MAX_NAME_LENGTH = 64
MAX_DESCRIPTION_LENGTH = 1024
MAX_BODY_LINES_WARNING = 500

# Valid skill names: lowercase letters, digits and hyphens, starting and
# ending with a letter or digit.
NAME_PATTERN = re.compile(r"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$")
CONSECUTIVE_HYPHENS_PATTERN = re.compile(r"--")

# Shape of a YYYY-MM-DD date (digits only; calendar validity is not checked).
DATE_FORMAT_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}$")

# Any inline markdown link: [text](target). Group 1 is the text, group 2 the
# target. The pattern itself matches every target; http/https/mailto/# targets
# are filtered out afterwards by the code that consumes the matches.
LOCAL_LINK_PATTERN = re.compile(r"\[([^\]]*)\]\(([^)]+)\)")
|
|
|
|
|
|
def _parse_frontmatter(content: str) -> tuple[Optional[str], Optional[str]]:
    """
    Extract frontmatter and body from SKILL.md content.

    The content must begin with ``---``. The frontmatter ends at the first
    later line that consists solely of ``---`` (trailing spaces, tabs or a
    carriage return are tolerated). A ``---`` that merely appears inside a
    line, for example within a description value, does not close the block.

    Args:
        content: Full text content of SKILL.md.

    Returns:
        Tuple of (frontmatter_text, body_text), both stripped of surrounding
        whitespace. Both are None if the opening delimiter is missing or no
        closing delimiter line is found.
    """
    if not content.startswith("---"):
        return None, None

    # Search from index 3 to skip the opening delimiter. With a ``pos``
    # argument, ``^`` still only matches at real line starts, so a run of
    # hyphens on the opening line is never mistaken for the closing one.
    closing = re.compile(r"^---[ \t\r]*$", re.MULTILINE).search(content, 3)
    if closing is None:
        return None, None

    frontmatter = content[3:closing.start()].strip()
    body = content[closing.end():].strip()
    return frontmatter, body
|
|
|
|
|
|
def _parse_yaml_field(frontmatter: str, field: str) -> Optional[str]:
    """
    Extract a top-level scalar field value from YAML frontmatter using simple parsing.

    Handles inline values (``name: value``), inline values wrapped in a
    matching pair of single or double quotes (the quotes are removed; escape
    sequences are not interpreted), and YAML block scalars
    (``description: >-`` followed by indented continuation lines, which are
    joined with single spaces).

    Only unindented lines are considered, so a nested key with the same name
    (``metadata:`` / ``  name: ...``) or text inside a block scalar is never
    mistaken for the top-level field.

    Args:
        frontmatter: The frontmatter text (without ``---`` delimiters).
        field: The field name to look for.

    Returns:
        The field value as a string, or None if the field is not present.
    """
    prefix = f"{field}:"
    lines = frontmatter.split("\n")
    for index, line in enumerate(lines):
        # Indented lines belong to a nested mapping or a block scalar.
        if line[:1] in (" ", "\t") or not line.startswith(prefix):
            continue

        value = line[len(prefix):].strip()

        # Block scalar indicators: collect the indented continuation lines.
        if value in (">-", "|-", ">", "|", ">+", "|+"):
            parts: list[str] = []
            for continuation in lines[index + 1:]:
                if not continuation.strip():
                    # Blank lines are legal inside a block scalar and must
                    # not truncate the value.
                    continue
                if continuation[0] not in (" ", "\t"):
                    break
                parts.append(continuation.strip())
            return " ".join(parts)

        # Unwrap a quoted scalar so "my-skill" validates like my-skill.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        return value
    return None
|
|
|
|
|
|
def _field_exists_in_frontmatter(frontmatter: str, field: str) -> bool:
    """
    Check whether a field name appears as a top-level key in frontmatter.

    Lines are tested without stripping, so only an unindented ``field:``
    counts. A key of the same name nested under another mapping (for example
    ``license`` inside ``provenance``) does not satisfy the check.

    Args:
        frontmatter: The frontmatter text.
        field: The field name to look for.

    Returns:
        True if the field is present at the top level.
    """
    prefix = f"{field}:"
    return any(line.startswith(prefix) for line in frontmatter.split("\n"))
|
|
|
|
|
|
def _subfield_exists(frontmatter: str, parent: str, child: str) -> bool:
    """
    Check whether a sub-field exists under a parent field in YAML frontmatter.

    The parent must be an unindented (top-level) key. Every indented line that
    follows it, up to the next unindented key, is treated as part of the
    parent block. Blank lines and ``#`` comment lines neither open nor close a
    block. Nesting depth is not tracked, so a key nested deeper than one level
    inside the parent is also reported as present.

    Args:
        frontmatter: The frontmatter text.
        parent: The parent field name (e.g., ``metadata``).
        child: The child field name (e.g., ``author``).

    Returns:
        True if the sub-field is found under the parent.
    """
    parent_prefix = f"{parent}:"
    child_prefix = f"{child}:"
    inside_parent = False
    for line in frontmatter.split("\n"):
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        if line[0] in (" ", "\t"):
            if inside_parent and stripped.startswith(child_prefix):
                return True
            continue
        # An unindented key starts a new top-level block; we are inside the
        # parent only if that key is the parent itself.
        inside_parent = stripped.startswith(parent_prefix)
    return False
|
|
|
|
|
|
def _parse_subfield_value(frontmatter: str, parent: str, child: str) -> Optional[str]:
    """
    Extract a sub-field value from under a parent field in YAML frontmatter.

    The parent must be an unindented (top-level) key. Indented lines that
    follow it, up to the next unindented key, form the parent block. Blank
    lines and ``#`` comment lines neither open nor close a block. A value
    wrapped in a matching pair of single or double quotes is returned without
    the quotes, so ``created: "2024-01-01"`` yields ``2024-01-01``.

    Args:
        frontmatter: The frontmatter text.
        parent: The parent field name (e.g., ``metadata``).
        child: The child field name (e.g., ``author``).

    Returns:
        The sub-field value as a string, or None if not found.
    """
    parent_prefix = f"{parent}:"
    child_prefix = f"{child}:"
    inside_parent = False
    for line in frontmatter.split("\n"):
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        if line[0] in (" ", "\t"):
            if inside_parent and stripped.startswith(child_prefix):
                value = stripped[len(child_prefix):].strip()
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    value = value[1:-1]
                return value
            continue
        # An unindented key starts a new top-level block.
        inside_parent = stripped.startswith(parent_prefix)
    return None
|
|
|
|
|
|
def _extract_local_links(body: str) -> list[str]:
    """
    Extract local file paths referenced in markdown links within the body.

    Filters out URLs (http, https, mailto) and anchor links (#). An optional
    quoted link title (``[text](path "Title")``) and any ``#fragment`` are
    removed so that only the file path remains.

    Args:
        body: The markdown body text.

    Returns:
        List of relative file paths referenced in the body, in order of
        appearance (duplicates are kept).
    """
    paths: list[str] = []
    for match in LOCAL_LINK_PATTERN.finditer(body):
        target = match.group(2).strip()

        # Drop a trailing quoted title. Only a space-free destination followed
        # by a quoted string is rewritten, so unquoted paths that contain
        # spaces are left exactly as written.
        titled = re.match(r"""^(\S+)\s+(["']).*\2$""", target)
        if titled:
            target = titled.group(1)

        # Skip external URLs and anchors
        if target.startswith(("http://", "https://", "mailto:", "#")):
            continue

        # Strip any anchor fragment from the path
        target = target.split("#", 1)[0]
        if target:
            paths.append(target)
    return paths
|
|
|
|
|
|
def validate_skill(skill_path: str) -> dict:
    """
    Validate a skill directory against the Agent Skills Open Standard.

    Performs both required checks (errors) and recommended checks (warnings).
    Structural problems (missing path, missing or unreadable SKILL.md, missing
    or unclosed frontmatter) stop validation immediately; all remaining checks
    accumulate their findings.

    Args:
        skill_path: Path to the skill directory to validate.

    Returns:
        Dictionary with keys:
        - ``valid`` (bool): True if no errors were found.
        - ``errors`` (list[str]): List of error messages (must fix).
        - ``warnings`` (list[str]): List of warning messages (should fix).
    """
    errors: list[str] = []
    warnings: list[str] = []

    def _result() -> dict:
        # Single place that builds the return value from the running lists.
        return {"valid": not errors, "errors": errors, "warnings": warnings}

    skill_dir = Path(skill_path).resolve()

    # --- Check: directory exists ---
    if not skill_dir.exists():
        errors.append(f"Path does not exist: {skill_dir}")
        return _result()

    if not skill_dir.is_dir():
        errors.append(f"Path is not a directory: {skill_dir}")
        return _result()

    # --- Check: SKILL.md exists ---
    skill_md = skill_dir / "SKILL.md"
    if not skill_md.exists():
        errors.append("SKILL.md not found in skill directory")
        return _result()

    # --- Read SKILL.md ---
    try:
        # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
        # mark, which would otherwise hide the opening '---' delimiter.
        content = skill_md.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeError) as exc:
        errors.append(f"Could not read SKILL.md: {exc}")
        return _result()

    # --- Check: frontmatter exists ---
    if not content.startswith("---"):
        errors.append("SKILL.md must start with '---' frontmatter delimiter")
        return _result()

    frontmatter, body = _parse_frontmatter(content)

    if frontmatter is None:
        errors.append("SKILL.md frontmatter is not properly closed (missing closing '---')")
        return _result()

    # --- Check: name field ---
    name_value = _parse_yaml_field(frontmatter, "name")
    if name_value is None:
        errors.append("'name' field is missing from frontmatter")
    else:
        name_value = name_value.strip()
        if not name_value:
            errors.append("'name' field is empty")
        elif len(name_value) > MAX_NAME_LENGTH:
            errors.append(
                f"'name' field exceeds {MAX_NAME_LENGTH} characters "
                f"(found {len(name_value)})"
            )
        else:
            # Validate name format
            if not NAME_PATTERN.match(name_value):
                errors.append(
                    f"'name' field must contain only lowercase letters, numbers, "
                    f"and hyphens (found: '{name_value}')"
                )
            if name_value.startswith("-"):
                errors.append(f"'name' must not start with a hyphen (found: '{name_value}')")
            if name_value.endswith("-"):
                errors.append(f"'name' must not end with a hyphen (found: '{name_value}')")
            if CONSECUTIVE_HYPHENS_PATTERN.search(name_value):
                errors.append(
                    f"'name' must not contain consecutive hyphens (found: '{name_value}')"
                )

            # --- Check: directory name matches name field ---
            dir_name = skill_dir.name
            if dir_name != name_value:
                errors.append(
                    f"Directory name '{dir_name}' does not match 'name' field "
                    f"'{name_value}' in frontmatter"
                )

    # --- Check: description field ---
    description_value = _parse_yaml_field(frontmatter, "description")
    if description_value is None:
        errors.append("'description' field is missing from frontmatter")
    else:
        description_value = description_value.strip()
        if not description_value:
            errors.append("'description' field is empty")
        elif len(description_value) > MAX_DESCRIPTION_LENGTH:
            errors.append(
                f"'description' field exceeds {MAX_DESCRIPTION_LENGTH} characters "
                f"(found {len(description_value)})"
            )

    # --- Check: -cskill suffix is deprecated ---
    if name_value is not None and name_value.endswith("-cskill"):
        errors.append(
            f"'name' uses the deprecated '-cskill' suffix. "
            f"Use '-skill' instead (found: '{name_value}')"
        )

    # --- Warnings ---

    # Naming convention: -skill suffix (or -suite for suites)
    if name_value and not name_value.endswith(("-skill", "-suite")):
        warnings.append(
            f"'name' should end with '-skill' for discoverability "
            f"(found: '{name_value}')"
        )

    # Body line count
    if body is not None:
        body_line_count = len(body.split("\n"))
        if body_line_count > MAX_BODY_LINES_WARNING:
            warnings.append(
                f"SKILL.md body exceeds {MAX_BODY_LINES_WARNING} lines "
                f"({body_line_count} lines). Consider moving content to references/."
            )

    # license field
    if not _field_exists_in_frontmatter(frontmatter, "license"):
        warnings.append("'license' field is missing from frontmatter")

    # metadata field
    if not _field_exists_in_frontmatter(frontmatter, "metadata"):
        warnings.append("'metadata' field is missing from frontmatter")
    else:
        if not _subfield_exists(frontmatter, "metadata", "author"):
            warnings.append("'metadata.author' sub-field is missing")
        if not _subfield_exists(frontmatter, "metadata", "version"):
            warnings.append("'metadata.version' sub-field is missing")

    # Temporal metadata validation (optional, warnings only)
    created_val = _parse_subfield_value(frontmatter, "metadata", "created")
    reviewed_val = _parse_subfield_value(frontmatter, "metadata", "last_reviewed")
    interval_val = _parse_subfield_value(frontmatter, "metadata", "review_interval_days")

    if created_val and not DATE_FORMAT_PATTERN.match(created_val.strip()):
        warnings.append(
            f"'metadata.created' should be YYYY-MM-DD format (found: '{created_val}')"
        )
    if reviewed_val and not DATE_FORMAT_PATTERN.match(reviewed_val.strip()):
        warnings.append(
            f"'metadata.last_reviewed' should be YYYY-MM-DD format (found: '{reviewed_val}')"
        )
    if interval_val:
        try:
            int(interval_val.strip())
        except ValueError:
            warnings.append(
                f"'metadata.review_interval_days' should be an integer (found: '{interval_val}')"
            )

    if not (created_val or reviewed_val or interval_val):
        warnings.append(
            "Consider adding temporal metadata (metadata.created, metadata.last_reviewed, "
            "metadata.review_interval_days) for staleness tracking"
        )

    # activation field (harness factory v1.1)
    if not _field_exists_in_frontmatter(frontmatter, "activation"):
        warnings.append(
            "'activation' field is missing from frontmatter. "
            "Add 'activation: /{skill-name}' for namespace enforcement."
        )

    # provenance field (harness factory v1.1)
    if not _field_exists_in_frontmatter(frontmatter, "provenance"):
        warnings.append(
            "'provenance' field is missing from frontmatter. "
            "Add provenance metadata (maintainer, version, created, source_references)."
        )

    # Referenced local files
    if body is not None:
        # dict.fromkeys keeps first-seen order while collapsing repeated
        # links, so each missing file is reported once.
        for link_path in dict.fromkeys(_extract_local_links(body)):
            if not (skill_dir / link_path).exists():
                warnings.append(
                    f"Referenced file does not exist: '{link_path}'"
                )

    return _result()
|
|
|
|
|
|
def _print_human_readable(result: dict, skill_path: str) -> None:
    """
    Print validation results in a human-readable format.

    Writes to stdout: a header with the validated path, the overall status,
    the errors and warnings (each section only when non-empty), and a closing
    rule. "No issues found." is printed when both lists are empty.

    Args:
        result: The validation result dictionary (reads the ``valid``,
            ``errors`` and ``warnings`` keys).
        skill_path: The path that was validated (for display).
    """
    rule = "=" * 60
    found_errors = result["errors"]
    found_warnings = result["warnings"]

    print(f"Validating: {skill_path}")
    print(rule)
    print("Status: VALID" if result["valid"] else "Status: INVALID")

    if found_errors:
        print(f"\nErrors ({len(found_errors)}):")
        for error in found_errors:
            print(f" [ERROR] {error}")

    if found_warnings:
        print(f"\nWarnings ({len(found_warnings)}):")
        for warning in found_warnings:
            print(f" [WARN] {warning}")

    if not found_errors and not found_warnings:
        print("\nNo issues found.")

    print(rule)
|
|
|
|
|
|
def main() -> None:
    """
    CLI entry point for the spec compliance validator.

    Reads ``sys.argv``: the first argument that is not ``--json`` is the skill
    path, and ``--json`` may appear before or after it. With no path, the
    usage text is written to stderr and the process exits with status 1.
    Otherwise the result is printed (JSON or human-readable) and the process
    exits with 0 when the skill is valid and 1 when it is not.
    """
    arguments = sys.argv[1:]
    use_json = "--json" in arguments
    # Separate the flag from positionals so "--json path/" does not treat the
    # flag itself as the skill path.
    positional = [arg for arg in arguments if arg != "--json"]

    if not positional:
        print(
            "Usage: python3 scripts/validate.py <skill-path> [--json]\n"
            "\n"
            "Arguments:\n"
            " skill-path Path to the skill directory to validate\n"
            "\n"
            "Options:\n"
            " --json Output results as JSON to stdout\n"
            "\n"
            "Exit codes:\n"
            " 0 Valid (no errors)\n"
            " 1 Invalid (one or more errors)\n",
            file=sys.stderr,
        )
        sys.exit(1)

    skill_path = positional[0]
    result = validate_skill(skill_path)

    if use_json:
        print(json.dumps(result, indent=2))
    else:
        _print_human_readable(result, skill_path)

    sys.exit(0 if result["valid"] else 1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|