scientific-agent-skills/scan_pr_skills.py at main · K-Dense-AI/scientific-agent-skills · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#!/usr/bin/env python3
"""Scan a subset of skills (those changed in a pull request) and emit a PR comment.

Usage:
    python scan_pr_skills.py [--output FILE] [--fail-on SEVERITY] SKILL_DIR ...

Each ``SKILL_DIR`` should be a directory containing a ``SKILL.md``. Directories
without a ``SKILL.md`` (e.g. deleted skills) are skipped gracefully.

The script writes a markdown report intended to be posted as a sticky comment
on the pull request. It exits non-zero when any scanned skill has a finding at
``--fail-on`` severity or higher (default: ``CRITICAL``).
"""

from __future__ import annotations

import argparse
import sys
import time
from datetime import datetime, timezone
from pathlib import Path

from dotenv import load_dotenv
from skill_scanner.core.loader import SkillLoadError
from skill_scanner.core.models import Report

from scan_skills import build_scanner, severity_badge

# Read a local .env file (if one exists) into the process environment at
# import time, i.e. before main() builds the scanner.
# NOTE(review): presumably this supplies API keys for the analyzers — confirm.
load_dotenv()

# Severity labels ordered from most to least severe. The position in this list
# drives both the sort order of the results table and the --fail-on threshold.
SEVERITY_ORDER = ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO", "SAFE"]
# HTML comment emitted as the first line of the generated markdown.
# NOTE(review): presumably used by the workflow to find and update the sticky
# PR comment — confirm against the CI configuration.
COMMENT_MARKER = "<!-- skill-security-scan -->"


def _sev_str(obj) -> str:
    sev = getattr(obj, "max_severity", None) or getattr(obj, "severity", None)
    if sev is None:
        return "SAFE"
    return sev.value if hasattr(sev, "value") else str(sev)


def scan_skill_dirs(scanner, skill_dirs: list[Path]) -> Report:
    """Scan each directory in *skill_dirs* and collect everything in one report.

    Each directory is loaded through ``scanner.loader`` and scanned on its own,
    with a progress line printed to stdout per skill. A directory that fails to
    load or scan does not abort the run: it is recorded in
    ``report.skills_skipped`` with the exception text as the reason.

    When more than one skill was loaded successfully, a cross-skill pass
    (description overlap plus ``CrossSkillScanner``) runs over the loaded set.
    Findings whose ``rule_id`` is listed in ``scanner.policy.disabled_rules``
    are dropped, severity overrides are applied, and the remainder is added to
    the report. A failure in this pass is printed and otherwise ignored.

    Args:
        scanner: Scanner object (``main`` passes the result of
            ``build_scanner()``). Its ``loader``, ``policy`` and several
            underscore-prefixed methods are used here.
        skill_dirs: Skill directories to scan, processed in the given order.

    Returns:
        The populated ``Report``.
    """
    report = Report()
    loaded_skills = []

    for skill_dir in skill_dirs:
        name = skill_dir.name
        print(f"  Scanning {name} ...", end="", flush=True)
        # perf_counter() is monotonic, so the reported duration stays correct
        # even if the wall clock is adjusted while a scan is running.
        t0 = time.perf_counter()
        try:
            skill = scanner.loader.load_skill(skill_dir)
            result = scanner._scan_single_skill(skill, skill_dir)
            report.add_scan_result(result)
            loaded_skills.append(skill)
            elapsed = time.perf_counter() - t0
            n = len(result.findings)
            tag = severity_badge(_sev_str(result))
            print(f" {tag} — {n} finding{'s' if n != 1 else ''} ({elapsed:.1f}s)")
        except SkillLoadError as exc:
            elapsed = time.perf_counter() - t0
            print(f" ⚠️  SKIP ({exc}) ({elapsed:.1f}s)")
            report.skills_skipped.append({"skill": str(skill_dir), "reason": str(exc)})
        except Exception as exc:  # pragma: no cover - defensive
            # Best effort: one broken skill must not prevent the others from
            # being scanned and reported.
            elapsed = time.perf_counter() - t0
            print(f" ❌ ERROR ({exc}) ({elapsed:.1f}s)")
            report.skills_skipped.append({"skill": str(skill_dir), "reason": str(exc)})

    # Overlap analysis only makes sense with at least two loaded skills.
    if len(loaded_skills) > 1:
        print("\n  Running cross-skill overlap analysis ...", end="", flush=True)
        t0 = time.perf_counter()
        try:
            from skill_scanner.core.analyzers.cross_skill_scanner import CrossSkillScanner

            overlap = scanner._check_description_overlap(loaded_skills) or []
            cross = CrossSkillScanner().analyze_skill_set(loaded_skills) or []
            all_cross = [*overlap, *cross]
            if scanner.policy.disabled_rules:
                all_cross = [f for f in all_cross if f.rule_id not in scanner.policy.disabled_rules]
            if all_cross:
                scanner._apply_severity_overrides(all_cross)
                report.add_cross_skill_findings(all_cross)
            elapsed = time.perf_counter() - t0
            print(f" {len(all_cross)} finding{'s' if len(all_cross) != 1 else ''} ({elapsed:.1f}s)")
        except Exception as exc:  # pragma: no cover - defensive
            # The per-skill results are still valid; report the failure and
            # return what has been collected so far.
            print(f" error: {exc}")

    return report


def _loc(finding) -> str | None:
    if not finding.file_path:
        return None
    loc = finding.file_path
    if finding.line_number:
        loc += f":{finding.line_number}"
    return loc


def _severity_rank(label: str) -> int:
    """Return the sort position of *label*; unrecognised labels sort last."""
    try:
        return SEVERITY_ORDER.index(label)
    except ValueError:
        return len(SEVERITY_ORDER)


def _quoted(text, lead: str = "") -> list[str]:
    """Render *text* as blockquote lines nested under a markdown list item.

    Every source line gets its own ``  > `` prefix so multi-line text stays
    inside the quote; *lead* is prepended to the first line only.
    """
    rows = str(text).splitlines() or [""]
    rows[0] = f"{lead}{rows[0]}"
    return [f"  > {row}" for row in rows]


def format_comment(report: Report, scanned_dirs: list[Path]) -> str:
    """Build the markdown body of the PR comment for *report*.

    The output always starts with ``COMMENT_MARKER``, a heading and a UTC
    timestamp. With no scanned directories a short "nothing to scan" notice
    follows. Otherwise the comment contains summary counters, a per-skill table
    sorted from most to least severe, a collapsible findings section for every
    skill that has findings, and a list of skipped skills.

    Args:
        report: Scan report to render.
        scanned_dirs: Directories that were scanned; only its emptiness is
            inspected, to select the "nothing to scan" variant.

    Returns:
        The markdown text, lines joined with ``\\n``.
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    lines: list[str] = [COMMENT_MARKER, "## 🛡️ Skill Security Scan", ""]
    lines.append(f"_Generated at {now}_")
    lines.append("")

    if not scanned_dirs:
        lines.append("No skill directories with a `SKILL.md` were changed in this PR — nothing to scan.")
        return "\n".join(lines)

    # Two trailing spaces force a markdown line break between the counters.
    lines.append(f"**Skills scanned:** {report.total_skills_scanned}  ")
    lines.append(f"**Total findings:** {report.total_findings}  ")
    lines.append(
        f"**Critical:** {report.critical_count} | "
        f"**High:** {report.high_count} | "
        f"**Safe:** {report.safe_count}/{report.total_skills_scanned}"
    )
    lines.append("")

    # NOTE(review): only per-skill results are itemised below. Whether
    # cross-skill findings should be listed as well depends on how Report
    # stores them — confirm.
    if report.scan_results:
        lines.append("| Skill | Severity | Findings | Safe |")
        lines.append("|-------|----------|----------|------|")
        # An unexpected severity label must not abort the whole comment, so
        # the rank lookup tolerates unknown labels instead of raising.
        sorted_results = sorted(
            report.scan_results,
            key=lambda r: _severity_rank(_sev_str(r)),
        )
        for result in sorted_results:
            sev = _sev_str(result)
            safe = "✅" if result.is_safe else "❌"
            lines.append(
                f"| `{result.skill_name}` | {severity_badge(sev)} | {len(result.findings)} | {safe} |"
            )
        lines.append("")

        flagged = [r for r in sorted_results if r.findings]
        if flagged:
            lines.append("### Findings")
            lines.append("")
            for result in flagged:
                sev = _sev_str(result)
                n = len(result.findings)
                lines.append(
                    f"<details><summary><code>{result.skill_name}</code> — "
                    f"{severity_badge(sev)} ({n} finding{'s' if n != 1 else ''})</summary>"
                )
                # Blank line so the markdown inside <details> is rendered.
                lines.append("")
                for finding in result.findings:
                    fsev = _sev_str(finding)
                    lines.append(
                        f"- **{severity_badge(fsev)}** `{finding.rule_id}` — {finding.title}"
                    )
                    if finding.description:
                        lines.extend(_quoted(finding.description))
                    loc = _loc(finding)
                    if loc:
                        lines.append(f"  > File: `{loc}`")
                    if finding.remediation:
                        lines.extend(_quoted(finding.remediation, "**Remediation:** "))
                lines.append("")
                lines.append("</details>")
                lines.append("")

    if report.skills_skipped:
        lines.append("### Skipped")
        lines.append("")
        for entry in report.skills_skipped:
            lines.append(f"- `{entry['skill']}`: {entry['reason']}")
        lines.append("")

    return "\n".join(lines)


def _should_block(report: Report, fail_on: str) -> tuple[bool, str | None]:
    """Decide whether the scan outcome should fail the run.

    Args:
        report: Report whose ``scan_results`` are inspected.
        fail_on: Threshold label from ``SEVERITY_ORDER``, or ``"NEVER"`` to
            disable blocking altogether.

    Returns:
        ``(True, reason)`` for the first result whose severity is at or above
        the threshold, ``(False, None)`` when no result qualifies.
    """
    if fail_on != "NEVER":
        # Everything from the top of the ordering down to the threshold blocks.
        cutoff = SEVERITY_ORDER.index(fail_on)
        blocking_levels = SEVERITY_ORDER[: cutoff + 1]
        for scan_result in report.scan_results:
            level = _sev_str(scan_result)
            if level in blocking_levels:
                return True, f"{level} finding(s) in {scan_result.skill_name}"
    return False, None


def main() -> int:
    """Parse the command line, scan the requested skills and write the comment.

    Arguments that are not directories, or that contain no ``SKILL.md``, are
    skipped with a message. If nothing is left to scan, a no-op comment is
    written and the scanner is never built.

    Returns:
        Process exit code: ``1`` when a scanned skill reaches the ``--fail-on``
        severity, ``0`` otherwise (including the nothing-to-scan case).
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains an indented Usage block; the default
        # formatter would re-wrap it into a single paragraph in --help.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("skill_dirs", nargs="*", help="Skill directories to scan")
    parser.add_argument(
        "--output",
        default="pr_scan_comment.md",
        help="Path to write the PR comment markdown (default: pr_scan_comment.md)",
    )
    parser.add_argument(
        "--fail-on",
        default="CRITICAL",
        choices=["CRITICAL", "HIGH", "MEDIUM", "LOW", "NEVER"],
        help="Exit non-zero if any scanned skill has a finding at this severity or higher",
    )
    args = parser.parse_args()
    out_path = Path(args.output)

    scan_targets: list[Path] = []
    for raw in args.skill_dirs:
        p = Path(raw)
        if not p.is_dir():
            print(f"  SKIP {raw} (not a directory)")
            continue
        if not (p / "SKILL.md").exists():
            print(f"  SKIP {raw} (no SKILL.md)")
            continue
        scan_targets.append(p)

    if not scan_targets:
        print("No skill directories to scan — writing no-op comment.")
        md = format_comment(Report(), [])
        # The comment contains emoji and em-dashes, so the encoding is pinned
        # instead of relying on the platform/locale default.
        out_path.write_text(md, encoding="utf-8")
        return 0

    print("Building scanner (LLM + behavioral + trigger + balanced policy)...")
    scanner = build_scanner()
    print(f"Analyzers: {scanner.list_analyzers()}\n")

    print(f"Scanning {len(scan_targets)} skill(s):")
    for d in scan_targets:
        print(f"  - {d}")
    print()

    report = scan_skill_dirs(scanner, scan_targets)

    print(
        f"\nResults: {report.total_skills_scanned} skills, {report.total_findings} findings "
        f"(Critical: {report.critical_count}  High: {report.high_count}  Safe: {report.safe_count})"
    )

    md = format_comment(report, scan_targets)
    # Written before the blocking decision so the comment exists even when the
    # run ends with a non-zero exit code.
    out_path.write_text(md, encoding="utf-8")
    print(f"Comment written to {args.output}")

    blocked, reason = _should_block(report, args.fail_on)
    if blocked:
        print(f"\n❌ Blocking: {reason} (--fail-on {args.fail_on})")
        return 1

    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    sys.exit(exit_code)