awesome-copilot/skills/quality-playbook/quality_gate.py

#!/usr/bin/env python3
"""quality_gate.py — Post-run validation gate for Quality Playbook artifacts.

Mechanically checks artifact conformance issues that model self-attestation
persistently misses. Now the sole gate script; the earlier quality_gate.sh
(bash) has been retired. See quality_gate/test_quality_gate.py for the test
suite.

Usage:
    ./quality_gate.py .                          # Check current directory (benchmark mode)
    ./quality_gate.py --general .                # Check with relaxed thresholds
    ./quality_gate.py virtio                     # Check named repo (from repos/)
    ./quality_gate.py --all                      # Check all current-version repos
    ./quality_gate.py --version 1.3.27 virtio    # Check specific version

Exit codes:
    0 — all checks passed
    1 — one or more checks failed

Runs on Python 3.8+ with only the standard library.
"""

import json
import os
import re
import sys
from datetime import date
from pathlib import Path

SCRIPT_DIR = Path(__file__).resolve().parent

# Allow soft import of bin/citation_verifier for v1.5.1 byte-equality checks.
# The verifier may live at one of several locations depending on where the
# gate was installed:
#   1. <QPB-clone>/bin/citation_verifier.py — gate runs from the source tree
#      (gate path: <clone>/.github/skills/quality_gate/quality_gate.py;
#      bin/ is three parents up from SCRIPT_DIR).
#   2. <install-root>/bin/citation_verifier.py — gate installed alongside
#      bin/ at the install root (v1.5.6 BUG-005 fix; bin/install_skill.py
#      and repos/setup_repos.sh both bundle bin/citation_verifier.py here).
#   3. <install-root>/bin/citation_verifier.py via the nested-skills path
#      (.github/skills/quality_gate.py — SCRIPT_DIR is .github/skills, and
#      bin/ is two parents up).
# When none of these resolve, byte-equality is skipped with a WARN rather
# than a hard FAIL — the gate continues with reduced enforcement.
_CITATION_VERIFIER = None
_VERIFIER_SEARCH_ROOTS = [
    SCRIPT_DIR.parent.parent.parent,  # source-clone layout
    SCRIPT_DIR,                       # gate + bin/ siblings (uncommon)
    SCRIPT_DIR.parent.parent,         # nested-skills layout (.github/skills/quality_gate.py)
]
for _candidate_root in _VERIFIER_SEARCH_ROOTS:
    _verifier_file = _candidate_root / "bin" / "citation_verifier.py"
    if _verifier_file.is_file():
        try:
            if str(_candidate_root) not in sys.path:
                sys.path.insert(0, str(_candidate_root))
            from bin import citation_verifier as _CITATION_VERIFIER  # noqa: E402
            break
        except Exception:  # noqa: BLE001 — missing / misinstalled bin/ is tolerable
            _CITATION_VERIFIER = None
            continue

# Global counters — reset per invocation via main(). Tests that call check_repo
# directly should reset these in setUp.
FAIL = 0
WARN = 0


# v1.5.2 — REQ Pattern field (Lever 2)
VALID_PATTERN_VALUES = frozenset({"whitelist", "parity", "compensation"})

_REQ_PATTERN_RE = re.compile(
    r"^\s*-\s*Pattern:\s*(\S+)\s*$", re.IGNORECASE | re.MULTILINE
)


def extract_req_pattern(req_block):
    """Return the REQ's pattern tag from a REQUIREMENTS.md block, or None.

    Raises ValueError when the block carries an invalid pattern value. Valid
    values are VALID_PATTERN_VALUES. Absent field returns None.
    """
    m = _REQ_PATTERN_RE.search(req_block)
    if not m:
        return None
    value = m.group(1).strip()
    if value not in VALID_PATTERN_VALUES:
        raise ValueError(
            "Invalid REQ pattern '{}'. Expected one of: {}".format(
                value, sorted(VALID_PATTERN_VALUES)
            )
        )
    return value


# v1.5.2 — cardinality gate (Lever 3)

VALID_REASON_CLASSES = frozenset({
    "out-of-scope",
    "deprecated",
    "platform-gated",
    "handled-upstream",
    "intentionally-partial",
})

_CELL_ID_RE = re.compile(r"^REQ-\d+/cell-[A-Za-z0-9_]+-[A-Za-z0-9_]+$")

_COVERS_RE = re.compile(
    r"^\s*-\s*Covers:\s*\[(.*?)\]\s*$", re.IGNORECASE | re.MULTILINE
)

_CONSOLIDATION_RE = re.compile(
    r"^\s*-\s*Consolidation rationale:\s*(.+?)\s*$",
    re.IGNORECASE | re.MULTILINE,
)

_BUG_HEADING_RE = re.compile(r"^###\s+BUG-(\d+):", re.MULTILINE)

# v1.5.2 (C13.8/Fix 1) — evidence locator for present:true grid cells.
# Relative path (no leading '/'), single colon, line number (>=1) or
# range ``N-M`` with both endpoints >=1. Rejects: absolute paths,
# multi-slash roots, URLs, line zero, zero-endpoint ranges.
_EVIDENCE_RE = re.compile(r"^(?!/)[^:]+:[1-9]\d*(-[1-9]\d*)?$")


def _parse_covers(bug_block):
    m = _COVERS_RE.search(bug_block)
    if not m:
        return []
    raw = m.group(1).strip()
    if not raw:
        return []
    items = [s.strip() for s in raw.split(",")]
    return [s for s in items if s]


def _parse_consolidation_rationale(bug_block):
    m = _CONSOLIDATION_RE.search(bug_block)
    if not m:
        return None
    text = m.group(1).strip()
    return text or None


def _split_bug_blocks(bugs_md_text):
    """Return list of (bug_id, body) pairs."""
    positions = [(m.start(), m.group(1)) for m in _BUG_HEADING_RE.finditer(bugs_md_text)]
    result = []
    for idx, (start, bug_id) in enumerate(positions):
        end = positions[idx + 1][0] if idx + 1 < len(positions) else len(bugs_md_text)
        result.append(("BUG-{}".format(bug_id), bugs_md_text[start:end]))
    return result


def _bug_primary_requirement(block):
    m = re.search(
        r"^\s*-\s*Primary requirement:\s*(REQ-\d+)", block, re.MULTILINE | re.IGNORECASE
    )
    return m.group(1) if m else None


def _load_json_or_none(path):
    if not path.is_file():
        return None
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return None


def _read_text_safe(path):
    try:
        return path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""


_REQ_HEADING_RE = re.compile(r"^###\s+(REQ-\d+):", re.MULTILINE)


def _enumerate_pattern_tagged_reqs(req_text):
    """Return {req_id: pattern} for every ### REQ-NNN: block in REQUIREMENTS.md
    that carries a ``- Pattern: <value>`` line.

    Raises ValueError if any block's pattern value is not in
    VALID_PATTERN_VALUES (delegated to extract_req_pattern()). Blocks without a
    Pattern field are omitted from the result (they're not pattern-tagged).
    """
    if not req_text:
        return {}
    positions = [(m.start(), m.group(1)) for m in _REQ_HEADING_RE.finditer(req_text)]
    result = {}
    for idx, (start, req_id) in enumerate(positions):
        end = positions[idx + 1][0] if idx + 1 < len(positions) else len(req_text)
        block = req_text[start:end]
        pattern = extract_req_pattern(block)
        if pattern is not None:
            result[req_id] = pattern
    return result


# v1.5.2 (C13.7/Fix 2) — per-site UC detection.
# Phase 1's Cartesian UC rule emits UC-N.a / UC-N.b / ... for REQs where both
# eligibility gates match. Any REQ block in REQUIREMENTS.md that cites such
# per-site UCs MUST carry a Pattern field — otherwise Phase 2 silently dropped
# it. The regex is deliberately narrow: one lowercase letter suffix only, word
# boundaries on both sides, so bare UC-N and over-suffixed UC-N.a.bad are not
# mistaken for per-site references.
_PER_SITE_UC_RE = re.compile(r"\bUC-\d+\.[a-z]\b")


def _enumerate_per_site_uc_reqs(req_text):
    """Return {req_id: sorted_list_of_uc_ids} for every ### REQ-NNN: block
    that cites at least one per-site UC reference (UC-N.a / UC-N.b / ...).

    REQ blocks without per-site UC references are omitted from the result.
    Each returned UC list is deduplicated and lexically sorted.
    """
    if not req_text:
        return {}
    positions = [(m.start(), m.group(1)) for m in _REQ_HEADING_RE.finditer(req_text)]
    result = {}
    for idx, (start, req_id) in enumerate(positions):
        end = positions[idx + 1][0] if idx + 1 < len(positions) else len(req_text)
        block = req_text[start:end]
        ucs = sorted(set(_PER_SITE_UC_RE.findall(block)))
        if ucs:
            result[req_id] = ucs
    return result


def validate_cardinality_gate(repo_dir):
    """Run the v1.5.2 cardinality reconciliation gate.

    Returns a list of failure strings. An empty list means the gate passed.
    Caller decides how to surface failures (print / fail()).

    Inputs expected in repo_dir/quality/:
      - REQUIREMENTS.md (source of pattern-tagged REQs)
      - BUGS.md (source of Covers: annotations)
      - compensation_grid.json (source of cell set per REQ)
      - compensation_grid_downgrades.json (optional; source of downgrade cells)
    """
    failures = []
    q = Path(repo_dir) / "quality"

    req_text = _read_text_safe(q / "REQUIREMENTS.md")

    # Enumerate pattern-tagged and per-site-UC REQs up front so the
    # downstream cross-checks can run regardless of whether a grid file
    # exists. A REQ that cites per-site UCs but lacks Pattern is a failure
    # independent of grid presence (in fact, if Pattern is missing there is
    # no grid precisely because Pattern is the trigger for producing one).
    try:
        pattern_tagged = _enumerate_pattern_tagged_reqs(req_text)
    except ValueError as exc:
        failures.append("REQUIREMENTS.md: {}".format(exc))
        pattern_tagged = {}
    try:
        per_site = _enumerate_per_site_uc_reqs(req_text)
    except ValueError as exc:
        failures.append("REQUIREMENTS.md: {}".format(exc))
        per_site = {}

    # Cross-check (C13.7/Fix 2): every REQ that cites per-site UCs (UC-N.a,
    # UC-N.b, ...) in REQUIREMENTS.md MUST carry a Pattern field. Per-site UCs
    # are the structural signal emitted by Phase 1's Cartesian UC rule; if the
    # signal is there but Pattern is missing, Phase 2 silently dropped it and
    # the v1.4.5 regression vector is live again. Runs regardless of grid
    # presence because missing Pattern is exactly what would cause the grid
    # to be absent in the first place.
    for req_id, uc_ids in per_site.items():
        if req_id not in pattern_tagged:
            failures.append(
                "cardinality gate: {} has per-site UCs ({}) in REQUIREMENTS.md "
                "but is missing the Pattern field — Phase 1 Cartesian UC rule "
                "requires Pattern tagging for cross-site REQs (see "
                "phase1_prompt confirmation checklist item 6)".format(
                    req_id, ", ".join(uc_ids)
                )
            )

    grid_path = q / "compensation_grid.json"
    grid = _load_json_or_none(grid_path)
    if grid is None:
        # No grid file: only a problem if any pattern-tagged REQs exist.
        if _REQ_PATTERN_RE.search(req_text):
            failures.append(
                "cardinality gate: pattern-tagged REQs exist but "
                "quality/compensation_grid.json is missing"
            )
        return failures

    reqs = grid.get("reqs") or {}
    if not isinstance(reqs, dict):
        failures.append("compensation_grid.json: 'reqs' is not an object")
        return failures

    # Cross-check: every pattern-tagged REQ in REQUIREMENTS.md must appear in
    # the grid. Omitting a pattern-tagged REQ from the grid was a v1.5.2 escape
    # hatch (silently skipped by the per-REQ reconcile loop); close it here.
    for req_id, req_pattern in pattern_tagged.items():
        if req_id not in reqs:
            failures.append(
                "cardinality gate: {} is pattern-tagged '{}' in REQUIREMENTS.md "
                "but has no entry in compensation_grid.json".format(req_id, req_pattern)
            )

    # Load BUGS.md and index covers by REQ
    bugs_text = _read_text_safe(q / "BUGS.md")
    covers_by_req = {}
    for bug_id, block in _split_bug_blocks(bugs_text):
        covers = _parse_covers(block)
        if len(covers) >= 2:
            if not _parse_consolidation_rationale(block):
                failures.append(
                    "{}: Covers has {} entries but 'Consolidation rationale:' is missing or empty".format(
                        bug_id, len(covers)
                    )
                )
        for cell_id in covers:
            if not _CELL_ID_RE.match(cell_id):
                failures.append(
                    "{}: malformed cell ID '{}' (expected REQ-N/cell-<item>-<site>)".format(
                        bug_id, cell_id
                    )
                )
                continue
            req_id = cell_id.split("/", 1)[0]
            covers_by_req.setdefault(req_id, set()).add(cell_id)

    # Load downgrades and validate each record
    downgrades = _load_json_or_none(q / "compensation_grid_downgrades.json") or {"downgrades": []}
    downgrade_cells_by_req = {}
    for rec in downgrades.get("downgrades", []):
        rid = rec.get("cell_id", "")
        if not _CELL_ID_RE.match(rid):
            failures.append("downgrade record: malformed cell_id '{}'".format(rid))
            continue
        # A downgrade record only counts toward reconciliation once every
        # validation below passes. A malformed record emits diagnostic
        # failure strings AND stays out of downgrade_cells_by_req, so the
        # per-REQ uncovered-cells calculation still flags the cell.
        rec_ok = True
        for field in ("authority_ref", "site_citation", "reason_class", "falsifiable_claim"):
            value = rec.get(field)
            if not value or not isinstance(value, str) or not value.strip():
                failures.append(
                    "downgrade record {}: missing or empty field '{}'".format(rid, field)
                )
                rec_ok = False
        reason = rec.get("reason_class", "")
        if reason and reason not in VALID_REASON_CLASSES:
            failures.append(
                "downgrade record {}: reason_class '{}' not in {}".format(
                    rid, reason, sorted(VALID_REASON_CLASSES)
                )
            )
            rec_ok = False
        if not rec_ok:
            continue
        req_id = rid.split("/", 1)[0]
        downgrade_cells_by_req.setdefault(req_id, set()).add(rid)

    # Reconcile per-REQ
    for req_id, entry in reqs.items():
        pattern = entry.get("pattern")
        if pattern not in {"whitelist", "parity", "compensation"}:
            failures.append(
                "compensation_grid.json: {} has invalid or missing pattern '{}'".format(
                    req_id, pattern
                )
            )
            continue
        cells = entry.get("cells") or []
        # v1.5.2 (C13.8/Fix 2): pre-validate each cell's 'present' field is a
        # strict bool. Non-bool values (string "true", int 1, None, missing key)
        # would otherwise fall between the 'is False' absent-cell branch and
        # the 'is not True' present-cell evidence branch, escaping both checks.
        # Same silent-bypass family as B1 — diagnose AND skip the cell, do not
        # let it count toward coverage accounting.
        valid_cells = []
        for c in cells:
            if not isinstance(c, dict):
                continue
            present = c.get("present")
            if not isinstance(present, bool):
                cell_id = c.get("cell_id") or "<no cell_id>"
                failures.append(
                    "{}: cell {} 'present' must be boolean true or false; got {!r}".format(
                        req_id, cell_id, present
                    )
                )
                continue
            valid_cells.append(c)

        grid_cell_ids = {c.get("cell_id") for c in valid_cells}
        grid_cell_ids.discard(None)
        # Only absent cells require coverage. Identity check is safe now —
        # every element of valid_cells has 'present' as a strict bool.
        absent_cells = {
            c.get("cell_id") for c in valid_cells
            if c.get("present") is False
        }
        absent_cells.discard(None)

        # v1.5.2 (C13.6/B2): present:true cells must carry a non-empty
        # 'evidence' field in file:line form. Without this, a reviewer or LLM
        # can claim any cell is present, supply nothing, and the gate accepts
        # it — the bypass Round 5 Council called the highest remaining risk.
        for c in valid_cells:
            if c.get("present") is not True:
                continue
            cell_id = c.get("cell_id") or "<no cell_id>"
            evidence = c.get("evidence")
            if not evidence or not isinstance(evidence, str) or not evidence.strip():
                failures.append(
                    "{}: present:true requires non-empty 'evidence' field with file:line citation".format(cell_id)
                )
                continue
            if not _EVIDENCE_RE.match(evidence.strip()):
                failures.append(
                    "{}: 'evidence' must be file:line (e.g. 'path/to.c:123' or 'path/to.c:120-140'); got {!r}".format(
                        cell_id, evidence
                    )
                )

        covered = covers_by_req.get(req_id, set())
        downgraded = downgrade_cells_by_req.get(req_id, set())
        uncovered = absent_cells - covered - downgraded

        if uncovered:
            failures.append(
                "{}: uncovered cells — {}".format(req_id, ", ".join(sorted(uncovered)))
            )

        # Every covered cell must be in the grid
        stray = (covered | downgraded) - grid_cell_ids
        if stray:
            failures.append(
                "{}: Covers/downgrade cells not in grid — {}".format(
                    req_id, ", ".join(sorted(stray))
                )
            )

    return failures


def _reset_counters():
    global FAIL, WARN
    FAIL = 0
    WARN = 0


def fail(msg, reason=None, *, line=None):
    """Emit a structured failure line and increment FAIL.

    Phase 5 r3 format: `<path>[:<line>]: <reason>` — no "FAIL:" label, so
    output is grep-parseable as `^[^:]+:[0-9]*:? .+$`. The prefix `FAIL:` is
    deliberately removed; the global FAIL counter (summarised in main()) is
    the authoritative count of failures per run.

    Preferred forms:
        fail("quality/INDEX.md", "file missing")
            -> "  quality/INDEX.md: file missing"
        fail("quality/INDEX.md", "missing required field 'x'", line=42)
            -> "  quality/INDEX.md:42: missing required field 'x'"

    Legacy single-arg form (transitional; still supported — most v1.4.x
    messages already embed a path-like token):
        fail("BUGS.md missing or not a file")
            -> "  BUGS.md missing or not a file"
    """
    global FAIL
    if reason is None:
        print(f"  {msg}")
    elif line is None:
        print(f"  {msg}: {reason}")
    else:
        print(f"  {msg}:{line}: {reason}")
    FAIL += 1


def pass_(msg):
    print(f"  PASS: {msg}")


def warn(msg):
    global WARN
    print(f"  WARN: {msg}")
    WARN += 1


def info(msg):
    print(f"  INFO: {msg}")


# --- JSON helpers (proper parsing, not grep-style) ---


def load_json(path):
    """Parse JSON file. Return parsed value, or None on any error."""
    if not path.is_file():
        return None
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        return None


def has_key(data, key):
    """True if `data` is a dict containing `key`."""
    return isinstance(data, dict) and key in data


def get_str(data, key):
    """Return data[key] if it's a string, else empty string."""
    if not isinstance(data, dict):
        return ""
    val = data.get(key)
    return val if isinstance(val, str) else ""


def count_per_bug_field(bugs_list, field):
    """Count bugs in list that have `field` set."""
    if not isinstance(bugs_list, list):
        return 0
    return sum(1 for b in bugs_list if isinstance(b, dict) and field in b)


# --- File helpers ---


# v1.5.4 Phase 3.6.4 (B-16): the end-of-run reorg moves intermediate
# pipeline artifacts under quality/workspace/. The gate reads each of
# those subdirectories at multiple sites; _resolve_artifact_path
# centralises the dual-layout lookup so each site stays one-line.
# Top-level wins (legacy / pre-reorg layout); workspace/ is the v1.5.4
# canonical location after _finalize_quality_layout has run.
_WORKSPACE_DIRS = (
    "control_prompts",
    "results",
    "code_reviews",
    "spec_audits",
    "patches",
    "writeups",
    "mechanical",
    "phase3",
)


def _resolve_artifact_path(quality_dir, name):
    """Return the live path for an intermediate artifact directory or
    file under quality/. Tries top-level first (the legacy / current
    in-flight layout), then quality/workspace/<name> (the v1.5.4
    end-of-run reorg layout). Returns the top-level path even when
    neither exists so callers that test ``.is_dir()`` / ``.is_file()``
    get a False rather than an exception.

    ``name`` may be a single segment (``"results"``) or a path with
    segments (``"results/tdd-results.json"``); both forms work
    regardless of layout."""
    top = quality_dir / name
    if top.exists():
        return top
    workspace = quality_dir / "workspace" / name
    if workspace.exists():
        return workspace
    return top


def has_file_matching(directory, patterns):
    """True if any file in `directory` (non-recursive) matches any glob pattern."""
    if not directory.is_dir():
        return False
    for pat in patterns:
        for _ in directory.glob(pat):
            return True
    return False


def count_files_matching(directory, pattern):
    """Count files in `directory` (non-recursive) matching glob pattern."""
    if not directory.is_dir():
        return 0
    return sum(1 for _ in directory.glob(pattern))


def first_file_matching(directory, patterns):
    """Return first matching path or None."""
    if not directory.is_dir():
        return None
    for pat in patterns:
        for p in directory.glob(pat):
            return p
    return None


def file_contains(path, pattern):
    """True if any line in file matches pattern (regex string or compiled)."""
    if not path.is_file():
        return False
    if isinstance(pattern, str):
        pattern = re.compile(pattern)
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                if pattern.search(line):
                    return True
    except OSError:
        pass
    return False


def read_first_line_stripped(path):
    """Return first line of file with whitespace stripped."""
    if not path.is_file():
        return ""
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            line = f.readline()
    except OSError:
        return ""
    return re.sub(r"\s", "", line)


def validate_iso_date(date_str):
    """Return one of: 'valid', 'placeholder', 'future', 'bad_format', 'empty'.

    Placeholders are checked before format so that 'YYYY-MM-DD' is reported
    as 'placeholder' rather than 'bad_format'. The bash version's order was
    flipped, causing 'YYYY-MM-DD' to be misreported — both still FAIL but the
    Python version gives the clearer message.
    """
    if not date_str:
        return "empty"
    if date_str in ("YYYY-MM-DD", "0000-00-00"):
        return "placeholder"
    date_part = date_str[:10]
    if not re.fullmatch(r"\d{4}-\d{2}-\d{2}", date_part):
        return "bad_format"
    if len(date_str) > 10 and not re.fullmatch(r"T\d{2}:\d{2}:\d{2}(Z|[+-]\d{2}:\d{2})?", date_str[10:]):
        return "bad_format"
    today = date.today().isoformat()
    if date_part > today:
        return "future"
    return "valid"


def detect_skill_version(locations):
    """Read `version:` value from the first existing SKILL.md-like file."""
    for loc in locations:
        if loc.is_file():
            try:
                with open(loc, "r", encoding="utf-8", errors="replace") as f:
                    for line in f:
                        m = re.match(r"^\s*(?:version:|\*\*Version:\*\*)\s*([0-9]+(?:\.[0-9]+)+)\b",
                                     line, re.IGNORECASE)
                        if m:
                            return m.group(1)
            except OSError:
                continue
    return ""


def read_skill_value_line(path, prefix):
    """Mimic: grep -m1 'prefix' FILE | sed 's/.*prefix *//' | tr -d ' '."""
    if not path.is_file():
        return ""
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                if prefix in line:
                    v = re.sub(rf".*{re.escape(prefix)}\s*", "", line, count=1)
                    return v.replace(" ", "").rstrip("\n").rstrip("\r")
    except OSError:
        pass
    return ""


def detect_project_language(repo_dir):
    """Walk up to 3 dirs deep, return first language whose extension is present.

    Mirrors bash `find -maxdepth 3 -not -path ...` behavior.
    """
    language_order = [
        ("go", ".go"),
        ("py", ".py"),
        ("java", ".java"),
        ("kt", ".kt"),
        ("rs", ".rs"),
        ("ts", ".ts"),
        ("js", ".js"),
        ("scala", ".scala"),
        ("c", ".c"),
        ("agc", ".agc"),
    ]
    excluded = {"vendor", "node_modules", ".git", "quality", "repos"}

    def present(base, target_ext):
        stack = [(Path(base), 1)]
        while stack:
            curr, depth = stack.pop()
            try:
                for entry in os.scandir(curr):
                    name = entry.name
                    if entry.is_dir(follow_symlinks=False):
                        if name in excluded:
                            continue
                        if depth < 3:
                            stack.append((Path(entry.path), depth + 1))
                    elif entry.is_file(follow_symlinks=False):
                        if name.endswith(target_ext):
                            return True
            except (OSError, PermissionError):
                continue
        return False

    for lang, ext in language_order:
        if present(repo_dir, ext):
            return lang
    return ""


def count_source_files(repo_dir):
    """Count source files up to 4 dirs deep, excluding vendor/node_modules/etc."""
    src_count = 0
    exts = {".go", ".py", ".java", ".kt", ".rs", ".ts", ".js", ".scala",
            ".c", ".h", ".agc"}
    excluded = {"vendor", "node_modules", ".git", "quality"}

    def walk(base, current_depth, max_depth):
        nonlocal src_count
        try:
            for entry in os.scandir(base):
                name = entry.name
                if entry.is_dir(follow_symlinks=False):
                    if current_depth < max_depth and name not in excluded:
                        walk(entry.path, current_depth + 1, max_depth)
                elif entry.is_file(follow_symlinks=False):
                    dot = name.rfind(".")
                    if dot >= 0 and name[dot:] in exts:
                        src_count += 1
        except (OSError, PermissionError):
            pass

    walk(str(repo_dir), 1, 4)
    return src_count


# --- Section checks ---


def check_file_existence(repo_dir, q, strictness):
    """File existence section (benchmark 40)."""
    print("[File Existence]")
    for f in ["BUGS.md", "REQUIREMENTS.md", "QUALITY.md", "PROGRESS.md",
              "COVERAGE_MATRIX.md", "COMPLETENESS_REPORT.md"]:
        if (q / f).is_file():
            pass_(f"{f} exists")
        else:
            fail(f"{f} missing")

    for f in ["CONTRACTS.md", "RUN_CODE_REVIEW.md", "RUN_SPEC_AUDIT.md",
              "RUN_INTEGRATION_TESTS.md", "RUN_TDD_TESTS.md"]:
        if (q / f).is_file():
            pass_(f"{f} exists")
        else:
            fail(f"{f} missing")

    if has_file_matching(q, ["test_functional.*", "functional_test.*",
                             "FunctionalSpec.*", "FunctionalTest.*",
                             "functional.test.*"]):
        pass_("functional test file exists")
    else:
        fail("functional test file missing (test_functional.*, functional_test.*, FunctionalSpec.*, FunctionalTest.*, functional.test.*)")

    if (repo_dir / "AGENTS.md").is_file():
        pass_("AGENTS.md exists")
    else:
        fail("AGENTS.md missing (required at project root)")

    if (q / "EXPLORATION.md").is_file():
        pass_("EXPLORATION.md exists")
        _check_exploration_sections(q / "EXPLORATION.md")
    else:
        fail("EXPLORATION.md missing")

    cr_dir = _resolve_artifact_path(q, "code_reviews")
    if cr_dir.is_dir() and has_file_matching(cr_dir, ["*.md"]):
        pass_("code_reviews/ has .md files")
    else:
        fail("code_reviews/ missing or empty")

    sa_dir = _resolve_artifact_path(q, "spec_audits")
    if sa_dir.is_dir():
        triage_count = count_files_matching(sa_dir, "*triage*")
        auditor_count = count_files_matching(sa_dir, "*auditor*")
        if triage_count > 0:
            pass_("spec_audits/ has triage file")
        else:
            fail("spec_audits/ missing triage file")
        if auditor_count > 0:
            pass_(f"spec_audits/ has {auditor_count} auditor file(s)")
        else:
            fail("spec_audits/ missing individual auditor files")

        if triage_count > 0:
            has_probes = False
            if (sa_dir / "triage_probes.sh").is_file():
                has_probes = True
                pass_("triage_probes.sh exists (executable triage evidence)")
            elif (_resolve_artifact_path(q, "mechanical/verify.sh")).is_file() and \
                 file_contains(_resolve_artifact_path(q, "mechanical/verify.sh"), r"probe|triage|auditor"):
                has_probes = True
                pass_("verify.sh contains triage probe assertions")
            if not has_probes:
                msg = "No executable triage evidence found (expected spec_audits/triage_probes.sh or probe assertions in mechanical/verify.sh)"
                if strictness == "benchmark":
                    fail(msg)
                else:
                    warn(msg)
    else:
        fail("spec_audits/ directory missing")


def check_bugs_heading(q):
    """BUGS.md heading-format section (benchmark 39).

    Returns (bug_count, bug_ids).
    """
    print("[BUGS.md Heading Format]")
    bugs_md = q / "BUGS.md"
    if not bugs_md.is_file():
        fail("BUGS.md missing")
        return 0, []

    try:
        bugs_content = bugs_md.read_text(encoding="utf-8", errors="replace")
    except OSError:
        bugs_content = ""
    lines = bugs_content.splitlines()

    correct_headings = sum(1 for ln in lines
                           if re.match(r"^### BUG-([HML]|[0-9])[0-9]*", ln))
    wrong_headings = sum(1 for ln in lines
                         if re.match(r"^## BUG-", ln)
                         and not re.match(r"^### BUG-", ln))
    deep_headings = sum(1 for ln in lines
                        if re.match(r"^#{4,} BUG-([HML]|[0-9])", ln))
    bold_headings = sum(1 for ln in lines
                        if re.match(r"^\*\*BUG-([HML]|[0-9])", ln))
    bullet_headings = sum(1 for ln in lines
                          if re.match(r"^- BUG-([HML]|[0-9])", ln))

    bug_count = correct_headings

    if (correct_headings > 0 and wrong_headings == 0 and deep_headings == 0
            and bold_headings == 0 and bullet_headings == 0):
        pass_(f"All {correct_headings} bug headings use ### BUG-NNN format")
    else:
        if wrong_headings > 0:
            fail(f"{wrong_headings} heading(s) use ## instead of ###")
        if deep_headings > 0:
            fail(f"{deep_headings} heading(s) use #### or deeper instead of ###")
        if bold_headings > 0:
            fail(f"{bold_headings} heading(s) use **BUG- format")
        if bullet_headings > 0:
            fail(f"{bullet_headings} heading(s) use - BUG- format")
        if correct_headings == 0 and wrong_headings == 0:
            if re.search(r"^##\s+(No confirmed bugs|Zero confirmed bugs)\s*$",
                         bugs_content, re.MULTILINE | re.IGNORECASE):
                pass_("Zero-bug run — no headings expected")
            else:
                bug_count = wrong_headings + deep_headings + bold_headings + bullet_headings
                warn("No ### BUG-NNN headings found in BUGS.md")
        else:
            bug_count = correct_headings + wrong_headings + bold_headings + bullet_headings

    # Extract canonical bug IDs: BUG-NNN or BUG-HNN / BUG-MNN / BUG-LNN
    raw = re.findall(r"BUG-(?:[HML][0-9]+|[0-9]+)", bugs_content)
    filtered = [b for b in raw if re.fullmatch(r"BUG-(?:[HML][0-9]+|[0-9]+)", b)]
    bug_ids = sorted(set(filtered))

    return bug_count, bug_ids


def check_tdd_sidecar(q, bug_count):
    """TDD sidecar JSON (benchmarks 14, 41)."""
    print("[TDD Sidecar JSON]")
    json_path = _resolve_artifact_path(q, "results/tdd-results.json")

    if bug_count <= 0:
        info("Zero bugs — tdd-results.json not required")
        return None

    if not json_path.is_file():
        fail(f"tdd-results.json missing ({bug_count} bugs require it)")
        return None

    pass_(f"tdd-results.json exists ({bug_count} bugs)")

    data = load_json(json_path)
    if data is None:
        # File exists but unparsable — fail all root key checks
        for key in ["schema_version", "skill_version", "date", "project",
                    "bugs", "summary"]:
            fail(f"missing root key '{key}'")
        fail("schema_version is 'missing', expected '1.1'")
        return None

    for key in ["schema_version", "skill_version", "date", "project",
                "bugs", "summary"]:
        if has_key(data, key):
            pass_(f"has '{key}'")
        else:
            fail(f"missing root key '{key}'")

    sv = get_str(data, "schema_version")
    if sv == "1.1":
        pass_("schema_version is '1.1'")
    else:
        fail(f"schema_version is '{sv or 'missing'}', expected '1.1'")

    bugs_list = data.get("bugs") if isinstance(data, dict) else None
    if not isinstance(bugs_list, list):
        bugs_list = []

    for field in ["id", "requirement", "red_phase", "green_phase",
                  "verdict", "fix_patch_present", "writeup_path"]:
        fcount = count_per_bug_field(bugs_list, field)
        if fcount >= bug_count:
            pass_(f"per-bug field '{field}' present ({fcount}x)")
        elif fcount > 0:
            warn(f"per-bug field '{field}' found {fcount}x, expected {bug_count}")
        else:
            fail(f"per-bug field '{field}' missing entirely")

    # Non-canonical field names (at any level — check root and bugs)
    bad_fields = ["bug_id", "bug_name", "status", "phase", "result"]
    for bad in bad_fields:
        found = has_key(data, bad) or any(
            has_key(b, bad) for b in bugs_list if isinstance(b, dict)
        )
        if found:
            fail(f"non-canonical field '{bad}' found (use standard field names)")

    summary = data.get("summary") if isinstance(data, dict) else None
    if not isinstance(summary, dict):
        summary = {}
    for skey in ["total", "verified", "confirmed_open", "red_failed", "green_failed"]:
        if skey in summary:
            pass_(f"summary has '{skey}'")
        else:
            fail(f"summary missing '{skey}' count")

    # Date validation
    tdd_date = get_str(data, "date")
    status = validate_iso_date(tdd_date)
    if status == "empty":
        fail("tdd-results.json date field missing or empty")
    elif status == "bad_format":
        fail(f"tdd-results.json date '{tdd_date}' is not ISO 8601 (YYYY-MM-DD)")
    elif status == "placeholder":
        fail(f"tdd-results.json date is placeholder '{tdd_date}'")
    elif status == "future":
        fail(f"tdd-results.json date '{tdd_date}' is in the future")
    else:
        pass_(f"tdd-results.json date '{tdd_date}' is valid")

    # Verdict enum
    allowed_verdicts = {"TDD verified", "red failed", "green failed",
                        "confirmed open", "deferred"}
    bad_verdicts = 0
    for b in bugs_list:
        if isinstance(b, dict) and "verdict" in b:
            v = b.get("verdict")
            if v not in allowed_verdicts:
                bad_verdicts += 1
    if bad_verdicts == 0:
        pass_("all verdict values are canonical")
    else:
        fail(f"{bad_verdicts} non-canonical verdict value(s)")

    return data


def check_tdd_logs(q, bug_count, bug_ids, tdd_data):
    """TDD log files and sidecar-to-log cross-validation."""
    print("[TDD Log Files]")
    if bug_count <= 0:
        info("Zero bugs — TDD log files not required")
        return

    patches_dir = _resolve_artifact_path(q, "patches")
    results_dir = _resolve_artifact_path(q, "results")
    valid_tags = {"RED", "GREEN", "NOT_RUN", "ERROR"}

    red_found = 0
    red_missing = 0
    green_found = 0
    green_missing = 0
    green_expected = 0
    red_bad_tag = 0
    green_bad_tag = 0

    for bid in bug_ids:
        red_log = results_dir / f"{bid}.red.log"
        if red_log.is_file():
            red_found += 1
            tag = read_first_line_stripped(red_log)
            if tag not in valid_tags:
                red_bad_tag += 1
        else:
            red_missing += 1

        fix_patch = first_file_matching(patches_dir, [f"{bid}-fix*.patch"])
        if fix_patch is not None:
            green_expected += 1
            green_log = results_dir / f"{bid}.green.log"
            if green_log.is_file():
                green_found += 1
                tag = read_first_line_stripped(green_log)
                if tag not in valid_tags:
                    green_bad_tag += 1
            else:
                green_missing += 1

    if red_missing == 0 and red_found > 0:
        pass_(f"All {red_found} confirmed bug(s) have red-phase logs")
    elif red_found > 0:
        fail(f"{red_missing} confirmed bug(s) missing red-phase log (BUG-NNN.red.log)")
    else:
        fail("No red-phase logs found (every confirmed bug needs quality/results/BUG-NNN.red.log)")

    if green_expected > 0:
        if green_missing == 0:
            pass_(f"All {green_found} bug(s) with fix patches have green-phase logs")
        else:
            fail(f"{green_missing} bug(s) with fix patches missing green-phase log (BUG-NNN.green.log)")
    else:
        info("No fix patches found — green-phase logs not required")

    if red_bad_tag > 0:
        fail(f"{red_bad_tag} red-phase log(s) missing valid first-line status tag (expected RED/GREEN/NOT_RUN/ERROR)")
    elif red_found > 0:
        pass_("All red-phase logs have valid status tags")
    if green_bad_tag > 0:
        fail(f"{green_bad_tag} green-phase log(s) missing valid first-line status tag (expected RED/GREEN/NOT_RUN/ERROR)")
    elif green_found > 0:
        pass_("All green-phase logs have valid status tags")

    # Sidecar-to-log cross-validation (BUG-M18)
    if tdd_data is not None and isinstance(tdd_data, dict):
        bugs_list = tdd_data.get("bugs") or []
        if not isinstance(bugs_list, list):
            bugs_list = []
        # Index bugs by id for lookup
        bug_by_id = {}
        for b in bugs_list:
            if isinstance(b, dict) and isinstance(b.get("id"), str):
                bug_by_id[b["id"]] = b

        xv_checked = 0
        xv_mismatch = 0

        for bid in bug_ids:
            bug_obj = bug_by_id.get(bid)
            sidecar_red = get_str(bug_obj, "red_phase") if bug_obj else ""
            sidecar_green = get_str(bug_obj, "green_phase") if bug_obj else ""

            red_log = results_dir / f"{bid}.red.log"
            if sidecar_red and red_log.is_file():
                log_tag = read_first_line_stripped(red_log)
                xv_checked += 1
                if sidecar_red == "fail" and log_tag != "RED":
                    xv_mismatch += 1
                    fail(f"{bid}: sidecar red_phase='{sidecar_red}' but log first-line is '{log_tag}' (expected RED)")
                elif sidecar_red == "pass" and log_tag != "GREEN":
                    xv_mismatch += 1
                    fail(f"{bid}: sidecar red_phase='{sidecar_red}' but log first-line is '{log_tag}' (expected GREEN)")

            green_log = results_dir / f"{bid}.green.log"
            if sidecar_green and green_log.is_file():
                log_tag = read_first_line_stripped(green_log)
                xv_checked += 1
                if sidecar_green == "pass" and log_tag != "GREEN":
                    xv_mismatch += 1
                    fail(f"{bid}: sidecar green_phase='{sidecar_green}' but log first-line is '{log_tag}' (expected GREEN)")
                elif sidecar_green == "fail" and log_tag != "RED":
                    xv_mismatch += 1
                    fail(f"{bid}: sidecar green_phase='{sidecar_green}' but log first-line is '{log_tag}' (expected RED)")

        if xv_checked > 0 and xv_mismatch == 0:
            pass_(f"Sidecar-to-log cross-validation passed ({xv_checked} checks)")
        elif xv_checked == 0:
            info("Sidecar-to-log cross-validation: no matching pairs to check")

    # TDD_TRACEABILITY.md
    if red_found > 0:
        if (q / "TDD_TRACEABILITY.md").is_file():
            pass_(f"TDD_TRACEABILITY.md exists ({red_found} bugs with red-phase results)")
        else:
            fail("TDD_TRACEABILITY.md missing (mandatory when bugs have red-phase results)")


def check_integration_sidecar(q, strictness):
    """Integration sidecar JSON section."""
    print("[Integration Sidecar JSON]")
    ij = _resolve_artifact_path(q, "results/integration-results.json")

    if not ij.is_file():
        if strictness == "benchmark":
            warn("integration-results.json not present")
        else:
            info("integration-results.json not present (optional in general mode)")
        return

    data = load_json(ij)

    for key in ["schema_version", "skill_version", "date", "project",
                "recommendation", "groups", "summary", "uc_coverage"]:
        if has_key(data, key):
            pass_(f"has '{key}'")
        else:
            fail(f"missing key '{key}'")

    summary = data.get("summary") if isinstance(data, dict) else None
    if not isinstance(summary, dict):
        summary = {}
    for iskey in ["total_groups", "passed", "failed", "skipped"]:
        if iskey in summary:
            pass_(f"integration summary has '{iskey}'")
        else:
            fail(f"integration summary missing required sub-key '{iskey}'")

    isv = get_str(data, "schema_version")
    if isv == "1.1":
        pass_("integration schema_version is '1.1'")
    else:
        fail(f"integration schema_version is '{isv or 'missing'}', expected '1.1'")

    int_date = get_str(data, "date")
    if int_date:  # match bash: if [ -n "$int_date" ]
        status = validate_iso_date(int_date)
        if status == "bad_format":
            fail(f"integration-results.json date '{int_date}' is not ISO 8601 (YYYY-MM-DD)")
        elif status == "placeholder":
            fail(f"integration-results.json date is placeholder '{int_date}'")
        elif status == "future":
            fail(f"integration-results.json date '{int_date}' is in the future")
        else:
            pass_(f"integration-results.json date '{int_date}' is valid")

    rec = get_str(data, "recommendation")
    if rec in ("SHIP", "FIX BEFORE MERGE", "BLOCK"):
        pass_(f"recommendation '{rec}' is canonical")
    elif rec:
        fail(f"recommendation '{rec}' is non-canonical (must be SHIP/FIX BEFORE MERGE/BLOCK)")
    else:
        fail("recommendation missing")

    # groups[].result enum
    allowed_results = {"pass", "fail", "skipped", "error"}
    bad_results = 0
    groups = data.get("groups") if isinstance(data, dict) else None
    if isinstance(groups, list):
        for g in groups:
            if isinstance(g, dict) and "result" in g:
                if g.get("result") not in allowed_results:
                    bad_results += 1
    if bad_results == 0:
        pass_("all groups[].result values are canonical")
    else:
        fail(f"{bad_results} non-canonical groups[].result value(s) (must be pass/fail/skipped/error)")

    # uc_coverage value enum
    allowed_uc = {"covered_pass", "covered_fail", "not_mapped"}
    bad_uc = 0
    uc_cov = data.get("uc_coverage") if isinstance(data, dict) else None
    if isinstance(uc_cov, dict):
        for v in uc_cov.values():
            if v not in allowed_uc:
                bad_uc += 1
    if bad_uc == 0:
        pass_("all uc_coverage values are canonical")
    else:
        fail(f"{bad_uc} non-canonical uc_coverage value(s) (must be covered_pass/covered_fail/not_mapped)")


def check_recheck_sidecar(q):
    """Recheck sidecar JSON (schema 1.0, uses 'results' key not 'bugs')."""
    print("[Recheck Sidecar JSON]")
    rj = _resolve_artifact_path(q, "results/recheck-results.json")
    rs = _resolve_artifact_path(q, "results/recheck-summary.md")

    if not rj.is_file():
        info("recheck-results.json not present (only required when recheck mode was run)")
        return

    pass_("recheck-results.json exists")
    data = load_json(rj)

    # SKILL.md recheck template uses 'results' as the array key, not 'bugs'.
    for key in ["schema_version", "skill_version", "date", "project",
                "results", "summary"]:
        if has_key(data, key):
            pass_(f"recheck has '{key}'")
        else:
            fail(f"recheck missing root key '{key}'")

    rsv = get_str(data, "schema_version")
    if rsv == "1.0":
        pass_("recheck schema_version is '1.0'")
    else:
        fail(f"recheck schema_version is '{rsv or 'missing'}', expected '1.0'")

    rdate = get_str(data, "date")
    if rdate:
        status = validate_iso_date(rdate)
        if status == "bad_format":
            fail(f"recheck-results.json date '{rdate}' is not ISO 8601 (YYYY-MM-DD)")
        elif status == "placeholder":
            fail(f"recheck-results.json date is placeholder '{rdate}'")
        elif status == "future":
            fail(f"recheck-results.json date '{rdate}' is in the future")
        else:
            pass_(f"recheck-results.json date '{rdate}' is valid")

    if rs.is_file():
        pass_("recheck-summary.md exists")
    else:
        fail("recheck-summary.md missing (required companion to recheck-results.json)")


def check_use_cases(repo_dir, q, strictness):
    """Use case identifier section (benchmarks 43, 48)."""
    print("[Use Cases]")
    req_md = q / "REQUIREMENTS.md"
    if not req_md.is_file():
        fail("REQUIREMENTS.md missing")
        return

    try:
        req_content = req_md.read_text(encoding="utf-8", errors="replace")
    except OSError:
        req_content = ""

    # uc_ids: count of lines matching UC-N (bash grep -cE counts lines)
    uc_ids = sum(1 for ln in req_content.splitlines()
                 if re.search(r"UC-[0-9]+", ln))
    uc_unique = len(set(re.findall(r"UC-[0-9]+", req_content)))

    src_count = count_source_files(repo_dir) if repo_dir.is_dir() else 0
    min_uc = 3 if src_count < 5 else 5

    if uc_unique >= min_uc:
        pass_(f"Found {uc_unique} distinct UC identifiers ({uc_ids} total references, {src_count} source files)")
    elif uc_unique > 0:
        connector = "for" if strictness == "general" else "required for"
        msg = f"Only {uc_unique} distinct UC identifiers (minimum {min_uc} {connector} {src_count} source files)"
        if strictness == "general":
            warn(msg)
        else:
            fail(msg)
    else:
        fail("No canonical UC-NN identifiers in REQUIREMENTS.md")


def check_test_file_extension(repo_dir, q):
    """Test file extension matches project language (benchmark 47)."""
    print("[Test File Extension]")
    func_test = first_file_matching(q, ["test_functional.*", "functional_test.*",
                                        "FunctionalSpec.*", "FunctionalTest.*",
                                        "functional.test.*"])
    reg_test = first_file_matching(q, ["test_regression.*"])

    if func_test is None:
        warn("No functional test file found across the supported naming matrix")
        return

    ext = func_test.suffix.lstrip(".") if func_test.suffix else ""
    detected_lang = detect_project_language(repo_dir) if repo_dir.is_dir() else ""

    if not detected_lang:
        info(f"Cannot detect project language — skipping extension check (test_functional.{ext})")
        return

    lang_to_valid = {
        "go": "go",
        "py": "py",
        "java": "java",
        "kt": "kt java",
        "rs": "rs",
        "ts": "ts",
        "js": "js ts",
        "scala": "scala",
        "c": "c py sh",
        "agc": "py sh",
    }
    valid_ext = lang_to_valid.get(detected_lang, "")
    valid_list = valid_ext.split()
    primary = valid_list[0] if valid_list else ""

    if ext in valid_list:
        pass_(f"{func_test.name} matches project language ({detected_lang})")
    else:
        fail(f"{func_test.name} does not match project language ({detected_lang}) — expected .{primary}")

    if reg_test is not None:
        reg_ext = reg_test.suffix.lstrip(".") if reg_test.suffix else ""
        if reg_ext in valid_list:
            pass_(f"test_regression.{reg_ext} matches project language ({detected_lang})")
        else:
            fail(f"test_regression.{reg_ext} does not match project language ({detected_lang}) — expected .{primary}")


def check_terminal_gate(q):
    """Terminal Gate section in PROGRESS.md."""
    print("[Terminal Gate]")
    progress_md = q / "PROGRESS.md"
    if not progress_md.is_file():
        return
    pat = re.compile(r"^#+ *Terminal", re.IGNORECASE | re.MULTILINE)
    if file_contains(progress_md, pat):
        pass_("PROGRESS.md has Terminal Gate section")
    else:
        fail("PROGRESS.md missing Terminal Gate section")


def check_mechanical(q):
    """Mechanical verification section."""
    print("[Mechanical Verification]")
    mech_dir = _resolve_artifact_path(q, "mechanical")
    if not mech_dir.is_dir():
        info("No mechanical/ directory")
        return
    verify_sh = mech_dir / "verify.sh"
    if not verify_sh.is_file():
        fail("mechanical/ exists but verify.sh missing")
        return
    pass_("verify.sh exists")

    mv_log = _resolve_artifact_path(q, "results/mechanical-verify.log")
    mv_exit = _resolve_artifact_path(q, "results/mechanical-verify.exit")
    if mv_log.is_file() and mv_exit.is_file():
        try:
            exit_code = mv_exit.read_text(encoding="utf-8", errors="replace")
        except OSError:
            exit_code = ""
        exit_code = re.sub(r"\s", "", exit_code)
        if exit_code == "0":
            pass_("mechanical-verify.exit is 0")
        else:
            fail(f"mechanical-verify.exit is '{exit_code}', expected 0")
    else:
        fail("Verification receipt files missing")


def check_patches(q, bug_count, bug_ids, strictness):
    """Patches section (benchmark 44)."""
    print("[Patches]")
    if bug_count <= 0:
        return

    patches_dir = _resolve_artifact_path(q, "patches")

    # Regression test file — required when bugs exist
    reg_test_file = None
    if q.is_dir():
        reg_files = sorted(q.glob("test_regression.*"))
        if reg_files:
            reg_test_file = reg_files[0]

    if reg_test_file is not None:
        pass_(f"test_regression.* exists ({bug_count} confirmed bugs require it)")
    else:
        msg = "test_regression.* missing — required when bugs exist (SKILL.md artifact contract)"
        if strictness == "benchmark":
            fail(msg)
        else:
            warn(msg)

    reg_patch_count = 0
    fix_patch_count = 0
    reg_patch_missing = 0
    for bid in bug_ids:
        if first_file_matching(patches_dir, [f"{bid}-regression*.patch"]) is not None:
            reg_patch_count += 1
        else:
            reg_patch_missing += 1
        if first_file_matching(patches_dir, [f"{bid}-fix*.patch"]) is not None:
            fix_patch_count += 1

    if reg_patch_missing == 0 and reg_patch_count > 0:
        pass_(f"{reg_patch_count} regression-test patch(es) for {bug_count} bug(s)")
    elif reg_patch_count > 0:
        fail(f"{reg_patch_missing} bug(s) missing regression-test patch")
    else:
        fail("No regression-test patches found (quality/patches/BUG-NNN-regression-test.patch required)")

    if fix_patch_count > 0:
        pass_(f"{fix_patch_count} fix patch(es)")
    else:
        warn("0 fix patches (fix patches are optional but strongly encouraged)")

    total_patches = reg_patch_count + fix_patch_count
    info(f"Total: {total_patches} patch file(s) in quality/patches/")


# Unfilled-template sentinel phrases produced by the Phase 5 writeup stub.
# Presence of any of these strings in a writeup is strong evidence that the
# template was emitted without hydrating its content fields from BUGS.md.
# See bin/run_playbook.py::phase5_prompt for the generating prompt.
_WRITEUP_TEMPLATE_SENTINELS = (
    "is a confirmed code bug in ``",
    "The affected implementation lives at ``",
    "Patch path: ``",
    "- Regression test: ``",
    "- Regression patch: ``",
)

# Matches a ```diff fenced block and captures its body for content inspection.
_WRITEUP_DIFF_BLOCK_RE = re.compile(r"```diff\s*\n(.*?)```", re.DOTALL | re.IGNORECASE)


def _writeup_diff_is_non_empty(text):
    """True if any ```diff block in ``text`` contains at least one unified-diff
    line (a `+` or `-` that is not the `+++`/`---` file-header prefix)."""
    for block in _WRITEUP_DIFF_BLOCK_RE.findall(text):
        for line in block.splitlines():
            stripped = line.lstrip()
            if stripped.startswith("+++") or stripped.startswith("---"):
                continue
            if stripped.startswith(("+", "-")):
                return True
    return False


def check_writeups(q, bug_count):
    """Bug writeups section (benchmark 30)."""
    print("[Bug Writeups]")
    if bug_count <= 0:
        return

    writeups_dir = _resolve_artifact_path(q, "writeups")
    writeup_count = 0
    writeup_diff_count = 0
    empty_diff_writeups = []
    sentinel_writeups = []
    if writeups_dir.is_dir():
        writeup_files = sorted(p for p in writeups_dir.glob("BUG-*.md") if p.is_file())
        writeup_count = len(writeup_files)
        for wf in writeup_files:
            try:
                text = wf.read_text(encoding="utf-8", errors="replace")
            except OSError:
                continue
            # Presence test uses the same regex as the content test so the
            # two can never disagree on whether a fence exists. Case-insensitive
            # match accepts ```diff / ```Diff / ```DIFF uniformly — operators
            # routinely uppercase the fence tag and the gate must not silently
            # skip those writeups (the content non-emptiness check would then
            # never fire, producing a confusing "no inline fix diffs" FAIL on a
            # writeup that visibly contains a unified diff).
            if _WRITEUP_DIFF_BLOCK_RE.search(text):
                writeup_diff_count += 1
                if not _writeup_diff_is_non_empty(text):
                    empty_diff_writeups.append(wf.name)
            if any(s in text for s in _WRITEUP_TEMPLATE_SENTINELS):
                sentinel_writeups.append(wf.name)

    if writeup_count >= bug_count:
        pass_(f"{writeup_count} writeup(s) for {bug_count} bug(s)")
    elif writeup_count > 0:
        fail(f"{writeup_count} writeup(s) for {bug_count} bug(s) — all confirmed bugs require writeups (SKILL.md line 1454)")
    else:
        fail(f"No writeups for {bug_count} confirmed bug(s)")

    if writeup_count > 0:
        if writeup_diff_count >= writeup_count:
            pass_(f"All {writeup_diff_count} writeup(s) have inline fix diffs")
        elif writeup_diff_count > 0:
            fail(f"Only {writeup_diff_count}/{writeup_count} writeup(s) have inline fix diffs (all require section 6 diff)")
        else:
            fail("No writeups have inline fix diffs (section 6 'The fix' must include a ```diff block)")

        # Non-empty-diff content check. A ```diff fence with no `+`/`-` body
        # is a template stub — the legacy presence-only check let these pass.
        if empty_diff_writeups:
            preview = ", ".join(empty_diff_writeups[:5])
            suffix = f" (+{len(empty_diff_writeups) - 5} more)" if len(empty_diff_writeups) > 5 else ""
            fail(
                f"{len(empty_diff_writeups)} writeup(s) have empty ```diff blocks "
                f"(fence present, no +/- lines): {preview}{suffix}"
            )
        else:
            pass_("All writeup ```diff blocks contain unified-diff content")

        # Template-sentinel check. Any of these strings remaining in a writeup
        # means the Phase 5 stub was emitted without hydrating from BUGS.md.
        if sentinel_writeups:
            preview = ", ".join(sentinel_writeups[:5])
            suffix = f" (+{len(sentinel_writeups) - 5} more)" if len(sentinel_writeups) > 5 else ""
            fail(
                f"{len(sentinel_writeups)} writeup(s) contain unfilled template "
                f"sentinels (empty backticks after 'is a confirmed code bug in', "
                f"'The affected implementation lives at', 'Patch path:', "
                f"'Regression test:', or 'Regression patch:'): {preview}{suffix}"
            )
        else:
            pass_("No writeups contain unfilled template sentinels")


def check_version_stamps(repo_dir, q):
    """Version stamp consistency (benchmark 26). Returns detected skill_version."""
    print("[Version Stamps]")
    skill_version = detect_skill_version([
        repo_dir / "SKILL.md",
        repo_dir / ".claude" / "skills" / "quality-playbook" / "SKILL.md",
        repo_dir / ".github" / "skills" / "SKILL.md",
        repo_dir / ".github" / "skills" / "quality-playbook" / "SKILL.md",
        SCRIPT_DIR / ".." / "SKILL.md",
        SCRIPT_DIR / "SKILL.md",
    ])

    if not skill_version:
        warn("Cannot detect skill version from SKILL.md")
        return skill_version

    progress_md = q / "PROGRESS.md"
    if progress_md.is_file():
        pv = read_skill_value_line(progress_md, "Skill version:")
        if pv == skill_version:
            pass_(f"PROGRESS.md version matches ({skill_version})")
        elif pv:
            fail(f"PROGRESS.md version '{pv}' != '{skill_version}'")
        else:
            warn("PROGRESS.md missing Skill version field")

    json_path = _resolve_artifact_path(q, "results/tdd-results.json")
    if json_path.is_file():
        data = load_json(json_path)
        tv = get_str(data, "skill_version")
        if tv == skill_version:
            pass_("tdd-results.json skill_version matches")
        elif tv:
            fail(f"tdd-results.json skill_version '{tv}' != '{skill_version}'")

    return skill_version


def check_cross_run_contamination(repo_dir, q, version_arg, skill_version):
    """Cross-run contamination detection."""
    print("[Cross-Run Contamination]")
    repo_name = repo_dir.name
    if skill_version and version_arg:
        matches = re.findall(r"[0-9]+\.[0-9]+\.[0-9]+", repo_name)
        dir_version = matches[-1] if matches else ""
        if dir_version and dir_version != skill_version:
            fail(f"Directory version '{dir_version}' != skill version '{skill_version}' — possible cross-run contamination")
        else:
            pass_("No version mismatch detected")

    json_path = _resolve_artifact_path(q, "results/tdd-results.json")
    if json_path.is_file() and skill_version:
        data = load_json(json_path)
        json_sv = get_str(data, "skill_version")
        if json_sv and json_sv != skill_version:
            fail(f"tdd-results.json skill_version '{json_sv}' != SKILL.md '{skill_version}' — stale artifacts from prior run?")


def _check_exploration_sections(path):
    """Check that EXPLORATION.md contains all required section titles."""
    required_sections = [
        "## Open Exploration Findings",
        "## Quality Risks",
        "## Pattern Applicability Matrix",
        "## Candidate Bugs for Phase 2",
        "## Gate Self-Check",
    ]
    try:
        content = path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        fail(f"EXPLORATION.md unreadable: {exc}")
        return
    for section in required_sections:
        if section not in content:
            fail(f"EXPLORATION.md missing required section: {section!r}")


def check_run_metadata(q):
    """Validate the run-metadata sidecar JSON (run-YYYY-MM-DDTHH-MM-SS.json)."""
    print("[Run Metadata]")
    results_dir = _resolve_artifact_path(q, "results")
    pattern = str(results_dir / "run-*.json")
    import glob as _glob
    matches = _glob.glob(pattern)
    if not matches:
        fail("run-metadata JSON missing (expected quality/results/run-YYYY-MM-DDTHH-MM-SS.json)")
        return
    if len(matches) > 1:
        warn(f"Multiple run-metadata files found: {len(matches)}")
    filename_re = re.compile(r"run-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}\.json$")
    for path in matches:
        if not filename_re.search(path):
            fail(f"run-metadata filename does not match expected format: {path}")
        data = load_json(Path(path))
        if data is None:
            fail(f"run-metadata JSON parse error: {path}")
            continue
        required_fields = ("schema_version", "skill_version", "project", "model", "runner", "start_time")
        for field in required_fields:
            if not data.get(field):
                fail(f"run-metadata missing or empty field: {field!r}")
    pass_("run-metadata JSON present")


# --- Per-repo entry point ---


# ---------------------------------------------------------------------------
# v1.5.1 Layer-1 mechanical invariants (schemas.md §10).
#
# Each check gracefully no-ops on pre-v1.5.1 runs (absent manifests = legacy
# repo; nothing to enforce). When the v1.5.1 artifacts are present every
# invariant below is enforced mechanically and FAILs with a specific
# <path>: <reason> message so the operator can fix the single artifact
# without re-running the whole playbook.
# ---------------------------------------------------------------------------

_V150_VALID_DISPOSITIONS = (
    "code-fix",
    "spec-fix",
    "upstream-spec-issue",
    "mis-read",
    "deferred",
)
_V150_VALID_FIX_TYPES = ("code", "spec", "both")
_V150_ILLEGAL_FIX_PAIRS = {
    ("code-fix", "spec"),
    ("spec-fix", "code"),
    ("upstream-spec-issue", "code"),
    ("mis-read", "both"),
}
_V150_SUPPORTED_EXTENSIONS = (".txt", ".md")
# v1.5.4 Part 1 / Round 1 Council finding C2-1: INDEX schema is now
# version-routed. New runs MUST emit schema_version "2.0" with
# target_role_breakdown; legacy archives carry schema_version "1.0"
# (or no schema_version at all) with target_project_type. The fields
# common to both schemas live in _V150_INDEX_COMMON_FIELDS; the
# version-specific fields live in their own tuples and are picked at
# validation time.
#
# v1.5.4 Round 2 Council finding C1: SCHEMA_VERSION_CURRENT pins the
# version this gate understands. Future schemas (>2.0) refuse with an
# explicit error rather than silently downgrading to legacy. When a
# v1.5.5+ run bumps the schema, also bump this constant; otherwise the
# new gate version will reject the new INDEX shape on purpose.
SCHEMA_VERSION_CURRENT = "2.0"
_V150_INDEX_COMMON_FIELDS = (
    "run_timestamp_start",
    "run_timestamp_end",
    "duration_seconds",
    "qpb_version",
    "target_repo_path",
    "target_repo_git_sha",
    "phases_executed",
    "summary",
    "artifacts",
)
_V150_INDEX_LEGACY_FIELDS = ("target_project_type",)
_V154_INDEX_CURRENT_FIELDS = ("target_role_breakdown",)
# Legacy alias: a small number of pre-iteration tests still import
# _V150_REQUIRED_INDEX_FIELDS expecting a single tuple. Preserve the
# alias under the v1.5.4-current contract; the version-routed
# enforcement happens inside check_v1_5_0_index_md.
_V150_REQUIRED_INDEX_FIELDS = (
    _V150_INDEX_COMMON_FIELDS + _V154_INDEX_CURRENT_FIELDS
)
_V150_REQUIRED_SUMMARY_KEYS = ("requirements", "bugs", "gate_verdict")


# ---------------------------------------------------------------------------
# v1.5.3 — schema extensions (schemas.md §3.6–§3.10, §4.1, §6.1, §8.1, §10
# invariants #21–#23). Field-presence detection (§3.10) toggles the
# v1.5.3 invariants on per-manifest, NOT a schema_version comparison.
# ---------------------------------------------------------------------------

_V153_VALID_SOURCE_TYPES = (
    "code-derived",
    "skill-section",
    "reference-file",
    "execution-observation",
    # v1.5.6 (QG-fail-2 from the v1.5.6 self-bootstrap): REQs derived from
    # operator-supplied informal documentation under the target repo's
    # `reference_docs/` tree. Distinct from `reference-file`, which
    # schemas.md §3.7 ties to QPB-shipped reference files under
    # `references/`. The Phase 2 LLM disambiguates the two evidence
    # sources by name; the schema and gate now match.
    "docs-derived",
)
_V153_VALID_DIVERGENCE_TYPES = (
    "code-spec",
    "internal-prose",
    "prose-to-code",
    "execution",
)
_V153_VALID_FORMAL_DOC_ROLES = (
    "external-spec",
    "project-spec",
    "skill-self-spec",
    "skill-reference",
)

# DQ-3 (v1.5.3 Phase 3 / Round 2 Council): the v1.5.3 field-presence
# detection key set is module-level so a regression test can pin it
# against schemas.md's enum-bearing field list. A future schema
# addition (e.g., a fifth v1.5.3-only field) that updates ONLY this
# constant without updating the test's literal will fail the regression
# test, forcing lockstep maintenance and surfacing the change for
# explicit review.
_V153_FIELD_KEYS = frozenset({"source_type", "divergence_type", "role"})


def _is_v1_5_3_shaped(manifest):
    """Return True iff any record in *manifest* carries a v1.5.3 field.

    Walks the records (or `reviews`) once. Presence of any key in
    _V153_FIELD_KEYS on any record toggles strict-mode validation per
    schemas.md §3.10. Empty / unparsable manifests return False so
    legacy fixtures stay on the soft-warn path.

    DQ-3 design note: the checked-key set is sourced from
    _V153_FIELD_KEYS (a module-level frozenset) rather than hardcoded
    in this function's body. A regression test in
    test_quality_gate.py::TestV153FieldKeysContract pins
    _V153_FIELD_KEYS against the literal `{"source_type",
    "divergence_type", "role"}` so a future maintainer adding a
    v1.5.3-only field to the schema cannot silently miss updating the
    detection helper.
    """
    if not isinstance(manifest, dict):
        return False
    records = manifest.get("records")
    if not isinstance(records, list):
        records = manifest.get("reviews") if isinstance(
            manifest.get("reviews"), list
        ) else []
    for rec in records:
        if not isinstance(rec, dict):
            continue
        if not _V153_FIELD_KEYS.isdisjoint(rec.keys()):
            return True
    return False


def _v150_manifest(q, name):
    """Return the parsed top-level JSON object or None if absent/invalid."""
    path = q / name
    if not path.is_file():
        return None
    data = load_json(path)
    if isinstance(data, dict):
        return data
    fail(f"{path.name}: not a valid JSON object (schemas.md §1.6)")
    return None


def check_v1_5_0_cite_extensions(repo_dir):
    """§10 invariant #9 — reference_docs/cite/ contains only .txt/.md.

    v1.5.2 collapsed the old formal_docs/+informal_docs/ split into a single
    reference_docs/ tree with reference_docs/cite/ holding citable material.
    The plaintext-only constraint now applies to that cite folder; the check
    retains the v1.5.0 invariant ancestry (hence the _v1_5_0_ name prefix).
    """
    folder = repo_dir / "reference_docs" / "cite"
    if not folder.is_dir():
        return
    any_file = False
    for path in sorted(folder.rglob("*")):
        if not path.is_file():
            continue
        any_file = True
        if path.name == "README.md":
            continue
        if path.name.endswith(".meta.json"):
            continue
        # v1.5.6 (QG-fail-1 from the v1.5.6 self-bootstrap): `.gitkeep`
        # is the documented sentinel that pins `reference_docs/cite/`
        # in version control even when adopters have no citable
        # plaintext yet. The pre-flight expects it to exist; the gate
        # must not reject it.
        if path.name == ".gitkeep":
            continue
        ext = path.suffix.lower()
        if ext not in _V150_SUPPORTED_EXTENSIONS:
            rel = path.relative_to(repo_dir).as_posix()
            fail(
                f"{rel}: unsupported extension {ext or '(none)'} under reference_docs/cite/ "
                "(schemas.md §2 allows only .txt, .md; §10 invariant #9)"
            )
    if any_file:
        pass_("reference_docs/cite/: all files use supported extensions")


def check_v1_5_0_manifest_wrappers(q):
    """§10 invariant #13 — manifest wrapper shape.

    Four record-shaped manifests (formal_docs / requirements / use_cases /
    bugs) use `records`; citation_semantic_check.json uses `reviews`
    (schemas.md §9.1). Every manifest must carry schema_version +
    generated_at as non-empty strings.
    """
    record_shaped = (
        "formal_docs_manifest.json",
        "requirements_manifest.json",
        "use_cases_manifest.json",
        "bugs_manifest.json",
    )
    for name in record_shaped:
        data = _v150_manifest(q, name)
        if data is None:
            continue
        for key in ("schema_version", "generated_at"):
            if not isinstance(data.get(key), str) or not data[key]:
                fail(f"{name}: missing or empty top-level {key!r} (schemas.md §1.6)")
        if not isinstance(data.get("records"), list):
            fail(f"{name}: missing or non-array top-level 'records' (schemas.md §1.6)")
        if "reviews" in data:
            fail(
                f"{name}: has 'reviews' key — reserved for citation_semantic_check.json "
                "per schemas.md §9.1 / §10 invariant #13"
            )
        else:
            pass_(f"{name}: manifest wrapper valid")

    data = _v150_manifest(q, "citation_semantic_check.json")
    if data is not None:
        for key in ("schema_version", "generated_at"):
            if not isinstance(data.get(key), str) or not data[key]:
                fail(
                    f"citation_semantic_check.json: missing or empty top-level {key!r} "
                    "(schemas.md §1.6)"
                )
        if not isinstance(data.get("reviews"), list):
            fail(
                "citation_semantic_check.json: missing or non-array top-level 'reviews' "
                "(schemas.md §9.1 — semantic check uses 'reviews', not 'records')"
            )
        if "records" in data:
            fail(
                "citation_semantic_check.json: has 'records' key — semantic check uses "
                "'reviews' per schemas.md §9.1 / §10 invariant #13"
            )
        else:
            pass_("citation_semantic_check.json: manifest wrapper valid")


def _check_citation_block(repo_dir, req_id, citation, formal_docs_by_path, req_tier):
    excerpt = citation.get("citation_excerpt")
    if not isinstance(excerpt, str) or not excerpt:
        fail(
            "requirements_manifest.json",
            f"record_id={req_id}: citation has empty or missing citation_excerpt "
            "(schemas.md §10 invariant #4)",
        )
        return
    doc_path_str = citation.get("document")
    if not isinstance(doc_path_str, str) or not doc_path_str:
        fail(
            "requirements_manifest.json",
            f"record_id={req_id}: citation missing 'document' field",
        )
        return
    section = citation.get("section")
    line = citation.get("line")
    has_section = isinstance(section, str) and section.strip()
    has_line = isinstance(line, int) and not isinstance(line, bool)
    if not has_section and not has_line:
        fail(
            "requirements_manifest.json",
            f"record_id={req_id}: citation has no section or line locator "
            "(page alone is insufficient; schemas.md §10 invariant #4)",
        )
        return

    fd_rec = formal_docs_by_path.get(doc_path_str)
    if fd_rec is None:
        fail(
            "requirements_manifest.json",
            f"record_id={req_id}: citation document {doc_path_str!r} "
            "not in formal_docs_manifest.json (schemas.md §10 invariant #2)",
        )
        return
    fd_tier = fd_rec.get("tier")
    if fd_tier != req_tier:
        fail(
            "requirements_manifest.json",
            f"record_id={req_id}: tier={req_tier} does not match cited FORMAL_DOC "
            f"tier={fd_tier!r} (schemas.md §10 invariant #14)",
        )
    fd_sha = fd_rec.get("document_sha256")
    cite_sha = citation.get("document_sha256")
    if isinstance(fd_sha, str) and isinstance(cite_sha, str) and fd_sha != cite_sha:
        fail(
            "requirements_manifest.json",
            f"record_id={req_id}: citation.document_sha256 does not match FORMAL_DOC "
            "(schemas.md §10 invariant #3 — citation_stale)",
        )

    if _CITATION_VERIFIER is None:
        warn(
            f"requirements_manifest.json: record_id={req_id}: byte-equality skipped — "
            "bin/citation_verifier unavailable on this install"
        )
        return

    doc_path = repo_dir / doc_path_str
    if not doc_path.is_file():
        fail(
            "requirements_manifest.json",
            f"record_id={req_id}: citation document not on disk: {doc_path_str}",
        )
        return
    try:
        bytes_ = doc_path.read_bytes()
        fresh = _CITATION_VERIFIER.extract_excerpt(
            bytes_, doc_path.suffix.lower(), section if has_section else None,
            line if has_line else None,
        )
    except _CITATION_VERIFIER.CitationResolutionError as exc:
        fail(
            "requirements_manifest.json",
            f"record_id={req_id}: citation location does not resolve in "
            f"{doc_path_str}: {exc.message} (schemas.md §10 invariant #4)",
        )
        return
    except Exception as exc:  # noqa: BLE001 — fail with a real message
        fail(
            "requirements_manifest.json",
            f"record_id={req_id}: citation verifier errored: {exc}",
        )
        return

    if fresh != excerpt:
        fail(
            "requirements_manifest.json",
            f"record_id={req_id}: citation_excerpt is not byte-equal to fresh "
            f"extraction from {doc_path_str} "
            "(schemas.md §10 invariant #11 — Layer-1 anti-hallucination)",
        )


def check_v1_5_0_requirements_manifest(repo_dir, q):
    """§10 invariants #1, #4, #8, #11, #14 — REQ shape, citation gating, functional_section."""
    req_data = _v150_manifest(q, "requirements_manifest.json")
    if req_data is None:
        return
    records = req_data.get("records")
    if not isinstance(records, list):
        return  # wrapper check already reported
    fd_data = _v150_manifest(q, "formal_docs_manifest.json")
    formal_docs_by_path = {}
    if fd_data and isinstance(fd_data.get("records"), list):
        for rec in fd_data["records"]:
            if isinstance(rec, dict) and isinstance(rec.get("source_path"), str):
                formal_docs_by_path[rec["source_path"]] = rec

    for idx, rec in enumerate(records):
        if not isinstance(rec, dict):
            fail(
                "requirements_manifest.json",
                f"record_id=<#{idx}>: not a JSON object",
            )
            continue
        req_id = rec.get("id", f"<#{idx}>")

        fs = rec.get("functional_section")
        if not isinstance(fs, str) or not fs.strip():
            fail(
                "requirements_manifest.json",
                f"record_id={req_id}: has empty or missing functional_section "
                "(schemas.md §10 invariant #8)",
            )

        tier = rec.get("tier")
        citation = rec.get("citation")
        if tier in (1, 2):
            if not isinstance(citation, dict):
                fail(
                    "requirements_manifest.json",
                    f"record_id={req_id}: is tier {tier} but has no citation block "
                    "(schemas.md §10 invariant #1)",
                )
                continue
            _check_citation_block(repo_dir, req_id, citation, formal_docs_by_path, tier)
        elif tier in (3, 4, 5):
            if citation is not None:
                fail(
                    "requirements_manifest.json",
                    f"record_id={req_id}: is tier {tier} but carries a citation block "
                    "(citations are for Tier 1/2 only per schemas.md §10 invariant #1)",
                )
        elif tier is None:
            fail(
                "requirements_manifest.json",
                f"record_id={req_id}: missing 'tier' field",
            )
        else:
            fail(
                "requirements_manifest.json",
                f"record_id={req_id}: has invalid tier {tier!r} (expected integer 1–5)",
            )

        # v1.5.2: validate the optional `pattern` field on the REQ record.
        pattern = rec.get("pattern")
        if pattern is not None and pattern not in VALID_PATTERN_VALUES:
            fail(
                "requirements_manifest.json",
                f"record_id={req_id}: has invalid pattern {pattern!r} "
                f"(expected one of {sorted(VALID_PATTERN_VALUES)})",
            )

    pass_("requirements_manifest.json: v1.5.1 Layer-1 REQ checks complete")


def check_v1_5_0_bugs_manifest(q):
    """§10 invariants #7, #12 — disposition completeness + legal fix_type × disposition."""
    data = _v150_manifest(q, "bugs_manifest.json")
    if data is None:
        return
    records = data.get("records")
    if not isinstance(records, list):
        return
    for idx, rec in enumerate(records):
        if not isinstance(rec, dict):
            continue
        bug_id = rec.get("id", f"<#{idx}>")
        disp = rec.get("disposition")
        if disp not in _V150_VALID_DISPOSITIONS:
            fail(
                "bugs_manifest.json",
                f"record_id={bug_id}: has invalid or missing disposition {disp!r} "
                f"(schemas.md §10 invariant #7, valid: "
                f"{', '.join(_V150_VALID_DISPOSITIONS)})",
            )
            continue
        rationale = rec.get("disposition_rationale")
        if not isinstance(rationale, str) or not rationale.strip():
            fail(
                "bugs_manifest.json",
                f"record_id={bug_id}: has empty or missing disposition_rationale "
                "(schemas.md §10 invariant #7)",
            )
        ft = rec.get("fix_type")
        if ft not in _V150_VALID_FIX_TYPES:
            fail(
                "bugs_manifest.json",
                f"record_id={bug_id}: has invalid or missing fix_type {ft!r}",
            )
            continue
        if (disp, ft) in _V150_ILLEGAL_FIX_PAIRS:
            fail(
                "bugs_manifest.json",
                f"record_id={bug_id}: illegal disposition × fix_type combination "
                f"({disp}, {ft}) per schemas.md §3.4 / §10 invariant #12",
            )

    pass_("bugs_manifest.json: v1.5.1 Layer-1 BUG checks complete")


def check_v1_5_0_index_md(q):
    """§10 invariant #10 — quality/INDEX.md exists with all §11 required fields.

    v1.5.4 Part 1 / Round 1 Council finding C2-1 + Round 2 Council
    finding C1: routes by INDEX payload.schema_version with explicit
    handling for each case so future schemas don't silently downgrade.

      - ``schema_version == SCHEMA_VERSION_CURRENT`` (currently
        ``"2.0"``) → the v1.5.4 contract; target_role_breakdown
        required (null is legitimate for the stub before Phase 1).
      - ``schema_version == "1.0"`` → legacy v1.5.3 archive;
        target_project_type required; one WARN emitted.
      - ``schema_version`` absent/empty AND payload carries
        target_project_type without target_role_breakdown → legacy
        WARN (heuristic fallback for pre-schema-version archives).
      - ``schema_version`` absent/empty AND payload doesn't match the
        legacy heuristic → current path; the run is treated as a
        v1.5.4 stub that simply hasn't populated schema_version yet,
        and target_role_breakdown is required.
      - any other ``schema_version`` (e.g. ``"3.0"`` from a future
        gate) → explicit FAIL "newer than supported" so the operator
        knows to upgrade the gate or downgrade the run.

    This keeps historical archives under quality/previous_runs/
    legible without rewriting them retroactively while keeping the
    gate strict on current runs.
    """
    path = q / "INDEX.md"
    v150_artifacts = (
        "formal_docs_manifest.json",
        "requirements_manifest.json",
        "use_cases_manifest.json",
        "bugs_manifest.json",
        "citation_semantic_check.json",
    )
    is_v150_run = any((q / name).is_file() for name in v150_artifacts)
    if not path.is_file():
        if is_v150_run:
            fail(
                "quality/INDEX.md does not exist (required on every v1.5.1 run per "
                "schemas.md §10 invariant #10)"
            )
        return
    text = path.read_text(encoding="utf-8", errors="ignore")
    match = re.search(r"```json\n(.*?)\n```", text, re.DOTALL)
    if not match:
        fail("quality/INDEX.md: no fenced JSON block found (schemas.md §11)")
        return
    try:
        payload = json.loads(match.group(1))
    except json.JSONDecodeError as exc:
        fail(f"quality/INDEX.md: fenced JSON block invalid: {exc}")
        return
    if not isinstance(payload, dict):
        fail("quality/INDEX.md: fenced JSON block is not a JSON object")
        return

    # Schema-version routing for INDEX.md (v1.5.4 Round 2 Council
    # finding C1). Four cases, handled explicitly so future schemas
    # don't silently downgrade to legacy:
    #   1. schema_version == "1.0"                       -> legacy WARN
    #   2. schema_version absent/empty AND the payload   -> legacy WARN
    #      carries target_project_type but not              (heuristic
    #      target_role_breakdown                             fallback for
    #                                                        pre-schema-
    #                                                        version
    #                                                        archives)
    #   3. schema_version == SCHEMA_VERSION_CURRENT      -> current path
    #   4. schema_version absent/empty AND the payload
    #      doesn't fit case 2                            -> current path
    #                                                       (FAIL on
    #                                                        missing
    #                                                        target_role_breakdown
    #                                                        because the
    #                                                        run is
    #                                                        ambiguous and
    #                                                        v1.5.4 is the
    #                                                        live shape)
    #   5. any other schema_version                      -> explicit FAIL
    #                                                       "newer than
    #                                                        supported"
    schema_version = payload.get("schema_version")
    if schema_version == "1.0":
        is_legacy = True
    elif schema_version in (None, ""):
        is_legacy = (
            "target_project_type" in payload
            and "target_role_breakdown" not in payload
        )
    elif schema_version == SCHEMA_VERSION_CURRENT:
        is_legacy = False
    else:
        fail(
            f"quality/INDEX.md: schema_version {schema_version!r} is "
            f"newer than this gate supports (current: "
            f"{SCHEMA_VERSION_CURRENT!r}). Upgrade the gate or "
            "downgrade the run."
        )
        return

    if is_legacy:
        warn(
            f"quality/INDEX.md: schema_version={schema_version!r} treated as "
            "legacy v1.5.3 archive (target_project_type contract). v1.5.4+ "
            f"runs MUST emit schema_version={SCHEMA_VERSION_CURRENT!r} with "
            "target_role_breakdown."
        )
        required = _V150_INDEX_COMMON_FIELDS + _V150_INDEX_LEGACY_FIELDS
    else:
        required = _V150_INDEX_COMMON_FIELDS + _V154_INDEX_CURRENT_FIELDS

    for key in required:
        if key not in payload:
            fail(f"quality/INDEX.md: missing required field {key!r} (schemas.md §11)")
            continue
        val = payload[key]
        if isinstance(val, str) and not val:
            fail(f"quality/INDEX.md: field {key!r} is empty string (schemas.md §11)")
    summary = payload.get("summary")
    if isinstance(summary, dict):
        for sub in _V150_REQUIRED_SUMMARY_KEYS:
            if sub not in summary:
                fail(
                    f"quality/INDEX.md: summary missing {sub!r} sub-key "
                    "(schemas.md §11)"
                )
    pass_("quality/INDEX.md: §11 fields present")


_V150_VALID_VERDICTS = ("supports", "overreaches", "unclear")


def check_v1_5_0_semantic_check(q):
    """§10 invariant #17 — Council-of-Three majority-overreaches rule.

    Layer-2 semantic check (Phase 6). Gate does NOT re-run the semantic
    review; it parses quality/citation_semantic_check.json and applies
    the majority-overreaches rule:

      - ≥2 of 3 `overreaches` for the same Tier 1/2 REQ → FAIL.
      - isolated 1/3 `overreaches` or `unclear` → WARN.
      - <3 reviews for any Tier 1/2 REQ → FAIL (schemas.md §9.4).
      - review entry for a Tier 3/4/5 REQ → FAIL (only Tier 1/2 are
        semantically reviewable since they carry citations).

    When requirements_manifest.json has zero Tier 1/2 REQs the
    citation_semantic_check.json file is still expected (emitted with
    empty reviews[]); its absence in that case warns rather than
    fails to avoid breaking Spec Gap runs.
    """
    req_data = _v150_manifest(q, "requirements_manifest.json")
    tier_by_req = {}
    if req_data and isinstance(req_data.get("records"), list):
        for rec in req_data["records"]:
            if isinstance(rec, dict):
                rid = rec.get("id")
                tier = rec.get("tier")
                if isinstance(rid, str) and isinstance(tier, int) and not isinstance(tier, bool):
                    tier_by_req[rid] = tier
    tier_12_req_ids = {rid for rid, t in tier_by_req.items() if t in (1, 2)}

    sc_path = q / "citation_semantic_check.json"
    if not sc_path.is_file():
        if tier_12_req_ids:
            fail(
                "quality/citation_semantic_check.json",
                "file missing (schemas.md §10 invariant #17 requires a semantic "
                "check for every Tier 1/2 REQ)",
            )
        else:
            # Spec Gap: no Tier 1/2 REQs to review. File is expected but its
            # absence doesn't break the invariant since there's nothing to
            # enforce. Warn so the orchestrator knows to emit the empty file.
            warn(
                "quality/citation_semantic_check.json: file missing; no Tier 1/2 "
                "REQs present so invariant #17 has nothing to enforce — emit an "
                "empty reviews[] for contract completeness"
            )
        return

    data = _v150_manifest(q, "citation_semantic_check.json")
    if data is None:
        return  # wrapper check already reported the failure
    reviews = data.get("reviews")
    if not isinstance(reviews, list):
        return  # wrapper check already reported

    by_req = {}
    seen_reviewers = {}
    for idx, entry in enumerate(reviews):
        if not isinstance(entry, dict):
            fail(
                "citation_semantic_check.json",
                f"reviews[#{idx}]: not a JSON object",
            )
            continue
        rid = entry.get("req_id")
        reviewer = entry.get("reviewer")
        verdict = entry.get("verdict")
        notes = entry.get("notes")
        if not isinstance(rid, str) or not rid:
            fail(
                "citation_semantic_check.json",
                f"reviews[#{idx}]: missing or non-string req_id",
            )
            continue
        if not isinstance(reviewer, str) or not reviewer:
            fail(
                "citation_semantic_check.json",
                f"record_id={rid}: missing or non-string reviewer",
            )
            continue
        if verdict not in _V150_VALID_VERDICTS:
            fail(
                "citation_semantic_check.json",
                f"record_id={rid}: reviewer={reviewer!r} invalid verdict "
                f"{verdict!r}; expected one of {_V150_VALID_VERDICTS}",
            )
            continue
        if not isinstance(notes, str):
            fail(
                "citation_semantic_check.json",
                f"record_id={rid}: reviewer={reviewer!r} notes must be a string",
            )
            continue
        # §9.4 common-mistake: tier check — review entries must belong to
        # Tier 1/2 REQs only.
        tier = tier_by_req.get(rid)
        if tier is None:
            fail(
                "citation_semantic_check.json",
                f"record_id={rid}: reviewer={reviewer!r} reviews a REQ that does "
                "not exist in requirements_manifest.json",
            )
            continue
        if tier not in (1, 2):
            fail(
                "citation_semantic_check.json",
                f"record_id={rid}: reviewer={reviewer!r} reviews a tier-{tier} "
                "REQ; semantic check applies to Tier 1/2 only (schemas.md §9.4)",
            )
            continue
        # Detect duplicate (req_id, reviewer) pairs — a typo that would slip a
        # vote past the majority computation.
        pair_key = seen_reviewers.setdefault(rid, set())
        if reviewer in pair_key:
            fail(
                "citation_semantic_check.json",
                f"record_id={rid}: duplicate review from reviewer={reviewer!r}",
            )
            continue
        pair_key.add(reviewer)
        by_req.setdefault(rid, []).append(entry)

    # §9.4: every Tier 1/2 REQ needs at least 3 reviews.
    for rid in sorted(tier_12_req_ids):
        entries = by_req.get(rid, [])
        if len(entries) < 3:
            fail(
                "citation_semantic_check.json",
                f"record_id={rid}: fewer than 3 reviews ({len(entries)} present) "
                "— schemas.md §9.4 requires one entry per council member for "
                "every Tier 1/2 REQ",
            )
            continue
        overreach_count = sum(1 for e in entries if e.get("verdict") == "overreaches")
        unclear_count = sum(1 for e in entries if e.get("verdict") == "unclear")
        if overreach_count >= 2:
            reviewers_flagged = ", ".join(
                sorted(
                    str(e.get("reviewer"))
                    for e in entries
                    if e.get("verdict") == "overreaches"
                )
            )
            fail(
                "citation_semantic_check.json",
                f"record_id={rid}: semantic check majority overreaches "
                f"({overreach_count}/{len(entries)} reviewers flagged: "
                f"{reviewers_flagged}) — schemas.md §10 invariant #17",
            )
        elif overreach_count == 1:
            flagged = next(
                str(e.get("reviewer"))
                for e in entries
                if e.get("verdict") == "overreaches"
            )
            warn(
                f"citation_semantic_check.json: record_id={rid}: 1/{len(entries)} "
                f"reviewer ({flagged}) flagged as `overreaches` — surfaced for "
                "human review; not a gate failure unless ≥2 agree"
            )
        if unclear_count >= 1 and overreach_count == 0:
            flagged = ", ".join(
                sorted(
                    str(e.get("reviewer"))
                    for e in entries
                    if e.get("verdict") == "unclear"
                )
            )
            warn(
                f"citation_semantic_check.json: record_id={rid}: "
                f"{unclear_count}/{len(entries)} reviewer(s) flagged as "
                f"`unclear` ({flagged}) — surfaced for human review"
            )

    if not tier_12_req_ids:
        pass_(
            "citation_semantic_check.json: no Tier 1/2 REQs to review "
            "(invariant #17 vacuously satisfied)"
        )
    else:
        pass_(
            f"citation_semantic_check.json: §10 invariant #17 checks complete "
            f"for {len(tier_12_req_ids)} Tier 1/2 REQ(s)"
        )


# --- v1.5.1 Item 5.2: challenge-gate coverage invariant -------------------

# Canonical verdict-line regex from Impl-Plan Item 5.2. Matches a top-level
# "**Verdict:** CONFIRMED/DOWNGRADED/REJECTED" line as a stand-alone line.
_CHALLENGE_VERDICT_RE = re.compile(
    r"^\*\*Verdict:\*\*\s+(CONFIRMED|DOWNGRADED|REJECTED)\s*$",
    re.MULTILINE,
)
# Legacy final-verdict form used by challenge records generated before the
# canonical regex was specified (including the preserved virtio-1.4.6
# evidence at repos/benchmark-1.5.0/virtio-1.4.6/quality/challenge/).
# The briefing says "this invariant only verifies the challenge ran" — the
# legacy form unambiguously records a final verdict, so it satisfies the
# invariant's intent without requiring operators to regenerate baseline
# artifacts. New v1.5.1+ runs should prefer the canonical form.
_CHALLENGE_VERDICT_LEGACY_RE = re.compile(
    r"^\*\*(CONFIRMED|DOWNGRADED|REJECTED)\.?\*\*",
    re.MULTILINE,
)

# Trigger-pattern keyword tables (case-insensitive substring matching).
_CHALLENGE_SECURITY_SEVERITIES = frozenset({"CRITICAL", "HIGH"})
_CHALLENGE_SECURITY_KEYWORDS = (
    "credential", "secret", "auth", "injection", "xss", "csrf",
    "ssrf", "privilege", "bypass", "leak",
)
_CHALLENGE_SIBLING_KEYWORDS = (
    "sibling", "parallel", "parity", "contrasted with", "same concern",
    "in contrast", "other path", "other branch",
)
_CHALLENGE_MISSING_KEYWORDS = (
    "never", "does not", "doesn't", "missing", "absent", "fails to",
)
_CHALLENGE_DESIGN_KEYWORDS = (
    "todo", "why", "ooda", "design decision",
)
_CHALLENGE_ITERATION_KEYWORDS = (
    "gap", "unfiltered", "parity", "adversarial", "iteration",
)


def _bug_writeup_text(q, bug_id):
    """Return lowercased writeup text for ``bug_id`` (empty string if absent).

    Writeups live at quality/writeups/BUG-NNN.md. Reading failures are
    treated as empty text — the invariant still runs on the manifest fields
    (title / summary / source) which are present independently.
    """
    path = _resolve_artifact_path(q, f"writeups/{bug_id}.md")
    if not path.is_file():
        return ""
    try:
        return path.read_text(encoding="utf-8", errors="ignore").lower()
    except OSError:
        return ""


def _bug_req_has_tier_12_citation(req_id, requirements_records):
    """True iff req_id resolves to a REQ with a non-empty citation and
    tier in {1, 2}. Used by the "No spec basis" trigger pattern."""
    if not req_id or not isinstance(requirements_records, list):
        return False
    for rec in requirements_records:
        if not isinstance(rec, dict):
            continue
        if rec.get("id") != req_id:
            continue
        if rec.get("tier") not in (1, 2):
            return False
        citation = rec.get("citation")
        if isinstance(citation, dict) and citation:
            return True
        return False
    return False


def _contains_any(text, keywords):
    """Case-insensitive substring OR across a keyword tuple."""
    if not text:
        return False
    lowered = text.lower()
    return any(kw in lowered for kw in keywords)


def _classify_bug_triggers(rec, q, requirements_records):
    """Return the list of trigger-pattern names that fired for one bug.
    Empty list means the bug does not require a challenge record.

    Patterns mirror Impl-Plan Item 5.2 verbatim. Input aliasing:
      - title: prefers rec['title'], falls back to rec['summary'].
      - requirement: prefers rec['requirement'], falls back to rec['req_id']
        (v1.4.x uses req_id; v1.5.1+ converges on requirement).
      - source_comments: optional, older runs may omit it.
      - source / discovery_phase: substring-matched against the
        iteration-derived keyword list.
    """
    fired = []

    bug_id = rec.get("id", "")
    title = rec.get("title") or rec.get("summary") or ""
    severity = (rec.get("severity") or "").upper()
    writeup = _bug_writeup_text(q, bug_id) if bug_id else ""
    title_plus_writeup = f"{title}\n{writeup}"

    # 1. Security-class.
    if severity in _CHALLENGE_SECURITY_SEVERITIES and _contains_any(
        title_plus_writeup, _CHALLENGE_SECURITY_KEYWORDS
    ):
        fired.append("security-class")

    # 2. No spec basis.
    requirement = rec.get("requirement") or rec.get("req_id")
    has_valid_citation = _bug_req_has_tier_12_citation(requirement, requirements_records)
    if not requirement or not has_valid_citation:
        fired.append("no-spec-basis")

    # 3. Sibling-path divergence.
    if _contains_any(writeup, _CHALLENGE_SIBLING_KEYWORDS):
        fired.append("sibling-path-divergence")

    # 4. Missing functionality.
    if _contains_any(writeup, _CHALLENGE_MISSING_KEYWORDS):
        fired.append("missing-functionality")

    # 5. Design-decision comment (optional field).
    source_comments = rec.get("source_comments")
    if isinstance(source_comments, str) and _contains_any(
        source_comments, _CHALLENGE_DESIGN_KEYWORDS
    ):
        fired.append("design-decision-comment")

    # 6. Iteration-derived.
    source = rec.get("source") or ""
    discovery_phase = rec.get("discovery_phase") or ""
    iter_haystack = f"{source}\n{discovery_phase}"
    if _contains_any(iter_haystack, _CHALLENGE_ITERATION_KEYWORDS):
        fired.append("iteration-derived")

    return fired


def _challenge_record_has_verdict(path):
    """True iff the file exists and contains either the canonical or
    legacy verdict line per the invariant's accept set."""
    if not path.is_file():
        return False
    try:
        text = path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        return False
    if _CHALLENGE_VERDICT_RE.search(text):
        return True
    if _CHALLENGE_VERDICT_LEGACY_RE.search(text):
        return True
    return False


def check_challenge_gate_coverage(q):
    """v1.5.1 Item 5.2 — every bug whose fingerprints trigger the challenge
    gate must have a quality/challenge/BUG-NNN-challenge.md with a valid
    verdict line.

    N/A when quality/bugs_manifest.json is absent (zero-bug runs can't
    have un-challenged bugs). Runs on the current quality/ tree only;
    no cross-run state.
    """
    data = _v150_manifest(q, "bugs_manifest.json")
    if data is None:
        # N/A — the plan explicitly says "invariant is N/A if the file is
        # absent". Consistent with other quality_gate invariants that silently
        # skip when their input isn't present.
        return
    records = data.get("records")
    if not isinstance(records, list):
        return

    reqs_data = _v150_manifest(q, "requirements_manifest.json") or {}
    req_records = reqs_data.get("records") if isinstance(reqs_data, dict) else None

    challenge_dir = q / "challenge"
    triggered = 0
    missing = []   # list of (bug_id, [pattern names]) for bugs with no record
    bad_verdict = []  # list of (bug_id, [pattern names]) for record w/o verdict

    for rec in records:
        if not isinstance(rec, dict):
            continue
        bug_id = rec.get("id")
        if not bug_id:
            continue
        fired = _classify_bug_triggers(rec, q, req_records)
        if not fired:
            continue
        triggered += 1
        record_path = challenge_dir / f"{bug_id}-challenge.md"
        if not record_path.is_file():
            missing.append((bug_id, fired))
        elif not _challenge_record_has_verdict(record_path):
            bad_verdict.append((bug_id, fired))

    if missing:
        for bug_id, fired in missing:
            fail(
                "quality/challenge/",
                f"{bug_id}: challenge record missing (triggered by: {', '.join(fired)}) "
                f"— expected {bug_id}-challenge.md with a **Verdict:** line",
            )
    if bad_verdict:
        for bug_id, fired in bad_verdict:
            fail(
                f"quality/challenge/{bug_id}-challenge.md",
                f"missing or malformed verdict line (triggered by: {', '.join(fired)}) "
                "— expected a line matching `^\\*\\*Verdict:\\*\\*\\s+(CONFIRMED|DOWNGRADED|REJECTED)` "
                "or the legacy final-verdict form",
            )

    if triggered == 0:
        pass_("challenge gate coverage: no bug triggered the challenge gate (vacuous)")
    elif not missing and not bad_verdict:
        pass_(
            f"challenge gate coverage: {triggered} triggered bug(s) all have valid "
            "challenge records"
        )


def check_v1_5_3_formal_doc_role_validation(q):
    """schemas.md §10 invariant #23 — FORMAL_DOC.role on v1.5.3-shaped manifests.

    Legacy manifest (no v1.5.3 fields anywhere): one WARN, then skip.
    v1.5.3-shaped: every record MUST have role populated with a member of
    formal_doc_role (§3.6).
    """
    data = _v150_manifest(q, "formal_docs_manifest.json")
    if data is None:
        return
    records = data.get("records")
    if not isinstance(records, list):
        return  # wrapper check already reported
    if not _is_v1_5_3_shaped(data):
        warn(
            "formal_docs_manifest.json: legacy manifest detected; treating absent "
            "FORMAL_DOC.role as 'external-spec' per schemas.md §3.10 backward-compat rule"
        )
        return
    any_fail = False
    for idx, rec in enumerate(records):
        if not isinstance(rec, dict):
            continue
        rec_id = rec.get("source_path", f"<#{idx}>")
        role = rec.get("role")
        if role not in _V153_VALID_FORMAL_DOC_ROLES:
            fail(
                "formal_docs_manifest.json",
                f"record_id={rec_id}: missing or invalid role {role!r} on "
                f"v1.5.3-shaped manifest (schemas.md §10 invariant #23, valid: "
                f"{', '.join(_V153_VALID_FORMAL_DOC_ROLES)})",
            )
            any_fail = True
    if not any_fail:
        pass_("formal_docs_manifest.json: v1.5.3 role validation complete")


def check_v1_5_3_source_type_validation(q):
    """schemas.md §10 invariants #21 (first part) — REQ.source_type presence.

    Legacy manifest: one WARN, then skip.
    v1.5.3-shaped: every REQ MUST have source_type populated with a member
    of req_source_type (§3.7).
    """
    data = _v150_manifest(q, "requirements_manifest.json")
    if data is None:
        return
    records = data.get("records")
    if not isinstance(records, list):
        return
    if not _is_v1_5_3_shaped(data):
        warn(
            "requirements_manifest.json: legacy manifest detected; treating absent "
            "REQ.source_type as 'code-derived' per schemas.md §3.10 backward-compat rule"
        )
        return
    any_fail = False
    for idx, rec in enumerate(records):
        if not isinstance(rec, dict):
            continue
        req_id = rec.get("id", f"<#{idx}>")
        source_type = rec.get("source_type")
        if source_type not in _V153_VALID_SOURCE_TYPES:
            fail(
                "requirements_manifest.json",
                f"record_id={req_id}: missing or invalid source_type "
                f"{source_type!r} on v1.5.3-shaped manifest "
                f"(schemas.md §10 invariant #21, valid: "
                f"{', '.join(_V153_VALID_SOURCE_TYPES)})",
            )
            any_fail = True
    if not any_fail:
        pass_("requirements_manifest.json: v1.5.3 source_type validation complete")


def check_v1_5_3_skill_section_consistency(q):
    """schemas.md §10 invariant #21 (second part) — skill_section consistency.

    On a v1.5.3-shaped requirements manifest, REQs with
    source_type == 'skill-section' MUST have non-empty skill_section;
    REQs with any other source_type value MUST have skill_section absent
    or null (per §1.5: optional fields may be omitted or present as null).
    Populated skill_section paired with non-skill-section source_type FAILs.

    Legacy manifests are skipped silently here -- the source_type check
    already emitted the single WARN for the manifest.

    Deliberate piggyback (Round 2 Council, item 1): this is the one
    documented exception to the "exactly one WARN per check function"
    convention used by the other three v1.5.3 invariants. Both
    check_v1_5_3_source_type_validation and this check share
    requirements_manifest.json, so emitting a second WARN here would
    double-warn for the same legacy file. The piggyback is locked in
    by test_legacy_manifest_silently_skips in
    TestV153SkillSectionConsistency -- a future maintainer reading the
    brief and adding a WARN for consistency would break that test.
    """
    data = _v150_manifest(q, "requirements_manifest.json")
    if data is None:
        return
    records = data.get("records")
    if not isinstance(records, list):
        return
    if not _is_v1_5_3_shaped(data):
        return  # source_type check handled the soft warn for this manifest
    any_fail = False
    for idx, rec in enumerate(records):
        if not isinstance(rec, dict):
            continue
        req_id = rec.get("id", f"<#{idx}>")
        source_type = rec.get("source_type")
        skill_section = rec.get("skill_section")
        if source_type == "skill-section":
            if not isinstance(skill_section, str) or not skill_section.strip():
                fail(
                    "requirements_manifest.json",
                    f"record_id={req_id}: source_type='skill-section' but "
                    f"skill_section is empty or missing "
                    "(schemas.md §10 invariant #21)",
                )
                any_fail = True
        else:
            if skill_section is not None and skill_section != "":
                fail(
                    "requirements_manifest.json",
                    f"record_id={req_id}: skill_section={skill_section!r} "
                    f"populated but source_type={source_type!r} is not "
                    "'skill-section' (schemas.md §10 invariant #21)",
                )
                any_fail = True
    if not any_fail:
        pass_("requirements_manifest.json: v1.5.3 skill_section consistency complete")


def check_v1_5_3_divergence_type_validation(q):
    """schemas.md §10 invariant #22 — BUG.divergence_type on v1.5.3-shaped manifests.

    Legacy manifest: one WARN, then skip.
    v1.5.3-shaped: every BUG MUST have divergence_type populated with a
    member of bug_divergence_type (§3.8).
    """
    data = _v150_manifest(q, "bugs_manifest.json")
    if data is None:
        return
    records = data.get("records")
    if not isinstance(records, list):
        return
    if not _is_v1_5_3_shaped(data):
        warn(
            "bugs_manifest.json: legacy manifest detected; treating absent "
            "BUG.divergence_type as 'code-spec' per schemas.md §3.10 backward-compat rule"
        )
        return
    any_fail = False
    for idx, rec in enumerate(records):
        if not isinstance(rec, dict):
            continue
        bug_id = rec.get("id", f"<#{idx}>")
        divergence_type = rec.get("divergence_type")
        if divergence_type not in _V153_VALID_DIVERGENCE_TYPES:
            fail(
                "bugs_manifest.json",
                f"record_id={bug_id}: missing or invalid divergence_type "
                f"{divergence_type!r} on v1.5.3-shaped manifest "
                f"(schemas.md §10 invariant #22, valid: "
                f"{', '.join(_V153_VALID_DIVERGENCE_TYPES)})",
            )
            any_fail = True
    if not any_fail:
        pass_("bugs_manifest.json: v1.5.3 divergence_type validation complete")


_V153_COUNCIL_INBOX_ITEM_TYPES = frozenset({
    "rejected-draft",
    "tier-5-demotion",
    "zero-req-section",
    "weak-rationale",
})


def check_v1_5_3_council_inbox_validation(q):
    """Phase 3b BLOCK-4 cross-reference + DQ-5 structural validation.

    Validates quality/phase3/pass_d_council_inbox.json against the
    DQ-5 schema AND verifies that every Pass D rejection / Tier-5
    demotion has a matching council-inbox item. Without the
    cross-reference invariant, a syntactically-valid but functionally
    -empty inbox could pass while pass_d_audit.json shows 30+
    rejections -- the inbox population could silently break and the
    gate would not catch it.

    Two failure modes:
      1. Structural -- malformed item record, invalid item_type,
         missing required field per the DQ-5 schema.
      2. Cross-reference -- pass_d_audit.json entry with outcome in
         {rejected, demoted_to_tier_5} has no matching item in the
         inbox.

    Phase 3 artifact set is at <repo>/quality/phase3/, NOT at the
    top-level <repo>/quality/. The check returns silently if the
    phase3 directory does not exist (the project is Code-only or
    Phase 3 has not been run yet).
    """
    phase3_dir = _resolve_artifact_path(q, "phase3")
    if not phase3_dir.is_dir():
        return  # phase 3 not run; not in scope for this manifest set
    inbox_path = phase3_dir / "pass_d_council_inbox.json"
    audit_path = phase3_dir / "pass_d_audit.json"
    if not inbox_path.is_file():
        return  # phase 3 partially run; skip silently

    inbox_data = load_json(inbox_path)
    if not isinstance(inbox_data, dict):
        fail(f"{inbox_path.name}: not a valid JSON object")
        return

    # Structural validation.
    schema_version = inbox_data.get("schema_version")
    if schema_version != "1.0":
        fail(
            f"{inbox_path.name}: schema_version {schema_version!r} "
            "does not match the DQ-5 spec value '1.0'"
        )
    items = inbox_data.get("items")
    if not isinstance(items, list):
        fail(f"{inbox_path.name}: 'items' is missing or not a list")
        return

    required_fields = {
        "item_type",
        "draft_idx",
        "section_idx",
        "section_heading",
        "rationale",
        "context_excerpt",
        "provisional_disposition",
    }
    for idx, item in enumerate(items):
        if not isinstance(item, dict):
            fail(f"{inbox_path.name}: item #{idx} is not a JSON object")
            continue
        missing = required_fields - set(item.keys())
        if missing:
            fail(
                f"{inbox_path.name}: item #{idx} is missing required "
                f"DQ-5 fields: {sorted(missing)}"
            )
        if item.get("item_type") not in _V153_COUNCIL_INBOX_ITEM_TYPES:
            fail(
                f"{inbox_path.name}: item #{idx} has invalid item_type "
                f"{item.get('item_type')!r} (valid: "
                f"{sorted(_V153_COUNCIL_INBOX_ITEM_TYPES)})"
            )
        rationale = item.get("rationale")
        if not isinstance(rationale, str) or not rationale.strip():
            fail(
                f"{inbox_path.name}: item #{idx} has empty or missing "
                "rationale"
            )

    # Cross-reference invariant: every rejected / demoted audit entry
    # must have a matching inbox item by (draft_idx, item_type).
    if audit_path.is_file():
        audit_data = load_json(audit_path)
        if isinstance(audit_data, dict):
            inbox_pairs = {
                (item.get("draft_idx"), item.get("item_type"))
                for item in items
                if isinstance(item, dict)
            }
            for entry in audit_data.get("rejected", []) or []:
                if not isinstance(entry, dict):
                    continue
                pair = (entry.get("draft_idx"), "rejected-draft")
                if pair not in inbox_pairs:
                    fail(
                        f"{inbox_path.name}: pass_d_audit.json shows "
                        f"rejected draft_idx={entry.get('draft_idx')} "
                        "but there is no matching rejected-draft item "
                        "in the council inbox (BLOCK-4 cross-reference "
                        "invariant violation)"
                    )
            for entry in audit_data.get("demoted_to_tier_5", []) or []:
                if not isinstance(entry, dict):
                    continue
                pair = (entry.get("draft_idx"), "tier-5-demotion")
                if pair not in inbox_pairs:
                    fail(
                        f"{inbox_path.name}: pass_d_audit.json shows "
                        f"tier-5 demotion at draft_idx={entry.get('draft_idx')} "
                        "but there is no matching tier-5-demotion item "
                        "in the council inbox"
                    )

    pass_(f"{inbox_path.name}: v1.5.3 council inbox validation complete")


# ---------------------------------------------------------------------------
# Phase 4 skill-project gate enforcement checks (DQ-4-4).
#
# These four checks fire when the target's role map shows skill-prose
# surface; they SKIP (informational `INFO: skipped` line, no fail
# counter increment) on pure-code targets. The check that always runs
# is check_role_map_consistency.
#
# v1.5.4 Part 1: the legacy Code/Skill/Hybrid string is now derived
# from the Phase-1 role map at <q>/exploration_role_map.json. The
# mapping mirrors bin/role_map.py::derive_legacy_project_type. If the
# role map is absent, all four checks SKIP silently — Phase 1 has not
# been run yet on this target. The gate ships into target repos as a
# stdlib-only script and cannot import bin/role_map; the small amount
# of role-map awareness it needs is inlined below.
# ---------------------------------------------------------------------------


def _load_role_map(q):
    """Return the parsed exploration_role_map.json dict, or None when
    absent / unparsable. v1.5.4 inline replacement for the prior
    project_type.json reader."""
    return load_json(q / "exploration_role_map.json")


def _role_map_has_role(role_map, role_set):
    if not isinstance(role_map, dict):
        return False
    files = role_map.get("files") or []
    if not isinstance(files, list):
        return False
    for entry in files:
        if isinstance(entry, dict) and entry.get("role") in role_set:
            return True
    return False


def _phase4_project_type(q):
    """Return the v1.5.3-equivalent classification string ('Code' /
    'Skill' / 'Hybrid') derived from the Phase-1 role map, or None
    when the role map is absent / unparsable.

    Mapping (mirrors bin/role_map.derive_legacy_project_type):
      - has skill-prose AND has code  -> 'Hybrid'
      - has skill-prose, no code      -> 'Skill'
      - no skill-prose                -> 'Code'
    """
    role_map = _load_role_map(q)
    if role_map is None:
        return None
    skill = _role_map_has_role(role_map, ("skill-prose", "skill-reference"))
    code = _role_map_has_role(role_map, ("code",))
    if skill and code:
        return "Hybrid"
    if skill:
        return "Skill"
    return "Code"


def check_skill_section_req_coverage(repo_dir, q):
    """Skill / Hybrid: every operational SKILL.md section per
    pass_d_section_coverage.json has ≥1 promoted REQ. Meta-allowlist
    sections are exempt (their section_kind == 'meta').

    SKIPS for Code projects."""
    print("[Phase 4: skill-section REQ coverage]")
    classification = _phase4_project_type(q)
    if classification not in ("Skill", "Hybrid"):
        info(f"check_skill_section_req_coverage: skip (project_type={classification!r})")
        return
    coverage_path = _resolve_artifact_path(q, "phase3/pass_d_section_coverage.json")
    data = load_json(coverage_path)
    if not isinstance(data, dict):
        info(
            "check_skill_section_req_coverage: skip "
            "(pass_d_section_coverage.json missing or unparsable)"
        )
        return
    failures = 0
    for s in data.get("sections", []) or []:
        if not isinstance(s, dict):
            continue
        kind = s.get("section_kind")
        if kind != "operational":
            continue
        promoted = s.get("drafts_promoted", 0) or 0
        if promoted < 1:
            heading = s.get("heading") or "<unknown>"
            document = s.get("document") or "SKILL.md"
            section_idx = s.get("section_idx")
            fail(
                f"{document}",
                f"section #{section_idx} {heading!r} has 0 promoted "
                "REQs and is not in the meta allowlist "
                "(check_skill_section_req_coverage)",
            )
            failures += 1
    if failures == 0:
        pass_("check_skill_section_req_coverage: every operational section has ≥1 promoted REQ")


def check_reference_file_req_coverage(repo_dir, q):
    """Skill / Hybrid: every reference file under references/ has ≥1
    REQ citing it OR a `<!-- non-normative -->` marker in its first
    5 lines.

    SKIPS for Code projects."""
    print("[Phase 4: reference-file REQ coverage]")
    classification = _phase4_project_type(q)
    if classification not in ("Skill", "Hybrid"):
        info(f"check_reference_file_req_coverage: skip (project_type={classification!r})")
        return
    references_dir = repo_dir / "references"
    if not references_dir.is_dir():
        info("check_reference_file_req_coverage: skip (no references/ directory)")
        return
    formal_path = _resolve_artifact_path(q, "phase3/pass_c_formal.jsonl")
    if not formal_path.is_file():
        info(
            "check_reference_file_req_coverage: skip "
            "(pass_c_formal.jsonl missing — Phase 3 not run yet)"
        )
        return
    cited_documents = set()
    for line in formal_path.read_text(encoding="utf-8").splitlines():
        if not line.strip():
            continue
        try:
            rec = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(rec, dict):
            continue
        sd = rec.get("source_document")
        if isinstance(sd, str):
            cited_documents.add(sd)
    failures = 0
    for ref in sorted(references_dir.glob("*.md")):
        rel = f"references/{ref.name}"
        if rel in cited_documents:
            continue
        # Non-normative marker check (first 5 lines).
        head = ref.read_text(encoding="utf-8", errors="replace").splitlines()[:5]
        if any("<!-- non-normative -->" in line.lower() for line in head):
            continue
        fail(
            rel,
            "no REQ cites this reference file and no <!-- non-normative --> "
            "marker in its first 5 lines (check_reference_file_req_coverage)",
        )
        failures += 1
    if failures == 0:
        pass_("check_reference_file_req_coverage: every reference file has ≥1 citing REQ or non-normative marker")


def check_hybrid_cross_cutting_reqs(repo_dir, q):
    """Hybrid only: ≥1 REQ has triangulated evidence —
    `source_type=skill-section` AND its acceptance_criteria references
    a code artifact mentioned in another REQ with
    `source_type=code-derived`.

    SKIPS for Skill or Code projects."""
    print("[Phase 4: hybrid cross-cutting REQs]")
    classification = _phase4_project_type(q)
    if classification != "Hybrid":
        info(f"check_hybrid_cross_cutting_reqs: skip (project_type={classification!r})")
        return
    formal_path = _resolve_artifact_path(q, "phase3/pass_c_formal.jsonl")
    if not formal_path.is_file():
        info(
            "check_hybrid_cross_cutting_reqs: skip "
            "(pass_c_formal.jsonl missing — Phase 3 not run yet)"
        )
        return
    skill_section_reqs = []
    code_derived_artifacts = set()
    for line in formal_path.read_text(encoding="utf-8").splitlines():
        if not line.strip():
            continue
        try:
            rec = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(rec, dict):
            continue
        st = rec.get("source_type")
        if st == "skill-section":
            skill_section_reqs.append(rec)
        elif st == "code-derived":
            ac = (rec.get("acceptance_criteria") or "")
            cite = (rec.get("citation_excerpt") or "")
            for token in re.findall(
                r"\b([\w./-]+\.(?:py|sh|json))\b", ac + " " + cite
            ):
                code_derived_artifacts.add(token)
    if not code_derived_artifacts:
        # On a Hybrid project that hasn't yet produced any code-derived
        # REQs, the cross-cutting check has nothing to triangulate
        # against. INFO + skip rather than fail (the absence is the
        # diagnostic).
        info(
            "check_hybrid_cross_cutting_reqs: skip "
            "(no code-derived REQs in pass_c_formal.jsonl yet)"
        )
        return
    triangulated = 0
    for rec in skill_section_reqs:
        ac = (rec.get("acceptance_criteria") or "") + " " + (
            rec.get("citation_excerpt") or ""
        )
        if any(art in ac for art in code_derived_artifacts):
            triangulated += 1
            if triangulated >= 1:
                break
    if triangulated >= 1:
        pass_(
            f"check_hybrid_cross_cutting_reqs: triangulated evidence "
            f"present (≥{triangulated} skill-section REQ references a "
            "code-derived artifact)"
        )
    else:
        fail(
            "pass_c_formal.jsonl",
            "Hybrid project has no triangulated REQ pair "
            "(skill-section REQ referencing a code-derived artifact); "
            "check_hybrid_cross_cutting_reqs",
        )


def check_role_map_consistency(repo_dir, q):
    """All projects: exploration_role_map.json (when present) parses as
    a JSON object, declares schema_version '1.0', carries a 'files'
    list and a 'breakdown.percentages' dict with the four expected
    share keys.

    SKIPS silently when the role map is absent — Phase 1 has not been
    run yet on this target. v1.5.4 Part 1 replacement for the v1.5.3
    check_project_type_consistency, which keyed on
    quality/project_type.json (now retired)."""
    print("[Phase 4: role-map consistency]")
    rm_path = q / "exploration_role_map.json"
    if not rm_path.is_file():
        info(
            "check_role_map_consistency: skip "
            "(exploration_role_map.json absent — Phase 1 not run yet)"
        )
        return
    data = load_json(rm_path)
    if not isinstance(data, dict):
        fail(
            f"{rm_path.relative_to(q.parent)}",
            "exploration_role_map.json is not a valid JSON object",
        )
        return
    if data.get("schema_version") != "1.0":
        fail(
            f"{rm_path.relative_to(q.parent)}",
            f"schema_version {data.get('schema_version')!r} is not '1.0' "
            "(check_role_map_consistency)",
        )
        return
    files = data.get("files")
    if not isinstance(files, list):
        fail(
            f"{rm_path.relative_to(q.parent)}",
            "'files' is not a list (check_role_map_consistency)",
        )
        return
    breakdown = data.get("breakdown")
    if not isinstance(breakdown, dict):
        fail(
            f"{rm_path.relative_to(q.parent)}",
            "'breakdown' is not an object (check_role_map_consistency)",
        )
        return
    percentages = breakdown.get("percentages")
    if not isinstance(percentages, dict):
        fail(
            f"{rm_path.relative_to(q.parent)}",
            "'breakdown.percentages' is not an object "
            "(check_role_map_consistency)",
        )
        return
    missing = [
        k for k in ("skill_share", "code_share", "tool_share", "other_share")
        if k not in percentages
    ]
    if missing:
        fail(
            f"{rm_path.relative_to(q.parent)}",
            f"breakdown.percentages missing keys: {missing} "
            "(check_role_map_consistency)",
        )
        return
    derived = _phase4_project_type(q) or "Unknown"
    pass_(
        f"{rm_path.relative_to(q.parent)}: role map well-formed "
        f"(legacy-derived project type {derived!r}; "
        "check_role_map_consistency)"
    )


def check_v1_5_2_cardinality_gate(repo_dir):
    """v1.5.2 Lever 3: Phase 5 cardinality reconciliation gate.

    Surfaces every failure from validate_cardinality_gate() as a fail() entry.
    """
    failures = validate_cardinality_gate(repo_dir)
    if not failures:
        pass_("compensation_grid.json: v1.5.2 cardinality gate clean")
        return
    for msg in failures:
        fail("compensation_grid.json", msg)


def check_v1_5_0_gate_invariants(repo_dir, q):
    """Dispatcher that runs every Layer-1 mechanical check from schemas.md §10."""
    check_v1_5_0_cite_extensions(repo_dir)
    check_v1_5_0_manifest_wrappers(q)
    check_v1_5_0_requirements_manifest(repo_dir, q)
    check_v1_5_0_bugs_manifest(q)
    check_v1_5_0_index_md(q)
    # Phase 6 invariant #17 runs after requirements_manifest so it sees
    # shape-validated REQ records.
    check_v1_5_0_semantic_check(q)
    # v1.5.1 Item 5.2: challenge-gate coverage runs last. It depends on
    # requirements_manifest.json for the "No spec basis" pattern but
    # does not redo schema checks that the prior invariants already cover.
    check_challenge_gate_coverage(q)
    # v1.5.2 Lever 3: cardinality reconciliation gate.
    check_v1_5_2_cardinality_gate(repo_dir)
    # v1.5.3 Phase 2: schema extensions for skill-aware projects (Code projects
    # with legacy manifests hit the soft-warn path; v1.5.3-shaped manifests
    # validate strictly per schemas.md §10 invariants #21–#23).
    check_v1_5_3_formal_doc_role_validation(q)
    check_v1_5_3_source_type_validation(q)
    check_v1_5_3_skill_section_consistency(q)
    check_v1_5_3_divergence_type_validation(q)
    # v1.5.3 Phase 3b: council inbox structural + cross-reference
    # validation (DQ-5 + BLOCK-4). No-op for Code projects (phase3
    # directory is absent).
    check_v1_5_3_council_inbox_validation(q)
    # v1.5.3 Phase 4 (DQ-4-4): skill-project gate enforcement. The
    # first three SKIP for code-only projects (no skill-prose surface
    # in the role map); check_role_map_consistency runs for all
    # projects. v1.5.4 Part 1: project_type derived from the Phase-1
    # role map instead of the retired project_type.json.
    check_skill_section_req_coverage(repo_dir, q)
    check_reference_file_req_coverage(repo_dir, q)
    check_hybrid_cross_cutting_reqs(repo_dir, q)
    check_role_map_consistency(repo_dir, q)


def check_repo(repo_dir, version_arg, strictness):
    """Run all checks for one repo. Writes output via pass_/fail_/warn/info."""
    repo_dir = Path(repo_dir)
    if str(repo_dir) == ".":
        repo_dir = Path.cwd()
    repo_name = repo_dir.name
    q = repo_dir / "quality"

    print("")
    print(f"=== {repo_name} ===")

    check_file_existence(repo_dir, q, strictness)
    bug_count, bug_ids = check_bugs_heading(q)
    tdd_data = check_tdd_sidecar(q, bug_count)
    check_tdd_logs(q, bug_count, bug_ids, tdd_data)
    check_integration_sidecar(q, strictness)
    check_recheck_sidecar(q)
    check_use_cases(repo_dir, q, strictness)
    check_test_file_extension(repo_dir, q)
    check_terminal_gate(q)
    check_mechanical(q)
    check_patches(q, bug_count, bug_ids, strictness)
    check_writeups(q, bug_count)
    skill_version = check_version_stamps(repo_dir, q)
    check_cross_run_contamination(repo_dir, q, version_arg, skill_version)
    check_run_metadata(q)
    check_v1_5_0_gate_invariants(repo_dir, q)

    print("")


# --- Main ---


def main(argv=None):
    _reset_counters()
    if argv is None:
        argv = sys.argv[1:]

    repo_dirs = []
    version = ""
    check_all = False
    strictness = "benchmark"

    expect_version = False
    for arg in argv:
        if expect_version:
            version = arg
            expect_version = False
            continue
        if arg == "--version":
            expect_version = True
        elif arg == "--all":
            check_all = True
        elif arg == "--benchmark":
            strictness = "benchmark"
        elif arg == "--general":
            strictness = "general"
        else:
            repo_dirs.append(arg)

    if not version:
        version = detect_skill_version([
            SCRIPT_DIR / ".." / "SKILL.md",
            SCRIPT_DIR / "SKILL.md",
            Path("SKILL.md"),
            Path(".claude") / "skills" / "quality-playbook" / "SKILL.md",
            Path(".github") / "skills" / "SKILL.md",
            Path(".github") / "skills" / "quality-playbook" / "SKILL.md",
        ])

    # Resolve repos
    if check_all:
        for entry in sorted(SCRIPT_DIR.glob(f"*-{version}")):
            if (entry / "quality").is_dir():
                repo_dirs.append(str(entry))
    elif len(repo_dirs) == 1 and repo_dirs[0] == ".":
        repo_dirs = [str(Path.cwd())]
    else:
        resolved = []
        for name in repo_dirs:
            p = Path(name)
            if (p / "quality").is_dir():
                resolved.append(name)
            elif (SCRIPT_DIR / f"{name}-{version}").is_dir():
                resolved.append(str(SCRIPT_DIR / f"{name}-{version}"))
            elif (SCRIPT_DIR / name).is_dir():
                resolved.append(str(SCRIPT_DIR / name))
            else:
                print(f"WARNING: Cannot find repo '{name}'")
        repo_dirs = resolved

    if not repo_dirs:
        print(f"Usage: {sys.argv[0]} [--version V] [--all | repo1 repo2 ... | .]")
        return 1

    print("=== Quality Gate — Post-Run Validation ===")
    print(f"Version:    {version or 'unknown'}")
    print(f"Strictness: {strictness}")
    print(f"Repos:      {len(repo_dirs)}")

    for rd in repo_dirs:
        check_repo(rd, version, strictness)

    print("")
    print("===========================================")
    print(f"Total: {FAIL} FAIL, {WARN} WARN")
    if FAIL > 0:
        print(f"RESULT: GATE FAILED — {FAIL} check(s) must be fixed")
        return 1
    else:
        print("RESULT: GATE PASSED")
        return 0


if __name__ == "__main__":
    sys.exit(main())