mirror of
https://github.com/github/awesome-copilot.git
synced 2026-04-30 12:15:56 +00:00
update eval-driven-dev skill (#1434)
* update eval-driven-dev skill * fix: update skill update command to use correct repository path * address comments. * update eval driven dev
This commit is contained in:
@@ -2,21 +2,74 @@
|
||||
# Setup script for eval-driven-dev skill.
|
||||
# Updates the skill, installs/upgrades pixie-qa[all], initializes the
|
||||
# pixie working directory, and starts the web UI server in the background.
|
||||
# Failures are non-fatal — the workflow continues even if a step here is
|
||||
# blocked by the environment.
|
||||
#
|
||||
# Error handling:
|
||||
# - Skill update failure → non-fatal (continue with existing version)
|
||||
# - pixie-qa upgrade failure when already installed → non-fatal
|
||||
# - pixie-qa NOT installed and install fails → FATAL (exit 1)
|
||||
# - pixie init failure → FATAL (exit 1)
|
||||
# - pixie start failure → FATAL (exit 1)
|
||||
set -u
|
||||
|
||||
echo "=== Updating skill ==="
|
||||
npx skills update || echo "(skill update skipped)"
|
||||
npx skills update github/awesome-copilot --skill eval-driven-dev -g -y && npx skills update github/awesome-copilot --skill eval-driven-dev -p -y || {
|
||||
echo "(skill update failed — proceeding with existing version)"
|
||||
}
|
||||
|
||||
echo ""
|
||||
echo "=== Installing / upgrading pixie-qa[all] ==="
|
||||
|
||||
# Helper: check if the pixie Python package is importable
|
||||
_pixie_available() {
  # Succeeds (exit status 0) when `import pixie` works in the interpreter
  # selected by the project's lock file: uv if uv.lock exists, poetry if
  # poetry.lock exists, otherwise whatever `python` is on PATH.
  # Import errors are discarded; only the exit status matters.
  if [ -f uv.lock ]; then
    uv run python -c "import pixie" 2>/dev/null
    return
  fi

  if [ -f poetry.lock ]; then
    poetry run python -c "import pixie" 2>/dev/null
    return
  fi

  python -c "import pixie" 2>/dev/null
}
|
||||
|
||||
# Check if pixie is already installed before attempting upgrade
|
||||
PIXIE_WAS_INSTALLED=false
|
||||
if _pixie_available; then
|
||||
PIXIE_WAS_INSTALLED=true
|
||||
fi
|
||||
|
||||
INSTALL_OK=false
|
||||
if [ -f uv.lock ]; then
|
||||
uv add "pixie-qa[all]>=0.6.1,<0.7.0" --upgrade
|
||||
# uv add does universal resolution across all Python versions in
|
||||
# requires-python. If the host project supports a Python version
|
||||
# where pixie-qa is unavailable (e.g. <3.10), uv add fails.
|
||||
# Fall back to uv pip install which only targets the active interpreter.
|
||||
if uv add "pixie-qa[all]>=0.8.4,<0.9.0" --upgrade 2>&1; then
|
||||
INSTALL_OK=true
|
||||
else
|
||||
echo "(uv add failed — falling back to uv pip install)"
|
||||
if uv pip install "pixie-qa[all]>=0.8.4,<0.9.0" 2>&1; then
|
||||
INSTALL_OK=true
|
||||
fi
|
||||
fi
|
||||
elif [ -f poetry.lock ]; then
|
||||
poetry add "pixie-qa[all]>=0.6.1,<0.7.0"
|
||||
if poetry add "pixie-qa[all]>=0.8.4,<0.9.0"; then
|
||||
INSTALL_OK=true
|
||||
fi
|
||||
else
|
||||
pip install --upgrade "pixie-qa[all]>=0.6.1,<0.7.0"
|
||||
if pip install --upgrade "pixie-qa[all]>=0.8.4,<0.9.0"; then
|
||||
INSTALL_OK=true
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$INSTALL_OK" = false ]; then
|
||||
if [ "$PIXIE_WAS_INSTALLED" = true ]; then
|
||||
echo "(pixie-qa upgrade failed — proceeding with existing version)"
|
||||
else
|
||||
echo ""
|
||||
echo "ERROR: pixie-qa is not installed and installation failed."
|
||||
echo "The eval-driven-dev workflow requires the pixie-qa package."
|
||||
echo "Please install it manually and re-run this script."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
echo ""
|
||||
@@ -29,6 +82,13 @@ else
|
||||
pixie init
|
||||
fi
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
echo ""
|
||||
echo "ERROR: Failed to initialize pixie working directory."
|
||||
echo "Please check the error above and fix it before continuing."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Starting web UI server (background) ==="
|
||||
if [ -f uv.lock ]; then
|
||||
@@ -39,5 +99,12 @@ else
|
||||
pixie start
|
||||
fi
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
echo ""
|
||||
echo "ERROR: Failed to start the web UI server."
|
||||
echo "Please check the error above and fix it before continuing."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Setup complete ==="
|
||||
|
||||
139
skills/eval-driven-dev/resources/verify_step6_completion.py
Normal file
139
skills/eval-driven-dev/resources/verify_step6_completion.py
Normal file
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Validate that eval-driven-dev Step 6 artifacts are complete.
|
||||
|
||||
Usage:
|
||||
python verify_step6_completion.py /path/to/pixie_qa/results/<test_id>
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ENTRY_REQUIRED_FILES = ("evaluations.jsonl",)
|
||||
DATASET_ANALYSIS_FILES = ("analysis.md", "analysis-summary.md")
|
||||
ROOT_ANALYSIS_FILES = ("action-plan.md", "action-plan-summary.md", "meta.json")
|
||||
|
||||
|
||||
def _dataset_dirs(results_dir: Path) -> list[Path]:
    """Return the ``dataset-*`` subdirectories of *results_dir* in sorted order.

    Only direct children are considered; regular files whose name happens to
    start with ``dataset-`` are ignored.
    """
    found: list[Path] = []
    for child in results_dir.iterdir():
        if not child.name.startswith("dataset-"):
            continue
        if child.is_dir():
            found.append(child)
    found.sort()
    return found
|
||||
|
||||
|
||||
def _entry_dirs(dataset_dir: Path) -> list[Path]:
    """Return the ``entry-*`` subdirectories of *dataset_dir* in sorted order.

    Only direct children are considered; regular files whose name happens to
    start with ``entry-`` are ignored.
    """
    found: list[Path] = []
    for child in dataset_dir.iterdir():
        if not child.name.startswith("entry-"):
            continue
        if child.is_dir():
            found.append(child)
    found.sort()
    return found
|
||||
|
||||
|
||||
def _read_jsonl(path: Path, errors: list[str]) -> list[dict[str, object]]:
    """Parse a JSON Lines file, collecting problems instead of raising.

    Blank lines are skipped. Every other line must decode to a JSON object.
    A line that is malformed, or that decodes to some other JSON type, is
    reported in *errors* with its 1-based line number and skipped, so a single
    bad line does not hide the rows that follow it.

    Args:
        path: File to read; its content is decoded as UTF-8.
        errors: List that human-readable problem descriptions are appended to.

    Returns:
        The successfully parsed objects in file order. Empty when the file
        cannot be read or is not valid UTF-8.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError derives from ValueError, not OSError, so it has
        # to be listed explicitly or a non-UTF-8 file would crash the caller.
        errors.append(f"{path}: could not read file ({exc})")
        return []

    rows: list[dict[str, object]] = []
    # Split on "\n" only: str.splitlines() also breaks on characters such as
    # U+2028 that may appear unescaped inside a JSON string. A trailing "\r"
    # from CRLF files is plain JSON whitespace and is ignored by json.loads.
    for index, line in enumerate(text.split("\n"), start=1):
        if not line.strip():
            continue
        try:
            obj = json.loads(line)
        except json.JSONDecodeError as exc:
            errors.append(f"{path}: invalid JSONL on line {index} ({exc})")
            continue
        if not isinstance(obj, dict):
            errors.append(f"{path}: line {index} is not a JSON object")
            continue
        rows.append(obj)
    return rows
|
||||
|
||||
|
||||
def validate_results_dir(results_dir: Path) -> list[str]:
    """Collect every problem that keeps a pixie results directory incomplete.

    Checks, in order: the root analysis files, the presence of ``dataset-*``
    directories, each dataset's analysis files, the presence of ``entry-*``
    directories, each entry's required files, and finally every row of the
    entry's ``evaluations.jsonl`` (no row may still be pending, and every
    other row needs both a score and a reasoning).

    Args:
        results_dir: Directory to inspect.

    Returns:
        Human-readable error messages; an empty list means the check passed.
    """
    if not results_dir.is_dir():
        return [f"{results_dir}: results directory not found"]

    problems: list[str] = [
        f"Missing root artifact: {results_dir / name}"
        for name in ROOT_ANALYSIS_FILES
        if not (results_dir / name).is_file()
    ]

    dataset_dirs = _dataset_dirs(results_dir)
    if not dataset_dirs:
        # Nothing further can be checked without at least one dataset.
        problems.append(f"{results_dir}: no dataset-* directories found")
        return problems

    # Fields every non-pending row must carry, with the message prefix used
    # when the field is absent.
    required_fields = (
        ("score", "Missing score in scored evaluation: "),
        ("reasoning", "Missing reasoning in scored evaluation: "),
    )

    for dataset_dir in dataset_dirs:
        problems.extend(
            f"Missing dataset artifact: {dataset_dir / name}"
            for name in DATASET_ANALYSIS_FILES
            if not (dataset_dir / name).is_file()
        )

        entries = _entry_dirs(dataset_dir)
        if not entries:
            problems.append(f"{dataset_dir}: no entry-* directories found")
            continue

        for entry_dir in entries:
            problems.extend(
                f"Missing entry artifact: {entry_dir / name}"
                for name in ENTRY_REQUIRED_FILES
                if not (entry_dir / name).is_file()
            )

            evaluations_path = entry_dir / "evaluations.jsonl"
            if not evaluations_path.is_file():
                continue

            # Read problems are appended to the list before any row problems.
            for row in _read_jsonl(evaluations_path, problems):
                where = (
                    f"{evaluations_path} ({row.get('evaluator', 'unknown evaluator')})"
                )
                if row.get("status") == "pending":
                    problems.append("Pending evaluation remains: " + where)
                    continue
                for key, prefix in required_fields:
                    if key not in row:
                        problems.append(prefix + where)

    return problems
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
"""CLI entry point."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Validate Step 6 completion for a pixie results directory"
|
||||
)
|
||||
parser.add_argument(
|
||||
"results_dir",
|
||||
type=Path,
|
||||
help="Path to pixie_qa/results/<test_id>",
|
||||
)
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
errors = validate_results_dir(args.results_dir)
|
||||
if errors:
|
||||
print("Step 6 completion check failed:")
|
||||
for error in errors:
|
||||
print(f"- {error}")
|
||||
return 1
|
||||
|
||||
print("Step 6 completion check passed.")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user