chore: sync Arize skills from arize-skills@597d609bfe5f07fd7d24acfdb408a082911b18fc and phoenix@746247cbb07b0dc7803b87c69dd8c77811c33f59 (#1583)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Jim Bennett
2026-05-03 18:05:44 -07:00
committed by GitHub
parent 82b58047e0
commit c7b2aecb94
40 changed files with 1316 additions and 423 deletions
@@ -41,9 +41,17 @@ judge_cheap = ClassificationEvaluator(
## Don't Model Shop
```python
from phoenix.client import Client
client = Client()
# BAD
for model in ["gpt-4o", "claude-3", "gemini-pro"]:
results = run_experiment(dataset, task, model)
results = client.experiments.run_experiment(
dataset=dataset,
task=lambda input, _model=model: task(input, model=_model),
evaluators=evaluators,
)
# GOOD
failures = analyze_errors(results)