#!/bin/bash
# Shared helpers for provider/model test scripts: the provider -> model matrix,
# provider availability checks, a parallel test runner, and result reporting.

# Export KEY=VALUE pairs from a dotenv-style file (default: ./.env).
#
# Each line is handled on its own, so values containing spaces or glob
# characters are exported verbatim instead of being word-split. Blank lines,
# lines starting with '#', lines without '=' and lines whose key is not a valid
# shell identifier are ignored. An optional leading "export " and one pair of
# matching surrounding quotes around the value are stripped.
#
# Arguments: $1 - path to the env file (optional, defaults to .env)
# Returns:   0 always (a missing file is not an error)
_load_dotenv() {
  local env_file="${1:-.env}"
  local line key value

  [ -f "$env_file" ] || return 0

  # "|| [ -n ... ]" keeps the final line when the file lacks a trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    line="${line%$'\r'}"                      # tolerate CRLF files
    line="${line#"${line%%[![:space:]]*}"}"   # trim leading whitespace
    case "$line" in
      '' | '#'*) continue ;;
    esac
    line="${line#export }"
    [[ "$line" == *=* ]] || continue
    key="${line%%=*}"
    value="${line#*=}"
    [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue
    if [[ "$value" == \"*\" || "$value" == \'*\' ]]; then
      value="${value:1:${#value}-2}"
    fi
    export "$key=$value"
  done < "$env_file"
}

# Load .env BEFORE building PROVIDER_CONFIG: the azure_openai entry expands
# AZURE_OPENAI_DEPLOYMENT_NAME at assignment time, so a value that only lives
# in .env must already be exported here.
_load_dotenv .env

# Provider matrix, one "<provider> -> <model>[|<model>...]" entry per line.
PROVIDER_CONFIG="
openrouter -> google/gemini-2.5-pro|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6:exacto|nvidia/nemotron-3-nano-30b-a3b
xai -> grok-3
openai -> gpt-4o|gpt-4o-mini|gpt-3.5-turbo|gpt-5
anthropic -> claude-sonnet-4-5-20250929|claude-opus-4-5-20251101
google -> gemini-2.5-pro|gemini-2.5-flash|gemini-3-pro-preview|gemini-3-flash-preview
tetrate -> claude-sonnet-4-20250514
databricks -> databricks-claude-sonnet-4|gemini-2-5-flash|gpt-4o
azure_openai -> ${AZURE_OPENAI_DEPLOYMENT_NAME:-}
aws_bedrock -> us.anthropic.claude-sonnet-4-5-20250929-v1:0
gcp_vertex_ai -> gemini-2.5-pro
snowflake -> claude-sonnet-4-5
venice -> llama-3.3-70b
litellm -> gpt-4o-mini
sagemaker_tgi -> sagemaker-tgi-endpoint
github_copilot -> gpt-4.1
chatgpt_codex -> gpt-5.1-codex
claude-code -> default
codex -> gpt-5.2-codex
gemini-cli -> gemini-2.5-pro
cursor-agent -> auto
ollama -> qwen3
"

# Flaky models allowed to fail without blocking PRs.
# Entries are "<provider>:<model>".
ALLOWED_FAILURES=(
  "google:gemini-2.5-flash"
  "google:gemini-3-pro-preview"
  "openrouter:nvidia/nemotron-3-nano-30b-a3b"
  "openrouter:qwen/qwen3-coder:exacto"
  "openai:gpt-3.5-turbo"
)

# Providers that build_test_cases drops when called with --skip-agentic.
AGENTIC_PROVIDERS=("claude-code" "codex" "gemini-cli" "cursor-agent")

# Build the goose debug binary and print its path.
#
# Globals:   SKIP_BUILD (read) - when non-empty, the cargo build is skipped
# Outputs:   progress messages on stderr; "<cwd>/target/debug/goose" on stdout
# Returns:   0 on success, 1 if the cargo build fails (no path is printed then,
#            so callers cannot pick up a stale binary by accident)
build_goose() {
  if [ -z "${SKIP_BUILD:-}" ]; then
    echo "Building goose..." >&2
    # Build output goes to stderr so stdout carries only the binary path.
    if ! cargo build --bin goose >&2; then
      echo "cargo build failed" >&2
      return 1
    fi
    echo "" >&2
  else
    echo "Skipping build (SKIP_BUILD is set)..." >&2
    echo "" >&2
  fi

  printf '%s\n' "$PWD/target/debug/goose"
}

# Succeed when the environment/shell variable NAMED by $1 is set and non-empty.
# An empty or syntactically invalid name is reported as "not set" rather than
# raising an indirect-expansion error, and the ":-" default keeps the check
# safe when the caller runs under `set -u`.
has_env() {
  [[ "${1:-}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || return 1
  [ -n "${!1:-}" ]
}
# Succeed when $1 resolves to something the shell can run (per `command -v`).
# All output is discarded; only the exit status matters.
has_cmd() {
  command -v "${1:-}" >/dev/null 2>&1
}
# Succeed when $1 is the path of an existing regular file.
has_file() {
  [ -f "${1:-}" ]
}

# Succeed when the prerequisites for provider $1 are present.
#
# Each known provider maps to the environment variables, token files or CLI
# commands checked below; the status of the matching check is the function's
# status. Providers that are not listed are treated as available.
#
# Arguments: $1 - provider name as used in PROVIDER_CONFIG
# Returns:   0 if available, non-zero otherwise
is_provider_available() {
  case "$1" in
    openrouter)     has_env OPENROUTER_API_KEY ;;
    xai)            has_env XAI_API_KEY ;;
    openai)         has_env OPENAI_API_KEY ;;
    anthropic)      has_env ANTHROPIC_API_KEY ;;
    google)         has_env GOOGLE_API_KEY ;;
    tetrate)        has_env TETRATE_API_KEY ;;
    databricks)     has_env DATABRICKS_HOST && has_env DATABRICKS_TOKEN ;;
    azure_openai)   has_env AZURE_OPENAI_ENDPOINT && has_env AZURE_OPENAI_DEPLOYMENT_NAME ;;
    # A region plus either a named profile or a static access key.
    aws_bedrock)    has_env AWS_REGION && { has_env AWS_PROFILE || has_env AWS_ACCESS_KEY_ID; } ;;
    gcp_vertex_ai)  has_env GCP_PROJECT_ID ;;
    snowflake)      has_env SNOWFLAKE_HOST && has_env SNOWFLAKE_TOKEN ;;
    venice)         has_env VENICE_API_KEY ;;
    litellm)        has_env LITELLM_API_KEY ;;
    sagemaker_tgi)  has_env SAGEMAKER_ENDPOINT_NAME && has_env AWS_REGION ;;
    # Token from the environment, or a token file under ~/.config/goose.
    github_copilot) has_env GITHUB_COPILOT_TOKEN || has_file "$HOME/.config/goose/github_copilot_token.json" ;;
    chatgpt_codex)  has_env CHATGPT_CODEX_TOKEN || has_file "$HOME/.config/goose/chatgpt_codex_token.json" ;;
    ollama)         has_env OLLAMA_HOST || has_cmd ollama ;;
    # CLI-backed providers only need their command on PATH.
    claude-code)    has_cmd claude ;;
    codex)          has_cmd codex ;;
    gemini-cli)     has_cmd gemini ;;
    cursor-agent)   has_cmd cursor-agent ;;
    *)              return 0 ;;
  esac
}

# Succeed when "<provider>:<model>" is listed in ALLOWED_FAILURES.
#
# Globals:   ALLOWED_FAILURES (read)
# Arguments: $1 - provider, $2 - model
# Returns:   0 if the pair is an allowed (flaky) failure, 1 otherwise
is_allowed_failure() {
  local key="${1}:${2}"
  local allowed   # kept local so the loop does not clobber a caller's variable

  for allowed in "${ALLOWED_FAILURES[@]}"; do
    [ "$allowed" = "$key" ] && return 0
  done
  return 1
}

# Succeed when provider $1 appears in the comma-separated SKIP_PROVIDERS list.
#
# Whitespace around each list entry is ignored. Trimming is done with parameter
# expansion instead of spawning `echo | xargs` per entry, and the working
# variables are local so nothing leaks into the caller's scope.
#
# Globals:   SKIP_PROVIDERS (read) - e.g. "openai, xai"
# Arguments: $1 - provider name
# Returns:   0 if the provider should be skipped, 1 otherwise
should_skip_provider() {
  local -a skip_list=()
  local entry

  [ -n "${SKIP_PROVIDERS:-}" ] || return 1

  IFS=',' read -ra skip_list <<< "$SKIP_PROVIDERS"
  for entry in "${skip_list[@]}"; do
    entry="${entry#"${entry%%[![:space:]]*}"}"   # trim leading whitespace
    entry="${entry%"${entry##*[![:space:]]}"}"   # trim trailing whitespace
    [ "$entry" = "$1" ] && return 0
  done
  return 1
}

# Succeed when provider $1 is listed in AGENTIC_PROVIDERS.
#
# Globals:   AGENTIC_PROVIDERS (read)
# Arguments: $1 - provider name
# Returns:   0 if listed, 1 otherwise
is_agentic_provider() {
  local agentic   # local: do not leak the loop variable to the caller

  for agentic in "${AGENTIC_PROVIDERS[@]}"; do
    [ "$agentic" = "$1" ] && return 0
  done
  return 1
}

# build_test_cases [--skip-agentic]
#
# Populate the global TEST_CASES array with one "<provider>|<model>|<index>"
# entry per model of every usable provider in PROVIDER_CONFIG.
#
# A provider is dropped when is_provider_available fails for it, when
# should_skip_provider succeeds for it, or - with --skip-agentic - when
# is_agentic_provider succeeds for it. <index> is a running 0-based counter
# over the emitted entries.
#
# Globals:   PROVIDER_CONFIG (read), TEST_CASES (written)
# Arguments: $1 - optional "--skip-agentic"
# Outputs:   one inclusion/skip line per provider on stdout; a warning on
#            stderr for config lines that lack the " -> " separator
# Returns:   0
build_test_cases() {
  local skip_agentic=false
  local line provider models_str provider_config model
  local -a providers=() models=()
  local job_index=0

  if [ "${1:-}" = "--skip-agentic" ]; then
    skip_agentic=true
  fi

  while IFS= read -r line; do
    [[ "$line" =~ ^#.*$ || -z "$line" ]] && continue
    if [[ "$line" != *" -> "* ]]; then
      # Without the separator the whole line would be taken as both the
      # provider and its model list; ignore it instead.
      echo "⚠️ Ignoring malformed provider line: $line" >&2
      continue
    fi
    provider="${line%% -> *}"
    if is_provider_available "$provider"; then
      providers+=("$line")
      echo "✓ Including $provider"
    else
      echo "⚠️ Skipping $provider (prerequisites not met)"
    fi
  done <<< "$PROVIDER_CONFIG"
  echo ""

  TEST_CASES=()
  for provider_config in "${providers[@]}"; do
    provider="${provider_config%% -> *}"
    models_str="${provider_config#* -> }"

    if should_skip_provider "$provider"; then
      echo "⊘ Skipping provider: ${provider} (SKIP_PROVIDERS)"
      continue
    fi

    if [ "$skip_agentic" = true ] && is_agentic_provider "$provider"; then
      echo "⊘ Skipping agentic provider: ${provider}"
      continue
    fi

    IFS='|' read -ra models <<< "$models_str"
    for model in "${models[@]}"; do
      TEST_CASES+=("$provider|$model|$job_index")
      # Not ((job_index++)): that form has exit status 1 when the old value
      # is 0, which aborts a caller running under `set -e`.
      job_index=$((job_index + 1))
    done
  done

  return 0
}

# run_test_cases <test_fn>
#
# Run <test_fn> once per TEST_CASES entry as
#   <test_fn> <provider> <model> <result_file> <output_file>
# where both files live in a fresh temporary RESULTS_DIR. The first case runs
# in the foreground; the rest run as background jobs, at most MAX_PARALLEL at
# a time. Returns after every job has finished.
#
# Globals:   TEST_CASES (read), RESULTS_DIR (written), MAX_PARALLEL (read;
#            defaulted to the CPU count, or 8 if that cannot be determined),
#            CLEANUP_DIR (removed by the EXIT trap if set)
# Arguments: $1 - name of the function/command to run for each case
# Returns:   1 if the temporary directory cannot be created, else 0
# Note:      installs an EXIT trap, replacing any EXIT trap already set.
run_test_cases() {
  local test_fn="$1"
  local provider model idx i
  local running=0
  local can_wait_n=false

  RESULTS_DIR=$(mktemp -d) || return 1
  trap 'if [ -n "${RESULTS_DIR:-}" ]; then rm -rf -- "$RESULTS_DIR"; fi; if [ -n "${CLEANUP_DIR:-}" ]; then rm -rf -- "$CLEANUP_DIR"; fi' EXIT
  MAX_PARALLEL=${MAX_PARALLEL:-$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 8)}
  echo "Running ${#TEST_CASES[@]} tests (max $MAX_PARALLEL parallel)"
  echo ""

  # `wait -n` exists from bash 4.3 on. Decide once by version rather than by
  # its exit status, which is simply the status of whichever job finished.
  if (( BASH_VERSINFO[0] > 4 || (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] >= 3) )); then
    can_wait_n=true
  fi

  for ((i = 0; i < ${#TEST_CASES[@]}; i++)); do
    IFS='|' read -r provider model idx <<< "${TEST_CASES[$i]}"

    if (( i == 0 )); then
      # First test runs sequentially to catch early failures
      "$test_fn" "$provider" "$model" "$RESULTS_DIR/result_$idx" "$RESULTS_DIR/output_$idx"
    else
      "$test_fn" "$provider" "$model" "$RESULTS_DIR/result_$idx" "$RESULTS_DIR/output_$idx" &
      running=$((running + 1))
      if (( running >= MAX_PARALLEL )); then
        if [ "$can_wait_n" = true ]; then
          # Reap exactly one job. A failing job is not an error here; its
          # outcome is read later from its result file.
          wait -n || true
          running=$((running - 1))
        else
          # No `wait -n`: drain every job and restart the count.
          wait
          running=0
        fi
      fi
    fi
  done

  wait
}

# Print per-test details and a summary for every TEST_CASES entry.
#
# For each "<provider>|<model>|<index>" entry this prints the captured output
# file and reads "$RESULTS_DIR/result_<index>", expected to hold
# "<status>|<message>". Status "success" counts as a pass; anything else
# (including a missing result file) is a failure, downgraded to "flaky" when
# is_allowed_failure accepts the provider/model pair.
#
# Globals:   TEST_CASES, RESULTS_DIR (read); RESULTS, HARD_FAILURES (written)
# Outputs:   the report on stdout
# Returns:   0 when there are no hard failures; otherwise calls `exit 1`,
#            terminating the calling shell.
report_results() {
  local job provider model idx
  local result_line status msg entry
  local flaky_count=0

  echo ""
  echo "=== Test Results ==="
  echo ""

  RESULTS=()
  HARD_FAILURES=()

  for job in "${TEST_CASES[@]}"; do
    IFS='|' read -r provider model idx <<< "$job"

    echo "Provider: $provider"
    echo "Model: $model"
    echo ""
    # A test that died before writing anything leaves no output file; say so
    # instead of letting cat print an error.
    if [ -f "$RESULTS_DIR/output_$idx" ]; then
      cat "$RESULTS_DIR/output_$idx"
    else
      echo "(no output captured)"
    fi
    echo ""

    result_line=""
    if [ -f "$RESULTS_DIR/result_$idx" ]; then
      result_line=$(< "$RESULTS_DIR/result_$idx")
    fi
    status="${result_line%%|*}"
    msg="${result_line#*|}"

    if [ "$status" = "success" ]; then
      echo "✓ SUCCESS: $msg"
      RESULTS+=("✓ ${provider}: ${model}")
    elif is_allowed_failure "$provider" "$model"; then
      echo "⚠ FLAKY: $msg"
      RESULTS+=("⚠ ${provider}: ${model} (flaky)")
      flaky_count=$((flaky_count + 1))
    else
      echo "✗ FAILED: $msg"
      RESULTS+=("✗ ${provider}: ${model}")
      HARD_FAILURES+=("${provider}: ${model}")
    fi
    echo "---"
  done

  echo ""
  echo "=== Test Summary ==="
  for entry in "${RESULTS[@]}"; do
    echo "$entry"
  done

  if [ "${#HARD_FAILURES[@]}" -gt 0 ]; then
    echo ""
    echo "Hard failures (${#HARD_FAILURES[@]}):"
    for entry in "${HARD_FAILURES[@]}"; do
      echo " - $entry"
    done
    echo ""
    echo "Some tests failed!"
    exit 1
  fi

  echo ""
  if (( flaky_count > 0 )); then
    echo "All required tests passed! (some flaky tests failed but are allowed)"
  else
    echo "All tests passed!"
  fi
}