-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathMakefile
More file actions
95 lines (83 loc) · 2.32 KB
/
Copy pathMakefile
File metadata and controls
95 lines (83 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# User-tunable settings. Every `?=` default below can be overridden on the
# command line (e.g. `make benchmark RUNS=5`) or from the environment.

# Docker image tag built by `benchmark-image` and run by the benchmark targets.
MERBENCH_IMAGE ?= merbench:latest

# Forwarded unchanged into the benchmark container as same-named environment
# variables; how they are interpreted is decided inside the image.
RUNS ?= 15
JUDGE_MODEL ?= gemini-2.5-pro
TIMEOUT ?= 600
OUTPUT_DIR ?= ./mermaid_eval_results
PARALLEL ?= 0
SEQUENTIAL ?= 0
DEBUG_TRACES ?= 0

# Also forwarded by the benchmark targets, but with no default value.
# Declared explicitly so every knob is listed in one place and none of them
# is an undefined variable. MODEL must be supplied by the caller (the
# benchmark targets refuse to run when it is empty).
MODEL ?=
TRACE_DIR ?=
BENCHMARK_FLAGS ?=

# Expands to `--env-file .env` only when a .env file exists in the current
# directory, and to nothing otherwise, so `docker run` is never pointed at a
# missing file. Evaluated once, when the Makefile is parsed.
ENV_FILE_FLAG := $(if $(wildcard .env),--env-file .env,)
# Names of host environment variables (provider credentials and endpoints)
# that are offered to the container.
passthrough_env_names := \
  OPENAI_API_KEY \
  GEMINI_API_KEY \
  GOOGLE_API_KEY \
  ANTHROPIC_API_KEY \
  AWS_REGION \
  AWS_PROFILE \
  PERPLEXITY_API_KEY \
  OPENROUTER_API_KEY \
  OPENROUTER_APP_URL \
  OPENROUTER_APP_TITLE \
  LOGFIRE_TOKEN \
  LOCAL_OPENAI_BASE_URL \
  OLLAMA_BASE_URL

# One value-less `-e NAME` flag per name: Docker then copies NAME from the
# host environment if it is set there.
COMMON_ENV_VARS := $(foreach name,$(passthrough_env_names),-e $(name))
# These targets are commands, not files; declaring them phony keeps a
# same-named file from making them look up to date.
.PHONY: install upgrade lint
.PHONY: leaderboard adk_basic_ui adk_multi_ui
.PHONY: benchmark-image benchmark benchmark-dry-run
# Sync the project environment with uv, then install the Mermaid CLI as a
# global npm package.
install:
	uv sync
	npm install --global @mermaid-js/mermaid-cli
# Re-sync the project environment while allowing package upgrades.
upgrade:
	uv sync --upgrade
# Run ruff's checks over the current directory inside the uv environment.
lint:
	uv run -- ruff check .
# Streamlit script launched by the `leaderboard` target.
leaderboard_app := agents_mcp_usage/evaluations/mermaid_evals/merbench_ui.py

# Start the Streamlit app above inside the uv environment.
leaderboard:
	uv run -- streamlit run $(leaderboard_app)
# Launch `adk web` on the basic_mcp directory inside the uv environment.
adk_basic_ui:
	uv run -- adk web agents_mcp_usage/basic_mcp
# Launch `adk web` on the multi_mcp directory inside the uv environment.
adk_multi_ui:
	uv run -- adk web agents_mcp_usage/multi_mcp
# Build the benchmark Docker image, tagged $(MERBENCH_IMAGE), using the
# current directory as the build context.
benchmark-image:
	docker build --tag $(MERBENCH_IMAGE) .
# Run the benchmark in a throwaway container of $(MERBENCH_IMAGE); the image
# is (re)built first through the benchmark-image prerequisite.
#
#   MODEL (required)  The recipe prints a usage hint and exits 1 when empty.
#   RUNS, JUDGE_MODEL, TIMEOUT, OUTPUT_DIR, PARALLEL, SEQUENTIAL, DEBUG_TRACES,
#   TRACE_DIR, BENCHMARK_FLAGS
#                     Forwarded to the container as same-named environment
#                     variables; their meaning is defined by the image.
#
# The directory make is running in is bind-mounted at /workspace, which is
# also the container's working directory. $(CURDIR) is used for the mount
# rather than $(PWD): PWD is only inherited from the caller's environment, so
# it points at the wrong directory under `make -C <dir>` and can be unset,
# while CURDIR is maintained by make itself.
benchmark: benchmark-image
	@if [ -z "$(MODEL)" ]; then \
		echo "MODEL is required. Example: make benchmark MODEL='openai:gpt-5.1 (none)' RUNS=5"; \
		exit 1; \
	fi
	docker run --rm \
		$(ENV_FILE_FLAG) \
		$(COMMON_ENV_VARS) \
		-e MODEL="$(MODEL)" \
		-e RUNS="$(RUNS)" \
		-e JUDGE_MODEL="$(JUDGE_MODEL)" \
		-e TIMEOUT="$(TIMEOUT)" \
		-e OUTPUT_DIR="$(OUTPUT_DIR)" \
		-e PARALLEL="$(PARALLEL)" \
		-e SEQUENTIAL="$(SEQUENTIAL)" \
		-e DEBUG_TRACES="$(DEBUG_TRACES)" \
		-e TRACE_DIR="$(TRACE_DIR)" \
		-e BENCHMARK_FLAGS="$(BENCHMARK_FLAGS)" \
		-v "$(CURDIR)":/workspace \
		-w /workspace \
		$(MERBENCH_IMAGE)
# Same container invocation as the real benchmark run, with DRY_RUN=1 added
# to the container environment; the image is (re)built first through the
# benchmark-image prerequisite. What DRY_RUN changes is decided inside the
# image.
#
#   MODEL (required)  The recipe prints a usage hint and exits 1 when empty.
#   RUNS, JUDGE_MODEL, TIMEOUT, OUTPUT_DIR, PARALLEL, SEQUENTIAL, DEBUG_TRACES,
#   TRACE_DIR, BENCHMARK_FLAGS
#                     Forwarded to the container as same-named environment
#                     variables.
#
# The directory make is running in is bind-mounted at /workspace, which is
# also the container's working directory. $(CURDIR) is used for the mount
# rather than $(PWD): PWD is only inherited from the caller's environment, so
# it points at the wrong directory under `make -C <dir>` and can be unset,
# while CURDIR is maintained by make itself.
benchmark-dry-run: benchmark-image
	@if [ -z "$(MODEL)" ]; then \
		echo "MODEL is required. Example: make benchmark-dry-run MODEL='openai:gpt-5.1 (none)'"; \
		exit 1; \
	fi
	docker run --rm \
		$(ENV_FILE_FLAG) \
		$(COMMON_ENV_VARS) \
		-e DRY_RUN="1" \
		-e MODEL="$(MODEL)" \
		-e RUNS="$(RUNS)" \
		-e JUDGE_MODEL="$(JUDGE_MODEL)" \
		-e TIMEOUT="$(TIMEOUT)" \
		-e OUTPUT_DIR="$(OUTPUT_DIR)" \
		-e PARALLEL="$(PARALLEL)" \
		-e SEQUENTIAL="$(SEQUENTIAL)" \
		-e DEBUG_TRACES="$(DEBUG_TRACES)" \
		-e TRACE_DIR="$(TRACE_DIR)" \
		-e BENCHMARK_FLAGS="$(BENCHMARK_FLAGS)" \
		-v "$(CURDIR)":/workspace \
		-w /workspace \
		$(MERBENCH_IMAGE)