Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
981191f
feat: cloud-mode TestMode::Cloud for benchmarks with best-effort clea…
kmatasfp Jun 2, 2026
341bab3
feat: add run specific details to perf tests
kmatasfp Jun 3, 2026
b1764ec
fix(benchmark): make --builtin-plugin-owner-account-id and --default-…
kmatasfp Jun 4, 2026
4294bdb
fix: make ProvidedShardManager kill/restart no-ops instead of panics
kmatasfp Jun 5, 2026
5b9902b
feat(benchmark): enable all tests
kmatasfp Jun 5, 2026
742a669
feat: retry connectivity to shard manager
kmatasfp Jun 5, 2026
18d5af6
chore: fmt
kmatasfp Jun 5, 2026
395bcd2
investigation: run echo test first to see if they get stuck again
kmatasfp Jun 5, 2026
dac3c69
feat(benchmark): lower number of conccurent live apps
kmatasfp Jun 6, 2026
2256623
feat: more observability, make memory component coefficient configurable
kmatasfp Jun 6, 2026
02e527a
feat(benchmark): run only throughput-echo test
kmatasfp Jun 6, 2026
faeb651
feat(bench): try 200 apps after tuning
kmatasfp Jun 6, 2026
f8dd565
feat: try 250 again
kmatasfp Jun 6, 2026
1bf0063
feat(benchmark): run all the tests again
kmatasfp Jun 7, 2026
2e53af6
fix: metric description
kmatasfp Jun 7, 2026
32ef9e5
feat: proper load for our cluster
kmatasfp Jun 7, 2026
bc11779
feat(benchmark): run only benchmark tests
kmatasfp Jun 8, 2026
5347626
feat: enable all tests again
kmatasfp Jun 8, 2026
9e582a2
feat(benchmark): increase max number of concurrent compilations
kmatasfp Jun 8, 2026
e7b44bf
feat(worker-executor): add measured-headroom memory admission gate
kmatasfp Jun 8, 2026
817c672
feat(worker-executor): charge component module size once per resident…
kmatasfp Jun 9, 2026
35874d3
fix(worker-executor): disable measured admission when executor does n…
kmatasfp Jun 9, 2026
acb9968
feat(benchmark): add throughput-under-memory-saturation benchmarks
kmatasfp Jun 9, 2026
bfe1b14
test(worker-executor): exercise admission reserve under maximum concu…
kmatasfp Jun 9, 2026
c3af739
feat(benchmark): longer sustained load, bumpt the number of agents
kmatasfp Jun 9, 2026
7dcb2d3
fix: add empty workspace
kmatasfp Jun 9, 2026
139aed5
fix: use snake case as method names
kmatasfp Jun 9, 2026
442c1c5
chore: 300 already saturates, no need for 500
kmatasfp Jun 9, 2026
4bbb200
fix(worker-executor): avoid deadlock between memory grow and admissio…
kmatasfp Jun 9, 2026
be19cf4
feat: change order of tests
kmatasfp Jun 10, 2026
21fd401
feat: restore iterations count to 3
kmatasfp Jun 10, 2026
a9285c0
refactor(worker-executor): make cgroup gate primary, semaphore clampe…
kmatasfp Jun 10, 2026
27119b2
feat: run only initial echo test to make sure we did not make it slower
kmatasfp Jun 10, 2026
b608593
feat: run only saturation test
kmatasfp Jun 10, 2026
1f1b77a
feat: bigger saturation spread
kmatasfp Jun 10, 2026
1bd27ea
feat(benchmark): change the steps
kmatasfp Jun 10, 2026
898435d
feat: replace estimate-semaphore completely with measured-headroom ad…
kmatasfp Jun 10, 2026
8cecf91
fix: clippy warnings
kmatasfp Jun 10, 2026
8566f13
fix: startup message regarding memory
kmatasfp Jun 11, 2026
626e4ba
chore: run only oom test
kmatasfp Jun 11, 2026
2434047
feat: enable to whole perf test suite
kmatasfp Jun 11, 2026
a8fcf52
feat: more metrics plus FixedProbe for tests
kmatasfp Jun 11, 2026
7eb6f08
fix: make admission gate reserve atomic to prevent ceiling overshoot
kmatasfp Jun 11, 2026
83a6b2f
test: gate concurrent-agent permit tests with a semaphore, not Notify
kmatasfp Jun 11, 2026
24673f6
feat: expose tokio metrics
kmatasfp Jun 11, 2026
a1928c5
fix: prevent concurrent-agent scheduler deadlock on cancel-after-grant
kmatasfp Jun 12, 2026
183de28
feat: use official tokio-metrics crate to expose tokio runtime metrics
kmatasfp Jun 12, 2026
78d311d
feat: use official tokio-metrics crate to expose tokio runtime metric…
kmatasfp Jun 12, 2026
aec411b
chore: cleanup comments
kmatasfp Jun 12, 2026
71bee78
feat: try mimalloc
kmatasfp Jun 13, 2026
b146823
feat: try mimalloc vol 2
kmatasfp Jun 13, 2026
0ef2c16
perf: enable thin LTO and codegen-units=1 for release builds
kmatasfp Jun 13, 2026
99e3633
perf: pin target-cpu baseline for published images (x86-64-v3, neover…
kmatasfp Jun 13, 2026
3218b4f
perf: drop codegen-units=1, keep thin LTO
kmatasfp Jun 13, 2026
ac3b3ee
chore: lower number of cuncurrent agents to 200 in case of durability…
kmatasfp Jun 14, 2026
ee49d86
feat: restore 3 iterations
kmatasfp Jun 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 81 additions & 1 deletion golem-test-framework/src/benchmark/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ pub struct BenchmarkSuiteItem {
impl BenchmarkSuiteItem {
pub fn runs(&self, mode: &TestMode) -> Vec<RunConfig> {
let cluster_size: Vec<usize> = match mode {
TestMode::Provided { .. } => {
TestMode::Provided { .. } | TestMode::Cloud { .. } => {
vec![0]
}
_ => self
Expand Down Expand Up @@ -163,3 +163,83 @@ impl BenchmarkSuiteItem {
res
}
}

/// Wiring-level smoke tests for cloud mode; none of them need running services.
///
/// They only check how [`BenchmarkSuiteItem::runs`] expands a suite item when it
/// is handed a `TestMode::Cloud`. For a full end-to-end smoke test that exercises
/// actual HTTP clients, cleanup, and the benchmark API contract, run the binary
/// directly against a local Spawned cluster:
///
/// ```text
/// cargo run --bin benchmarks -- benchmark cold-start-unknown-small \
///     --size 1 --iterations 1 --length 0 \
///     cloud \
///     --api-url http://localhost:8081 \
///     --apps-base-domain golem.cloud \
///     --admin-account-id <uuid> \
///     --admin-account-email <email> \
///     --admin-account-token <token> \
///     --builtin-plugin-owner-account-id <uuid> \
///     --default-plan-id <uuid>
/// ```
#[cfg(test)]
mod cloud_mode_smoke {
    use super::*;
    use test_r::test;
    use url::Url;
    use uuid::Uuid;

    /// Builds a `TestMode::Cloud` from placeholder values; nothing is contacted.
    fn cloud_mode() -> TestMode {
        let api_url = Url::parse("https://release.dev-api.golem.cloud").unwrap();
        TestMode::Cloud {
            api_url,
            apps_base_domain: String::from("apps.dev.golem.cloud"),
            admin_account_token: String::from("test-token"),
            builtin_plugin_owner_account_id: Uuid::nil(),
            default_plan_id: Uuid::nil(),
            shard_manager_grpc_host: None,
            shard_manager_grpc_port: None,
            component_directory: String::from("test-components"),
        }
    }

    /// However many `cluster_size` values the suite item lists, cloud mode
    /// yields exactly one `RunConfig`, and its `cluster_size` is `0`.
    #[test]
    fn runs_returns_single_cluster_size_zero_run() {
        let suite_item = BenchmarkSuiteItem {
            name: String::from("cold-start-unknown-small"),
            iterations: 3,
            cluster_size: vec![1, 3, 5], // must be ignored in cloud mode
            size: vec![10],
            length: vec![100],
            disable_compilation_cache: None,
        };

        let expanded = suite_item.runs(&cloud_mode());
        assert_eq!(
            expanded.len(),
            1,
            "cloud mode ignores cluster_size variations"
        );

        let only_run = &expanded[0];
        assert_eq!(
            only_run.cluster_size, 0,
            "cloud mode cluster_size must be 0"
        );
        assert_eq!(only_run.size, 10);
        assert_eq!(only_run.length, 100);
    }

    /// `size` and `length` still expand into every combination; `cluster_size`
    /// is the only dimension that gets collapsed.
    #[test]
    fn runs_expands_size_and_length_but_not_cluster_size() {
        let suite_item = BenchmarkSuiteItem {
            name: String::from("latency-small"),
            iterations: 1,
            cluster_size: vec![1, 3],
            size: vec![5, 10],
            length: vec![50, 100],
            disable_compilation_cache: None,
        };

        let expanded = suite_item.runs(&cloud_mode());

        // 1 (collapsed cluster_size) × 2 sizes × 2 lengths = 4 runs
        assert_eq!(expanded.len(), 4);
        expanded
            .iter()
            .for_each(|run| assert_eq!(run.cluster_size, 0));
    }
}
5 changes: 4 additions & 1 deletion golem-test-framework/src/benchmark/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ mod config;
mod results;

pub use config::{BenchmarkConfig, BenchmarkSuite, BenchmarkSuiteItem, RunConfig};
pub use results::{BenchmarkResult, BenchmarkRunResult, BenchmarkSuiteResult, ResultKey};
pub use results::{
BenchmarkResult, BenchmarkRunResult, BenchmarkSuiteResult, ResultKey, RunMetadata,
};

use crate::config::benchmark::TestMode;
use async_trait::async_trait;
Expand Down Expand Up @@ -301,6 +303,7 @@ impl<B: Benchmark> BenchmarkApi for B {
description: B::description().to_string(),
runs,
results,
run_id: None,
}
}
}
108 changes: 108 additions & 0 deletions golem-test-framework/src/benchmark/results.rs
Original file line number Diff line number Diff line change
Expand Up @@ -484,17 +484,118 @@ impl Display for BenchmarkResultView {
}
}

/// Cloud-mode run metadata collected by the buildspec and passed via environment variables.
/// All fields are optional — missing env vars produce `None` rather than failing the run.
///
/// Serialized with camelCase keys; a field that is `None` is omitted from the
/// output, and a key that is absent on input deserializes to `None`.
/// `RunMetadata::from_env` fills each field from the `GOLEM_BENCH_*` variable
/// named in that field's documentation.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RunMetadata {
/// The `golem-oss` commit SHA that was built and deployed.
/// Read from `GOLEM_BENCH_OSS_COMMIT_SHA`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub golem_oss_commit_sha: Option<String>,
/// The `golem-cloud` (kubernetes manifests) commit SHA that was deployed.
/// Read from `GOLEM_BENCH_K8S_MANIFEST_COMMIT_SHA`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub kubernetes_manifest_commit_sha: Option<String>,
/// Number of Ready `worker-executor` pods observed at run start.
/// Read from `GOLEM_BENCH_OBSERVED_CLUSTER_SIZE`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub observed_cluster_size: Option<u32>,
/// Container image tag of the deployed `worker-executor`.
/// Read from `GOLEM_BENCH_WORKER_EXECUTOR_IMAGE_TAG`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub worker_executor_image_tag: Option<String>,
/// Container image tag of the deployed `registry-service`.
/// Read from `GOLEM_BENCH_REGISTRY_SERVICE_IMAGE_TAG`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub registry_service_image_tag: Option<String>,
/// Container image tag of the deployed `worker-service`.
/// Read from `GOLEM_BENCH_WORKER_SERVICE_IMAGE_TAG`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub worker_service_image_tag: Option<String>,
/// Aurora ACU capacity for the main (`golem_dev`) cluster at run start.
/// Read from `GOLEM_BENCH_AURORA_ACU_MAIN`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub aurora_acu_main: Option<f64>,
/// Aurora ACU capacity for the indexed-storage cluster at run start.
/// Read from `GOLEM_BENCH_AURORA_ACU_INDEXED`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub aurora_acu_indexed: Option<f64>,
/// Aurora ACU capacity for the keyvalue-storage cluster at run start.
/// Read from `GOLEM_BENCH_AURORA_ACU_KEYVALUE`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub aurora_acu_keyvalue: Option<f64>,
/// Ready replica count for `worker-executor` at run start.
/// Read from `GOLEM_BENCH_WORKER_EXECUTOR_REPLICAS`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub worker_executor_replicas: Option<u32>,
/// Ready replica count for `worker-service` at run start.
/// Read from `GOLEM_BENCH_WORKER_SERVICE_REPLICAS`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub worker_service_replicas: Option<u32>,
/// Ready replica count for `registry-service` at run start.
/// Read from `GOLEM_BENCH_REGISTRY_SERVICE_REPLICAS`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub registry_service_replicas: Option<u32>,
/// Ready replica count for `compilation-service` at run start.
/// Read from `GOLEM_BENCH_COMPILATION_SERVICE_REPLICAS`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub compilation_service_replicas: Option<u32>,
/// Ready replica count for `debugging-service` at run start.
/// Read from `GOLEM_BENCH_DEBUGGING_SERVICE_REPLICAS`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub debugging_service_replicas: Option<u32>,
/// Free-form note from the `workflow_dispatch` trigger.
/// Read from `GOLEM_BENCH_RUN_NOTE`.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub note: Option<String>,
}

impl RunMetadata {
    /// Builds a `RunMetadata` from the `GOLEM_BENCH_*` environment variables.
    ///
    /// Collection is best-effort and never fails:
    ///
    /// * a variable that is unset, not valid Unicode, or empty yields `None`;
    /// * numeric variables are trimmed of surrounding whitespace before being
    ///   parsed (so a value such as `"3\n"` is accepted), and yield `None`
    ///   when the remaining text does not parse as the field's type;
    /// * string variables are stored exactly as read.
    pub fn from_env() -> Self {
        // Reads `key`, treating an unset, non-Unicode, or empty variable as absent.
        fn env_str(key: &str) -> Option<String> {
            std::env::var(key).ok().filter(|v| !v.is_empty())
        }

        // Reads `key` and parses it into the type the target field expects.
        // Trimming first keeps stray whitespace or a trailing newline from
        // silently discarding an otherwise valid number.
        fn env_parse<T: std::str::FromStr>(key: &str) -> Option<T> {
            env_str(key).and_then(|v| v.trim().parse().ok())
        }

        Self {
            golem_oss_commit_sha: env_str("GOLEM_BENCH_OSS_COMMIT_SHA"),
            kubernetes_manifest_commit_sha: env_str("GOLEM_BENCH_K8S_MANIFEST_COMMIT_SHA"),
            observed_cluster_size: env_parse("GOLEM_BENCH_OBSERVED_CLUSTER_SIZE"),
            worker_executor_image_tag: env_str("GOLEM_BENCH_WORKER_EXECUTOR_IMAGE_TAG"),
            registry_service_image_tag: env_str("GOLEM_BENCH_REGISTRY_SERVICE_IMAGE_TAG"),
            worker_service_image_tag: env_str("GOLEM_BENCH_WORKER_SERVICE_IMAGE_TAG"),
            aurora_acu_main: env_parse("GOLEM_BENCH_AURORA_ACU_MAIN"),
            aurora_acu_indexed: env_parse("GOLEM_BENCH_AURORA_ACU_INDEXED"),
            aurora_acu_keyvalue: env_parse("GOLEM_BENCH_AURORA_ACU_KEYVALUE"),
            worker_executor_replicas: env_parse("GOLEM_BENCH_WORKER_EXECUTOR_REPLICAS"),
            worker_service_replicas: env_parse("GOLEM_BENCH_WORKER_SERVICE_REPLICAS"),
            registry_service_replicas: env_parse("GOLEM_BENCH_REGISTRY_SERVICE_REPLICAS"),
            compilation_service_replicas: env_parse("GOLEM_BENCH_COMPILATION_SERVICE_REPLICAS"),
            debugging_service_replicas: env_parse("GOLEM_BENCH_DEBUGGING_SERVICE_REPLICAS"),
            note: env_str("GOLEM_BENCH_RUN_NOTE"),
        }
    }

    /// Returns `true` if every field is `None` (nothing was read from env).
    ///
    /// Implemented as a comparison against `Self::default()`, whose fields are
    /// all `None`.
    pub fn is_empty(&self) -> bool {
        *self == Self::default()
    }
}

/// A serializable list of benchmark suite results.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BenchmarkSuiteResultCollection {
/// The suite results held by this collection, one `BenchmarkSuiteResult` per entry.
pub runs: Vec<BenchmarkSuiteResult>,
}

/// The outcome of running one benchmark suite: identifying information about
/// the run plus the per-benchmark results.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BenchmarkSuiteResult {
    /// Result format version. Always `1` for results produced by this binary.
    ///
    /// Defaults to `0` when the key is absent, so result files written before
    /// the version field was introduced still deserialize instead of failing
    /// with a missing-field error.
    #[serde(default)]
    pub schema_version: u32,
    /// Name of the benchmark suite these results belong to.
    pub suite: String,
    /// Description of the environment the suite ran in.
    pub environment: String,
    /// Golem version string recorded when the result was created.
    pub version: String,
    /// Time at which this result record was created.
    pub timestamp: DateTime<Utc>,
    /// Suite-level run-id. Set in cloud mode to `bench-{run_id}` to allow
    /// cross-run correlation and garbage collection of orphaned state.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub run_id: Option<String>,
    /// Cloud-mode run metadata populated from `GOLEM_BENCH_*` environment variables.
    /// `None` in Spawned or Provided modes where cluster metadata is not available.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub run_metadata: Option<RunMetadata>,
    /// Results of the individual benchmarks in the suite.
    pub results: Vec<BenchmarkResult>,
}

Expand Down Expand Up @@ -526,10 +627,13 @@ impl BenchmarkSuiteResult {
);

Self {
schema_version: 1,
suite: suite.to_string(),
environment,
version: golem_common::golem_version().to_string(),
timestamp: Utc::now(),
run_id: None,
run_metadata: None,
results: vec![],
}
}
Expand Down Expand Up @@ -606,6 +710,10 @@ pub struct BenchmarkResult {
pub description: String,
pub runs: Vec<RunConfig>,
pub results: Vec<BenchmarkRunResult>,
/// Suite-level run-id. Set in cloud mode to `bench-{run_id}` to allow
/// cross-run correlation and garbage collection of orphaned state.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub run_id: Option<String>,
}

impl BenchmarkResult {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use tracing::Level;

pub mod provided;
pub mod spawned;
pub mod unavailable;

#[async_trait]
pub trait ComponentCompilationService: Send + Sync {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright 2024-2026 Golem Cloud
//
// Licensed under the Golem Source License v1.1 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://license.golem.cloud/LICENSE
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::ComponentCompilationService;
use async_trait::async_trait;

/// Stand-in [`ComponentCompilationService`] for a service that cannot be
/// reached directly.
///
/// Used in cloud mode, where the component compilation service is an internal
/// cluster component with no external exposure. `kill` does nothing so that
/// `kill_all()` completes; every operational method panics with a clear
/// message.
pub struct UnavailableComponentCompilationService;

impl UnavailableComponentCompilationService {
    /// Single failure path shared by all operational methods.
    ///
    /// `#[track_caller]` makes the reported panic location the method that was
    /// called rather than this helper.
    #[track_caller]
    fn unavailable() -> ! {
        panic!("component_compilation_service() is not available in cloud mode")
    }
}

#[async_trait]
impl ComponentCompilationService for UnavailableComponentCompilationService {
    /// Always panics: there is no reachable gRPC host in cloud mode.
    fn grpc_host(&self) -> String {
        Self::unavailable()
    }

    /// Always panics: there is no reachable gRPC port in cloud mode.
    fn grpc_port(&self) -> u16 {
        Self::unavailable()
    }

    /// No-op; there is nothing to tear down.
    async fn kill(&self) {}
}
1 change: 1 addition & 0 deletions golem-test-framework/src/components/rdb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub mod docker_mysql;
pub mod docker_postgres;
pub mod provided_postgres;
pub mod sqlite;
pub mod unavailable;

#[async_trait]
pub trait Rdb: Send + Sync {
Expand Down
31 changes: 31 additions & 0 deletions golem-test-framework/src/components/rdb/unavailable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright 2024-2026 Golem Cloud
//
// Licensed under the Golem Source License v1.1 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://license.golem.cloud/LICENSE
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::{DbInfo, Rdb};
use async_trait::async_trait;

/// Stand-in [`Rdb`] for a database that cannot be reached directly.
///
/// Used in cloud mode, where the database is an internal cluster component
/// with no external exposure. Lifecycle teardown (`kill`) does nothing so that
/// `kill_all()` completes; operational methods panic with a clear message.
pub struct UnavailableRdb;

impl UnavailableRdb {
    /// Failure path for operational methods.
    ///
    /// `#[track_caller]` makes the reported panic location the method that was
    /// called rather than this helper.
    #[track_caller]
    fn unavailable() -> ! {
        panic!("rdb() is not available in cloud mode")
    }
}

#[async_trait]
impl Rdb for UnavailableRdb {
    /// Always panics: no connection information exists in cloud mode.
    fn info(&self) -> DbInfo {
        Self::unavailable()
    }

    /// No-op; there is nothing to tear down.
    async fn kill(&self) {}
}
1 change: 1 addition & 0 deletions golem-test-framework/src/components/redis/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use tracing::info;
pub mod provided;
pub mod spawned;
pub mod spawned_tls;
pub mod unavailable;

#[async_trait]
pub trait Redis: Send + Sync {
Expand Down
43 changes: 43 additions & 0 deletions golem-test-framework/src/components/redis/unavailable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2024-2026 Golem Cloud
//
// Licensed under the Golem Source License v1.1 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://license.golem.cloud/LICENSE
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Redis;
use async_trait::async_trait;

/// Stand-in [`Redis`] for an instance that cannot be reached directly.
///
/// Used in cloud mode, where Redis is an internal cluster component with no
/// external exposure. `kill` does nothing so that `kill_all()` completes;
/// every operational method panics with a clear message.
pub struct UnavailableRedis;

impl UnavailableRedis {
    /// Single failure path shared by all operational methods.
    ///
    /// `#[track_caller]` makes the reported panic location the method that was
    /// called rather than this helper.
    #[track_caller]
    fn unavailable() -> ! {
        panic!("redis() is not available in cloud mode")
    }
}

#[async_trait]
impl Redis for UnavailableRedis {
    /// Always panics: there is no Redis to validate in cloud mode.
    fn assert_valid(&self) {
        Self::unavailable()
    }

    /// Always panics: no host is available in cloud mode.
    fn private_host(&self) -> String {
        Self::unavailable()
    }

    /// Always panics: no port is available in cloud mode.
    fn private_port(&self) -> u16 {
        Self::unavailable()
    }

    /// Always panics: no key prefix is available in cloud mode.
    fn prefix(&self) -> &str {
        Self::unavailable()
    }

    /// No-op; there is nothing to tear down.
    async fn kill(&self) {}
}
1 change: 1 addition & 0 deletions golem-test-framework/src/components/redis_monitor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

pub mod spawned;
pub mod unavailable;

pub trait RedisMonitor: Send + Sync {
fn assert_valid(&self);
Expand Down
Loading
Loading