Skip to content

Commit fa53e2d

Browse files
committed
feat(ci): add performance regression workflow
- Add benchmark.yml workflow for PR performance regression testing - Fix benchmark compilation errors (add EnhancedMetricsCollector to SchedulerEngine::new calls) - Format code with cargo fmt Tasks 23-26: Phase 5 CI Performance Regression & Final Verification
1 parent 5fc150d commit fa53e2d

File tree

11 files changed

+276
-46
lines changed

11 files changed

+276
-46
lines changed

.github/workflows/benchmark.yml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
name: Performance Regression
2+
3+
on:
4+
pull_request:
5+
branches: [main]
6+
7+
jobs:
8+
benchmark:
9+
runs-on: ubuntu-latest
10+
steps:
11+
- uses: actions/checkout@v3
12+
- name: Install Rust
13+
uses: dtolnay/rust-action@stable
14+
- name: Cache cargo
15+
uses: actions/cache@v3
16+
with:
17+
path: |
18+
~/.cargo/registry
19+
~/.cargo/git
20+
target/
21+
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
22+
- name: Run Benchmarks (PR)
23+
run: cargo bench -- --save-baseline pr
24+
- name: Checkout Main
25+
run: |
26+
git fetch origin main
27+
git checkout origin/main
28+
- name: Run Benchmarks (Main)
29+
run: cargo bench -- --save-baseline main
30+
- name: Compare Results
31+
run: cargo bench -- --baseline main --threshold 10
32+
- name: Upload Results
33+
if: always()
34+
uses: actions/upload-artifact@v3
35+
with:
36+
name: benchmark-results
37+
path: target/criterion/
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
2+
use std::hint::black_box;
3+
use std::sync::Arc;
4+
use tokio::sync::mpsc;
5+
use vllm_core::engine::Engine;
6+
use vllm_core::metrics::EnhancedMetricsCollector;
7+
use vllm_core::scheduler::SchedulerEngine;
8+
use vllm_core::types::{AdaptiveDraftConfig, Request, SchedulerConfig};
9+
use vllm_testing::IncrementModel;
10+
11+
/// Benchmark Sequence Packing vs FIFO
12+
fn bench_sequence_packing(c: &mut Criterion) {
13+
let mut group = c.benchmark_group("sequence_packing");
14+
let metrics = Arc::new(EnhancedMetricsCollector::new());
15+
16+
for batch_size in [4, 8, 16].iter() {
17+
// FIFO baseline
18+
group.bench_with_input(
19+
BenchmarkId::new("fifo", batch_size),
20+
batch_size,
21+
|b, &batch_size| {
22+
let config = SchedulerConfig {
23+
packing: vllm_core::types::SequencePackingConfig {
24+
enabled: false,
25+
..Default::default()
26+
},
27+
..Default::default()
28+
};
29+
let mut scheduler = SchedulerEngine::new(config, 1024, metrics.clone());
30+
31+
// Add requests with varying lengths
32+
for i in 0..batch_size {
33+
let len = 100 + (i * 50); // 100, 150, 200, ...
34+
scheduler.add_request(Request::new(i as u64, vec![1; len], 10));
35+
}
36+
37+
b.iter(|| {
38+
black_box(scheduler.build_batch());
39+
});
40+
},
41+
);
42+
43+
// Packing optimized
44+
group.bench_with_input(
45+
BenchmarkId::new("packing", batch_size),
46+
batch_size,
47+
|b, &batch_size| {
48+
let config = SchedulerConfig::default(); // packing enabled by default
49+
let mut scheduler = SchedulerEngine::new(config, 1024, metrics.clone());
50+
51+
// Add requests with varying lengths
52+
for i in 0..batch_size {
53+
let len = 100 + (i * 50);
54+
scheduler.add_request(Request::new(i as u64, vec![1; len], 10));
55+
}
56+
57+
b.iter(|| {
58+
black_box(scheduler.build_batch());
59+
});
60+
},
61+
);
62+
}
63+
64+
group.finish();
65+
}
66+
67+
/// Benchmark Adaptive Speculative Decoding
68+
fn bench_adaptive_speculative(c: &mut Criterion) {
69+
let mut group = c.benchmark_group("adaptive_speculative");
70+
71+
// Fixed draft tokens
72+
group.bench_function("fixed_draft", |b| {
73+
let config = SchedulerConfig::default();
74+
let mut engine = Engine::with_config(IncrementModel, IncrementModel, config, 4, 1024);
75+
76+
let (tx, _rx) = mpsc::channel(64);
77+
engine.add_request(Request::new(1, vec![10, 20], 50), tx);
78+
79+
b.iter(|| {
80+
black_box(engine.step_speculative().unwrap());
81+
});
82+
});
83+
84+
// Adaptive draft tokens
85+
group.bench_function("adaptive_draft", |b| {
86+
let config = SchedulerConfig::default();
87+
let mut engine = Engine::with_config(IncrementModel, IncrementModel, config, 4, 1024);
88+
engine.enable_adaptive_speculative(AdaptiveDraftConfig::default());
89+
90+
let (tx, _rx) = mpsc::channel(64);
91+
engine.add_request(Request::new(1, vec![10, 20], 50), tx);
92+
93+
b.iter(|| {
94+
black_box(engine.step_adaptive_speculative().unwrap());
95+
});
96+
});
97+
98+
group.finish();
99+
}
100+
101+
/// Benchmark end-to-end throughput
102+
fn bench_throughput(c: &mut Criterion) {
103+
let mut group = c.benchmark_group("throughput");
104+
group.sample_size(10);
105+
106+
for num_requests in [10, 50, 100].iter() {
107+
// Baseline: No optimizations
108+
group.bench_with_input(
109+
BenchmarkId::new("baseline", num_requests),
110+
num_requests,
111+
|b, &num_requests| {
112+
let config = SchedulerConfig {
113+
packing: vllm_core::types::SequencePackingConfig {
114+
enabled: false,
115+
..Default::default()
116+
},
117+
..Default::default()
118+
};
119+
let mut engine =
120+
Engine::with_config(IncrementModel, IncrementModel, config, 4, 1024);
121+
122+
for i in 0..num_requests {
123+
let (tx, _rx) = mpsc::channel(64);
124+
engine.add_request(Request::new(i as u64, vec![10, 20], 20), tx);
125+
}
126+
127+
b.iter(|| {
128+
let mut completed = 0;
129+
while completed < num_requests {
130+
let results = black_box(engine.step().unwrap());
131+
completed += results.len();
132+
}
133+
});
134+
},
135+
);
136+
137+
// All optimizations enabled
138+
group.bench_with_input(
139+
BenchmarkId::new("optimized", num_requests),
140+
num_requests,
141+
|b, &num_requests| {
142+
let config = SchedulerConfig::default();
143+
let mut engine =
144+
Engine::with_config(IncrementModel, IncrementModel, config, 4, 1024);
145+
engine.enable_adaptive_speculative(AdaptiveDraftConfig::default());
146+
147+
for i in 0..num_requests {
148+
let (tx, _rx) = mpsc::channel(64);
149+
engine.add_request(Request::new(i as u64, vec![10, 20], 20), tx);
150+
}
151+
152+
b.iter(|| {
153+
let mut completed = 0;
154+
while completed < num_requests {
155+
let results = black_box(engine.step_adaptive_speculative().unwrap());
156+
completed += results.len();
157+
}
158+
});
159+
},
160+
);
161+
}
162+
163+
group.finish();
164+
}
165+
166+
criterion_group!(
167+
benches,
168+
bench_sequence_packing,
169+
bench_adaptive_speculative,
170+
bench_throughput
171+
);
172+
criterion_main!(benches);

crates/core/benches/scheduler.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
22
use std::hint::black_box;
3+
use std::sync::Arc;
4+
use vllm_core::metrics::EnhancedMetricsCollector;
35
use vllm_core::scheduler::SchedulerEngine;
46
use vllm_core::types::{Request, SchedulerConfig};
57

@@ -17,18 +19,20 @@ fn scheduler_add_request(c: &mut Criterion) {
1719
max_batch_size: 256,
1820
..Default::default()
1921
};
22+
let metrics = Arc::new(EnhancedMetricsCollector::new());
2023

2124
c.bench_function("scheduler_new", |b| {
2225
b.iter(|| {
23-
let scheduler = SchedulerEngine::new(config.clone(), 1024);
26+
let scheduler = SchedulerEngine::new(config.clone(), 1024, metrics.clone());
2427
black_box(scheduler)
2528
});
2629
});
2730
}
2831

2932
fn scheduler_build_batch(c: &mut Criterion) {
3033
let config = SchedulerConfig::default();
31-
let mut scheduler = SchedulerEngine::new(config, 1024);
34+
let metrics = Arc::new(EnhancedMetricsCollector::new());
35+
let mut scheduler = SchedulerEngine::new(config, 1024, metrics);
3236

3337
for i in 0..100 {
3438
let tokens: Vec<u32> = (0..128).map(|j| (i * 100 + j) as u32).collect();

crates/core/benches/scheduler_benchmarks.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use criterion::{Criterion, criterion_group, criterion_main};
22
use std::hint::black_box;
33
use std::sync::Arc;
44
use std::time::Instant;
5+
use vllm_core::metrics::EnhancedMetricsCollector;
56
use vllm_core::scheduler::policy::{FcfsPolicy, SchedulingContext, SchedulingPolicy, SjfPolicy};
67
use vllm_core::scheduler::{
78
PhaseScheduler, PhaseSwitchPolicy, RequestQueue, SchedulerEngine, SchedulerState,
@@ -106,11 +107,12 @@ fn bench_scheduling_policies(c: &mut Criterion) {
106107
fn bench_batch_building(c: &mut Criterion) {
107108
let mut group = c.benchmark_group("batch_building");
108109
let config = SchedulerConfig::default();
110+
let metrics = Arc::new(EnhancedMetricsCollector::new());
109111

110112
group.bench_function("build_batch_10", |b| {
111113
b.iter_with_setup(
112114
|| {
113-
let mut engine = SchedulerEngine::new(config.clone(), 1024);
115+
let mut engine = SchedulerEngine::new(config.clone(), 1024, metrics.clone());
114116
for i in 0..10 {
115117
engine.add_request(Request::new(i, vec![i as u32; 50], 100));
116118
}
@@ -123,7 +125,7 @@ fn bench_batch_building(c: &mut Criterion) {
123125
group.bench_function("build_batch_100", |b| {
124126
b.iter_with_setup(
125127
|| {
126-
let mut engine = SchedulerEngine::new(config.clone(), 1024);
128+
let mut engine = SchedulerEngine::new(config.clone(), 1024, metrics.clone());
127129
for i in 0..100 {
128130
engine.add_request(Request::new(i, vec![i as u32; 50], 100));
129131
}

crates/core/src/circuit_breaker/breaker.rs

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
// crates/core/src/circuit_breaker/breaker.rs
2-
use std::sync::atomic::{AtomicU64, Ordering};
32
use std::sync::Arc;
3+
use std::sync::atomic::{AtomicU64, Ordering};
44
use std::time::{Duration, Instant};
55
use tokio::sync::RwLock;
66

77
/// Circuit breaker state
88
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
99
pub enum CircuitState {
10-
Closed, // Normal operation
11-
Open, // Failing, reject calls
10+
Closed, // Normal operation
11+
Open, // Failing, reject calls
1212
HalfOpen, // Testing recovery
1313
}
1414

@@ -161,7 +161,9 @@ mod tests {
161161
};
162162
let breaker = CircuitBreaker::new(config);
163163
for _ in 0..3 {
164-
let _ = breaker.call(|| async { Err::<i32, TestError>(TestError("fail")) }).await;
164+
let _ = breaker
165+
.call(|| async { Err::<i32, TestError>(TestError("fail")) })
166+
.await;
165167
}
166168
let result = breaker.call(|| async { Ok::<_, TestError>(42) }).await;
167169
assert!(matches!(result, Err(CircuitBreakerError::Open)));
@@ -176,7 +178,9 @@ mod tests {
176178
};
177179
let breaker = CircuitBreaker::new(config);
178180
// First failure opens the circuit
179-
let _ = breaker.call(|| async { Err::<i32, TestError>(TestError("fail")) }).await;
181+
let _ = breaker
182+
.call(|| async { Err::<i32, TestError>(TestError("fail")) })
183+
.await;
180184
// Wait for recovery timeout
181185
tokio::time::sleep(Duration::from_millis(100)).await;
182186
// The next call will transition to HalfOpen
@@ -198,8 +202,12 @@ mod tests {
198202
};
199203
let breaker = CircuitBreaker::new(config);
200204
// Two failures open the circuit
201-
let _ = breaker.call(|| async { Err::<i32, TestError>(TestError("fail")) }).await;
202-
let _ = breaker.call(|| async { Err::<i32, TestError>(TestError("fail")) }).await;
205+
let _ = breaker
206+
.call(|| async { Err::<i32, TestError>(TestError("fail")) })
207+
.await;
208+
let _ = breaker
209+
.call(|| async { Err::<i32, TestError>(TestError("fail")) })
210+
.await;
203211
// Wait for recovery
204212
tokio::time::sleep(Duration::from_millis(100)).await;
205213
// Check that we're in HalfOpen by making a call that succeeds

crates/core/src/circuit_breaker/mod.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,4 @@
33
pub mod breaker;
44
pub mod strategy;
55
pub use breaker::{CircuitBreaker, CircuitBreakerConfig, CircuitBreakerError, CircuitState};
6-
pub use strategy::{
7-
DegradeStrategy, FailFastStrategy, FallbackStrategy, RetryStrategy,
8-
};
6+
pub use strategy::{DegradeStrategy, FailFastStrategy, FallbackStrategy, RetryStrategy};

crates/core/src/circuit_breaker/strategy.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,7 @@ mod tests {
119119
let attempts = std::sync::atomic::AtomicUsize::new(0);
120120
let result = strategy
121121
.execute(|| async {
122-
let count =
123-
attempts.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
122+
let count = attempts.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
124123
if count < 2 {
125124
Err::<i32, ()>(())
126125
} else {

0 commit comments

Comments
 (0)