Skip to content

Commit

Permalink
feat: EXC-1758: Evict sandboxes based on their RSS (#2197)
Browse files Browse the repository at this point in the history
This allows to increase the number of sandbox processes without risking
the OOM as the total RSS size of sandboxes is still limited.

This PR also increases the number of sandbox processes to 5k.
  • Loading branch information
berestovskyy authored and sasa-tomic committed Oct 24, 2024
1 parent 524aa68 commit 75dd48c
Show file tree
Hide file tree
Showing 7 changed files with 194 additions and 52 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions rs/canister_sandbox/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ DEPENDENCIES = [
"@crate_index//:libc",
"@crate_index//:libflate",
"@crate_index//:nix",
"@crate_index//:num-traits",
"@crate_index//:once_cell",
"@crate_index//:prometheus",
"@crate_index//:rayon",
Expand Down
1 change: 1 addition & 0 deletions rs/canister_sandbox/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ libc = { workspace = true }
libflate = { workspace = true }
memory_tracker = { path = "../memory_tracker" }
nix = { workspace = true }
num-traits = { workspace = true }
once_cell = "1.8"
prometheus = { workspace = true }
rayon = { workspace = true }
Expand Down
108 changes: 91 additions & 17 deletions rs/canister_sandbox/src/replica_controller/sandbox_process_eviction.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
use num_traits::ops::saturating::SaturatingAdd;
use std::time::Instant;

use ic_types::CanisterId;
use ic_types::{CanisterId, NumBytes};

#[derive(Clone, Eq, PartialEq, Debug)]
pub(crate) struct EvictionCandidate {
pub id: CanisterId,
pub last_used: Instant,
pub rss: NumBytes,
}

/// Evicts the least recently used candidates in order to bring the number of
Expand All @@ -26,30 +28,30 @@ pub(crate) struct EvictionCandidate {
/// 4. Return the evicted candidates.
pub(crate) fn evict(

This comment has been minimized.

Copy link
@ilbertt

ilbertt Oct 26, 2024

@berestovskyy the function's docs should be updated to reflect the new arguments and logic

This comment has been minimized.

Copy link
@berestovskyy

berestovskyy Oct 28, 2024

Author Contributor

Thanks, Luca,
Will be fixed in #2301

mut candidates: Vec<EvictionCandidate>,
min_count_threshold: usize,
total_rss: NumBytes,
max_count_threshold: usize,
last_used_threshold: Instant,
max_sandboxes_rss: NumBytes,
) -> Vec<EvictionCandidate> {
candidates.sort_by_key(|x| x.last_used);

let evict_at_least = candidates.len().saturating_sub(max_count_threshold);
let evict_at_most = candidates.len().saturating_sub(min_count_threshold);

let mut evicted = vec![];
let mut evicted_rss = NumBytes::new(0);

for candidate in candidates.into_iter() {
if evicted.len() >= evict_at_most {
// Cannot evict anymore because at least `min_count_threshold`
// should remain not evicted.
break;
}
if candidate.last_used >= last_used_threshold && evicted.len() >= evict_at_least {
if candidate.last_used >= last_used_threshold
&& evicted.len() >= evict_at_least
&& total_rss <= max_sandboxes_rss.saturating_add(&evicted_rss)
{
// We have already evicted the minimum required number of candidates
// and all the remaining candidates were not idle the recent
// `last_used_threshold` time window. No need to evict more.
break;
}
evicted.push(candidate)
evicted_rss = evicted_rss.saturating_add(&candidate.rss);
evicted.push(candidate);
}

evicted
Expand All @@ -60,12 +62,13 @@ mod tests {
use std::time::{Duration, Instant};

use ic_test_utilities_types::ids::canister_test_id;
use ic_types::NumBytes;

use super::{evict, EvictionCandidate};

#[test]
fn evict_empty() {
assert_eq!(evict(vec![], 0, 0, Instant::now()), vec![],);
assert_eq!(evict(vec![], 0.into(), 0, Instant::now(), 0.into()), vec![],);
}

#[test]
Expand All @@ -76,9 +79,10 @@ mod tests {
candidates.push(EvictionCandidate {
id: canister_test_id(i),
last_used: now,
rss: 0.into(),
});
}
assert_eq!(evict(candidates, 0, 10, now,), vec![],);
assert_eq!(evict(candidates, 0.into(), 10, now, 0.into()), vec![],);
}

#[test]
Expand All @@ -89,10 +93,11 @@ mod tests {
candidates.push(EvictionCandidate {
id: canister_test_id(i),
last_used: now + Duration::from_secs(100 - i),
rss: 0.into(),
});
}
assert_eq!(
evict(candidates.clone(), 0, 90, now,),
evict(candidates.clone(), 0.into(), 90, now, 0.into()),
candidates.into_iter().rev().take(10).collect::<Vec<_>>()
);
}
Expand All @@ -105,10 +110,17 @@ mod tests {
candidates.push(EvictionCandidate {
id: canister_test_id(i),
last_used: now - Duration::from_secs(i),
rss: 0.into(),
});
}
assert_eq!(
evict(candidates.clone(), 0, 100, now - Duration::from_secs(50)),
evict(
candidates.clone(),
0.into(),
100,
now - Duration::from_secs(50),
0.into()
),
candidates.into_iter().rev().take(49).collect::<Vec<_>>()
);
}
Expand All @@ -120,15 +132,73 @@ mod tests {
for i in 0..100 {
candidates.push(EvictionCandidate {
id: canister_test_id(i),
last_used: now - Duration::from_secs(i + 1),
last_used: now - Duration::from_secs(i + 1) + Duration::from_secs(10),
rss: 0.into(),
});
}
assert_eq!(
evict(candidates.clone(), 10, 100, now),
evict(candidates.clone(), 0.into(), 100, now, 0.into()),
candidates.into_iter().rev().take(90).collect::<Vec<_>>()
);
}

#[test]
fn evict_none_due_to_rss() {
let mut candidates = vec![];
let now = Instant::now();
let mut total_rss = NumBytes::new(0);
for i in 0..100 {
candidates.push(EvictionCandidate {
id: canister_test_id(i),
last_used: now,
rss: 50.into(),
});
total_rss += 50.into();
}
assert_eq!(
evict(candidates.clone(), total_rss, 100, now, total_rss),
vec![]
);
}

#[test]
fn evict_some_due_to_rss() {
let mut candidates = vec![];
let now = Instant::now();
let mut total_rss = NumBytes::new(0);
for i in 0..100 {
candidates.push(EvictionCandidate {
id: canister_test_id(i),
last_used: now,
rss: 50.into(),
});
total_rss += 50.into();
}
assert_eq!(
evict(candidates.clone(), total_rss, 100, now, total_rss / 2),
candidates.into_iter().take(50).collect::<Vec<_>>()
);
}

#[test]
fn evict_all_due_to_rss() {
let mut candidates = vec![];
let now = Instant::now();
let mut total_rss = NumBytes::new(0);
for i in 0..100 {
candidates.push(EvictionCandidate {
id: canister_test_id(i),
last_used: now,
rss: 50.into(),
});
total_rss += 50.into();
}
assert_eq!(
evict(candidates.clone(), total_rss, 100, now, 0.into()),
candidates
);
}

#[test]
fn evict_all() {
let mut candidates = vec![];
Expand All @@ -137,8 +207,12 @@ mod tests {
candidates.push(EvictionCandidate {
id: canister_test_id(i),
last_used: now - Duration::from_secs(i + 1),
rss: 0.into(),
});
}
assert_eq!(evict(candidates.clone(), 0, 100, now).len(), 100);
assert_eq!(
evict(candidates.clone(), 0.into(), 100, now, 0.into()).len(),
100
);
}
}
Loading

0 comments on commit 75dd48c

Please sign in to comment.