Skip to content

Commit

Permalink
fix: always use applied clock offset values instead of reading them f…
Browse files Browse the repository at this point in the history
…rom NVML, bigger grace period for self-inflicted config changes
  • Loading branch information
ilya-zlobintsev committed Nov 24, 2024
1 parent 490929f commit 73adb1a
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 38 deletions.
15 changes: 11 additions & 4 deletions lact-daemon/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ const FILE_NAME: &str = "config.yaml";
const DEFAULT_ADMIN_GROUPS: [&str; 2] = ["wheel", "sudo"];
/// Minimum amount of time between separate config reloads
const CONFIG_RELOAD_INTERVAL_MILLIS: u64 = 50;
/// Period when config changes are ignored after LACT itself has edited the config
const SELF_CONFIG_EDIT_PERIOD_MILLIS: u64 = 1000;

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Config {
Expand Down Expand Up @@ -175,19 +177,24 @@ impl Config {
}
}

pub fn save(&self) -> anyhow::Result<()> {
pub fn save(&self, config_last_saved: &Mutex<Instant>) -> anyhow::Result<()> {
let path = get_path();
debug!("saving config to {path:?}");
let raw_config = serde_yaml::to_string(self)?;
fs::write(path, raw_config).context("Could not write config")

let mut instant_guard = config_last_saved.lock().unwrap();
fs::write(path, raw_config).context("Could not write config")?;
*instant_guard = Instant::now();

Ok(())
}

pub fn load_or_create() -> anyhow::Result<Self> {
if let Some(config) = Config::load()? {
Ok(config)
} else {
let config = Config::default();
config.save()?;
config.save(&Mutex::new(Instant::now()))?;
Ok(config)
}
}
Expand Down Expand Up @@ -268,7 +275,7 @@ pub fn start_watcher(config_last_saved: Arc<Mutex<Instant>>) -> mpsc::UnboundedR
event.kind
{
if config_last_saved.lock().unwrap().elapsed()
< Duration::from_millis(CONFIG_RELOAD_INTERVAL_MILLIS)
< Duration::from_millis(SELF_CONFIG_EDIT_PERIOD_MILLIS)
{
debug!("ignoring fs event after self-inflicted config change");
continue;
Expand Down
45 changes: 23 additions & 22 deletions lact-daemon/src/server/gpu_controller/nvidia.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,11 @@ use nvml_wrapper::{
Device, Nvml,
};
use std::{
cell::RefCell,
cell::{Cell, RefCell},
collections::HashMap,
fmt::Write,
path::{Path, PathBuf},
rc::Rc,
sync::atomic::{AtomicI32, Ordering},
time::{Duration, Instant},
};
use tokio::{select, sync::Notify, time::sleep};
Expand All @@ -39,8 +38,8 @@ pub struct NvidiaGpuController {
pub sysfs_path: PathBuf,
pub fan_control_handle: RefCell<Option<FanControlHandle>>,

last_applied_gpc_offset: Rc<AtomicI32>,
last_applied_mem_offset: Rc<AtomicI32>,
last_applied_gpc_offset: Cell<Option<i32>>,
last_applied_mem_offset: Cell<Option<i32>>,
}

impl NvidiaGpuController {
Expand All @@ -56,8 +55,8 @@ impl NvidiaGpuController {
pci_info,
sysfs_path,
fan_control_handle: RefCell::new(None),
last_applied_gpc_offset: Rc::new(AtomicI32::new(0)),
last_applied_mem_offset: Rc::new(AtomicI32::new(0)),
last_applied_gpc_offset: Cell::new(None),
last_applied_mem_offset: Cell::new(None),
}
}

Expand Down Expand Up @@ -488,11 +487,11 @@ impl GpuController for NvidiaGpuController {

if let Ok(max) = device.max_clock_info(Clock::Graphics) {
if let Ok(offset_range) = device.gpc_clk_min_max_vf_offset() {
if let Ok(mut offset) = device.gpc_clk_vf_offset() {
if !(offset_range.0..offset_range.1).contains(&offset) {
offset = self.last_applied_gpc_offset.load(Ordering::SeqCst);
}

if let Some(offset) = self
.last_applied_gpc_offset
.get()
.or_else(|| device.gpc_clk_vf_offset().ok())
{
gpc = Some(NvidiaClockInfo {
max: max as i32,
offset,
Expand All @@ -504,11 +503,11 @@ impl GpuController for NvidiaGpuController {

if let Ok(max) = device.max_clock_info(Clock::Memory) {
if let Ok(offset_range) = device.mem_clk_min_max_vf_offset() {
if let Ok(mut offset) = device.mem_clk_vf_offset() {
if !(offset_range.0..offset_range.1).contains(&offset) {
offset = self.last_applied_mem_offset.load(Ordering::SeqCst);
}

if let Some(offset) = self
.last_applied_mem_offset
.get()
.or_else(|| device.mem_clk_vf_offset().ok())
{
mem = Some(NvidiaClockInfo {
max: max as i32,
offset,
Expand Down Expand Up @@ -586,27 +585,29 @@ impl GpuController for NvidiaGpuController {
.max_clock_info(Clock::Graphics)
.context("Could not read max graphics clock")?;
let offset = max_gpu_clock - default_max_clock as i32;
debug!("Using graphics clock offset {offset}");
debug!(
"Using graphics clock offset {offset} (default max clock: {default_max_clock})"
);

device
.set_gpc_clk_vf_offset(offset)
.context("Could not set graphics clock offset")?;

self.last_applied_gpc_offset.store(offset, Ordering::SeqCst);
self.last_applied_gpc_offset.set(Some(offset));
}

if let Some(max_mem_clock) = config.clocks_configuration.max_memory_clock {
let default_max_clock = device
.max_clock_info(Clock::Memory)
.context("Could not read max memory clock")?;
let offset = max_mem_clock - default_max_clock as i32;
debug!("Using mem clock offset {offset}");
debug!("Using mem clock offset {offset} (default max clock: {default_max_clock})");

device
.set_mem_clk_vf_offset(offset)
.context("Could not set memory clock offset")?;

self.last_applied_mem_offset.store(offset, Ordering::SeqCst);
self.last_applied_mem_offset.set(Some(offset));
}

if config.fan_control_enabled {
Expand Down Expand Up @@ -654,7 +655,7 @@ impl GpuController for NvidiaGpuController {
.set_gpc_clk_vf_offset(0)
.context("Could not reset graphics clock offset")?;

self.last_applied_gpc_offset.store(0, Ordering::SeqCst);
self.last_applied_gpc_offset.set(None);
}
}

Expand All @@ -664,7 +665,7 @@ impl GpuController for NvidiaGpuController {
.set_mem_clk_vf_offset(0)
.context("Could not reset memory clock offset")?;

self.last_applied_mem_offset.store(0, Ordering::SeqCst);
self.last_applied_mem_offset.set(None);
}
}

Expand Down
17 changes: 5 additions & 12 deletions lact-daemon/src/server/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,6 @@ impl<'a> Handler {
match result {
Ok(ConfirmCommand::Confirm) => {
info!("saving updated config");
*handler.config_last_saved.lock().unwrap() = Instant::now();

let mut config_guard = handler.config.borrow_mut();
match config_guard.gpus_mut() {
Ok(gpus) => {
Expand All @@ -261,11 +259,9 @@ impl<'a> Handler {
Err(err) => error!("{err:#}"),
}

if let Err(err) = config_guard.save() {
if let Err(err) = config_guard.save(&handler.config_last_saved) {
error!("{err:#}");
}

*handler.config_last_saved.lock().unwrap() = Instant::now();
}
Ok(ConfirmCommand::Revert) | Err(_) => {
if let Err(err) = controller.apply_config(&previous_config).await {
Expand Down Expand Up @@ -668,7 +664,7 @@ impl<'a> Handler {
self.config.borrow_mut().current_profile = name;

self.apply_current_config().await?;
self.config.borrow_mut().save()?;
self.config.borrow_mut().save(&self.config_last_saved)?;

Ok(())
}
Expand All @@ -685,7 +681,7 @@ impl<'a> Handler {
ProfileBase::Profile(name) => config.profile(&name)?.clone(),
};
config.profiles.insert(name, profile);
config.save()?;
config.save(&self.config_last_saved)?;
Ok(())
}

Expand All @@ -694,7 +690,7 @@ impl<'a> Handler {
self.set_profile(None).await?;
}
self.config.borrow_mut().profiles.shift_remove(&name);
self.config.borrow().save()?;
self.config.borrow().save(&self.config_last_saved)?;
Ok(())
}

Expand All @@ -718,12 +714,9 @@ impl<'a> Handler {
let mut config = self.config.borrow_mut();
config.clear();

*self.config_last_saved.lock().unwrap() = Instant::now();
if let Err(err) = config.save() {
if let Err(err) = config.save(&self.config_last_saved) {
error!("could not save config: {err:#}");
}

*self.config_last_saved.lock().unwrap() = Instant::now();
}

pub async fn cleanup(&self) {
Expand Down

0 comments on commit 73adb1a

Please sign in to comment.