Skip to content

Commit

Permalink
test: add tests for retrieving data from various debug snapshots (#429)
Browse files Browse the repository at this point in the history
* test: add tests for retrieving data from various debug snapshots

* chore: add power profiles mode to snapshot, add rx 7700s test data

* fix: update rust workflow
  • Loading branch information
ilya-zlobintsev authored Dec 18, 2024
1 parent 571d5d9 commit 0625b55
Show file tree
Hide file tree
Showing 484 changed files with 2,472 additions and 45 deletions.
8 changes: 3 additions & 5 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,21 @@ env:
jobs:
build-test:
if: ${{ github.event_name == 'push' || !github.event.pull_request.draft }}
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04

steps:
- uses: actions/checkout@v2
- name: Update repos
run: sudo apt update
- name: Install dependencies
run: sudo apt install libgtk-4-dev pkg-config libvulkan-dev libdrm-dev
- name: Install blueprint-compiler
run: curl -o /tmp/blueprint-compiler.deb http://de.archive.ubuntu.com/ubuntu/pool/universe/b/blueprint-compiler/blueprint-compiler_0.10.0-3_all.deb && sudo apt install -y /tmp/blueprint-compiler.deb
run: sudo apt install -y libgtk-4-dev pkg-config libvulkan-dev libdrm-dev blueprint-compiler
- name: Build
run: cargo build
- name: Run tests
run: cargo test --verbose --no-default-features -p lact

check-format:
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
if: ${{ github.event_name == 'push' || !github.event.pull_request.draft }}
steps:
- uses: actions/checkout@v2
Expand Down
44 changes: 44 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions lact-daemon/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,6 @@ tar = "0.4.40"
libflate = "2.0.0"
os-release = "0.1.0"
notify = { version = "6.1.1", default-features = false }

[dev-dependencies]
insta = { version = "1.41.1", features = ["json"] }
2 changes: 2 additions & 0 deletions lact-daemon/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ mod config;
mod server;
mod socket;
mod suspend;
#[cfg(test)]
mod tests;

use anyhow::Context;
use config::Config;
Expand Down
8 changes: 6 additions & 2 deletions lact-daemon/src/server/gpu_controller/amd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,16 @@ pub struct AmdGpuController {
}

impl AmdGpuController {
pub fn new_from_path(sysfs_path: PathBuf, pci_db: &Database) -> anyhow::Result<Self> {
pub fn new_from_path(
sysfs_path: PathBuf,
pci_db: &Database,
skip_drm: bool,
) -> anyhow::Result<Self> {
let handle = GpuHandle::new_from_path(sysfs_path)
.map_err(|error| anyhow!("failed to initialize gpu handle: {error}"))?;

let mut drm_handle = None;
if matches!(handle.get_driver(), "amdgpu" | "radeon") {
if matches!(handle.get_driver(), "amdgpu" | "radeon") && !skip_drm {
match get_drm_handle(&handle) {
Ok(handle) => {
drm_handle = Some(handle);
Expand Down
148 changes: 110 additions & 38 deletions lact-daemon/src/server/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ const SNAPSHOT_DEVICE_FILES: &[&str] = &[
"current_link_speed",
"current_link_width",
];
/// Prefixes for entries that will be recursively included in the debug snapshot
const SNAPSHOT_DEVICE_RECURSIVE_PATHS_PREFIXES: &[&str] = &["tile"];
const SNAPSHOT_FAN_CTRL_FILES: &[&str] = &[
"fan_curve",
"acoustic_limit_rpm_threshold",
Expand All @@ -89,13 +91,25 @@ pub struct Handler {

impl<'a> Handler {
/// Creates a handler that discovers GPUs under the DRM sysfs directory.
///
/// The directory is taken from the `_LACT_DRM_SYSFS_PATH` environment variable when
/// it can be read, and falls back to `/sys/class/drm` otherwise. The `sysfs_only`
/// flag of `with_base_path` is passed as `false`.
pub async fn new(config: Config) -> anyhow::Result<Self> {
    let base_path = env::var("_LACT_DRM_SYSFS_PATH")
        .map_or_else(|_| PathBuf::from("/sys/class/drm"), PathBuf::from);
    Self::with_base_path(&base_path, config, false).await
}

pub(crate) async fn with_base_path(
base_path: &Path,
config: Config,
sysfs_only: bool,
) -> anyhow::Result<Self> {
let mut controllers = BTreeMap::new();

// Sometimes LACT starts too early in the boot process, before the sysfs is initialized.
// For such scenarios there is a retry logic when no GPUs were found,
// or if some of the PCI devices don't have a drm entry yet.
for i in 1..=CONTROLLERS_LOAD_RETRY_ATTEMPTS {
controllers = load_controllers()?;
controllers = load_controllers(base_path, sysfs_only)?;

let mut should_retry = false;
if let Ok(devices) = fs::read_dir("/sys/bus/pci/devices") {
Expand Down Expand Up @@ -538,6 +552,25 @@ impl<'a> Handler {
add_path_to_archive(&mut archive, &full_path)?;
}

let device_files = fs::read_dir(controller.get_path())
.context("Could not read device dir")?
.flatten();

for device_entry in device_files {
if let Some(entry_name) = device_entry.file_name().to_str() {
if SNAPSHOT_DEVICE_RECURSIVE_PATHS_PREFIXES
.iter()
.any(|prefix| entry_name.starts_with(prefix))
{
add_path_recursively(
&mut archive,
&device_entry.path(),
controller.get_path(),
)?;
}
}
}

let fan_ctrl_path = controller_path.join("gpu_od").join("fan_ctrl");
for fan_ctrl_file in SNAPSHOT_FAN_CTRL_FILES {
let full_path = fan_ctrl_path.join(fan_ctrl_file);
Expand All @@ -547,7 +580,7 @@ impl<'a> Handler {
for hw_mon in controller.hw_monitors() {
let hw_mon_path = hw_mon.get_path();
let hw_mon_entries =
std::fs::read_dir(hw_mon_path).context("Could not read HwMon dir")?;
fs::read_dir(hw_mon_path).context("Could not read HwMon dir")?;

'entries: for entry in hw_mon_entries.flatten() {
if !entry.metadata().is_ok_and(|metadata| metadata.is_file()) {
Expand Down Expand Up @@ -599,30 +632,10 @@ impl<'a> Handler {
Err(err) => Some(err.to_string().into()),
};

let devices: BTreeMap<String, serde_json::Value> = self
.gpu_controllers
.iter()
.map(|(id, controller)| {
let config = self.config.try_borrow();
let gpu_config = config
.as_ref()
.ok()
.and_then(|config| config.gpus().ok()?.get(id));

let data = json!({
"pci_info": controller.get_pci_info(),
"info": controller.get_info(),
"stats": controller.get_stats(gpu_config),
"clocks_info": controller.get_clocks_info().ok(),
});
(id.clone(), data)
})
.collect();

let info = json!({
"system_info": system_info,
"initramfs_type": initramfs_type,
"devices": devices,
"devices": self.generate_snapshot_device_info(),
});
let info_data = serde_json::to_vec_pretty(&info).unwrap();

Expand All @@ -647,6 +660,28 @@ impl<'a> Handler {
Ok(out_path)
}

/// Builds the per-device section of the debug snapshot.
///
/// Returns a map keyed by GPU id whose values are JSON objects with the PCI info,
/// general info, stats, clocks info and power profile modes of each controller.
/// Errors from reading clocks info or power profile modes are discarded via `.ok()`.
pub(crate) fn generate_snapshot_device_info(&self) -> BTreeMap<String, serde_json::Value> {
    let mut devices = BTreeMap::new();

    for (id, controller) in self.gpu_controllers.iter() {
        // The config is borrowed per device; if it cannot be borrowed (or has no
        // entry for this id) the stats are generated without a GPU config.
        let borrowed_config = self.config.try_borrow();
        let gpu_config = match &borrowed_config {
            Ok(config) => config.gpus().ok().and_then(|gpus| gpus.get(id)),
            Err(_) => None,
        };

        let device_info = json!({
            "pci_info": controller.get_pci_info(),
            "info": controller.get_info(),
            "stats": controller.get_stats(gpu_config),
            "clocks_info": controller.get_clocks_info().ok(),
            "power_profile_modes": controller.get_power_profile_modes().ok(),
        });

        devices.insert(id.clone(), device_info);
    }

    devices
}

pub fn list_profiles(&self) -> ProfilesInfo {
let config = self.config.borrow();
ProfilesInfo {
Expand Down Expand Up @@ -743,14 +778,13 @@ impl<'a> Handler {
}
}

fn load_controllers() -> anyhow::Result<BTreeMap<String, Box<dyn GpuController>>> {
/// `sysfs_only` disables initialization of any external data sources, such as libdrm and nvml
fn load_controllers(
base_path: &Path,
sysfs_only: bool,
) -> anyhow::Result<BTreeMap<String, Box<dyn GpuController>>> {
let mut controllers = BTreeMap::new();

let base_path = match env::var("_LACT_DRM_SYSFS_PATH") {
Ok(custom_path) => PathBuf::from(custom_path),
Err(_) => PathBuf::from("/sys/class/drm"),
};

let pci_db = Database::read().unwrap_or_else(|err| {
warn!("could not read PCI ID database: {err}, device information will be limited");
Database {
Expand All @@ -759,14 +793,18 @@ fn load_controllers() -> anyhow::Result<BTreeMap<String, Box<dyn GpuController>>
}
});

let nvml = match Nvml::init() {
Ok(nvml) => {
info!("NVML initialized");
Some(Rc::new(nvml))
}
Err(err) => {
info!("Nvidia support disabled, {err}");
None
let nvml = if sysfs_only {
None
} else {
match Nvml::init() {
Ok(nvml) => {
info!("NVML initialized");
Some(Rc::new(nvml))
}
Err(err) => {
info!("Nvidia support disabled, {err}");
None
}
}
};

Expand All @@ -783,7 +821,7 @@ fn load_controllers() -> anyhow::Result<BTreeMap<String, Box<dyn GpuController>>
if name.starts_with("card") && !name.contains('-') {
trace!("trying gpu controller at {:?}", entry.path());
let device_path = entry.path().join("device");
match AmdGpuController::new_from_path(device_path, &pci_db) {
match AmdGpuController::new_from_path(device_path, &pci_db, sysfs_only) {
Ok(controller) => match controller.get_id() {
Ok(id) => {
let path = controller.get_path();
Expand Down Expand Up @@ -844,6 +882,40 @@ fn load_controllers() -> anyhow::Result<BTreeMap<String, Box<dyn GpuController>>
Ok(controllers)
}

/// Walks `entry_path` and hands every regular file below it to `add_path_to_archive`.
///
/// * `archive` - the tar builder the snapshot is being written to.
/// * `entry_path` - the directory to walk. If it cannot be read as a directory the
///   function does nothing and returns `Ok(())` (best-effort behaviour).
/// * `controller_path` - the device directory the walk started from. It is only
///   forwarded to the recursive calls.
///
/// Symlinks are skipped. Entries whose metadata cannot be read are logged with a
/// warning and skipped.
///
/// # Errors
///
/// Propagates any error returned by `add_path_to_archive`.
fn add_path_recursively(
    archive: &mut tar::Builder<impl Write>,
    entry_path: &Path,
    controller_path: &Path,
) -> anyhow::Result<()> {
    if let Ok(entries) = fs::read_dir(entry_path) {
        for entry in entries.flatten() {
            match entry.metadata() {
                Ok(metadata) => {
                    // `DirEntry::metadata` does not follow symlinks, so this check
                    // sees the link itself rather than its target.
                    if metadata.is_symlink() {
                        continue;
                    }

                    // `DirEntry::path` already contains `entry_path` as its prefix.
                    // Joining it onto `controller_path` again is a no-op for absolute
                    // paths but duplicates the prefix for relative ones, so the path
                    // is used as-is.
                    let full_path = entry.path();
                    if metadata.is_file() {
                        add_path_to_archive(archive, &full_path)?;
                    } else if metadata.is_dir() {
                        add_path_recursively(archive, &full_path, controller_path)?;
                    }
                }
                Err(err) => {
                    warn!(
                        "could not include file '{}' in snapshot: {err}",
                        entry.path().display()
                    );
                }
            }
        }
    }

    Ok(())
}

fn add_path_to_archive(
archive: &mut tar::Builder<impl Write>,
full_path: &Path,
Expand Down
1 change: 1 addition & 0 deletions lact-daemon/src/tests/data/amd/rx580/ari_enabled
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
1 change: 1 addition & 0 deletions lact-daemon/src/tests/data/amd/rx580/boot_vga
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
1 change: 1 addition & 0 deletions lact-daemon/src/tests/data/amd/rx580/broken_parity_status
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
40
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(null)
1 change: 1 addition & 0 deletions lact-daemon/src/tests/data/amd/rx580/card0/device/enable
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
11
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
595
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3200
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
Loading

0 comments on commit 0625b55

Please sign in to comment.