From efebfca680470b8d9c995fc81a9386bb68fdbf5d Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Thu, 26 May 2022 21:26:37 +0200 Subject: [PATCH 01/18] MMDS: Add support for lowercased headers Signed-off-by: Ives van Hoorne update test Co-authored-by: Luminita Voicu move test to valid headers --- docs/mmds/mmds-user-guide.md | 2 +- src/mmds/src/lib.rs | 6 ++--- src/mmds/src/token_headers.rs | 22 +++++++++++++++++-- .../integration_tests/functional/test_mmds.py | 2 +- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/docs/mmds/mmds-user-guide.md b/docs/mmds/mmds-user-guide.md index c6bac05100e..5f9b3f012d0 100644 --- a/docs/mmds/mmds-user-guide.md +++ b/docs/mmds/mmds-user-guide.md @@ -249,7 +249,7 @@ The session must start with an HTTP `PUT` request that generates the session tok In order to be successful, the request must respect the following constraints: - must be directed towards `/latest/api/token` path -- must contain a `X-ametadata-token-ttl-seconds` header specifying the token lifetime +- must contain a `X-metadata-token-ttl-seconds` header specifying the token lifetime in seconds. The value cannot be lower than 1 or greater than 21600 (6 hours). - must not contain a `X-Forwarded-For` header. diff --git a/src/mmds/src/lib.rs b/src/mmds/src/lib.rs index 5360c5a0f0a..ae206ab7d6e 100644 --- a/src/mmds/src/lib.rs +++ b/src/mmds/src/lib.rs @@ -42,7 +42,7 @@ impl fmt::Display for Error { ), Error::NoTtlProvided => write!( f, - "Token time to live value not found. Use `X-metadata-token-ttl_seconds` header to \ + "Token time to live value not found. Use `X-metadata-token-ttl-seconds` header to \ specify the token's lifetime." ), Error::ResourceNotFound(ref uri) => { @@ -705,8 +705,8 @@ mod tests { assert_eq!( Error::NoTtlProvided.to_string(), - "Token time to live value not found. Use `X-metadata-token-ttl_seconds` header to \ - specify the token's lifetime." + "Token time to live value not found. Use `X-metadata-token-ttl-seconds` header to \ + specify the token's lifetime." ); assert_eq!( diff --git a/src/mmds/src/token_headers.rs b/src/mmds/src/token_headers.rs index 5d13d9a4245..1f03d0b4f4e 100644 --- a/src/mmds/src/token_headers.rs +++ b/src/mmds/src/token_headers.rs @@ -39,12 +39,19 @@ impl TokenHeaders { /// Return `TokenHeaders` from headers map. pub fn try_from(map: &HashMap) -> Result { let mut headers = Self::default(); + let lowercased_headers: HashMap = map + .iter() + .map(|(k, v)| (k.to_lowercase(), v.clone())) + .collect(); - if let Some(token) = map.get(TokenHeaders::X_METADATA_TOKEN) { + if let Some(token) = lowercased_headers.get(&TokenHeaders::X_METADATA_TOKEN.to_lowercase()) + { headers.x_metadata_token = Some(token.to_string()); } - if let Some(value) = map.get(TokenHeaders::X_METADATA_TOKEN_TTL_SECONDS) { + if let Some(value) = + lowercased_headers.get(&TokenHeaders::X_METADATA_TOKEN_TTL_SECONDS.to_lowercase()) + { match value.parse::() { Ok(seconds) => { headers.x_metadata_token_ttl_seconds = Some(seconds); @@ -127,6 +134,17 @@ mod tests { let headers = TokenHeaders::try_from(&map).unwrap(); assert_eq!(*headers.x_metadata_token().unwrap(), "".to_string()); + // Lowercased headers + let mut map: HashMap = HashMap::default(); + map.insert( + TokenHeaders::X_METADATA_TOKEN_TTL_SECONDS + .to_string() + .to_lowercase(), + "60".to_string(), + ); + let headers = TokenHeaders::try_from(&map).unwrap(); + assert_eq!(headers.x_metadata_token_ttl_seconds().unwrap(), 60); + // Invalid value. let mut map: HashMap = HashMap::default(); map.insert( diff --git a/tests/integration_tests/functional/test_mmds.py b/tests/integration_tests/functional/test_mmds.py index 870bce0a70a..92f735436df 100644 --- a/tests/integration_tests/functional/test_mmds.py +++ b/tests/integration_tests/functional/test_mmds.py @@ -832,7 +832,7 @@ def test_mmds_v2_negative(test_microvm_with_api, network_config): cmd = f"curl -m 2 -s -X PUT http://{DEFAULT_IPV4}/latest/api/token" expected = ( "Token time to live value not found. Use " - "`X-metadata-token-ttl_seconds` header to specify " + "`X-metadata-token-ttl-seconds` header to specify " "the token's lifetime." ) _run_guest_cmd(ssh_connection, cmd, expected) From a43235462de3642f4fa2130c9722b60da3cd3298 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Tue, 21 Jun 2022 12:25:56 +0200 Subject: [PATCH 02/18] Support shared mmap for running VMs This allows us to run VMs while streaming memory changes to disk support mmap shared update log add configuration for memory backing file Progress tweaks --- .../seccomp/aarch64-unknown-linux-musl.json | 13 +++ .../seccomp/x86_64-unknown-linux-musl.json | 13 +++ src/api_server/src/parsed_request.rs | 2 + .../src/request/memory_backing_file.rs | 42 ++++++++++ src/api_server/src/request/mod.rs | 1 + src/api_server/swagger/firecracker.yaml | 31 +++++++ src/logger/src/metrics.rs | 4 + src/vm-memory/src/lib.rs | 2 +- src/vmm/src/builder.rs | 60 ++++++++++---- src/vmm/src/lib.rs | 11 +-- src/vmm/src/persist.rs | 80 ++++++++++++++----- src/vmm/src/resources.rs | 14 ++++ src/vmm/src/rpc_interface.rs | 45 ++++++++--- src/vmm/src/vmm_config/memory_backing_file.rs | 31 +++++++ src/vmm/src/vmm_config/mod.rs | 2 + 15 files changed, 303 insertions(+), 48 deletions(-) create mode 100644 src/api_server/src/request/memory_backing_file.rs create mode 100644 src/vmm/src/vmm_config/memory_backing_file.rs diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json index ca0e51381a5..5a2204f41d6 100644 --- a/resources/seccomp/aarch64-unknown-linux-musl.json +++ b/resources/seccomp/aarch64-unknown-linux-musl.json @@ -596,6 +596,19 @@ } ] }, + { + "syscall": "msync", + "comment": "Used to sync memory from mmap to disk", + "args": [ + { + "index": 2, + "type": "dword", + "op": "eq", + "val": 4, + "comment": "MS_SYNC" + } + ] + }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT", diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index b419581ea3f..02bc9629a6a 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -248,6 +248,19 @@ } ] }, + { + "syscall": "msync", + "comment": "Used to sync memory from mmap to disk", + "args": [ + { + "index": 2, + "type": "dword", + "op": "eq", + "val": 4, + "comment": "MS_SYNC" + } + ] + }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT", diff --git a/src/api_server/src/parsed_request.rs b/src/api_server/src/parsed_request.rs index 9b4cd6e64b9..7565853e1ab 100644 --- a/src/api_server/src/parsed_request.rs +++ b/src/api_server/src/parsed_request.rs @@ -17,6 +17,7 @@ use crate::request::logger::parse_put_logger; use crate::request::machine_configuration::{ parse_get_machine_config, parse_patch_machine_config, parse_put_machine_config, }; +use crate::request::memory_backing_file::parse_put_memory_backing_file; use crate::request::metrics::parse_put_metrics; use crate::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds}; use crate::request::net::{parse_patch_net, parse_put_net}; @@ -112,6 +113,7 @@ impl ParsedRequest { (Method::Put, "network-interfaces", Some(body)) => { parse_put_net(body, path_tokens.get(1)) } + (Method::Put, "memory-backing-file", Some(body)) => parse_put_memory_backing_file(body), (Method::Put, "shutdown-internal", None) => { Ok(ParsedRequest::new(RequestAction::ShutdownInternal)) } diff --git a/src/api_server/src/request/memory_backing_file.rs b/src/api_server/src/request/memory_backing_file.rs new file mode 100644 index 00000000000..e29798039bc --- /dev/null +++ b/src/api_server/src/request/memory_backing_file.rs @@ -0,0 +1,42 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use super::super::VmmAction; +use crate::parsed_request::{Error, ParsedRequest}; +use crate::request::Body; +use logger::{IncMetric, METRICS}; +use vmm::vmm_config::memory_backing_file::MemoryBackingFileConfig; + +pub(crate) fn parse_put_memory_backing_file(body: &Body) -> Result { + METRICS.put_api_requests.memory_backing_file_cfg_count.inc(); + Ok(ParsedRequest::new_sync(VmmAction::SetMemoryBackingFile( + serde_json::from_slice::(body.raw()).map_err(|e| { + METRICS.put_api_requests.memory_backing_file_cfg_fails.inc(); + Error::SerdeJson(e) + })?, + ))) +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use super::*; + + #[test] + fn test_parse_memory_backing_file() { + assert!(parse_put_memory_backing_file(&Body::new("invalid_payload")).is_err()); + + let body = r#"{ + "path": "./memory.snap" + }"#; + let same_body = MemoryBackingFileConfig { + path: PathBuf::from("./memory.snap"), + }; + let result = parse_put_memory_backing_file(&Body::new(body)); + assert!(result.is_ok()); + let parsed_req = result.unwrap_or_else(|_e| panic!("Failed test.")); + + assert!(parsed_req == ParsedRequest::new_sync(VmmAction::SetMemoryBackingFile(same_body))); + } +} diff --git a/src/api_server/src/request/mod.rs b/src/api_server/src/request/mod.rs index 75f9a0daef3..c2f52110bf0 100644 --- a/src/api_server/src/request/mod.rs +++ b/src/api_server/src/request/mod.rs @@ -8,6 +8,7 @@ pub mod drive; pub mod instance_info; pub mod logger; pub mod machine_configuration; +pub mod memory_backing_file; pub mod metrics; pub mod mmds; pub mod net; diff --git a/src/api_server/swagger/firecracker.yaml b/src/api_server/swagger/firecracker.yaml index 07d242532e6..14cae2b7a69 100644 --- a/src/api_server/swagger/firecracker.yaml +++ b/src/api_server/swagger/firecracker.yaml @@ -350,6 +350,29 @@ paths: description: Internal server error schema: $ref: "#/definitions/Error" + + /memory-backing-file: + put: + summary: Configures a memory backing file to sync the memory changes to during the runtime of the vm + operationId: putMemoryBackingFile + parameters: + - name: body + in: body + description: Path to memory backing file + required: true + schema: + $ref: "#/definitions/MemoryBackingFile" + responses: + 204: + description: Memory backing file configured + 400: + description: Memory backing file failed + schema: + $ref: "#/definitions/Error" + default: + description: Internal server error. + schema: + $ref: "#/definitions/Error" /metrics: put: @@ -1047,6 +1070,14 @@ definitions: tx_rate_limiter: $ref: "#/definitions/RateLimiter" + MemoryBackingFile: + type: object + required: + - path + properties: + path: + type: string + PartialDrive: type: object required: diff --git a/src/logger/src/metrics.rs b/src/logger/src/metrics.rs index 779689f04b8..aba286cc4d8 100644 --- a/src/logger/src/metrics.rs +++ b/src/logger/src/metrics.rs @@ -403,6 +403,10 @@ pub struct PutRequestsMetrics { pub machine_cfg_count: SharedIncMetric, /// Number of failures in configuring the machine. pub machine_cfg_fails: SharedIncMetric, + /// Number of PUTs for setting memory backing file. + pub memory_backing_file_cfg_count: SharedIncMetric, + /// Number of failures in configuring the machine. + pub memory_backing_file_cfg_fails: SharedIncMetric, /// Number of PUTs for initializing the metrics system. pub metrics_count: SharedIncMetric, /// Number of failures in initializing the metrics system. diff --git a/src/vm-memory/src/lib.rs b/src/vm-memory/src/lib.rs index 814f79098b4..1d18139dd99 100644 --- a/src/vm-memory/src/lib.rs +++ b/src/vm-memory/src/lib.rs @@ -117,7 +117,7 @@ pub fn create_guest_memory( for region in regions { let flags = match region.0 { None => libc::MAP_NORESERVE | libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, - Some(_) => libc::MAP_NORESERVE | libc::MAP_PRIVATE, + Some(_) => libc::MAP_NORESERVE | libc::MAP_SHARED, }; let mmap_region = diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index bffb43e47cf..c096d8cb132 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -5,9 +5,11 @@ use std::convert::TryFrom; use std::fmt::{Display, Formatter}; +use std::fs::File; use std::io::{self, Read, Seek, SeekFrom}; use std::os::unix::io::{AsRawFd, RawFd}; use std::sync::{Arc, Mutex}; +use vm_memory::FileOffset; use arch::InitrdConfig; #[cfg(target_arch = "x86_64")] @@ -28,7 +30,6 @@ use linux_loader::loader::KernelLoader; use logger::{error, warn, METRICS}; use seccompiler::BpfThreadMap; use snapshot::Persist; -use userfaultfd::Uffd; use utils::eventfd::EventFd; use utils::terminal::Terminal; use utils::time::TimestampUs; @@ -43,7 +44,7 @@ use crate::construct_kvm_mpidrs; use crate::device_manager::legacy::PortIODeviceManager; use crate::device_manager::mmio::MMIODeviceManager; use crate::device_manager::persist::MMIODevManagerConstructorArgs; -use crate::persist::{MicrovmState, MicrovmStateError}; +use crate::persist::{MemoryDescriptor, MicrovmState, MicrovmStateError}; use crate::resources::VmResources; use crate::vmm_config::boot_source::BootConfig; use crate::vmm_config::instance_info::InstanceInfo; @@ -58,6 +59,8 @@ use crate::{device_manager, mem_size_mib, Error, EventManager, Vmm, VmmEventsObs pub enum StartMicrovmError { /// Unable to attach block device to Vmm. AttachBlockDevice(io::Error), + /// Unable to create the memory backing file. + BackingMemoryFile(io::Error), /// This error is thrown by the minimal boot loader implementation. ConfigureSystem(arch::Error), /// Internal errors are due to resource exhaustion. @@ -112,6 +115,9 @@ impl Display for StartMicrovmError { write!(f, "Unable to attach block device to Vmm: {}", err) } ConfigureSystem(err) => write!(f, "System configuration error: {:?}", err), + BackingMemoryFile(err) => { + write!(f, "Unable to create the memory backing file: {}", err) + } CreateRateLimiter(err) => write!(f, "Cannot create RateLimiter: {}", err), CreateNetDevice(err) => { let mut err_msg = format!("{:?}", err); @@ -231,7 +237,7 @@ fn create_vmm_and_vcpus( instance_info: &InstanceInfo, event_manager: &mut EventManager, guest_memory: GuestMemoryMmap, - uffd: Option, + memory_descriptor: Option, track_dirty_pages: bool, vcpu_count: u8, ) -> std::result::Result<(Vmm, Vec), StartMicrovmError> { @@ -297,7 +303,7 @@ fn create_vmm_and_vcpus( shutdown_exit_code: None, vm, guest_memory, - uffd, + memory_descriptor, vcpus_handles: Vec::new(), vcpus_exit_evt, mmio_device_manager, @@ -329,8 +335,23 @@ pub fn build_microvm_for_boot( let boot_config = vm_resources.boot_source().ok_or(MissingKernelConfig)?; let track_dirty_pages = vm_resources.track_dirty_pages(); - let guest_memory = - create_guest_memory(vm_resources.vm_config().mem_size_mib, track_dirty_pages)?; + + let backing_memory_file = if let Some(ref file) = vm_resources.backing_memory_file { + file.set_len((vm_resources.vm_config().mem_size_mib * 1024 * 1024) as u64) + .map_err(|e| { + error!("Failed to set backing memory file size: {}", e); + StartMicrovmError::BackingMemoryFile(e) + })?; + + Some(file.clone()) + } else { + None + }; + let guest_memory = create_guest_memory( + vm_resources.vm_config().mem_size_mib, + backing_memory_file.clone(), + track_dirty_pages, + )?; let vcpu_config = vm_resources.vcpu_config(); let entry_addr = load_kernel(boot_config, &guest_memory)?; let initrd = load_initrd_from_config(boot_config, &guest_memory)?; @@ -362,7 +383,7 @@ pub fn build_microvm_for_boot( instance_info, event_manager, guest_memory, - None, + backing_memory_file.map(MemoryDescriptor::File), track_dirty_pages, vcpu_config.vcpu_count, )?; @@ -451,7 +472,7 @@ pub fn build_microvm_from_snapshot( event_manager: &mut EventManager, microvm_state: MicrovmState, guest_memory: GuestMemoryMmap, - uffd: Option, + memory_descriptor: Option, track_dirty_pages: bool, seccomp_filters: &BpfThreadMap, vm_resources: &mut VmResources, @@ -466,7 +487,7 @@ pub fn build_microvm_from_snapshot( instance_info, event_manager, guest_memory.clone(), - uffd, + memory_descriptor, track_dirty_pages, vcpu_count, )?; @@ -581,15 +602,24 @@ pub fn build_microvm_from_snapshot( /// Creates GuestMemory of `mem_size_mib` MiB in size. pub fn create_guest_memory( mem_size_mib: usize, + backing_memory_file: Option>, track_dirty_pages: bool, ) -> std::result::Result { let mem_size = mem_size_mib << 20; let arch_mem_regions = arch::arch_memory_regions(mem_size); + let mut offset = 0_u64; vm_memory::create_guest_memory( &arch_mem_regions .iter() - .map(|(addr, size)| (None, *addr, *size)) + .map(|(addr, size)| { + let file_offset = backing_memory_file + .clone() + .map(|file| FileOffset::from_arc(file, offset)); + offset += *size as u64; + + (file_offset, *addr, *size) + }) .collect::>()[..], track_dirty_pages, ) @@ -1068,7 +1098,7 @@ pub mod tests { } pub(crate) fn default_vmm() -> Vmm { - let guest_memory = create_guest_memory(128, false).unwrap(); + let guest_memory = create_guest_memory(128, None, false).unwrap(); let vcpus_exit_evt = EventFd::new(libc::EFD_NONBLOCK) .map_err(Error::EventFd) @@ -1096,12 +1126,12 @@ pub mod tests { shutdown_exit_code: None, vm, guest_memory, - uffd: None, vcpus_handles: Vec::new(), vcpus_exit_evt, mmio_device_manager, #[cfg(target_arch = "x86_64")] pio_device_manager, + memory_descriptor: None, } } @@ -1283,13 +1313,13 @@ pub mod tests { // Case 1: create guest memory without dirty page tracking { - let guest_memory = create_guest_memory(mem_size, false).unwrap(); + let guest_memory = create_guest_memory(mem_size, None, false).unwrap(); assert!(!is_dirty_tracking_enabled(&guest_memory)); } // Case 2: create guest memory with dirty page tracking { - let guest_memory = create_guest_memory(mem_size, true).unwrap(); + let guest_memory = create_guest_memory(mem_size, None, true).unwrap(); assert!(is_dirty_tracking_enabled(&guest_memory)); } } @@ -1297,7 +1327,7 @@ pub mod tests { #[test] fn test_create_vcpus() { let vcpu_count = 2; - let guest_memory = create_guest_memory(128, false).unwrap(); + let guest_memory = create_guest_memory(128, None, false).unwrap(); #[allow(unused_mut)] let mut vm = setup_kvm_vm(&guest_memory, false).unwrap(); diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index d71932525b5..90975ac6acc 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -50,10 +50,10 @@ use devices::virtio::{ use devices::BusDevice; use event_manager::{EventManager as BaseEventManager, EventOps, Events, MutEventSubscriber}; use logger::{error, info, warn, LoggerError, MetricsError, METRICS}; +use persist::MemoryDescriptor; use rate_limiter::BucketUpdate; use seccompiler::BpfProgram; use snapshot::Persist; -use userfaultfd::Uffd; use utils::epoll::EventSet; use utils::eventfd::EventFd; use vm_memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; @@ -260,10 +260,6 @@ pub struct Vmm { // Guest VM core resources. vm: Vm, guest_memory: GuestMemoryMmap, - // Save UFFD in order to keep it open in the Firecracker process, as well. - // Since this field is never read again, we need to allow `dead_code`. - #[allow(dead_code)] - uffd: Option, vcpus_handles: Vec, // Used by Vcpus and devices to initiate teardown; Vmm should never write here. vcpus_exit_evt: EventFd, @@ -272,6 +268,11 @@ pub struct Vmm { mmio_device_manager: MMIODeviceManager, #[cfg(target_arch = "x86_64")] pio_device_manager: PortIODeviceManager, + + // The mem file that should be mmaped. We need to keep a reference of the UFFD in the + // process so we allow dead_code + #[allow(dead_code)] + memory_descriptor: Option, } impl Vmm { diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 309cbce46b3..0ff5ca1f9da 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -325,6 +325,27 @@ fn snapshot_memory_to_file( snapshot_type: &SnapshotType, ) -> std::result::Result<(), CreateSnapshotError> { use self::CreateSnapshotError::*; + if OpenOptions::new().read(true).open(mem_file_path).is_ok() + && snapshot_type == &SnapshotType::Diff + { + // // The memory file already exists. + // // We're going to use the msync behaviour + // for region in vmm.guest_memory().iter() { + // info!("msyncing memory region"); + // unsafe { + // if libc::msync(region.as_ptr() as _, region.len() as _, libc::MS_SYNC) == -1 { + // return Err(CreateSnapshotError::Memory( + // memory_snapshot::Error::CreateRegion(vm_memory::MmapRegionError::Mmap( + // std::io::Error::last_os_error(), + // )), + // )); + // } + // }; + // } + + return Ok(()); + } + let mut file = OpenOptions::new() .write(true) .create(true) @@ -483,6 +504,16 @@ pub fn snapshot_state_sanity_check( Ok(()) } +/// Describes a descriptor that connects to the memory used by the VM. This could either be the a file descriptor +/// or a UFFD descriptor. +#[derive(Debug)] +pub enum MemoryDescriptor { + /// A file descriptor that connects to the user fault process. + Uffd(Uffd), + /// A file descriptor of the backing memory file. + File(Arc), +} + /// Loads a Microvm snapshot producing a 'paused' Microvm. pub fn restore_from_snapshot( instance_info: &InstanceInfo, @@ -501,26 +532,31 @@ pub fn restore_from_snapshot( let mem_backend_path = ¶ms.mem_backend.backend_path; let mem_state = µvm_state.memory_state; let track_dirty_pages = params.enable_diff_snapshots; - let (guest_memory, uffd) = match params.mem_backend.backend_type { - MemBackendType::File => ( - guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages)?, - None, - ), - MemBackendType::Uffd => guest_memory_from_uffd( - mem_backend_path, - mem_state, - track_dirty_pages, - // We enable the UFFD_FEATURE_EVENT_REMOVE feature only if a balloon device - // is present in the microVM state. - microvm_state.device_states.balloon_device.is_some(), - )?, + let (guest_memory, memory_descriptor) = match params.mem_backend.backend_type { + MemBackendType::File => { + let (guest_memory, file) = + guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages)?; + (guest_memory, Some(MemoryDescriptor::File(Arc::new(file)))) + } + MemBackendType::Uffd => { + let (guest_memory, uffd) = guest_memory_from_uffd( + mem_backend_path, + mem_state, + track_dirty_pages, + // We enable the UFFD_FEATURE_EVENT_REMOVE feature only if a balloon device + // is present in the microVM state. + microvm_state.device_states.balloon_device.is_some(), + )?; + + (guest_memory, uffd.map(MemoryDescriptor::Uffd)) + } }; builder::build_microvm_from_snapshot( instance_info, event_manager, microvm_state, guest_memory, - uffd, + memory_descriptor, track_dirty_pages, seccomp_filters, vm_resources, @@ -545,11 +581,19 @@ fn guest_memory_from_file( mem_file_path: &Path, mem_state: &GuestMemoryState, track_dirty_pages: bool, -) -> std::result::Result { +) -> std::result::Result<(GuestMemoryMmap, File), LoadSnapshotError> { use self::LoadSnapshotError::{DeserializeMemory, MemoryBackingFile}; - let mem_file = File::open(mem_file_path).map_err(MemoryBackingFile)?; - GuestMemoryMmap::restore(Some(&mem_file), mem_state, track_dirty_pages) - .map_err(DeserializeMemory) + let mem_file = OpenOptions::new() + .write(true) + .read(true) + .open(mem_file_path) + .map_err(MemoryBackingFile)?; + + Ok(( + GuestMemoryMmap::restore(Some(&mem_file), mem_state, track_dirty_pages) + .map_err(DeserializeMemory)?, + mem_file, + )) } fn guest_memory_from_uffd( diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 9a5257ac307..fb9c3b11e61 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use std::convert::From; +use std::fs::File; use std::sync::{Arc, Mutex, MutexGuard}; use logger::info; @@ -117,6 +118,8 @@ pub struct VmResources { pub mmds_size_limit: usize, /// Whether or not to load boot timer device. pub boot_timer: bool, + /// When backed by a memory file, this should be set + pub backing_memory_file: Option>, } impl VmResources { @@ -236,6 +239,16 @@ impl VmResources { self.vm_config.track_dirty_pages = dirty_page_tracking; } + /// Returns the config for the backing memory file + pub fn backing_memory_file(&self) -> Option> { + self.backing_memory_file.clone() + } + + /// Sets the backing memory file + pub fn set_backing_memory_file(&mut self, backing_mem_file: Arc) { + self.backing_memory_file.get_or_insert(backing_mem_file); + } + /// Returns the VmConfig. pub fn vm_config(&self) -> &VmConfig { &self.vm_config @@ -575,6 +588,7 @@ mod tests { mmds: None, boot_timer: false, mmds_size_limit: HTTP_MAX_PAYLOAD_SIZE, + backing_memory_file: None, } } diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 2a79237d3f1..87f9f2aae2a 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use std::fmt::{Display, Formatter}; -use std::result; use std::sync::{Arc, Mutex, MutexGuard}; +use std::{fs, result}; use logger::*; use mmds::data_store::{self, Mmds}; @@ -34,6 +34,7 @@ use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, Drive use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::logger::{LoggerConfig, LoggerConfigError}; use crate::vmm_config::machine_config::{VmConfig, VmConfigError, VmUpdateConfig}; +use crate::vmm_config::memory_backing_file::{MemoryBackingFileConfig, MemoryBackingFileError}; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::{ @@ -99,6 +100,9 @@ pub enum VmmAction { /// `BalloonDeviceConfig` as input. This action can only be called before the microVM /// has booted. SetBalloonDevice(BalloonDeviceConfig), + /// Set the memory backing file for the VM. The VM will use this backing file to store its + /// memory. This action can only be called before the microVM has booted. + SetMemoryBackingFile(MemoryBackingFileConfig), /// Set the MMDS configuration. SetMmdsConfiguration(MmdsConfig), /// Set the vsock device or update the one that already exists using the @@ -130,6 +134,8 @@ pub enum VmmAction { pub enum VmmActionError { /// The action `SetBalloonDevice` failed because of bad user input. BalloonConfig(BalloonConfigError), + /// The action `SetMemoryBackingFile` failed because of bad user input. + MemoryBackingFile(MemoryBackingFileError), /// The action `ConfigureBootSource` failed because of bad user input. BootSource(BootSourceConfigError), /// The action `CreateSnapshot` failed. @@ -182,6 +188,7 @@ impl Display for VmmActionError { match self { BalloonConfig(err) => err.to_string(), BootSource(err) => err.to_string(), + MemoryBackingFile(err) => err.to_string(), CreateSnapshot(err) => err.to_string(), DriveConfig(err) => err.to_string(), InternalVmm(err) => format!("Internal Vmm error: {}", err), @@ -422,6 +429,7 @@ impl<'a> PrebootApiController<'a> { SetBalloonDevice(config) => self.set_balloon_device(config), SetVsockDevice(config) => self.set_vsock_device(config), SetMmdsConfiguration(config) => self.set_mmds_config(config), + SetMemoryBackingFile(config) => self.set_backing_memory_file(config), StartMicroVm => self.start_microvm(), UpdateVmConfiguration(config) => self.update_vm_config(config), // Operations not allowed pre-boot. @@ -447,6 +455,23 @@ impl<'a> PrebootApiController<'a> { .map_err(VmmActionError::BalloonConfig) } + fn set_backing_memory_file(&mut self, cfg: MemoryBackingFileConfig) -> ActionResult { + self.boot_path = true; + let file = fs::OpenOptions::new() + .create(true) + .read(true) + .write(true) + .open(cfg.path) + .map(Arc::new) + .map_err(|e| { + VmmActionError::MemoryBackingFile(MemoryBackingFileError::CreateFile(e)) + })?; + + self.vm_resources.backing_memory_file = Some(file); + + Ok(VmmData::Empty) + } + fn insert_block_device(&mut self, cfg: BlockDeviceConfig) -> ActionResult { self.boot_path = true; self.vm_resources @@ -654,6 +679,7 @@ impl RuntimeApiController { | InsertNetworkDevice(_) | LoadSnapshot(_) | SetBalloonDevice(_) + | SetMemoryBackingFile(_) | SetVsockDevice(_) | SetMmdsConfiguration(_) | StartMicroVm @@ -720,14 +746,14 @@ impl RuntimeApiController { fn create_snapshot(&mut self, create_params: &CreateSnapshotParams) -> ActionResult { log_dev_preview_warning("Virtual machine snapshots", None); - if create_params.snapshot_type == SnapshotType::Diff - && !self.vm_resources.track_dirty_pages() - { - return Err(VmmActionError::NotSupported( - "Diff snapshots are not allowed on uVMs with dirty page tracking disabled." - .to_string(), - )); - } + // if create_params.snapshot_type == SnapshotType::Diff + // && !self.vm_resources.track_dirty_pages() + // { + // return Err(VmmActionError::NotSupported( + // "Diff snapshots are not allowed on uVMs with dirty page tracking disabled." + // .to_string(), + // )); + // } let mut locked_vmm = self.vmm.lock().unwrap(); let create_start_us = utils::time::get_time_us(utils::time::ClockType::Monotonic); @@ -862,6 +888,7 @@ mod tests { pub boot_timer: bool, // when `true`, all self methods are forced to fail pub force_errors: bool, + pub backing_memory_file: Option>, } impl MockVmRes { diff --git a/src/vmm/src/vmm_config/memory_backing_file.rs b/src/vmm/src/vmm_config/memory_backing_file.rs new file mode 100644 index 00000000000..f5f5bd9ce6f --- /dev/null +++ b/src/vmm/src/vmm_config/memory_backing_file.rs @@ -0,0 +1,31 @@ +use std::{ + fmt::{Display, Formatter}, + io, + path::PathBuf, +}; + +use serde::{Deserialize, Serialize}; + +/// Keeps the Memory Backing file configuration. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +#[serde(deny_unknown_fields)] +pub struct MemoryBackingFileConfig { + /// Location of the memory backing file. + pub path: PathBuf, +} + +/// Errors associated with the operations allowed on a memory backing file. +#[derive(Debug)] +pub enum MemoryBackingFileError { + /// Failed to create the block device + CreateFile(io::Error), +} + +impl Display for MemoryBackingFileError { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + use self::MemoryBackingFileError::*; + match self { + CreateFile(e) => write!(f, "Unable to create the memory backing file: {}", e), + } + } +} diff --git a/src/vmm/src/vmm_config/mod.rs b/src/vmm/src/vmm_config/mod.rs index c509fce888f..61b235a0fbb 100644 --- a/src/vmm/src/vmm_config/mod.rs +++ b/src/vmm/src/vmm_config/mod.rs @@ -23,6 +23,8 @@ pub mod instance_info; pub mod logger; /// Wrapper for configuring the memory and CPU of the microVM. pub mod machine_config; +/// Wrapper for configuring the backing memory file. +pub mod memory_backing_file; /// Wrapper for configuring the metrics. pub mod metrics; /// Wrapper for configuring the MMDS. From 41643714f571f281ac9777ff72254591856ea0da Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Wed, 31 Aug 2022 12:05:46 +0200 Subject: [PATCH 03/18] fix: VMs getting stuck when starting from snapshot Fixes https://github.com/firecracker-microvm/firecracker/issues/3043 Fixes https://github.com/firecracker-microvm/firecracker/issues/3020 --- src/cpuid/src/transformer/amd.rs | 3 +++ src/cpuid/src/transformer/common.rs | 13 +++++++++++++ src/cpuid/src/transformer/intel.rs | 2 ++ 3 files changed, 18 insertions(+) diff --git a/src/cpuid/src/transformer/amd.rs b/src/cpuid/src/transformer/amd.rs index c1db38260a5..261e88ef4e4 100644 --- a/src/cpuid/src/transformer/amd.rs +++ b/src/cpuid/src/transformer/amd.rs @@ -147,6 +147,9 @@ impl CpuidTransformer for AmdCpuidTransformer { leaf_0x8000001d::LEAF_NUM => Some(amd::update_extended_cache_topology_entry), leaf_0x8000001e::LEAF_NUM => Some(amd::update_extended_apic_id_entry), 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), + + // hypervisor stuff + 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), _ => None, } } diff --git a/src/cpuid/src/transformer/common.rs b/src/cpuid/src/transformer/common.rs index ba89fc35f74..59de658fba5 100644 --- a/src/cpuid/src/transformer/common.rs +++ b/src/cpuid/src/transformer/common.rs @@ -69,6 +69,19 @@ pub fn update_brand_string_entry( Ok(()) } +// KVM feature bits +#[cfg(target_arch = "x86_64")] +const KVM_FEATURE_ASYNC_PF_INT_BIT: u32 = 14; + +pub fn disable_kvm_feature_async_pf( + entry: &mut kvm_cpuid_entry2, + vm_spec: &VmSpec, +) -> Result<(), Error> { + entry.eax.write_bit(KVM_FEATURE_ASYNC_PF_INT_BIT, false); + + Ok(()) +} + pub fn update_cache_parameters_entry( entry: &mut kvm_cpuid_entry2, vm_spec: &VmSpec, diff --git a/src/cpuid/src/transformer/intel.rs b/src/cpuid/src/transformer/intel.rs index 8505b668932..439058c8780 100644 --- a/src/cpuid/src/transformer/intel.rs +++ b/src/cpuid/src/transformer/intel.rs @@ -126,6 +126,8 @@ impl CpuidTransformer for IntelCpuidTransformer { leaf_0xa::LEAF_NUM => Some(intel::update_perf_mon_entry), leaf_0xb::LEAF_NUM => Some(intel::update_extended_topology_entry), 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), + // hypervisor stuff + 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), _ => None, } } From ab6fe7395f99f8993b1940ab3ea1151c46e7545e Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Sun, 9 Oct 2022 23:40:12 +0200 Subject: [PATCH 04/18] Progress --- .../seccomp/x86_64-unknown-linux-musl.json | 12 ++ src/api_server/src/parsed_request.rs | 4 +- src/api_server/src/request/memory_backend.rs | 46 ++++ .../src/request/memory_backing_file.rs | 42 ---- src/api_server/src/request/mod.rs | 2 +- src/api_server/swagger/firecracker.yaml | 22 +- src/cpuid/src/transformer/common.rs | 2 +- src/logger/src/metrics.rs | 4 +- src/vmm/src/builder.rs | 199 ++++++++++++++++-- src/vmm/src/memory_snapshot.rs | 143 ++++++++++++- src/vmm/src/persist.rs | 128 +++++++---- src/vmm/src/resources.rs | 16 +- src/vmm/src/rpc_interface.rs | 34 +-- src/vmm/src/vmm_config/memory_backing_file.rs | 31 --- src/vmm/src/vmm_config/mod.rs | 2 - src/vmm/src/vmm_config/snapshot.rs | 4 +- 16 files changed, 502 insertions(+), 189 deletions(-) create mode 100644 src/api_server/src/request/memory_backend.rs delete mode 100644 src/api_server/src/request/memory_backing_file.rs delete mode 100644 src/vmm/src/vmm_config/memory_backing_file.rs diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json index 02bc9629a6a..51c90224393 100644 --- a/resources/seccomp/x86_64-unknown-linux-musl.json +++ b/resources/seccomp/x86_64-unknown-linux-musl.json @@ -261,6 +261,18 @@ } ] }, + { + "syscall": "memfd_create", + "comment": "Used to create a memory backed file descriptor that can be used to save memory to" + }, + { + "syscall": "nanosleep", + "comment": "Debugging sleep" + }, + { + "syscall": "copy_file_range", + "comment": "debugging" + }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT", diff --git a/src/api_server/src/parsed_request.rs b/src/api_server/src/parsed_request.rs index 7565853e1ab..b81e3391b0e 100644 --- a/src/api_server/src/parsed_request.rs +++ b/src/api_server/src/parsed_request.rs @@ -17,7 +17,7 @@ use crate::request::logger::parse_put_logger; use crate::request::machine_configuration::{ parse_get_machine_config, parse_patch_machine_config, parse_put_machine_config, }; -use crate::request::memory_backing_file::parse_put_memory_backing_file; +use crate::request::memory_backend::parse_put_memory_backend; use crate::request::metrics::parse_put_metrics; use crate::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds}; use crate::request::net::{parse_patch_net, parse_put_net}; @@ -113,7 +113,7 @@ impl ParsedRequest { (Method::Put, "network-interfaces", Some(body)) => { parse_put_net(body, path_tokens.get(1)) } - (Method::Put, "memory-backing-file", Some(body)) => parse_put_memory_backing_file(body), + (Method::Put, "memory-backend", Some(body)) => parse_put_memory_backend(body), (Method::Put, "shutdown-internal", None) => { Ok(ParsedRequest::new(RequestAction::ShutdownInternal)) } diff --git a/src/api_server/src/request/memory_backend.rs b/src/api_server/src/request/memory_backend.rs new file mode 100644 index 00000000000..b81c7f5fc78 --- /dev/null +++ b/src/api_server/src/request/memory_backend.rs @@ -0,0 +1,46 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use super::super::VmmAction; +use crate::parsed_request::{Error, ParsedRequest}; +use crate::request::Body; +use logger::{IncMetric, METRICS}; +use vmm::vmm_config::snapshot::MemBackendConfig; + +pub(crate) fn parse_put_memory_backend(body: &Body) -> Result { + METRICS.put_api_requests.memory_backend_cfg_count.inc(); + Ok(ParsedRequest::new_sync(VmmAction::SetMemoryBackend( + serde_json::from_slice::(body.raw()).map_err(|e| { + METRICS.put_api_requests.memory_backend_cfg_fails.inc(); + Error::SerdeJson(e) + })?, + ))) +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use vmm::vmm_config::snapshot::MemBackendType; + + use super::*; + + #[test] + fn test_parse_memory_backing_file() { + assert!(parse_put_memory_backend(&Body::new("invalid_payload")).is_err()); + + let body = r#"{ + "backend_type": "File", + "backend_path": "./memory.snap" + }"#; + let same_body = MemBackendConfig { + backend_type: MemBackendType::File, + backend_path: PathBuf::from("./memory.snap"), + }; + let result = parse_put_memory_backend(&Body::new(body)); + assert!(result.is_ok()); + let parsed_req = result.unwrap_or_else(|_e| panic!("Failed test.")); + + assert!(parsed_req == ParsedRequest::new_sync(VmmAction::SetMemoryBackend(same_body))); + } +} diff --git a/src/api_server/src/request/memory_backing_file.rs b/src/api_server/src/request/memory_backing_file.rs deleted file mode 100644 index e29798039bc..00000000000 --- a/src/api_server/src/request/memory_backing_file.rs +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -use super::super::VmmAction; -use crate::parsed_request::{Error, ParsedRequest}; -use crate::request::Body; -use logger::{IncMetric, METRICS}; -use vmm::vmm_config::memory_backing_file::MemoryBackingFileConfig; - -pub(crate) fn parse_put_memory_backing_file(body: &Body) -> Result { - METRICS.put_api_requests.memory_backing_file_cfg_count.inc(); - Ok(ParsedRequest::new_sync(VmmAction::SetMemoryBackingFile( - serde_json::from_slice::(body.raw()).map_err(|e| { - METRICS.put_api_requests.memory_backing_file_cfg_fails.inc(); - Error::SerdeJson(e) - })?, - ))) -} - -#[cfg(test)] -mod tests { - use std::path::PathBuf; - - use super::*; - - #[test] - fn test_parse_memory_backing_file() { - assert!(parse_put_memory_backing_file(&Body::new("invalid_payload")).is_err()); - - let body = r#"{ - "path": "./memory.snap" - }"#; - let same_body = MemoryBackingFileConfig { - path: PathBuf::from("./memory.snap"), - }; - let result = parse_put_memory_backing_file(&Body::new(body)); - assert!(result.is_ok()); - let parsed_req = result.unwrap_or_else(|_e| panic!("Failed test.")); - - assert!(parsed_req == ParsedRequest::new_sync(VmmAction::SetMemoryBackingFile(same_body))); - } -} diff --git a/src/api_server/src/request/mod.rs b/src/api_server/src/request/mod.rs index c2f52110bf0..f58bce5b533 100644 --- a/src/api_server/src/request/mod.rs +++ b/src/api_server/src/request/mod.rs @@ -8,7 +8,7 @@ pub mod drive; pub mod instance_info; pub mod logger; pub mod machine_configuration; -pub mod memory_backing_file; +pub mod memory_backend; pub mod metrics; pub mod mmds; pub mod net; diff --git a/src/api_server/swagger/firecracker.yaml b/src/api_server/swagger/firecracker.yaml index 14cae2b7a69..88f0d2697a5 100644 --- a/src/api_server/swagger/firecracker.yaml +++ b/src/api_server/swagger/firecracker.yaml @@ -351,22 +351,22 @@ paths: schema: $ref: "#/definitions/Error" - /memory-backing-file: + /memory-backend: put: - summary: Configures a memory backing file to sync the memory changes to during the runtime of the vm - operationId: putMemoryBackingFile + summary: Configures a memory backend to sync the memory changes from during the runtime of the vm + operationId: putMemoryBackend parameters: - name: body in: body - description: Path to memory backing file + description: The memory backend to use required: true schema: - $ref: "#/definitions/MemoryBackingFile" + $ref: "#/definitions/MemoryBackend" responses: 204: - description: Memory backing file configured + description: Memory backend configured 400: - description: Memory backing file failed + description: Memory backend failed schema: $ref: "#/definitions/Error" default: @@ -1070,14 +1070,6 @@ definitions: tx_rate_limiter: $ref: "#/definitions/RateLimiter" - MemoryBackingFile: - type: object - required: - - path - properties: - path: - type: string - PartialDrive: type: object required: diff --git a/src/cpuid/src/transformer/common.rs b/src/cpuid/src/transformer/common.rs index 59de658fba5..ea2592a07da 100644 --- a/src/cpuid/src/transformer/common.rs +++ b/src/cpuid/src/transformer/common.rs @@ -75,7 +75,7 @@ const KVM_FEATURE_ASYNC_PF_INT_BIT: u32 = 14; pub fn disable_kvm_feature_async_pf( entry: &mut kvm_cpuid_entry2, - vm_spec: &VmSpec, + _vm_spec: &VmSpec, ) -> Result<(), Error> { entry.eax.write_bit(KVM_FEATURE_ASYNC_PF_INT_BIT, false); diff --git a/src/logger/src/metrics.rs b/src/logger/src/metrics.rs index aba286cc4d8..f009004362f 100644 --- a/src/logger/src/metrics.rs +++ b/src/logger/src/metrics.rs @@ -404,9 +404,9 @@ pub struct PutRequestsMetrics { /// Number of failures in configuring the machine. pub machine_cfg_fails: SharedIncMetric, /// Number of PUTs for setting memory backing file. - pub memory_backing_file_cfg_count: SharedIncMetric, + pub memory_backend_cfg_count: SharedIncMetric, /// Number of failures in configuring the machine. - pub memory_backing_file_cfg_fails: SharedIncMetric, + pub memory_backend_cfg_fails: SharedIncMetric, /// Number of PUTs for initializing the metrics system. pub metrics_count: SharedIncMetric, /// Number of failures in initializing the metrics system. diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index c096d8cb132..681faf0f440 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -4,12 +4,18 @@ //! Enables pre-boot setup, instantiation and booting of a Firecracker VMM. use std::convert::TryFrom; +use std::ffi::CString; use std::fmt::{Display, Formatter}; -use std::fs::File; +use std::fs::{File, OpenOptions}; use std::io::{self, Read, Seek, SeekFrom}; use std::os::unix::io::{AsRawFd, RawFd}; +use std::os::unix::net::UnixStream; +use std::os::unix::prelude::FromRawFd; +use std::path::Path; use std::sync::{Arc, Mutex}; -use vm_memory::FileOffset; +use userfaultfd::{FeatureFlags, Uffd, UffdBuilder}; +use utils::sock_ctrl_msg::ScmSocket; +use vm_memory::{FileOffset, GuestMemory, GuestMemoryRegion}; use arch::InitrdConfig; #[cfg(target_arch = "x86_64")] @@ -44,7 +50,7 @@ use crate::construct_kvm_mpidrs; use crate::device_manager::legacy::PortIODeviceManager; use crate::device_manager::mmio::MMIODeviceManager; use crate::device_manager::persist::MMIODevManagerConstructorArgs; -use crate::persist::{MemoryDescriptor, MicrovmState, MicrovmStateError}; +use crate::persist::{GuestRegionUffdMapping, MemoryDescriptor, MicrovmState, MicrovmStateError}; use crate::resources::VmResources; use crate::vmm_config::boot_source::BootConfig; use crate::vmm_config::instance_info::InstanceInfo; @@ -59,7 +65,7 @@ use crate::{device_manager, mem_size_mib, Error, EventManager, Vmm, VmmEventsObs pub enum StartMicrovmError { /// Unable to attach block device to Vmm. AttachBlockDevice(io::Error), - /// Unable to create the memory backing file. + /// Unable to create/open the memory backing file. BackingMemoryFile(io::Error), /// This error is thrown by the minimal boot loader implementation. ConfigureSystem(arch::Error), @@ -97,6 +103,17 @@ pub enum StartMicrovmError { RestoreMicrovmState(MicrovmStateError), /// Unable to set VmResources. SetVmResources(VmConfigError), + /// Failed to create an UFFD Builder. + CreateUffdBuilder(userfaultfd::Error), + /// Unable to connect to UDS in order to send information regarding + /// handling guest memory page-fault events. + UdsConnection(io::Error), + /// Failed to register guest memory regions to UFFD. + UffdMemoryRegionsRegister(userfaultfd::Error), + /// Failed to send guest memory layout and path to user fault FD used to handle + /// guest memory page faults. This information is sent to a UDS where a custom + /// page-fault handler process is listening. + UffdSend(kvm_ioctls::Error), } /// It's convenient to automatically convert `linux_loader::cmdline::Error`s @@ -183,6 +200,12 @@ impl Display for StartMicrovmError { } RestoreMicrovmState(err) => write!(f, "Cannot restore microvm state. Error: {}", err), SetVmResources(err) => write!(f, "Cannot set vm resources. Error: {}", err), + CreateUffdBuilder(err) => write!(f, "Cannot create uffd socket. Error: {}", err), + UdsConnection(err) => write!(f, "Cannot connect to uffd socket. Error: {}", err), + UffdMemoryRegionsRegister(err) => { + write!(f, "Cannot uffd memory region register. Error: {}", err) + } + UffdSend(err) => write!(f, "Cannot send to uffd. Error: {}", err), } } } @@ -336,22 +359,53 @@ pub fn build_microvm_for_boot( let track_dirty_pages = vm_resources.track_dirty_pages(); - let backing_memory_file = if let Some(ref file) = vm_resources.backing_memory_file { - file.set_len((vm_resources.vm_config().mem_size_mib * 1024 * 1024) as u64) - .map_err(|e| { - error!("Failed to set backing memory file size: {}", e); - StartMicrovmError::BackingMemoryFile(e) - })?; + let (guest_memory, memory_descriptor) = + if let Some(ref backend_config) = vm_resources.memory_backend { + match backend_config.backend_type { + crate::vmm_config::snapshot::MemBackendType::File => { + let file = OpenOptions::new() + .read(true) + .write(true) + .open(&backend_config.backend_path) + .map_err(BackingMemoryFile)?; + file.set_len((vm_resources.vm_config().mem_size_mib * 1024 * 1024) as u64) + .map_err(|e| { + error!("Failed to set backing memory file size: {}", e); + StartMicrovmError::BackingMemoryFile(e) + })?; + + let file = Arc::new(file); + + ( + create_guest_memory( + vm_resources.vm_config().mem_size_mib, + Some(file.clone()), + track_dirty_pages, + )?, + Some(MemoryDescriptor::File(file)), + ) + } + crate::vmm_config::snapshot::MemBackendType::Uffd => { + let (mem, uffd) = create_uffd_guest_memory( + vm_resources.vm_config().mem_size_mib, + backend_config.backend_path.as_path(), + track_dirty_pages, + )?; + + (mem, Some(MemoryDescriptor::Uffd(uffd))) + } + } + } else { + ( + create_guest_memory( + vm_resources.vm_config().mem_size_mib, + None, + track_dirty_pages, + )?, + None, + ) + }; - Some(file.clone()) - } else { - None - }; - let guest_memory = create_guest_memory( - vm_resources.vm_config().mem_size_mib, - backing_memory_file.clone(), - track_dirty_pages, - )?; let vcpu_config = vm_resources.vcpu_config(); let entry_addr = load_kernel(boot_config, &guest_memory)?; let initrd = load_initrd_from_config(boot_config, &guest_memory)?; @@ -383,7 +437,7 @@ pub fn build_microvm_for_boot( instance_info, event_manager, guest_memory, - backing_memory_file.map(MemoryDescriptor::File), + memory_descriptor, track_dirty_pages, vcpu_config.vcpu_count, )?; @@ -626,6 +680,111 @@ pub fn create_guest_memory( .map_err(StartMicrovmError::GuestMemoryMmap) } +/// Creates GuestMemory of `mem_size_mib` MiB in size. +pub fn create_uffd_guest_memory( + mem_size_mib: usize, + uds_socket_path: &Path, + track_dirty_pages: bool, +) -> std::result::Result<(GuestMemoryMmap, Uffd), StartMicrovmError> { + use StartMicrovmError::{ + BackingMemoryFile, CreateUffdBuilder, UdsConnection, UffdMemoryRegionsRegister, UffdSend, + }; + let mem_size = mem_size_mib << 20; + let arch_mem_regions = arch::arch_memory_regions(mem_size); + + let backing_memory_file = unsafe { + let mem_name = CString::new("vm-memory").expect("CString::new failed"); + Arc::new(File::from_raw_fd(libc::memfd_create(mem_name.as_ptr(), 0))) + }; + backing_memory_file + .set_len(mem_size as u64) + .map_err(BackingMemoryFile)?; + + let mut offset = 0_u64; + let guest_memory = vm_memory::create_guest_memory( + &arch_mem_regions + .iter() + .map(|(addr, size)| { + let file_offset = Some(FileOffset::from_arc(backing_memory_file.clone(), offset)); + offset += *size as u64; + + (file_offset, *addr, *size) + }) + .collect::>()[..], + track_dirty_pages, + ) + .map_err(StartMicrovmError::GuestMemoryMmap)?; + + let mut uffd_builder = UffdBuilder::new(); + uffd_builder.require_features(FeatureFlags::EVENT_REMOVE | FeatureFlags::PAGEFAULT_FLAG_WP); + + let uffd = uffd_builder + .close_on_exec(true) + .non_blocking(true) + .create() + .map_err(CreateUffdBuilder)?; + + let mut backend_mappings = Vec::with_capacity(guest_memory.num_regions()); + let mut offset = 0; + for mem_region in guest_memory.iter() { + let host_base_addr = mem_region.as_ptr(); + let size = mem_region.size(); + + uffd.register(host_base_addr as _, size as _) + .map_err(UffdMemoryRegionsRegister)?; + backend_mappings.push(GuestRegionUffdMapping { + base_host_virt_addr: host_base_addr as u64, + size, + offset, + }); + offset += mem_region.len(); + } + + // This is safe to unwrap() because we control the contents of the vector + // (i.e GuestRegionUffdMapping entries). + let backend_mappings = serde_json::to_string(&backend_mappings).unwrap(); + + let socket = UnixStream::connect(uds_socket_path).map_err(UdsConnection)?; + socket + .send_with_fd( + backend_mappings.as_bytes(), + // In the happy case we can close the fd since the other process has it open and is + // using it to serve us pages. + // + // The problem is that if other process crashes/exits, firecracker guest memory + // will simply revert to anon-mem behavior which would lead to silent errors and + // undefined behavior. + // + // To tackle this scenario, the page fault handler can notify Firecracker of any + // crashes/exits. There is no need for Firecracker to explicitly send its process ID. + // The external process can obtain Firecracker's PID by calling `getsockopt` with + // `libc::SO_PEERCRED` option like so: + // + // let mut val = libc::ucred { pid: 0, gid: 0, uid: 0 }; + // let mut ucred_size: u32 = mem::size_of::() as u32; + // libc::getsockopt( + // socket.as_raw_fd(), + // libc::SOL_SOCKET, + // libc::SO_PEERCRED, + // &mut val as *mut _ as *mut _, + // &mut ucred_size as *mut libc::socklen_t, + // ); + // + // Per this linux man page: https://man7.org/linux/man-pages/man7/unix.7.html, + // `SO_PEERCRED` returns the credentials (PID, UID and GID) of the peer process + // connected to this socket. The returned credentials are those that were in effect + // at the time of the `connect` call. + // + // Moreover, Firecracker holds a copy of the UFFD fd as well, so that even if the + // page fault handler process does not tear down Firecracker when necessary, the + // uffd will still be alive but with no one to serve faults, leading to guest freeze. + uffd.as_raw_fd(), + ) + .map_err(UffdSend)?; + + Ok((guest_memory, uffd)) +} + fn load_kernel( boot_config: &BootConfig, guest_memory: &GuestMemoryMmap, diff --git a/src/vmm/src/memory_snapshot.rs b/src/vmm/src/memory_snapshot.rs index 9eebe371da0..8e5293bcfcd 100644 --- a/src/vmm/src/memory_snapshot.rs +++ b/src/vmm/src/memory_snapshot.rs @@ -6,7 +6,9 @@ use std::fmt::{Display, Formatter}; use std::fs::File; use std::io::SeekFrom; +use std::time::Instant; +use libc::{MAP_SHARED, PROT_WRITE}; use utils::{errno, get_page_size}; use versionize::{VersionMap, Versionize, VersionizeResult}; use versionize_derive::Versionize; @@ -126,7 +128,11 @@ impl SnapshotMemory for GuestMemoryMmap { let mut writer_offset = 0; let page_size = get_page_size()?; - self.iter() + let start = Instant::now(); + let mut total_written = 0; + + let res = self + .iter() .enumerate() .try_for_each(|(slot, region)| { let kvm_bitmap = dirty_bitmap.get(&slot).unwrap(); @@ -150,23 +156,37 @@ impl SnapshotMemory for GuestMemoryMmap { } write_size += page_size; } else if write_size > 0 { + let start = Instant::now(); // We are at the end of a batch of dirty pages. region.write_all_to( MemoryRegionAddress(dirty_batch_start), writer, write_size, )?; + eprintln!( + "writing {}B took {}ms", + write_size, + start.elapsed().as_millis() + ); + total_written += write_size; write_size = 0; } } } if write_size > 0 { + let start = Instant::now(); region.write_all_to( MemoryRegionAddress(dirty_batch_start), writer, write_size, )?; + total_written += write_size; + eprintln!( + "writing {}B took {}ms", + write_size, + start.elapsed().as_millis() + ); } writer_offset += region.len(); if let Some(bitmap) = firecracker_bitmap { @@ -175,7 +195,15 @@ impl SnapshotMemory for GuestMemoryMmap { Ok(()) }) - .map_err(Error::WriteMemory) + .map_err(Error::WriteMemory); + + eprintln!( + "total write time: {}ms, total written: {}B", + start.elapsed().as_millis(), + total_written + ); + + res } /// Creates a GuestMemoryMmap backed by a `file` if present, otherwise backed @@ -199,6 +227,117 @@ impl SnapshotMemory for GuestMemoryMmap { } } +/// Dumps all pages of GuestMemoryMmap present in `dirty_bitmap` to a writer. +pub fn mem_dump_dirty( + mem_map: &GuestMemoryMmap, + fd: i32, + len: usize, + dirty_bitmap: &DirtyBitmap, +) -> std::result::Result<(), Error> { + let mut writer_offset = 0_u64; + let page_size = get_page_size()?; + + let start = Instant::now(); + let mut total_written = 0; + + let source_map = + unsafe { libc::mmap(std::ptr::null_mut(), len, PROT_WRITE, MAP_SHARED, fd, 0) }; + + let res = mem_map + .iter() + .enumerate() + .try_for_each(|(slot, region)| { + let kvm_bitmap = dirty_bitmap.get(&slot).unwrap(); + let firecracker_bitmap = region.bitmap(); + let mut write_size = 0; + let mut dirty_batch_start: u64 = 0; + + let mmap_base = region.get_host_address(MemoryRegionAddress(0)).unwrap(); + for (i, v) in kvm_bitmap.iter().enumerate() { + for j in 0..64 { + let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64; + let page_offset = ((i * 64) + j) * page_size; + let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset); + if is_kvm_page_dirty || is_firecracker_page_dirty { + // We are at the start of a new batch of dirty pages. + if write_size == 0 { + // Seek forward over the unmodified pages. + dirty_batch_start = page_offset as u64; + } + write_size += page_size; + } else if write_size > 0 { + let start = Instant::now(); + + eprintln!( + "starting write of {}B (source {}, dest {})", + write_size, + dirty_batch_start, + writer_offset + dirty_batch_start + ); + unsafe { + std::ptr::copy_nonoverlapping( + mmap_base.offset((dirty_batch_start) as isize), + source_map.offset((writer_offset + dirty_batch_start) as isize) + as *mut u8, + write_size, + ); + } + + eprintln!( + "writing {}B took {}ms", + write_size, + start.elapsed().as_millis() + ); + total_written += write_size; + write_size = 0; + } + } + } + + if write_size > 0 { + let start = Instant::now(); + + eprintln!( + "starting final write of {}B (source {}, dest {}) (total_size: {})", + write_size, + dirty_batch_start, + writer_offset + dirty_batch_start, + len + ); + unsafe { + std::ptr::copy_nonoverlapping( + mmap_base.offset((dirty_batch_start) as isize), + source_map.offset((writer_offset + dirty_batch_start) as isize) as *mut u8, + write_size, + ); + } + total_written += write_size; + eprintln!( + "writing {}B took {}ms", + write_size, + start.elapsed().as_millis() + ); + } + writer_offset += region.len(); + if let Some(bitmap) = firecracker_bitmap { + bitmap.reset(); + } + + Ok(()) + }) + .map_err(Error::WriteMemory); + + eprintln!( + "total write time: {}ms, total written: {}B", + start.elapsed().as_millis(), + total_written + ); + + eprintln!("memfd {}, len {}", fd, len); + + res +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 0ff5ca1f9da..8ee7cb107f9 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -3,19 +3,23 @@ //! Defines state structures for saving/restoring a Firecracker microVM. +use std::ffi::CString; use std::fmt::{Display, Formatter}; use std::fs::{File, OpenOptions}; use std::io::{self, Write}; use std::os::unix::io::AsRawFd; use std::os::unix::net::UnixStream; +use std::os::unix::prelude::FromRawFd; use std::path::Path; use std::sync::{Arc, Mutex}; +use std::time::Instant; #[cfg(target_arch = "aarch64")] use arch::regs::{get_manufacturer_id_from_host, get_manufacturer_id_from_state}; #[cfg(target_arch = "x86_64")] use cpuid::common::{get_vendor_id_from_cpuid, get_vendor_id_from_host}; use devices::virtio::TYPE_NET; +use libc::memfd_create; use logger::{error, info}; use seccompiler::BpfThreadMap; use serde::Serialize; @@ -29,7 +33,7 @@ use vm_memory::{GuestMemory, GuestMemoryMmap}; use crate::builder::{self, StartMicrovmError}; use crate::device_manager::persist::{DeviceStates, Error as DevicePersistError}; -use crate::memory_snapshot::{GuestMemoryState, SnapshotMemory}; +use crate::memory_snapshot::{mem_dump_dirty, GuestMemoryState, SnapshotMemory}; use crate::resources::VmResources; #[cfg(target_arch = "x86_64")] use crate::version_map::FC_V0_23_SNAP_VERSION; @@ -325,52 +329,84 @@ fn snapshot_memory_to_file( snapshot_type: &SnapshotType, ) -> std::result::Result<(), CreateSnapshotError> { use self::CreateSnapshotError::*; - if OpenOptions::new().read(true).open(mem_file_path).is_ok() - && snapshot_type == &SnapshotType::Diff - { - // // The memory file already exists. - // // We're going to use the msync behaviour - // for region in vmm.guest_memory().iter() { - // info!("msyncing memory region"); - // unsafe { - // if libc::msync(region.as_ptr() as _, region.len() as _, libc::MS_SYNC) == -1 { - // return Err(CreateSnapshotError::Memory( - // memory_snapshot::Error::CreateRegion(vm_memory::MmapRegionError::Mmap( - // std::io::Error::last_os_error(), - // )), - // )); - // } - // }; - // } - - return Ok(()); - } + // if OpenOptions::new().read(true).open(mem_file_path).is_ok() + // && snapshot_type == &SnapshotType::Diff + // { + // // The memory file already exists. + // // We're going to use the msync behaviour + // for region in vmm.guest_memory().iter() { + // info!("msyncing memory region"); + // unsafe { + // if libc::msync(region.as_ptr() as _, region.len() as _, libc::MS_SYNC) == -1 { + // return Err(CreateSnapshotError::Memory( + // memory_snapshot::Error::CreateRegion(vm_memory::MmapRegionError::Mmap( + // std::io::Error::last_os_error(), + // )), + // )); + // } + // }; + // } + + // return Ok(()); + // } + + eprintln!("snapshot_memory_to_file"); + eprintln!("{}", mem_file_path.to_string_lossy()); + let start = Instant::now(); + let mut file = if mem_file_path.to_string_lossy() == "memfd" { + let fd = unsafe { + let memfd_name = CString::new("diff").unwrap(); + memfd_create(memfd_name.as_ptr(), 0) + }; + if fd == -1 { + return Err(MemoryBackingFile( + "memfd_create", + std::io::Error::last_os_error(), + )); + } - let mut file = OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(mem_file_path) - .map_err(|err| MemoryBackingFile("open", err))?; + unsafe { File::from_raw_fd(fd) } + } else { + OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(mem_file_path) + .map_err(|err| MemoryBackingFile("open", err))? + }; // Set the length of the file to the full size of the memory area. let mem_size_mib = mem_size_mib(vmm.guest_memory()); + // // Set the length of the file to the full size of the memory area. + // memory_file + // .set_len((mem_size_mib * 1024 * 1024) as u64) + // .map_err(|err| MemoryBackingFile("set_length", err))?; + file.set_len((mem_size_mib * 1024 * 1024) as u64) .map_err(|err| MemoryBackingFile("set_length", err))?; match snapshot_type { SnapshotType::Diff => { let dirty_bitmap = vmm.get_dirty_bitmap().map_err(DirtyBitmap)?; - vmm.guest_memory() - .dump_dirty(&mut file, &dirty_bitmap) - .map_err(Memory) + + mem_dump_dirty( + vmm.guest_memory(), + file.as_raw_fd(), + (mem_size_mib * 1024 * 1024) as usize, + &dirty_bitmap, + ) + .map_err(Memory) + + // vmm.guest_memory() + // .dump_dirty(&mut memory_file, &dirty_bitmap) + // .map_err(Memory) } SnapshotType::Full => vmm.guest_memory().dump(&mut file).map_err(Memory), }?; - file.flush() - .map_err(|err| MemoryBackingFile("flush", err))?; - file.sync_all() - .map_err(|err| MemoryBackingFile("sync_all", err)) + + eprintln!("snapshot_memory_to_file took {:?}", start.elapsed()); + + Ok(()) } /// Validate the microVM version and translate it to its corresponding snapshot data format. @@ -596,25 +632,41 @@ fn guest_memory_from_file( )) } -fn guest_memory_from_uffd( +pub(crate) fn guest_memory_from_uffd( mem_uds_path: &Path, mem_state: &GuestMemoryState, track_dirty_pages: bool, enable_balloon: bool, ) -> std::result::Result<(GuestMemoryMmap, Option), LoadSnapshotError> { use self::LoadSnapshotError::{ - CreateUffdBuilder, DeserializeMemory, UdsConnection, UffdMemoryRegionsRegister, UffdSend, + CreateUffdBuilder, DeserializeMemory, MemoryBackingFile, UdsConnection, + UffdMemoryRegionsRegister, UffdSend, }; + let backing_memory_file = unsafe { + let mem_name = CString::new("vm-memory").expect("CString::new failed"); + File::from_raw_fd(libc::memfd_create(mem_name.as_ptr(), 0)) + }; + backing_memory_file + .set_len( + mem_state + .regions + .iter() + .map(|region| region.size) + .sum::() as u64, + ) + .map_err(MemoryBackingFile)?; + let guest_memory = - GuestMemoryMmap::restore(None, mem_state, track_dirty_pages).map_err(DeserializeMemory)?; + GuestMemoryMmap::restore(Some(&backing_memory_file), mem_state, track_dirty_pages) + .map_err(DeserializeMemory)?; let mut uffd_builder = UffdBuilder::new(); if enable_balloon { // We enable this so that the page fault handler can add logic // for treating madvise(MADV_DONTNEED) events triggerd by balloon inflation. - uffd_builder.require_features(FeatureFlags::EVENT_REMOVE); + uffd_builder.require_features(FeatureFlags::EVENT_REMOVE | FeatureFlags::PAGEFAULT_FLAG_WP); } let uffd = uffd_builder diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index fb9c3b11e61..94c4c7d05fa 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 use std::convert::From; -use std::fs::File; use std::sync::{Arc, Mutex, MutexGuard}; use logger::info; @@ -21,6 +20,7 @@ use crate::vmm_config::machine_config::{VmConfig, VmConfigError, VmUpdateConfig} use crate::vmm_config::metrics::{init_metrics, MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::*; +use crate::vmm_config::snapshot::MemBackendConfig; use crate::vmm_config::vsock::*; use crate::vstate::vcpu::VcpuConfig; @@ -118,8 +118,8 @@ pub struct VmResources { pub mmds_size_limit: usize, /// Whether or not to load boot timer device. pub boot_timer: bool, - /// When backed by a memory file, this should be set - pub backing_memory_file: Option>, + /// When backed by a memory on boot, this should be set + pub memory_backend: Option, } impl VmResources { @@ -240,13 +240,13 @@ impl VmResources { } /// Returns the config for the backing memory file - pub fn backing_memory_file(&self) -> Option> { - self.backing_memory_file.clone() + pub fn memory_backend(&self) -> Option { + self.memory_backend.clone() } /// Sets the backing memory file - pub fn set_backing_memory_file(&mut self, backing_mem_file: Arc) { - self.backing_memory_file.get_or_insert(backing_mem_file); + pub fn set_memory_backend(&mut self, backing_mem_file: MemBackendConfig) { + self.memory_backend.get_or_insert(backing_mem_file); } /// Returns the VmConfig. @@ -588,7 +588,7 @@ mod tests { mmds: None, boot_timer: false, mmds_size_limit: HTTP_MAX_PAYLOAD_SIZE, - backing_memory_file: None, + memory_backend: None, } } diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 87f9f2aae2a..4c263c048b7 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use std::fmt::{Display, Formatter}; +use std::result; use std::sync::{Arc, Mutex, MutexGuard}; -use std::{fs, result}; use logger::*; use mmds::data_store::{self, Mmds}; @@ -34,13 +34,14 @@ use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, Drive use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::logger::{LoggerConfig, LoggerConfigError}; use crate::vmm_config::machine_config::{VmConfig, VmConfigError, VmUpdateConfig}; -use crate::vmm_config::memory_backing_file::{MemoryBackingFileConfig, MemoryBackingFileError}; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::{ NetworkInterfaceConfig, NetworkInterfaceError, NetworkInterfaceUpdateConfig, }; -use crate::vmm_config::snapshot::{CreateSnapshotParams, LoadSnapshotParams, SnapshotType}; +use crate::vmm_config::snapshot::{ + CreateSnapshotParams, LoadSnapshotParams, MemBackendConfig, SnapshotType, +}; use crate::vmm_config::vsock::{VsockConfigError, VsockDeviceConfig}; use crate::vmm_config::{self, RateLimiterUpdate}; use crate::{EventManager, FcExitCode}; @@ -100,9 +101,9 @@ pub enum VmmAction { /// `BalloonDeviceConfig` as input. This action can only be called before the microVM /// has booted. SetBalloonDevice(BalloonDeviceConfig), - /// Set the memory backing file for the VM. The VM will use this backing file to store its + /// Set the memory backend for the VM. The VM will use this backend to handle its /// memory. This action can only be called before the microVM has booted. - SetMemoryBackingFile(MemoryBackingFileConfig), + SetMemoryBackend(MemBackendConfig), /// Set the MMDS configuration. SetMmdsConfiguration(MmdsConfig), /// Set the vsock device or update the one that already exists using the @@ -134,8 +135,6 @@ pub enum VmmAction { pub enum VmmActionError { /// The action `SetBalloonDevice` failed because of bad user input. BalloonConfig(BalloonConfigError), - /// The action `SetMemoryBackingFile` failed because of bad user input. - MemoryBackingFile(MemoryBackingFileError), /// The action `ConfigureBootSource` failed because of bad user input. BootSource(BootSourceConfigError), /// The action `CreateSnapshot` failed. @@ -188,7 +187,6 @@ impl Display for VmmActionError { match self { BalloonConfig(err) => err.to_string(), BootSource(err) => err.to_string(), - MemoryBackingFile(err) => err.to_string(), CreateSnapshot(err) => err.to_string(), DriveConfig(err) => err.to_string(), InternalVmm(err) => format!("Internal Vmm error: {}", err), @@ -429,7 +427,7 @@ impl<'a> PrebootApiController<'a> { SetBalloonDevice(config) => self.set_balloon_device(config), SetVsockDevice(config) => self.set_vsock_device(config), SetMmdsConfiguration(config) => self.set_mmds_config(config), - SetMemoryBackingFile(config) => self.set_backing_memory_file(config), + SetMemoryBackend(config) => self.set_memory_backend(config), StartMicroVm => self.start_microvm(), UpdateVmConfiguration(config) => self.update_vm_config(config), // Operations not allowed pre-boot. @@ -455,19 +453,9 @@ impl<'a> PrebootApiController<'a> { .map_err(VmmActionError::BalloonConfig) } - fn set_backing_memory_file(&mut self, cfg: MemoryBackingFileConfig) -> ActionResult { + fn set_memory_backend(&mut self, cfg: MemBackendConfig) -> ActionResult { self.boot_path = true; - let file = fs::OpenOptions::new() - .create(true) - .read(true) - .write(true) - .open(cfg.path) - .map(Arc::new) - .map_err(|e| { - VmmActionError::MemoryBackingFile(MemoryBackingFileError::CreateFile(e)) - })?; - - self.vm_resources.backing_memory_file = Some(file); + self.vm_resources.memory_backend = Some(cfg); Ok(VmmData::Empty) } @@ -679,7 +667,7 @@ impl RuntimeApiController { | InsertNetworkDevice(_) | LoadSnapshot(_) | SetBalloonDevice(_) - | SetMemoryBackingFile(_) + | SetMemoryBackend(_) | SetVsockDevice(_) | SetMmdsConfiguration(_) | StartMicroVm @@ -888,7 +876,7 @@ mod tests { pub boot_timer: bool, // when `true`, all self methods are forced to fail pub force_errors: bool, - pub backing_memory_file: Option>, + pub memory_backend: Option, } impl MockVmRes { diff --git a/src/vmm/src/vmm_config/memory_backing_file.rs b/src/vmm/src/vmm_config/memory_backing_file.rs deleted file mode 100644 index f5f5bd9ce6f..00000000000 --- a/src/vmm/src/vmm_config/memory_backing_file.rs +++ /dev/null @@ -1,31 +0,0 @@ -use std::{ - fmt::{Display, Formatter}, - io, - path::PathBuf, -}; - -use serde::{Deserialize, Serialize}; - -/// Keeps the Memory Backing file configuration. -#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] -#[serde(deny_unknown_fields)] -pub struct MemoryBackingFileConfig { - /// Location of the memory backing file. - pub path: PathBuf, -} - -/// Errors associated with the operations allowed on a memory backing file. -#[derive(Debug)] -pub enum MemoryBackingFileError { - /// Failed to create the block device - CreateFile(io::Error), -} - -impl Display for MemoryBackingFileError { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - use self::MemoryBackingFileError::*; - match self { - CreateFile(e) => write!(f, "Unable to create the memory backing file: {}", e), - } - } -} diff --git a/src/vmm/src/vmm_config/mod.rs b/src/vmm/src/vmm_config/mod.rs index 61b235a0fbb..c509fce888f 100644 --- a/src/vmm/src/vmm_config/mod.rs +++ b/src/vmm/src/vmm_config/mod.rs @@ -23,8 +23,6 @@ pub mod instance_info; pub mod logger; /// Wrapper for configuring the memory and CPU of the microVM. pub mod machine_config; -/// Wrapper for configuring the backing memory file. -pub mod memory_backing_file; /// Wrapper for configuring the metrics. pub mod metrics; /// Wrapper for configuring the MMDS. diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs index c8634b66980..d06a92f7d3a 100644 --- a/src/vmm/src/vmm_config/snapshot.rs +++ b/src/vmm/src/vmm_config/snapshot.rs @@ -28,7 +28,7 @@ impl Default for SnapshotType { /// 1) A file that contains the guest memory to be loaded, /// 2) An UDS where a custom page-fault handler process is listening for /// the UFFD set up by Firecracker to handle its guest memory page faults. -#[derive(Debug, Deserialize, PartialEq)] +#[derive(Debug, Clone, Deserialize, PartialEq)] pub enum MemBackendType { /// Guest memory contents will be loaded from a file. File, @@ -91,7 +91,7 @@ pub struct LoadSnapshotConfig { } /// Stores the configuration used for managing snapshot memory. -#[derive(Debug, Deserialize, PartialEq)] +#[derive(Debug, Clone, Deserialize, PartialEq)] #[serde(deny_unknown_fields)] pub struct MemBackendConfig { /// Path to the backend used to handle the guest memory. From 182554dff4a83e9c9b61f586e532d888bea7e64a Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Fri, 4 Nov 2022 14:04:24 +0100 Subject: [PATCH 05/18] changes --- .cargo/config | 4 +- Cargo.lock | 46 +++++------ src/cpuid/src/transformer/intel.rs | 4 +- src/jailer/src/env.rs | 5 +- src/vmm/Cargo.toml | 10 ++- src/vmm/src/builder.rs | 73 +++++++++++------- src/vmm/src/persist.rs | 118 ++++++++++------------------- tests/host_tools/uffd/Cargo.toml | 2 +- tools/devtool | 1 + 9 files changed, 117 insertions(+), 146 deletions(-) diff --git a/.cargo/config b/.cargo/config index ef7728c6c98..0678c0723c6 100644 --- a/.cargo/config +++ b/.cargo/config @@ -1,6 +1,6 @@ [build] -target = "x86_64-unknown-linux-musl" -target-dir = "build/cargo_target" +# target = "x86_64-unknown-linux-musl" +# target-dir = "build/cargo_target" [net] git-fetch-with-cli = true diff --git a/Cargo.lock b/Cargo.lock index 338b2b1a74e..4ccc0cc02a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,7 +17,7 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "cipher", "cpufeatures", "opaque-debug", @@ -111,9 +111,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.59.2" +version = "0.60.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" +checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6" dependencies = [ "bitflags", "cexpr", @@ -182,12 +182,6 @@ dependencies = [ "nom", ] -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - [[package]] name = "cfg-if" version = "1.0.0" @@ -292,7 +286,7 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", ] @@ -302,7 +296,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] @@ -313,7 +307,7 @@ version = "0.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c00d6d2ea26e8b151d99093005cb442fb9a37aeaca582a03ec70946f49ab5ed9" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", "lazy_static", "memoffset", @@ -326,7 +320,7 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "lazy_static", ] @@ -464,7 +458,7 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "wasi", ] @@ -595,7 +589,7 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efbc0f03f9a775e9f6aed295c6a1ba2253c5757a9e03d55c6caa46a681abcddd" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "winapi", ] @@ -614,7 +608,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -691,7 +685,7 @@ checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6" dependencies = [ "bitflags", "cc", - "cfg-if 1.0.0", + "cfg-if", "libc", "memoffset", ] @@ -778,7 +772,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8419d2b623c7c0896ff2d5d96e2cb4ede590fed28fcc34934f4c33c036e620a1" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "cpufeatures", "opaque-debug", "universal-hash", @@ -1142,12 +1136,11 @@ dependencies = [ [[package]] name = "userfaultfd" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b738009e099b4ded1ecf19dfb7631f69c24f16e0af6d29fd9b3f54a092aca46" +version = "0.5.0" +source = "git+https://github.com/codesandbox/userfaultfd-rs.git?rev=3bafb90a85d8f2aa664dd391a5ae63ad1f43e3f5#3bafb90a85d8f2aa664dd391a5ae63ad1f43e3f5" dependencies = [ "bitflags", - "cfg-if 1.0.0", + "cfg-if", "libc", "nix", "thiserror", @@ -1156,13 +1149,12 @@ dependencies = [ [[package]] name = "userfaultfd-sys" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a4be003c705d2c8dc1234d473856945e291bb998ac2e2d83e70328d964d7458" +version = "0.4.2" +source = "git+https://github.com/codesandbox/userfaultfd-rs.git?rev=3bafb90a85d8f2aa664dd391a5ae63ad1f43e3f5#3bafb90a85d8f2aa664dd391a5ae63ad1f43e3f5" dependencies = [ "bindgen", "cc", - "cfg-if 0.1.10", + "cfg-if", ] [[package]] @@ -1321,7 +1313,7 @@ version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25f1af7423d8588a3d840681122e72e6a24ddbcb3f0ec385cac0d12d24256c06" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "wasm-bindgen-macro", ] diff --git a/src/cpuid/src/transformer/intel.rs b/src/cpuid/src/transformer/intel.rs index 439058c8780..5272ac4437b 100644 --- a/src/cpuid/src/transformer/intel.rs +++ b/src/cpuid/src/transformer/intel.rs @@ -126,8 +126,8 @@ impl CpuidTransformer for IntelCpuidTransformer { leaf_0xa::LEAF_NUM => Some(intel::update_perf_mon_entry), leaf_0xb::LEAF_NUM => Some(intel::update_extended_topology_entry), 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), - // hypervisor stuff - 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), + // // hypervisor stuff + // 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), _ => None, } } diff --git a/src/jailer/src/env.rs b/src/jailer/src/env.rs index 405b74e4337..bd8c39219b2 100644 --- a/src/jailer/src/env.rs +++ b/src/jailer/src/env.rs @@ -371,9 +371,8 @@ impl Env { // a new PathBuf, with something like chroot_dir.join(exec_file_name) ?! self.chroot_dir.push(exec_file_name); - // TODO: hard link instead of copy? This would save up disk space, but hard linking is - // not always possible :( - fs::copy(&self.exec_file_path, &self.chroot_dir).map_err(|err| { + // We hard link instead of copy for space savings and to retain the capabilities + fs::hard_link(&self.exec_file_path, &self.chroot_dir).map_err(|err| { Error::Copy(self.exec_file_path.clone(), self.chroot_dir.clone(), err) })?; diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 68a64a4d23c..50a5bd78232 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -14,12 +14,16 @@ libc = ">=0.2.39" linux-loader = ">=0.4.0" serde = { version = ">=1.0.27", features = ["derive"] } serde_json = ">=1.0.9" -userfaultfd = ">=0.4.0" +userfaultfd = { git = "https://github.com/codesandbox/userfaultfd-rs.git", rev = "3bafb90a85d8f2aa664dd391a5ae63ad1f43e3f5", features = [ + "linux5_7", +] } versionize = ">=0.1.6" versionize_derive = ">=0.1.3" vm-superio = ">=0.4.0" vm-allocator = "0.1.0" -derive_more = { version = "0.99.17", default-features = false, features = ["from"] } +derive_more = { version = "0.99.17", default-features = false, features = [ + "from", +] } arch = { path = "../arch" } devices = { path = "../devices" } @@ -27,7 +31,7 @@ logger = { path = "../logger" } mmds = { path = "../mmds" } rate_limiter = { path = "../rate_limiter" } seccompiler = { path = "../seccompiler" } -snapshot = { path = "../snapshot"} +snapshot = { path = "../snapshot" } utils = { path = "../utils" } virtio_gen = { path = "../virtio_gen" } vm-memory = { path = "../vm-memory" } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 681faf0f440..a83c6baeadc 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -13,9 +13,9 @@ use std::os::unix::net::UnixStream; use std::os::unix::prelude::FromRawFd; use std::path::Path; use std::sync::{Arc, Mutex}; -use userfaultfd::{FeatureFlags, Uffd, UffdBuilder}; +use userfaultfd::{FeatureFlags, RegisterMode, Uffd, UffdBuilder}; use utils::sock_ctrl_msg::ScmSocket; -use vm_memory::{FileOffset, GuestMemory, GuestMemoryRegion}; +use vm_memory::{FileOffset, GuestMemory}; use arch::InitrdConfig; #[cfg(target_arch = "x86_64")] @@ -114,6 +114,9 @@ pub enum StartMicrovmError { /// guest memory page faults. This information is sent to a UDS where a custom /// page-fault handler process is listening. UffdSend(kvm_ioctls::Error), + + /// Failed to get the memfd from the uffd socket + NoMemFdReceived, } /// It's convenient to automatically convert `linux_loader::cmdline::Error`s @@ -206,6 +209,7 @@ impl Display for StartMicrovmError { write!(f, "Cannot uffd memory region register. Error: {}", err) } UffdSend(err) => write!(f, "Cannot send to uffd. Error: {}", err), + NoMemFdReceived => write!(f, "No memfd received from uffd."), } } } @@ -359,13 +363,14 @@ pub fn build_microvm_for_boot( let track_dirty_pages = vm_resources.track_dirty_pages(); - let (guest_memory, memory_descriptor) = + let (guest_memory, memory_descriptor, _file) = if let Some(ref backend_config) = vm_resources.memory_backend { match backend_config.backend_type { crate::vmm_config::snapshot::MemBackendType::File => { let file = OpenOptions::new() .read(true) .write(true) + .create(true) .open(&backend_config.backend_path) .map_err(BackingMemoryFile)?; file.set_len((vm_resources.vm_config().mem_size_mib * 1024 * 1024) as u64) @@ -383,16 +388,17 @@ pub fn build_microvm_for_boot( track_dirty_pages, )?, Some(MemoryDescriptor::File(file)), + None, ) } crate::vmm_config::snapshot::MemBackendType::Uffd => { - let (mem, uffd) = create_uffd_guest_memory( + let (mem, uffd, file) = create_uffd_guest_memory( vm_resources.vm_config().mem_size_mib, backend_config.backend_path.as_path(), track_dirty_pages, )?; - (mem, Some(MemoryDescriptor::Uffd(uffd))) + (mem, Some(MemoryDescriptor::Uffd(uffd)), Some(file)) } } } else { @@ -403,6 +409,7 @@ pub fn build_microvm_for_boot( track_dirty_pages, )?, None, + None, ) }; @@ -685,20 +692,21 @@ pub fn create_uffd_guest_memory( mem_size_mib: usize, uds_socket_path: &Path, track_dirty_pages: bool, -) -> std::result::Result<(GuestMemoryMmap, Uffd), StartMicrovmError> { - use StartMicrovmError::{ - BackingMemoryFile, CreateUffdBuilder, UdsConnection, UffdMemoryRegionsRegister, UffdSend, - }; +) -> std::result::Result<(GuestMemoryMmap, Uffd, Arc), StartMicrovmError> { + use StartMicrovmError::{CreateUffdBuilder, NoMemFdReceived, UdsConnection, UffdSend}; + + let mut socket = UnixStream::connect(uds_socket_path).map_err(UdsConnection)?; + + let mut buf = [0u8; 8]; + let (_, memfd) = socket.recv_with_fd(&mut buf).map_err(UffdSend)?; + + if memfd.is_none() { + return Err(NoMemFdReceived); + } + let mem_size = mem_size_mib << 20; let arch_mem_regions = arch::arch_memory_regions(mem_size); - - let backing_memory_file = unsafe { - let mem_name = CString::new("vm-memory").expect("CString::new failed"); - Arc::new(File::from_raw_fd(libc::memfd_create(mem_name.as_ptr(), 0))) - }; - backing_memory_file - .set_len(mem_size as u64) - .map_err(BackingMemoryFile)?; + let backing_memory_file = Arc::new(memfd.unwrap()); let mut offset = 0_u64; let guest_memory = vm_memory::create_guest_memory( @@ -715,12 +723,17 @@ pub fn create_uffd_guest_memory( ) .map_err(StartMicrovmError::GuestMemoryMmap)?; - let mut uffd_builder = UffdBuilder::new(); - uffd_builder.require_features(FeatureFlags::EVENT_REMOVE | FeatureFlags::PAGEFAULT_FLAG_WP); - - let uffd = uffd_builder - .close_on_exec(true) - .non_blocking(true) + let uffd = UffdBuilder::new() + .require_features( + FeatureFlags::EVENT_REMOVE + | FeatureFlags::EVENT_REMAP + | FeatureFlags::EVENT_FORK + | FeatureFlags::EVENT_UNMAP + | FeatureFlags::MISSING_SHMEM + | FeatureFlags::MINOR_SHMEM + | FeatureFlags::PAGEFAULT_FLAG_WP, + ) + .user_mode_only(false) .create() .map_err(CreateUffdBuilder)?; @@ -730,21 +743,18 @@ pub fn create_uffd_guest_memory( let host_base_addr = mem_region.as_ptr(); let size = mem_region.size(); - uffd.register(host_base_addr as _, size as _) - .map_err(UffdMemoryRegionsRegister)?; backend_mappings.push(GuestRegionUffdMapping { base_host_virt_addr: host_base_addr as u64, size, offset, }); - offset += mem_region.len(); + offset += size as u64; } // This is safe to unwrap() because we control the contents of the vector // (i.e GuestRegionUffdMapping entries). let backend_mappings = serde_json::to_string(&backend_mappings).unwrap(); - let socket = UnixStream::connect(uds_socket_path).map_err(UdsConnection)?; socket .send_with_fd( backend_mappings.as_bytes(), @@ -782,7 +792,12 @@ pub fn create_uffd_guest_memory( ) .map_err(UffdSend)?; - Ok((guest_memory, uffd)) + // Wait for UFFD to be ready. + // TODO: maybe add a timeout? + let mut buf = [0; 2]; + socket.read_exact(&mut buf).map_err(UdsConnection)?; + + Ok((guest_memory, uffd, backing_memory_file)) } fn load_kernel( @@ -883,7 +898,7 @@ pub(crate) fn setup_kvm_vm( .map_err(Error::KvmContext) .map_err(Internal)?; let mut vm = Vm::new(kvm.fd()).map_err(Error::Vm).map_err(Internal)?; - vm.memory_init(&guest_memory, kvm.max_memslots(), track_dirty_pages) + vm.memory_init(guest_memory, kvm.max_memslots(), track_dirty_pages) .map_err(Error::Vm) .map_err(Internal)?; Ok(vm) diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 8ee7cb107f9..36667c0d578 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -6,7 +6,7 @@ use std::ffi::CString; use std::fmt::{Display, Formatter}; use std::fs::{File, OpenOptions}; -use std::io::{self, Write}; +use std::io::{self, Read, Write}; use std::os::unix::io::AsRawFd; use std::os::unix::net::UnixStream; use std::os::unix::prelude::FromRawFd; @@ -228,6 +228,8 @@ pub enum LoadSnapshotError { /// Unable to connect to UDS in order to send information regarding /// handling guest memory page-fault events. UdsConnection(io::Error), + /// We didn't get the memfd when handshaking with the uffd manager + NoMemFdReceived, /// Failed to register guest memory regions to UFFD. UffdMemoryRegionsRegister(userfaultfd::Error), /// Failed to send guest memory layout and path to user fault FD used to handle @@ -249,6 +251,7 @@ impl Display for LoadSnapshotError { } InvalidSnapshot(err) => write!(f, "Snapshot sanity check failed: {}", err), MemoryBackingFile(err) => write!(f, "Cannot open the memory file: {}", err), + NoMemFdReceived => write!(f, "No memory file descriptor received"), ResumeMicroVm(err) => write!( f, "Failed to resume microVM after loading snapshot: {}", @@ -280,7 +283,7 @@ pub fn create_snapshot( version_map: VersionMap, ) -> std::result::Result<(), CreateSnapshotError> { // Fail early from invalid target version. - let snapshot_data_version = get_snapshot_data_version(¶ms.version, &version_map, &vmm)?; + let snapshot_data_version = get_snapshot_data_version(¶ms.version, &version_map, vmm)?; let microvm_state = vmm .save_state() @@ -293,7 +296,9 @@ pub fn create_snapshot( version_map, )?; - snapshot_memory_to_file(vmm, ¶ms.mem_file_path, ¶ms.snapshot_type)?; + if params.snapshot_type == SnapshotType::Full { + snapshot_memory_to_file(vmm, ¶ms.mem_file_path, ¶ms.snapshot_type)?; + } Ok(()) } @@ -329,30 +334,7 @@ fn snapshot_memory_to_file( snapshot_type: &SnapshotType, ) -> std::result::Result<(), CreateSnapshotError> { use self::CreateSnapshotError::*; - // if OpenOptions::new().read(true).open(mem_file_path).is_ok() - // && snapshot_type == &SnapshotType::Diff - // { - // // The memory file already exists. - // // We're going to use the msync behaviour - // for region in vmm.guest_memory().iter() { - // info!("msyncing memory region"); - // unsafe { - // if libc::msync(region.as_ptr() as _, region.len() as _, libc::MS_SYNC) == -1 { - // return Err(CreateSnapshotError::Memory( - // memory_snapshot::Error::CreateRegion(vm_memory::MmapRegionError::Mmap( - // std::io::Error::last_os_error(), - // )), - // )); - // } - // }; - // } - - // return Ok(()); - // } - - eprintln!("snapshot_memory_to_file"); - eprintln!("{}", mem_file_path.to_string_lossy()); - let start = Instant::now(); + let mut file = if mem_file_path.to_string_lossy() == "memfd" { let fd = unsafe { let memfd_name = CString::new("diff").unwrap(); @@ -377,11 +359,7 @@ fn snapshot_memory_to_file( // Set the length of the file to the full size of the memory area. let mem_size_mib = mem_size_mib(vmm.guest_memory()); - // // Set the length of the file to the full size of the memory area. - // memory_file - // .set_len((mem_size_mib * 1024 * 1024) as u64) - // .map_err(|err| MemoryBackingFile("set_length", err))?; - + // Set the length of the file to the full size of the memory area. file.set_len((mem_size_mib * 1024 * 1024) as u64) .map_err(|err| MemoryBackingFile("set_length", err))?; @@ -396,16 +374,10 @@ fn snapshot_memory_to_file( &dirty_bitmap, ) .map_err(Memory) - - // vmm.guest_memory() - // .dump_dirty(&mut memory_file, &dirty_bitmap) - // .map_err(Memory) } SnapshotType::Full => vmm.guest_memory().dump(&mut file).map_err(Memory), }?; - eprintln!("snapshot_memory_to_file took {:?}", start.elapsed()); - Ok(()) } @@ -575,14 +547,8 @@ pub fn restore_from_snapshot( (guest_memory, Some(MemoryDescriptor::File(Arc::new(file)))) } MemBackendType::Uffd => { - let (guest_memory, uffd) = guest_memory_from_uffd( - mem_backend_path, - mem_state, - track_dirty_pages, - // We enable the UFFD_FEATURE_EVENT_REMOVE feature only if a balloon device - // is present in the microVM state. - microvm_state.device_states.balloon_device.is_some(), - )?; + let (guest_memory, uffd) = + guest_memory_from_uffd(mem_backend_path, mem_state, track_dirty_pages)?; (guest_memory, uffd.map(MemoryDescriptor::Uffd)) } @@ -636,42 +602,34 @@ pub(crate) fn guest_memory_from_uffd( mem_uds_path: &Path, mem_state: &GuestMemoryState, track_dirty_pages: bool, - enable_balloon: bool, ) -> std::result::Result<(GuestMemoryMmap, Option), LoadSnapshotError> { - use self::LoadSnapshotError::{ - CreateUffdBuilder, DeserializeMemory, MemoryBackingFile, UdsConnection, - UffdMemoryRegionsRegister, UffdSend, - }; + use self::LoadSnapshotError::{CreateUffdBuilder, DeserializeMemory, UdsConnection, UffdSend}; - let backing_memory_file = unsafe { - let mem_name = CString::new("vm-memory").expect("CString::new failed"); - File::from_raw_fd(libc::memfd_create(mem_name.as_ptr(), 0)) - }; - backing_memory_file - .set_len( - mem_state - .regions - .iter() - .map(|region| region.size) - .sum::() as u64, - ) - .map_err(MemoryBackingFile)?; + let mut socket = UnixStream::connect(mem_uds_path).map_err(UdsConnection)?; - let guest_memory = - GuestMemoryMmap::restore(Some(&backing_memory_file), mem_state, track_dirty_pages) - .map_err(DeserializeMemory)?; + let mut buf = [0u8; 8]; + let (_, memfd) = socket.recv_with_fd(&mut buf).map_err(UffdSend)?; - let mut uffd_builder = UffdBuilder::new(); - - if enable_balloon { - // We enable this so that the page fault handler can add logic - // for treating madvise(MADV_DONTNEED) events triggerd by balloon inflation. - uffd_builder.require_features(FeatureFlags::EVENT_REMOVE | FeatureFlags::PAGEFAULT_FLAG_WP); + if memfd.is_none() { + return Err(LoadSnapshotError::NoMemFdReceived); } - let uffd = uffd_builder - .close_on_exec(true) - .non_blocking(true) + let memfd = memfd.unwrap(); + + let guest_memory = GuestMemoryMmap::restore(Some(&memfd), mem_state, track_dirty_pages) + .map_err(DeserializeMemory)?; + + let uffd = UffdBuilder::new() + .require_features( + FeatureFlags::EVENT_REMOVE + | FeatureFlags::EVENT_REMAP + | FeatureFlags::EVENT_FORK + | FeatureFlags::EVENT_UNMAP + | FeatureFlags::MISSING_SHMEM + | FeatureFlags::MINOR_SHMEM + | FeatureFlags::PAGEFAULT_FLAG_WP, + ) + .user_mode_only(false) .create() .map_err(CreateUffdBuilder)?; @@ -680,8 +638,6 @@ pub(crate) fn guest_memory_from_uffd( let host_base_addr = mem_region.as_ptr(); let size = mem_region.size(); - uffd.register(host_base_addr as _, size as _) - .map_err(UffdMemoryRegionsRegister)?; backend_mappings.push(GuestRegionUffdMapping { base_host_virt_addr: host_base_addr as u64, size, @@ -693,7 +649,6 @@ pub(crate) fn guest_memory_from_uffd( // (i.e GuestRegionUffdMapping entries). let backend_mappings = serde_json::to_string(&backend_mappings).unwrap(); - let socket = UnixStream::connect(mem_uds_path).map_err(UdsConnection)?; socket .send_with_fd( backend_mappings.as_bytes(), @@ -731,6 +686,11 @@ pub(crate) fn guest_memory_from_uffd( ) .map_err(UffdSend)?; + // Wait for UFFD to be ready. + // TODO: maybe add a timeout? + let mut buf = [0; 2]; + socket.read_exact(&mut buf).map_err(UdsConnection)?; + Ok((guest_memory, Some(uffd))) } diff --git a/tests/host_tools/uffd/Cargo.toml b/tests/host_tools/uffd/Cargo.toml index f83ed0f9c3f..725da59e84e 100644 --- a/tests/host_tools/uffd/Cargo.toml +++ b/tests/host_tools/uffd/Cargo.toml @@ -11,7 +11,7 @@ libc = ">=0.2.39" nix = "0.23.0" serde = { version = ">=1.0.27", features = ["derive"] } serde_json = ">=1.0.9" -userfaultfd = ">=0.4.0" +userfaultfd = ">=0.5.0" [workspace] diff --git a/tools/devtool b/tools/devtool index 6d5bc487cc2..fcca289378f 100755 --- a/tools/devtool +++ b/tools/devtool @@ -615,6 +615,7 @@ run_devctr() { --rm \ --volume /dev:/dev \ --volume "$FC_ROOT_DIR:$CTR_FC_ROOT_DIR:z" \ + --mount type=bind,source=/usr/include/linux/userfaultfd.h,target=/usr/include/linux/userfaultfd.h \ --env OPT_LOCAL_IMAGES_PATH="$(dirname "$CTR_MICROVM_IMAGES_DIR")" \ --env PYTHONDONTWRITEBYTECODE=1 \ "$DEVCTR_IMAGE" "${ctr_args[@]}" From 12b00cd87a6305941f309a5fa9eca59ed6861d5d Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Fri, 4 Nov 2022 14:26:20 +0100 Subject: [PATCH 06/18] remove warning --- src/vmm/src/builder.rs | 4 +--- src/vmm/src/persist.rs | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index a83c6baeadc..170e4ed6a0f 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -4,16 +4,14 @@ //! Enables pre-boot setup, instantiation and booting of a Firecracker VMM. use std::convert::TryFrom; -use std::ffi::CString; use std::fmt::{Display, Formatter}; use std::fs::{File, OpenOptions}; use std::io::{self, Read, Seek, SeekFrom}; use std::os::unix::io::{AsRawFd, RawFd}; use std::os::unix::net::UnixStream; -use std::os::unix::prelude::FromRawFd; use std::path::Path; use std::sync::{Arc, Mutex}; -use userfaultfd::{FeatureFlags, RegisterMode, Uffd, UffdBuilder}; +use userfaultfd::{FeatureFlags, Uffd, UffdBuilder}; use utils::sock_ctrl_msg::ScmSocket; use vm_memory::{FileOffset, GuestMemory}; diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 36667c0d578..60b40d7f5b0 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -12,7 +12,6 @@ use std::os::unix::net::UnixStream; use std::os::unix::prelude::FromRawFd; use std::path::Path; use std::sync::{Arc, Mutex}; -use std::time::Instant; #[cfg(target_arch = "aarch64")] use arch::regs::{get_manufacturer_id_from_host, get_manufacturer_id_from_state}; From 8ac760b67d97bb2c1d62ba5fea6f553bcccf96e8 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Wed, 31 Aug 2022 12:05:46 +0200 Subject: [PATCH 07/18] disable async pf for intel --- src/cpuid/src/transformer/amd.rs | 3 +-- src/cpuid/src/transformer/intel.rs | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cpuid/src/transformer/amd.rs b/src/cpuid/src/transformer/amd.rs index 261e88ef4e4..8fc6222f2d9 100644 --- a/src/cpuid/src/transformer/amd.rs +++ b/src/cpuid/src/transformer/amd.rs @@ -147,8 +147,7 @@ impl CpuidTransformer for AmdCpuidTransformer { leaf_0x8000001d::LEAF_NUM => Some(amd::update_extended_cache_topology_entry), leaf_0x8000001e::LEAF_NUM => Some(amd::update_extended_apic_id_entry), 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), - - // hypervisor stuff + // Disable async PF, as it hangs the VM for some reason when loading from snapshot/uffd. 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), _ => None, } diff --git a/src/cpuid/src/transformer/intel.rs b/src/cpuid/src/transformer/intel.rs index 5272ac4437b..db897b5ab31 100644 --- a/src/cpuid/src/transformer/intel.rs +++ b/src/cpuid/src/transformer/intel.rs @@ -126,8 +126,8 @@ impl CpuidTransformer for IntelCpuidTransformer { leaf_0xa::LEAF_NUM => Some(intel::update_perf_mon_entry), leaf_0xb::LEAF_NUM => Some(intel::update_extended_topology_entry), 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), - // // hypervisor stuff - // 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), + // Disable async PF, as it hangs the VM for some reason when loading from snapshot/uffd. + 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), _ => None, } } From 1a7a3831dad1bc82fe3cdc2ce7b402143dd1e487 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Fri, 16 Jun 2023 15:23:16 +0200 Subject: [PATCH 08/18] balloon: don't map over existing regions Firecracker used to map over the mmap when it was a restore. For CodeSandbox this is not needed, as we use UFFD and we don't have the limitation that the map was used for. In fact, this is problematic for the UFFD handler, because it implicitly unregisters the UFFD handler from the reclaimed range. --- src/devices/src/virtio/balloon/utils.rs | 38 +++++++++++++------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/devices/src/virtio/balloon/utils.rs b/src/devices/src/virtio/balloon/utils.rs index 53ec5d3efd0..e2185b23189 100644 --- a/src/devices/src/virtio/balloon/utils.rs +++ b/src/devices/src/virtio/balloon/utils.rs @@ -80,24 +80,26 @@ pub(crate) fn remove_range( .get_host_address(guest_address) .map_err(|_| RemoveRegionError::AddressTranslation)?; - // Mmap a new anonymous region over the present one in order to create a hole. - // This workaround is (only) needed after resuming from a snapshot because the guest memory - // is mmaped from file as private and there is no `madvise` flag that works for this case. - if restored { - let ret = unsafe { - libc::mmap( - phys_address as *mut _, - range_len as usize, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, - -1, - 0, - ) - }; - if ret == libc::MAP_FAILED { - return Err(RemoveRegionError::MmapFail(io::Error::last_os_error())); - } - }; + // CodeSandbox: since we use UFFD handler, this is not needed for us. In fact, it breaks the UFFD handler + // if this happens right now, as it unregisters the UFFD handler for the given range. + // // Mmap a new anonymous region over the present one in order to create a hole. + // // This workaround is (only) needed after resuming from a snapshot because the guest memory + // // is mmaped from file as private and there is no `madvise` flag that works for this case. + // if restored { + // let ret = unsafe { + // libc::mmap( + // phys_address as *mut _, + // range_len as usize, + // libc::PROT_READ | libc::PROT_WRITE, + // libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, + // -1, + // 0, + // ) + // }; + // if ret == libc::MAP_FAILED { + // return Err(RemoveRegionError::MmapFail(io::Error::last_os_error())); + // } + // }; // Madvise the region in order to mark it as not used. let ret = unsafe { From 32de822631ae0cc45894c3768a6a964863838ca3 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Sat, 17 Jun 2023 15:17:48 +0200 Subject: [PATCH 09/18] update clippy --- src/devices/src/virtio/balloon/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/devices/src/virtio/balloon/utils.rs b/src/devices/src/virtio/balloon/utils.rs index e2185b23189..2a3ed7af56c 100644 --- a/src/devices/src/virtio/balloon/utils.rs +++ b/src/devices/src/virtio/balloon/utils.rs @@ -68,7 +68,7 @@ pub(crate) fn compact_page_frame_numbers(v: &mut [u32]) -> Vec<(u32, u32)> { pub(crate) fn remove_range( guest_memory: &GuestMemoryMmap, range: (GuestAddress, u64), - restored: bool, + _restored: bool, ) -> std::result::Result<(), RemoveRegionError> { let (guest_address, range_len) = range; From be2d754c3d8ccb90c7497c13f6303d95ba8196d2 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Tue, 20 Jun 2023 16:47:08 +0200 Subject: [PATCH 10/18] Possible race condition between VMM thread and API thread (FC 1.2) https://github.com/firecracker-microvm/firecracker/pull/3591/commits/174e0425fc17ac98d4210943d8739f84cf31c535 --- src/firecracker/src/api_server_adapter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/firecracker/src/api_server_adapter.rs b/src/firecracker/src/api_server_adapter.rs index 7934530e1c4..cd5725eedfb 100644 --- a/src/firecracker/src/api_server_adapter.rs +++ b/src/firecracker/src/api_server_adapter.rs @@ -71,6 +71,7 @@ impl MutEventSubscriber for ApiServerAdapter { let event_set = event.event_set(); if source == self.api_event_fd.as_raw_fd() && event_set == EventSet::IN { + let _ = self.api_event_fd.read(); match self.from_api.try_recv() { Ok(api_request) => { let request_is_pause = *api_request == VmmAction::Pause; @@ -101,7 +102,6 @@ impl MutEventSubscriber for ApiServerAdapter { panic!("The channel's sending half was disconnected. Cannot receive data."); } }; - let _ = self.api_event_fd.read(); } else { error!("Spurious EventManager event for handler: ApiServerAdapter"); } @@ -129,7 +129,7 @@ pub(crate) fn run_with_api( // FD to notify of API events. This is a blocking eventfd by design. // It is used in the config/pre-boot loop which is a simple blocking loop // which only consumes API events. - let api_event_fd = EventFd::new(0).expect("Cannot create API Eventfd."); + let api_event_fd = EventFd::new(libc::EFD_SEMAPHORE).expect("Cannot create API Eventfd."); // Channels for both directions between Vmm and Api threads. let (to_vmm, from_api) = channel(); From bb077cbb974a8230129f36ac9542328542039f99 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Mon, 3 Jul 2023 23:34:57 +0200 Subject: [PATCH 11/18] uffd: enable non-blocking mode --- src/vmm/src/builder.rs | 1 + src/vmm/src/persist.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 170e4ed6a0f..95e7065e310 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -732,6 +732,7 @@ pub fn create_uffd_guest_memory( | FeatureFlags::PAGEFAULT_FLAG_WP, ) .user_mode_only(false) + .non_blocking(true) .create() .map_err(CreateUffdBuilder)?; diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 60b40d7f5b0..efddea30696 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -629,6 +629,7 @@ pub(crate) fn guest_memory_from_uffd( | FeatureFlags::PAGEFAULT_FLAG_WP, ) .user_mode_only(false) + .non_blocking(true) .create() .map_err(CreateUffdBuilder)?; From 84c87c3951429a80a578a23f3ddc2543e01254ba Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Sat, 5 Aug 2023 14:24:01 +0200 Subject: [PATCH 12/18] feat(balloon): enable memory reclaiming --- src/devices/src/virtio/balloon/device.rs | 71 +++++++++++++++++-- .../src/virtio/balloon/event_handler.rs | 17 ++++- src/devices/src/virtio/balloon/mod.rs | 8 ++- 3 files changed, 87 insertions(+), 9 deletions(-) diff --git a/src/devices/src/virtio/balloon/device.rs b/src/devices/src/virtio/balloon/device.rs index 31af2a49af0..ae185dd9f66 100644 --- a/src/devices/src/virtio/balloon/device.rs +++ b/src/devices/src/virtio/balloon/device.rs @@ -18,12 +18,13 @@ use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryMmap}; use super::super::{ActivateResult, DeviceState, Queue, VirtioDevice, TYPE_BALLOON}; use super::utils::{compact_page_frame_numbers, remove_range}; use super::{ - BALLOON_DEV_ID, DEFLATE_INDEX, INFLATE_INDEX, MAX_PAGES_IN_DESC, MAX_PAGE_COMPACT_BUFFER, - MIB_TO_4K_PAGES, NUM_QUEUES, QUEUE_SIZES, STATS_INDEX, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, - VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_PFN_SHIFT, VIRTIO_BALLOON_S_AVAIL, - VIRTIO_BALLOON_S_CACHES, VIRTIO_BALLOON_S_HTLB_PGALLOC, VIRTIO_BALLOON_S_HTLB_PGFAIL, - VIRTIO_BALLOON_S_MAJFLT, VIRTIO_BALLOON_S_MEMFREE, VIRTIO_BALLOON_S_MEMTOT, - VIRTIO_BALLOON_S_MINFLT, VIRTIO_BALLOON_S_SWAP_IN, VIRTIO_BALLOON_S_SWAP_OUT, + BALLOON_DEV_ID, DEFLATE_INDEX, FREE_PAGE_REPORTING_INDEX, INFLATE_INDEX, MAX_PAGES_IN_DESC, + MAX_PAGE_COMPACT_BUFFER, MIB_TO_4K_PAGES, NUM_QUEUES, QUEUE_SIZES, STATS_INDEX, + VIRTIO_BALLOON_F_DEFLATE_ON_OOM, VIRTIO_BALLOON_F_REPORTING, VIRTIO_BALLOON_F_STATS_VQ, + VIRTIO_BALLOON_PFN_SHIFT, VIRTIO_BALLOON_S_AVAIL, VIRTIO_BALLOON_S_CACHES, + VIRTIO_BALLOON_S_HTLB_PGALLOC, VIRTIO_BALLOON_S_HTLB_PGFAIL, VIRTIO_BALLOON_S_MAJFLT, + VIRTIO_BALLOON_S_MEMFREE, VIRTIO_BALLOON_S_MEMTOT, VIRTIO_BALLOON_S_MINFLT, + VIRTIO_BALLOON_S_SWAP_IN, VIRTIO_BALLOON_S_SWAP_OUT, }; use crate::virtio::balloon::Error as BalloonError; use crate::virtio::{IrqTrigger, IrqType}; @@ -167,10 +168,15 @@ impl Balloon { avail_features |= 1u64 << VIRTIO_BALLOON_F_STATS_VQ; } + avail_features |= 1u64 << VIRTIO_BALLOON_F_REPORTING; + + logger::debug!("balloon: registering balloon device"); + let queue_evts = [ EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, + EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, ]; let mut queues: Vec = QUEUE_SIZES.iter().map(|&s| Queue::new(s)).collect(); @@ -231,6 +237,14 @@ impl Balloon { self.trigger_stats_update() } + pub(crate) fn process_free_page_report_event(&mut self) -> Result<(), BalloonError> { + logger::debug!("balloon: received free page report event"); + self.queue_evts[FREE_PAGE_REPORTING_INDEX] + .read() + .map_err(BalloonError::EventFd)?; + self.process_free_page_reporting_queue() + } + pub(crate) fn process_inflate(&mut self) -> Result<(), BalloonError> { // This is safe since we checked in the event handler that the device is activated. let mem = self.device_state.mem().unwrap(); @@ -382,6 +396,50 @@ impl Balloon { Ok(()) } + pub(crate) fn process_free_page_reporting_queue( + &mut self, + ) -> std::result::Result<(), BalloonError> { + logger::debug!("balloon: processing free page reporting queue"); + let mem = self.device_state.mem().unwrap(); + + let mut total_removed = 0; + let queue = &mut self.queues[FREE_PAGE_REPORTING_INDEX]; + let mut needs_interrupt = false; + + while let Some(head) = queue.pop(mem) { + let head_index = head.index; + let head_mem = head.mem; + + let mut last_desc = Some(head); + while let Some(desc) = last_desc { + total_removed += desc.len; + if let Err(err) = + remove_range(desc.mem, (desc.addr, desc.len as u64), self.restored) + { + error!("balloon: failed to remove range: {:?}", err); + }; + last_desc = desc.next_descriptor(); + } + + // Acknowledge the receipt of the descriptor. + queue + .add_used(head_mem, head_index, 0) + .map_err(BalloonError::Queue)?; + + logger::debug!("balloon: adding to the queue"); + + needs_interrupt = true; + } + + logger::debug!("balloon: total removed: {}MiB", total_removed >> 20); + + if needs_interrupt { + self.signal_used_queue()?; + } + + Ok(()) + } + pub(crate) fn signal_used_queue(&self) -> Result<(), BalloonError> { self.irq_trigger.trigger_irq(IrqType::Vring).map_err(|err| { METRICS.balloon.event_fails.inc(); @@ -393,6 +451,7 @@ impl Balloon { pub fn process_virtio_queues(&mut self) { let _ = self.process_inflate(); let _ = self.process_deflate_queue(); + let _ = self.process_free_page_reporting_queue(); } pub fn id(&self) -> &str { diff --git a/src/devices/src/virtio/balloon/event_handler.rs b/src/devices/src/virtio/balloon/event_handler.rs index 841f5d231db..f2ad8678232 100644 --- a/src/devices/src/virtio/balloon/event_handler.rs +++ b/src/devices/src/virtio/balloon/event_handler.rs @@ -9,7 +9,9 @@ use utils::epoll::EventSet; use crate::report_balloon_event_fail; use crate::virtio::balloon::device::Balloon; -use crate::virtio::{VirtioDevice, DEFLATE_INDEX, INFLATE_INDEX, STATS_INDEX}; +use crate::virtio::{ + VirtioDevice, DEFLATE_INDEX, FREE_PAGE_REPORTING_INDEX, INFLATE_INDEX, STATS_INDEX, +}; impl Balloon { fn register_runtime_events(&self, ops: &mut EventOps) { @@ -27,6 +29,15 @@ impl Balloon { error!("Failed to register stats timerfd event: {}", err); } } + if let Err(err) = ops.add(Events::new( + &self.queue_evts[FREE_PAGE_REPORTING_INDEX], + EventSet::IN, + )) { + error!( + "Failed to register free page reporting queue event: {}", + err + ); + } } fn register_activate_event(&self, ops: &mut EventOps) { @@ -65,6 +76,7 @@ impl MutEventSubscriber for Balloon { let virtq_inflate_ev_fd = self.queue_evts[INFLATE_INDEX].as_raw_fd(); let virtq_deflate_ev_fd = self.queue_evts[DEFLATE_INDEX].as_raw_fd(); let virtq_stats_ev_fd = self.queue_evts[STATS_INDEX].as_raw_fd(); + let free_page_report_ev_fd = self.queue_evts[FREE_PAGE_REPORTING_INDEX].as_raw_fd(); let stats_timer_fd = self.stats_timer.as_raw_fd(); let activate_fd = self.activate_evt.as_raw_fd(); @@ -82,6 +94,9 @@ impl MutEventSubscriber for Balloon { _ if source == stats_timer_fd => self .process_stats_timer_event() .unwrap_or_else(report_balloon_event_fail), + _ if source == free_page_report_ev_fd => self + .process_free_page_report_event() + .unwrap_or_else(report_balloon_event_fail), _ if activate_fd == source => self.process_activate_event(ops), _ => { warn!("Balloon: Spurious event received: {:?}", source); diff --git a/src/devices/src/virtio/balloon/mod.rs b/src/devices/src/virtio/balloon/mod.rs index fb614c9ca6e..c6f8b619cd3 100644 --- a/src/devices/src/virtio/balloon/mod.rs +++ b/src/devices/src/virtio/balloon/mod.rs @@ -17,8 +17,9 @@ pub use self::event_handler::*; pub const BALLOON_DEV_ID: &str = "balloon"; pub const CONFIG_SPACE_SIZE: usize = 8; pub const QUEUE_SIZE: u16 = 256; -pub const NUM_QUEUES: usize = 3; -pub const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE, QUEUE_SIZE, QUEUE_SIZE]; +pub const REPORTING_QUEUE_SIZE: u16 = 32; +pub const NUM_QUEUES: usize = 4; +pub const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE, QUEUE_SIZE, QUEUE_SIZE, QUEUE_SIZE]; // Number of 4K pages in a MiB. pub const MIB_TO_4K_PAGES: u32 = 256; // The maximum number of pages that can be received in a single descriptor. @@ -34,10 +35,13 @@ pub const INFLATE_INDEX: usize = 0; pub const DEFLATE_INDEX: usize = 1; // The index of the deflate queue from Balloon device queues/queues_evts vector. pub const STATS_INDEX: usize = 2; +// The index of the free page reporting from Balloon device queues/queues_evts vector. +pub const FREE_PAGE_REPORTING_INDEX: usize = 3; // The feature bitmap for virtio balloon. const VIRTIO_BALLOON_F_STATS_VQ: u32 = 1; // Enable statistics. const VIRTIO_BALLOON_F_DEFLATE_ON_OOM: u32 = 2; // Deflate balloon on OOM. +const VIRTIO_BALLOON_F_REPORTING: u32 = 5; // Page reporting virtqueue // The statistics tags. const VIRTIO_BALLOON_S_SWAP_IN: u16 = 0; From 6fad03dfb76f3891fbf0123c7f9344f152af30c4 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Thu, 17 Aug 2023 09:47:29 +0200 Subject: [PATCH 13/18] perf: disable flush on snapshot saving Firecracker is taking significant time on saving the snapshot file. For comparison: writing the memory snapshot by us (1GiB) takes 1s, and writing the snapshot file by Firecracker (150KiB) takes 8.5s. We presume this is because of the syncing of the metadata, which is why we disable it for now. The filesystem should still handle persisting, and given the fact that the snapshot/memory files are temporary anyway, we can afford to not sync/flush. --- src/vmm/src/persist.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index efddea30696..c417973ac10 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -319,12 +319,14 @@ fn snapshot_state_to_file( snapshot .save(&mut snapshot_file, microvm_state) .map_err(SerializeMicrovmState)?; - snapshot_file - .flush() - .map_err(|err| SnapshotBackingFile("flush", err))?; - snapshot_file - .sync_all() - .map_err(|err| SnapshotBackingFile("sync_all", err)) + // Disable the following lines as we're seeing some performance issues with btrfs on these operations + // snapshot_file + // .flush() + // .map_err(|err| SnapshotBackingFile("flush", err))?; + // snapshot_file + // .sync_all() + // .map_err(|err| SnapshotBackingFile("sync_all", err)) + Ok(()) } fn snapshot_memory_to_file( From e9dda44c2ae525492e58e0032513d1df05bf536c Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Thu, 17 Aug 2023 21:10:15 +0200 Subject: [PATCH 14/18] perf: remove another snapshot flush --- src/snapshot/src/lib.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/snapshot/src/lib.rs b/src/snapshot/src/lib.rs index 1e4c476391d..5a43bcc8560 100644 --- a/src/snapshot/src/lib.rs +++ b/src/snapshot/src/lib.rs @@ -219,9 +219,10 @@ impl Snapshot { object .serialize(&mut writer, &self.version_map, self.target_version) .map_err(Error::Versionize)?; - writer - .flush() - .map_err(|ref err| Error::Io(err.raw_os_error().unwrap_or(libc::EINVAL))) + // writer + // .flush() + // .map_err(|ref err| Error::Io(err.raw_os_error().unwrap_or(libc::EINVAL))) + Ok(()) } // Returns the current snapshot format version. From dd2ce577c857378624e65b6e4e88672853b18148 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Sun, 24 Sep 2023 21:05:01 +0200 Subject: [PATCH 15/18] re-enable async pf --- src/cpuid/src/transformer/amd.rs | 4 ++-- src/cpuid/src/transformer/intel.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cpuid/src/transformer/amd.rs b/src/cpuid/src/transformer/amd.rs index 8fc6222f2d9..7cf2ee8bf7e 100644 --- a/src/cpuid/src/transformer/amd.rs +++ b/src/cpuid/src/transformer/amd.rs @@ -147,8 +147,8 @@ impl CpuidTransformer for AmdCpuidTransformer { leaf_0x8000001d::LEAF_NUM => Some(amd::update_extended_cache_topology_entry), leaf_0x8000001e::LEAF_NUM => Some(amd::update_extended_apic_id_entry), 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), - // Disable async PF, as it hangs the VM for some reason when loading from snapshot/uffd. - 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), + // // Disable async PF, as it hangs the VM for some reason when loading from snapshot/uffd. + // 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), _ => None, } } diff --git a/src/cpuid/src/transformer/intel.rs b/src/cpuid/src/transformer/intel.rs index db897b5ab31..9d3e970d929 100644 --- a/src/cpuid/src/transformer/intel.rs +++ b/src/cpuid/src/transformer/intel.rs @@ -126,8 +126,8 @@ impl CpuidTransformer for IntelCpuidTransformer { leaf_0xa::LEAF_NUM => Some(intel::update_perf_mon_entry), leaf_0xb::LEAF_NUM => Some(intel::update_extended_topology_entry), 0x8000_0002..=0x8000_0004 => Some(common::update_brand_string_entry), - // Disable async PF, as it hangs the VM for some reason when loading from snapshot/uffd. - 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), + // // Disable async PF, as it hangs the VM for some reason when loading from snapshot/uffd. + // 0x4000_0001 => Some(common::disable_kvm_feature_async_pf), _ => None, } } From 9983aef5875a262c4f9ae9f1b6b74b44a122ca00 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Sat, 30 Sep 2023 22:39:28 +0200 Subject: [PATCH 16/18] reuse balloon memory queues --- src/vmm/src/devices/virtio/balloon/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/vmm/src/devices/virtio/balloon/mod.rs b/src/vmm/src/devices/virtio/balloon/mod.rs index b954fc34faa..77e55a9ccd3 100644 --- a/src/vmm/src/devices/virtio/balloon/mod.rs +++ b/src/vmm/src/devices/virtio/balloon/mod.rs @@ -12,21 +12,21 @@ mod util; use utils::vm_memory::GuestMemoryError; pub use self::device::{Balloon, BalloonConfig, BalloonStats}; +use crate::devices::virtio::FIRECRACKER_MAX_QUEUE_SIZE; /// Device ID used in MMIO device identification. /// Because Balloon is unique per-vm, this ID can be hardcoded. pub const BALLOON_DEV_ID: &str = "balloon"; /// The size of the config space. pub const BALLOON_CONFIG_SPACE_SIZE: usize = 8; -pub const BALLOON_QUEUE_SIZE: u16 = 256; pub const BALLOON_REPORTING_QUEUE_SIZE: u16 = 32; /// Number of virtio queues. pub const BALLOON_NUM_QUEUES: usize = 4; pub const BALLOON_QUEUE_SIZES: &[u16] = &[ - BALLOON_QUEUE_SIZE, - BALLOON_QUEUE_SIZE, - BALLOON_QUEUE_SIZE, - BALLOON_QUEUE_SIZE, + FIRECRACKER_MAX_QUEUE_SIZE, + FIRECRACKER_MAX_QUEUE_SIZE, + FIRECRACKER_MAX_QUEUE_SIZE, + FIRECRACKER_MAX_QUEUE_SIZE, ]; // Number of 4K pages in a MiB. pub const MIB_TO_4K_PAGES: u32 = 256; From 6ef60bd5fb485b37110bf875f43577b8d91d52a5 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Sat, 30 Sep 2023 22:41:58 +0200 Subject: [PATCH 17/18] change ctr options --- build.rs | 0 src/api_server/src/request/memory_backend.rs | 2 +- tools/devtool | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 build.rs diff --git a/build.rs b/build.rs new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/api_server/src/request/memory_backend.rs b/src/api_server/src/request/memory_backend.rs index b81c7f5fc78..a99b5afeb1e 100644 --- a/src/api_server/src/request/memory_backend.rs +++ b/src/api_server/src/request/memory_backend.rs @@ -4,7 +4,7 @@ use super::super::VmmAction; use crate::parsed_request::{Error, ParsedRequest}; use crate::request::Body; -use logger::{IncMetric, METRICS}; +use vmm::logger::{IncMetric, METRICS}; use vmm::vmm_config::snapshot::MemBackendConfig; pub(crate) fn parse_put_memory_backend(body: &Body) -> Result { diff --git a/tools/devtool b/tools/devtool index 9933cac6161..b4389679b9d 100755 --- a/tools/devtool +++ b/tools/devtool @@ -309,8 +309,8 @@ run_devctr() { --rm \ --volume /dev:/dev \ --volume "$FC_ROOT_DIR:$CTR_FC_ROOT_DIR:z" \ + --tmpfs /srv:exec,dev,size=32G \ --mount type=bind,source=/usr/include/linux/userfaultfd.h,target=/usr/include/linux/userfaultfd.h \ - --env OPT_LOCAL_IMAGES_PATH="$(dirname "$CTR_MICROVM_IMAGES_DIR")" \ -v /boot:/boot \ --env PYTHONDONTWRITEBYTECODE=1 \ "$DEVCTR_IMAGE" "${ctr_args[@]}" From f61dd1fbcf593bc9a77a75ee26d36e7c83f27573 Mon Sep 17 00:00:00 2001 From: Ives van Hoorne Date: Sun, 1 Oct 2023 12:39:18 +0200 Subject: [PATCH 18/18] update --- Cargo.lock | 35 +++---------------- src/api_server/src/parsed_request.rs | 1 + src/firecracker/Cargo.toml | 9 +++-- src/jailer/src/env.rs | 2 +- src/vmm/Cargo.toml | 2 +- src/vmm/src/builder.rs | 2 +- .../src/cpu_config/x86_64/cpuid/normalize.rs | 22 ++++++++++++ src/vmm/src/memory_snapshot.rs | 1 - src/vmm/src/persist.rs | 6 +--- 9 files changed, 39 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e46c7f28c0d..fe38a68439d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -551,7 +551,7 @@ dependencies = [ "snapshot", "thiserror", "timerfd", - "userfaultfd 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "userfaultfd", "utils", "vmm", ] @@ -1294,45 +1294,20 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "userfaultfd" version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6a38c7e24af201e66f02659492f86ccd5efa9fd62f2c851fff3961a60e81966" -dependencies = [ - "bitflags 2.3.3", - "cfg-if", - "libc", - "nix", - "thiserror", - "userfaultfd-sys 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "userfaultfd" -version = "0.7.0" -source = "git+https://github.com/codesandbox/userfaultfd-rs.git?rev=7e52200e92bc0edaf7d6342040210328e67b8cfe#7e52200e92bc0edaf7d6342040210328e67b8cfe" +source = "git+https://github.com/codesandbox/userfaultfd-rs.git?rev=27b2ff3bc71774b79338afca051d49cb07146d90#27b2ff3bc71774b79338afca051d49cb07146d90" dependencies = [ "bitflags 2.3.3", "cfg-if", "libc", "nix", "thiserror", - "userfaultfd-sys 0.5.0 (git+https://github.com/codesandbox/userfaultfd-rs.git?rev=7e52200e92bc0edaf7d6342040210328e67b8cfe)", -] - -[[package]] -name = "userfaultfd-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75595d2a62b7db16bd47f5a1ce14e1fe05ccbe27d6c96721a958e0a027cad41" -dependencies = [ - "bindgen 0.68.1", - "cc", - "cfg-if", + "userfaultfd-sys", ] [[package]] name = "userfaultfd-sys" version = "0.5.0" -source = "git+https://github.com/codesandbox/userfaultfd-rs.git?rev=7e52200e92bc0edaf7d6342040210328e67b8cfe#7e52200e92bc0edaf7d6342040210328e67b8cfe" +source = "git+https://github.com/codesandbox/userfaultfd-rs.git?rev=27b2ff3bc71774b79338afca051d49cb07146d90#27b2ff3bc71774b79338afca051d49cb07146d90" dependencies = [ "bindgen 0.68.1", "cc", @@ -1463,7 +1438,7 @@ dependencies = [ "snapshot", "thiserror", "timerfd", - "userfaultfd 0.7.0 (git+https://github.com/codesandbox/userfaultfd-rs.git?rev=7e52200e92bc0edaf7d6342040210328e67b8cfe)", + "userfaultfd", "utils", "versionize", "versionize_derive", diff --git a/src/api_server/src/parsed_request.rs b/src/api_server/src/parsed_request.rs index 73a043a71d2..5edf6bad1ba 100644 --- a/src/api_server/src/parsed_request.rs +++ b/src/api_server/src/parsed_request.rs @@ -92,6 +92,7 @@ impl TryFrom<&Request> for ParsedRequest { (Method::Put, "drives", Some(body)) => parse_put_drive(body, path_tokens.next()), (Method::Put, "logger", Some(body)) => parse_put_logger(body), (Method::Put, "machine-config", Some(body)) => parse_put_machine_config(body), + (Method::Put, "memory-backend", Some(body)) => parse_put_memory_backend(body), (Method::Put, "metrics", Some(body)) => parse_put_metrics(body), (Method::Put, "mmds", Some(body)) => parse_put_mmds(body, path_tokens.next()), (Method::Put, "network-interfaces", Some(body)) => { diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index 415964fa986..70f1a10bf91 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -28,11 +28,16 @@ vmm = { path = "../vmm" } [dev-dependencies] cargo_toml = "0.16.0" -regex = { version = "1.9.5", default-features = false, features = ["std", "unicode-perl"] } +regex = { version = "1.9.5", default-features = false, features = [ + "std", + "unicode-perl", +] } # Dev-Dependencies for uffd examples serde = { version = "1.0.188", features = ["derive"] } -userfaultfd = "0.7.0" +userfaultfd = { git = "https://github.com/codesandbox/userfaultfd-rs.git", rev = "27b2ff3bc71774b79338afca051d49cb07146d90", features = [ + "linux5_7", +] } [build-dependencies] bincode = "1.2.1" diff --git a/src/jailer/src/env.rs b/src/jailer/src/env.rs index a2e587d387a..21fdaa4cc66 100644 --- a/src/jailer/src/env.rs +++ b/src/jailer/src/env.rs @@ -467,7 +467,7 @@ impl Env { // section), this latter part is not desirable in Firecracker's // threat model. Copying prevents 2 Firecracker processes from // sharing memory. - fs::copy(&self.exec_file_path, &self.chroot_dir).map_err(|err| { + fs::hard_link(&self.exec_file_path, &self.chroot_dir).map_err(|err| { JailerError::Copy(self.exec_file_path.clone(), self.chroot_dir.clone(), err) })?; diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 10f108f906c..e889ca64a34 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -27,7 +27,7 @@ serde_json = "1.0.78" timerfd = "1.5.0" thiserror = "1.0.32" displaydoc = "0.2.4" -userfaultfd = { git = "https://github.com/codesandbox/userfaultfd-rs.git", rev = "7e52200e92bc0edaf7d6342040210328e67b8cfe", features = [ +userfaultfd = { git = "https://github.com/codesandbox/userfaultfd-rs.git", rev = "27b2ff3bc71774b79338afca051d49cb07146d90", features = [ "linux5_7", ] } versionize = "0.1.10" diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index ad40d673222..4cd0f239f01 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -145,7 +145,7 @@ pub enum StartMicrovmError { #[error("Cannot set vm resources: {0}")] SetVmResources(VmConfigError), /// Failed to create an UFFD Builder. - #[error("Cannot create uffd socket: {0}")] + #[error("Cannot create uffd socket: {0:?}")] CreateUffdBuilder(userfaultfd::Error), /// Unable to connect to UDS in order to send information regarding /// handling guest memory page-fault events. diff --git a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs index cebd34f319e..0380d9e7e24 100644 --- a/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs +++ b/src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs @@ -23,6 +23,8 @@ pub enum NormalizeCpuidError { ExtendedCacheFeatures(#[from] ExtendedCacheFeaturesError), /// Failed to set vendor ID in leaf 0x0: {0} VendorId(#[from] VendorIdError), + /// Failed to disable async pf + DisableAsyncPf, } /// Error type for setting leaf 0 section. @@ -186,6 +188,7 @@ impl super::Cpuid { self.update_feature_info_entry(cpu_index, cpu_count)?; self.update_extended_topology_entry(cpu_index, cpu_count, cpu_bits, cpus_per_core)?; self.update_extended_cache_features()?; + // self.disable_kvm_feature_async_pf()?; // Apply manufacturer specific modifications. match self { @@ -300,6 +303,25 @@ impl super::Cpuid { Ok(()) } + /// Disable async pf, as it hangs the VM from time to time + /// + /// # Errors + /// + /// Returns `NormalizeCpuidError::DisableAsyncPf` if the leaf is not found + pub fn disable_kvm_feature_async_pf(&mut self) -> Result<(), NormalizeCpuidError> { + // KVM feature bits + #[cfg(target_arch = "x86_64")] + const KVM_FEATURE_ASYNC_PF_INT_BIT: u8 = 14; + + let leaf: &mut CpuidEntry = self + .get_mut(&CpuidKey::leaf(0x4000_0001)) + .ok_or(NormalizeCpuidError::DisableAsyncPf)?; + + set_bit(&mut leaf.result.eax, KVM_FEATURE_ASYNC_PF_INT_BIT, false); + + Ok(()) + } + /// Update extended topology entry fn update_extended_topology_entry( &mut self, diff --git a/src/vmm/src/memory_snapshot.rs b/src/vmm/src/memory_snapshot.rs index e9a54fc4290..4ca6ba4281a 100644 --- a/src/vmm/src/memory_snapshot.rs +++ b/src/vmm/src/memory_snapshot.rs @@ -139,7 +139,6 @@ impl SnapshotMemory for GuestMemoryMmap { } write_size += page_size; } else if write_size > 0 { - let start = Instant::now(); // We are at the end of a batch of dirty pages. writer.write_all_volatile( ®ion.get_slice( diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 64bab9d9fa0..d248f6843fc 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -3,17 +3,14 @@ //! Defines state structures for saving/restoring a Firecracker microVM. -use std::ffi::CString; use std::fmt::Debug; use std::fs::{File, OpenOptions}; -use std::io::{self, Read, Write}; +use std::io::{self, Read}; use std::os::unix::io::AsRawFd; use std::os::unix::net::UnixStream; -use std::os::unix::prelude::FromRawFd; use std::path::Path; use std::sync::{Arc, Mutex}; -use libc::memfd_create; use log::{info, warn}; use seccompiler::BpfThreadMap; use semver::Version; @@ -652,7 +649,6 @@ pub(crate) fn guest_memory_from_uffd( ) .user_mode_only(false) .non_blocking(true) - .user_mode_only(false) .create() .map_err(GuestMemoryFromUffdError::Create)?;