diff --git a/openhcl/hcl/src/ioctl.rs b/openhcl/hcl/src/ioctl.rs
index 5c291a50f..3baaf8df4 100644
--- a/openhcl/hcl/src/ioctl.rs
+++ b/openhcl/hcl/src/ioctl.rs
@@ -1590,7 +1590,12 @@ impl HclVp {
         isolation_type: IsolationType,
     ) -> Result<Self, Error> {
         let fd = &hcl.mshv_vtl.file;
-        let run = MappedPage::new(fd, vp as i64).map_err(|e| Error::MmapVp(e, None))?;
+        let run: MappedPage<hcl_run> =
+            MappedPage::new(fd, vp as i64).map_err(|e| Error::MmapVp(e, None))?;
+        // SAFETY: `proxy_irr_blocked` is not accessed by any other VP or by the kernel at this
+        // point (`HclVp` creation), so we have exclusive access. Initialize it to block all vectors by default.
+        let proxy_irr_blocked = unsafe { &mut *addr_of_mut!((*run.0.as_ptr()).proxy_irr_blocked) };
+        proxy_irr_blocked.fill(0xFFFFFFFF);

         let backing = match isolation_type {
             IsolationType::None | IsolationType::Vbs => BackingState::Mshv {
@@ -1857,6 +1862,21 @@ impl<'a, T: Backing> ProcessorRunner<'a, T> {
         }
     }

+    /// Updates the `proxy_irr_blocked` field in the run page.
+    pub fn update_proxy_irr_filter(&mut self, irr_filter: &[u32; 8]) {
+        // SAFETY: `proxy_irr_blocked` is accessed only by the current VP, but it could also be
+        // accessed concurrently by the kernel, so treat it as an array of atomics.
+        let proxy_irr_blocked = unsafe {
+            &mut *(addr_of_mut!((*self.run.as_ptr()).proxy_irr_blocked).cast::<[AtomicU32; 8]>())
+        };
+
+        // Block all vectors by default (all bits set); only allow (clear) the given vectors.
+        for (filter, irr) in proxy_irr_blocked.iter_mut().zip(irr_filter.iter()) {
+            filter.store(!irr, Ordering::Relaxed);
+            tracing::debug!(irr, "update_proxy_irr_filter");
+        }
+    }
+
     /// Gets the proxy_irr_exit bitmask. This mask ensures that
     /// the masked interrupts always exit to user-space, and cannot
     /// be injected in the kernel. Interrupts matching this condition
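Note on the mask convention used above: `proxy_irr_blocked` is a block-list, so the run page starts out with every bit set and `update_proxy_irr_filter` stores the bitwise NOT of the allow-list. Below is a minimal standalone sketch of that inversion; it is plain Rust, independent of the OpenHCL types, and the helper names are invented for illustration.

```rust
// Sketch only: models the [u32; 8] block-list the run page uses for the 256
// possible interrupt vectors. Not OpenHCL code; names are illustrative.

/// Builds a block-list from an allow-list: block everything by default,
/// then clear the bits for vectors that are allowed through.
fn blocked_from_allowed(allowed: &[u32; 8]) -> [u32; 8] {
    let mut blocked = [0xFFFF_FFFFu32; 8];
    for (b, a) in blocked.iter_mut().zip(allowed) {
        *b = !a;
    }
    blocked
}

/// Returns true if `vector` is present in the block-list.
fn is_blocked(blocked: &[u32; 8], vector: u8) -> bool {
    (blocked[vector as usize / 32] & (1 << (vector % 32))) != 0
}

fn main() {
    let mut allowed = [0u32; 8];
    // Allow an arbitrary example vector, 0x31.
    allowed[0x31 / 32] |= 1 << (0x31 % 32);

    let blocked = blocked_from_allowed(&allowed);
    assert!(!is_blocked(&blocked, 0x31)); // allowed vector is not blocked
    assert!(is_blocked(&blocked, 0x30)); // everything else stays blocked
}
```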
diff --git a/openhcl/virt_mshv_vtl/src/lib.rs b/openhcl/virt_mshv_vtl/src/lib.rs
index d0258d7c6..b0cd2e1ac 100644
--- a/openhcl/virt_mshv_vtl/src/lib.rs
+++ b/openhcl/virt_mshv_vtl/src/lib.rs
@@ -24,6 +24,9 @@ cfg_if::cfg_if!(
         use processor::tdx::TdxBackedShared;
         use std::arch::x86_64::CpuidResult;
         use virt::CpuidLeaf;
+        use bitvec::prelude::BitArray;
+        use bitvec::prelude::Lsb0;
+        type IrrBitmap = BitArray<[u32; 8], Lsb0>;
     } else if #[cfg(target_arch = "aarch64")] { // xtask-fmt allow-target-arch sys-crate
         pub use crate::processor::mshv::arm64::HypervisorBackedArm64 as HypervisorBacked;
         use hvdef::HvArm64RegisterName;
@@ -223,6 +226,10 @@ struct UhPartitionInner {
     no_sidecar_hotplug: AtomicBool,
     use_mmio_hypercalls: bool,
     backing_shared: BackingShared,
+    #[cfg(guest_arch = "x86_64")]
+    // N.B. For now there is only one device vector table, i.e. for VTL0 only.
+    #[inspect(with = "|x| inspect::iter_by_index(x.read().into_inner().map(inspect::AsHex))")]
+    device_vector_table: RwLock<IrrBitmap>,
 }

 #[derive(Inspect)]
@@ -628,7 +635,8 @@ struct WakeReason {
     message_queues: bool,
     hv_start_enable_vtl_vp: bool,
     intcon: bool,
-    #[bits(28)]
+    update_proxy_irr_filter: bool,
+    #[bits(27)]
     _reserved: u32,
 }

@@ -638,6 +646,8 @@ impl WakeReason {
     const MESSAGE_QUEUES: Self = Self::new().with_message_queues(true);
     const HV_START_ENABLE_VP_VTL: Self = Self::new().with_hv_start_enable_vtl_vp(true); // StartVp/EnableVpVtl handling
     const INTCON: Self = Self::new().with_intcon(true);
+    #[cfg(guest_arch = "x86_64")]
+    const UPDATE_PROXY_IRR_FILTER: Self = Self::new().with_update_proxy_irr_filter(true);
 }

 /// Immutable access to useful bits of Partition state.
@@ -749,6 +759,39 @@ impl UhPartitionInner {
         self.vps.get(index.index() as usize)
     }

+    /// Used by the requesting VP to issue a `proxy_irr_blocked` update to the other VPs.
+    #[cfg(guest_arch = "x86_64")]
+    fn request_proxy_irr_filter_update(&self, vtl: GuestVtl, device_vector: u8, req_vp_index: u32) {
+        tracing::debug!(
+            ?vtl,
+            device_vector,
+            req_vp_index,
+            "request_proxy_irr_filter_update"
+        );
+
+        // Add the given vector to the partition-global device vector table (VTL0 only for now).
+        {
+            let mut device_vector_table = self.device_vector_table.write();
+            device_vector_table.set(device_vector as usize, true);
+        }
+
+        // Wake all other VPs so they update their `proxy_irr_blocked` filters.
+        for vp in self.vps.iter() {
+            if vp.cpu_index != req_vp_index {
+                vp.wake(vtl, WakeReason::UPDATE_PROXY_IRR_FILTER);
+            }
+        }
+    }
+
+    /// Gets the current partition-global device IRR vectors (VTL0 only for now).
+    #[cfg(guest_arch = "x86_64")]
+    fn get_device_vectors(&self, _vtl: GuestVtl, irr_vectors: &mut IrrBitmap) {
+        let device_vector_table = self.device_vector_table.read();
+        for idx in device_vector_table.iter_ones() {
+            irr_vectors.set(idx, true);
+        }
+    }
+
     fn inspect_extra(&self, resp: &mut inspect::Response<'_>) {
         let mut wake_vps = false;
         resp.field_mut(
@@ -1609,6 +1652,8 @@ impl<'a> UhProtoPartition<'a> {
             no_sidecar_hotplug: params.no_sidecar_hotplug.into(),
             use_mmio_hypercalls: params.use_mmio_hypercalls,
             backing_shared: BackingShared::new(isolation, BackingSharedParams { cvm_state })?,
+            #[cfg(guest_arch = "x86_64")]
+            device_vector_table: RwLock::new(IrrBitmap::new(Default::default())),
         });

         if cfg!(guest_arch = "x86_64") {
diff --git a/openhcl/virt_mshv_vtl/src/processor/hardware_cvm/mod.rs b/openhcl/virt_mshv_vtl/src/processor/hardware_cvm/mod.rs
index 31aebec67..794c92319 100644
--- a/openhcl/virt_mshv_vtl/src/processor/hardware_cvm/mod.rs
+++ b/openhcl/virt_mshv_vtl/src/processor/hardware_cvm/mod.rs
@@ -723,6 +723,18 @@ impl UhHypercallHandler<'_, '_, T, B> {
         multicast: bool,
         target_processors: &[u32],
     ) -> HvResult<()> {
+        // Before dispatching retarget_device_interrupt, add the device vector
+        // to the partition-global device vector table and wake the other VPs to
+        // update their `proxy_irr_blocked` filters.
+        self.vp.partition.request_proxy_irr_filter_update(
+            self.intercepted_vtl,
+            vector as u8,
+            self.vp.vp_index().index(),
+        );
+
+        // Update `proxy_irr_blocked` for this VP itself.
+        self.vp.update_proxy_irr_filter(self.intercepted_vtl);
+
         self.vp.partition.hcl.retarget_device_interrupt(
             device_id,
             hvdef::hypercall::InterruptEntry {
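A standalone sketch of how the pieces added above fit together, assuming the `bitvec` 1.x API that the new imports pull in; the partition and VP state are reduced to plain locals, so this is illustrative rather than OpenHCL code. The partition-global device vector table collects vectors recorded by `retarget_device_interrupt`, each VP merges it with its own allow-list, and the inverted result is what would feed `proxy_irr_blocked`.

```rust
// Sketch only: shows the IrrBitmap alias from lib.rs and the merge that
// get_device_vectors performs, using local variables instead of the real
// partition and VP state.
use bitvec::prelude::{BitArray, Lsb0};

type IrrBitmap = BitArray<[u32; 8], Lsb0>;

fn main() {
    // Partition-global table: a vector recorded by request_proxy_irr_filter_update.
    let mut device_vector_table = IrrBitmap::new([0u32; 8]);
    device_vector_table.set(0x40, true);

    // Per-VP allow-list: vectors of unmasked proxy SINTs (0x21 as an example).
    let mut irr_bits = IrrBitmap::new([0u32; 8]);
    irr_bits.set(0x21, true);

    // Merge the device vectors into the per-VP allow-list.
    for idx in device_vector_table.iter_ones() {
        irr_bits.set(idx, true);
    }

    // The run-page filter blocks everything that is not allowed.
    let blocked: [u32; 8] = irr_bits.into_inner().map(|w| !w);
    assert_eq!(blocked[1] & (1 << 1), 0); // vector 0x21 allowed (bit cleared)
    assert_eq!(blocked[2] & (1 << 0), 0); // vector 0x40 allowed (bit cleared)
    assert_ne!(blocked[1] & (1 << 2), 0); // vector 0x22 still blocked
}
```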
diff --git a/openhcl/virt_mshv_vtl/src/processor/mod.rs b/openhcl/virt_mshv_vtl/src/processor/mod.rs
index 5e4098cb5..3de0399d2 100644
--- a/openhcl/virt_mshv_vtl/src/processor/mod.rs
+++ b/openhcl/virt_mshv_vtl/src/processor/mod.rs
@@ -23,6 +23,8 @@ cfg_if::cfg_if! {
         use virt::x86::MsrError;
         use virt_support_apic::LocalApic;
         use virt_support_x86emu::translate::TranslationRegisters;
+        use bitvec::prelude::BitArray;
+        use bitvec::prelude::Lsb0;
     } else if #[cfg(guest_arch = "aarch64")] {
         use hv1_hypercall::Arm64RegisterState;
         use hvdef::HvArm64RegisterName;
@@ -934,6 +936,13 @@ impl<'a, T: Backing> UhProcessor<'a, T> {
                     }
                 }
             }
+
+            #[cfg(guest_arch = "x86_64")]
+            if wake_reasons.update_proxy_irr_filter() {
+                // Update the `proxy_irr_blocked` filter.
+                debug_assert!(self.partition.isolation.is_hardware_isolated());
+                self.update_proxy_irr_filter(vtl);
+            }
         }

         Ok(wake_reasons_vtl.map(|w| w.intcon()).into())
@@ -967,8 +976,23 @@ impl<'a, T: Backing> UhProcessor<'a, T> {
     fn write_msr(&mut self, msr: u32, value: u64, vtl: GuestVtl) -> Result<(), MsrError> {
         if msr & 0xf0000000 == 0x40000000 {
             if let Some(hv) = self.backing.hv_mut(vtl).as_mut() {
+                // If the MSR being written is a SINT MSR, check whether the new or the previous
+                // value is a proxy SINT; in either case `proxy_irr_blocked` must be updated.
+                let mut irr_filter_update = false;
+                if matches!(msr, hvdef::HV_X64_MSR_SINT0..=hvdef::HV_X64_MSR_SINT15) {
+                    let sint_curr =
+                        HvSynicSint::from(hv.synic.sint((msr - hvdef::HV_X64_MSR_SINT0) as u8));
+                    let sint_new = HvSynicSint::from(value);
+                    if sint_curr.proxy() || sint_new.proxy() {
+                        irr_filter_update = true;
+                    }
+                }
                 let r = hv.msr_write(msr, value);
                 if !matches!(r, Err(MsrError::Unknown)) {
+                    // Apply the proxy filter update if one was required (for SINT writes).
+                    if irr_filter_update {
+                        self.update_proxy_irr_filter(vtl);
+                    }
                     return r;
                 }
             }
@@ -1128,6 +1152,28 @@ impl<'a, T: Backing> UhProcessor<'a, T> {

         self.request_sint_notifications(vtl, pending_sints);
     }
+
+    #[cfg(guest_arch = "x86_64")]
+    fn update_proxy_irr_filter(&mut self, vtl: GuestVtl) {
+        let mut irr_bits: BitArray<[u32; 8], Lsb0> = BitArray::new(Default::default());
+
+        // Collect all unmasked proxy SINT vectors.
+        if let Some(hv) = self.backing.hv(vtl).as_ref() {
+            for sint in 0..NUM_SINTS as u8 {
+                let sint_msr = hv.synic.sint(sint);
+                let hv_sint = HvSynicSint::from(sint_msr);
+                if hv_sint.proxy() && !hv_sint.masked() {
+                    irr_bits.set(hv_sint.vector() as usize, true);
+                }
+            }
+        }
+
+        // Merge in all device vectors.
+        self.partition.get_device_vectors(vtl, &mut irr_bits);
+
+        // Update the `proxy_irr_blocked` filter in the run page.
+        self.runner.update_proxy_irr_filter(&irr_bits.into_inner());
+    }
 }

 fn signal_mnf(dev: &impl CpuIo, connection_id: u32) {
diff --git a/openhcl/virt_mshv_vtl/src/processor/snp/mod.rs b/openhcl/virt_mshv_vtl/src/processor/snp/mod.rs
index 64049412a..3ae608b8d 100644
--- a/openhcl/virt_mshv_vtl/src/processor/snp/mod.rs
+++ b/openhcl/virt_mshv_vtl/src/processor/snp/mod.rs
@@ -396,7 +396,6 @@ impl BackingPrivate for SnpBacked {
         // TODO SNP GUEST VSM supporting VTL 1 proxy irrs requires kernel changes
         if vtl == GuestVtl::Vtl0 {
             if let Some(irr) = this.runner.proxy_irr() {
-                // TODO SNP: filter proxy IRRs.
                 this.backing.cvm.lapics[vtl]
                     .lapic
                     .request_fixed_interrupts(irr);
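One detail worth calling out in the `write_msr` change above: the filter has to be rebuilt when a SINT transitions either into or out of proxy mode, which is why both the current value and the new value are checked. Here is a small standalone sketch of just that decision; the `SintState` struct is a hypothetical stand-in for hvdef's `HvSynicSint`, whose exact bit layout is not reproduced.

```rust
// Sketch only: models the proxy/masked/vector fields that write_msr and
// update_proxy_irr_filter inspect. The real code uses hvdef::HvSynicSint.
#[derive(Clone, Copy)]
struct SintState {
    proxy: bool,
    masked: bool,
    vector: u8,
}

/// The filter must be rebuilt if the SINT was proxied before the write or is
/// proxied after it: either way the set of injectable vectors may have changed.
fn needs_irr_filter_update(current: SintState, new: SintState) -> bool {
    current.proxy || new.proxy
}

/// Only unmasked proxy SINTs contribute their vector to the allow-list,
/// mirroring the loop in update_proxy_irr_filter.
fn allowed_vector(sint: SintState) -> Option<u8> {
    (sint.proxy && !sint.masked).then_some(sint.vector)
}

fn main() {
    let masked = SintState { proxy: false, masked: true, vector: 0 };
    let proxied = SintState { proxy: true, masked: false, vector: 0x31 };
    let plain = SintState { proxy: false, masked: false, vector: 0x32 };

    assert!(needs_irr_filter_update(masked, proxied)); // enabling a proxy SINT
    assert!(needs_irr_filter_update(proxied, masked)); // disabling one
    assert!(!needs_irr_filter_update(masked, plain)); // unrelated SINT write

    assert_eq!(allowed_vector(proxied), Some(0x31));
    assert_eq!(allowed_vector(masked), None);
}
```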
diff --git a/openhcl/virt_mshv_vtl/src/processor/tdx/mod.rs b/openhcl/virt_mshv_vtl/src/processor/tdx/mod.rs
index f6eaa6dfa..daec9b457 100644
--- a/openhcl/virt_mshv_vtl/src/processor/tdx/mod.rs
+++ b/openhcl/virt_mshv_vtl/src/processor/tdx/mod.rs
@@ -883,7 +883,6 @@ impl UhProcessor<'_, TdxBacked> {
         // Check for interrupt requests from the host and kernel IPI offload.
         // TODO TDX GUEST VSM supporting VTL 1 proxy irrs requires kernel changes
         if vtl == GuestVtl::Vtl0 {
-            // TODO TDX: filter proxy IRRs by setting the `proxy_irr_blocked` field of the run page
             if let Some(irr) = self.runner.proxy_irr() {
                 // We can't put the interrupts directly on the APIC page because we might need
                 // to clear the tmr state. This can happen if a vector was previously used for a level
diff --git a/support/inspect/src/lib.rs b/support/inspect/src/lib.rs
index 88e5cd542..11af812e8 100644
--- a/support/inspect/src/lib.rs
+++ b/support/inspect/src/lib.rs
@@ -1605,6 +1605,7 @@ macro_rules! hexbincount {

 /// Wrapper around `T` that implements [`Inspect`] by writing a value with
 /// [`ValueFlags::hex`].
+#[derive(Clone, Copy)]
 pub struct AsHex<T>(pub T);

 hexbincount!(AsHex, into_hex, u8, u16, u32, u64, usize);
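The new `#[derive(Clone, Copy)]` on `AsHex` makes the wrapper as cheap to pass around as the integer it wraps, which suits inspect hooks like the `device_vector_table` closure above that build a whole array of them. A standalone sketch of the underlying pattern, using a local `AsHex` stand-in rather than the inspect crate itself:

```rust
// Sketch only: a local stand-in for inspect::AsHex demonstrating that a
// tuple-struct constructor can be used as a plain function with array::map,
// and that Copy lets the wrapped values be reused after formatting.
#[derive(Clone, Copy)]
struct AsHex(u32);

impl std::fmt::Display for AsHex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:#010x}", self.0)
    }
}

fn main() {
    let blocked: [u32; 8] = [0xffff_ffff; 8];
    // `AsHex` acts as a fn(u32) -> AsHex here, much like in the inspect closure.
    let hexed: [AsHex; 8] = blocked.map(AsHex);

    let first = hexed[0]; // a copy, not a move out of the array
    println!("{first}, then {}", hexed[1]);
}
```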