From: Alexandre Courbot Date: Fri, 6 Mar 2026 04:52:38 +0000 (+0900) Subject: gpu: nova-core: create falcon firmware DMA objects lazily X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=bc9de9e1af2f05461460e1b215a6d209ee62d65a;p=thirdparty%2Flinux.git gpu: nova-core: create falcon firmware DMA objects lazily When DMA was the only loading option for falcon firmwares, we decided to store them in DMA objects as soon as they were loaded from disk and patch them in-place to avoid having to do an extra copy. This decision complicates the PIO loading patch considerably, and actually does not even stand on its own when put into perspective with the fact that it requires 8 unsafe statements in the code that wouldn't exist if we stored the firmware into a `KVVec` and copied it into a DMA object at the last minute. The cost of the copy is, as can be expected, imperceptible at runtime. Thus, switch to a lazy DMA object creation model and simplify our code a bit. This will also have the nice side-effect of being more fit for PIO loading. Reviewed-by: Eliot Courtney Acked-by: Danilo Krummrich Link: https://patch.msgid.link/20260306-turing_prep-v11-1-8f0042c5d026@nvidia.com [acourbot@nvidia.com: add TODO item to switch back to a coherent allocation when it becomes convenient to do so.] Signed-off-by: Alexandre Courbot --- diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 37bfee1d09492..8d444cf9d55c1 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -2,12 +2,13 @@ //! Falcon microprocessor base support -use core::ops::Deref; - use hal::FalconHal; use kernel::{ - device, + device::{ + self, + Device, // + }, dma::{ DmaAddress, DmaMask, // @@ -15,9 +16,7 @@ use kernel::{ io::poll::read_poll_timeout, prelude::*, sync::aref::ARef, - time::{ - Delta, // - }, + time::Delta, }; use crate::{ @@ -351,6 +350,9 @@ pub(crate) struct FalconBromParams { /// Trait for providing load parameters of falcon firmwares. pub(crate) trait FalconLoadParams { + /// Returns the firmware data as a slice of bytes. + fn as_slice(&self) -> &[u8]; + /// Returns the load parameters for Secure `IMEM`. fn imem_sec_load_params(&self) -> FalconLoadTarget; @@ -370,9 +372,8 @@ pub(crate) trait FalconLoadParams { /// Trait for a falcon firmware. /// -/// A falcon firmware can be loaded on a given engine, and is presented in the form of a DMA -/// object. -pub(crate) trait FalconFirmware: FalconLoadParams + Deref { +/// A falcon firmware can be loaded on a given engine. +pub(crate) trait FalconFirmware: FalconLoadParams { /// Engine on which this firmware is to be loaded. type Target: FalconEngine; } @@ -415,10 +416,10 @@ impl Falcon { /// `target_mem`. /// /// `sec` is set if the loaded firmware is expected to run in secure mode. - fn dma_wr>( + fn dma_wr( &self, bar: &Bar0, - fw: &F, + dma_obj: &DmaObject, target_mem: FalconMem, load_offsets: FalconLoadTarget, ) -> Result { @@ -430,11 +431,11 @@ impl Falcon { // For DMEM we can fold the start offset into the DMA handle. let (src_start, dma_start) = match target_mem { FalconMem::ImemSecure | FalconMem::ImemNonSecure => { - (load_offsets.src_start, fw.dma_handle()) + (load_offsets.src_start, dma_obj.dma_handle()) } FalconMem::Dmem => ( 0, - fw.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?, + dma_obj.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?, ), }; if dma_start % DmaAddress::from(DMA_LEN) > 0 { @@ -466,7 +467,7 @@ impl Falcon { dev_err!(self.dev, "DMA transfer length overflow\n"); return Err(EOVERFLOW); } - Some(upper_bound) if usize::from_safe_cast(upper_bound) > fw.size() => { + Some(upper_bound) if usize::from_safe_cast(upper_bound) > dma_obj.size() => { dev_err!(self.dev, "DMA transfer goes beyond range of DMA object\n"); return Err(EINVAL); } @@ -515,7 +516,12 @@ impl Falcon { } /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. - fn dma_load>(&self, bar: &Bar0, fw: &F) -> Result { + fn dma_load>( + &self, + dev: &Device, + bar: &Bar0, + fw: &F, + ) -> Result { // The Non-Secure section only exists on firmware used by Turing and GA100, and // those platforms do not use DMA. if fw.imem_ns_load_params().is_some() { @@ -523,14 +529,22 @@ impl Falcon { return Err(EINVAL); } + // Create DMA object with firmware content as the source of the DMA engine. + let dma_obj = DmaObject::from_data(dev, fw.as_slice())?; + self.dma_reset(bar); regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 0, |v| { v.set_target(FalconFbifTarget::CoherentSysmem) .set_mem_type(FalconFbifMemType::Physical) }); - self.dma_wr(bar, fw, FalconMem::ImemSecure, fw.imem_sec_load_params())?; - self.dma_wr(bar, fw, FalconMem::Dmem, fw.dmem_load_params())?; + self.dma_wr( + bar, + &dma_obj, + FalconMem::ImemSecure, + fw.imem_sec_load_params(), + )?; + self.dma_wr(bar, &dma_obj, FalconMem::Dmem, fw.dmem_load_params())?; self.hal.program_brom(self, bar, &fw.brom_params())?; @@ -641,9 +655,14 @@ impl Falcon { } // Load a firmware image into Falcon memory - pub(crate) fn load>(&self, bar: &Bar0, fw: &F) -> Result { + pub(crate) fn load>( + &self, + dev: &Device, + bar: &Bar0, + fw: &F, + ) -> Result { match self.hal.load_method() { - LoadMethod::Dma => self.dma_load(bar, fw), + LoadMethod::Dma => self.dma_load(dev, bar, fw), LoadMethod::Pio => Err(ENOTSUPP), } } diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 815e8000bf813..5166c1f5972f3 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -15,7 +15,6 @@ use kernel::{ }; use crate::{ - dma::DmaObject, falcon::{ FalconFirmware, FalconLoadTarget, // @@ -292,7 +291,7 @@ impl SignedState for Unsigned {} struct Signed; impl SignedState for Signed {} -/// A [`DmaObject`] containing a specific microcode ready to be loaded into a falcon. +/// Microcode to be loaded into a specific falcon. /// /// This is module-local and meant for sub-modules to use internally. /// @@ -300,34 +299,35 @@ impl SignedState for Signed {} /// before it can be loaded (with an exception for development hardware). The /// [`Self::patch_signature`] and [`Self::no_patch_signature`] methods are used to transition the /// firmware to its [`Signed`] state. -struct FirmwareDmaObject(DmaObject, PhantomData<(F, S)>); +// TODO: Consider replacing this with a coherent memory object once `CoherentAllocation` supports +// temporary CPU-exclusive access to the object without unsafe methods. +struct FirmwareObject(KVVec, PhantomData<(F, S)>); /// Trait for signatures to be patched directly into a given firmware. /// /// This is module-local and meant for sub-modules to use internally. trait FirmwareSignature: AsRef<[u8]> {} -impl FirmwareDmaObject { - /// Patches the firmware at offset `sig_base_img` with `signature`. +impl FirmwareObject { + /// Patches the firmware at offset `signature_start` with `signature`. fn patch_signature>( mut self, signature: &S, - sig_base_img: usize, - ) -> Result> { + signature_start: usize, + ) -> Result> { let signature_bytes = signature.as_ref(); - if sig_base_img + signature_bytes.len() > self.0.size() { - return Err(EINVAL); - } - - // SAFETY: We are the only user of this object, so there cannot be any race. - let dst = unsafe { self.0.start_ptr_mut().add(sig_base_img) }; + let signature_end = signature_start + .checked_add(signature_bytes.len()) + .ok_or(EOVERFLOW)?; + let dst = self + .0 + .get_mut(signature_start..signature_end) + .ok_or(EINVAL)?; - // SAFETY: `signature` and `dst` are valid, properly aligned, and do not overlap. - unsafe { - core::ptr::copy_nonoverlapping(signature_bytes.as_ptr(), dst, signature_bytes.len()) - }; + // PANIC: `dst` and `signature_bytes` have the same length. + dst.copy_from_slice(signature_bytes); - Ok(FirmwareDmaObject(self.0, PhantomData)) + Ok(FirmwareObject(self.0, PhantomData)) } /// Mark the firmware as signed without patching it. @@ -335,8 +335,8 @@ impl FirmwareDmaObject { /// This method is used to explicitly confirm that we do not need to sign the firmware, while /// allowing us to continue as if it was. This is typically only needed for development /// hardware. - fn no_patch_signature(self) -> FirmwareDmaObject { - FirmwareDmaObject(self.0, PhantomData) + fn no_patch_signature(self) -> FirmwareObject { + FirmwareObject(self.0, PhantomData) } } diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs index ab374026b1f49..2b7166eaf2832 100644 --- a/drivers/gpu/nova-core/firmware/booter.rs +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -4,10 +4,7 @@ //! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon //! (and optionally unload it through a separate firmware image). -use core::{ - marker::PhantomData, - ops::Deref, // -}; +use core::marker::PhantomData; use kernel::{ device, @@ -16,7 +13,6 @@ use kernel::{ }; use crate::{ - dma::DmaObject, driver::Bar0, falcon::{ sec2::Sec2, @@ -28,7 +24,7 @@ use crate::{ }, firmware::{ BinFirmware, - FirmwareDmaObject, + FirmwareObject, FirmwareSignature, Signed, Unsigned, // @@ -269,12 +265,15 @@ pub(crate) struct BooterFirmware { // BROM falcon parameters. brom_params: FalconBromParams, // Device-mapped firmware image. - ucode: FirmwareDmaObject, + ucode: FirmwareObject, } -impl FirmwareDmaObject { - fn new_booter(dev: &device::Device, data: &[u8]) -> Result { - DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData)) +impl FirmwareObject { + fn new_booter(data: &[u8]) -> Result { + let mut ucode = KVVec::new(); + ucode.extend_from_slice(data, GFP_KERNEL)?; + + Ok(Self(ucode, PhantomData)) } } @@ -328,7 +327,7 @@ impl BooterFirmware { let ucode = bin_fw .data() .ok_or(EINVAL) - .and_then(|data| FirmwareDmaObject::::new_booter(dev, data))?; + .and_then(FirmwareObject::::new_booter)?; let ucode_signed = { let mut signatures = hs_fw.signatures_iter()?.peekable(); @@ -400,6 +399,10 @@ impl BooterFirmware { } impl FalconLoadParams for BooterFirmware { + fn as_slice(&self) -> &[u8] { + self.ucode.0.as_slice() + } + fn imem_sec_load_params(&self) -> FalconLoadTarget { self.imem_sec_load_target.clone() } @@ -425,14 +428,6 @@ impl FalconLoadParams for BooterFirmware { } } -impl Deref for BooterFirmware { - type Target = DmaObject; - - fn deref(&self) -> &Self::Target { - &self.ucode.0 - } -} - impl FalconFirmware for BooterFirmware { type Target = Sec2; } diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index df3d8de14ca14..7fff3acdaa735 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -10,10 +10,7 @@ //! - The command to be run, as this firmware can perform several tasks ; //! - The ucode signature, so the GSP falcon can run FWSEC in HS mode. -use core::{ - marker::PhantomData, - ops::Deref, // -}; +use core::marker::PhantomData; use kernel::{ device::{ @@ -28,7 +25,6 @@ use kernel::{ }; use crate::{ - dma::DmaObject, driver::Bar0, falcon::{ gsp::Gsp, @@ -40,7 +36,7 @@ use crate::{ }, firmware::{ FalconUCodeDesc, - FirmwareDmaObject, + FirmwareObject, FirmwareSignature, Signed, Unsigned, // @@ -174,52 +170,21 @@ impl AsRef<[u8]> for Bcrt30Rsa3kSignature { impl FirmwareSignature for Bcrt30Rsa3kSignature {} -/// Reinterpret the area starting from `offset` in `fw` as an instance of `T` (which must implement -/// [`FromBytes`]) and return a reference to it. -/// -/// # Safety -/// -/// * Callers must ensure that the device does not read/write to/from memory while the returned -/// reference is live. -/// * Callers must ensure that this call does not race with a write to the same region while -/// the returned reference is live. -unsafe fn transmute(fw: &DmaObject, offset: usize) -> Result<&T> { - // SAFETY: The safety requirements of the function guarantee the device won't read - // or write to memory while the reference is alive and that this call won't race - // with writes to the same memory region. - T::from_bytes(unsafe { fw.as_slice(offset, size_of::())? }).ok_or(EINVAL) -} - -/// Reinterpret the area starting from `offset` in `fw` as a mutable instance of `T` (which must -/// implement [`FromBytes`]) and return a reference to it. -/// -/// # Safety -/// -/// * Callers must ensure that the device does not read/write to/from memory while the returned -/// slice is live. -/// * Callers must ensure that this call does not race with a read or write to the same region -/// while the returned slice is live. -unsafe fn transmute_mut( - fw: &mut DmaObject, - offset: usize, -) -> Result<&mut T> { - // SAFETY: The safety requirements of the function guarantee the device won't read - // or write to memory while the reference is alive and that this call won't race - // with writes or reads to the same memory region. - T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::())? }).ok_or(EINVAL) -} - /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. /// /// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow. pub(crate) struct FwsecFirmware { /// Descriptor of the firmware. desc: FalconUCodeDesc, - /// GPU-accessible DMA object containing the firmware. - ucode: FirmwareDmaObject, + /// Object containing the firmware binary. + ucode: FirmwareObject, } impl FalconLoadParams for FwsecFirmware { + fn as_slice(&self) -> &[u8] { + self.ucode.0.as_slice() + } + fn imem_sec_load_params(&self) -> FalconLoadTarget { self.desc.imem_sec_load_params() } @@ -245,23 +210,15 @@ impl FalconLoadParams for FwsecFirmware { } } -impl Deref for FwsecFirmware { - type Target = DmaObject; - - fn deref(&self) -> &Self::Target { - &self.ucode.0 - } -} - impl FalconFirmware for FwsecFirmware { type Target = Gsp; } -impl FirmwareDmaObject { - fn new_fwsec(dev: &Device, bios: &Vbios, cmd: FwsecCommand) -> Result { +impl FirmwareObject { + fn new_fwsec(bios: &Vbios, cmd: FwsecCommand) -> Result { let desc = bios.fwsec_image().header()?; - let ucode = bios.fwsec_image().ucode(&desc)?; - let mut dma_object = DmaObject::from_data(dev, ucode)?; + let mut ucode = KVVec::new(); + ucode.extend_from_slice(bios.fwsec_image().ucode(&desc)?, GFP_KERNEL)?; let hdr_offset = desc .imem_load_size() @@ -269,8 +226,11 @@ impl FirmwareDmaObject { .map(usize::from_safe_cast) .ok_or(EINVAL)?; - // SAFETY: we have exclusive access to `dma_object`. - let hdr: &FalconAppifHdrV1 = unsafe { transmute(&dma_object, hdr_offset) }?; + let hdr = ucode + .get(hdr_offset..) + .and_then(FalconAppifHdrV1::from_bytes_prefix) + .ok_or(EINVAL)? + .0; if hdr.version != 1 { return Err(EINVAL); @@ -284,8 +244,11 @@ impl FirmwareDmaObject { .and_then(|o| o.checked_add(i.checked_mul(usize::from(hdr.entry_size))?)) .ok_or(EINVAL)?; - // SAFETY: we have exclusive access to `dma_object`. - let app: &FalconAppifV1 = unsafe { transmute(&dma_object, entry_offset) }?; + let app = ucode + .get(entry_offset..) + .and_then(FalconAppifV1::from_bytes_prefix) + .ok_or(EINVAL)? + .0; if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER { continue; @@ -298,9 +261,11 @@ impl FirmwareDmaObject { .map(usize::from_safe_cast) .ok_or(EINVAL)?; - let dmem_mapper: &mut FalconAppifDmemmapperV3 = - // SAFETY: we have exclusive access to `dma_object`. - unsafe { transmute_mut(&mut dma_object, dmem_mapper_offset) }?; + let dmem_mapper = ucode + .get_mut(dmem_mapper_offset..) + .and_then(FalconAppifDmemmapperV3::from_bytes_mut_prefix) + .ok_or(EINVAL)? + .0; dmem_mapper.init_cmd = match cmd { FwsecCommand::Frts { .. } => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS, @@ -314,9 +279,11 @@ impl FirmwareDmaObject { .map(usize::from_safe_cast) .ok_or(EINVAL)?; - let frts_cmd: &mut FrtsCmd = - // SAFETY: we have exclusive access to `dma_object`. - unsafe { transmute_mut(&mut dma_object, frts_cmd_offset) }?; + let frts_cmd = ucode + .get_mut(frts_cmd_offset..) + .and_then(FrtsCmd::from_bytes_mut_prefix) + .ok_or(EINVAL)? + .0; frts_cmd.read_vbios = ReadVbios { ver: 1, @@ -340,7 +307,7 @@ impl FirmwareDmaObject { } // Return early as we found and patched the DMEMMAPPER region. - return Ok(Self(dma_object, PhantomData)); + return Ok(Self(ucode, PhantomData)); } Err(ENOTSUPP) @@ -357,7 +324,7 @@ impl FwsecFirmware { bios: &Vbios, cmd: FwsecCommand, ) -> Result { - let ucode_dma = FirmwareDmaObject::::new_fwsec(dev, bios, cmd)?; + let ucode_dma = FirmwareObject::::new_fwsec(bios, cmd)?; // Patch signature if needed. let desc = bios.fwsec_image().header()?; @@ -429,7 +396,7 @@ impl FwsecFirmware { .reset(bar) .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; falcon - .load(bar, self) + .load(dev, bar, self) .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; let (mbox0, _) = falcon .boot(bar, Some(0), None) diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index c56029f444cbb..78957ed8814f6 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -178,7 +178,7 @@ impl super::Gsp { ); sec2_falcon.reset(bar)?; - sec2_falcon.load(bar, &booter_loader)?; + sec2_falcon.load(dev, bar, &booter_loader)?; let wpr_handle = wpr_meta.dma_handle(); let (mbox0, mbox1) = sec2_falcon.boot( bar,