From: Alexandre Courbot Date: Fri, 29 May 2026 07:33:44 +0000 (+0900) Subject: gpu: nova-core: gsp: run the unload bundle if Gsp::boot() fails X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=75d59327367dc6e2141cf4e11cdf57c55851b5c2;p=thirdparty%2Flinux.git gpu: nova-core: gsp: run the unload bundle if Gsp::boot() fails If `Gsp::boot` fails, the GSP can be left in a state where boot cannot be attempted again unless it is reset first. To avoid this, we want to run the unload bundle whenever `boot` fails to try and clear the partially-initialized state. Do this by wrapping the unload bundle into a drop guard up until `boot` returns. After that, running the unload bundle becomes the responsibility of the caller. Reviewed-by: Danilo Krummrich Reviewed-by: Eliot Courtney Link: https://patch.msgid.link/20260529-nova-unload-v7-4-678f39209e00@nvidia.com Signed-off-by: Alexandre Courbot --- diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index f92ff2034d2d5..087ee59da6d9a 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -8,7 +8,8 @@ use kernel::{ io::poll::read_poll_timeout, pci, prelude::*, - time::Delta, // + time::Delta, + types::ScopeGuard, // }; use crate::{ @@ -31,6 +32,66 @@ use crate::{ }, }; +/// Arguments required to call [`Gsp::unload`](super::Gsp::unload). +/// +/// Stored as their own type to avoid repeating a long and tedious list in [`BootUnloadGuard`]. +pub(super) struct BootUnloadArgs<'a> { + gsp: &'a super::Gsp, + dev: &'a device::Device, + bar: &'a Bar0, + gsp_falcon: &'a Falcon, + sec2_falcon: &'a Falcon, + unload_bundle: Option, +} + +/// Guard that calls [`Gsp::unload`](super::Gsp::unload) with a +/// [`UnloadBundle`](super::UnloadBundle) when dropped. +/// +/// Used to ensure the `UnloadBundle` is run during failure paths. +pub(super) struct BootUnloadGuard<'a> { + guard: ScopeGuard, fn(BootUnloadArgs<'a>)>, +} + +impl<'a> BootUnloadGuard<'a> { + /// Wraps `unload_bundle` into a guard that executes it when dropped. + pub(super) fn new( + gsp: &'a super::Gsp, + dev: &'a device::Device, + bar: &'a Bar0, + gsp_falcon: &'a Falcon, + sec2_falcon: &'a Falcon, + unload_bundle: Option, + ) -> Self { + Self { + guard: ScopeGuard::new_with_data( + BootUnloadArgs { + gsp, + dev, + bar, + gsp_falcon, + sec2_falcon, + unload_bundle, + }, + |args| { + let _ = super::Gsp::unload( + args.gsp, + args.dev, + args.bar, + args.gsp_falcon, + args.sec2_falcon, + args.unload_bundle, + ); + }, + ), + } + } + + /// Disarms the guard and returns the [`UnloadBundle`](super::UnloadBundle) it contains. + pub(super) fn dismiss(self) -> Option { + self.guard.dismiss().unload_bundle + } +} + impl super::Gsp { /// Attempt to boot the GSP. /// @@ -59,7 +120,7 @@ impl super::Gsp { let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?; // Perform the chipset-specific boot sequence, and retrieve the unload bundle. - let unload_bundle = hal.boot( + let unload_guard = hal.boot( &self, dev, bar, @@ -99,7 +160,7 @@ impl super::Gsp { Err(e) => dev_warn!(pdev, "GPU name unavailable: {:?}\n", e), } - Ok(unload_bundle) + Ok(unload_guard.dismiss()) } /// Shut down the GSP and wait until it is offline. diff --git a/drivers/gpu/nova-core/gsp/hal.rs b/drivers/gpu/nova-core/gsp/hal.rs index 501b852dcb297..88fc3e7911148 100644 --- a/drivers/gpu/nova-core/gsp/hal.rs +++ b/drivers/gpu/nova-core/gsp/hal.rs @@ -25,6 +25,7 @@ use crate::{ Chipset, // }, gsp::{ + boot::BootUnloadGuard, Gsp, GspFwWprMeta, // }, @@ -50,20 +51,20 @@ pub(super) trait UnloadBundle: Send { pub(super) trait GspHal: Send { /// Performs the GSP boot process, loading and running the required firmwares as needed. /// - /// Upon success, returns the [`UnloadBundle`] to be run (if any) in order to properly reset the - /// GSP after it has been stopped. + /// Upon success, returns a guard that runs the GSP unload sequence if GSP boot does not + /// complete. #[allow(clippy::too_many_arguments)] - fn boot( + fn boot<'a>( &self, - gsp: &Gsp, - dev: &device::Device, - bar: &Bar0, + gsp: &'a Gsp, + dev: &'a device::Device, + bar: &'a Bar0, chipset: Chipset, fb_layout: &FbLayout, wpr_meta: &Coherent, - gsp_falcon: &Falcon, - sec2_falcon: &Falcon, - ) -> Result>; + gsp_falcon: &'a Falcon, + sec2_falcon: &'a Falcon, + ) -> Result>; /// Performs HAL-specific post-GSP boot tasks. /// diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core/gsp/hal/gh100.rs index 0a8b7f763883a..9a4bb22578b3a 100644 --- a/drivers/gpu/nova-core/gsp/hal/gh100.rs +++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs @@ -18,6 +18,7 @@ use crate::{ fb::FbLayout, gpu::Chipset, gsp::{ + boot::BootUnloadGuard, hal::GspHal, Gsp, GspFwWprMeta, // @@ -31,17 +32,17 @@ impl GspHal for Gh100 { /// /// This path uses FSP to establish a chain of trust and boot GSP-FMC. FSP handles /// the GSP boot internally - no manual GSP reset/boot is needed. - fn boot( + fn boot<'a>( &self, - _gsp: &Gsp, - _dev: &device::Device, - _bar: &Bar0, + _gsp: &'a Gsp, + _dev: &'a device::Device, + _bar: &'a Bar0, _chipset: Chipset, _fb_layout: &FbLayout, _wpr_meta: &Coherent, - _gsp_falcon: &Falcon, - _sec2_falcon: &Falcon, - ) -> Result> { + _gsp_falcon: &'a Falcon, + _sec2_falcon: &'a Falcon, + ) -> Result> { Err(ENOTSUPP) } } diff --git a/drivers/gpu/nova-core/gsp/hal/tu102.rs b/drivers/gpu/nova-core/gsp/hal/tu102.rs index 53d117ccdddbb..a033bc8920667 100644 --- a/drivers/gpu/nova-core/gsp/hal/tu102.rs +++ b/drivers/gpu/nova-core/gsp/hal/tu102.rs @@ -32,6 +32,7 @@ use crate::{ }, gpu::Chipset, gsp::{ + boot::BootUnloadGuard, hal::{ GspHal, UnloadBundle, // @@ -254,21 +255,23 @@ fn run_fwsec_frts( struct Tu102; impl GspHal for Tu102 { - fn boot( + fn boot<'a>( &self, - gsp: &Gsp, - dev: &device::Device, - bar: &Bar0, + gsp: &'a Gsp, + dev: &'a device::Device, + bar: &'a Bar0, chipset: Chipset, fb_layout: &FbLayout, wpr_meta: &Coherent, - gsp_falcon: &Falcon, - sec2_falcon: &Falcon, - ) -> Result> { + gsp_falcon: &'a Falcon, + sec2_falcon: &'a Falcon, + ) -> Result> { let bios = Vbios::new(dev, bar)?; - // Try and prepare the unload bundle. If this fails, the GPU will need to be reset - // before the driver can be probed again. + // Try and prepare the unload bundle. + // + // If the unload bundle creation fails, the GPU will need to be reset before the driver can + // be probed again. let unload_bundle = Sec2UnloadBundle::build(dev, bar, chipset, &bios, gsp_falcon, sec2_falcon) .inspect_err(|e| { @@ -279,8 +282,12 @@ impl GspHal for Tu102 { "The GPU will need to be reset before the driver can bind again.\n" ); }) - .map(crate::gsp::UnloadBundle) - .ok(); + .ok() + .map(crate::gsp::UnloadBundle); + + // Wrap the unload bundle into a drop guard so it is automatically run upon failure. + let unload_guard = + BootUnloadGuard::new(gsp, dev, bar, gsp_falcon, sec2_falcon, unload_bundle); // FWSEC-FRTS is not executed on chips where the FRTS region size is 0 (e.g. GA100). if !fb_layout.frts.is_empty() { @@ -311,7 +318,7 @@ impl GspHal for Tu102 { )? .run(dev, bar, sec2_falcon, wpr_meta)?; - Ok(unload_bundle) + Ok(unload_guard) } fn post_boot(