Replace the hardcoded 47-bit DMA mask with a GPU HAL method that
provides the correct value for the architecture: 47 bits for the
TU102 HAL and 52 bits for the GH100 HAL.
Set the DMA mask in Gpu::new() instead of during probe. Gpu owns all
DMA allocations for the device, so no concurrent allocations can exist
while the constructor is still running, which is what makes the unsafe
dma_set_mask_and_coherent() call sound.
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Eliot Courtney <ecourtney@nvidia.com>
Acked-by: Danilo Krummrich <dakr@kernel.org>
Link: https://patch.msgid.link/20260602032111.224790-2-jhubbard@nvidia.com
Co-developed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
use kernel::{
auxiliary,
device::Core,
- dma::Device,
- dma::DmaMask,
pci,
pci::{
Class,
const BAR0_SIZE: usize = SZ_16M;
-// For now we only support Ampere which can use up to 47-bit DMA addresses.
-//
-// TODO: Add an abstraction for this to support newer GPUs which may support
-// larger DMA addresses. Limiting these GPUs to smaller address widths won't
-// have any adverse affects, unless installed on systems which require larger
-// DMA addresses. These systems should be quite rare.
-const GPU_DMA_BITS: u32 = 47;
-
pub(crate) type Bar0 = kernel::io::Mmio<BAR0_SIZE>;
kernel::pci_device_table!(
pdev.enable_device_mem()?;
pdev.set_master();
- // SAFETY: No concurrent DMA allocations or mappings can be made because
- // the device is still being probed and therefore isn't being used by
- // other threads of execution.
- unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
-
Ok(try_pin_init!(NovaCore {
bar: pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0")?,
// TODO: Use `&bar` self-referential pin-init syntax once available.
use kernel::{
device,
+ dma::Device,
fmt,
io::Io,
num::Bounded,
impl<'gpu> Gpu<'gpu> {
pub(crate) fn new(
- pdev: &'gpu pci::Device<device::Bound>,
+ pdev: &'gpu pci::Device<device::Core<'_>>,
bar: &'gpu Bar0,
) -> impl PinInit<Self, Error> + 'gpu {
try_pin_init!(Self {
// We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
_: {
- hal::gpu_hal(spec.chipset).wait_gfw_boot_completion(bar)
+ let hal = hal::gpu_hal(spec.chipset);
+ let dma_mask = hal.dma_mask();
+
+ // SAFETY: `Gpu` owns all DMA allocations for this device, and we are
+ // still constructing it, so no concurrent DMA allocations can exist.
+ unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
+
+ hal.wait_gfw_boot_completion(bar)
.inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
},
// SPDX-License-Identifier: GPL-2.0
-use kernel::prelude::*;
+use kernel::{
+ dma::DmaMask,
+ prelude::*, //
+};
use crate::{
driver::Bar0,
pub(crate) trait GpuHal {
/// Waits for GFW_BOOT completion if required by this hardware family.
fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
+
+ /// Returns the DMA mask for the current architecture.
+ fn dma_mask(&self) -> DmaMask;
}
pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
// SPDX-License-Identifier: GPL-2.0
-use kernel::prelude::*;
+use kernel::{
+ dma::DmaMask,
+ prelude::*, //
+};
use crate::driver::Bar0;
fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
Ok(())
}
+
+ fn dma_mask(&self) -> DmaMask {
+ DmaMask::new::<52>()
+ }
}
const GH100: Gh100 = Gh100;
//! Note that the devinit sequence also needs to run during suspend/resume.
use kernel::{
+ dma::DmaMask,
io::{
poll::read_poll_timeout,
Io, //
)
.map(|_| ())
}
+
+ fn dma_mask(&self) -> DmaMask {
+ DmaMask::new::<47>()
+ }
}
const TU102: Tu102 = Tu102;