From: John Hubbard Date: Tue, 2 Jun 2026 03:20:54 +0000 (-0700) Subject: gpu: nova-core: Blackwell: use correct sysmem flush registers X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a5bf742bc28a4e064059c8d137970e56669a21a0;p=thirdparty%2Fkernel%2Fstable.git gpu: nova-core: Blackwell: use correct sysmem flush registers Blackwell GPUs moved the sysmem flush page registers away from the Ampere/Ada location. GB10x routes the flush through a pair of HSHUB0 register sets (primary and egress) that must both be programmed to the same address. GB20x routes it through FBHUB0. Define these registers relative to their HSHUB0 and FBHUB0 bases, as Open RM does, and implement the flush paths in the GB10x and GB20x framebuffer HALs. Signed-off-by: John Hubbard Reviewed-by: Eliot Courtney Link: https://patch.msgid.link/20260602032111.224790-7-jhubbard@nvidia.com Signed-off-by: Alexandre Courbot --- diff --git a/drivers/gpu/nova-core/fb/hal/gb100.rs b/drivers/gpu/nova-core/fb/hal/gb100.rs index 8d63350abf8a..ecea4ff446ff 100644 --- a/drivers/gpu/nova-core/fb/hal/gb100.rs +++ b/drivers/gpu/nova-core/fb/hal/gb100.rs @@ -4,6 +4,14 @@ //! Blackwell GB10x framebuffer HAL. use kernel::{ + io::{ + register::{ + RegisterBase, + WithBase, // + }, + Io, // + }, + num::Bounded, prelude::*, ptr::{ const_align_up, @@ -15,11 +23,61 @@ use kernel::{ use crate::{ driver::Bar0, fb::hal::FbHal, - num::usize_into_u32, // + num::usize_into_u32, + regs, // }; struct Gb100; +impl RegisterBase<regs::Hshub0Base> for Gb100 { + const BASE: usize = 0x0087_0000; +} + +fn read_sysmem_flush_page_gb100(bar: &Bar0) -> u64 { + let lo = u64::from( + bar.read(regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb100>()) + .adr(), + ); + let hi = u64::from( + bar.read(regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb100>()) + .adr(), + ); + + lo | (hi << 32) +} + +/// Write the sysmem flush page address through the GB10x HSHUB0 registers. 
+/// +/// Both the primary and EG (egress) register pairs must be programmed to the same address, +/// as required by hardware. +fn write_sysmem_flush_page_gb100(bar: &Bar0, addr: Bounded<u64, 52>) { + // CAST: lower 32 bits. Hardware ignores bits 7:0. + let addr_lo = *addr as u32; + let addr_hi = addr.shr::<32, 20>().cast::<u32>(); + + // Write HI first. The hardware will trigger the flush on the LO write. + + // Primary HSHUB pair. + bar.write( + regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb100>(), + regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi), + ); + bar.write( + regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb100>(), + regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo), + ); + + // EG (egress) pair -- must match the primary pair. + bar.write( + regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb100>(), + regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi), + ); + bar.write( + regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb100>(), + regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo), + ); +} + pub(super) const fn pmu_reserved_size_gb100() -> u32 { usize_into_u32::<{ const_align_up(SZ_8M + SZ_16M + SZ_4K, Alignment::new::()).unwrap() }>( ) @@ -27,11 +85,13 @@ pub(super) const fn pmu_reserved_size_gb100() -> u32 { impl FbHal for Gb100 { fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { - super::ga100::read_sysmem_flush_page_ga100(bar) + read_sysmem_flush_page_gb100(bar) } fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { - super::ga100::write_sysmem_flush_page_ga100(bar, addr); + let addr = Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?; + + write_sysmem_flush_page_gb100(bar, addr); Ok(()) } diff --git a/drivers/gpu/nova-core/fb/hal/gb202.rs b/drivers/gpu/nova-core/fb/hal/gb202.rs index 542c1d7429e9..fa5c3f7f2b2e 100644 --- a/drivers/gpu/nova-core/fb/hal/gb202.rs +++ b/drivers/gpu/nova-core/fb/hal/gb202.rs @@ -4,24 +4,67 @@ //! Blackwell GB20x framebuffer HAL. 
use kernel::{ + io::{ + register::{ + RegisterBase, + WithBase, // + }, + Io, // + }, + num::Bounded, prelude::*, sizes::SizeConstants, // }; use crate::{ driver::Bar0, - fb::hal::FbHal, // + fb::hal::FbHal, + regs, // }; struct Gb202; +impl RegisterBase<regs::Fbhub0Base> for Gb202 { + const BASE: usize = 0x008a_0000; +} + +fn read_sysmem_flush_page_gb202(bar: &Bar0) -> u64 { + let lo = u64::from( + bar.read(regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb202>()) + .adr(), + ); + let hi = u64::from( + bar.read(regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb202>()) + .adr(), + ); + + lo | (hi << 32) +} + +/// Write the sysmem flush page address through the GB20x FBHUB0 registers. +fn write_sysmem_flush_page_gb202(bar: &Bar0, addr: Bounded<u64, 52>) { + // Write HI first. The hardware will trigger the flush on the LO write. + bar.write( + regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb202>(), + regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed() + .with_adr(addr.shr::<32, 20>().cast::<u32>()), + ); + bar.write( + regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb202>(), + // CAST: lower 32 bits. Hardware ignores bits 7:0. + regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(*addr as u32), + ); +} + impl FbHal for Gb202 { fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { - super::ga100::read_sysmem_flush_page_ga100(bar) + read_sysmem_flush_page_gb202(bar) } fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { - super::ga100::write_sysmem_flush_page_ga100(bar, addr); + let addr = Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?; + + write_sysmem_flush_page_gb202(bar, addr); Ok(()) } diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 356fbf364ea5..b39647684dd1 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. use kernel::{ io::{ @@ -147,6 +148,50 @@ register! 
{ } } +/// Base of the GB10x HSHUB0 register window (`NV_HSHUB0_PRIV_BASE` in Open RM). +/// +/// The base is provided by the GB10x framebuffer HAL. +pub(crate) struct Hshub0Base(()); + +/// Base of the GB20x FBHUB0 register window (`NV_FBHUB0_PRI_BASE` in Open RM). +/// +/// The base is provided by the GB20x framebuffer HAL. +pub(crate) struct Fbhub0Base(()); + +register! { + // GB10x sysmem flush registers, relative to the HSHUB0 base. GB10x routes sysmembar + // through a primary and an EG (egress) pair that must both be programmed to the same + // address. Hardware ignores bits 7:0 of each LO register. The boot path uses a fixed + // HSHUB0 base, so the multiple runtime-discovered HSHUB bases are not needed here. + pub(crate) NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ Hshub0Base + 0x00000e50 { + 31:0 adr => u32; + } + + pub(crate) NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ Hshub0Base + 0x00000e54 { + 19:0 adr; + } + + pub(crate) NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ Hshub0Base + 0x000006c0 { + 31:0 adr => u32; + } + + pub(crate) NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ Hshub0Base + 0x000006c4 { + 19:0 adr; + } + + // GB20x sysmem flush registers, relative to the FBHUB0 base. Unlike the older + // NV_PFB_NISO_FLUSH_SYSMEM_ADDR registers which encode the address with an 8-bit + // right-shift, these take the raw address split into lower and upper halves. Hardware + // ignores bits 7:0 of the LO register. + pub(crate) NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ Fbhub0Base + 0x00001d58 { + 31:0 adr => u32; + } + + pub(crate) NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ Fbhub0Base + 0x00001d5c { + 19:0 adr; + } +} + impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { /// Returns the usable framebuffer size, in bytes. pub(crate) fn usable_fb_size(self) -> u64 {