git.ipfire.org Git - thirdparty/linux.git/commitdiff
gpu: nova-core: Blackwell: use correct sysmem flush registers
author: John Hubbard <jhubbard@nvidia.com>
Tue, 2 Jun 2026 03:20:54 +0000 (20:20 -0700)
committer: Alexandre Courbot <acourbot@nvidia.com>
Tue, 2 Jun 2026 13:33:15 +0000 (22:33 +0900)
Blackwell GPUs moved the sysmem flush page registers away from the
Ampere/Ada location. GB10x routes the flush through a pair of HSHUB0
register sets (primary and egress) that must both be programmed to
the same address. GB20x routes it through FBHUB0.

Define these registers relative to their HSHUB0 and FBHUB0 bases, as
Open RM does, and implement the flush paths in the GB10x and GB20x
framebuffer HALs.

Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Eliot Courtney <ecourtney@nvidia.com>
Link: https://patch.msgid.link/20260602032111.224790-7-jhubbard@nvidia.com
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
drivers/gpu/nova-core/fb/hal/gb100.rs
drivers/gpu/nova-core/fb/hal/gb202.rs
drivers/gpu/nova-core/regs.rs

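diff --git a/drivers/gpu/nova-core/fb/hal/gb100.rs b/drivers/gpu/nova-core/fb/hal/gb100.rs
index 8d63350abf8aa0d2d724ed2a0eebee5a5f516c31..ecea4ff446fff2becf5c42ae315d8062851ffd9f 100644 (file)
--- a/drivers/gpu/nova-core/fb/hal/gb100.rs
+++ b/drivers/gpu/nova-core/fb/hal/gb100.rs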
@@ -4,6 +4,14 @@
 //! Blackwell GB10x framebuffer HAL.
 
 use kernel::{
+    io::{
+        register::{
+            RegisterBase,
+            WithBase, //
+        },
+        Io, //
+    },
+    num::Bounded,
     prelude::*,
     ptr::{
         const_align_up,
@@ -15,11 +23,61 @@ use kernel::{
 use crate::{
     driver::Bar0,
     fb::hal::FbHal,
-    num::usize_into_u32, //
+    num::usize_into_u32,
+    regs, //
 };
 
 struct Gb100;
 
+impl RegisterBase<regs::Hshub0Base> for Gb100 {
+    const BASE: usize = 0x0087_0000; // Fixed HSHUB0 base for `Hshub0Base`-relative registers.
+}
+
+fn read_sysmem_flush_page_gb100(bar: &Bar0) -> u64 {
+    let lo = u64::from(
+        bar.read(regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb100>())
+            .adr(), // Address bits 31:0.
+    );
+    let hi = u64::from(
+        bar.read(regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb100>())
+            .adr(), // Address bits 32 and up (20-bit field).
+    );
+    // Only the primary pair is read back, not the EG pair. Recombine the two halves.
+    lo | (hi << 32)
+}
+
+/// Program the sysmem flush page address into the GB10x HSHUB0 registers.
+///
+/// Hardware requires the primary and EG (egress) register pairs to hold the same address, so
+/// the same HI and LO values derived from `addr` are written to both pairs.
+fn write_sysmem_flush_page_gb100(bar: &Bar0, addr: Bounded<u64, 52>) {
+    // CAST: truncating to the lower 32 bits is intended. Hardware ignores bits 7:0.
+    let addr_lo = *addr as u32;
+    let addr_hi = addr.shr::<32, 20>().cast::<u32>(); // Bits 51:32 of `addr`.
+
+    // In each pair, write HI first. The hardware will trigger the flush on the LO write.
+
+    // Primary HSHUB pair.
+    bar.write(
+        regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb100>(),
+        regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi),
+    );
+    bar.write(
+        regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb100>(),
+        regs::NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo),
+    );
+
+    // EG (egress) pair -- receives the same HI/LO values as the primary pair.
+    bar.write(
+        regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb100>(),
+        regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi),
+    );
+    bar.write(
+        regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb100>(),
+        regs::NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo),
+    );
+}
+
 pub(super) const fn pmu_reserved_size_gb100() -> u32 {
     usize_into_u32::<{ const_align_up(SZ_8M + SZ_16M + SZ_4K, Alignment::new::<SZ_128K>()).unwrap() }>(
     )
@@ -27,11 +85,13 @@ pub(super) const fn pmu_reserved_size_gb100() -> u32 {
 
 impl FbHal for Gb100 {
     fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
-        super::ga100::read_sysmem_flush_page_ga100(bar)
+        read_sysmem_flush_page_gb100(bar)
     }
 
     fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
-        super::ga100::write_sysmem_flush_page_ga100(bar, addr);
+        let addr = Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?;
+
+        write_sysmem_flush_page_gb100(bar, addr);
 
         Ok(())
     }
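diff --git a/drivers/gpu/nova-core/fb/hal/gb202.rs b/drivers/gpu/nova-core/fb/hal/gb202.rs
index 542c1d7429e94f6a346df6a86cee75119c296c12..fa5c3f7f2b2ee81115f9ec00d76551645c721e59 100644 (file)
--- a/drivers/gpu/nova-core/fb/hal/gb202.rs
+++ b/drivers/gpu/nova-core/fb/hal/gb202.rs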
@@ -4,24 +4,67 @@
 //! Blackwell GB20x framebuffer HAL.
 
 use kernel::{
+    io::{
+        register::{
+            RegisterBase,
+            WithBase, //
+        },
+        Io, //
+    },
+    num::Bounded,
     prelude::*,
     sizes::SizeConstants, //
 };
 
 use crate::{
     driver::Bar0,
-    fb::hal::FbHal, //
+    fb::hal::FbHal,
+    regs, //
 };
 
 struct Gb202;
 
+impl RegisterBase<regs::Fbhub0Base> for Gb202 {
+    const BASE: usize = 0x008a_0000; // Fixed FBHUB0 base for `Fbhub0Base`-relative registers.
+}
+
+fn read_sysmem_flush_page_gb202(bar: &Bar0) -> u64 {
+    let lo = u64::from(
+        bar.read(regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb202>())
+            .adr(), // Address bits 31:0.
+    );
+    let hi = u64::from(
+        bar.read(regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb202>())
+            .adr(), // Address bits 32 and up (20-bit field).
+    );
+    // Recombine the two register halves into one 64-bit address.
+    lo | (hi << 32)
+}
+
+/// Program the sysmem flush page address into the GB20x FBHUB0 HI/LO register pair.
+fn write_sysmem_flush_page_gb202(bar: &Bar0, addr: Bounded<u64, 52>) {
+    // Write HI first. The hardware will trigger the flush on the LO write.
+    bar.write(
+        regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::of::<Gb202>(),
+        regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed()
+            .with_adr(addr.shr::<32, 20>().cast::<u32>()), // Bits 51:32 of `addr`.
+    );
+    bar.write(
+        regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::of::<Gb202>(),
+        // CAST: truncating to the lower 32 bits is intended. Hardware ignores bits 7:0.
+        regs::NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(*addr as u32),
+    );
+}
+
 impl FbHal for Gb202 {
     fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
-        super::ga100::read_sysmem_flush_page_ga100(bar)
+        read_sysmem_flush_page_gb202(bar)
     }
 
     fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
-        super::ga100::write_sysmem_flush_page_ga100(bar, addr);
+        let addr = Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?;
+
+        write_sysmem_flush_page_gb202(bar, addr);
 
         Ok(())
     }
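diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index 356fbf364ea533e48e91688666306cf6c031602f..b39647684dd1f75a3f67116884589f33a38583fb 100644 (file)
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs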
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 
 use kernel::{
     io::{
@@ -147,6 +148,50 @@ register! {
     }
 }
 
+/// Base of the GB10x HSHUB0 register window (`NV_HSHUB0_PRIV_BASE` in Open RM).
+///
+/// Marker type for base-relative registers; the GB10x framebuffer HAL provides the base address.
+pub(crate) struct Hshub0Base(());
+
+/// Base of the GB20x FBHUB0 register window (`NV_FBHUB0_PRI_BASE` in Open RM).
+///
+/// Marker type for base-relative registers; the GB20x framebuffer HAL provides the base address.
+pub(crate) struct Fbhub0Base(());
+
+register! {
+    // GB10x sysmem flush registers, relative to the HSHUB0 base. GB10x routes sysmembar
+    // through a primary and an EG (egress) pair that must both be programmed to the same
+    // address. Hardware ignores bits 7:0 of each LO register. The boot path uses a fixed
+    // HSHUB0 base, so the multiple runtime-discovered HSHUB bases are not needed here.
+    pub(crate) NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ Hshub0Base + 0x00000e50 {
+        31:0    adr => u32; // Address bits 31:0.
+    }
+
+    pub(crate) NV_PFB_HSHUB_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ Hshub0Base + 0x00000e54 {
+        19:0    adr; // Address bits 51:32.
+    }
+
+    pub(crate) NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ Hshub0Base + 0x000006c0 {
+        31:0    adr => u32; // Address bits 31:0.
+    }
+
+    pub(crate) NV_PFB_HSHUB_EG_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ Hshub0Base + 0x000006c4 {
+        19:0    adr; // Address bits 51:32.
+    }
+
+    // GB20x sysmem flush registers, relative to the FBHUB0 base. Unlike the older
+    // NV_PFB_NISO_FLUSH_SYSMEM_ADDR registers, which encode the address with an 8-bit
+    // right-shift, these take the raw address split into lower and upper halves. Hardware
+    // ignores bits 7:0 of the LO register.
+    pub(crate) NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ Fbhub0Base + 0x00001d58 {
+        31:0    adr => u32; // Address bits 31:0.
+    }
+
+    pub(crate) NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ Fbhub0Base + 0x00001d5c {
+        19:0    adr; // Address bits 51:32.
+    }
+}
+
 impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE {
     /// Returns the usable framebuffer size, in bytes.
     pub(crate) fn usable_fb_size(self) -> u64 {