summaryrefslogtreecommitdiff
path: root/drivers/gpu/nova-core
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nova-core')
-rw-r--r--drivers/gpu/nova-core/Kconfig16
-rw-r--r--drivers/gpu/nova-core/Makefile3
-rw-r--r--drivers/gpu/nova-core/bitfield.rs329
-rw-r--r--drivers/gpu/nova-core/driver.rs114
-rw-r--r--drivers/gpu/nova-core/falcon.rs789
-rw-r--r--drivers/gpu/nova-core/falcon/gsp.rs60
-rw-r--r--drivers/gpu/nova-core/falcon/hal.rs90
-rw-r--r--drivers/gpu/nova-core/falcon/hal/ga102.rs175
-rw-r--r--drivers/gpu/nova-core/falcon/hal/tu102.rs81
-rw-r--r--drivers/gpu/nova-core/falcon/sec2.rs22
-rw-r--r--drivers/gpu/nova-core/fb.rs276
-rw-r--r--drivers/gpu/nova-core/fb/hal.rs41
-rw-r--r--drivers/gpu/nova-core/fb/hal/ga100.rs72
-rw-r--r--drivers/gpu/nova-core/fb/hal/ga102.rs41
-rw-r--r--drivers/gpu/nova-core/fb/hal/tu102.rs62
-rw-r--r--drivers/gpu/nova-core/firmware.rs537
-rw-r--r--drivers/gpu/nova-core/firmware/booter.rs433
-rw-r--r--drivers/gpu/nova-core/firmware/fwsec.rs417
-rw-r--r--drivers/gpu/nova-core/firmware/fwsec/bootloader.rs350
-rw-r--r--drivers/gpu/nova-core/firmware/gsp.rs184
-rw-r--r--drivers/gpu/nova-core/firmware/riscv.rs95
-rw-r--r--drivers/gpu/nova-core/gfw.rs76
-rw-r--r--drivers/gpu/nova-core/gpu.rs288
-rw-r--r--drivers/gpu/nova-core/gsp.rs186
-rw-r--r--drivers/gpu/nova-core/gsp/boot.rs240
-rw-r--r--drivers/gpu/nova-core/gsp/cmdq.rs849
-rw-r--r--drivers/gpu/nova-core/gsp/cmdq/continuation.rs307
-rw-r--r--drivers/gpu/nova-core/gsp/commands.rs239
-rw-r--r--drivers/gpu/nova-core/gsp/fw.rs922
-rw-r--r--drivers/gpu/nova-core/gsp/fw/commands.rs131
-rw-r--r--drivers/gpu/nova-core/gsp/fw/r570_144.rs31
-rw-r--r--drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs965
-rw-r--r--drivers/gpu/nova-core/gsp/sequencer.rs400
-rw-r--r--drivers/gpu/nova-core/nova_core.rs78
-rw-r--r--drivers/gpu/nova-core/num.rs297
-rw-r--r--drivers/gpu/nova-core/regs.rs540
-rw-r--r--drivers/gpu/nova-core/sbuffer.rs224
-rw-r--r--drivers/gpu/nova-core/vbios.rs1086
38 files changed, 11046 insertions, 0 deletions
diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig
new file mode 100644
index 000000000000..a4f2380654e2
--- /dev/null
+++ b/drivers/gpu/nova-core/Kconfig
@@ -0,0 +1,16 @@
+config NOVA_CORE
+ tristate "Nova Core GPU driver"
+ depends on 64BIT
+ depends on PCI
+ depends on RUST
+ select AUXILIARY_BUS
+ select RUST_FW_LOADER_ABSTRACTIONS
+ default n
+ help
+ Choose this if you want to build the Nova Core driver for Nvidia
+ GPUs based on the GPU System Processor (GSP). This is true for Turing
+ and later GPUs.
+
+ This driver is work in progress and may not be functional.
+
+ If M is selected, the module will be called nova_core.
diff --git a/drivers/gpu/nova-core/Makefile b/drivers/gpu/nova-core/Makefile
new file mode 100644
index 000000000000..2d78c50126e1
--- /dev/null
+++ b/drivers/gpu/nova-core/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_NOVA_CORE) += nova_core.o
diff --git a/drivers/gpu/nova-core/bitfield.rs b/drivers/gpu/nova-core/bitfield.rs
new file mode 100644
index 000000000000..02efdcf78d89
--- /dev/null
+++ b/drivers/gpu/nova-core/bitfield.rs
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Bitfield library for Rust structures
+//!
+//! Support for defining bitfields in Rust structures. Also used by the [`register!`] macro.
+
+/// Defines a struct with accessors to access bits within an inner unsigned integer.
+///
+/// # Syntax
+///
+/// ```rust
+/// use nova_core::bitfield;
+///
+/// #[derive(Debug, Clone, Copy, Default)]
+/// enum Mode {
+/// #[default]
+/// Low = 0,
+/// High = 1,
+/// Auto = 2,
+/// }
+///
+/// impl TryFrom<u8> for Mode {
+/// type Error = u8;
+/// fn try_from(value: u8) -> Result<Self, Self::Error> {
+/// match value {
+/// 0 => Ok(Mode::Low),
+/// 1 => Ok(Mode::High),
+/// 2 => Ok(Mode::Auto),
+/// _ => Err(value),
+/// }
+/// }
+/// }
+///
+/// impl From<Mode> for u8 {
+/// fn from(mode: Mode) -> u8 {
+/// mode as u8
+/// }
+/// }
+///
+/// #[derive(Debug, Clone, Copy, Default)]
+/// enum State {
+/// #[default]
+/// Inactive = 0,
+/// Active = 1,
+/// }
+///
+/// impl From<bool> for State {
+/// fn from(value: bool) -> Self {
+/// if value { State::Active } else { State::Inactive }
+/// }
+/// }
+///
+/// impl From<State> for bool {
+/// fn from(state: State) -> bool {
+/// match state {
+/// State::Inactive => false,
+/// State::Active => true,
+/// }
+/// }
+/// }
+///
+/// bitfield! {
+/// pub struct ControlReg(u32) {
+/// 7:7 state as bool => State;
+/// 3:0 mode as u8 ?=> Mode;
+/// }
+/// }
+/// ```
+///
+/// This generates a struct with:
+/// - Field accessors: `mode()`, `state()`, etc.
+/// - Field setters: `set_mode()`, `set_state()`, etc. (supports chaining with builder pattern).
+/// Note that the compiler will error out if the size of the setter's arg exceeds the
+/// struct's storage size.
+/// - Debug and Default implementations.
+///
+/// Note: Field accessors and setters inherit the same visibility as the struct itself.
+/// In the example above, both `mode()` and `set_mode()` methods will be `pub`.
+///
+/// Fields are defined as follows:
+///
+/// - `as <type>` simply returns the field value casted to <type>, typically `u32`, `u16`, `u8` or
+/// `bool`. Note that `bool` fields must have a range of 1 bit.
+/// - `as <type> => <into_type>` calls `<into_type>`'s `From::<<type>>` implementation and returns
+/// the result.
+/// - `as <type> ?=> <try_into_type>` calls `<try_into_type>`'s `TryFrom::<<type>>` implementation
+/// and returns the result. This is useful with fields for which not all values are valid.
+macro_rules! bitfield {
+ // Main entry point - defines the bitfield struct with fields
+ ($vis:vis struct $name:ident($storage:ty) $(, $comment:literal)? { $($fields:tt)* }) => {
+ bitfield!(@core $vis $name $storage $(, $comment)? { $($fields)* });
+ };
+
+ // All rules below are helpers.
+
+ // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`,
+ // `Default`, and conversion to the value type) and field accessor methods.
+ (@core $vis:vis $name:ident $storage:ty $(, $comment:literal)? { $($fields:tt)* }) => {
+ $(
+ #[doc=$comment]
+ )?
+ #[repr(transparent)]
+ #[derive(Clone, Copy)]
+ $vis struct $name($storage);
+
+ impl ::core::convert::From<$name> for $storage {
+ fn from(val: $name) -> $storage {
+ val.0
+ }
+ }
+
+ bitfield!(@fields_dispatcher $vis $name $storage { $($fields)* });
+ };
+
+ // Captures the fields and passes them to all the implementers that require field information.
+ //
+ // Used to simplify the matching rules for implementers, so they don't need to match the entire
+ // complex fields rule even though they only make use of part of it.
+ (@fields_dispatcher $vis:vis $name:ident $storage:ty {
+ $($hi:tt:$lo:tt $field:ident as $type:tt
+ $(?=> $try_into_type:ty)?
+ $(=> $into_type:ty)?
+ $(, $comment:literal)?
+ ;
+ )*
+ }
+ ) => {
+ bitfield!(@field_accessors $vis $name $storage {
+ $(
+ $hi:$lo $field as $type
+ $(?=> $try_into_type)?
+ $(=> $into_type)?
+ $(, $comment)?
+ ;
+ )*
+ });
+ bitfield!(@debug $name { $($field;)* });
+ bitfield!(@default $name { $($field;)* });
+ };
+
+ // Defines all the field getter/setter methods for `$name`.
+ (
+ @field_accessors $vis:vis $name:ident $storage:ty {
+ $($hi:tt:$lo:tt $field:ident as $type:tt
+ $(?=> $try_into_type:ty)?
+ $(=> $into_type:ty)?
+ $(, $comment:literal)?
+ ;
+ )*
+ }
+ ) => {
+ $(
+ bitfield!(@check_field_bounds $hi:$lo $field as $type);
+ )*
+
+ #[allow(dead_code)]
+ impl $name {
+ $(
+ bitfield!(@field_accessor $vis $name $storage, $hi:$lo $field as $type
+ $(?=> $try_into_type)?
+ $(=> $into_type)?
+ $(, $comment)?
+ ;
+ );
+ )*
+ }
+ };
+
+ // Boolean fields must have `$hi == $lo`.
+ (@check_field_bounds $hi:tt:$lo:tt $field:ident as bool) => {
+ #[allow(clippy::eq_op)]
+ const _: () = {
+ ::kernel::build_assert!(
+ $hi == $lo,
+ concat!("boolean field `", stringify!($field), "` covers more than one bit")
+ );
+ };
+ };
+
+ // Non-boolean fields must have `$hi >= $lo`.
+ (@check_field_bounds $hi:tt:$lo:tt $field:ident as $type:tt) => {
+ #[allow(clippy::eq_op)]
+ const _: () = {
+ ::kernel::build_assert!(
+ $hi >= $lo,
+ concat!("field `", stringify!($field), "`'s MSB is smaller than its LSB")
+ );
+ };
+ };
+
+ // Catches fields defined as `bool` and convert them into a boolean value.
+ (
+ @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool
+ => $into_type:ty $(, $comment:literal)?;
+ ) => {
+ bitfield!(
+ @leaf_accessor $vis $name $storage, $hi:$lo $field
+ { |f| <$into_type>::from(f != 0) }
+ bool $into_type => $into_type $(, $comment)?;
+ );
+ };
+
+ // Shortcut for fields defined as `bool` without the `=>` syntax.
+ (
+ @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool
+ $(, $comment:literal)?;
+ ) => {
+ bitfield!(
+ @field_accessor $vis $name $storage, $hi:$lo $field as bool => bool $(, $comment)?;
+ );
+ };
+
+ // Catches the `?=>` syntax for non-boolean fields.
+ (
+ @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt
+ ?=> $try_into_type:ty $(, $comment:literal)?;
+ ) => {
+ bitfield!(@leaf_accessor $vis $name $storage, $hi:$lo $field
+ { |f| <$try_into_type>::try_from(f as $type) } $type $try_into_type =>
+ ::core::result::Result<
+ $try_into_type,
+ <$try_into_type as ::core::convert::TryFrom<$type>>::Error
+ >
+ $(, $comment)?;);
+ };
+
+ // Catches the `=>` syntax for non-boolean fields.
+ (
+ @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt
+ => $into_type:ty $(, $comment:literal)?;
+ ) => {
+ bitfield!(@leaf_accessor $vis $name $storage, $hi:$lo $field
+ { |f| <$into_type>::from(f as $type) } $type $into_type => $into_type $(, $comment)?;);
+ };
+
+ // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax.
+ (
+ @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt
+ $(, $comment:literal)?;
+ ) => {
+ bitfield!(
+ @field_accessor $vis $name $storage, $hi:$lo $field as $type => $type $(, $comment)?;
+ );
+ };
+
+ // Generates the accessor methods for a single field.
+ (
+ @leaf_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident
+ { $process:expr } $prim_type:tt $to_type:ty => $res_type:ty $(, $comment:literal)?;
+ ) => {
+ ::kernel::macros::paste!(
+ const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi;
+ const [<$field:upper _MASK>]: $storage = {
+ // Generate mask for shifting
+ match ::core::mem::size_of::<$storage>() {
+ 1 => ::kernel::bits::genmask_u8($lo..=$hi) as $storage,
+ 2 => ::kernel::bits::genmask_u16($lo..=$hi) as $storage,
+ 4 => ::kernel::bits::genmask_u32($lo..=$hi) as $storage,
+ 8 => ::kernel::bits::genmask_u64($lo..=$hi) as $storage,
+ _ => ::kernel::build_error!("Unsupported storage type size")
+ }
+ };
+ const [<$field:upper _SHIFT>]: u32 = $lo;
+ );
+
+ $(
+ #[doc="Returns the value of this field:"]
+ #[doc=$comment]
+ )?
+ #[inline(always)]
+ $vis fn $field(self) -> $res_type {
+ ::kernel::macros::paste!(
+ const MASK: $storage = $name::[<$field:upper _MASK>];
+ const SHIFT: u32 = $name::[<$field:upper _SHIFT>];
+ );
+ let field = ((self.0 & MASK) >> SHIFT);
+
+ $process(field)
+ }
+
+ ::kernel::macros::paste!(
+ $(
+ #[doc="Sets the value of this field:"]
+ #[doc=$comment]
+ )?
+ #[inline(always)]
+ $vis fn [<set_ $field>](mut self, value: $to_type) -> Self {
+ const MASK: $storage = $name::[<$field:upper _MASK>];
+ const SHIFT: u32 = $name::[<$field:upper _SHIFT>];
+ let value = ($storage::from($prim_type::from(value)) << SHIFT) & MASK;
+ self.0 = (self.0 & !MASK) | value;
+
+ self
+ }
+ );
+ };
+
+ // Generates the `Debug` implementation for `$name`.
+ (@debug $name:ident { $($field:ident;)* }) => {
+ impl ::kernel::fmt::Debug for $name {
+ fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result {
+ f.debug_struct(stringify!($name))
+ .field("<raw>", &::kernel::prelude::fmt!("{:#x}", &self.0))
+ $(
+ .field(stringify!($field), &self.$field())
+ )*
+ .finish()
+ }
+ }
+ };
+
+ // Generates the `Default` implementation for `$name`.
+ (@default $name:ident { $($field:ident;)* }) => {
+ /// Returns a value for the bitfield where all fields are set to their default value.
+ impl ::core::default::Default for $name {
+ fn default() -> Self {
+ let value = Self(Default::default());
+
+ ::kernel::macros::paste!(
+ $(
+ let value = value.[<set_ $field>](Default::default());
+ )*
+ );
+
+ value
+ }
+ }
+ };
+}
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
new file mode 100644
index 000000000000..84b0e1703150
--- /dev/null
+++ b/drivers/gpu/nova-core/driver.rs
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ auxiliary,
+ device::Core,
+ devres::Devres,
+ dma::Device,
+ dma::DmaMask,
+ pci,
+ pci::{
+ Class,
+ ClassMask,
+ Vendor, //
+ },
+ prelude::*,
+ sizes::SZ_16M,
+ sync::{
+ atomic::{
+ Atomic,
+ Relaxed, //
+ },
+ Arc,
+ },
+};
+
+use crate::gpu::Gpu;
+
+/// Counter for generating unique auxiliary device IDs.
+static AUXILIARY_ID_COUNTER: Atomic<u32> = Atomic::new(0);
+
+#[pin_data]
+pub(crate) struct NovaCore {
+ #[pin]
+ pub(crate) gpu: Gpu,
+ #[pin]
+ _reg: Devres<auxiliary::Registration>,
+}
+
+const BAR0_SIZE: usize = SZ_16M;
+
+// For now we only support Ampere which can use up to 47-bit DMA addresses.
+//
+// TODO: Add an abstraction for this to support newer GPUs which may support
+// larger DMA addresses. Limiting these GPUs to smaller address widths won't
+// have any adverse affects, unless installed on systems which require larger
+// DMA addresses. These systems should be quite rare.
+const GPU_DMA_BITS: u32 = 47;
+
+pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
+
+kernel::pci_device_table!(
+ PCI_TABLE,
+ MODULE_PCI_TABLE,
+ <NovaCore as pci::Driver>::IdInfo,
+ [
+ // Modern NVIDIA GPUs will show up as either VGA or 3D controllers.
+ (
+ pci::DeviceId::from_class_and_vendor(
+ Class::DISPLAY_VGA,
+ ClassMask::ClassSubclass,
+ Vendor::NVIDIA
+ ),
+ ()
+ ),
+ (
+ pci::DeviceId::from_class_and_vendor(
+ Class::DISPLAY_3D,
+ ClassMask::ClassSubclass,
+ Vendor::NVIDIA
+ ),
+ ()
+ ),
+ ]
+);
+
+impl pci::Driver for NovaCore {
+ type IdInfo = ();
+ const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE;
+
+ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> {
+ pin_init::pin_init_scope(move || {
+ dev_dbg!(pdev, "Probe Nova Core GPU driver.\n");
+
+ pdev.enable_device_mem()?;
+ pdev.set_master();
+
+ // SAFETY: No concurrent DMA allocations or mappings can be made because
+ // the device is still being probed and therefore isn't being used by
+ // other threads of execution.
+ unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
+
+ let bar = Arc::pin_init(
+ pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0"),
+ GFP_KERNEL,
+ )?;
+
+ Ok(try_pin_init!(Self {
+ gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?),
+ _reg <- auxiliary::Registration::new(
+ pdev.as_ref(),
+ c"nova-drm",
+ // TODO[XARR]: Use XArray or perhaps IDA for proper ID allocation/recycling. For
+ // now, use a simple atomic counter that never recycles IDs.
+ AUXILIARY_ID_COUNTER.fetch_add(1, Relaxed),
+ crate::MODULE_NAME
+ ),
+ }))
+ })
+ }
+
+ fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) {
+ this.gpu.unbind(pdev.as_ref());
+ }
+}
diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
new file mode 100644
index 000000000000..33927af4134c
--- /dev/null
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -0,0 +1,789 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Falcon microprocessor base support
+
+use hal::FalconHal;
+
+use kernel::{
+ device::{
+ self,
+ Device, //
+ },
+ dma::{
+ Coherent,
+ CoherentBox,
+ DmaAddress,
+ DmaMask, //
+ },
+ io::{
+ poll::read_poll_timeout,
+ register::{
+ RegisterBase,
+ WithBase, //
+ },
+ Io,
+ },
+ prelude::*,
+ sync::aref::ARef,
+ time::Delta,
+};
+
+use crate::{
+ bounded_enum,
+ driver::Bar0,
+ falcon::hal::LoadMethod,
+ gpu::Chipset,
+ num::{
+ self,
+ FromSafeCast, //
+ },
+ regs,
+};
+
+pub(crate) mod gsp;
+mod hal;
+pub(crate) mod sec2;
+
+/// Alignment (in bytes) of falcon memory blocks.
+pub(crate) const MEM_BLOCK_ALIGNMENT: usize = 256;
+
+bounded_enum! {
+ /// Revision number of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`]
+ /// register.
+ #[derive(Debug, Copy, Clone)]
+ pub(crate) enum FalconCoreRev with TryFrom<Bounded<u32, 4>> {
+ Rev1 = 1,
+ Rev2 = 2,
+ Rev3 = 3,
+ Rev4 = 4,
+ Rev5 = 5,
+ Rev6 = 6,
+ Rev7 = 7,
+ }
+}
+
+bounded_enum! {
+ /// Revision subversion number of a falcon core, used in the
+ /// [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] register.
+ #[derive(Debug, Copy, Clone)]
+ pub(crate) enum FalconCoreRevSubversion with From<Bounded<u32, 2>> {
+ Subversion0 = 0,
+ Subversion1 = 1,
+ Subversion2 = 2,
+ Subversion3 = 3,
+ }
+}
+
+bounded_enum! {
+ /// Security mode of the Falcon microprocessor.
+ ///
+ /// See `falcon.rst` for more details.
+ #[derive(Debug, Copy, Clone)]
+ pub(crate) enum FalconSecurityModel with TryFrom<Bounded<u32, 2>> {
+ /// Non-Secure: runs unsigned code without privileges.
+ None = 0,
+ /// Light-Secured (LS): Runs signed code with some privileges.
+ /// Entry into this mode is only possible from 'Heavy-secure' mode, which verifies the
+ /// code's signature.
+ ///
+ /// Also known as Low-Secure, Privilege Level 2 or PL2.
+ Light = 2,
+ /// Heavy-Secured (HS): Runs signed code with full privileges.
+ /// The code's signature is verified by the Falcon Boot ROM (BROM).
+ ///
+ /// Also known as High-Secure, Privilege Level 3 or PL3.
+ Heavy = 3,
+ }
+}
+
+bounded_enum! {
+ /// Signing algorithm for a given firmware, used in the
+ /// [`crate::regs::NV_PFALCON2_FALCON_MOD_SEL`] register. It is passed to the Falcon Boot ROM
+ /// (BROM) as a parameter.
+ #[derive(Debug, Copy, Clone)]
+ pub(crate) enum FalconModSelAlgo with TryFrom<Bounded<u32, 8>> {
+ /// AES.
+ Aes = 0,
+ /// RSA3K.
+ Rsa3k = 1,
+ }
+}
+
+bounded_enum! {
+ /// Valid values for the `size` field of the [`crate::regs::NV_PFALCON_FALCON_DMATRFCMD`]
+ /// register.
+ #[derive(Debug, Copy, Clone)]
+ pub(crate) enum DmaTrfCmdSize with TryFrom<Bounded<u32, 3>> {
+ /// 256 bytes transfer.
+ Size256B = 0x6,
+ }
+}
+
+bounded_enum! {
+ /// Currently active core on a dual falcon/riscv (Peregrine) controller.
+ #[derive(Debug, Copy, Clone, PartialEq, Eq)]
+ pub(crate) enum PeregrineCoreSelect with From<Bounded<u32, 1>> {
+ /// Falcon core is active.
+ Falcon = 0,
+ /// RISC-V core is active.
+ Riscv = 1,
+ }
+}
+
+/// Different types of memory present in a falcon core.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub(crate) enum FalconMem {
+ /// Secure Instruction Memory.
+ ImemSecure,
+ /// Non-Secure Instruction Memory.
+ #[expect(unused)]
+ ImemNonSecure,
+ /// Data Memory.
+ Dmem,
+}
+
+bounded_enum! {
+ /// Defines the Framebuffer Interface (FBIF) aperture type.
+ /// This determines the memory type for external memory access during a DMA transfer, which is
+ /// performed by the Falcon's Framebuffer DMA (FBDMA) engine. See falcon.rst for more details.
+ #[derive(Debug, Copy, Clone)]
+ pub(crate) enum FalconFbifTarget with TryFrom<Bounded<u32, 2>> {
+ /// Local Framebuffer (GPU's VRAM memory).
+ LocalFb = 0,
+ /// Coherent system memory (System DRAM).
+ CoherentSysmem = 1,
+ /// Non-coherent system memory (System DRAM).
+ NoncoherentSysmem = 2,
+ }
+}
+
+bounded_enum! {
+ /// Type of memory addresses to use.
+ #[derive(Debug, Copy, Clone)]
+ pub(crate) enum FalconFbifMemType with From<Bounded<u32, 1>> {
+ /// Virtual memory addresses.
+ Virtual = 0,
+ /// Physical memory addresses.
+ Physical = 1,
+ }
+}
+
+/// Type used to represent the `PFALCON` registers address base for a given falcon engine.
+pub(crate) struct PFalconBase(());
+
+/// Type used to represent the `PFALCON2` registers address base for a given falcon engine.
+pub(crate) struct PFalcon2Base(());
+
+/// Trait defining the parameters of a given Falcon engine.
+///
+/// Each engine provides one base for `PFALCON` and `PFALCON2` registers.
+pub(crate) trait FalconEngine:
+ Send + Sync + RegisterBase<PFalconBase> + RegisterBase<PFalcon2Base> + Sized
+{
+}
+
+/// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM)
+/// using DMA.
+#[derive(Debug, Clone)]
+pub(crate) struct FalconDmaLoadTarget {
+ /// Offset from the start of the source object to copy from.
+ pub(crate) src_start: u32,
+ /// Offset from the start of the destination memory to copy into.
+ pub(crate) dst_start: u32,
+ /// Number of bytes to copy.
+ pub(crate) len: u32,
+}
+
+/// Parameters for the falcon boot ROM.
+#[derive(Debug, Clone)]
+pub(crate) struct FalconBromParams {
+ /// Offset in `DMEM`` of the firmware's signature.
+ pub(crate) pkc_data_offset: u32,
+ /// Mask of engines valid for this firmware.
+ pub(crate) engine_id_mask: u16,
+ /// ID of the ucode used to infer a fuse register to validate the signature.
+ pub(crate) ucode_id: u8,
+}
+
+/// Trait implemented by falcon firmwares that can be loaded using DMA.
+pub(crate) trait FalconDmaLoadable {
+ /// Returns the firmware data as a slice of bytes.
+ fn as_slice(&self) -> &[u8];
+
+ /// Returns the load parameters for Secure `IMEM`.
+ fn imem_sec_load_params(&self) -> FalconDmaLoadTarget;
+
+ /// Returns the load parameters for Non-Secure `IMEM`,
+ /// used only on Turing and GA100.
+ fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget>;
+
+ /// Returns the load parameters for `DMEM`.
+ fn dmem_load_params(&self) -> FalconDmaLoadTarget;
+
+ /// Returns an adapter that provides the required parameter to load this firmware using PIO.
+ ///
+ /// This can only fail if some `u32` fields cannot be converted to `u16`, or if the indices in
+ /// the headers are invalid.
+ fn try_as_pio_loadable(&self) -> Result<FalconDmaFirmwarePioAdapter<'_, Self>> {
+ let new_pio_imem = |params: FalconDmaLoadTarget, secure| {
+ let start = usize::from_safe_cast(params.src_start);
+ let end = start + usize::from_safe_cast(params.len);
+ let data = self.as_slice().get(start..end).ok_or(EINVAL)?;
+
+ let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?;
+
+ Ok::<_, Error>(FalconPioImemLoadTarget {
+ data,
+ dst_start,
+ secure,
+ start_tag: dst_start >> 8,
+ })
+ };
+
+ let imem_sec = new_pio_imem(self.imem_sec_load_params(), true)?;
+
+ let imem_ns = if let Some(params) = self.imem_ns_load_params() {
+ Some(new_pio_imem(params, false)?)
+ } else {
+ None
+ };
+
+ let dmem = {
+ let params = self.dmem_load_params();
+ let start = usize::from_safe_cast(params.src_start);
+ let end = start + usize::from_safe_cast(params.len);
+ let data = self.as_slice().get(start..end).ok_or(EINVAL)?;
+
+ let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?;
+
+ FalconPioDmemLoadTarget { data, dst_start }
+ };
+
+ Ok(FalconDmaFirmwarePioAdapter {
+ fw: self,
+ imem_sec,
+ imem_ns,
+ dmem,
+ })
+ }
+}
+
+/// Represents a portion of the firmware to be loaded into IMEM using PIO.
+#[derive(Clone)]
+pub(crate) struct FalconPioImemLoadTarget<'a> {
+ pub(crate) data: &'a [u8],
+ pub(crate) dst_start: u16,
+ pub(crate) secure: bool,
+ pub(crate) start_tag: u16,
+}
+
+/// Represents a portion of the firmware to be loaded into DMEM using PIO.
+#[derive(Clone)]
+pub(crate) struct FalconPioDmemLoadTarget<'a> {
+ pub(crate) data: &'a [u8],
+ pub(crate) dst_start: u16,
+}
+
+/// Trait for providing PIO load parameters of falcon firmwares.
+pub(crate) trait FalconPioLoadable {
+ /// Returns the load parameters for Secure `IMEM`, if any.
+ fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>;
+
+ /// Returns the load parameters for Non-Secure `IMEM`, if any.
+ fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>;
+
+ /// Returns the load parameters for `DMEM`.
+ fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_>;
+}
+
+/// Adapter type that makes any DMA-loadable firmware also loadable via PIO.
+///
+/// Created using [`FalconDmaLoadable::try_as_pio_loadable`].
+pub(crate) struct FalconDmaFirmwarePioAdapter<'a, T: FalconDmaLoadable + ?Sized> {
+ /// Reference to the DMA firmware.
+ fw: &'a T,
+ /// Validated secure IMEM parameters.
+ imem_sec: FalconPioImemLoadTarget<'a>,
+ /// Validated non-secure IMEM parameters.
+ imem_ns: Option<FalconPioImemLoadTarget<'a>>,
+ /// Validated DMEM parameters.
+ dmem: FalconPioDmemLoadTarget<'a>,
+}
+
+impl<'a, T> FalconPioLoadable for FalconDmaFirmwarePioAdapter<'a, T>
+where
+ T: FalconDmaLoadable + ?Sized,
+{
+ fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> {
+ Some(self.imem_sec.clone())
+ }
+
+ fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> {
+ self.imem_ns.clone()
+ }
+
+ fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> {
+ self.dmem.clone()
+ }
+}
+
+impl<'a, T> FalconFirmware for FalconDmaFirmwarePioAdapter<'a, T>
+where
+ T: FalconDmaLoadable + FalconFirmware + ?Sized,
+{
+ type Target = <T as FalconFirmware>::Target;
+
+ fn brom_params(&self) -> FalconBromParams {
+ self.fw.brom_params()
+ }
+
+ fn boot_addr(&self) -> u32 {
+ self.fw.boot_addr()
+ }
+}
+
+/// Trait for a falcon firmware.
+///
+/// A falcon firmware can be loaded on a given engine.
+pub(crate) trait FalconFirmware {
+ /// Engine on which this firmware is to be loaded.
+ type Target: FalconEngine;
+
+ /// Returns the parameters to write into the BROM registers.
+ fn brom_params(&self) -> FalconBromParams;
+
+ /// Returns the start address of the firmware.
+ fn boot_addr(&self) -> u32;
+}
+
+/// Contains the base parameters common to all Falcon instances.
+pub(crate) struct Falcon<E: FalconEngine> {
+ hal: KBox<dyn FalconHal<E>>,
+ dev: ARef<device::Device>,
+}
+
+impl<E: FalconEngine + 'static> Falcon<E> {
+ /// Create a new falcon instance.
+ pub(crate) fn new(dev: &device::Device, chipset: Chipset) -> Result<Self> {
+ Ok(Self {
+ hal: hal::falcon_hal(chipset)?,
+ dev: dev.into(),
+ })
+ }
+
+ /// Resets DMA-related registers.
+ pub(crate) fn dma_reset(&self, bar: &Bar0) {
+ bar.update(regs::NV_PFALCON_FBIF_CTL::of::<E>(), |v| {
+ v.with_allow_phys_no_ctx(true)
+ });
+
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_DMACTL::zeroed(),
+ );
+ }
+
+ /// Reset the controller, select the falcon core, and wait for memory scrubbing to complete.
+ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
+ self.hal.reset_eng(bar)?;
+ self.hal.select_core(self, bar)?;
+ self.hal.reset_wait_mem_scrubbing(bar)?;
+
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_RM::from(bar.read(regs::NV_PMC_BOOT_0).into_raw()),
+ );
+
+ Ok(())
+ }
+
+ /// Falcons supports up to four ports, but we only ever use one, so just hard-code it.
+ const PIO_PORT: usize = 0;
+
+ /// Write a slice to Falcon IMEM memory using programmed I/O (PIO).
+ ///
+ /// Returns `EINVAL` if `img.len()` is not a multiple of 4.
+ fn pio_wr_imem_slice(&self, bar: &Bar0, load_offsets: FalconPioImemLoadTarget<'_>) -> Result {
+ // Rejecting misaligned images here allows us to avoid checking
+ // inside the loops.
+ if load_offsets.data.len() % 4 != 0 {
+ return Err(EINVAL);
+ }
+
+ bar.write(
+ WithBase::of::<E>().at(Self::PIO_PORT),
+ regs::NV_PFALCON_FALCON_IMEMC::zeroed()
+ .with_secure(load_offsets.secure)
+ .with_aincw(true)
+ .with_offs(load_offsets.dst_start),
+ );
+
+ for (n, block) in load_offsets.data.chunks(MEM_BLOCK_ALIGNMENT).enumerate() {
+ let n = u16::try_from(n)?;
+ let tag: u16 = load_offsets.start_tag.checked_add(n).ok_or(ERANGE)?;
+ bar.write(
+ WithBase::of::<E>().at(Self::PIO_PORT),
+ regs::NV_PFALCON_FALCON_IMEMT::zeroed().with_tag(tag),
+ );
+ for word in block.chunks_exact(4) {
+ let w = [word[0], word[1], word[2], word[3]];
+ bar.write(
+ WithBase::of::<E>().at(Self::PIO_PORT),
+ regs::NV_PFALCON_FALCON_IMEMD::zeroed().with_data(u32::from_le_bytes(w)),
+ );
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Write a slice to Falcon DMEM memory using programmed I/O (PIO).
+ ///
+ /// Returns `EINVAL` if `img.len()` is not a multiple of 4.
+ fn pio_wr_dmem_slice(&self, bar: &Bar0, load_offsets: FalconPioDmemLoadTarget<'_>) -> Result {
+ // Rejecting misaligned images here allows us to avoid checking
+ // inside the loops.
+ if load_offsets.data.len() % 4 != 0 {
+ return Err(EINVAL);
+ }
+
+ bar.write(
+ WithBase::of::<E>().at(Self::PIO_PORT),
+ regs::NV_PFALCON_FALCON_DMEMC::zeroed()
+ .with_aincw(true)
+ .with_offs(load_offsets.dst_start),
+ );
+
+ for word in load_offsets.data.chunks_exact(4) {
+ let w = [word[0], word[1], word[2], word[3]];
+ bar.write(
+ WithBase::of::<E>().at(Self::PIO_PORT),
+ regs::NV_PFALCON_FALCON_DMEMD::zeroed().with_data(u32::from_le_bytes(w)),
+ );
+ }
+
+ Ok(())
+ }
+
+ /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it.
+ pub(crate) fn pio_load<F: FalconFirmware<Target = E> + FalconPioLoadable>(
+ &self,
+ bar: &Bar0,
+ fw: &F,
+ ) -> Result {
+ bar.update(regs::NV_PFALCON_FBIF_CTL::of::<E>(), |v| {
+ v.with_allow_phys_no_ctx(true)
+ });
+
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_DMACTL::zeroed(),
+ );
+
+ if let Some(imem_ns) = fw.imem_ns_load_params() {
+ self.pio_wr_imem_slice(bar, imem_ns)?;
+ }
+ if let Some(imem_sec) = fw.imem_sec_load_params() {
+ self.pio_wr_imem_slice(bar, imem_sec)?;
+ }
+ self.pio_wr_dmem_slice(bar, fw.dmem_load_params())?;
+
+ self.hal.program_brom(self, bar, &fw.brom_params())?;
+
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_BOOTVEC::zeroed().with_value(fw.boot_addr()),
+ );
+
+ Ok(())
+ }
+
+ /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's
+ /// `target_mem`.
+ ///
+ /// `sec` is set if the loaded firmware is expected to run in secure mode.
+ fn dma_wr(
+ &self,
+ bar: &Bar0,
+ dma_obj: &Coherent<[u8]>,
+ target_mem: FalconMem,
+ load_offsets: FalconDmaLoadTarget,
+ ) -> Result {
+ const DMA_LEN: u32 = num::usize_into_u32::<{ MEM_BLOCK_ALIGNMENT }>();
+
+ // For IMEM, we want to use the start offset as a virtual address tag for each page, since
+ // code addresses in the firmware (and the boot vector) are virtual.
+ //
+ // For DMEM we can fold the start offset into the DMA handle.
+ let (src_start, dma_start) = match target_mem {
+ FalconMem::ImemSecure | FalconMem::ImemNonSecure => {
+ (load_offsets.src_start, dma_obj.dma_handle())
+ }
+ FalconMem::Dmem => (
+ 0,
+ dma_obj.dma_handle() + DmaAddress::from(load_offsets.src_start),
+ ),
+ };
+ if dma_start % DmaAddress::from(DMA_LEN) > 0 {
+ dev_err!(
+ self.dev,
+ "DMA transfer start addresses must be a multiple of {}\n",
+ DMA_LEN
+ );
+ return Err(EINVAL);
+ }
+
+ // The DMATRFBASE/1 register pair only supports a 49-bit address.
+ if dma_start > DmaMask::new::<49>().value() {
+ dev_err!(self.dev, "DMA address {:#x} exceeds 49 bits\n", dma_start);
+ return Err(ERANGE);
+ }
+
+ // DMA transfers can only be done in units of 256 bytes. Compute how many such transfers we
+ // need to perform.
+ let num_transfers = load_offsets.len.div_ceil(DMA_LEN);
+
+ // Check that the area we are about to transfer is within the bounds of the DMA object.
+ // Upper limit of transfer is `(num_transfers * DMA_LEN) + load_offsets.src_start`.
+ match num_transfers
+ .checked_mul(DMA_LEN)
+ .and_then(|size| size.checked_add(load_offsets.src_start))
+ {
+ None => {
+ dev_err!(self.dev, "DMA transfer length overflow\n");
+ return Err(EOVERFLOW);
+ }
+ Some(upper_bound) if usize::from_safe_cast(upper_bound) > dma_obj.size() => {
+ dev_err!(self.dev, "DMA transfer goes beyond range of DMA object\n");
+ return Err(EINVAL);
+ }
+ Some(_) => (),
+ };
+
+ // Set up the base source DMA address.
+
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_DMATRFBASE::zeroed().with_base(
+ // CAST: `as u32` is used on purpose since we do want to strip the upper bits,
+ // which will be written to `NV_PFALCON_FALCON_DMATRFBASE1`.
+ (dma_start >> 8) as u32,
+ ),
+ );
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_DMATRFBASE1::zeroed().try_with_base(dma_start >> 40)?,
+ );
+
+ let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::zeroed()
+ .with_size(DmaTrfCmdSize::Size256B)
+ .with_falcon_mem(target_mem);
+
+ for pos in (0..num_transfers).map(|i| i * DMA_LEN) {
+ // Perform a transfer of size `DMA_LEN`.
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_DMATRFMOFFS::zeroed()
+ .try_with_offs(load_offsets.dst_start + pos)?,
+ );
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_DMATRFFBOFFS::zeroed().with_offs(src_start + pos),
+ );
+
+ bar.write(WithBase::of::<E>(), cmd);
+
+ // Wait for the transfer to complete.
+ // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories
+ // should ever take that long.
+ read_poll_timeout(
+ || Ok(bar.read(regs::NV_PFALCON_FALCON_DMATRFCMD::of::<E>())),
+ |r| r.idle(),
+ Delta::ZERO,
+ Delta::from_secs(2),
+ )?;
+ }
+
+ Ok(())
+ }
+
+ /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it.
+ fn dma_load<F: FalconFirmware<Target = E> + FalconDmaLoadable>(
+ &self,
+ dev: &Device<device::Bound>,
+ bar: &Bar0,
+ fw: &F,
+ ) -> Result {
+ // DMA object with firmware content as the source of the DMA engine.
+ let dma_obj = {
+ let fw_slice = fw.as_slice();
+
+ // DMA copies are done in chunks of `MEM_BLOCK_ALIGNMENT`, so pad the length
+ // accordingly and fill with `0`.
+ let mut dma_obj = CoherentBox::zeroed_slice(
+ dev,
+ fw_slice.len().next_multiple_of(MEM_BLOCK_ALIGNMENT),
+ GFP_KERNEL,
+ )?;
+
+ // PANIC: `dma_obj` has been created with a length equal to or larger than
+ // `fw_slice.len()`, so the range `..fw_slice.len()` is valid.
+ dma_obj[..fw_slice.len()].copy_from_slice(fw_slice);
+
+ dma_obj.into()
+ };
+
+ self.dma_reset(bar);
+ bar.update(regs::NV_PFALCON_FBIF_TRANSCFG::of::<E>().at(0), |v| {
+ v.with_target(FalconFbifTarget::CoherentSysmem)
+ .with_mem_type(FalconFbifMemType::Physical)
+ });
+
+ self.dma_wr(
+ bar,
+ &dma_obj,
+ FalconMem::ImemSecure,
+ fw.imem_sec_load_params(),
+ )?;
+ self.dma_wr(bar, &dma_obj, FalconMem::Dmem, fw.dmem_load_params())?;
+
+ self.hal.program_brom(self, bar, &fw.brom_params())?;
+
+ // Set `BootVec` to start of non-secure code.
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_BOOTVEC::zeroed().with_value(fw.boot_addr()),
+ );
+
+ Ok(())
+ }
+
+ /// Wait until the falcon CPU is halted.
+ pub(crate) fn wait_till_halted(&self, bar: &Bar0) -> Result<()> {
+ // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds.
+ read_poll_timeout(
+ || Ok(bar.read(regs::NV_PFALCON_FALCON_CPUCTL::of::<E>())),
+ |r| r.halted(),
+ Delta::ZERO,
+ Delta::from_secs(2),
+ )?;
+
+ Ok(())
+ }
+
+ /// Start the falcon CPU.
+ pub(crate) fn start(&self, bar: &Bar0) -> Result<()> {
+ match bar
+ .read(regs::NV_PFALCON_FALCON_CPUCTL::of::<E>())
+ .alias_en()
+ {
+ true => bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::zeroed().with_startcpu(true),
+ ),
+ false => bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_CPUCTL::zeroed().with_startcpu(true),
+ ),
+ }
+
+ Ok(())
+ }
+
+ /// Writes values to the mailbox registers if provided.
+ pub(crate) fn write_mailboxes(&self, bar: &Bar0, mbox0: Option<u32>, mbox1: Option<u32>) {
+ if let Some(mbox0) = mbox0 {
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_MAILBOX0::zeroed().with_value(mbox0),
+ );
+ }
+
+ if let Some(mbox1) = mbox1 {
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_MAILBOX1::zeroed().with_value(mbox1),
+ );
+ }
+ }
+
+ /// Reads the value from `mbox0` register.
+ pub(crate) fn read_mailbox0(&self, bar: &Bar0) -> u32 {
+ bar.read(regs::NV_PFALCON_FALCON_MAILBOX0::of::<E>())
+ .value()
+ }
+
+ /// Reads the value from `mbox1` register.
+ pub(crate) fn read_mailbox1(&self, bar: &Bar0) -> u32 {
+ bar.read(regs::NV_PFALCON_FALCON_MAILBOX1::of::<E>())
+ .value()
+ }
+
+ /// Reads values from both mailbox registers.
+ pub(crate) fn read_mailboxes(&self, bar: &Bar0) -> (u32, u32) {
+ let mbox0 = self.read_mailbox0(bar);
+ let mbox1 = self.read_mailbox1(bar);
+
+ (mbox0, mbox1)
+ }
+
+ /// Start running the loaded firmware.
+ ///
+ /// `mbox0` and `mbox1` are optional parameters to write into the `MBOX0` and `MBOX1` registers
+ /// prior to running.
+ ///
+ /// Wait up to two seconds for the firmware to complete, and return its exit status read from
+ /// the `MBOX0` and `MBOX1` registers.
+ pub(crate) fn boot(
+ &self,
+ bar: &Bar0,
+ mbox0: Option<u32>,
+ mbox1: Option<u32>,
+ ) -> Result<(u32, u32)> {
+ self.write_mailboxes(bar, mbox0, mbox1);
+ self.start(bar)?;
+ self.wait_till_halted(bar)?;
+ Ok(self.read_mailboxes(bar))
+ }
+
+ /// Returns the fused version of the signature to use in order to run a HS firmware on this
+ /// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header.
+ pub(crate) fn signature_reg_fuse_version(
+ &self,
+ bar: &Bar0,
+ engine_id_mask: u16,
+ ucode_id: u8,
+ ) -> Result<u32> {
+ self.hal
+ .signature_reg_fuse_version(self, bar, engine_id_mask, ucode_id)
+ }
+
+ /// Check if the RISC-V core is active.
+ ///
+ /// Returns `true` if the RISC-V core is active, `false` otherwise.
+ pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool {
+ self.hal.is_riscv_active(bar)
+ }
+
+ /// Load a firmware image into Falcon memory, using the preferred method for the current
+ /// chipset.
+ pub(crate) fn load<F: FalconFirmware<Target = E> + FalconDmaLoadable>(
+ &self,
+ dev: &Device<device::Bound>,
+ bar: &Bar0,
+ fw: &F,
+ ) -> Result {
+ match self.hal.load_method() {
+ LoadMethod::Dma => self.dma_load(dev, bar, fw),
+ LoadMethod::Pio => self.pio_load(bar, &fw.try_as_pio_loadable()?),
+ }
+ }
+
+ /// Write the application version to the OS register.
+ pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) {
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON_FALCON_OS::zeroed().with_value(app_version),
+ );
+ }
+}
diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs
new file mode 100644
index 000000000000..df6d5a382c7a
--- /dev/null
+++ b/drivers/gpu/nova-core/falcon/gsp.rs
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ io::{
+ poll::read_poll_timeout,
+ register::{
+ RegisterBase,
+ WithBase, //
+ },
+ Io,
+ },
+ prelude::*,
+ time::Delta, //
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ Falcon,
+ FalconEngine,
+ PFalcon2Base,
+ PFalconBase, //
+ },
+ regs,
+};
+
+/// Type specifying the `Gsp` falcon engine. Cannot be instantiated.
+pub(crate) struct Gsp(());
+
+impl RegisterBase<PFalconBase> for Gsp {
+ const BASE: usize = 0x00110000;
+}
+
+impl RegisterBase<PFalcon2Base> for Gsp {
+ const BASE: usize = 0x00111000;
+}
+
+impl FalconEngine for Gsp {}
+
+impl Falcon<Gsp> {
+ /// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to
+ /// allow GSP to signal CPU for processing new messages in message queue.
+ pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) {
+ bar.write(
+ WithBase::of::<Gsp>(),
+ regs::NV_PFALCON_FALCON_IRQSCLR::zeroed().with_swgen0(true),
+ );
+ }
+
+ /// Checks if GSP reload/resume has completed during the boot process.
+ pub(crate) fn check_reload_completed(&self, bar: &Bar0, timeout: Delta) -> Result<bool> {
+ read_poll_timeout(
+ || Ok(bar.read(regs::NV_PGC6_BSI_SECURE_SCRATCH_14)),
+ |val| val.boot_stage_3_handoff(),
+ Delta::ZERO,
+ timeout,
+ )
+ .map(|_| true)
+ }
+}
diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs
new file mode 100644
index 000000000000..a7e5ea8d0272
--- /dev/null
+++ b/drivers/gpu/nova-core/falcon/hal.rs
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::prelude::*;
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ Falcon,
+ FalconBromParams,
+ FalconEngine, //
+ },
+ gpu::Chipset,
+};
+
+mod ga102;
+mod tu102;
+
+/// Method used to load data into falcon memory. Some GPU architectures need
+/// PIO and others can use DMA.
+pub(crate) enum LoadMethod {
+ /// Programmed I/O
+ Pio,
+ /// Direct Memory Access
+ Dma,
+}
+
+/// Hardware Abstraction Layer for Falcon cores.
+///
+/// Implements chipset-specific low-level operations. The trait is generic against [`FalconEngine`]
+/// so its `BASE` parameter can be used in order to avoid runtime bound checks when accessing
+/// registers.
+pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync {
+ /// Activates the Falcon core if the engine is a risvc/falcon dual engine.
+ fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result {
+ Ok(())
+ }
+
+ /// Returns the fused version of the signature to use in order to run a HS firmware on this
+ /// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header.
+ fn signature_reg_fuse_version(
+ &self,
+ falcon: &Falcon<E>,
+ bar: &Bar0,
+ engine_id_mask: u16,
+ ucode_id: u8,
+ ) -> Result<u32>;
+
+ /// Program the boot ROM registers prior to starting a secure firmware.
+ fn program_brom(&self, falcon: &Falcon<E>, bar: &Bar0, params: &FalconBromParams) -> Result;
+
+ /// Check if the RISC-V core is active.
+ /// Returns `true` if the RISC-V core is active, `false` otherwise.
+ fn is_riscv_active(&self, bar: &Bar0) -> bool;
+
+ /// Wait for memory scrubbing to complete.
+ fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result;
+
+ /// Reset the falcon engine.
+ fn reset_eng(&self, bar: &Bar0) -> Result;
+
+ /// Returns the method used to load data into the falcon's memory.
+ ///
+ /// The only chipsets supporting PIO are those < GA102, and PIO is the preferred method for
+ /// these. For anything above, the PIO registers appear to be masked to the CPU, so DMA is the
+ /// only usable method.
+ fn load_method(&self) -> LoadMethod;
+}
+
+/// Returns a boxed falcon HAL adequate for `chipset`.
+///
+/// We use a heap-allocated trait object instead of a statically defined one because the
+/// generic `FalconEngine` argument makes it difficult to define all the combinations
+/// statically.
+pub(super) fn falcon_hal<E: FalconEngine + 'static>(
+ chipset: Chipset,
+) -> Result<KBox<dyn FalconHal<E>>> {
+ use Chipset::*;
+
+ let hal = match chipset {
+ TU102 | TU104 | TU106 | TU116 | TU117 => {
+ KBox::new(tu102::Tu102::<E>::new(), GFP_KERNEL)? as KBox<dyn FalconHal<E>>
+ }
+ GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => {
+ KBox::new(ga102::Ga102::<E>::new(), GFP_KERNEL)? as KBox<dyn FalconHal<E>>
+ }
+ _ => return Err(ENOTSUPP),
+ };
+
+ Ok(hal)
+}
diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs
new file mode 100644
index 000000000000..8368a61ddeef
--- /dev/null
+++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use core::marker::PhantomData;
+
+use kernel::{
+ device,
+ io::{
+ poll::read_poll_timeout,
+ register::{
+ Array,
+ WithBase, //
+ },
+ Io, //
+ },
+ prelude::*,
+ time::Delta, //
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ hal::LoadMethod,
+ Falcon,
+ FalconBromParams,
+ FalconEngine,
+ FalconModSelAlgo,
+ PeregrineCoreSelect, //
+ },
+ regs,
+};
+
+use super::FalconHal;
+
+fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result {
+ let bcr_ctrl = bar.read(regs::NV_PRISCV_RISCV_BCR_CTRL::of::<E>());
+ if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon {
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PRISCV_RISCV_BCR_CTRL::zeroed().with_core_select(PeregrineCoreSelect::Falcon),
+ );
+
+ // TIMEOUT: falcon core should take less than 10ms to report being enabled.
+ read_poll_timeout(
+ || Ok(bar.read(regs::NV_PRISCV_RISCV_BCR_CTRL::of::<E>())),
+ |r| r.valid(),
+ Delta::ZERO,
+ Delta::from_millis(10),
+ )?;
+ }
+
+ Ok(())
+}
+
+fn signature_reg_fuse_version_ga102(
+ dev: &device::Device,
+ bar: &Bar0,
+ engine_id_mask: u16,
+ ucode_id: u8,
+) -> Result<u32> {
+ // Each engine has 16 ucode version registers numbered from 1 to 16.
+ let ucode_idx = match usize::from(ucode_id) {
+ ucode_id @ 1..=regs::NV_FUSE_OPT_FPF_SIZE => ucode_id - 1,
+ _ => {
+ dev_err!(dev, "invalid ucode id {:#x}\n", ucode_id);
+ return Err(EINVAL);
+ }
+ };
+
+ // `ucode_idx` is guaranteed to be in the range [0..15], making the `read` calls provable valid
+ // at build-time.
+ let reg_fuse_version: u16 = if engine_id_mask & 0x0001 != 0 {
+ bar.read(regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::at(ucode_idx))
+ .data()
+ } else if engine_id_mask & 0x0004 != 0 {
+ bar.read(regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::at(ucode_idx))
+ .data()
+ } else if engine_id_mask & 0x0400 != 0 {
+ bar.read(regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::at(ucode_idx))
+ .data()
+ } else {
+ dev_err!(dev, "unexpected engine_id_mask {:#x}\n", engine_id_mask);
+ return Err(EINVAL);
+ };
+
+ // TODO[NUMM]: replace with `last_set_bit` once it lands.
+ Ok(u16::BITS - reg_fuse_version.leading_zeros())
+}
+
+fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) -> Result {
+ bar.write(
+ WithBase::of::<E>().at(0),
+ regs::NV_PFALCON2_FALCON_BROM_PARAADDR::zeroed().with_value(params.pkc_data_offset),
+ );
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::zeroed()
+ .with_value(u32::from(params.engine_id_mask)),
+ );
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::zeroed().with_ucode_id(params.ucode_id),
+ );
+ bar.write(
+ WithBase::of::<E>(),
+ regs::NV_PFALCON2_FALCON_MOD_SEL::zeroed().with_algo(FalconModSelAlgo::Rsa3k),
+ );
+
+ Ok(())
+}
+
+pub(super) struct Ga102<E: FalconEngine>(PhantomData<E>);
+
+impl<E: FalconEngine> Ga102<E> {
+ pub(super) fn new() -> Self {
+ Self(PhantomData)
+ }
+}
+
+impl<E: FalconEngine> FalconHal<E> for Ga102<E> {
+ fn select_core(&self, _falcon: &Falcon<E>, bar: &Bar0) -> Result {
+ select_core_ga102::<E>(bar)
+ }
+
+ fn signature_reg_fuse_version(
+ &self,
+ falcon: &Falcon<E>,
+ bar: &Bar0,
+ engine_id_mask: u16,
+ ucode_id: u8,
+ ) -> Result<u32> {
+ signature_reg_fuse_version_ga102(&falcon.dev, bar, engine_id_mask, ucode_id)
+ }
+
+ fn program_brom(&self, _falcon: &Falcon<E>, bar: &Bar0, params: &FalconBromParams) -> Result {
+ program_brom_ga102::<E>(bar, params)
+ }
+
+ fn is_riscv_active(&self, bar: &Bar0) -> bool {
+ bar.read(regs::NV_PRISCV_RISCV_CPUCTL::of::<E>())
+ .active_stat()
+ }
+
+ fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result {
+ // TIMEOUT: memory scrubbing should complete in less than 20ms.
+ read_poll_timeout(
+ || Ok(bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>())),
+ |r| r.mem_scrubbing_done(),
+ Delta::ZERO,
+ Delta::from_millis(20),
+ )
+ .map(|_| ())
+ }
+
+ fn reset_eng(&self, bar: &Bar0) -> Result {
+ let _ = bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>());
+
+ // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set
+ // RESET_READY so a non-failing timeout is used.
+ let _ = read_poll_timeout(
+ || Ok(bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>())),
+ |r| r.reset_ready(),
+ Delta::ZERO,
+ Delta::from_micros(150),
+ );
+
+ regs::NV_PFALCON_FALCON_ENGINE::reset_engine::<E>(bar);
+ self.reset_wait_mem_scrubbing(bar)?;
+
+ Ok(())
+ }
+
+ fn load_method(&self) -> LoadMethod {
+ LoadMethod::Dma
+ }
+}
diff --git a/drivers/gpu/nova-core/falcon/hal/tu102.rs b/drivers/gpu/nova-core/falcon/hal/tu102.rs
new file mode 100644
index 000000000000..c7a90266cb44
--- /dev/null
+++ b/drivers/gpu/nova-core/falcon/hal/tu102.rs
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use core::marker::PhantomData;
+
+use kernel::{
+ io::{
+ poll::read_poll_timeout,
+ register::WithBase,
+ Io, //
+ },
+ prelude::*,
+ time::Delta, //
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ hal::LoadMethod,
+ Falcon,
+ FalconBromParams,
+ FalconEngine, //
+ },
+ regs, //
+};
+
+use super::FalconHal;
+
+pub(super) struct Tu102<E: FalconEngine>(PhantomData<E>);
+
+impl<E: FalconEngine> Tu102<E> {
+ pub(super) fn new() -> Self {
+ Self(PhantomData)
+ }
+}
+
+impl<E: FalconEngine> FalconHal<E> for Tu102<E> {
+ fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result {
+ Ok(())
+ }
+
+ fn signature_reg_fuse_version(
+ &self,
+ _falcon: &Falcon<E>,
+ _bar: &Bar0,
+ _engine_id_mask: u16,
+ _ucode_id: u8,
+ ) -> Result<u32> {
+ Ok(0)
+ }
+
+ fn program_brom(&self, _falcon: &Falcon<E>, _bar: &Bar0, _params: &FalconBromParams) -> Result {
+ Ok(())
+ }
+
+ fn is_riscv_active(&self, bar: &Bar0) -> bool {
+ bar.read(regs::NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS::of::<E>())
+ .active_stat()
+ }
+
+ fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result {
+ // TIMEOUT: memory scrubbing should complete in less than 10ms.
+ read_poll_timeout(
+ || Ok(bar.read(regs::NV_PFALCON_FALCON_DMACTL::of::<E>())),
+ |r| r.mem_scrubbing_done(),
+ Delta::ZERO,
+ Delta::from_millis(10),
+ )
+ .map(|_| ())
+ }
+
+ fn reset_eng(&self, bar: &Bar0) -> Result {
+ regs::NV_PFALCON_FALCON_ENGINE::reset_engine::<E>(bar);
+ self.reset_wait_mem_scrubbing(bar)?;
+
+ Ok(())
+ }
+
+ fn load_method(&self) -> LoadMethod {
+ LoadMethod::Pio
+ }
+}
diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs
new file mode 100644
index 000000000000..91ec7d49c1f5
--- /dev/null
+++ b/drivers/gpu/nova-core/falcon/sec2.rs
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::io::register::RegisterBase;
+
+use crate::falcon::{
+ FalconEngine,
+ PFalcon2Base,
+ PFalconBase, //
+};
+
+/// Type specifying the `Sec2` falcon engine. Cannot be instantiated.
+pub(crate) struct Sec2(());
+
+impl RegisterBase<PFalconBase> for Sec2 {
+ const BASE: usize = 0x00840000;
+}
+
+impl RegisterBase<PFalcon2Base> for Sec2 {
+ const BASE: usize = 0x00841000;
+}
+
+impl FalconEngine for Sec2 {}
diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs
new file mode 100644
index 000000000000..bdd5eed760e1
--- /dev/null
+++ b/drivers/gpu/nova-core/fb.rs
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use core::ops::{
+ Deref,
+ Range, //
+};
+
+use kernel::{
+ device,
+ dma::CoherentHandle,
+ fmt,
+ io::Io,
+ prelude::*,
+ ptr::{
+ Alignable,
+ Alignment, //
+ },
+ sizes::*,
+ sync::aref::ARef, //
+};
+
+use crate::{
+ driver::Bar0,
+ firmware::gsp::GspFirmware,
+ gpu::Chipset,
+ gsp,
+ num::{
+ usize_as_u64,
+ FromSafeCast, //
+ },
+ regs,
+};
+
+mod hal;
+
+/// Type holding the sysmem flush memory page, a page of memory to be written into the
+/// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR*` registers and used to maintain memory coherency.
+///
+/// A system memory page is required for `sysmembar`, which is a GPU-initiated hardware
+/// memory-barrier operation that flushes all pending GPU-side memory writes that were done through
+/// PCIE to system memory. It is required for falcons to be reset as the reset operation involves a
+/// reset handshake. When the falcon acknowledges a reset, it writes into system memory. To ensure
+/// this write is visible to the host and prevent driver timeouts, the falcon must perform a
+/// sysmembar operation to flush its writes.
+///
+/// Because of this, the sysmem flush memory page must be registered as early as possible during
+/// driver initialization, and before any falcon is reset.
+///
+/// Users are responsible for manually calling [`Self::unregister`] before dropping this object,
+/// otherwise the GPU might still use it even after it has been freed.
+pub(crate) struct SysmemFlush {
+ /// Chipset we are operating on.
+ chipset: Chipset,
+ device: ARef<device::Device>,
+ /// Keep the page alive as long as we need it.
+ page: CoherentHandle,
+}
+
+impl SysmemFlush {
+ /// Allocate a memory page and register it as the sysmem flush page.
+ pub(crate) fn register(
+ dev: &device::Device<device::Bound>,
+ bar: &Bar0,
+ chipset: Chipset,
+ ) -> Result<Self> {
+ let page = CoherentHandle::alloc(dev, kernel::page::PAGE_SIZE, GFP_KERNEL)?;
+
+ hal::fb_hal(chipset).write_sysmem_flush_page(bar, page.dma_handle())?;
+
+ Ok(Self {
+ chipset,
+ device: dev.into(),
+ page,
+ })
+ }
+
+ /// Unregister the managed sysmem flush page.
+ ///
+ /// In order to gracefully tear down the GPU, users must make sure to call this method before
+ /// dropping the object.
+ pub(crate) fn unregister(&self, bar: &Bar0) {
+ let hal = hal::fb_hal(self.chipset);
+
+ if hal.read_sysmem_flush_page(bar) == self.page.dma_handle() {
+ let _ = hal.write_sysmem_flush_page(bar, 0).inspect_err(|e| {
+ dev_warn!(
+ &self.device,
+ "failed to unregister sysmem flush page: {:?}\n",
+ e
+ )
+ });
+ } else {
+ // Another page has been registered after us for some reason - warn as this is a bug.
+ dev_warn!(
+ &self.device,
+ "attempt to unregister a sysmem flush page that is not active\n"
+ );
+ }
+ }
+}
+
+pub(crate) struct FbRange(Range<u64>);
+
+impl FbRange {
+ pub(crate) fn len(&self) -> u64 {
+ self.0.end - self.0.start
+ }
+}
+
+impl From<Range<u64>> for FbRange {
+ fn from(range: Range<u64>) -> Self {
+ Self(range)
+ }
+}
+
+impl Deref for FbRange {
+ type Target = Range<u64>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl fmt::Debug for FbRange {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Use alternate format ({:#?}) to include size, compact format ({:?}) for just the range.
+ if f.alternate() {
+ let size = self.len();
+
+ if size < usize_as_u64(SZ_1M) {
+ let size_kib = size / usize_as_u64(SZ_1K);
+ f.write_fmt(fmt!(
+ "{:#x}..{:#x} ({} KiB)",
+ self.0.start,
+ self.0.end,
+ size_kib
+ ))
+ } else {
+ let size_mib = size / usize_as_u64(SZ_1M);
+ f.write_fmt(fmt!(
+ "{:#x}..{:#x} ({} MiB)",
+ self.0.start,
+ self.0.end,
+ size_mib
+ ))
+ }
+ } else {
+ f.write_fmt(fmt!("{:#x}..{:#x}", self.0.start, self.0.end))
+ }
+ }
+}
+
+/// Layout of the GPU framebuffer memory.
+///
+/// Contains ranges of GPU memory reserved for a given purpose during the GSP boot process.
+#[derive(Debug)]
+pub(crate) struct FbLayout {
+ /// Range of the framebuffer. Starts at `0`.
+ pub(crate) fb: FbRange,
+ /// VGA workspace, small area of reserved memory at the end of the framebuffer.
+ pub(crate) vga_workspace: FbRange,
+ /// FRTS range.
+ pub(crate) frts: FbRange,
+ /// Memory area containing the GSP bootloader image.
+ pub(crate) boot: FbRange,
+ /// Memory area containing the GSP firmware image.
+ pub(crate) elf: FbRange,
+ /// WPR2 heap.
+ pub(crate) wpr2_heap: FbRange,
+ /// WPR2 region range, starting with an instance of `GspFwWprMeta`.
+ pub(crate) wpr2: FbRange,
+ pub(crate) heap: FbRange,
+ pub(crate) vf_partition_count: u8,
+}
+
+impl FbLayout {
+ /// Computes the FB layout for `chipset` required to run the `gsp_fw` GSP firmware.
+ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw: &GspFirmware) -> Result<Self> {
+ let hal = hal::fb_hal(chipset);
+
+ let fb = {
+ let fb_size = hal.vidmem_size(bar);
+
+ FbRange(0..fb_size)
+ };
+
+ let vga_workspace = {
+ let vga_base = {
+ const NV_PRAMIN_SIZE: u64 = usize_as_u64(SZ_1M);
+ let base = fb.end - NV_PRAMIN_SIZE;
+
+ if hal.supports_display(bar) {
+ match bar
+ .read(regs::NV_PDISP_VGA_WORKSPACE_BASE)
+ .vga_workspace_addr()
+ {
+ Some(addr) => {
+ if addr < base {
+ const VBIOS_WORKSPACE_SIZE: u64 = usize_as_u64(SZ_128K);
+
+ // Point workspace address to end of framebuffer.
+ fb.end - VBIOS_WORKSPACE_SIZE
+ } else {
+ addr
+ }
+ }
+ None => base,
+ }
+ } else {
+ base
+ }
+ };
+
+ FbRange(vga_base..fb.end)
+ };
+
+ let frts = {
+ const FRTS_DOWN_ALIGN: Alignment = Alignment::new::<SZ_128K>();
+ const FRTS_SIZE: u64 = usize_as_u64(SZ_1M);
+ let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE;
+
+ FbRange(frts_base..frts_base + FRTS_SIZE)
+ };
+
+ let boot = {
+ const BOOTLOADER_DOWN_ALIGN: Alignment = Alignment::new::<SZ_4K>();
+ let bootloader_size = u64::from_safe_cast(gsp_fw.bootloader.ucode.size());
+ let bootloader_base = (frts.start - bootloader_size).align_down(BOOTLOADER_DOWN_ALIGN);
+
+ FbRange(bootloader_base..bootloader_base + bootloader_size)
+ };
+
+ let elf = {
+ const ELF_DOWN_ALIGN: Alignment = Alignment::new::<SZ_64K>();
+ let elf_size = u64::from_safe_cast(gsp_fw.size);
+ let elf_addr = (boot.start - elf_size).align_down(ELF_DOWN_ALIGN);
+
+ FbRange(elf_addr..elf_addr + elf_size)
+ };
+
+ let wpr2_heap = {
+ const WPR2_HEAP_DOWN_ALIGN: Alignment = Alignment::new::<SZ_1M>();
+ let wpr2_heap_size =
+ gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end);
+ let wpr2_heap_addr = (elf.start - wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN);
+
+ FbRange(wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN))
+ };
+
+ let wpr2 = {
+ const WPR2_DOWN_ALIGN: Alignment = Alignment::new::<SZ_1M>();
+ let wpr2_addr = (wpr2_heap.start - u64::from_safe_cast(size_of::<gsp::GspFwWprMeta>()))
+ .align_down(WPR2_DOWN_ALIGN);
+
+ FbRange(wpr2_addr..frts.end)
+ };
+
+ let heap = {
+ const HEAP_SIZE: u64 = usize_as_u64(SZ_1M);
+
+ FbRange(wpr2.start - HEAP_SIZE..wpr2.start)
+ };
+
+ Ok(Self {
+ fb,
+ vga_workspace,
+ frts,
+ boot,
+ elf,
+ wpr2_heap,
+ wpr2,
+ heap,
+ vf_partition_count: 0,
+ })
+ }
+}
diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs
new file mode 100644
index 000000000000..aba0abd8ee00
--- /dev/null
+++ b/drivers/gpu/nova-core/fb/hal.rs
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::prelude::*;
+
+use crate::{
+ driver::Bar0,
+ gpu::Chipset, //
+};
+
+mod ga100;
+mod ga102;
+mod tu102;
+
+pub(crate) trait FbHal {
+ /// Returns the address of the currently-registered sysmem flush page.
+ fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64;
+
+ /// Register `addr` as the address of the sysmem flush page.
+ ///
+ /// This might fail if the address is too large for the receiving register.
+ fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result;
+
+ /// Returns `true` is display is supported.
+ fn supports_display(&self, bar: &Bar0) -> bool;
+
+ /// Returns the VRAM size, in bytes.
+ fn vidmem_size(&self, bar: &Bar0) -> u64;
+}
+
+/// Returns the HAL corresponding to `chipset`.
+pub(super) fn fb_hal(chipset: Chipset) -> &'static dyn FbHal {
+ use Chipset::*;
+
+ match chipset {
+ TU102 | TU104 | TU106 | TU117 | TU116 => tu102::TU102_HAL,
+ GA100 => ga100::GA100_HAL,
+ GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => {
+ ga102::GA102_HAL
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs
new file mode 100644
index 000000000000..1c03783cddef
--- /dev/null
+++ b/drivers/gpu/nova-core/fb/hal/ga100.rs
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ io::Io,
+ num::Bounded,
+ prelude::*, //
+};
+
+use crate::{
+ driver::Bar0,
+ fb::hal::FbHal,
+ regs, //
+};
+
+use super::tu102::FLUSH_SYSMEM_ADDR_SHIFT;
+
+struct Ga100;
+
+pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 {
+ u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT
+ | u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI).adr_63_40())
+ << FLUSH_SYSMEM_ADDR_SHIFT_HI
+}
+
+pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) {
+ bar.write_reg(
+ regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr_63_40(
+ Bounded::<u64, _>::from(addr)
+ .shr::<FLUSH_SYSMEM_ADDR_SHIFT_HI, _>()
+ .cast(),
+ ),
+ );
+
+ bar.write_reg(
+ regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::zeroed()
+ // CAST: `as u32` is used on purpose since we want to strip the upper bits that have
+ // been written to `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`.
+ .with_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32),
+ );
+}
+
+pub(super) fn display_enabled_ga100(bar: &Bar0) -> bool {
+ !bar.read(regs::ga100::NV_FUSE_STATUS_OPT_DISPLAY)
+ .display_disabled()
+}
+
+/// Shift applied to the sysmem address before it is written into
+/// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`,
+const FLUSH_SYSMEM_ADDR_SHIFT_HI: u32 = 40;
+
+impl FbHal for Ga100 {
+ fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
+ read_sysmem_flush_page_ga100(bar)
+ }
+
+ fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
+ write_sysmem_flush_page_ga100(bar, addr);
+
+ Ok(())
+ }
+
+ fn supports_display(&self, bar: &Bar0) -> bool {
+ display_enabled_ga100(bar)
+ }
+
+ fn vidmem_size(&self, bar: &Bar0) -> u64 {
+ super::tu102::vidmem_size_gp102(bar)
+ }
+}
+
+const GA100: Ga100 = Ga100;
+pub(super) const GA100_HAL: &dyn FbHal = &GA100;
diff --git a/drivers/gpu/nova-core/fb/hal/ga102.rs b/drivers/gpu/nova-core/fb/hal/ga102.rs
new file mode 100644
index 000000000000..4b9f0f74d0e7
--- /dev/null
+++ b/drivers/gpu/nova-core/fb/hal/ga102.rs
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ io::Io,
+ prelude::*, //
+};
+
+use crate::{
+ driver::Bar0,
+ fb::hal::FbHal,
+ regs, //
+};
+
+fn vidmem_size_ga102(bar: &Bar0) -> u64 {
+ bar.read(regs::NV_USABLE_FB_SIZE_IN_MB).usable_fb_size()
+}
+
+struct Ga102;
+
+impl FbHal for Ga102 {
+ fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
+ super::ga100::read_sysmem_flush_page_ga100(bar)
+ }
+
+ fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
+ super::ga100::write_sysmem_flush_page_ga100(bar, addr);
+
+ Ok(())
+ }
+
+ fn supports_display(&self, bar: &Bar0) -> bool {
+ super::ga100::display_enabled_ga100(bar)
+ }
+
+ fn vidmem_size(&self, bar: &Bar0) -> u64 {
+ vidmem_size_ga102(bar)
+ }
+}
+
+const GA102: Ga102 = Ga102;
+pub(super) const GA102_HAL: &dyn FbHal = &GA102;
diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs
new file mode 100644
index 000000000000..281bb796e198
--- /dev/null
+++ b/drivers/gpu/nova-core/fb/hal/tu102.rs
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ io::Io,
+ prelude::*, //
+};
+
+use crate::{
+ driver::Bar0,
+ fb::hal::FbHal,
+ regs, //
+};
+
+/// Shift applied to the sysmem address before it is written into `NV_PFB_NISO_FLUSH_SYSMEM_ADDR`,
+/// to be used by HALs.
+pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8;
+
+pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 {
+ u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT
+}
+
+pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result {
+ // Check that the address doesn't overflow the receiving 32-bit register.
+ u32::try_from(addr >> FLUSH_SYSMEM_ADDR_SHIFT)
+ .map_err(|_| EINVAL)
+ .map(|addr| {
+ bar.write_reg(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::zeroed().with_adr_39_08(addr))
+ })
+}
+
+pub(super) fn display_enabled_gm107(bar: &Bar0) -> bool {
+ !bar.read(regs::gm107::NV_FUSE_STATUS_OPT_DISPLAY)
+ .display_disabled()
+}
+
+pub(super) fn vidmem_size_gp102(bar: &Bar0) -> u64 {
+ bar.read(regs::NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE)
+ .usable_fb_size()
+}
+
+struct Tu102;
+
+impl FbHal for Tu102 {
+ fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
+ read_sysmem_flush_page_gm107(bar)
+ }
+
+ fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
+ write_sysmem_flush_page_gm107(bar, addr)
+ }
+
+ fn supports_display(&self, bar: &Bar0) -> bool {
+ display_enabled_gm107(bar)
+ }
+
+ fn vidmem_size(&self, bar: &Bar0) -> u64 {
+ vidmem_size_gp102(bar)
+ }
+}
+
+const TU102: Tu102 = Tu102;
+pub(super) const TU102_HAL: &dyn FbHal = &TU102;
diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs
new file mode 100644
index 000000000000..6c2ab69cb605
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -0,0 +1,537 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Contains structures and functions dedicated to the parsing, building and patching of firmwares
+//! to be loaded into a given execution unit.
+
+use core::marker::PhantomData;
+use core::ops::Deref;
+
+use kernel::{
+ device,
+ firmware,
+ prelude::*,
+ str::CString,
+ transmute::FromBytes, //
+};
+
+use crate::{
+ falcon::{
+ FalconDmaLoadTarget,
+ FalconFirmware, //
+ },
+ gpu,
+ num::{
+ FromSafeCast,
+ IntoSafeCast, //
+ },
+};
+
+pub(crate) mod booter;
+pub(crate) mod fwsec;
+pub(crate) mod gsp;
+pub(crate) mod riscv;
+
+pub(crate) const FIRMWARE_VERSION: &str = "570.144";
+
+/// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`.
+fn request_firmware(
+ dev: &device::Device,
+ chipset: gpu::Chipset,
+ name: &str,
+ ver: &str,
+) -> Result<firmware::Firmware> {
+ let chip_name = chipset.name();
+
+ CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name}-{ver}.bin"))
+ .and_then(|path| firmware::Firmware::request(&path, dev))
+}
+
+/// Structure used to describe some firmwares, notably FWSEC-FRTS.
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub(crate) struct FalconUCodeDescV2 {
+ /// Header defined by 'NV_BIT_FALCON_UCODE_DESC_HEADER_VDESC*' in OpenRM.
+ hdr: u32,
+ /// Stored size of the ucode after the header, compressed or uncompressed
+ stored_size: u32,
+ /// Uncompressed size of the ucode. If store_size == uncompressed_size, then the ucode
+ /// is not compressed.
+ pub(crate) uncompressed_size: u32,
+ /// Code entry point
+ pub(crate) virtual_entry: u32,
+ /// Offset after the code segment at which the Application Interface Table headers are located.
+ pub(crate) interface_offset: u32,
+ /// Base address at which to load the code segment into 'IMEM'.
+ pub(crate) imem_phys_base: u32,
+ /// Size in bytes of the code to copy into 'IMEM' (includes both secure and non-secure
+ /// segments).
+ pub(crate) imem_load_size: u32,
+ /// Virtual 'IMEM' address (i.e. 'tag') at which the code should start.
+ pub(crate) imem_virt_base: u32,
+ /// Virtual address of secure IMEM segment.
+ pub(crate) imem_sec_base: u32,
+ /// Size of secure IMEM segment.
+ pub(crate) imem_sec_size: u32,
+ /// Offset into stored (uncompressed) image at which DMEM begins.
+ pub(crate) dmem_offset: u32,
+ /// Base address at which to load the data segment into 'DMEM'.
+ pub(crate) dmem_phys_base: u32,
+ /// Size in bytes of the data to copy into 'DMEM'.
+ pub(crate) dmem_load_size: u32,
+ /// "Alternate" Size of data to load into IMEM.
+ pub(crate) alt_imem_load_size: u32,
+ /// "Alternate" Size of data to load into DMEM.
+ pub(crate) alt_dmem_load_size: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for FalconUCodeDescV2 {}
+
+/// Structure used to describe some firmwares, notably FWSEC-FRTS.
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub(crate) struct FalconUCodeDescV3 {
+ /// Header defined by `NV_BIT_FALCON_UCODE_DESC_HEADER_VDESC*` in OpenRM.
+ hdr: u32,
+ /// Stored size of the ucode after the header.
+ stored_size: u32,
+ /// Offset in `DMEM` at which the signature is expected to be found.
+ pub(crate) pkc_data_offset: u32,
+ /// Offset after the code segment at which the app headers are located.
+ pub(crate) interface_offset: u32,
+ /// Base address at which to load the code segment into `IMEM`.
+ pub(crate) imem_phys_base: u32,
+ /// Size in bytes of the code to copy into `IMEM`.
+ pub(crate) imem_load_size: u32,
+ /// Virtual `IMEM` address (i.e. `tag`) at which the code should start.
+ pub(crate) imem_virt_base: u32,
+ /// Base address at which to load the data segment into `DMEM`.
+ pub(crate) dmem_phys_base: u32,
+ /// Size in bytes of the data to copy into `DMEM`.
+ pub(crate) dmem_load_size: u32,
+ /// Mask of the falcon engines on which this firmware can run.
+ pub(crate) engine_id_mask: u16,
+ /// ID of the ucode used to infer a fuse register to validate the signature.
+ pub(crate) ucode_id: u8,
+ /// Number of signatures in this firmware.
+ pub(crate) signature_count: u8,
+ /// Versions of the signatures, used to infer a valid signature to use.
+ pub(crate) signature_versions: u16,
+ _reserved: u16,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use
+// interior mutability.
+unsafe impl FromBytes for FalconUCodeDescV3 {}
+
+/// Enum wrapping the different versions of Falcon microcode descriptors.
+///
+/// This allows handling both V2 and V3 descriptor formats through a
+/// unified type, providing version-agnostic access to firmware metadata
+/// via the [`FalconUCodeDescriptor`] trait.
+#[derive(Debug, Clone)]
+pub(crate) enum FalconUCodeDesc {
+ V2(FalconUCodeDescV2),
+ V3(FalconUCodeDescV3),
+}
+
+impl Deref for FalconUCodeDesc {
+ type Target = dyn FalconUCodeDescriptor;
+
+ fn deref(&self) -> &Self::Target {
+ match self {
+ FalconUCodeDesc::V2(v2) => v2,
+ FalconUCodeDesc::V3(v3) => v3,
+ }
+ }
+}
+
+/// Trait providing a common interface for accessing Falcon microcode descriptor fields.
+///
+/// This trait abstracts over the different descriptor versions ([`FalconUCodeDescV2`] and
+/// [`FalconUCodeDescV3`]), allowing code to work with firmware metadata without needing to
+/// know the specific descriptor version. Fields not present return zero.
+pub(crate) trait FalconUCodeDescriptor {
+ fn hdr(&self) -> u32;
+ fn imem_load_size(&self) -> u32;
+ fn interface_offset(&self) -> u32;
+ fn dmem_load_size(&self) -> u32;
+ fn pkc_data_offset(&self) -> u32;
+ fn engine_id_mask(&self) -> u16;
+ fn ucode_id(&self) -> u8;
+ fn signature_count(&self) -> u8;
+ fn signature_versions(&self) -> u16;
+
+ /// Returns the size in bytes of the header.
+ fn size(&self) -> usize {
+ let hdr = self.hdr();
+
+ const HDR_SIZE_SHIFT: u32 = 16;
+ const HDR_SIZE_MASK: u32 = 0xffff0000;
+ ((hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT).into_safe_cast()
+ }
+
+ fn imem_sec_load_params(&self) -> FalconDmaLoadTarget;
+ fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget>;
+ fn dmem_load_params(&self) -> FalconDmaLoadTarget;
+}
+
+impl FalconUCodeDescriptor for FalconUCodeDescV2 {
+ fn hdr(&self) -> u32 {
+ self.hdr
+ }
+ fn imem_load_size(&self) -> u32 {
+ self.imem_load_size
+ }
+ fn interface_offset(&self) -> u32 {
+ self.interface_offset
+ }
+ fn dmem_load_size(&self) -> u32 {
+ self.dmem_load_size
+ }
+ fn pkc_data_offset(&self) -> u32 {
+ 0
+ }
+ fn engine_id_mask(&self) -> u16 {
+ 0
+ }
+ fn ucode_id(&self) -> u8 {
+ 0
+ }
+ fn signature_count(&self) -> u8 {
+ 0
+ }
+ fn signature_versions(&self) -> u16 {
+ 0
+ }
+
+ fn imem_sec_load_params(&self) -> FalconDmaLoadTarget {
+ // `imem_sec_base` is the *virtual* start address of the secure IMEM segment, so subtract
+ // `imem_virt_base` to get its physical offset.
+ let imem_sec_start = self.imem_sec_base.saturating_sub(self.imem_virt_base);
+
+ FalconDmaLoadTarget {
+ src_start: imem_sec_start,
+ dst_start: self.imem_phys_base.saturating_add(imem_sec_start),
+ len: self.imem_sec_size,
+ }
+ }
+
+ fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> {
+ Some(FalconDmaLoadTarget {
+ // Non-secure code always starts at offset 0.
+ src_start: 0,
+ dst_start: self.imem_phys_base,
+ // `imem_load_size` includes the size of the secure segment, so subtract it to
+ // get the correct amount of data to copy.
+ len: self.imem_load_size.saturating_sub(self.imem_sec_size),
+ })
+ }
+
+ fn dmem_load_params(&self) -> FalconDmaLoadTarget {
+ FalconDmaLoadTarget {
+ src_start: self.dmem_offset,
+ dst_start: self.dmem_phys_base,
+ len: self.dmem_load_size,
+ }
+ }
+}
+
+impl FalconUCodeDescriptor for FalconUCodeDescV3 {
+ fn hdr(&self) -> u32 {
+ self.hdr
+ }
+ fn imem_load_size(&self) -> u32 {
+ self.imem_load_size
+ }
+ fn interface_offset(&self) -> u32 {
+ self.interface_offset
+ }
+ fn dmem_load_size(&self) -> u32 {
+ self.dmem_load_size
+ }
+ fn pkc_data_offset(&self) -> u32 {
+ self.pkc_data_offset
+ }
+ fn engine_id_mask(&self) -> u16 {
+ self.engine_id_mask
+ }
+ fn ucode_id(&self) -> u8 {
+ self.ucode_id
+ }
+ fn signature_count(&self) -> u8 {
+ self.signature_count
+ }
+ fn signature_versions(&self) -> u16 {
+ self.signature_versions
+ }
+
+ fn imem_sec_load_params(&self) -> FalconDmaLoadTarget {
+ FalconDmaLoadTarget {
+ // IMEM segment always starts at offset 0.
+ src_start: 0,
+ dst_start: self.imem_phys_base,
+ len: self.imem_load_size,
+ }
+ }
+
+ fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> {
+ // Not used on V3 platforms
+ None
+ }
+
+ fn dmem_load_params(&self) -> FalconDmaLoadTarget {
+ FalconDmaLoadTarget {
+ // DMEM segment starts right after the IMEM one.
+ src_start: self.imem_load_size,
+ dst_start: self.dmem_phys_base,
+ len: self.dmem_load_size,
+ }
+ }
+}
+
+/// Trait implemented by types defining the signed state of a firmware.
+trait SignedState {}
+
+/// Type indicating that the firmware must be signed before it can be used.
+struct Unsigned;
+impl SignedState for Unsigned {}
+
+/// Type indicating that the firmware is signed and ready to be loaded.
+struct Signed;
+impl SignedState for Signed {}
+
+/// Microcode to be loaded into a specific falcon.
+///
+/// This is module-local and meant for sub-modules to use internally.
+///
+/// After construction, a firmware is [`Unsigned`], and must generally be patched with a signature
+/// before it can be loaded (with an exception for development hardware). The
+/// [`Self::patch_signature`] and [`Self::no_patch_signature`] methods are used to transition the
+/// firmware to its [`Signed`] state.
+// TODO: Consider replacing this with a coherent memory object once `CoherentAllocation` supports
+// temporary CPU-exclusive access to the object without unsafe methods.
+struct FirmwareObject<F: FalconFirmware, S: SignedState>(KVVec<u8>, PhantomData<(F, S)>);
+
+/// Trait for signatures to be patched directly into a given firmware.
+///
+/// This is module-local and meant for sub-modules to use internally.
+trait FirmwareSignature<F: FalconFirmware>: AsRef<[u8]> {}
+
+impl<F: FalconFirmware> FirmwareObject<F, Unsigned> {
+ /// Patches the firmware at offset `signature_start` with `signature`.
+ fn patch_signature<S: FirmwareSignature<F>>(
+ mut self,
+ signature: &S,
+ signature_start: usize,
+ ) -> Result<FirmwareObject<F, Signed>> {
+ let signature_bytes = signature.as_ref();
+ let signature_end = signature_start
+ .checked_add(signature_bytes.len())
+ .ok_or(EOVERFLOW)?;
+ let dst = self
+ .0
+ .get_mut(signature_start..signature_end)
+ .ok_or(EINVAL)?;
+
+ // PANIC: `dst` and `signature_bytes` have the same length.
+ dst.copy_from_slice(signature_bytes);
+
+ Ok(FirmwareObject(self.0, PhantomData))
+ }
+
+ /// Mark the firmware as signed without patching it.
+ ///
+ /// This method is used to explicitly confirm that we do not need to sign the firmware, while
+ /// allowing us to continue as if it was. This is typically only needed for development
+ /// hardware.
+ fn no_patch_signature(self) -> FirmwareObject<F, Signed> {
+ FirmwareObject(self.0, PhantomData)
+ }
+}
+
+/// Header common to most firmware files.
+#[repr(C)]
+#[derive(Debug, Clone)]
+struct BinHdr {
+ /// Magic number, must be `0x10de`.
+ bin_magic: u32,
+ /// Version of the header.
+ bin_ver: u32,
+ /// Size in bytes of the binary (to be ignored).
+ bin_size: u32,
+ /// Offset of the start of the application-specific header.
+ header_offset: u32,
+ /// Offset of the start of the data payload.
+ data_offset: u32,
+ /// Size in bytes of the data payload.
+ data_size: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for BinHdr {}
+
+// A firmware blob starting with a `BinHdr`.
+struct BinFirmware<'a> {
+ hdr: BinHdr,
+ fw: &'a [u8],
+}
+
+impl<'a> BinFirmware<'a> {
+ /// Interpret `fw` as a firmware image starting with a [`BinHdr`], and returns the
+ /// corresponding [`BinFirmware`] that can be used to extract its payload.
+ fn new(fw: &'a firmware::Firmware) -> Result<Self> {
+ const BIN_MAGIC: u32 = 0x10de;
+ let fw = fw.data();
+
+ fw.get(0..size_of::<BinHdr>())
+ // Extract header.
+ .and_then(BinHdr::from_bytes_copy)
+ // Validate header.
+ .and_then(|hdr| {
+ if hdr.bin_magic == BIN_MAGIC {
+ Some(hdr)
+ } else {
+ None
+ }
+ })
+ .map(|hdr| Self { hdr, fw })
+ .ok_or(EINVAL)
+ }
+
+ /// Returns the data payload of the firmware, or `None` if the data range is out of bounds of
+ /// the firmware image.
+ fn data(&self) -> Option<&[u8]> {
+ let fw_start = usize::from_safe_cast(self.hdr.data_offset);
+ let fw_size = usize::from_safe_cast(self.hdr.data_size);
+ let fw_end = fw_start.checked_add(fw_size)?;
+
+ self.fw.get(fw_start..fw_end)
+ }
+}
+
+pub(crate) struct ModInfoBuilder<const N: usize>(firmware::ModInfoBuilder<N>);
+
+impl<const N: usize> ModInfoBuilder<N> {
+ const fn make_entry_file(self, chipset: &str, fw: &str) -> Self {
+ ModInfoBuilder(
+ self.0
+ .new_entry()
+ .push("nvidia/")
+ .push(chipset)
+ .push("/gsp/")
+ .push(fw)
+ .push("-")
+ .push(FIRMWARE_VERSION)
+ .push(".bin"),
+ )
+ }
+
+ const fn make_entry_chipset(self, chipset: gpu::Chipset) -> Self {
+ let name = chipset.name();
+
+ let this = self
+ .make_entry_file(name, "booter_load")
+ .make_entry_file(name, "booter_unload")
+ .make_entry_file(name, "bootloader")
+ .make_entry_file(name, "gsp");
+
+ if chipset.needs_fwsec_bootloader() {
+ this.make_entry_file(name, "gen_bootloader")
+ } else {
+ this
+ }
+ }
+
+ pub(crate) const fn create(
+ module_name: &'static core::ffi::CStr,
+ ) -> firmware::ModInfoBuilder<N> {
+ let mut this = Self(firmware::ModInfoBuilder::new(module_name));
+ let mut i = 0;
+
+ while i < gpu::Chipset::ALL.len() {
+ this = this.make_entry_chipset(gpu::Chipset::ALL[i]);
+ i += 1;
+ }
+
+ this.0
+ }
+}
+
+/// Ad-hoc and temporary module to extract sections from ELF images.
+///
+/// Some firmware images are currently packaged as ELF files, where sections names are used as keys
+/// to specific and related bits of data. Future firmware versions are scheduled to move away from
+/// that scheme before nova-core becomes stable, which means this module will eventually be
+/// removed.
+mod elf {
+ use core::mem::size_of;
+
+ use kernel::{
+ bindings,
+ str::CStr,
+ transmute::FromBytes, //
+ };
+
+ /// Newtype to provide a [`FromBytes`] implementation.
+ #[repr(transparent)]
+ struct Elf64Hdr(bindings::elf64_hdr);
+ // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+ unsafe impl FromBytes for Elf64Hdr {}
+
+ #[repr(transparent)]
+ struct Elf64SHdr(bindings::elf64_shdr);
+ // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+ unsafe impl FromBytes for Elf64SHdr {}
+
+ /// Returns a NULL-terminated string from the ELF image at `offset`.
+ fn elf_str(elf: &[u8], offset: u64) -> Option<&str> {
+ let idx = usize::try_from(offset).ok()?;
+ let bytes = elf.get(idx..)?;
+ CStr::from_bytes_until_nul(bytes).ok()?.to_str().ok()
+ }
+
+ /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it.
+ pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> {
+ let hdr = &elf
+ .get(0..size_of::<bindings::elf64_hdr>())
+ .and_then(Elf64Hdr::from_bytes)?
+ .0;
+
+ // Get all the section headers.
+ let mut shdr = {
+ let shdr_num = usize::from(hdr.e_shnum);
+ let shdr_start = usize::try_from(hdr.e_shoff).ok()?;
+ let shdr_end = shdr_num
+ .checked_mul(size_of::<Elf64SHdr>())
+ .and_then(|v| v.checked_add(shdr_start))?;
+
+ elf.get(shdr_start..shdr_end)
+ .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))?
+ };
+
+ // Get the strings table.
+ let strhdr = shdr
+ .clone()
+ .nth(usize::from(hdr.e_shstrndx))
+ .and_then(Elf64SHdr::from_bytes)?;
+
+ // Find the section which name matches `name` and return it.
+ shdr.find_map(|sh| {
+ let hdr = Elf64SHdr::from_bytes(sh)?;
+ let name_offset = strhdr.0.sh_offset.checked_add(u64::from(hdr.0.sh_name))?;
+ let section_name = elf_str(elf, name_offset)?;
+
+ if section_name != name {
+ return None;
+ }
+
+ let start = usize::try_from(hdr.0.sh_offset).ok()?;
+ let end = usize::try_from(hdr.0.sh_size)
+ .ok()
+ .and_then(|sh_size| start.checked_add(sh_size))?;
+
+ elf.get(start..end)
+ })
+ }
+}
diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs
new file mode 100644
index 000000000000..de2a4536b532
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware/booter.rs
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Support for loading and patching the `Booter` firmware. `Booter` is a Heavy Secured firmware
+//! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon
+//! (and optionally unload it through a separate firmware image).
+
+use core::marker::PhantomData;
+
+use kernel::{
+ device,
+ prelude::*,
+ transmute::FromBytes, //
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ sec2::Sec2,
+ Falcon,
+ FalconBromParams,
+ FalconDmaLoadTarget,
+ FalconDmaLoadable,
+ FalconFirmware, //
+ },
+ firmware::{
+ BinFirmware,
+ FirmwareObject,
+ FirmwareSignature,
+ Signed,
+ Unsigned, //
+ },
+ gpu::Chipset,
+ num::{
+ FromSafeCast,
+ IntoSafeCast, //
+ },
+};
+
+/// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at
+/// `offset` in `slice`.
+fn frombytes_at<S: FromBytes + Sized>(slice: &[u8], offset: usize) -> Result<S> {
+ let end = offset.checked_add(size_of::<S>()).ok_or(EINVAL)?;
+ slice
+ .get(offset..end)
+ .and_then(S::from_bytes_copy)
+ .ok_or(EINVAL)
+}
+
+/// Heavy-Secured firmware header.
+///
+/// Such firmwares have an application-specific payload that needs to be patched with a given
+/// signature.
+#[repr(C)]
+#[derive(Debug, Clone)]
+struct HsHeaderV2 {
+ /// Offset to the start of the signatures.
+ sig_prod_offset: u32,
+ /// Size in bytes of the signatures.
+ sig_prod_size: u32,
+ /// Offset to a `u32` containing the location at which to patch the signature in the microcode
+ /// image.
+ patch_loc_offset: u32,
+ /// Offset to a `u32` containing the index of the signature to patch.
+ patch_sig_offset: u32,
+ /// Start offset to the signature metadata.
+ meta_data_offset: u32,
+ /// Size in bytes of the signature metadata.
+ meta_data_size: u32,
+ /// Offset to a `u32` containing the number of signatures in the signatures section.
+ num_sig_offset: u32,
+ /// Offset of the application-specific header.
+ header_offset: u32,
+ /// Size in bytes of the application-specific header.
+ header_size: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for HsHeaderV2 {}
+
+/// Heavy-Secured Firmware image container.
+///
+/// This provides convenient access to the fields of [`HsHeaderV2`] that are actually indices to
+/// read from in the firmware data.
+struct HsFirmwareV2<'a> {
+ hdr: HsHeaderV2,
+ fw: &'a [u8],
+}
+
+impl<'a> HsFirmwareV2<'a> {
+ /// Interprets the header of `bin_fw` as a [`HsHeaderV2`] and returns an instance of
+ /// `HsFirmwareV2` for further parsing.
+ ///
+ /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image.
+ fn new(bin_fw: &BinFirmware<'a>) -> Result<Self> {
+ frombytes_at::<HsHeaderV2>(bin_fw.fw, bin_fw.hdr.header_offset.into_safe_cast())
+ .map(|hdr| Self { hdr, fw: bin_fw.fw })
+ }
+
+ /// Returns the location at which the signatures should be patched in the microcode image.
+ ///
+ /// Fails if the offset of the patch location is outside the bounds of the firmware
+ /// image.
+ fn patch_location(&self) -> Result<u32> {
+ frombytes_at::<u32>(self.fw, self.hdr.patch_loc_offset.into_safe_cast())
+ }
+
+ /// Returns an iterator to the signatures of the firmware. The iterator can be empty if the
+ /// firmware is unsigned.
+ ///
+ /// Fails if the pointed signatures are outside the bounds of the firmware image.
+ fn signatures_iter(&'a self) -> Result<impl Iterator<Item = BooterSignature<'a>>> {
+ let num_sig = frombytes_at::<u32>(self.fw, self.hdr.num_sig_offset.into_safe_cast())?;
+ let iter = match self.hdr.sig_prod_size.checked_div(num_sig) {
+ // If there are no signatures, return an iterator that will yield zero elements.
+ None => (&[] as &[u8]).chunks_exact(1),
+ Some(sig_size) => {
+ let patch_sig =
+ frombytes_at::<u32>(self.fw, self.hdr.patch_sig_offset.into_safe_cast())?;
+
+ let signatures_start = self
+ .hdr
+ .sig_prod_offset
+ .checked_add(patch_sig)
+ .map(usize::from_safe_cast)
+ .ok_or(EINVAL)?;
+
+ let signatures_end = signatures_start
+ .checked_add(usize::from_safe_cast(self.hdr.sig_prod_size))
+ .ok_or(EINVAL)?;
+
+ self.fw
+ // Get signatures range.
+ .get(signatures_start..signatures_end)
+ .ok_or(EINVAL)?
+ .chunks_exact(sig_size.into_safe_cast())
+ }
+ };
+
+ // Map the byte slices into signatures.
+ Ok(iter.map(BooterSignature))
+ }
+}
+
+/// Signature parameters, as defined in the firmware.
+#[repr(C)]
+struct HsSignatureParams {
+ /// Fuse version to use.
+ fuse_ver: u32,
+ /// Mask of engine IDs this firmware applies to.
+ engine_id_mask: u32,
+ /// ID of the microcode.
+ ucode_id: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for HsSignatureParams {}
+
+impl HsSignatureParams {
+ /// Returns the signature parameters contained in `hs_fw`.
+ ///
+ /// Fails if the meta data parameter of `hs_fw` is outside the bounds of the firmware image, or
+ /// if its size doesn't match that of [`HsSignatureParams`].
+ fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> {
+ let start = usize::from_safe_cast(hs_fw.hdr.meta_data_offset);
+ let end = start
+ .checked_add(hs_fw.hdr.meta_data_size.into_safe_cast())
+ .ok_or(EINVAL)?;
+
+ hs_fw
+ .fw
+ .get(start..end)
+ .and_then(Self::from_bytes_copy)
+ .ok_or(EINVAL)
+ }
+}
+
+/// Header for code and data load offsets.
+#[repr(C)]
+#[derive(Debug, Clone)]
+struct HsLoadHeaderV2 {
+ // Offset at which the code starts.
+ os_code_offset: u32,
+ // Total size of the code, for all apps.
+ os_code_size: u32,
+ // Offset at which the data starts.
+ os_data_offset: u32,
+ // Size of the data.
+ os_data_size: u32,
+ // Number of apps following this header. Each app is described by a [`HsLoadHeaderV2App`].
+ num_apps: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for HsLoadHeaderV2 {}
+
+impl HsLoadHeaderV2 {
+ /// Returns the load header contained in `hs_fw`.
+ ///
+ /// Fails if the header pointed at by `hs_fw` is not within the bounds of the firmware image.
+ fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> {
+ frombytes_at::<Self>(hs_fw.fw, hs_fw.hdr.header_offset.into_safe_cast())
+ }
+}
+
+/// Header for app code loader.
+#[repr(C)]
+#[derive(Debug, Clone)]
+struct HsLoadHeaderV2App {
+ /// Offset at which to load the app code.
+ offset: u32,
+ /// Length in bytes of the app code.
+ len: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for HsLoadHeaderV2App {}
+
+impl HsLoadHeaderV2App {
+ /// Returns the [`HsLoadHeaderV2App`] for app `idx` of `hs_fw`.
+ ///
+ /// Fails if `idx` is larger than the number of apps declared in `hs_fw`, or if the header is
+ /// not within the bounds of the firmware image.
+ fn new(hs_fw: &HsFirmwareV2<'_>, idx: u32) -> Result<Self> {
+ let load_hdr = HsLoadHeaderV2::new(hs_fw)?;
+ if idx >= load_hdr.num_apps {
+ Err(EINVAL)
+ } else {
+ frombytes_at::<Self>(
+ hs_fw.fw,
+ usize::from_safe_cast(hs_fw.hdr.header_offset)
+ // Skip the load header...
+ .checked_add(size_of::<HsLoadHeaderV2>())
+ // ... and jump to app header `idx`.
+ .and_then(|offset| {
+ offset
+ .checked_add(usize::from_safe_cast(idx).checked_mul(size_of::<Self>())?)
+ })
+ .ok_or(EINVAL)?,
+ )
+ }
+ }
+}
+
+/// Signature for Booter firmware. Their size is encoded into the header and not known a compile
+/// time, so we just wrap a byte slices on which we can implement [`FirmwareSignature`].
+struct BooterSignature<'a>(&'a [u8]);
+
+impl<'a> AsRef<[u8]> for BooterSignature<'a> {
+ fn as_ref(&self) -> &[u8] {
+ self.0
+ }
+}
+
+impl<'a> FirmwareSignature<BooterFirmware> for BooterSignature<'a> {}
+
+/// The `Booter` loader firmware, responsible for loading the GSP.
+pub(crate) struct BooterFirmware {
+ // Load parameters for Secure `IMEM` falcon memory.
+ imem_sec_load_target: FalconDmaLoadTarget,
+ // Load parameters for Non-Secure `IMEM` falcon memory,
+ // used only on Turing and GA100
+ imem_ns_load_target: Option<FalconDmaLoadTarget>,
+ // Load parameters for `DMEM` falcon memory.
+ dmem_load_target: FalconDmaLoadTarget,
+ // BROM falcon parameters.
+ brom_params: FalconBromParams,
+ // Device-mapped firmware image.
+ ucode: FirmwareObject<Self, Signed>,
+}
+
+impl FirmwareObject<BooterFirmware, Unsigned> {
+ fn new_booter(data: &[u8]) -> Result<Self> {
+ let mut ucode = KVVec::new();
+ ucode.extend_from_slice(data, GFP_KERNEL)?;
+
+ Ok(Self(ucode, PhantomData))
+ }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub(crate) enum BooterKind {
+ Loader,
+ #[expect(unused)]
+ Unloader,
+}
+
+impl BooterFirmware {
+ /// Parses the Booter firmware contained in `fw`, and patches the correct signature so it is
+ /// ready to be loaded and run on `falcon`.
+ pub(crate) fn new(
+ dev: &device::Device<device::Bound>,
+ kind: BooterKind,
+ chipset: Chipset,
+ ver: &str,
+ falcon: &Falcon<<Self as FalconFirmware>::Target>,
+ bar: &Bar0,
+ ) -> Result<Self> {
+ let fw_name = match kind {
+ BooterKind::Loader => "booter_load",
+ BooterKind::Unloader => "booter_unload",
+ };
+ let fw = super::request_firmware(dev, chipset, fw_name, ver)?;
+ let bin_fw = BinFirmware::new(&fw)?;
+
+ // The binary firmware embeds a Heavy-Secured firmware.
+ let hs_fw = HsFirmwareV2::new(&bin_fw)?;
+
+ // The Heavy-Secured firmware embeds a firmware load descriptor.
+ let load_hdr = HsLoadHeaderV2::new(&hs_fw)?;
+
+ // Offset in `ucode` where to patch the signature.
+ let patch_loc = hs_fw.patch_location()?;
+
+ let sig_params = HsSignatureParams::new(&hs_fw)?;
+ let brom_params = FalconBromParams {
+ // `load_hdr.os_data_offset` is an absolute index, but `pkc_data_offset` is from the
+ // signature patch location.
+ pkc_data_offset: patch_loc
+ .checked_sub(load_hdr.os_data_offset)
+ .ok_or(EINVAL)?,
+ engine_id_mask: u16::try_from(sig_params.engine_id_mask).map_err(|_| EINVAL)?,
+ ucode_id: u8::try_from(sig_params.ucode_id).map_err(|_| EINVAL)?,
+ };
+ let app0 = HsLoadHeaderV2App::new(&hs_fw, 0)?;
+
+ // Object containing the firmware microcode to be signature-patched.
+ let ucode = bin_fw
+ .data()
+ .ok_or(EINVAL)
+ .and_then(FirmwareObject::<Self, _>::new_booter)?;
+
+ let ucode_signed = {
+ let mut signatures = hs_fw.signatures_iter()?.peekable();
+
+ if signatures.peek().is_none() {
+ // If there are no signatures, then the firmware is unsigned.
+ ucode.no_patch_signature()
+ } else {
+ // Obtain the version from the fuse register, and extract the corresponding
+ // signature.
+ let reg_fuse_version = falcon.signature_reg_fuse_version(
+ bar,
+ brom_params.engine_id_mask,
+ brom_params.ucode_id,
+ )?;
+
+ // `0` means the last signature should be used.
+ const FUSE_VERSION_USE_LAST_SIG: u32 = 0;
+ let signature = match reg_fuse_version {
+ FUSE_VERSION_USE_LAST_SIG => signatures.last(),
+ // Otherwise hardware fuse version needs to be subtracted to obtain the index.
+ reg_fuse_version => {
+ let Some(idx) = sig_params.fuse_ver.checked_sub(reg_fuse_version) else {
+ dev_err!(dev, "invalid fuse version for Booter firmware\n");
+ return Err(EINVAL);
+ };
+ signatures.nth(idx.into_safe_cast())
+ }
+ }
+ .ok_or(EINVAL)?;
+
+ ucode.patch_signature(&signature, patch_loc.into_safe_cast())?
+ }
+ };
+
+ // There are two versions of Booter, one for Turing/GA100, and another for
+ // GA102+. The extraction of the IMEM sections differs between the two
+ // versions. Unfortunately, the file names are the same, and the headers
+ // don't indicate the versions. The only way to differentiate is by the Chipset.
+ let (imem_sec_dst_start, imem_ns_load_target) = if chipset <= Chipset::GA100 {
+ (
+ app0.offset,
+ Some(FalconDmaLoadTarget {
+ src_start: 0,
+ dst_start: load_hdr.os_code_offset,
+ len: load_hdr.os_code_size,
+ }),
+ )
+ } else {
+ (0, None)
+ };
+
+ Ok(Self {
+ imem_sec_load_target: FalconDmaLoadTarget {
+ src_start: app0.offset,
+ dst_start: imem_sec_dst_start,
+ len: app0.len,
+ },
+ imem_ns_load_target,
+ dmem_load_target: FalconDmaLoadTarget {
+ src_start: load_hdr.os_data_offset,
+ dst_start: 0,
+ len: load_hdr.os_data_size,
+ },
+ brom_params,
+ ucode: ucode_signed,
+ })
+ }
+}
+
+impl FalconDmaLoadable for BooterFirmware {
+ fn as_slice(&self) -> &[u8] {
+ self.ucode.0.as_slice()
+ }
+
+ fn imem_sec_load_params(&self) -> FalconDmaLoadTarget {
+ self.imem_sec_load_target.clone()
+ }
+
+ fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> {
+ self.imem_ns_load_target.clone()
+ }
+
+ fn dmem_load_params(&self) -> FalconDmaLoadTarget {
+ self.dmem_load_target.clone()
+ }
+}
+
+impl FalconFirmware for BooterFirmware {
+ type Target = Sec2;
+
+ fn brom_params(&self) -> FalconBromParams {
+ self.brom_params.clone()
+ }
+
+ fn boot_addr(&self) -> u32 {
+ if let Some(ns_target) = &self.imem_ns_load_target {
+ ns_target.dst_start
+ } else {
+ self.imem_sec_load_target.src_start
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs
new file mode 100644
index 000000000000..8810cb49db67
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware/fwsec.rs
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! FWSEC is a High Secure firmware that is extracted from the BIOS and performs the first step of
+//! the GSP startup by creating the WPR2 memory region and copying critical areas of the VBIOS into
+//! it after authenticating them, ensuring they haven't been tampered with. It runs on the GSP
+//! falcon.
+//!
+//! Before being run, it needs to be patched in two areas:
+//!
+//! - The command to be run, as this firmware can perform several tasks ;
+//! - The ucode signature, so the GSP falcon can run FWSEC in HS mode.
+
+pub(crate) mod bootloader;
+
+use core::marker::PhantomData;
+
+use kernel::{
+ device::{
+ self,
+ Device, //
+ },
+ prelude::*,
+ transmute::{
+ AsBytes,
+ FromBytes, //
+ },
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ gsp::Gsp,
+ Falcon,
+ FalconBromParams,
+ FalconDmaLoadTarget,
+ FalconDmaLoadable,
+ FalconFirmware, //
+ },
+ firmware::{
+ FalconUCodeDesc,
+ FirmwareObject,
+ FirmwareSignature,
+ Signed,
+ Unsigned, //
+ },
+ num::FromSafeCast,
+ vbios::Vbios,
+};
+
+const NVFW_FALCON_APPIF_ID_DMEMMAPPER: u32 = 0x4;
+
+#[repr(C)]
+#[derive(Debug)]
+struct FalconAppifHdrV1 {
+ version: u8,
+ header_size: u8,
+ entry_size: u8,
+ entry_count: u8,
+}
+// SAFETY: Any byte sequence is valid for this struct.
+unsafe impl FromBytes for FalconAppifHdrV1 {}
+
+#[repr(C, packed)]
+#[derive(Debug)]
+struct FalconAppifV1 {
+ id: u32,
+ dmem_base: u32,
+}
+// SAFETY: Any byte sequence is valid for this struct.
+unsafe impl FromBytes for FalconAppifV1 {}
+
+#[derive(Debug)]
+#[repr(C, packed)]
+struct FalconAppifDmemmapperV3 {
+ signature: u32,
+ version: u16,
+ size: u16,
+ cmd_in_buffer_offset: u32,
+ cmd_in_buffer_size: u32,
+ cmd_out_buffer_offset: u32,
+ cmd_out_buffer_size: u32,
+ nvf_img_data_buffer_offset: u32,
+ nvf_img_data_buffer_size: u32,
+ printf_buffer_hdr: u32,
+ ucode_build_time_stamp: u32,
+ ucode_signature: u32,
+ init_cmd: u32,
+ ucode_feature: u32,
+ ucode_cmd_mask0: u32,
+ ucode_cmd_mask1: u32,
+ multi_tgt_tbl: u32,
+}
+// SAFETY: Any byte sequence is valid for this struct.
+unsafe impl FromBytes for FalconAppifDmemmapperV3 {}
+// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability.
+unsafe impl AsBytes for FalconAppifDmemmapperV3 {}
+
+#[derive(Debug)]
+#[repr(C, packed)]
+struct ReadVbios {
+ ver: u32,
+ hdr: u32,
+ addr: u64,
+ size: u32,
+ flags: u32,
+}
+// SAFETY: Any byte sequence is valid for this struct.
+unsafe impl FromBytes for ReadVbios {}
+// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability.
+unsafe impl AsBytes for ReadVbios {}
+
+#[derive(Debug)]
+#[repr(C, packed)]
+struct FrtsRegion {
+ ver: u32,
+ hdr: u32,
+ addr: u32,
+ size: u32,
+ ftype: u32,
+}
+// SAFETY: Any byte sequence is valid for this struct.
+unsafe impl FromBytes for FrtsRegion {}
+// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability.
+unsafe impl AsBytes for FrtsRegion {}
+
+const NVFW_FRTS_CMD_REGION_TYPE_FB: u32 = 2;
+
+#[repr(C, packed)]
+struct FrtsCmd {
+ read_vbios: ReadVbios,
+ frts_region: FrtsRegion,
+}
+// SAFETY: Any byte sequence is valid for this struct.
+unsafe impl FromBytes for FrtsCmd {}
+// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability.
+unsafe impl AsBytes for FrtsCmd {}
+
+const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS: u32 = 0x15;
+const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB: u32 = 0x19;
+
+/// Command for the [`FwsecFirmware`] to execute.
+pub(crate) enum FwsecCommand {
+ /// Asks [`FwsecFirmware`] to carve out the WPR2 area and place a verified copy of the VBIOS
+ /// image into it.
+ Frts { frts_addr: u64, frts_size: u64 },
+ /// Asks [`FwsecFirmware`] to load pre-OS apps on the PMU.
+ #[expect(dead_code)]
+ Sb,
+}
+
+/// Size of the signatures used in FWSEC.
+const BCRT30_RSA3K_SIG_SIZE: usize = 384;
+
+/// A single signature that can be patched into a FWSEC image.
+#[repr(transparent)]
+pub(crate) struct Bcrt30Rsa3kSignature([u8; BCRT30_RSA3K_SIG_SIZE]);
+
+/// SAFETY: A signature is just an array of bytes.
+unsafe impl FromBytes for Bcrt30Rsa3kSignature {}
+
+impl From<[u8; BCRT30_RSA3K_SIG_SIZE]> for Bcrt30Rsa3kSignature {
+ fn from(sig: [u8; BCRT30_RSA3K_SIG_SIZE]) -> Self {
+ Self(sig)
+ }
+}
+
+impl AsRef<[u8]> for Bcrt30Rsa3kSignature {
+ fn as_ref(&self) -> &[u8] {
+ &self.0
+ }
+}
+
+impl FirmwareSignature<FwsecFirmware> for Bcrt30Rsa3kSignature {}
+
+/// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon.
+///
+/// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow.
+pub(crate) struct FwsecFirmware {
+ /// Descriptor of the firmware.
+ desc: FalconUCodeDesc,
+ /// Object containing the firmware binary.
+ ucode: FirmwareObject<Self, Signed>,
+}
+
+impl FalconDmaLoadable for FwsecFirmware {
+ fn as_slice(&self) -> &[u8] {
+ self.ucode.0.as_slice()
+ }
+
+ fn imem_sec_load_params(&self) -> FalconDmaLoadTarget {
+ self.desc.imem_sec_load_params()
+ }
+
+ fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> {
+ self.desc.imem_ns_load_params()
+ }
+
+ fn dmem_load_params(&self) -> FalconDmaLoadTarget {
+ self.desc.dmem_load_params()
+ }
+}
+
+impl FalconFirmware for FwsecFirmware {
+ type Target = Gsp;
+
+ fn brom_params(&self) -> FalconBromParams {
+ FalconBromParams {
+ pkc_data_offset: self.desc.pkc_data_offset(),
+ engine_id_mask: self.desc.engine_id_mask(),
+ ucode_id: self.desc.ucode_id(),
+ }
+ }
+
+ fn boot_addr(&self) -> u32 {
+ 0
+ }
+}
+
+impl FirmwareObject<FwsecFirmware, Unsigned> {
+ fn new_fwsec(bios: &Vbios, cmd: FwsecCommand) -> Result<Self> {
+ let desc = bios.fwsec_image().header()?;
+ let mut ucode = KVVec::new();
+ ucode.extend_from_slice(bios.fwsec_image().ucode(&desc)?, GFP_KERNEL)?;
+
+ let hdr_offset = desc
+ .imem_load_size()
+ .checked_add(desc.interface_offset())
+ .map(usize::from_safe_cast)
+ .ok_or(EINVAL)?;
+
+ let hdr = ucode
+ .get(hdr_offset..)
+ .and_then(FalconAppifHdrV1::from_bytes_prefix)
+ .ok_or(EINVAL)?
+ .0;
+
+ if hdr.version != 1 {
+ return Err(EINVAL);
+ }
+
+ // Find the DMEM mapper section in the firmware.
+ for i in 0..usize::from(hdr.entry_count) {
+ // CALC: hdr_offset + header_size + i * entry_size.
+ let entry_offset = hdr_offset
+ .checked_add(usize::from(hdr.header_size))
+ .and_then(|o| o.checked_add(i.checked_mul(usize::from(hdr.entry_size))?))
+ .ok_or(EINVAL)?;
+
+ let app = ucode
+ .get(entry_offset..)
+ .and_then(FalconAppifV1::from_bytes_prefix)
+ .ok_or(EINVAL)?
+ .0;
+
+ if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER {
+ continue;
+ }
+ let dmem_base = app.dmem_base;
+
+ let dmem_mapper_offset = desc
+ .imem_load_size()
+ .checked_add(dmem_base)
+ .map(usize::from_safe_cast)
+ .ok_or(EINVAL)?;
+
+ let dmem_mapper = ucode
+ .get_mut(dmem_mapper_offset..)
+ .and_then(FalconAppifDmemmapperV3::from_bytes_mut_prefix)
+ .ok_or(EINVAL)?
+ .0;
+
+ dmem_mapper.init_cmd = match cmd {
+ FwsecCommand::Frts { .. } => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS,
+ FwsecCommand::Sb => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB,
+ };
+ let cmd_in_buffer_offset = dmem_mapper.cmd_in_buffer_offset;
+
+ let frts_cmd_offset = desc
+ .imem_load_size()
+ .checked_add(cmd_in_buffer_offset)
+ .map(usize::from_safe_cast)
+ .ok_or(EINVAL)?;
+
+ let frts_cmd = ucode
+ .get_mut(frts_cmd_offset..)
+ .and_then(FrtsCmd::from_bytes_mut_prefix)
+ .ok_or(EINVAL)?
+ .0;
+
+ frts_cmd.read_vbios = ReadVbios {
+ ver: 1,
+ hdr: u32::try_from(size_of::<ReadVbios>())?,
+ addr: 0,
+ size: 0,
+ flags: 2,
+ };
+ if let FwsecCommand::Frts {
+ frts_addr,
+ frts_size,
+ } = cmd
+ {
+ frts_cmd.frts_region = FrtsRegion {
+ ver: 1,
+ hdr: u32::try_from(size_of::<FrtsRegion>())?,
+ addr: u32::try_from(frts_addr >> 12)?,
+ size: u32::try_from(frts_size >> 12)?,
+ ftype: NVFW_FRTS_CMD_REGION_TYPE_FB,
+ };
+ }
+
+ // Return early as we found and patched the DMEMMAPPER region.
+ return Ok(Self(ucode, PhantomData));
+ }
+
+ Err(ENOTSUPP)
+ }
+}
+
+impl FwsecFirmware {
+ /// Extract the Fwsec firmware from `bios` and patch it to run on `falcon` with the `cmd`
+ /// command.
+ pub(crate) fn new(
+ dev: &Device<device::Bound>,
+ falcon: &Falcon<Gsp>,
+ bar: &Bar0,
+ bios: &Vbios,
+ cmd: FwsecCommand,
+ ) -> Result<Self> {
+ let ucode_dma = FirmwareObject::<Self, _>::new_fwsec(bios, cmd)?;
+
+ // Patch signature if needed.
+ let desc = bios.fwsec_image().header()?;
+ let ucode_signed = if desc.signature_count() != 0 {
+ let sig_base_img = desc
+ .imem_load_size()
+ .checked_add(desc.pkc_data_offset())
+ .map(usize::from_safe_cast)
+ .ok_or(EINVAL)?;
+ let desc_sig_versions = u32::from(desc.signature_versions());
+ let reg_fuse_version =
+ falcon.signature_reg_fuse_version(bar, desc.engine_id_mask(), desc.ucode_id())?;
+ dev_dbg!(
+ dev,
+ "desc_sig_versions: {:#x}, reg_fuse_version: {}\n",
+ desc_sig_versions,
+ reg_fuse_version
+ );
+ let signature_idx = {
+ let reg_fuse_version_bit = 1 << reg_fuse_version;
+
+ // Check if the fuse version is supported by the firmware.
+ if desc_sig_versions & reg_fuse_version_bit == 0 {
+ dev_err!(
+ dev,
+ "no matching signature: {:#x} {:#x}\n",
+ reg_fuse_version_bit,
+ desc_sig_versions,
+ );
+ return Err(EINVAL);
+ }
+
+ // `desc_sig_versions` has one bit set per included signature. Thus, the index of
+ // the signature to patch is the number of bits in `desc_sig_versions` set to `1`
+ // before `reg_fuse_version_bit`.
+
+ // Mask of the bits of `desc_sig_versions` to preserve.
+ let reg_fuse_version_mask = reg_fuse_version_bit.wrapping_sub(1);
+
+ usize::from_safe_cast((desc_sig_versions & reg_fuse_version_mask).count_ones())
+ };
+
+ dev_dbg!(dev, "patching signature with index {}\n", signature_idx);
+ let signature = bios
+ .fwsec_image()
+ .sigs(&desc)
+ .and_then(|sigs| sigs.get(signature_idx).ok_or(EINVAL))?;
+
+ ucode_dma.patch_signature(signature, sig_base_img)?
+ } else {
+ ucode_dma.no_patch_signature()
+ };
+
+ Ok(FwsecFirmware {
+ desc,
+ ucode: ucode_signed,
+ })
+ }
+
+ /// Loads the FWSEC firmware into `falcon` and execute it.
+ ///
+ /// This must only be called on chipsets that do not need the FWSEC bootloader (i.e., where
+ /// [`Chipset::needs_fwsec_bootloader()`](crate::gpu::Chipset::needs_fwsec_bootloader) returns
+ /// `false`). On chipsets that do, use [`bootloader::FwsecFirmwareWithBl`] instead.
+ pub(crate) fn run(
+ &self,
+ dev: &Device<device::Bound>,
+ falcon: &Falcon<Gsp>,
+ bar: &Bar0,
+ ) -> Result<()> {
+ // Reset falcon, load the firmware, and run it.
+ falcon
+ .reset(bar)
+ .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?;
+ falcon
+ .load(dev, bar, self)
+ .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?;
+ let (mbox0, _) = falcon
+ .boot(bar, Some(0), None)
+ .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?;
+ if mbox0 != 0 {
+ dev_err!(dev, "FWSEC firmware returned error {}\n", mbox0);
+ Err(EIO)
+ } else {
+ Ok(())
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs b/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs
new file mode 100644
index 000000000000..bcb713a868e2
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Bootloader support for the FWSEC firmware.
+//!
+//! On Turing, the FWSEC firmware is not loaded directly, but is instead loaded through a small
+//! bootloader program that performs the required DMA operations. This bootloader itself needs to
+//! be loaded using PIO.
+
+use kernel::{
+ alloc::KVec,
+ device::{
+ self,
+ Device, //
+ },
+ dma::Coherent,
+ io::{
+ register::WithBase, //
+ Io,
+ },
+ prelude::*,
+ ptr::{
+ Alignable,
+ Alignment, //
+ },
+ sizes,
+ transmute::{
+ AsBytes,
+ FromBytes, //
+ },
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ self,
+ gsp::Gsp,
+ Falcon,
+ FalconBromParams,
+ FalconDmaLoadable,
+ FalconFbifMemType,
+ FalconFbifTarget,
+ FalconFirmware,
+ FalconPioDmemLoadTarget,
+ FalconPioImemLoadTarget,
+ FalconPioLoadable, //
+ },
+ firmware::{
+ fwsec::FwsecFirmware,
+ request_firmware,
+ BinHdr,
+ FIRMWARE_VERSION, //
+ },
+ gpu::Chipset,
+ num::FromSafeCast,
+ regs,
+};
+
+/// Descriptor used by RM to figure out the requirements of the boot loader.
+///
+/// Most of its fields appear to be legacy and carry incorrect values, so they are left unused.
+#[repr(C)]
+#[derive(Debug, Clone)]
+struct BootloaderDesc {
+ /// Starting tag of bootloader.
+ start_tag: u32,
+ /// DMEM load offset - unused here as we always load at offset `0`.
+ _dmem_load_off: u32,
+ /// Offset of code section in the image. Unused as there is only one section in the bootloader
+ /// binary.
+ _code_off: u32,
+ /// Size of code section in the image.
+ code_size: u32,
+ /// Offset of data section in the image. Unused as we build the data section ourselves.
+ _data_off: u32,
+ /// Size of data section in the image. Unused as we build the data section ourselves.
+ _data_size: u32,
+}
+// SAFETY: any byte sequence is valid for this struct.
+unsafe impl FromBytes for BootloaderDesc {}
+
+/// Structure used by the boot-loader to load the rest of the code.
+///
+/// This has to be filled by the GPU driver and copied into DMEM at offset
+/// [`BootloaderDesc.dmem_load_off`].
+#[repr(C, packed)]
+#[derive(Debug, Clone)]
+struct BootloaderDmemDescV2 {
+ /// Reserved, should always be first element.
+ reserved: [u32; 4],
+ /// 16B signature for secure code, 0s if no secure code.
+ signature: [u32; 4],
+ /// DMA context used by the bootloader while loading code/data.
+ ctx_dma: u32,
+ /// 256B-aligned physical FB address where code is located.
+ code_dma_base: u64,
+ /// Offset from `code_dma_base` where the non-secure code is located.
+ ///
+ /// Also used as destination IMEM offset of non-secure code as the DMA firmware object is
+ /// expected to be a mirror image of its loaded state.
+ ///
+ /// Must be multiple of 256.
+ non_sec_code_off: u32,
+ /// Size of the non-secure code part.
+ non_sec_code_size: u32,
+ /// Offset from `code_dma_base` where the secure code is located (must be multiple of 256).
+ ///
+ /// Also used as destination IMEM offset of secure code as the DMA firmware object is expected
+ /// to be a mirror image of its loaded state.
+ ///
+ /// Must be multiple of 256.
+ sec_code_off: u32,
+ /// Size of the secure code part.
+ sec_code_size: u32,
+ /// Code entry point invoked by the bootloader after code is loaded.
+ code_entry_point: u32,
+ /// 256B-aligned physical FB address where data is located.
+ data_dma_base: u64,
+ /// Size of data block (should be multiple of 256B).
+ data_size: u32,
+ /// Number of arguments to be passed to the target firmware being loaded.
+ argc: u32,
+ /// Arguments to be passed to the target firmware being loaded.
+ argv: u32,
+}
+// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability.
+unsafe impl AsBytes for BootloaderDmemDescV2 {}
+
+/// Wrapper for [`FwsecFirmware`] that includes the bootloader performing the actual load
+/// operation.
+pub(crate) struct FwsecFirmwareWithBl {
+ /// DMA object the bootloader will copy the firmware from.
+ _firmware_dma: Coherent<[u8]>,
+ /// Code of the bootloader to be loaded into non-secure IMEM.
+ ucode: KVec<u8>,
+ /// Descriptor to be loaded into DMEM for the bootloader to read.
+ dmem_desc: BootloaderDmemDescV2,
+ /// Range-validated start offset of the firmware code in IMEM.
+ imem_dst_start: u16,
+ /// BROM parameters of the loaded firmware.
+ brom_params: FalconBromParams,
+ /// Range-validated `desc.start_tag`.
+ start_tag: u16,
+}
+
+impl FwsecFirmwareWithBl {
+ /// Loads the bootloader firmware for `dev` and `chipset`, and wrap `firmware` so it can be
+ /// loaded using it.
+ pub(crate) fn new(
+ firmware: FwsecFirmware,
+ dev: &Device<device::Bound>,
+ chipset: Chipset,
+ ) -> Result<Self> {
+ let fw = request_firmware(dev, chipset, "gen_bootloader", FIRMWARE_VERSION)?;
+ let hdr = fw
+ .data()
+ .get(0..size_of::<BinHdr>())
+ .and_then(BinHdr::from_bytes_copy)
+ .ok_or(EINVAL)?;
+
+ let desc = {
+ let desc_offset = usize::from_safe_cast(hdr.header_offset);
+
+ fw.data()
+ .get(desc_offset..)
+ .and_then(BootloaderDesc::from_bytes_copy_prefix)
+ .ok_or(EINVAL)?
+ .0
+ };
+
+ let ucode = {
+ let ucode_start = usize::from_safe_cast(hdr.data_offset);
+ let code_size = usize::from_safe_cast(desc.code_size);
+ // Align to falcon block size (256 bytes).
+ let aligned_code_size = code_size
+ .align_up(Alignment::new::<{ falcon::MEM_BLOCK_ALIGNMENT }>())
+ .ok_or(EINVAL)?;
+
+ let mut ucode = KVec::with_capacity(aligned_code_size, GFP_KERNEL)?;
+ ucode.extend_from_slice(
+ fw.data()
+ .get(ucode_start..ucode_start + code_size)
+ .ok_or(EINVAL)?,
+ GFP_KERNEL,
+ )?;
+ ucode.resize(aligned_code_size, 0, GFP_KERNEL)?;
+
+ ucode
+ };
+
+ // `BootloaderDmemDescV2` expects the source to be a mirror image of the destination and
+ // uses the same offset parameter for both.
+ //
+ // Thus, the start of the source object needs to be padded with the difference between the
+ // destination and source offsets.
+ //
+ // In practice, this is expected to always be zero but is required for code correctness.
+ let (align_padding, firmware_dma) = {
+ let align_padding = {
+ let imem_sec = firmware.imem_sec_load_params();
+
+ imem_sec
+ .dst_start
+ .checked_sub(imem_sec.src_start)
+ .map(usize::from_safe_cast)
+ .ok_or(EOVERFLOW)?
+ };
+
+ let mut firmware_obj = KVVec::new();
+ firmware_obj.extend_with(align_padding, 0u8, GFP_KERNEL)?;
+ firmware_obj.extend_from_slice(firmware.ucode.0.as_slice(), GFP_KERNEL)?;
+
+ (
+ align_padding,
+ Coherent::from_slice(dev, firmware_obj.as_slice(), GFP_KERNEL)?,
+ )
+ };
+
+ let dmem_desc = {
+ // Bootloader payload is in non-coherent system memory.
+ const FALCON_DMAIDX_PHYS_SYS_NCOH: u32 = 4;
+
+ let imem_sec = firmware.imem_sec_load_params();
+ let imem_ns = firmware.imem_ns_load_params().ok_or(EINVAL)?;
+ let dmem = firmware.dmem_load_params();
+
+ // The bootloader does not have a data destination offset field and copies the data at
+ // the start of DMEM, so it can only be used if the destination offset of the firmware
+ // is 0.
+ if dmem.dst_start != 0 {
+ return Err(EINVAL);
+ }
+
+ BootloaderDmemDescV2 {
+ reserved: [0; 4],
+ signature: [0; 4],
+ ctx_dma: FALCON_DMAIDX_PHYS_SYS_NCOH,
+ code_dma_base: firmware_dma.dma_handle(),
+ // `dst_start` is also valid as the source offset since the firmware DMA object is
+ // a mirror image of the target IMEM layout.
+ non_sec_code_off: imem_ns.dst_start,
+ non_sec_code_size: imem_ns.len,
+ // `dst_start` is also valid as the source offset since the firmware DMA object is
+ // a mirror image of the target IMEM layout.
+ sec_code_off: imem_sec.dst_start,
+ sec_code_size: imem_sec.len,
+ code_entry_point: 0,
+ // Start of data section is the added padding + the DMEM `src_start` field.
+ data_dma_base: firmware_dma
+ .dma_handle()
+ .checked_add(u64::from_safe_cast(align_padding))
+ .and_then(|offset| offset.checked_add(dmem.src_start.into()))
+ .ok_or(EOVERFLOW)?,
+ data_size: dmem.len,
+ argc: 0,
+ argv: 0,
+ }
+ };
+
+ // The bootloader's code must be loaded in the area right below the first 64K of IMEM.
+ const BOOTLOADER_LOAD_CEILING: usize = sizes::SZ_64K;
+ let imem_dst_start = BOOTLOADER_LOAD_CEILING
+ .checked_sub(ucode.len())
+ .ok_or(EOVERFLOW)?;
+
+ Ok(Self {
+ _firmware_dma: firmware_dma,
+ ucode,
+ dmem_desc,
+ brom_params: firmware.brom_params(),
+ imem_dst_start: u16::try_from(imem_dst_start)?,
+ start_tag: u16::try_from(desc.start_tag)?,
+ })
+ }
+
+ /// Loads the bootloader into `falcon` and execute it.
+ ///
+ /// The bootloader will load the FWSEC firmware and then execute it. This function returns
+ /// after FWSEC has reached completion.
+ pub(crate) fn run(
+ &self,
+ dev: &Device<device::Bound>,
+ falcon: &Falcon<Gsp>,
+ bar: &Bar0,
+ ) -> Result<()> {
+ // Reset falcon, load the firmware, and run it.
+ falcon
+ .reset(bar)
+ .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?;
+ falcon
+ .pio_load(bar, self)
+ .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?;
+
+ // Configure DMA index for the bootloader to fetch the FWSEC firmware from system memory.
+ bar.update(
+ regs::NV_PFALCON_FBIF_TRANSCFG::of::<Gsp>()
+ .try_at(usize::from_safe_cast(self.dmem_desc.ctx_dma))
+ .ok_or(EINVAL)?,
+ |v| {
+ v.with_target(FalconFbifTarget::CoherentSysmem)
+ .with_mem_type(FalconFbifMemType::Physical)
+ },
+ );
+
+ let (mbox0, _) = falcon
+ .boot(bar, Some(0), None)
+ .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?;
+ if mbox0 != 0 {
+ dev_err!(dev, "FWSEC firmware returned error {}\n", mbox0);
+ Err(EIO)
+ } else {
+ Ok(())
+ }
+ }
+}
+
+impl FalconFirmware for FwsecFirmwareWithBl {
+ type Target = Gsp;
+
+ fn brom_params(&self) -> FalconBromParams {
+ self.brom_params.clone()
+ }
+
+ fn boot_addr(&self) -> u32 {
+ // On V2 platforms, the boot address is extracted from the generic bootloader, because the
+ // gbl is what actually copies FWSEC into memory, so that is what needs to be booted.
+ u32::from(self.start_tag) << 8
+ }
+}
+
+impl FalconPioLoadable for FwsecFirmwareWithBl {
+ fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> {
+ None
+ }
+
+ fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> {
+ Some(FalconPioImemLoadTarget {
+ data: self.ucode.as_ref(),
+ dst_start: self.imem_dst_start,
+ secure: false,
+ start_tag: self.start_tag,
+ })
+ }
+
+ fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> {
+ FalconPioDmemLoadTarget {
+ data: self.dmem_desc.as_bytes(),
+ dst_start: 0,
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs
new file mode 100644
index 000000000000..2fcc255c3bc8
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware/gsp.rs
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ device,
+ dma::{
+ Coherent,
+ CoherentBox,
+ DataDirection,
+ DmaAddress, //
+ },
+ prelude::*,
+ scatterlist::{
+ Owned,
+ SGTable, //
+ },
+};
+
+use crate::{
+ firmware::{
+ elf,
+ riscv::RiscvFirmware, //
+ },
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
+ gsp::GSP_PAGE_SIZE,
+ num::FromSafeCast,
+};
+
+/// GSP firmware with 3-level radix page tables for the GSP bootloader.
+///
+/// The bootloader expects firmware to be mapped starting at address 0 in GSP's virtual address
+/// space:
+///
+/// ```text
+/// Level 0: 1 page, 1 entry -> points to first level 1 page
+/// Level 1: Multiple pages/entries -> each entry points to a level 2 page
+/// Level 2: Multiple pages/entries -> each entry points to a firmware page
+/// ```
+///
+/// Each page is 4KB, each entry is 8 bytes (64-bit DMA address).
+/// Also known as "Radix3" firmware.
+#[pin_data]
+pub(crate) struct GspFirmware {
+ /// The GSP firmware inside a [`VVec`], device-mapped via a SG table.
+ #[pin]
+ fw: SGTable<Owned<VVec<u8>>>,
+ /// Level 2 page table whose entries contain DMA addresses of firmware pages.
+ #[pin]
+ level2: SGTable<Owned<VVec<u8>>>,
+ /// Level 1 page table whose entries contain DMA addresses of level 2 pages.
+ #[pin]
+ level1: SGTable<Owned<VVec<u8>>>,
+ /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page.
+ level0: Coherent<[u64]>,
+ /// Size in bytes of the firmware contained in [`Self::fw`].
+ pub(crate) size: usize,
+ /// Device-mapped GSP signatures matching the GPU's [`Chipset`].
+ pub(crate) signatures: Coherent<[u8]>,
+ /// GSP bootloader, verifies the GSP firmware before loading and running it.
+ pub(crate) bootloader: RiscvFirmware,
+}
+
+impl GspFirmware {
+ /// Loads the GSP firmware binaries, map them into `dev`'s address-space, and creates the page
+ /// tables expected by the GSP bootloader to load it.
+ pub(crate) fn new<'a>(
+ dev: &'a device::Device<device::Bound>,
+ chipset: Chipset,
+ ver: &'a str,
+ ) -> impl PinInit<Self, Error> + 'a {
+ pin_init::pin_init_scope(move || {
+ let firmware = super::request_firmware(dev, chipset, "gsp", ver)?;
+
+ let fw_section = elf::elf64_section(firmware.data(), ".fwimage").ok_or(EINVAL)?;
+
+ let size = fw_section.len();
+
+ // Move the firmware into a vmalloc'd vector and map it into the device address
+ // space.
+ let fw_vvec = VVec::with_capacity(fw_section.len(), GFP_KERNEL)
+ .and_then(|mut v| {
+ v.extend_from_slice(fw_section, GFP_KERNEL)?;
+ Ok(v)
+ })
+ .map_err(|_| ENOMEM)?;
+
+ Ok(try_pin_init!(Self {
+ fw <- SGTable::new(dev, fw_vvec, DataDirection::ToDevice, GFP_KERNEL),
+ level2 <- {
+ // Allocate the level 2 page table, map the firmware onto it, and map it into
+ // the device address space.
+ VVec::<u8>::with_capacity(
+ fw.iter().count() * core::mem::size_of::<u64>(),
+ GFP_KERNEL,
+ )
+ .map_err(|_| ENOMEM)
+ .and_then(|level2| map_into_lvl(&fw, level2))
+ .map(|level2| SGTable::new(dev, level2, DataDirection::ToDevice, GFP_KERNEL))?
+ },
+ level1 <- {
+ // Allocate the level 1 page table, map the level 2 page table onto it, and map
+ // it into the device address space.
+ VVec::<u8>::with_capacity(
+ level2.iter().count() * core::mem::size_of::<u64>(),
+ GFP_KERNEL,
+ )
+ .map_err(|_| ENOMEM)
+ .and_then(|level1| map_into_lvl(&level2, level1))
+ .map(|level1| SGTable::new(dev, level1, DataDirection::ToDevice, GFP_KERNEL))?
+ },
+ level0: {
+ // Allocate the level 0 page table as a device-visible DMA object, and map the
+ // level 1 page table onto it.
+
+ // Fill level 1 page entry.
+ let level1_entry = level1.iter().next().ok_or(EINVAL)?;
+ let level1_entry_addr = level1_entry.dma_address();
+
+ // Create level 0 page table data and fill its first entry with the level 1
+ // table.
+ let mut level0 = CoherentBox::<[u64]>::zeroed_slice(
+ dev,
+ GSP_PAGE_SIZE / size_of::<u64>(),
+ GFP_KERNEL
+ )?;
+ level0[0] = level1_entry_addr.to_le();
+
+ level0.into()
+ },
+ size,
+ signatures: {
+ let sigs_section = match chipset.arch() {
+ Architecture::Turing
+ if matches!(chipset, Chipset::TU116 | Chipset::TU117) =>
+ {
+ ".fwsignature_tu11x"
+ }
+ Architecture::Turing => ".fwsignature_tu10x",
+ // GA100 uses the same firmware as Turing
+ Architecture::Ampere if chipset == Chipset::GA100 => ".fwsignature_tu10x",
+ Architecture::Ampere => ".fwsignature_ga10x",
+ Architecture::Ada => ".fwsignature_ad10x",
+ };
+
+ elf::elf64_section(firmware.data(), sigs_section)
+ .ok_or(EINVAL)
+ .and_then(|data| Coherent::from_slice(dev, data, GFP_KERNEL))?
+ },
+ bootloader: {
+ let bl = super::request_firmware(dev, chipset, "bootloader", ver)?;
+
+ RiscvFirmware::new(dev, &bl)?
+ },
+ }))
+ })
+ }
+
+ /// Returns the DMA handle of the radix3 level 0 page table.
+ pub(crate) fn radix3_dma_handle(&self) -> DmaAddress {
+ self.level0.dma_handle()
+ }
+}
+
+/// Build a page table from a scatter-gather list.
+///
+/// Takes each DMA-mapped region from `sg_table` and writes page table entries
+/// for all 4KB pages within that region. For example, a 16KB SG entry becomes
+/// 4 consecutive page table entries.
+fn map_into_lvl(sg_table: &SGTable<Owned<VVec<u8>>>, mut dst: VVec<u8>) -> Result<VVec<u8>> {
+ for sg_entry in sg_table.iter() {
+ // Number of pages we need to map.
+ let num_pages = usize::from_safe_cast(sg_entry.dma_len()).div_ceil(GSP_PAGE_SIZE);
+
+ for i in 0..num_pages {
+ let entry = sg_entry.dma_address()
+ + (u64::from_safe_cast(i) * u64::from_safe_cast(GSP_PAGE_SIZE));
+ dst.extend_from_slice(&entry.to_le_bytes(), GFP_KERNEL)?;
+ }
+ }
+
+ Ok(dst)
+}
diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs
new file mode 100644
index 000000000000..2afa7f36404e
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware/riscv.rs
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Support for firmware binaries designed to run on a RISC-V core. Such firmwares files have a
+//! dedicated header.
+
+use kernel::{
+ device,
+ dma::Coherent,
+ firmware::Firmware,
+ prelude::*,
+ transmute::FromBytes, //
+};
+
+use crate::{
+ firmware::BinFirmware,
+ num::FromSafeCast, //
+};
+
+/// Descriptor for microcode running on a RISC-V core.
+#[repr(C)]
+#[derive(Debug)]
+struct RmRiscvUCodeDesc {
+ version: u32,
+ bootloader_offset: u32,
+ bootloader_size: u32,
+ bootloader_param_offset: u32,
+ bootloader_param_size: u32,
+ riscv_elf_offset: u32,
+ riscv_elf_size: u32,
+ app_version: u32,
+ manifest_offset: u32,
+ manifest_size: u32,
+ monitor_data_offset: u32,
+ monitor_data_size: u32,
+ monitor_code_offset: u32,
+ monitor_code_size: u32,
+}
+
+// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability.
+unsafe impl FromBytes for RmRiscvUCodeDesc {}
+
+impl RmRiscvUCodeDesc {
+ /// Interprets the header of `bin_fw` as a [`RmRiscvUCodeDesc`] and returns it.
+ ///
+ /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image.
+ fn new(bin_fw: &BinFirmware<'_>) -> Result<Self> {
+ let offset = usize::from_safe_cast(bin_fw.hdr.header_offset);
+ let end = offset.checked_add(size_of::<Self>()).ok_or(EINVAL)?;
+
+ bin_fw
+ .fw
+ .get(offset..end)
+ .and_then(Self::from_bytes_copy)
+ .ok_or(EINVAL)
+ }
+}
+
+/// A parsed firmware for a RISC-V core, ready to be loaded and run.
+pub(crate) struct RiscvFirmware {
+ /// Offset at which the code starts in the firmware image.
+ pub(crate) code_offset: u32,
+ /// Offset at which the data starts in the firmware image.
+ pub(crate) data_offset: u32,
+ /// Offset at which the manifest starts in the firmware image.
+ pub(crate) manifest_offset: u32,
+ /// Application version.
+ pub(crate) app_version: u32,
+ /// Device-mapped firmware image.
+ pub(crate) ucode: Coherent<[u8]>,
+}
+
+impl RiscvFirmware {
+ /// Parses the RISC-V firmware image contained in `fw`.
+ pub(crate) fn new(dev: &device::Device<device::Bound>, fw: &Firmware) -> Result<Self> {
+ let bin_fw = BinFirmware::new(fw)?;
+
+ let riscv_desc = RmRiscvUCodeDesc::new(&bin_fw)?;
+
+ let ucode = {
+ let start = usize::from_safe_cast(bin_fw.hdr.data_offset);
+ let len = usize::from_safe_cast(bin_fw.hdr.data_size);
+ let end = start.checked_add(len).ok_or(EINVAL)?;
+
+ Coherent::from_slice(dev, fw.data().get(start..end).ok_or(EINVAL)?, GFP_KERNEL)?
+ };
+
+ Ok(Self {
+ ucode,
+ code_offset: riscv_desc.monitor_code_offset,
+ data_offset: riscv_desc.monitor_data_offset,
+ manifest_offset: riscv_desc.manifest_offset,
+ app_version: riscv_desc.app_version,
+ })
+ }
+}
diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs
new file mode 100644
index 000000000000..fb75dd10a172
--- /dev/null
+++ b/drivers/gpu/nova-core/gfw.rs
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! GPU Firmware (`GFW`) support, a.k.a `devinit`.
+//!
+//! Upon reset, the GPU runs some firmware code from the BIOS to setup its core parameters. Most of
+//! the GPU is considered unusable until this step is completed, so we must wait on it before
+//! performing driver initialization.
+//!
+//! A clarification about devinit terminology: devinit is a sequence of register read/writes after
+//! reset that performs tasks such as:
+//! 1. Programming VRAM memory controller timings.
+//! 2. Power sequencing.
+//! 3. Clock and PLL configuration.
+//! 4. Thermal management.
+//!
+//! devinit itself is a 'script' which is interpreted by an interpreter program typically running
+//! on the PMU microcontroller.
+//!
+//! Note that the devinit sequence also needs to run during suspend/resume.
+
+use kernel::{
+ io::{
+ poll::read_poll_timeout,
+ Io, //
+ },
+ prelude::*,
+ time::Delta, //
+};
+
+use crate::{
+ driver::Bar0,
+ regs, //
+};
+
+/// Wait for the `GFW` (GPU firmware) boot completion signal (`GFW_BOOT`), or a 4 seconds timeout.
+///
+/// Upon GPU reset, several microcontrollers (such as PMU, SEC2, GSP etc) run some firmware code to
+/// setup its core parameters. Most of the GPU is considered unusable until this step is completed,
+/// so it must be waited on very early during driver initialization.
+///
+/// The `GFW` code includes several components that need to execute before the driver loads. These
+/// components are located in the VBIOS ROM and executed in a sequence on these different
+/// microcontrollers. The devinit sequence typically runs on the PMU, and the FWSEC runs on the
+/// GSP.
+///
+/// This function waits for a signal indicating that core initialization is complete. Before this
+/// signal is received, little can be done with the GPU. This signal is set by the FWSEC running on
+/// the GSP in Heavy-secured mode.
+pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result {
+ // Before accessing the completion status in `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05`, we must
+ // first check `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK`. This is because
+ // `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05` becomes accessible only after the secure firmware
+ // (FWSEC) lowers the privilege level to allow CPU (LS/Light-secured) access. We can only
+ // safely read the status register from CPU (LS/Light-secured) once the mask indicates
+ // that the privilege level has been lowered.
+ //
+ // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put out of
+ // reset, and should complete in less time than that.
+ read_poll_timeout(
+ || {
+ Ok(
+ // Check that FWSEC has lowered its protection level before reading the GFW_BOOT
+ // status.
+ bar.read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK)
+ .read_protection_level0()
+ && bar
+ .read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT)
+ .completed(),
+ )
+ },
+ |&gfw_booted| gfw_booted,
+ Delta::from_millis(1),
+ Delta::from_secs(4),
+ )
+ .map(|_| ())
+}
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
new file mode 100644
index 000000000000..0f6fe9a1b955
--- /dev/null
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ device,
+ devres::Devres,
+ fmt,
+ io::Io,
+ num::Bounded,
+ pci,
+ prelude::*,
+ sync::Arc, //
+};
+
+use crate::{
+ bounded_enum,
+ driver::Bar0,
+ falcon::{
+ gsp::Gsp as GspFalcon,
+ sec2::Sec2 as Sec2Falcon,
+ Falcon, //
+ },
+ fb::SysmemFlush,
+ gfw,
+ gsp::Gsp,
+ regs,
+};
+
+macro_rules! define_chipset {
+ ({ $($variant:ident = $value:expr),* $(,)* }) =>
+ {
+ /// Enum representation of the GPU chipset.
+ #[derive(fmt::Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq)]
+ pub(crate) enum Chipset {
+ $($variant = $value),*,
+ }
+
+ impl Chipset {
+ pub(crate) const ALL: &'static [Chipset] = &[
+ $( Chipset::$variant, )*
+ ];
+
+ ::kernel::macros::paste!(
+ /// Returns the name of this chipset, in lowercase.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let chipset = Chipset::GA102;
+ /// assert_eq!(chipset.name(), "ga102");
+ /// ```
+ pub(crate) const fn name(&self) -> &'static str {
+ match *self {
+ $(
+ Chipset::$variant => stringify!([<$variant:lower>]),
+ )*
+ }
+ }
+ );
+ }
+
+ // TODO[FPRI]: replace with something like derive(FromPrimitive)
+ impl TryFrom<u32> for Chipset {
+ type Error = kernel::error::Error;
+
+ fn try_from(value: u32) -> Result<Self, Self::Error> {
+ match value {
+ $( $value => Ok(Chipset::$variant), )*
+ _ => Err(ENODEV),
+ }
+ }
+ }
+ }
+}
+
+define_chipset!({
+ // Turing
+ TU102 = 0x162,
+ TU104 = 0x164,
+ TU106 = 0x166,
+ TU117 = 0x167,
+ TU116 = 0x168,
+ // Ampere
+ GA100 = 0x170,
+ GA102 = 0x172,
+ GA103 = 0x173,
+ GA104 = 0x174,
+ GA106 = 0x176,
+ GA107 = 0x177,
+ // Ada
+ AD102 = 0x192,
+ AD103 = 0x193,
+ AD104 = 0x194,
+ AD106 = 0x196,
+ AD107 = 0x197,
+});
+
+impl Chipset {
+ pub(crate) const fn arch(self) -> Architecture {
+ match self {
+ Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => {
+ Architecture::Turing
+ }
+ Self::GA100 | Self::GA102 | Self::GA103 | Self::GA104 | Self::GA106 | Self::GA107 => {
+ Architecture::Ampere
+ }
+ Self::AD102 | Self::AD103 | Self::AD104 | Self::AD106 | Self::AD107 => {
+ Architecture::Ada
+ }
+ }
+ }
+
+ /// Returns `true` if this chipset requires the PIO-loaded bootloader in order to boot FWSEC.
+ ///
+ /// This includes all chipsets < GA102.
+ pub(crate) const fn needs_fwsec_bootloader(self) -> bool {
+ matches!(self.arch(), Architecture::Turing) || matches!(self, Self::GA100)
+ }
+}
+
+// TODO
+//
+// The resulting strings are used to generate firmware paths, hence the
+// generated strings have to be stable.
+//
+// Hence, replace with something like strum_macros derive(Display).
+//
+// For now, redirect to fmt::Debug for convenience.
+impl fmt::Display for Chipset {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "{self:?}")
+ }
+}
+
+bounded_enum! {
+ /// Enum representation of the GPU generation.
+ #[derive(fmt::Debug, Copy, Clone)]
+ pub(crate) enum Architecture with TryFrom<Bounded<u32, 6>> {
+ Turing = 0x16,
+ Ampere = 0x17,
+ Ada = 0x19,
+ }
+}
+
+pub(crate) struct Revision {
+ major: Bounded<u8, 4>,
+ minor: Bounded<u8, 4>,
+}
+
+impl From<regs::NV_PMC_BOOT_42> for Revision {
+ fn from(boot0: regs::NV_PMC_BOOT_42) -> Self {
+ Self {
+ major: boot0.major_revision().cast(),
+ minor: boot0.minor_revision().cast(),
+ }
+ }
+}
+
+impl fmt::Display for Revision {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "{:x}.{:x}", self.major, self.minor)
+ }
+}
+
+/// Structure holding a basic description of the GPU: `Chipset` and `Revision`.
+pub(crate) struct Spec {
+ chipset: Chipset,
+ revision: Revision,
+}
+
+impl Spec {
+ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
+ // Some brief notes about boot0 and boot42, in chronological order:
+ //
+ // NV04 through NV50:
+ //
+ // Not supported by Nova. boot0 is necessary and sufficient to identify these GPUs.
+ // boot42 may not even exist on some of these GPUs.
+ //
+ // Fermi through Volta:
+ //
+ // Not supported by Nova. boot0 is still sufficient to identify these GPUs, but boot42
+ // is also guaranteed to be both present and accurate.
+ //
+ // Turing and later:
+ //
+ // Supported by Nova. Identified by first checking boot0 to ensure that the GPU is not
+ // from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU.
+ // Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs.
+
+ let boot0 = bar.read(regs::NV_PMC_BOOT_0);
+
+ if boot0.is_older_than_fermi() {
+ return Err(ENODEV);
+ }
+
+ let boot42 = bar.read(regs::NV_PMC_BOOT_42);
+ Spec::try_from(boot42).inspect_err(|_| {
+ dev_err!(dev, "Unsupported chipset: {}\n", boot42);
+ })
+ }
+}
+
+impl TryFrom<regs::NV_PMC_BOOT_42> for Spec {
+ type Error = Error;
+
+ fn try_from(boot42: regs::NV_PMC_BOOT_42) -> Result<Self> {
+ Ok(Self {
+ chipset: boot42.chipset()?,
+ revision: boot42.into(),
+ })
+ }
+}
+
+impl fmt::Display for Spec {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.write_fmt(fmt!(
+ "Chipset: {}, Architecture: {:?}, Revision: {}",
+ self.chipset,
+ self.chipset.arch(),
+ self.revision
+ ))
+ }
+}
+
+/// Structure holding the resources required to operate the GPU.
+#[pin_data]
+pub(crate) struct Gpu {
+ spec: Spec,
+ /// MMIO mapping of PCI BAR 0
+ bar: Arc<Devres<Bar0>>,
+ /// System memory page required for flushing all pending GPU-side memory writes done through
+ /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation).
+ sysmem_flush: SysmemFlush,
+ /// GSP falcon instance, used for GSP boot up and cleanup.
+ gsp_falcon: Falcon<GspFalcon>,
+ /// SEC2 falcon instance, used for GSP boot up and cleanup.
+ sec2_falcon: Falcon<Sec2Falcon>,
+ /// GSP runtime data. Temporarily an empty placeholder.
+ #[pin]
+ gsp: Gsp,
+}
+
+impl Gpu {
+ pub(crate) fn new<'a>(
+ pdev: &'a pci::Device<device::Bound>,
+ devres_bar: Arc<Devres<Bar0>>,
+ bar: &'a Bar0,
+ ) -> impl PinInit<Self, Error> + 'a {
+ try_pin_init!(Self {
+ spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| {
+ dev_info!(pdev,"NVIDIA ({})\n", spec);
+ })?,
+
+ // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
+ _: {
+ gfw::wait_gfw_boot_completion(bar)
+ .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
+ },
+
+ sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?,
+
+ gsp_falcon: Falcon::new(
+ pdev.as_ref(),
+ spec.chipset,
+ )
+ .inspect(|falcon| falcon.clear_swgen0_intr(bar))?,
+
+ sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?,
+
+ gsp <- Gsp::new(pdev),
+
+ _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
+
+ bar: devres_bar,
+ })
+ }
+
+ /// Called when the corresponding [`Device`](device::Device) is unbound.
+ ///
+ /// Note: This method must only be called from `Driver::unbind`.
+ pub(crate) fn unbind(&self, dev: &device::Device<device::Core>) {
+ kernel::warn_on!(self
+ .bar
+ .access(dev)
+ .inspect(|bar| self.sysmem_flush.unregister(bar))
+ .is_err());
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
new file mode 100644
index 000000000000..ba5b7f990031
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+
+mod boot;
+
+use kernel::{
+ debugfs,
+ device,
+ dma::{
+ Coherent,
+ CoherentBox,
+ DmaAddress, //
+ },
+ pci,
+ prelude::*,
+ transmute::{
+ AsBytes,
+ FromBytes, //
+ }, //
+};
+
+pub(crate) mod cmdq;
+pub(crate) mod commands;
+mod fw;
+mod sequencer;
+
+pub(crate) use fw::{
+ GspFwWprMeta,
+ LibosParams, //
+};
+
+use crate::{
+ gsp::cmdq::Cmdq,
+ gsp::fw::{
+ GspArgumentsPadded,
+ LibosMemoryRegionInitArgument, //
+ },
+ num,
+};
+
+pub(crate) const GSP_PAGE_SHIFT: usize = 12;
+pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT;
+
+/// Number of GSP pages to use in a RM log buffer.
+const RM_LOG_BUFFER_NUM_PAGES: usize = 0x10;
+const LOG_BUFFER_SIZE: usize = RM_LOG_BUFFER_NUM_PAGES * GSP_PAGE_SIZE;
+
+/// Array of page table entries, as understood by the GSP bootloader.
+#[repr(C)]
+struct PteArray<const NUM_ENTRIES: usize>([u64; NUM_ENTRIES]);
+
+/// SAFETY: arrays of `u64` implement `FromBytes` and we are but a wrapper around one.
+unsafe impl<const NUM_ENTRIES: usize> FromBytes for PteArray<NUM_ENTRIES> {}
+
+/// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around one.
+unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
+
+impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
+ /// Returns the page table entry for `index`, for a mapping starting at `start`.
+ // TODO: Replace with `IoView` projection once available.
+ fn entry(start: DmaAddress, index: usize) -> Result<u64> {
+ start
+ .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT)
+ .ok_or(EOVERFLOW)
+ }
+}
+
+/// The logging buffers are byte queues that contain encoded printf-like
+/// messages from GSP-RM. They need to be decoded by a special application
+/// that can parse the buffers.
+///
+/// The 'loginit' buffer contains logs from early GSP-RM init and
+/// exception dumps. The 'logrm' buffer contains the subsequent logs. Both are
+/// written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE.
+///
+/// The physical address map for the log buffer is stored in the buffer
+/// itself, starting with offset 1. Offset 0 contains the "put" pointer (pp).
+/// Initially, pp is equal to 0. If the buffer has valid logging data in it,
+/// then pp points to index into the buffer where the next logging entry will
+/// be written. Therefore, the logging data is valid if:
+/// 1 <= pp < sizeof(buffer)/sizeof(u64)
+struct LogBuffer(Coherent<[u8; LOG_BUFFER_SIZE]>);
+
+impl LogBuffer {
+ /// Creates a new `LogBuffer` mapped on `dev`.
+ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
+ let obj = Self(Coherent::zeroed(dev, GFP_KERNEL)?);
+
+ let start_addr = obj.0.dma_handle();
+
+ // SAFETY: `obj` has just been created and we are its sole user.
+ let pte_region = unsafe {
+ &mut obj.0.as_mut()[size_of::<u64>()..][..RM_LOG_BUFFER_NUM_PAGES * size_of::<u64>()]
+ };
+
+ // Write values one by one to avoid an on-stack instance of `PteArray`.
+ for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
+ let pte_value = PteArray::<0>::entry(start_addr, i)?;
+
+ chunk.copy_from_slice(&pte_value.to_ne_bytes());
+ }
+
+ Ok(obj)
+ }
+}
+
+struct LogBuffers {
+ /// Init log buffer.
+ loginit: LogBuffer,
+ /// Interrupts log buffer.
+ logintr: LogBuffer,
+ /// RM log buffer.
+ logrm: LogBuffer,
+}
+
+/// GSP runtime data.
+#[pin_data]
+pub(crate) struct Gsp {
+ /// Libos arguments.
+ pub(crate) libos: Coherent<[LibosMemoryRegionInitArgument]>,
+ /// Log buffers, optionally exposed via debugfs.
+ #[pin]
+ logs: debugfs::Scope<LogBuffers>,
+ /// Command queue.
+ #[pin]
+ pub(crate) cmdq: Cmdq,
+ /// RM arguments.
+ rmargs: Coherent<GspArgumentsPadded>,
+}
+
+impl Gsp {
+ // Creates an in-place initializer for a `Gsp` manager for `pdev`.
+ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> impl PinInit<Self, Error> + '_ {
+ pin_init::pin_init_scope(move || {
+ let dev = pdev.as_ref();
+
+ let loginit = LogBuffer::new(dev)?;
+ let logintr = LogBuffer::new(dev)?;
+ let logrm = LogBuffer::new(dev)?;
+
+ // Initialise the logging structures. The OpenRM equivalents are in:
+ // _kgspInitLibosLoggingStructures (allocates memory for buffers)
+ // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array)
+ Ok(try_pin_init!(Self {
+ cmdq <- Cmdq::new(dev),
+ rmargs: Coherent::init(dev, GFP_KERNEL, GspArgumentsPadded::new(&cmdq))?,
+ libos: {
+ let mut libos = CoherentBox::zeroed_slice(
+ dev,
+ GSP_PAGE_SIZE / size_of::<LibosMemoryRegionInitArgument>(),
+ GFP_KERNEL,
+ )?;
+
+ libos.init_at(0, LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0))?;
+ libos.init_at(1, LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0))?;
+ libos.init_at(2, LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?;
+ libos.init_at(3, LibosMemoryRegionInitArgument::new("RMARGS", rmargs))?;
+
+ libos.into()
+ },
+ logs <- {
+ let log_buffers = LogBuffers {
+ loginit,
+ logintr,
+ logrm,
+ };
+
+ #[allow(static_mut_refs)]
+ // SAFETY: `DEBUGFS_ROOT` is created before driver registration and cleared
+ // after driver unregistration, so no probe() can race with its modification.
+ //
+ // PANIC: `DEBUGFS_ROOT` cannot be `None` here. It is set before driver
+ // registration and cleared after driver unregistration, so it is always
+ // `Some` for the entire lifetime that probe() can be called.
+ let log_parent: &debugfs::Dir = unsafe { crate::DEBUGFS_ROOT.as_ref() }
+ .expect("DEBUGFS_ROOT not initialized");
+
+ log_parent.scope(log_buffers, dev.name(), |logs, dir| {
+ dir.read_binary_file(c"loginit", &logs.loginit.0);
+ dir.read_binary_file(c"logintr", &logs.logintr.0);
+ dir.read_binary_file(c"logrm", &logs.logrm.0);
+ })
+ },
+ }))
+ })
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
new file mode 100644
index 000000000000..18f356c9178e
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ device,
+ dma::Coherent,
+ io::poll::read_poll_timeout,
+ io::Io,
+ pci,
+ prelude::*,
+ time::Delta, //
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ gsp::Gsp,
+ sec2::Sec2,
+ Falcon, //
+ },
+ fb::FbLayout,
+ firmware::{
+ booter::{
+ BooterFirmware,
+ BooterKind, //
+ },
+ fwsec::{
+ bootloader::FwsecFirmwareWithBl,
+ FwsecCommand,
+ FwsecFirmware, //
+ },
+ gsp::GspFirmware,
+ FIRMWARE_VERSION, //
+ },
+ gpu::Chipset,
+ gsp::{
+ commands,
+ sequencer::{
+ GspSequencer,
+ GspSequencerParams, //
+ },
+ GspFwWprMeta, //
+ },
+ regs,
+ vbios::Vbios,
+};
+
+impl super::Gsp {
+ /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly
+ /// created the WPR2 region.
+ fn run_fwsec_frts(
+ dev: &device::Device<device::Bound>,
+ chipset: Chipset,
+ falcon: &Falcon<Gsp>,
+ bar: &Bar0,
+ bios: &Vbios,
+ fb_layout: &FbLayout,
+ ) -> Result<()> {
+ // Check that the WPR2 region does not already exists - if it does, we cannot run
+ // FWSEC-FRTS until the GPU is reset.
+ if bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound() != 0 {
+ dev_err!(
+ dev,
+ "WPR2 region already exists - GPU needs to be reset to proceed\n"
+ );
+ return Err(EBUSY);
+ }
+
+ // FWSEC-FRTS will create the WPR2 region.
+ let fwsec_frts = FwsecFirmware::new(
+ dev,
+ falcon,
+ bar,
+ bios,
+ FwsecCommand::Frts {
+ frts_addr: fb_layout.frts.start,
+ frts_size: fb_layout.frts.len(),
+ },
+ )?;
+
+ if chipset.needs_fwsec_bootloader() {
+ let fwsec_frts_bl = FwsecFirmwareWithBl::new(fwsec_frts, dev, chipset)?;
+ // Load and run the bootloader, which will load FWSEC-FRTS and run it.
+ fwsec_frts_bl.run(dev, falcon, bar)?;
+ } else {
+ // Load and run FWSEC-FRTS directly.
+ fwsec_frts.run(dev, falcon, bar)?;
+ }
+
+ // SCRATCH_E contains the error code for FWSEC-FRTS.
+ let frts_status = bar
+ .read(regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR)
+ .frts_err_code();
+ if frts_status != 0 {
+ dev_err!(
+ dev,
+ "FWSEC-FRTS returned with error code {:#x}\n",
+ frts_status
+ );
+
+ return Err(EIO);
+ }
+
+ // Check that the WPR2 region has been created as we requested.
+ let (wpr2_lo, wpr2_hi) = (
+ bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO).lower_bound(),
+ bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound(),
+ );
+
+ match (wpr2_lo, wpr2_hi) {
+ (_, 0) => {
+ dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n");
+
+ Err(EIO)
+ }
+ (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => {
+ dev_err!(
+ dev,
+ "WPR2 region created at unexpected address {:#x}; expected {:#x}\n",
+ wpr2_lo,
+ fb_layout.frts.start,
+ );
+
+ Err(EIO)
+ }
+ (wpr2_lo, wpr2_hi) => {
+ dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi);
+ dev_dbg!(dev, "GPU instance built\n");
+
+ Ok(())
+ }
+ }
+ }
+
+ /// Attempt to boot the GSP.
+ ///
+ /// This is a GPU-dependent and complex procedure that involves loading firmware files from
+ /// user-space, patching them with signatures, and building firmware-specific intricate data
+ /// structures that the GSP will use at runtime.
+ ///
+ /// Upon return, the GSP is up and running, and its runtime object given as return value.
+ pub(crate) fn boot(
+ self: Pin<&mut Self>,
+ pdev: &pci::Device<device::Bound>,
+ bar: &Bar0,
+ chipset: Chipset,
+ gsp_falcon: &Falcon<Gsp>,
+ sec2_falcon: &Falcon<Sec2>,
+ ) -> Result {
+ let dev = pdev.as_ref();
+
+ let bios = Vbios::new(dev, bar)?;
+
+ let gsp_fw = KBox::pin_init(GspFirmware::new(dev, chipset, FIRMWARE_VERSION), GFP_KERNEL)?;
+
+ let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?;
+ dev_dbg!(dev, "{:#x?}\n", fb_layout);
+
+ Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, &fb_layout)?;
+
+ let booter_loader = BooterFirmware::new(
+ dev,
+ BooterKind::Loader,
+ chipset,
+ FIRMWARE_VERSION,
+ sec2_falcon,
+ bar,
+ )?;
+
+ let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?;
+
+ self.cmdq
+ .send_command_no_wait(bar, commands::SetSystemInfo::new(pdev))?;
+ self.cmdq
+ .send_command_no_wait(bar, commands::SetRegistry::new())?;
+
+ gsp_falcon.reset(bar)?;
+ let libos_handle = self.libos.dma_handle();
+ let (mbox0, mbox1) = gsp_falcon.boot(
+ bar,
+ Some(libos_handle as u32),
+ Some((libos_handle >> 32) as u32),
+ )?;
+ dev_dbg!(pdev, "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1);
+
+ dev_dbg!(
+ pdev,
+ "Using SEC2 to load and run the booter_load firmware...\n"
+ );
+
+ sec2_falcon.reset(bar)?;
+ sec2_falcon.load(dev, bar, &booter_loader)?;
+ let wpr_handle = wpr_meta.dma_handle();
+ let (mbox0, mbox1) = sec2_falcon.boot(
+ bar,
+ Some(wpr_handle as u32),
+ Some((wpr_handle >> 32) as u32),
+ )?;
+ dev_dbg!(pdev, "SEC2 MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1);
+
+ if mbox0 != 0 {
+ dev_err!(pdev, "Booter-load failed with error {:#x}\n", mbox0);
+ return Err(ENODEV);
+ }
+
+ gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version);
+
+ // Poll for RISC-V to become active before running sequencer
+ read_poll_timeout(
+ || Ok(gsp_falcon.is_riscv_active(bar)),
+ |val: &bool| *val,
+ Delta::from_millis(10),
+ Delta::from_secs(5),
+ )?;
+
+ dev_dbg!(pdev, "RISC-V active? {}\n", gsp_falcon.is_riscv_active(bar),);
+
+ // Create and run the GSP sequencer.
+ let seq_params = GspSequencerParams {
+ bootloader_app_version: gsp_fw.bootloader.app_version,
+ libos_dma_handle: libos_handle,
+ gsp_falcon,
+ sec2_falcon,
+ dev: pdev.as_ref().into(),
+ bar,
+ };
+ GspSequencer::run(&self.cmdq, seq_params)?;
+
+ // Wait until GSP is fully initialized.
+ commands::wait_gsp_init_done(&self.cmdq)?;
+
+ // Obtain and display basic GPU information.
+ let info = commands::get_gsp_info(&self.cmdq, bar)?;
+ match info.gpu_name() {
+ Ok(name) => dev_info!(pdev, "GPU name: {}\n", name),
+ Err(e) => dev_warn!(pdev, "GPU name unavailable: {:?}\n", e),
+ }
+
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
new file mode 100644
index 000000000000..275da9b1ee0e
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -0,0 +1,849 @@
+// SPDX-License-Identifier: GPL-2.0
+
+mod continuation;
+
+use core::mem;
+
+use kernel::{
+ device,
+ dma::{
+ Coherent,
+ DmaAddress, //
+ },
+ dma_write,
+ io::{
+ poll::read_poll_timeout,
+ Io, //
+ },
+ new_mutex,
+ prelude::*,
+ ptr,
+ sync::{
+ aref::ARef,
+ Mutex, //
+ },
+ time::Delta,
+ transmute::{
+ AsBytes,
+ FromBytes, //
+ },
+};
+
+use continuation::{
+ ContinuationRecord,
+ SplitState, //
+};
+
+use pin_init::pin_init_scope;
+
+use crate::{
+ driver::Bar0,
+ gsp::{
+ fw::{
+ GspMsgElement,
+ MsgFunction,
+ MsgqRxHeader,
+ MsgqTxHeader,
+ GSP_MSG_QUEUE_ELEMENT_SIZE_MAX, //
+ },
+ PteArray,
+ GSP_PAGE_SHIFT,
+ GSP_PAGE_SIZE, //
+ },
+ num,
+ regs,
+ sbuffer::SBufferIter, //
+};
+
+/// Marker type representing the absence of a reply for a command. Commands using this as their
+/// reply type are sent using [`Cmdq::send_command_no_wait`].
+pub(crate) struct NoReply;
+
+/// Trait implemented by types representing a command to send to the GSP.
+///
+/// The main purpose of this trait is to provide [`Cmdq`] with the information it needs to send
+/// a given command.
+///
+/// [`CommandToGsp::init`] in particular is responsible for initializing the command directly
+/// into the space reserved for it in the command queue buffer.
+///
+/// Some commands may be followed by a variable-length payload. For these, the
+/// [`CommandToGsp::variable_payload_len`] and [`CommandToGsp::init_variable_payload`] need to be
+/// defined as well.
+pub(crate) trait CommandToGsp {
+ /// Function identifying this command to the GSP.
+ const FUNCTION: MsgFunction;
+
+ /// Type generated by [`CommandToGsp::init`], to be written into the command queue buffer.
+ type Command: FromBytes + AsBytes;
+
+ /// Type of the reply expected from the GSP, or [`NoReply`] for commands that don't
+ /// have a reply.
+ type Reply;
+
+ /// Error type returned by [`CommandToGsp::init`].
+ type InitError;
+
+ /// In-place command initializer responsible for filling the command in the command queue
+ /// buffer.
+ fn init(&self) -> impl Init<Self::Command, Self::InitError>;
+
+ /// Size of the variable-length payload following the command structure generated by
+ /// [`CommandToGsp::init`].
+ ///
+ /// Most commands don't have a variable-length payload, so this is zero by default.
+ fn variable_payload_len(&self) -> usize {
+ 0
+ }
+
+ /// Method initializing the variable-length payload.
+ ///
+ /// The command buffer is circular, which means that we may need to jump back to its beginning
+ /// while in the middle of a command. For this reason, the variable-length payload is
+ /// initialized using a [`SBufferIter`].
+ ///
+ /// This method will receive a buffer of the length returned by
+ /// [`CommandToGsp::variable_payload_len`], and must write every single byte of it. Leaving
+ /// unwritten space will lead to an error.
+ ///
+ /// Most commands don't have a variable-length payload, so this does nothing by default.
+ fn init_variable_payload(
+ &self,
+ _dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
+ ) -> Result {
+ Ok(())
+ }
+
+ /// Total size of the command (including its variable-length payload) without the
+ /// [`GspMsgElement`] header.
+ fn size(&self) -> usize {
+ size_of::<Self::Command>() + self.variable_payload_len()
+ }
+}
+
+/// Trait representing messages received from the GSP.
+///
+/// This trait tells [`Cmdq::receive_msg`] how it can receive a given type of message.
+pub(crate) trait MessageFromGsp: Sized {
+ /// Function identifying this message from the GSP.
+ const FUNCTION: MsgFunction;
+
+ /// Error type returned by [`MessageFromGsp::read`].
+ type InitError;
+
+ /// Type containing the raw message to be read from the message queue.
+ type Message: FromBytes;
+
+ /// Method reading the message from the message queue and returning it.
+ ///
+ /// From a `Self::Message` and a [`SBufferIter`], constructs an instance of `Self` and returns
+ /// it.
+ fn read(
+ msg: &Self::Message,
+ sbuffer: &mut SBufferIter<core::array::IntoIter<&[u8], 2>>,
+ ) -> Result<Self, Self::InitError>;
+}
+
+/// Number of GSP pages making the [`Msgq`].
+pub(crate) const MSGQ_NUM_PAGES: u32 = 0x3f;
+
+/// Circular buffer of a [`Msgq`].
+///
+/// This area of memory is to be shared between the driver and the GSP to exchange commands or
+/// messages.
+#[repr(C, align(0x1000))]
+#[derive(Debug)]
+struct MsgqData {
+ data: [[u8; GSP_PAGE_SIZE]; num::u32_as_usize(MSGQ_NUM_PAGES)],
+}
+
+// Annoyingly we are forced to use a literal to specify the alignment of
+// `MsgqData`, so check that it corresponds to the actual GSP page size here.
+static_assert!(align_of::<MsgqData>() == GSP_PAGE_SIZE);
+
+/// Unidirectional message queue.
+///
+/// Contains the data for a message queue, that either the driver or GSP writes to.
+///
+/// Note that while the write pointer of `tx` corresponds to the `msgq` of the same instance, the
+/// read pointer of `rx` actually refers to the `Msgq` owned by the other side.
+/// This design ensures that only the driver or GSP ever writes to a given instance of this struct.
+#[repr(C)]
+// There is no struct defined for this in the open-gpu-kernel-source headers.
+// Instead it is defined by code in `GspMsgQueuesInit()`.
+// TODO: Revert to private once `IoView` projections replace the `gsp_mem` module.
+pub(super) struct Msgq {
+ /// Header for sending messages, including the write pointer.
+ pub(super) tx: MsgqTxHeader,
+ /// Header for receiving messages, including the read pointer.
+ pub(super) rx: MsgqRxHeader,
+ /// The message queue proper.
+ msgq: MsgqData,
+}
+
+/// Structure shared between the driver and the GSP and containing the command and message queues.
+#[repr(C)]
+// TODO: Revert to private once `IoView` projections replace the `gsp_mem` module.
+pub(super) struct GspMem {
+ /// Self-mapping page table entries.
+ ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>,
+ /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the
+ /// write and read pointers that the CPU updates. This means that the read pointer here is an
+ /// index into the GSP queue.
+ ///
+ /// This member is read-only for the GSP.
+ pub(super) cpuq: Msgq,
+ /// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the
+ /// write and read pointers that the GSP updates. This means that the read pointer here is an
+ /// index into the CPU queue.
+ ///
+ /// This member is read-only for the driver.
+ pub(super) gspq: Msgq,
+}
+
+impl GspMem {
+ const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::<u64>();
+}
+
+// SAFETY: These structs don't meet the no-padding requirements of AsBytes but
+// that is not a problem because they are not used outside the kernel.
+unsafe impl AsBytes for GspMem {}
+
+// SAFETY: These structs don't meet the no-padding requirements of FromBytes but
+// that is not a problem because they are not used outside the kernel.
+unsafe impl FromBytes for GspMem {}
+
+/// Wrapper around [`GspMem`] to share it with the GPU using a [`Coherent`].
+///
+/// This provides the low-level functionality to communicate with the GSP, including allocation of
+/// queue space to write messages to and management of read/write pointers.
+///
+/// This is shared with the GSP, with clear ownership rules regarding the command queues:
+///
+/// * The driver owns (i.e. can write to) the part of the CPU message queue between the CPU write
+/// pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`].
+/// * The driver owns (i.e. can read from) the part of the GSP message queue between the CPU read
+/// pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`].
+struct DmaGspMem(Coherent<GspMem>);
+
+impl DmaGspMem {
+ /// Allocate a new instance and map it for `dev`.
+ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
+ const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::<Msgq>() }>();
+ const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>();
+
+ let gsp_mem = Coherent::<GspMem>::zeroed(dev, GFP_KERNEL)?;
+
+ let start = gsp_mem.dma_handle();
+ // Write values one by one to avoid an on-stack instance of `PteArray`.
+ for i in 0..GspMem::PTE_ARRAY_SIZE {
+ dma_write!(gsp_mem, .ptes.0[i], PteArray::<0>::entry(start, i)?);
+ }
+
+ dma_write!(
+ gsp_mem,
+ .cpuq.tx,
+ MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES)
+ );
+ dma_write!(gsp_mem, .cpuq.rx, MsgqRxHeader::new());
+
+ Ok(Self(gsp_mem))
+ }
+
+ /// Returns the region of the CPU message queue that the driver is currently allowed to write
+ /// to.
+ ///
+ /// As the message queue is a circular buffer, the region may be discontiguous in memory. In
+ /// that case the second slice will have a non-zero length.
+ fn driver_write_area(&mut self) -> (&mut [[u8; GSP_PAGE_SIZE]], &mut [[u8; GSP_PAGE_SIZE]]) {
+ let tx = self.cpu_write_ptr();
+ let rx = self.gsp_read_ptr();
+
+ // Pointer to the first entry of the CPU message queue.
+ let data = ptr::project!(mut self.0.as_mut_ptr(), .cpuq.msgq.data[0]);
+
+ let (tail_end, wrap_end) = if rx == 0 {
+ // The write area is non-wrapping, and stops at the second-to-last entry of the command
+ // queue (to leave the last one empty).
+ (MSGQ_NUM_PAGES - 1, 0)
+ } else if rx <= tx {
+ // The write area wraps and continues until `rx - 1`.
+ (MSGQ_NUM_PAGES, rx - 1)
+ } else {
+ // The write area doesn't wrap and stops at `rx - 1`.
+ (rx - 1, 0)
+ };
+
+ // SAFETY:
+ // - `data` was created from a valid pointer, and `rx` and `tx` are in the
+ // `0..MSGQ_NUM_PAGES` range per the invariants of `cpu_write_ptr` and `gsp_read_ptr`,
+ // thus the created slices are valid.
+ // - The area starting at `tx` and ending at `rx - 2` modulo `MSGQ_NUM_PAGES`,
+ // inclusive, belongs to the driver for writing and is not accessed concurrently by
+ // the GSP.
+ // - The caller holds a reference to `self` for as long as the returned slices are live,
+ // meaning the CPU write pointer cannot be advanced and thus that the returned area
+ // remains exclusive to the CPU for the duration of the slices.
+ // - The created slices point to non-overlapping sub-ranges of `data` in all
+ // branches (in the `rx <= tx` case, the second slice ends at `rx - 1` which is strictly
+ // less than `tx` where the first slice starts; in the other cases the second slice is
+ // empty), so creating two `&mut` references from them does not violate aliasing rules.
+ unsafe {
+ (
+ core::slice::from_raw_parts_mut(
+ data.add(num::u32_as_usize(tx)),
+ num::u32_as_usize(tail_end - tx),
+ ),
+ core::slice::from_raw_parts_mut(data, num::u32_as_usize(wrap_end)),
+ )
+ }
+ }
+
+ /// Returns the size of the region of the CPU message queue that the driver is currently allowed
+ /// to write to, in bytes.
+ fn driver_write_area_size(&self) -> usize {
+ let tx = self.cpu_write_ptr();
+ let rx = self.gsp_read_ptr();
+
+ // `rx` and `tx` are both in `0..MSGQ_NUM_PAGES` per the invariants of `gsp_read_ptr` and
+ // `cpu_write_ptr`. The minimum value case is where `rx == 0` and `tx == MSGQ_NUM_PAGES -
+ // 1`, which gives `0 + MSGQ_NUM_PAGES - (MSGQ_NUM_PAGES - 1) - 1 == 0`.
+ let slots = (rx + MSGQ_NUM_PAGES - tx - 1) % MSGQ_NUM_PAGES;
+ num::u32_as_usize(slots) * GSP_PAGE_SIZE
+ }
+
+ /// Returns the region of the GSP message queue that the driver is currently allowed to read
+ /// from.
+ ///
+ /// As the message queue is a circular buffer, the region may be discontiguous in memory. In
+ /// that case the second slice will have a non-zero length.
+ fn driver_read_area(&self) -> (&[[u8; GSP_PAGE_SIZE]], &[[u8; GSP_PAGE_SIZE]]) {
+ let tx = self.gsp_write_ptr();
+ let rx = self.cpu_read_ptr();
+
+ // Pointer to the first entry of the GSP message queue.
+ let data = ptr::project!(self.0.as_ptr(), .gspq.msgq.data[0]);
+
+ let (tail_end, wrap_end) = if rx <= tx {
+ // Read area is non-wrapping and stops right before `tx`.
+ (tx, 0)
+ } else {
+ // Read area is wrapping and stops right before `tx`.
+ (MSGQ_NUM_PAGES, tx)
+ };
+
+ // SAFETY:
+ // - `data` was created from a valid pointer, and `rx` and `tx` are in the
+ // `0..MSGQ_NUM_PAGES` range per the invariants of `gsp_write_ptr` and `cpu_read_ptr`,
+ // thus the created slices are valid.
+ // - The area starting at `rx` and ending at `tx - 1` modulo `MSGQ_NUM_PAGES`,
+ // inclusive, belongs to the driver for reading and is not accessed concurrently by
+ // the GSP.
+ // - The caller holds a reference to `self` for as long as the returned slices are live,
+ // meaning the CPU read pointer cannot be advanced and thus that the returned area
+ // remains exclusive to the CPU for the duration of the slices.
+ unsafe {
+ (
+ core::slice::from_raw_parts(
+ data.add(num::u32_as_usize(rx)),
+ num::u32_as_usize(tail_end - rx),
+ ),
+ core::slice::from_raw_parts(data, num::u32_as_usize(wrap_end)),
+ )
+ }
+ }
+
+ /// Allocates a region on the command queue that is large enough to send a command of `size`
+ /// bytes, waiting for space to become available based on the provided timeout.
+ ///
+ /// This returns a [`GspCommand`] ready to be written to by the caller.
+ ///
+ /// # Errors
+ ///
+ /// - `EMSGSIZE` if the command is larger than [`GSP_MSG_QUEUE_ELEMENT_SIZE_MAX`].
+ /// - `ETIMEDOUT` if space does not become available within the timeout.
+ /// - `EIO` if the command header is not properly aligned.
+ fn allocate_command(&mut self, size: usize, timeout: Delta) -> Result<GspCommand<'_>> {
+ if size_of::<GspMsgElement>() + size > GSP_MSG_QUEUE_ELEMENT_SIZE_MAX {
+ return Err(EMSGSIZE);
+ }
+ read_poll_timeout(
+ || Ok(self.driver_write_area_size()),
+ |available_bytes| *available_bytes >= size_of::<GspMsgElement>() + size,
+ Delta::from_micros(1),
+ timeout,
+ )?;
+
+ // Get the current writable area as an array of bytes.
+ let (slice_1, slice_2) = {
+ let (slice_1, slice_2) = self.driver_write_area();
+
+ (slice_1.as_flattened_mut(), slice_2.as_flattened_mut())
+ };
+
+ // Extract area for the `GspMsgElement`.
+ let (header, slice_1) = GspMsgElement::from_bytes_mut_prefix(slice_1).ok_or(EIO)?;
+
+ // Create the contents area.
+ let (slice_1, slice_2) = if slice_1.len() > size {
+ // Contents fits entirely in `slice_1`.
+ (&mut slice_1[..size], &mut slice_2[0..0])
+ } else {
+ // Need all of `slice_1` and some of `slice_2`.
+ let slice_2_len = size - slice_1.len();
+ (slice_1, &mut slice_2[..slice_2_len])
+ };
+
+ Ok(GspCommand {
+ header,
+ contents: (slice_1, slice_2),
+ })
+ }
+
+ // Returns the index of the memory page the GSP will write the next message to.
+ //
+ // # Invariants
+ //
+ // - The returned value is within `0..MSGQ_NUM_PAGES`.
+ fn gsp_write_ptr(&self) -> u32 {
+ super::fw::gsp_mem::gsp_write_ptr(&self.0)
+ }
+
+ // Returns the index of the memory page the GSP will read the next command from.
+ //
+ // # Invariants
+ //
+ // - The returned value is within `0..MSGQ_NUM_PAGES`.
+ fn gsp_read_ptr(&self) -> u32 {
+ super::fw::gsp_mem::gsp_read_ptr(&self.0)
+ }
+
+ // Returns the index of the memory page the CPU can read the next message from.
+ //
+ // # Invariants
+ //
+ // - The returned value is within `0..MSGQ_NUM_PAGES`.
+ fn cpu_read_ptr(&self) -> u32 {
+ super::fw::gsp_mem::cpu_read_ptr(&self.0)
+ }
+
+ // Informs the GSP that it can send `elem_count` new pages into the message queue.
+ fn advance_cpu_read_ptr(&mut self, elem_count: u32) {
+ super::fw::gsp_mem::advance_cpu_read_ptr(&self.0, elem_count)
+ }
+
+ // Returns the index of the memory page the CPU can write the next command to.
+ //
+ // # Invariants
+ //
+ // - The returned value is within `0..MSGQ_NUM_PAGES`.
+ fn cpu_write_ptr(&self) -> u32 {
+ super::fw::gsp_mem::cpu_write_ptr(&self.0)
+ }
+
+ // Informs the GSP that it can process `elem_count` new pages from the command queue.
+ fn advance_cpu_write_ptr(&mut self, elem_count: u32) {
+ super::fw::gsp_mem::advance_cpu_write_ptr(&self.0, elem_count)
+ }
+}
+
+/// A command ready to be sent on the command queue.
+///
+/// This is the type returned by [`DmaGspMem::allocate_command`].
+struct GspCommand<'a> {
+ // Writable reference to the header of the command.
+ header: &'a mut GspMsgElement,
+ // Writable slices to the contents of the command. The second slice is zero unless the command
+ // loops over the command queue.
+ contents: (&'a mut [u8], &'a mut [u8]),
+}
+
+/// A message ready to be processed from the message queue.
+///
+/// This is the type returned by [`Cmdq::wait_for_msg`].
+struct GspMessage<'a> {
+ // Reference to the header of the message.
+ header: &'a GspMsgElement,
+ // Slices to the contents of the message. The second slice is zero unless the message loops
+ // over the message queue.
+ contents: (&'a [u8], &'a [u8]),
+}
+
+/// GSP command queue.
+///
+/// Provides the ability to send commands and receive messages from the GSP using a shared memory
+/// area.
+#[pin_data]
+pub(crate) struct Cmdq {
+ /// Inner mutex-protected state.
+ #[pin]
+ inner: Mutex<CmdqInner>,
+ /// DMA handle of the command queue's shared memory region.
+ pub(super) dma_handle: DmaAddress,
+}
+
+impl Cmdq {
+ /// Offset of the data after the PTEs.
+ const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq);
+
+ /// Offset of command queue ring buffer.
+ pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq)
+ + core::mem::offset_of!(Msgq, msgq)
+ - Self::POST_PTE_OFFSET;
+
+ /// Offset of message queue ring buffer.
+ pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq)
+ + core::mem::offset_of!(Msgq, msgq)
+ - Self::POST_PTE_OFFSET;
+
+ /// Number of page table entries for the GSP shared region.
+ pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
+
+ /// Default timeout for receiving a message from the GSP.
+ pub(super) const RECEIVE_TIMEOUT: Delta = Delta::from_secs(5);
+
+ /// Creates a new command queue for `dev`.
+ pub(crate) fn new(dev: &device::Device<device::Bound>) -> impl PinInit<Self, Error> + '_ {
+ pin_init_scope(move || {
+ let gsp_mem = DmaGspMem::new(dev)?;
+
+ Ok(try_pin_init!(Self {
+ dma_handle: gsp_mem.0.dma_handle(),
+ inner <- new_mutex!(CmdqInner {
+ dev: dev.into(),
+ gsp_mem,
+ seq: 0,
+ }),
+ }))
+ })
+ }
+
+ /// Computes the checksum for the message pointed to by `it`.
+ ///
+ /// A message is made of several parts, so `it` is an iterator over byte slices representing
+ /// these parts.
+ fn calculate_checksum<T: Iterator<Item = u8>>(it: T) -> u32 {
+ let sum64 = it
+ .enumerate()
+ .map(|(idx, byte)| (((idx % 8) * 8) as u32, byte))
+ .fold(0, |acc, (rol, byte)| acc ^ u64::from(byte).rotate_left(rol));
+
+ ((sum64 >> 32) as u32) ^ (sum64 as u32)
+ }
+
+ /// Notifies the GSP that we have updated the command queue pointers.
+ fn notify_gsp(bar: &Bar0) {
+ bar.write_reg(regs::NV_PGSP_QUEUE_HEAD::zeroed().with_address(0u32));
+ }
+
+ /// Sends `command` to the GSP and waits for the reply.
+ ///
+ /// Messages with non-matching function codes are silently consumed until the expected reply
+ /// arrives.
+ ///
+ /// The queue is locked for the entire send+receive cycle to ensure that no other command can
+ /// be interleaved.
+ ///
+ /// # Errors
+ ///
+ /// - `ETIMEDOUT` if space does not become available to send the command, or if the reply is
+ /// not received within the timeout.
+ /// - `EIO` if the variable payload requested by the command has not been entirely
+ /// written to by its [`CommandToGsp::init_variable_payload`] method.
+ ///
+ /// Error codes returned by the command and reply initializers are propagated as-is.
+ pub(crate) fn send_command<M>(&self, bar: &Bar0, command: M) -> Result<M::Reply>
+ where
+ M: CommandToGsp,
+ M::Reply: MessageFromGsp,
+ Error: From<M::InitError>,
+ Error: From<<M::Reply as MessageFromGsp>::InitError>,
+ {
+ let mut inner = self.inner.lock();
+ inner.send_command(bar, command)?;
+
+ loop {
+ match inner.receive_msg::<M::Reply>(Self::RECEIVE_TIMEOUT) {
+ Ok(reply) => break Ok(reply),
+ Err(ERANGE) => continue,
+ Err(e) => break Err(e),
+ }
+ }
+ }
+
+ /// Sends `command` to the GSP without waiting for a reply.
+ ///
+ /// # Errors
+ ///
+ /// - `ETIMEDOUT` if space does not become available within the timeout.
+ /// - `EIO` if the variable payload requested by the command has not been entirely
+ /// written to by its [`CommandToGsp::init_variable_payload`] method.
+ ///
+ /// Error codes returned by the command initializers are propagated as-is.
+ pub(crate) fn send_command_no_wait<M>(&self, bar: &Bar0, command: M) -> Result
+ where
+ M: CommandToGsp<Reply = NoReply>,
+ Error: From<M::InitError>,
+ {
+ self.inner.lock().send_command(bar, command)
+ }
+
+ /// Receive a message from the GSP.
+ ///
+ /// See [`CmdqInner::receive_msg`] for details.
+ pub(crate) fn receive_msg<M: MessageFromGsp>(&self, timeout: Delta) -> Result<M>
+ where
+ // This allows all error types, including `Infallible`, to be used for `M::InitError`.
+ Error: From<M::InitError>,
+ {
+ self.inner.lock().receive_msg(timeout)
+ }
+}
+
+/// Inner mutex protected state of [`Cmdq`].
+struct CmdqInner {
+ /// Device this command queue belongs to.
+ dev: ARef<device::Device>,
+ /// Current command sequence number.
+ seq: u32,
+ /// Memory area shared with the GSP for communicating commands and messages.
+ gsp_mem: DmaGspMem,
+}
+
+impl CmdqInner {
+ /// Timeout for waiting for space on the command queue.
+ const ALLOCATE_TIMEOUT: Delta = Delta::from_secs(1);
+
+ /// Sends `command` to the GSP, without splitting it.
+ ///
+ /// # Errors
+ ///
+ /// - `EMSGSIZE` if the command exceeds the maximum queue element size.
+ /// - `ETIMEDOUT` if space does not become available within the timeout.
+ /// - `EIO` if the variable payload requested by the command has not been entirely
+ /// written to by its [`CommandToGsp::init_variable_payload`] method.
+ ///
+ /// Error codes returned by the command initializers are propagated as-is.
+ fn send_single_command<M>(&mut self, bar: &Bar0, command: M) -> Result
+ where
+ M: CommandToGsp,
+ // This allows all error types, including `Infallible`, to be used for `M::InitError`.
+ Error: From<M::InitError>,
+ {
+ let size_in_bytes = command.size();
+ let dst = self
+ .gsp_mem
+ .allocate_command(size_in_bytes, Self::ALLOCATE_TIMEOUT)?;
+
+ // Extract area for the command itself. The GSP message header and the command header
+ // together are guaranteed to fit entirely into a single page, so it's ok to only look
+ // at `dst.contents.0` here.
+ let (cmd, payload_1) = M::Command::from_bytes_mut_prefix(dst.contents.0).ok_or(EIO)?;
+
+ // Fill the header and command in-place.
+ let msg_element = GspMsgElement::init(self.seq, size_in_bytes, M::FUNCTION);
+ // SAFETY: `msg_header` and `cmd` are valid references, and not touched if the initializer
+ // fails.
+ unsafe {
+ msg_element.__init(core::ptr::from_mut(dst.header))?;
+ command.init().__init(core::ptr::from_mut(cmd))?;
+ }
+
+ // Fill the variable-length payload, which may be empty.
+ let mut sbuffer = SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]);
+ command.init_variable_payload(&mut sbuffer)?;
+
+ if !sbuffer.is_empty() {
+ return Err(EIO);
+ }
+ drop(sbuffer);
+
+ // Compute checksum now that the whole message is ready.
+ dst.header
+ .set_checksum(Cmdq::calculate_checksum(SBufferIter::new_reader([
+ dst.header.as_bytes(),
+ dst.contents.0,
+ dst.contents.1,
+ ])));
+
+ dev_dbg!(
+ &self.dev,
+ "GSP RPC: send: seq# {}, function={:?}, length=0x{:x}\n",
+ self.seq,
+ M::FUNCTION,
+ dst.header.length(),
+ );
+
+ // All set - update the write pointer and inform the GSP of the new command.
+ let elem_count = dst.header.element_count();
+ self.seq += 1;
+ self.gsp_mem.advance_cpu_write_ptr(elem_count);
+ Cmdq::notify_gsp(bar);
+
+ Ok(())
+ }
+
+ /// Sends `command` to the GSP.
+ ///
+ /// The command may be split into multiple messages if it is large.
+ ///
+ /// # Errors
+ ///
+ /// - `ETIMEDOUT` if space does not become available within the timeout.
+ /// - `EIO` if the variable payload requested by the command has not been entirely
+ /// written to by its [`CommandToGsp::init_variable_payload`] method.
+ ///
+ /// Error codes returned by the command initializers are propagated as-is.
+ fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result
+ where
+ M: CommandToGsp,
+ Error: From<M::InitError>,
+ {
+ match SplitState::new(command)? {
+ SplitState::Single(command) => self.send_single_command(bar, command),
+ SplitState::Split(command, mut continuations) => {
+ self.send_single_command(bar, command)?;
+
+ while let Some(continuation) = continuations.next() {
+ // Turbofish needed because the compiler cannot infer M here.
+ self.send_single_command::<ContinuationRecord<'_>>(bar, continuation)?;
+ }
+
+ Ok(())
+ }
+ }
+ }
+
+ /// Wait for a message to become available on the message queue.
+ ///
+ /// This works purely at the transport layer and does not interpret or validate the message
+ /// beyond the advertised length in its [`GspMsgElement`].
+ ///
+ /// This method returns:
+ ///
+ /// - A reference to the [`GspMsgElement`] of the message,
+ /// - Two byte slices with the contents of the message. The second slice is empty unless the
+ /// message loops across the message queue.
+ ///
+ /// # Errors
+ ///
+ /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available.
+ /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the
+ /// message queue.
+ ///
+ /// Error codes returned by the message constructor are propagated as-is.
+ fn wait_for_msg(&self, timeout: Delta) -> Result<GspMessage<'_>> {
+ // Wait for a message to arrive from the GSP.
+ let (slice_1, slice_2) = read_poll_timeout(
+ || Ok(self.gsp_mem.driver_read_area()),
+ |driver_area| !driver_area.0.is_empty(),
+ Delta::from_millis(1),
+ timeout,
+ )
+ .map(|(slice_1, slice_2)| (slice_1.as_flattened(), slice_2.as_flattened()))?;
+
+ // Extract the `GspMsgElement`.
+ let (header, slice_1) = GspMsgElement::from_bytes_prefix(slice_1).ok_or(EIO)?;
+
+ dev_dbg!(
+ &self.dev,
+ "GSP RPC: receive: seq# {}, function={:?}, length=0x{:x}\n",
+ header.sequence(),
+ header.function(),
+ header.length(),
+ );
+
+ let payload_length = header.payload_length();
+
+ // Check that the driver read area is large enough for the message.
+ if slice_1.len() + slice_2.len() < payload_length {
+ return Err(EIO);
+ }
+
+ // Cut the message slices down to the actual length of the message.
+ let (slice_1, slice_2) = if slice_1.len() > payload_length {
+ // PANIC: we checked above that `slice_1` is at least as long as `payload_length`.
+ (slice_1.split_at(payload_length).0, &slice_2[0..0])
+ } else {
+ (
+ slice_1,
+ // PANIC: we checked above that `slice_1.len() + slice_2.len()` is at least as
+ // large as `payload_length`.
+ slice_2.split_at(payload_length - slice_1.len()).0,
+ )
+ };
+
+ // Validate checksum.
+ if Cmdq::calculate_checksum(SBufferIter::new_reader([
+ header.as_bytes(),
+ slice_1,
+ slice_2,
+ ])) != 0
+ {
+ dev_err!(
+ &self.dev,
+ "GSP RPC: receive: Call {} - bad checksum\n",
+ header.sequence()
+ );
+ return Err(EIO);
+ }
+
+ Ok(GspMessage {
+ header,
+ contents: (slice_1, slice_2),
+ })
+ }
+
+ /// Receive a message from the GSP.
+ ///
+ /// The expected message type is specified using the `M` generic parameter. If the pending
+ /// message has a different function code, `ERANGE` is returned and the message is consumed.
+ ///
+ /// The read pointer is always advanced past the message, regardless of whether it matched.
+ ///
+ /// # Errors
+ ///
+ /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available.
+ /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the
+ /// message queue.
+ /// - `EINVAL` if the function code of the message was not recognized.
+ /// - `ERANGE` if the message had a recognized but non-matching function code.
+ ///
+ /// Error codes returned by [`MessageFromGsp::read`] are propagated as-is.
+ fn receive_msg<M: MessageFromGsp>(&mut self, timeout: Delta) -> Result<M>
+ where
+ // This allows all error types, including `Infallible`, to be used for `M::InitError`.
+ Error: From<M::InitError>,
+ {
+ let message = self.wait_for_msg(timeout)?;
+ let function = message.header.function().map_err(|_| EINVAL)?;
+
+ // Extract the message. Store the result as we want to advance the read pointer even in
+ // case of failure.
+ let result = if function == M::FUNCTION {
+ let (cmd, contents_1) = M::Message::from_bytes_prefix(message.contents.0).ok_or(EIO)?;
+ let mut sbuffer = SBufferIter::new_reader([contents_1, message.contents.1]);
+
+ M::read(cmd, &mut sbuffer)
+ .map_err(|e| e.into())
+ .inspect(|_| {
+ if !sbuffer.is_empty() {
+ dev_warn!(
+ &self.dev,
+ "GSP message {:?} has unprocessed data\n",
+ function
+ );
+ }
+ })
+ } else {
+ Err(ERANGE)
+ };
+
+ // Advance the read pointer past this message.
+ self.gsp_mem.advance_cpu_read_ptr(u32::try_from(
+ message.header.length().div_ceil(GSP_PAGE_SIZE),
+ )?);
+
+ result
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/cmdq/continuation.rs b/drivers/gpu/nova-core/gsp/cmdq/continuation.rs
new file mode 100644
index 000000000000..05e904f18097
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/cmdq/continuation.rs
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Support for splitting large GSP commands across continuation records.
+
+use core::convert::Infallible;
+
+use kernel::prelude::*;
+
+use super::{
+ CommandToGsp,
+ NoReply, //
+};
+
+use crate::{
+ gsp::fw::{
+ GspMsgElement,
+ MsgFunction,
+ GSP_MSG_QUEUE_ELEMENT_SIZE_MAX, //
+ },
+ sbuffer::SBufferIter,
+};
+
+/// Maximum command size that fits in a single queue element.
+const MAX_CMD_SIZE: usize = GSP_MSG_QUEUE_ELEMENT_SIZE_MAX - size_of::<GspMsgElement>();
+
+/// Acts as an iterator over the continuation records for a split command.
+pub(super) struct ContinuationRecords {
+ payload: KVVec<u8>,
+ offset: usize,
+}
+
+impl ContinuationRecords {
+ /// Creates a new iterator over continuation records for the given payload.
+ fn new(payload: KVVec<u8>) -> Self {
+ Self { payload, offset: 0 }
+ }
+
+ /// Returns the next continuation record, or [`None`] if there are no more.
+ pub(super) fn next(&mut self) -> Option<ContinuationRecord<'_>> {
+ let remaining = self.payload.len() - self.offset;
+
+ if remaining > 0 {
+ let chunk_size = remaining.min(MAX_CMD_SIZE);
+ let record =
+ ContinuationRecord::new(&self.payload[self.offset..(self.offset + chunk_size)]);
+ self.offset += chunk_size;
+ Some(record)
+ } else {
+ None
+ }
+ }
+}
+
+/// The [`ContinuationRecord`] command.
+pub(super) struct ContinuationRecord<'a> {
+ data: &'a [u8],
+}
+
+impl<'a> ContinuationRecord<'a> {
+ /// Creates a new [`ContinuationRecord`] command with the given data.
+ fn new(data: &'a [u8]) -> Self {
+ Self { data }
+ }
+}
+
+impl<'a> CommandToGsp for ContinuationRecord<'a> {
+ const FUNCTION: MsgFunction = MsgFunction::ContinuationRecord;
+ type Command = ();
+ type Reply = NoReply;
+ type InitError = Infallible;
+
+ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
+ <()>::init_zeroed()
+ }
+
+ fn variable_payload_len(&self) -> usize {
+ self.data.len()
+ }
+
+ fn init_variable_payload(
+ &self,
+ dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
+ ) -> Result {
+ dst.write_all(self.data)
+ }
+}
+
+/// Whether a command needs to be split across continuation records or not.
+pub(super) enum SplitState<C: CommandToGsp> {
+ /// A command that fits in a single queue element.
+ Single(C),
+ /// A command split across continuation records.
+ Split(SplitCommand<C>, ContinuationRecords),
+}
+
+impl<C: CommandToGsp> SplitState<C> {
+ /// Maximum variable payload size that fits in the first command alongside the command header.
+ const MAX_FIRST_PAYLOAD: usize = MAX_CMD_SIZE - size_of::<C::Command>();
+
+ /// Creates a new [`SplitState`] for the given command.
+ ///
+ /// If the command is too large, it will be split into a main command and some number of
+ /// continuation records.
+ pub(super) fn new(command: C) -> Result<Self> {
+ let payload_len = command.variable_payload_len();
+
+ if command.size() > MAX_CMD_SIZE {
+ let mut command_payload =
+ KVVec::<u8>::from_elem(0u8, payload_len.min(Self::MAX_FIRST_PAYLOAD), GFP_KERNEL)?;
+ let mut continuation_payload =
+ KVVec::<u8>::from_elem(0u8, payload_len - command_payload.len(), GFP_KERNEL)?;
+ let mut sbuffer = SBufferIter::new_writer([
+ command_payload.as_mut_slice(),
+ continuation_payload.as_mut_slice(),
+ ]);
+
+ command.init_variable_payload(&mut sbuffer)?;
+ if !sbuffer.is_empty() {
+ return Err(EIO);
+ }
+ drop(sbuffer);
+
+ Ok(Self::Split(
+ SplitCommand::new(command, command_payload),
+ ContinuationRecords::new(continuation_payload),
+ ))
+ } else {
+ Ok(Self::Single(command))
+ }
+ }
+}
+
+/// A command that has been truncated to maximum accepted length of the command queue.
+///
+/// The remainder of its payload is expected to be sent using [`ContinuationRecords`].
+pub(super) struct SplitCommand<C: CommandToGsp> {
+ command: C,
+ payload: KVVec<u8>,
+}
+
+impl<C: CommandToGsp> SplitCommand<C> {
+ /// Creates a new [`SplitCommand`] wrapping `command` with the given truncated payload.
+ fn new(command: C, payload: KVVec<u8>) -> Self {
+ Self { command, payload }
+ }
+}
+
+impl<C: CommandToGsp> CommandToGsp for SplitCommand<C> {
+ const FUNCTION: MsgFunction = C::FUNCTION;
+ type Command = C::Command;
+ type Reply = C::Reply;
+ type InitError = C::InitError;
+
+ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
+ self.command.init()
+ }
+
+ fn variable_payload_len(&self) -> usize {
+ self.payload.len()
+ }
+
+ fn init_variable_payload(
+ &self,
+ dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
+ ) -> Result {
+ dst.write_all(&self.payload)
+ }
+}
+
+#[kunit_tests(nova_core_gsp_continuation)]
+mod tests {
+ use super::*;
+
+ use kernel::transmute::{
+ AsBytes,
+ FromBytes, //
+ };
+
+ /// Non-zero-sized command header for testing.
+ #[repr(C)]
+ #[derive(Clone, Copy, Zeroable)]
+ struct TestHeader([u8; 64]);
+
+ // SAFETY: `TestHeader` is a plain array of bytes for which all bit patterns are valid.
+ unsafe impl FromBytes for TestHeader {}
+
+ // SAFETY: `TestHeader` is a plain array of bytes for which all bit patterns are valid.
+ unsafe impl AsBytes for TestHeader {}
+
+ struct TestPayload {
+ data: KVVec<u8>,
+ }
+
+ impl TestPayload {
+ fn generate_pattern(len: usize) -> Result<KVVec<u8>> {
+ let mut data = KVVec::with_capacity(len, GFP_KERNEL)?;
+ for i in 0..len {
+ // Mix in higher bits so the pattern does not repeat every 256 bytes.
+ data.push((i ^ (i >> 8)) as u8, GFP_KERNEL)?;
+ }
+ Ok(data)
+ }
+
+ fn new(len: usize) -> Result<Self> {
+ Ok(Self {
+ data: Self::generate_pattern(len)?,
+ })
+ }
+ }
+
+ impl CommandToGsp for TestPayload {
+ const FUNCTION: MsgFunction = MsgFunction::Nop;
+ type Command = TestHeader;
+ type Reply = NoReply;
+ type InitError = Infallible;
+
+ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
+ TestHeader::init_zeroed()
+ }
+
+ fn variable_payload_len(&self) -> usize {
+ self.data.len()
+ }
+
+ fn init_variable_payload(
+ &self,
+ dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
+ ) -> Result {
+ dst.write_all(self.data.as_slice())
+ }
+ }
+
+ /// Maximum variable payload size that fits in the first command alongside the header.
+ const MAX_FIRST_PAYLOAD: usize = SplitState::<TestPayload>::MAX_FIRST_PAYLOAD;
+
+ fn read_payload(cmd: impl CommandToGsp) -> Result<KVVec<u8>> {
+ let len = cmd.variable_payload_len();
+ let mut buf = KVVec::from_elem(0u8, len, GFP_KERNEL)?;
+ let mut sbuf = SBufferIter::new_writer([buf.as_mut_slice(), &mut []]);
+ cmd.init_variable_payload(&mut sbuf)?;
+ drop(sbuf);
+ Ok(buf)
+ }
+
+ struct SplitTest {
+ payload_size: usize,
+ num_continuations: usize,
+ }
+
+ fn check_split(t: SplitTest) -> Result {
+ let payload = TestPayload::new(t.payload_size)?;
+ let mut num_continuations = 0;
+
+ let buf = match SplitState::new(payload)? {
+ SplitState::Single(cmd) => read_payload(cmd)?,
+ SplitState::Split(cmd, mut continuations) => {
+ let mut buf = read_payload(cmd)?;
+ assert!(size_of::<TestHeader>() + buf.len() <= MAX_CMD_SIZE);
+
+ while let Some(cont) = continuations.next() {
+ let payload = read_payload(cont)?;
+ assert!(payload.len() <= MAX_CMD_SIZE);
+ buf.extend_from_slice(&payload, GFP_KERNEL)?;
+ num_continuations += 1;
+ }
+
+ buf
+ }
+ };
+
+ assert_eq!(num_continuations, t.num_continuations);
+ assert_eq!(
+ buf.as_slice(),
+ TestPayload::generate_pattern(t.payload_size)?.as_slice()
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn split_command() -> Result {
+ check_split(SplitTest {
+ payload_size: 0,
+ num_continuations: 0,
+ })?;
+ check_split(SplitTest {
+ payload_size: MAX_FIRST_PAYLOAD,
+ num_continuations: 0,
+ })?;
+ check_split(SplitTest {
+ payload_size: MAX_FIRST_PAYLOAD + 1,
+ num_continuations: 1,
+ })?;
+ check_split(SplitTest {
+ payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE,
+ num_continuations: 1,
+ })?;
+ check_split(SplitTest {
+ payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE + 1,
+ num_continuations: 2,
+ })?;
+ check_split(SplitTest {
+ payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE * 3 + MAX_CMD_SIZE / 2,
+ num_continuations: 4,
+ })?;
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
new file mode 100644
index 000000000000..c89c7b57a751
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use core::{
+ array,
+ convert::Infallible,
+ ffi::FromBytesUntilNulError,
+ str::Utf8Error, //
+};
+
+use kernel::{
+ device,
+ pci,
+ prelude::*,
+ transmute::{
+ AsBytes,
+ FromBytes, //
+ }, //
+};
+
+use crate::{
+ driver::Bar0,
+ gsp::{
+ cmdq::{
+ Cmdq,
+ CommandToGsp,
+ MessageFromGsp,
+ NoReply, //
+ },
+ fw::{
+ commands::*,
+ MsgFunction, //
+ },
+ },
+ sbuffer::SBufferIter,
+};
+
+/// The `GspSetSystemInfo` command.
+pub(crate) struct SetSystemInfo<'a> {
+ pdev: &'a pci::Device<device::Bound>,
+}
+
+impl<'a> SetSystemInfo<'a> {
+ /// Creates a new `GspSetSystemInfo` command using the parameters of `pdev`.
+ pub(crate) fn new(pdev: &'a pci::Device<device::Bound>) -> Self {
+ Self { pdev }
+ }
+}
+
+impl<'a> CommandToGsp for SetSystemInfo<'a> {
+ const FUNCTION: MsgFunction = MsgFunction::GspSetSystemInfo;
+ type Command = GspSetSystemInfo;
+ type Reply = NoReply;
+ type InitError = Error;
+
+ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
+ GspSetSystemInfo::init(self.pdev)
+ }
+}
+
+struct RegistryEntry {
+ key: &'static str,
+ value: u32,
+}
+
+/// The `SetRegistry` command.
+pub(crate) struct SetRegistry {
+ entries: [RegistryEntry; Self::NUM_ENTRIES],
+}
+
+impl SetRegistry {
+ // For now we hard-code the registry entries. Future work will allow others to
+ // be added as module parameters.
+ const NUM_ENTRIES: usize = 3;
+
+ /// Creates a new `SetRegistry` command, using a set of hardcoded entries.
+ pub(crate) fn new() -> Self {
+ Self {
+ entries: [
+ // RMSecBusResetEnable - enables PCI secondary bus reset
+ RegistryEntry {
+ key: "RMSecBusResetEnable",
+ value: 1,
+ },
+ // RMForcePcieConfigSave - forces GSP-RM to preserve PCI configuration registers on
+ // any PCI reset.
+ RegistryEntry {
+ key: "RMForcePcieConfigSave",
+ value: 1,
+ },
+ // RMDevidCheckIgnore - allows GSP-RM to boot even if the PCI dev ID is not found
+ // in the internal product name database.
+ RegistryEntry {
+ key: "RMDevidCheckIgnore",
+ value: 1,
+ },
+ ],
+ }
+ }
+}
+
+impl CommandToGsp for SetRegistry {
+ const FUNCTION: MsgFunction = MsgFunction::SetRegistry;
+ type Command = PackedRegistryTable;
+ type Reply = NoReply;
+ type InitError = Infallible;
+
+ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
+ PackedRegistryTable::init(Self::NUM_ENTRIES as u32, self.variable_payload_len() as u32)
+ }
+
+ fn variable_payload_len(&self) -> usize {
+ let mut key_size = 0;
+ for i in 0..Self::NUM_ENTRIES {
+ key_size += self.entries[i].key.len() + 1; // +1 for NULL terminator
+ }
+ Self::NUM_ENTRIES * size_of::<PackedRegistryEntry>() + key_size
+ }
+
+ fn init_variable_payload(
+ &self,
+ dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
+ ) -> Result {
+ let string_data_start_offset =
+ size_of::<PackedRegistryTable>() + Self::NUM_ENTRIES * size_of::<PackedRegistryEntry>();
+
+ // Array for string data.
+ let mut string_data = KVec::new();
+
+ for entry in self.entries.iter().take(Self::NUM_ENTRIES) {
+ dst.write_all(
+ PackedRegistryEntry::new(
+ (string_data_start_offset + string_data.len()) as u32,
+ entry.value,
+ )
+ .as_bytes(),
+ )?;
+
+ let key_bytes = entry.key.as_bytes();
+ string_data.extend_from_slice(key_bytes, GFP_KERNEL)?;
+ string_data.push(0, GFP_KERNEL)?;
+ }
+
+ dst.write_all(string_data.as_slice())
+ }
+}
+
+/// Message type for GSP initialization done notification.
+struct GspInitDone;
+
+// SAFETY: `GspInitDone` is a zero-sized type with no bytes, therefore it
+// trivially has no uninitialized bytes.
+unsafe impl FromBytes for GspInitDone {}
+
+impl MessageFromGsp for GspInitDone {
+ const FUNCTION: MsgFunction = MsgFunction::GspInitDone;
+ type InitError = Infallible;
+ type Message = ();
+
+ fn read(
+ _msg: &Self::Message,
+ _sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>,
+ ) -> Result<Self, Self::InitError> {
+ Ok(GspInitDone)
+ }
+}
+
+/// Waits for GSP initialization to complete.
+pub(crate) fn wait_gsp_init_done(cmdq: &Cmdq) -> Result {
+ loop {
+ match cmdq.receive_msg::<GspInitDone>(Cmdq::RECEIVE_TIMEOUT) {
+ Ok(_) => break Ok(()),
+ Err(ERANGE) => continue,
+ Err(e) => break Err(e),
+ }
+ }
+}
+
+/// The `GetGspStaticInfo` command.
+struct GetGspStaticInfo;
+
+impl CommandToGsp for GetGspStaticInfo {
+ const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo;
+ type Command = GspStaticConfigInfo;
+ type Reply = GetGspStaticInfoReply;
+ type InitError = Infallible;
+
+ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
+ GspStaticConfigInfo::init_zeroed()
+ }
+}
+
+/// The reply from the GSP to the [`GetGspInfo`] command.
+pub(crate) struct GetGspStaticInfoReply {
+ gpu_name: [u8; 64],
+}
+
+impl MessageFromGsp for GetGspStaticInfoReply {
+ const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo;
+ type Message = GspStaticConfigInfo;
+ type InitError = Infallible;
+
+ fn read(
+ msg: &Self::Message,
+ _sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>,
+ ) -> Result<Self, Self::InitError> {
+ Ok(GetGspStaticInfoReply {
+ gpu_name: msg.gpu_name_str(),
+ })
+ }
+}
+
+/// Error type for [`GetGspStaticInfoReply::gpu_name`].
+#[derive(Debug)]
+pub(crate) enum GpuNameError {
+ /// The GPU name string does not contain a null terminator.
+ NoNullTerminator(FromBytesUntilNulError),
+
+ /// The GPU name string contains invalid UTF-8.
+ #[expect(dead_code)]
+ InvalidUtf8(Utf8Error),
+}
+
+impl GetGspStaticInfoReply {
+ /// Returns the name of the GPU as a string.
+ ///
+ /// Returns an error if the string given by the GSP does not contain a null terminator or
+ /// contains invalid UTF-8.
+ pub(crate) fn gpu_name(&self) -> core::result::Result<&str, GpuNameError> {
+ CStr::from_bytes_until_nul(&self.gpu_name)
+ .map_err(GpuNameError::NoNullTerminator)?
+ .to_str()
+ .map_err(GpuNameError::InvalidUtf8)
+ }
+}
+
+/// Send the [`GetGspInfo`] command and awaits for its reply.
+pub(crate) fn get_gsp_info(cmdq: &Cmdq, bar: &Bar0) -> Result<GetGspStaticInfoReply> {
+ cmdq.send_command(bar, GetGspStaticInfo)
+}
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
new file mode 100644
index 000000000000..0c8a74f0e8ac
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -0,0 +1,922 @@
+// SPDX-License-Identifier: GPL-2.0
+
+pub(crate) mod commands;
+mod r570_144;
+
+// Alias to avoid repeating the version number with every use.
+use r570_144 as bindings;
+
+use core::ops::Range;
+
+use kernel::{
+ dma::Coherent,
+ prelude::*,
+ ptr::{
+ Alignable,
+ Alignment,
+ KnownSize, //
+ },
+ sizes::{
+ SZ_128K,
+ SZ_1M, //
+ },
+ transmute::{
+ AsBytes,
+ FromBytes, //
+ },
+};
+
+use crate::{
+ fb::FbLayout,
+ firmware::gsp::GspFirmware,
+ gpu::Chipset,
+ gsp::{
+ cmdq::Cmdq, //
+ GSP_PAGE_SIZE,
+ },
+ num::{
+ self,
+ FromSafeCast, //
+ },
+};
+
+// TODO: Replace with `IoView` projections once available.
+pub(super) mod gsp_mem {
+ use core::sync::atomic::{
+ fence,
+ Ordering, //
+ };
+
+ use kernel::{
+ dma::Coherent,
+ dma_read,
+ dma_write, //
+ };
+
+ use crate::gsp::cmdq::{
+ GspMem,
+ MSGQ_NUM_PAGES, //
+ };
+
+ pub(in crate::gsp) fn gsp_write_ptr(qs: &Coherent<GspMem>) -> u32 {
+ dma_read!(qs, .gspq.tx.0.writePtr) % MSGQ_NUM_PAGES
+ }
+
+ pub(in crate::gsp) fn gsp_read_ptr(qs: &Coherent<GspMem>) -> u32 {
+ dma_read!(qs, .gspq.rx.0.readPtr) % MSGQ_NUM_PAGES
+ }
+
+ pub(in crate::gsp) fn cpu_read_ptr(qs: &Coherent<GspMem>) -> u32 {
+ dma_read!(qs, .cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES
+ }
+
+ pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &Coherent<GspMem>, count: u32) {
+ let rptr = cpu_read_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES;
+
+ // Ensure read pointer is properly ordered.
+ fence(Ordering::SeqCst);
+
+ dma_write!(qs, .cpuq.rx.0.readPtr, rptr);
+ }
+
+ pub(in crate::gsp) fn cpu_write_ptr(qs: &Coherent<GspMem>) -> u32 {
+ dma_read!(qs, .cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES
+ }
+
+ pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &Coherent<GspMem>, count: u32) {
+ let wptr = cpu_write_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES;
+
+ dma_write!(qs, .cpuq.tx.0.writePtr, wptr);
+
+ // Ensure all command data is visible before triggering the GSP read.
+ fence(Ordering::SeqCst);
+ }
+}
+
+/// Maximum size of a single GSP message queue element in bytes.
+pub(crate) const GSP_MSG_QUEUE_ELEMENT_SIZE_MAX: usize =
+ num::u32_as_usize(bindings::GSP_MSG_QUEUE_ELEMENT_SIZE_MAX);
+
+/// Empty type to group methods related to heap parameters for running the GSP firmware.
+enum GspFwHeapParams {}
+
+/// Minimum required alignment for the GSP heap.
+const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>();
+
+impl GspFwHeapParams {
+ /// Returns the amount of GSP-RM heap memory used during GSP-RM boot and initialization (up to
+ /// and including the first client subdevice allocation).
+ fn base_rm_size(_chipset: Chipset) -> u64 {
+ // TODO: this needs to be updated to return the correct value for Hopper+ once support for
+ // them is added:
+ // u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100)
+ u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X)
+ }
+
+ /// Returns the amount of heap memory required to support a single channel allocation.
+ fn client_alloc_size() -> u64 {
+ u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE)
+ .align_up(GSP_HEAP_ALIGNMENT)
+ .unwrap_or(u64::MAX)
+ }
+
+ /// Returns the amount of memory to reserve for management purposes for a framebuffer of size
+ /// `fb_size`.
+ fn management_overhead(fb_size: u64) -> u64 {
+ let fb_size_gb = fb_size.div_ceil(u64::from_safe_cast(kernel::sizes::SZ_1G));
+
+ u64::from(bindings::GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB)
+ .saturating_mul(fb_size_gb)
+ .align_up(GSP_HEAP_ALIGNMENT)
+ .unwrap_or(u64::MAX)
+ }
+}
+
+/// Heap memory requirements and constraints for a given version of the GSP LIBOS.
+pub(crate) struct LibosParams {
+ /// The base amount of heap required by the GSP operating system, in bytes.
+ carveout_size: u64,
+ /// The minimum and maximum sizes allowed for the GSP FW heap, in bytes.
+ allowed_heap_size: Range<u64>,
+}
+
+impl LibosParams {
+ /// Version 2 of the GSP LIBOS (Turing and GA100)
+ const LIBOS2: LibosParams = LibosParams {
+ carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2),
+ allowed_heap_size: num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB)
+ * num::usize_as_u64(SZ_1M)
+ ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB)
+ * num::usize_as_u64(SZ_1M),
+ };
+
+ /// Version 3 of the GSP LIBOS (GA102+)
+ const LIBOS3: LibosParams = LibosParams {
+ carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL),
+ allowed_heap_size: num::u32_as_u64(
+ bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB,
+ ) * num::usize_as_u64(SZ_1M)
+ ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB)
+ * num::usize_as_u64(SZ_1M),
+ };
+
+ /// Returns the libos parameters corresponding to `chipset`.
+ pub(crate) fn from_chipset(chipset: Chipset) -> &'static LibosParams {
+ if chipset < Chipset::GA102 {
+ &Self::LIBOS2
+ } else {
+ &Self::LIBOS3
+ }
+ }
+
+ /// Returns the amount of memory (in bytes) to allocate for the WPR heap for a framebuffer size
+ /// of `fb_size` (in bytes) for `chipset`.
+ pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> u64 {
+ // The WPR heap will contain the following:
+ // LIBOS carveout,
+ self.carveout_size
+ // RM boot working memory,
+ .saturating_add(GspFwHeapParams::base_rm_size(chipset))
+ // One RM client,
+ .saturating_add(GspFwHeapParams::client_alloc_size())
+ // Overhead for memory management.
+ .saturating_add(GspFwHeapParams::management_overhead(fb_size))
+ // Clamp to the supported heap sizes.
+ .clamp(self.allowed_heap_size.start, self.allowed_heap_size.end - 1)
+ }
+}
+
+/// Structure passed to the GSP bootloader, containing the framebuffer layout as well as the DMA
+/// addresses of the GSP bootloader and firmware.
+#[repr(transparent)]
+pub(crate) struct GspFwWprMeta {
+ inner: bindings::GspFwWprMeta,
+}
+
+// SAFETY: Padding is explicit and does not contain uninitialized data.
+unsafe impl AsBytes for GspFwWprMeta {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
+// are valid.
+unsafe impl FromBytes for GspFwWprMeta {}
+
+type GspFwWprMetaBootResumeInfo = bindings::GspFwWprMeta__bindgen_ty_1;
+type GspFwWprMetaBootInfo = bindings::GspFwWprMeta__bindgen_ty_1__bindgen_ty_1;
+
+impl GspFwWprMeta {
+ /// Returns an initializer for a `GspFwWprMeta` suitable for booting `gsp_firmware` using the
+ /// `fb_layout` layout.
+ pub(crate) fn new<'a>(
+ gsp_firmware: &'a GspFirmware,
+ fb_layout: &'a FbLayout,
+ ) -> impl Init<Self> + 'a {
+ #[allow(non_snake_case)]
+ let init_inner = init!(bindings::GspFwWprMeta {
+ // CAST: we want to store the bits of `GSP_FW_WPR_META_MAGIC` unmodified.
+ magic: bindings::GSP_FW_WPR_META_MAGIC as u64,
+ revision: u64::from(bindings::GSP_FW_WPR_META_REVISION),
+ sysmemAddrOfRadix3Elf: gsp_firmware.radix3_dma_handle(),
+ sizeOfRadix3Elf: u64::from_safe_cast(gsp_firmware.size),
+ sysmemAddrOfBootloader: gsp_firmware.bootloader.ucode.dma_handle(),
+ sizeOfBootloader: u64::from_safe_cast(gsp_firmware.bootloader.ucode.size()),
+ bootloaderCodeOffset: u64::from(gsp_firmware.bootloader.code_offset),
+ bootloaderDataOffset: u64::from(gsp_firmware.bootloader.data_offset),
+ bootloaderManifestOffset: u64::from(gsp_firmware.bootloader.manifest_offset),
+ __bindgen_anon_1: GspFwWprMetaBootResumeInfo {
+ __bindgen_anon_1: GspFwWprMetaBootInfo {
+ sysmemAddrOfSignature: gsp_firmware.signatures.dma_handle(),
+ sizeOfSignature: u64::from_safe_cast(gsp_firmware.signatures.size()),
+ },
+ },
+ gspFwRsvdStart: fb_layout.heap.start,
+ nonWprHeapOffset: fb_layout.heap.start,
+ nonWprHeapSize: fb_layout.heap.end - fb_layout.heap.start,
+ gspFwWprStart: fb_layout.wpr2.start,
+ gspFwHeapOffset: fb_layout.wpr2_heap.start,
+ gspFwHeapSize: fb_layout.wpr2_heap.end - fb_layout.wpr2_heap.start,
+ gspFwOffset: fb_layout.elf.start,
+ bootBinOffset: fb_layout.boot.start,
+ frtsOffset: fb_layout.frts.start,
+ frtsSize: fb_layout.frts.end - fb_layout.frts.start,
+ gspFwWprEnd: fb_layout
+ .vga_workspace
+ .start
+ .align_down(Alignment::new::<SZ_128K>()),
+ gspFwHeapVfPartitionCount: fb_layout.vf_partition_count,
+ fbSize: fb_layout.fb.end - fb_layout.fb.start,
+ vgaWorkspaceOffset: fb_layout.vga_workspace.start,
+ vgaWorkspaceSize: fb_layout.vga_workspace.end - fb_layout.vga_workspace.start,
+ ..Zeroable::init_zeroed()
+ });
+
+ init!(GspFwWprMeta {
+ inner <- init_inner,
+ })
+ }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+#[repr(u32)]
+pub(crate) enum MsgFunction {
+ // Common function codes
+ AllocChannelDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA,
+ AllocCtxDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA,
+ AllocDevice = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE,
+ AllocMemory = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY,
+ AllocObject = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT,
+ AllocRoot = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT,
+ BindCtxDma = bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA,
+ ContinuationRecord = bindings::NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD,
+ Free = bindings::NV_VGPU_MSG_FUNCTION_FREE,
+ GetGspStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO,
+ GetStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO,
+ GspInitPostObjGpu = bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU,
+ GspRmControl = bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL,
+ GspSetSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO,
+ Log = bindings::NV_VGPU_MSG_FUNCTION_LOG,
+ MapMemory = bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY,
+ Nop = bindings::NV_VGPU_MSG_FUNCTION_NOP,
+ SetGuestSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO,
+ SetRegistry = bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY,
+
+ // Event codes
+ GspInitDone = bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE,
+ GspLockdownNotice = bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE,
+ GspPostNoCat = bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD,
+ GspRunCpuSequencer = bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER,
+ MmuFaultQueued = bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED,
+ OsErrorLog = bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG,
+ PostEvent = bindings::NV_VGPU_MSG_EVENT_POST_EVENT,
+ RcTriggered = bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED,
+ UcodeLibOsPrint = bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT,
+}
+
+impl TryFrom<u32> for MsgFunction {
+ type Error = kernel::error::Error;
+
+ fn try_from(value: u32) -> Result<MsgFunction> {
+ match value {
+ // Common function codes
+ bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA => Ok(MsgFunction::AllocChannelDma),
+ bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA => Ok(MsgFunction::AllocCtxDma),
+ bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE => Ok(MsgFunction::AllocDevice),
+ bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY => Ok(MsgFunction::AllocMemory),
+ bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT => Ok(MsgFunction::AllocObject),
+ bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT => Ok(MsgFunction::AllocRoot),
+ bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA => Ok(MsgFunction::BindCtxDma),
+ bindings::NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD => {
+ Ok(MsgFunction::ContinuationRecord)
+ }
+ bindings::NV_VGPU_MSG_FUNCTION_FREE => Ok(MsgFunction::Free),
+ bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO => Ok(MsgFunction::GetGspStaticInfo),
+ bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO => Ok(MsgFunction::GetStaticInfo),
+ bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU => {
+ Ok(MsgFunction::GspInitPostObjGpu)
+ }
+ bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL => Ok(MsgFunction::GspRmControl),
+ bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO => Ok(MsgFunction::GspSetSystemInfo),
+ bindings::NV_VGPU_MSG_FUNCTION_LOG => Ok(MsgFunction::Log),
+ bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY => Ok(MsgFunction::MapMemory),
+ bindings::NV_VGPU_MSG_FUNCTION_NOP => Ok(MsgFunction::Nop),
+ bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO => {
+ Ok(MsgFunction::SetGuestSystemInfo)
+ }
+ bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY => Ok(MsgFunction::SetRegistry),
+
+ // Event codes
+ bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE => Ok(MsgFunction::GspInitDone),
+ bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE => Ok(MsgFunction::GspLockdownNotice),
+ bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD => Ok(MsgFunction::GspPostNoCat),
+ bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER => {
+ Ok(MsgFunction::GspRunCpuSequencer)
+ }
+ bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED => Ok(MsgFunction::MmuFaultQueued),
+ bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG => Ok(MsgFunction::OsErrorLog),
+ bindings::NV_VGPU_MSG_EVENT_POST_EVENT => Ok(MsgFunction::PostEvent),
+ bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED => Ok(MsgFunction::RcTriggered),
+ bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT => Ok(MsgFunction::UcodeLibOsPrint),
+ _ => Err(EINVAL),
+ }
+ }
+}
+
+impl From<MsgFunction> for u32 {
+ fn from(value: MsgFunction) -> Self {
+ // CAST: `MsgFunction` is `repr(u32)` and can thus be cast losslessly.
+ value as u32
+ }
+}
+
+/// Sequencer buffer opcode for GSP sequencer commands.
+#[derive(Copy, Clone, Debug, PartialEq)]
+#[repr(u32)]
+pub(crate) enum SeqBufOpcode {
+ // Core operation opcodes
+ CoreReset = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET,
+ CoreResume = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME,
+ CoreStart = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START,
+ CoreWaitForHalt = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT,
+
+ // Delay opcode
+ DelayUs = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US,
+
+ // Register operation opcodes
+ RegModify = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY,
+ RegPoll = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL,
+ RegStore = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE,
+ RegWrite = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE,
+}
+
+impl TryFrom<u32> for SeqBufOpcode {
+ type Error = kernel::error::Error;
+
+ fn try_from(value: u32) -> Result<SeqBufOpcode> {
+ match value {
+ bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET => {
+ Ok(SeqBufOpcode::CoreReset)
+ }
+ bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME => {
+ Ok(SeqBufOpcode::CoreResume)
+ }
+ bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START => {
+ Ok(SeqBufOpcode::CoreStart)
+ }
+ bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT => {
+ Ok(SeqBufOpcode::CoreWaitForHalt)
+ }
+ bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US => Ok(SeqBufOpcode::DelayUs),
+ bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY => {
+ Ok(SeqBufOpcode::RegModify)
+ }
+ bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL => Ok(SeqBufOpcode::RegPoll),
+ bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE => Ok(SeqBufOpcode::RegStore),
+ bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE => Ok(SeqBufOpcode::RegWrite),
+ _ => Err(EINVAL),
+ }
+ }
+}
+
+impl From<SeqBufOpcode> for u32 {
+ fn from(value: SeqBufOpcode) -> Self {
+ // CAST: `SeqBufOpcode` is `repr(u32)` and can thus be cast losslessly.
+ value as u32
+ }
+}
+
+/// Wrapper for GSP sequencer register write payload.
+#[repr(transparent)]
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct RegWritePayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_WRITE);
+
+impl RegWritePayload {
+ /// Returns the register address.
+ pub(crate) fn addr(&self) -> u32 {
+ self.0.addr
+ }
+
+ /// Returns the value to write.
+ pub(crate) fn val(&self) -> u32 {
+ self.0.val
+ }
+}
+
+// SAFETY: This struct only contains integer types for which all bit patterns are valid.
+unsafe impl FromBytes for RegWritePayload {}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for RegWritePayload {}
+
+/// Wrapper for GSP sequencer register modify payload.
+#[repr(transparent)]
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct RegModifyPayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_MODIFY);
+
+impl RegModifyPayload {
+ /// Returns the register address.
+ pub(crate) fn addr(&self) -> u32 {
+ self.0.addr
+ }
+
+ /// Returns the mask to apply.
+ pub(crate) fn mask(&self) -> u32 {
+ self.0.mask
+ }
+
+ /// Returns the value to write.
+ pub(crate) fn val(&self) -> u32 {
+ self.0.val
+ }
+}
+
+// SAFETY: This struct only contains integer types for which all bit patterns are valid.
+unsafe impl FromBytes for RegModifyPayload {}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for RegModifyPayload {}
+
+/// Wrapper for GSP sequencer register poll payload.
+#[repr(transparent)]
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct RegPollPayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_POLL);
+
+impl RegPollPayload {
+ /// Returns the register address.
+ pub(crate) fn addr(&self) -> u32 {
+ self.0.addr
+ }
+
+ /// Returns the mask to apply.
+ pub(crate) fn mask(&self) -> u32 {
+ self.0.mask
+ }
+
+ /// Returns the expected value.
+ pub(crate) fn val(&self) -> u32 {
+ self.0.val
+ }
+
+ /// Returns the timeout in microseconds.
+ pub(crate) fn timeout(&self) -> u32 {
+ self.0.timeout
+ }
+}
+
+// SAFETY: This struct only contains integer types for which all bit patterns are valid.
+unsafe impl FromBytes for RegPollPayload {}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for RegPollPayload {}
+
+/// Wrapper for GSP sequencer delay payload.
+#[repr(transparent)]
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct DelayUsPayload(bindings::GSP_SEQ_BUF_PAYLOAD_DELAY_US);
+
+impl DelayUsPayload {
+ /// Returns the delay value in microseconds.
+ pub(crate) fn val(&self) -> u32 {
+ self.0.val
+ }
+}
+
+// SAFETY: This struct only contains integer types for which all bit patterns are valid.
+unsafe impl FromBytes for DelayUsPayload {}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for DelayUsPayload {}
+
+/// Wrapper for GSP sequencer register store payload.
+#[repr(transparent)]
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct RegStorePayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_STORE);
+
+impl RegStorePayload {
+ /// Returns the register address.
+ pub(crate) fn addr(&self) -> u32 {
+ self.0.addr
+ }
+
+ /// Returns the storage index.
+ #[allow(unused)]
+ pub(crate) fn index(&self) -> u32 {
+ self.0.index
+ }
+}
+
+// SAFETY: This struct only contains integer types for which all bit patterns are valid.
+unsafe impl FromBytes for RegStorePayload {}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for RegStorePayload {}
+
+/// Wrapper for GSP sequencer buffer command.
+#[repr(transparent)]
+pub(crate) struct SequencerBufferCmd(bindings::GSP_SEQUENCER_BUFFER_CMD);
+
+impl SequencerBufferCmd {
+ /// Returns the opcode as a `SeqBufOpcode` enum, or error if invalid.
+ pub(crate) fn opcode(&self) -> Result<SeqBufOpcode> {
+ self.0.opCode.try_into()
+ }
+
+ /// Returns the register write payload by value.
+ ///
+ /// Returns an error if the opcode is not `SeqBufOpcode::RegWrite`.
+ pub(crate) fn reg_write_payload(&self) -> Result<RegWritePayload> {
+ if self.opcode()? != SeqBufOpcode::RegWrite {
+ return Err(EINVAL);
+ }
+ // SAFETY: Opcode is verified to be `RegWrite`, so union contains valid `RegWritePayload`.
+ Ok(RegWritePayload(unsafe { self.0.payload.regWrite }))
+ }
+
+ /// Returns the register modify payload by value.
+ ///
+ /// Returns an error if the opcode is not `SeqBufOpcode::RegModify`.
+ pub(crate) fn reg_modify_payload(&self) -> Result<RegModifyPayload> {
+ if self.opcode()? != SeqBufOpcode::RegModify {
+ return Err(EINVAL);
+ }
+ // SAFETY: Opcode is verified to be `RegModify`, so union contains valid `RegModifyPayload`.
+ Ok(RegModifyPayload(unsafe { self.0.payload.regModify }))
+ }
+
+ /// Returns the register poll payload by value.
+ ///
+ /// Returns an error if the opcode is not `SeqBufOpcode::RegPoll`.
+ pub(crate) fn reg_poll_payload(&self) -> Result<RegPollPayload> {
+ if self.opcode()? != SeqBufOpcode::RegPoll {
+ return Err(EINVAL);
+ }
+ // SAFETY: Opcode is verified to be `RegPoll`, so union contains valid `RegPollPayload`.
+ Ok(RegPollPayload(unsafe { self.0.payload.regPoll }))
+ }
+
+ /// Returns the delay payload by value.
+ ///
+ /// Returns an error if the opcode is not `SeqBufOpcode::DelayUs`.
+ pub(crate) fn delay_us_payload(&self) -> Result<DelayUsPayload> {
+ if self.opcode()? != SeqBufOpcode::DelayUs {
+ return Err(EINVAL);
+ }
+ // SAFETY: Opcode is verified to be `DelayUs`, so union contains valid `DelayUsPayload`.
+ Ok(DelayUsPayload(unsafe { self.0.payload.delayUs }))
+ }
+
+ /// Returns the register store payload by value.
+ ///
+ /// Returns an error if the opcode is not `SeqBufOpcode::RegStore`.
+ pub(crate) fn reg_store_payload(&self) -> Result<RegStorePayload> {
+ if self.opcode()? != SeqBufOpcode::RegStore {
+ return Err(EINVAL);
+ }
+ // SAFETY: Opcode is verified to be `RegStore`, so union contains valid `RegStorePayload`.
+ Ok(RegStorePayload(unsafe { self.0.payload.regStore }))
+ }
+}
+
+// SAFETY: This struct only contains integer types for which all bit patterns are valid.
+unsafe impl FromBytes for SequencerBufferCmd {}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for SequencerBufferCmd {}
+
+/// Wrapper for GSP run CPU sequencer RPC.
+#[repr(transparent)]
+pub(crate) struct RunCpuSequencer(bindings::rpc_run_cpu_sequencer_v17_00);
+
+impl RunCpuSequencer {
+ /// Returns the command index.
+ pub(crate) fn cmd_index(&self) -> u32 {
+ self.0.cmdIndex
+ }
+}
+
+// SAFETY: This struct only contains integer types for which all bit patterns are valid.
+unsafe impl FromBytes for RunCpuSequencer {}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for RunCpuSequencer {}
+
+/// Struct containing the arguments required to pass a memory buffer to the GSP
+/// for use during initialisation.
+///
+/// The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is
+/// configured for a larger page size (e.g. 64K pages), we need to give
+/// the GSP an array of 4K pages. Since we only create physically contiguous
+/// buffers the math to calculate the addresses is simple.
+///
+/// The buffers must be a multiple of GSP_PAGE_SIZE. GSP-RM also currently
+/// ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the
+/// buffers to be physically contiguous anyway.
+///
+/// The memory allocated for the arguments must remain until the GSP sends the
+/// init_done RPC.
+#[repr(transparent)]
+pub(crate) struct LibosMemoryRegionInitArgument {
+ inner: bindings::LibosMemoryRegionInitArgument,
+}
+
+// SAFETY: Padding is explicit and does not contain uninitialized data.
+unsafe impl AsBytes for LibosMemoryRegionInitArgument {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
+// are valid.
+unsafe impl FromBytes for LibosMemoryRegionInitArgument {}
+
+impl LibosMemoryRegionInitArgument {
+ pub(crate) fn new<'a, A: AsBytes + FromBytes + KnownSize + ?Sized>(
+ name: &'static str,
+ obj: &'a Coherent<A>,
+ ) -> impl Init<Self> + 'a {
+ /// Generates the `ID8` identifier required for some GSP objects.
+ fn id8(name: &str) -> u64 {
+ let mut bytes = [0u8; core::mem::size_of::<u64>()];
+
+ for (c, b) in name.bytes().rev().zip(&mut bytes) {
+ *b = c;
+ }
+
+ u64::from_ne_bytes(bytes)
+ }
+
+ #[allow(non_snake_case)]
+ let init_inner = init!(bindings::LibosMemoryRegionInitArgument {
+ id8: id8(name),
+ pa: obj.dma_handle(),
+ size: num::usize_as_u64(obj.size()),
+ kind: num::u32_into_u8::<
+ { bindings::LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS },
+ >(),
+ loc: num::u32_into_u8::<
+ { bindings::LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM },
+ >(),
+ ..Zeroable::init_zeroed()
+ });
+
+ init!(LibosMemoryRegionInitArgument {
+ inner <- init_inner,
+ })
+ }
+}
+
+/// TX header for setting up a message queue with the GSP.
+#[repr(transparent)]
+pub(crate) struct MsgqTxHeader(bindings::msgqTxHeader);
+
+impl MsgqTxHeader {
+ /// Create a new TX queue header.
+ ///
+ /// # Arguments
+ ///
+ /// * `msgq_size` - Total size of the message queue structure, in bytes.
+ /// * `rx_hdr_offset` - Offset, in bytes, of the start of the RX header in the message queue
+ /// structure.
+ /// * `msg_count` - Number of messages that can be sent, i.e. the number of memory pages
+ /// allocated for the message queue in the message queue structure.
+ pub(crate) fn new(msgq_size: u32, rx_hdr_offset: u32, msg_count: u32) -> Self {
+ Self(bindings::msgqTxHeader {
+ version: 0,
+ size: msgq_size,
+ msgSize: num::usize_into_u32::<GSP_PAGE_SIZE>(),
+ msgCount: msg_count,
+ writePtr: 0,
+ flags: 1,
+ rxHdrOff: rx_hdr_offset,
+ entryOff: num::usize_into_u32::<GSP_PAGE_SIZE>(),
+ })
+ }
+}
+
+// SAFETY: Padding is explicit and does not contain uninitialized data.
+unsafe impl AsBytes for MsgqTxHeader {}
+
+/// RX header for setting up a message queue with the GSP.
+#[repr(transparent)]
+pub(crate) struct MsgqRxHeader(bindings::msgqRxHeader);
+
+/// Header for the message RX queue.
+impl MsgqRxHeader {
+ /// Creates a new RX queue header.
+ pub(crate) fn new() -> Self {
+ Self(Default::default())
+ }
+}
+
+// SAFETY: Padding is explicit and does not contain uninitialized data.
+unsafe impl AsBytes for MsgqRxHeader {}
+
+bitfield! {
+ struct MsgHeaderVersion(u32) {
+ 31:24 major as u8;
+ 23:16 minor as u8;
+ }
+}
+
+impl MsgHeaderVersion {
+ const MAJOR_TOT: u8 = 3;
+ const MINOR_TOT: u8 = 0;
+
+ fn new() -> Self {
+ Self::default()
+ .set_major(Self::MAJOR_TOT)
+ .set_minor(Self::MINOR_TOT)
+ }
+}
+
+impl bindings::rpc_message_header_v {
+ fn init(cmd_size: usize, function: MsgFunction) -> impl Init<Self, Error> {
+ type RpcMessageHeader = bindings::rpc_message_header_v;
+
+ try_init!(RpcMessageHeader {
+ header_version: MsgHeaderVersion::new().into(),
+ signature: bindings::NV_VGPU_MSG_SIGNATURE_VALID,
+ function: function.into(),
+ length: size_of::<Self>()
+ .checked_add(cmd_size)
+ .ok_or(EOVERFLOW)
+ .and_then(|v| v.try_into().map_err(|_| EINVAL))?,
+ rpc_result: 0xffffffff,
+ rpc_result_private: 0xffffffff,
+ ..Zeroable::init_zeroed()
+ })
+ }
+}
+
+/// GSP Message Element.
+///
+/// This is essentially a message header expected to be followed by the message data.
+#[repr(transparent)]
+pub(crate) struct GspMsgElement {
+ inner: bindings::GSP_MSG_QUEUE_ELEMENT,
+}
+
+impl GspMsgElement {
+ /// Creates a new message element.
+ ///
+ /// # Arguments
+ ///
+ /// * `sequence` - Sequence number of the message.
+ /// * `cmd_size` - Size of the command (not including the message element), in bytes.
+ /// * `function` - Function of the message.
+ #[allow(non_snake_case)]
+ pub(crate) fn init(
+ sequence: u32,
+ cmd_size: usize,
+ function: MsgFunction,
+ ) -> impl Init<Self, Error> {
+ type RpcMessageHeader = bindings::rpc_message_header_v;
+ type InnerGspMsgElement = bindings::GSP_MSG_QUEUE_ELEMENT;
+ let init_inner = try_init!(InnerGspMsgElement {
+ seqNum: sequence,
+ elemCount: size_of::<Self>()
+ .checked_add(cmd_size)
+ .ok_or(EOVERFLOW)?
+ .div_ceil(GSP_PAGE_SIZE)
+ .try_into()
+ .map_err(|_| EOVERFLOW)?,
+ rpc <- RpcMessageHeader::init(cmd_size, function),
+ ..Zeroable::init_zeroed()
+ });
+
+ try_init!(GspMsgElement {
+ inner <- init_inner,
+ })
+ }
+
+ /// Sets the checksum of this message.
+ ///
+ /// Since the header is also part of the checksum, this is usually called after the whole
+ /// message has been written to the shared memory area.
+ pub(crate) fn set_checksum(&mut self, checksum: u32) {
+ self.inner.checkSum = checksum;
+ }
+
+ /// Returns the length of the message's payload.
+ pub(crate) fn payload_length(&self) -> usize {
+ // `rpc.length` includes the length of the RPC message header.
+ num::u32_as_usize(self.inner.rpc.length)
+ .saturating_sub(size_of::<bindings::rpc_message_header_v>())
+ }
+
+ /// Returns the total length of the message, message and RPC headers included.
+ pub(crate) fn length(&self) -> usize {
+ size_of::<Self>() + self.payload_length()
+ }
+
+ // Returns the sequence number of the message.
+ pub(crate) fn sequence(&self) -> u32 {
+ self.inner.rpc.sequence
+ }
+
+ // Returns the function of the message, if it is valid, or the invalid function number as an
+ // error.
+ pub(crate) fn function(&self) -> Result<MsgFunction, u32> {
+ self.inner
+ .rpc
+ .function
+ .try_into()
+ .map_err(|_| self.inner.rpc.function)
+ }
+
+ // Returns the number of elements (i.e. memory pages) used by this message.
+ pub(crate) fn element_count(&self) -> u32 {
+ self.inner.elemCount
+ }
+}
+
+// SAFETY: Padding is explicit and does not contain uninitialized data.
+unsafe impl AsBytes for GspMsgElement {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
+// are valid.
+unsafe impl FromBytes for GspMsgElement {}
+
+/// Arguments for GSP startup.
+#[repr(transparent)]
+#[derive(Zeroable)]
+pub(crate) struct GspArgumentsCached {
+ inner: bindings::GSP_ARGUMENTS_CACHED,
+}
+
+impl GspArgumentsCached {
+ /// Creates the arguments for starting the GSP up using `cmdq` as its command queue.
+ pub(crate) fn new(cmdq: &Cmdq) -> impl Init<Self> + '_ {
+ #[allow(non_snake_case)]
+ let init_inner = init!(bindings::GSP_ARGUMENTS_CACHED {
+ messageQueueInitArguments <- MessageQueueInitArguments::new(cmdq),
+ bDmemStack: 1,
+ ..Zeroable::init_zeroed()
+ });
+
+ init!(GspArgumentsCached {
+ inner <- init_inner,
+ })
+ }
+}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for GspArgumentsCached {}
+
+/// On Turing and GA100, the entries in the `LibosMemoryRegionInitArgument`
+/// must all be a multiple of GSP_PAGE_SIZE in size, so add padding to force it
+/// to that size.
+#[repr(C)]
+#[derive(Zeroable)]
+pub(crate) struct GspArgumentsPadded {
+ pub(crate) inner: GspArgumentsCached,
+ _padding: [u8; GSP_PAGE_SIZE - core::mem::size_of::<bindings::GSP_ARGUMENTS_CACHED>()],
+}
+
+impl GspArgumentsPadded {
+ pub(crate) fn new(cmdq: &Cmdq) -> impl Init<Self> + '_ {
+ init!(GspArgumentsPadded {
+ inner <- GspArgumentsCached::new(cmdq),
+ ..Zeroable::init_zeroed()
+ })
+ }
+}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for GspArgumentsPadded {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
+// are valid.
+unsafe impl FromBytes for GspArgumentsPadded {}
+
+/// Init arguments for the message queue.
+type MessageQueueInitArguments = bindings::MESSAGE_QUEUE_INIT_ARGUMENTS;
+
+impl MessageQueueInitArguments {
+ /// Creates a new init arguments structure for `cmdq`.
+ #[allow(non_snake_case)]
+ fn new(cmdq: &Cmdq) -> impl Init<Self> + '_ {
+ init!(MessageQueueInitArguments {
+ sharedMemPhysAddr: cmdq.dma_handle,
+ pageTableEntryCount: num::usize_into_u32::<{ Cmdq::NUM_PTES }>(),
+ cmdQueueOffset: num::usize_as_u64(Cmdq::CMDQ_OFFSET),
+ statQueueOffset: num::usize_as_u64(Cmdq::STATQ_OFFSET),
+ ..Zeroable::init_zeroed()
+ })
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs
new file mode 100644
index 000000000000..db46276430be
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/fw/commands.rs
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ device,
+ pci,
+ prelude::*,
+ transmute::{
+ AsBytes,
+ FromBytes, //
+ }, //
+};
+
+use crate::gsp::GSP_PAGE_SIZE;
+
+use super::bindings;
+
+/// Payload of the `GspSetSystemInfo` command.
+#[repr(transparent)]
+pub(crate) struct GspSetSystemInfo {
+ inner: bindings::GspSystemInfo,
+}
+static_assert!(size_of::<GspSetSystemInfo>() < GSP_PAGE_SIZE);
+
+impl GspSetSystemInfo {
+ /// Returns an in-place initializer for the `GspSetSystemInfo` command.
+ #[allow(non_snake_case)]
+ pub(crate) fn init<'a>(dev: &'a pci::Device<device::Bound>) -> impl Init<Self, Error> + 'a {
+ type InnerGspSystemInfo = bindings::GspSystemInfo;
+ let init_inner = try_init!(InnerGspSystemInfo {
+ gpuPhysAddr: dev.resource_start(0)?,
+ gpuPhysFbAddr: dev.resource_start(1)?,
+ gpuPhysInstAddr: dev.resource_start(3)?,
+ nvDomainBusDeviceFunc: u64::from(dev.dev_id()),
+
+ // Using TASK_SIZE in r535_gsp_rpc_set_system_info() seems wrong because
+ // TASK_SIZE is per-task. That's probably a design issue in GSP-RM though.
+ maxUserVa: (1 << 47) - 4096,
+ pciConfigMirrorBase: 0x088000,
+ pciConfigMirrorSize: 0x001000,
+
+ PCIDeviceID: (u32::from(dev.device_id()) << 16) | u32::from(dev.vendor_id().as_raw()),
+ PCISubDeviceID: (u32::from(dev.subsystem_device_id()) << 16)
+ | u32::from(dev.subsystem_vendor_id()),
+ PCIRevisionID: u32::from(dev.revision_id()),
+ bIsPrimary: 0,
+ bPreserveVideoMemoryAllocations: 0,
+ ..Zeroable::init_zeroed()
+ });
+
+ try_init!(GspSetSystemInfo {
+ inner <- init_inner,
+ })
+ }
+}
+
+// SAFETY: These structs don't meet the no-padding requirements of AsBytes but
+// that is not a problem because they are not used outside the kernel.
+unsafe impl AsBytes for GspSetSystemInfo {}
+
+// SAFETY: These structs don't meet the no-padding requirements of FromBytes but
+// that is not a problem because they are not used outside the kernel.
+unsafe impl FromBytes for GspSetSystemInfo {}
+
+#[repr(transparent)]
+pub(crate) struct PackedRegistryEntry(bindings::PACKED_REGISTRY_ENTRY);
+
+impl PackedRegistryEntry {
+ pub(crate) fn new(offset: u32, value: u32) -> Self {
+ Self({
+ bindings::PACKED_REGISTRY_ENTRY {
+ nameOffset: offset,
+
+ // We only support DWORD types for now. Support for other types
+ // will come later if required.
+ type_: bindings::REGISTRY_TABLE_ENTRY_TYPE_DWORD as u8,
+ __bindgen_padding_0: Default::default(),
+ data: value,
+ length: 0,
+ }
+ })
+ }
+}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for PackedRegistryEntry {}
+
+/// Payload of the `SetRegistry` command.
+#[repr(transparent)]
+pub(crate) struct PackedRegistryTable {
+ inner: bindings::PACKED_REGISTRY_TABLE,
+}
+
+impl PackedRegistryTable {
+ #[allow(non_snake_case)]
+ pub(crate) fn init(num_entries: u32, size: u32) -> impl Init<Self> {
+ type InnerPackedRegistryTable = bindings::PACKED_REGISTRY_TABLE;
+ let init_inner = init!(InnerPackedRegistryTable {
+ numEntries: num_entries,
+ size,
+ entries: Default::default()
+ });
+
+ init!(PackedRegistryTable { inner <- init_inner })
+ }
+}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for PackedRegistryTable {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
+// are valid.
+unsafe impl FromBytes for PackedRegistryTable {}
+
+/// Payload of the `GetGspStaticInfo` command and message.
+#[repr(transparent)]
+#[derive(Zeroable)]
+pub(crate) struct GspStaticConfigInfo(bindings::GspStaticConfigInfo_t);
+
+impl GspStaticConfigInfo {
+ /// Returns a bytes array containing the (hopefully) zero-terminated name of this GPU.
+ pub(crate) fn gpu_name_str(&self) -> [u8; 64] {
+ self.0.gpuNameString
+ }
+}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for GspStaticConfigInfo {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
+// are valid.
+unsafe impl FromBytes for GspStaticConfigInfo {}
diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs
new file mode 100644
index 000000000000..2e6f0d298756
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Firmware bindings.
+//!
+//! Imports the generated bindings by `bindgen`.
+//!
+//! This module may not be directly used. Please abstract or re-export the needed symbols in the
+//! parent module instead.
+
+#![allow(
+ dead_code,
+ clippy::all,
+ clippy::undocumented_unsafe_blocks,
+ clippy::ptr_as_ptr,
+ clippy::ref_as_ptr,
+ missing_docs,
+ non_camel_case_types,
+ non_upper_case_globals,
+ non_snake_case,
+ improper_ctypes,
+ unreachable_pub,
+ unsafe_op_in_unsafe_fn
+)]
+use kernel::ffi;
+use pin_init::MaybeZeroable;
+
+include!("r570_144/bindings.rs");
+
+// SAFETY: This type has a size of zero, so its inclusion into another type should not affect their
+// ability to implement `Zeroable`.
+unsafe impl<T> kernel::prelude::Zeroable for __IncompleteArrayField<T> {}
diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
new file mode 100644
index 000000000000..334e8be5fde8
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
@@ -0,0 +1,965 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#[repr(C)]
+#[derive(Default)]
+pub struct __IncompleteArrayField<T>(::core::marker::PhantomData<T>, [T; 0]);
+impl<T> __IncompleteArrayField<T> {
+ #[inline]
+ pub const fn new() -> Self {
+ __IncompleteArrayField(::core::marker::PhantomData, [])
+ }
+ #[inline]
+ pub fn as_ptr(&self) -> *const T {
+ self as *const _ as *const T
+ }
+ #[inline]
+ pub fn as_mut_ptr(&mut self) -> *mut T {
+ self as *mut _ as *mut T
+ }
+ #[inline]
+ pub unsafe fn as_slice(&self, len: usize) -> &[T] {
+ ::core::slice::from_raw_parts(self.as_ptr(), len)
+ }
+ #[inline]
+ pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] {
+ ::core::slice::from_raw_parts_mut(self.as_mut_ptr(), len)
+ }
+}
+impl<T> ::core::fmt::Debug for __IncompleteArrayField<T> {
+ fn fmt(&self, fmt: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
+ fmt.write_str("__IncompleteArrayField")
+ }
+}
+pub const NV_VGPU_MSG_SIGNATURE_VALID: u32 = 1129337430;
+pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2: u32 = 0;
+pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL: u32 = 23068672;
+pub const GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X: u32 = 8388608;
+pub const GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB: u32 = 98304;
+pub const GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE: u32 = 100663296;
+pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB: u32 = 64;
+pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB: u32 = 256;
+pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB: u32 = 88;
+pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB: u32 = 280;
+pub const GSP_FW_WPR_META_REVISION: u32 = 1;
+pub const GSP_FW_WPR_META_MAGIC: i64 = -2577556379034558285;
+pub const REGISTRY_TABLE_ENTRY_TYPE_DWORD: u32 = 1;
+pub const GSP_MSG_QUEUE_ELEMENT_SIZE_MAX: u32 = 65536;
+pub type __u8 = ffi::c_uchar;
+pub type __u16 = ffi::c_ushort;
+pub type __u32 = ffi::c_uint;
+pub type __u64 = ffi::c_ulonglong;
+pub type u8_ = __u8;
+pub type u16_ = __u16;
+pub type u32_ = __u32;
+pub type u64_ = __u64;
+pub const NV_VGPU_MSG_FUNCTION_NOP: _bindgen_ty_2 = 0;
+pub const NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO: _bindgen_ty_2 = 1;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_ROOT: _bindgen_ty_2 = 2;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE: _bindgen_ty_2 = 3;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY: _bindgen_ty_2 = 4;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA: _bindgen_ty_2 = 5;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA: _bindgen_ty_2 = 6;
+pub const NV_VGPU_MSG_FUNCTION_MAP_MEMORY: _bindgen_ty_2 = 7;
+pub const NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA: _bindgen_ty_2 = 8;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT: _bindgen_ty_2 = 9;
+pub const NV_VGPU_MSG_FUNCTION_FREE: _bindgen_ty_2 = 10;
+pub const NV_VGPU_MSG_FUNCTION_LOG: _bindgen_ty_2 = 11;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_VIDMEM: _bindgen_ty_2 = 12;
+pub const NV_VGPU_MSG_FUNCTION_UNMAP_MEMORY: _bindgen_ty_2 = 13;
+pub const NV_VGPU_MSG_FUNCTION_MAP_MEMORY_DMA: _bindgen_ty_2 = 14;
+pub const NV_VGPU_MSG_FUNCTION_UNMAP_MEMORY_DMA: _bindgen_ty_2 = 15;
+pub const NV_VGPU_MSG_FUNCTION_GET_EDID: _bindgen_ty_2 = 16;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_DISP_CHANNEL: _bindgen_ty_2 = 17;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_DISP_OBJECT: _bindgen_ty_2 = 18;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_SUBDEVICE: _bindgen_ty_2 = 19;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_DYNAMIC_MEMORY: _bindgen_ty_2 = 20;
+pub const NV_VGPU_MSG_FUNCTION_DUP_OBJECT: _bindgen_ty_2 = 21;
+pub const NV_VGPU_MSG_FUNCTION_IDLE_CHANNELS: _bindgen_ty_2 = 22;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_EVENT: _bindgen_ty_2 = 23;
+pub const NV_VGPU_MSG_FUNCTION_SEND_EVENT: _bindgen_ty_2 = 24;
+pub const NV_VGPU_MSG_FUNCTION_REMAPPER_CONTROL: _bindgen_ty_2 = 25;
+pub const NV_VGPU_MSG_FUNCTION_DMA_CONTROL: _bindgen_ty_2 = 26;
+pub const NV_VGPU_MSG_FUNCTION_DMA_FILL_PTE_MEM: _bindgen_ty_2 = 27;
+pub const NV_VGPU_MSG_FUNCTION_MANAGE_HW_RESOURCE: _bindgen_ty_2 = 28;
+pub const NV_VGPU_MSG_FUNCTION_BIND_ARBITRARY_CTX_DMA: _bindgen_ty_2 = 29;
+pub const NV_VGPU_MSG_FUNCTION_CREATE_FB_SEGMENT: _bindgen_ty_2 = 30;
+pub const NV_VGPU_MSG_FUNCTION_DESTROY_FB_SEGMENT: _bindgen_ty_2 = 31;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_SHARE_DEVICE: _bindgen_ty_2 = 32;
+pub const NV_VGPU_MSG_FUNCTION_DEFERRED_API_CONTROL: _bindgen_ty_2 = 33;
+pub const NV_VGPU_MSG_FUNCTION_REMOVE_DEFERRED_API: _bindgen_ty_2 = 34;
+pub const NV_VGPU_MSG_FUNCTION_SIM_ESCAPE_READ: _bindgen_ty_2 = 35;
+pub const NV_VGPU_MSG_FUNCTION_SIM_ESCAPE_WRITE: _bindgen_ty_2 = 36;
+pub const NV_VGPU_MSG_FUNCTION_SIM_MANAGE_DISPLAY_CONTEXT_DMA: _bindgen_ty_2 = 37;
+pub const NV_VGPU_MSG_FUNCTION_FREE_VIDMEM_VIRT: _bindgen_ty_2 = 38;
+pub const NV_VGPU_MSG_FUNCTION_PERF_GET_PSTATE_INFO: _bindgen_ty_2 = 39;
+pub const NV_VGPU_MSG_FUNCTION_PERF_GET_PERFMON_SAMPLE: _bindgen_ty_2 = 40;
+pub const NV_VGPU_MSG_FUNCTION_PERF_GET_VIRTUAL_PSTATE_INFO: _bindgen_ty_2 = 41;
+pub const NV_VGPU_MSG_FUNCTION_PERF_GET_LEVEL_INFO: _bindgen_ty_2 = 42;
+pub const NV_VGPU_MSG_FUNCTION_MAP_SEMA_MEMORY: _bindgen_ty_2 = 43;
+pub const NV_VGPU_MSG_FUNCTION_UNMAP_SEMA_MEMORY: _bindgen_ty_2 = 44;
+pub const NV_VGPU_MSG_FUNCTION_SET_SURFACE_PROPERTIES: _bindgen_ty_2 = 45;
+pub const NV_VGPU_MSG_FUNCTION_CLEANUP_SURFACE: _bindgen_ty_2 = 46;
+pub const NV_VGPU_MSG_FUNCTION_UNLOADING_GUEST_DRIVER: _bindgen_ty_2 = 47;
+pub const NV_VGPU_MSG_FUNCTION_TDR_SET_TIMEOUT_STATE: _bindgen_ty_2 = 48;
+pub const NV_VGPU_MSG_FUNCTION_SWITCH_TO_VGA: _bindgen_ty_2 = 49;
+pub const NV_VGPU_MSG_FUNCTION_GPU_EXEC_REG_OPS: _bindgen_ty_2 = 50;
+pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO: _bindgen_ty_2 = 51;
+pub const NV_VGPU_MSG_FUNCTION_ALLOC_VIRTMEM: _bindgen_ty_2 = 52;
+pub const NV_VGPU_MSG_FUNCTION_UPDATE_PDE_2: _bindgen_ty_2 = 53;
+pub const NV_VGPU_MSG_FUNCTION_SET_PAGE_DIRECTORY: _bindgen_ty_2 = 54;
+pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_PSTATE_INFO: _bindgen_ty_2 = 55;
+pub const NV_VGPU_MSG_FUNCTION_TRANSLATE_GUEST_GPU_PTES: _bindgen_ty_2 = 56;
+pub const NV_VGPU_MSG_FUNCTION_RESERVED_57: _bindgen_ty_2 = 57;
+pub const NV_VGPU_MSG_FUNCTION_RESET_CURRENT_GR_CONTEXT: _bindgen_ty_2 = 58;
+pub const NV_VGPU_MSG_FUNCTION_SET_SEMA_MEM_VALIDATION_STATE: _bindgen_ty_2 = 59;
+pub const NV_VGPU_MSG_FUNCTION_GET_ENGINE_UTILIZATION: _bindgen_ty_2 = 60;
+pub const NV_VGPU_MSG_FUNCTION_UPDATE_GPU_PDES: _bindgen_ty_2 = 61;
+pub const NV_VGPU_MSG_FUNCTION_GET_ENCODER_CAPACITY: _bindgen_ty_2 = 62;
+pub const NV_VGPU_MSG_FUNCTION_VGPU_PF_REG_READ32: _bindgen_ty_2 = 63;
+pub const NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO_EXT: _bindgen_ty_2 = 64;
+pub const NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO: _bindgen_ty_2 = 65;
+pub const NV_VGPU_MSG_FUNCTION_RMFS_INIT: _bindgen_ty_2 = 66;
+pub const NV_VGPU_MSG_FUNCTION_RMFS_CLOSE_QUEUE: _bindgen_ty_2 = 67;
+pub const NV_VGPU_MSG_FUNCTION_RMFS_CLEANUP: _bindgen_ty_2 = 68;
+pub const NV_VGPU_MSG_FUNCTION_RMFS_TEST: _bindgen_ty_2 = 69;
+pub const NV_VGPU_MSG_FUNCTION_UPDATE_BAR_PDE: _bindgen_ty_2 = 70;
+pub const NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD: _bindgen_ty_2 = 71;
+pub const NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO: _bindgen_ty_2 = 72;
+pub const NV_VGPU_MSG_FUNCTION_SET_REGISTRY: _bindgen_ty_2 = 73;
+pub const NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU: _bindgen_ty_2 = 74;
+pub const NV_VGPU_MSG_FUNCTION_SUBDEV_EVENT_SET_NOTIFICATION: _bindgen_ty_2 = 75;
+pub const NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL: _bindgen_ty_2 = 76;
+pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO2: _bindgen_ty_2 = 77;
+pub const NV_VGPU_MSG_FUNCTION_DUMP_PROTOBUF_COMPONENT: _bindgen_ty_2 = 78;
+pub const NV_VGPU_MSG_FUNCTION_UNSET_PAGE_DIRECTORY: _bindgen_ty_2 = 79;
+pub const NV_VGPU_MSG_FUNCTION_GET_CONSOLIDATED_STATIC_INFO: _bindgen_ty_2 = 80;
+pub const NV_VGPU_MSG_FUNCTION_GMMU_REGISTER_FAULT_BUFFER: _bindgen_ty_2 = 81;
+pub const NV_VGPU_MSG_FUNCTION_GMMU_UNREGISTER_FAULT_BUFFER: _bindgen_ty_2 = 82;
+pub const NV_VGPU_MSG_FUNCTION_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER: _bindgen_ty_2 = 83;
+pub const NV_VGPU_MSG_FUNCTION_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER: _bindgen_ty_2 = 84;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_VGPU_FB_USAGE: _bindgen_ty_2 = 85;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_NVFBC_SW_SESSION_UPDATE_INFO: _bindgen_ty_2 = 86;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_NVENC_SW_SESSION_UPDATE_INFO: _bindgen_ty_2 = 87;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_RESET_CHANNEL: _bindgen_ty_2 = 88;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_RESET_ISOLATED_CHANNEL: _bindgen_ty_2 = 89;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_HANDLE_VF_PRI_FAULT: _bindgen_ty_2 = 90;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_CLK_GET_EXTENDED_INFO: _bindgen_ty_2 = 91;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_BOOST: _bindgen_ty_2 = 92;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_VPSTATES_GET_CONTROL: _bindgen_ty_2 = 93;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_ZBC_CLEAR_TABLE: _bindgen_ty_2 = 94;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_COLOR_CLEAR: _bindgen_ty_2 = 95;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_DEPTH_CLEAR: _bindgen_ty_2 = 96;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_SCHEDULE: _bindgen_ty_2 = 97;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_TIMESLICE: _bindgen_ty_2 = 98;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_PREEMPT: _bindgen_ty_2 = 99;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_DISABLE_CHANNELS: _bindgen_ty_2 = 100;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_TSG_INTERLEAVE_LEVEL: _bindgen_ty_2 = 101;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_CHANNEL_INTERLEAVE_LEVEL: _bindgen_ty_2 = 102;
+pub const NV_VGPU_MSG_FUNCTION_GSP_RM_ALLOC: _bindgen_ty_2 = 103;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS_V2: _bindgen_ty_2 = 104;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_AES_ENCRYPT: _bindgen_ty_2 = 105;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_SESSION_KEY: _bindgen_ty_2 = 106;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_SESSION_KEY_STATUS: _bindgen_ty_2 = 107;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_CLEAR_ALL_SM_ERROR_STATES: _bindgen_ty_2 = 108;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_READ_ALL_SM_ERROR_STATES: _bindgen_ty_2 = 109;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_EXCEPTION_MASK: _bindgen_ty_2 = 110;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_PROMOTE_CTX: _bindgen_ty_2 = 111;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_CTXSW_PREEMPTION_BIND: _bindgen_ty_2 = 112;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_SET_CTXSW_PREEMPTION_MODE: _bindgen_ty_2 = 113;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_CTXSW_ZCULL_BIND: _bindgen_ty_2 = 114;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_INITIALIZE_CTX: _bindgen_ty_2 = 115;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES: _bindgen_ty_2 = 116;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_CLEAR_FAULTED_BIT: _bindgen_ty_2 = 117;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_LATEST_ECC_ADDRESSES: _bindgen_ty_2 = 118;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_MC_SERVICE_INTERRUPTS: _bindgen_ty_2 = 119;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DMA_SET_DEFAULT_VASPACE: _bindgen_ty_2 = 120;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_CE_PCE_MASK: _bindgen_ty_2 = 121;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_ZBC_CLEAR_TABLE_ENTRY: _bindgen_ty_2 = 122;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_NVLINK_PEER_ID_MASK: _bindgen_ty_2 = 123;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_NVLINK_STATUS: _bindgen_ty_2 = 124;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS: _bindgen_ty_2 = 125;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS_MATRIX: _bindgen_ty_2 = 126;
+pub const NV_VGPU_MSG_FUNCTION_RESERVED_0: _bindgen_ty_2 = 127;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_PM_AREA_SMPC: _bindgen_ty_2 = 128;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_HWPM_LEGACY: _bindgen_ty_2 = 129;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_B0CC_EXEC_REG_OPS: _bindgen_ty_2 = 130;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_BIND_PM_RESOURCES: _bindgen_ty_2 = 131;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SUSPEND_CONTEXT: _bindgen_ty_2 = 132;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_RESUME_CONTEXT: _bindgen_ty_2 = 133;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_EXEC_REG_OPS: _bindgen_ty_2 = 134;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_MMU_DEBUG: _bindgen_ty_2 = 135;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_READ_SINGLE_SM_ERROR_STATE: _bindgen_ty_2 = 136;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_CLEAR_SINGLE_SM_ERROR_STATE: _bindgen_ty_2 = 137;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_ERRBAR_DEBUG: _bindgen_ty_2 = 138;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_NEXT_STOP_TRIGGER_TYPE: _bindgen_ty_2 = 139;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_ALLOC_PMA_STREAM: _bindgen_ty_2 = 140;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_PMA_STREAM_UPDATE_GET_PUT: _bindgen_ty_2 = 141;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FB_GET_INFO_V2: _bindgen_ty_2 = 142;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_SET_CHANNEL_PROPERTIES: _bindgen_ty_2 = 143;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_GET_CTX_BUFFER_INFO: _bindgen_ty_2 = 144;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_KGR_GET_CTX_BUFFER_PTES: _bindgen_ty_2 = 145;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_EVICT_CTX: _bindgen_ty_2 = 146;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FB_GET_FS_INFO: _bindgen_ty_2 = 147;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GRMGR_GET_GR_FS_INFO: _bindgen_ty_2 = 148;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_STOP_CHANNEL: _bindgen_ty_2 = 149;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_PC_SAMPLING_MODE: _bindgen_ty_2 = 150;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_RATED_TDP_GET_STATUS: _bindgen_ty_2 = 151;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_RATED_TDP_SET_CONTROL: _bindgen_ty_2 = 152;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FREE_PMA_STREAM: _bindgen_ty_2 = 153;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_TIMER_SET_GR_TICK_FREQ: _bindgen_ty_2 = 154;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB: _bindgen_ty_2 = 155;
+pub const NV_VGPU_MSG_FUNCTION_GET_CONSOLIDATED_GR_STATIC_INFO: _bindgen_ty_2 = 156;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_SINGLE_SM_SINGLE_STEP: _bindgen_ty_2 = 157;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_GET_TPC_PARTITION_MODE: _bindgen_ty_2 = 158;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_SET_TPC_PARTITION_MODE: _bindgen_ty_2 = 159;
+pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_ALLOCATE: _bindgen_ty_2 = 160;
+pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_DESTROY: _bindgen_ty_2 = 161;
+pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_MAP: _bindgen_ty_2 = 162;
+pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_UNMAP: _bindgen_ty_2 = 163;
+pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_PUSH_STREAM: _bindgen_ty_2 = 164;
+pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_SET_HANDLES: _bindgen_ty_2 = 165;
+pub const NV_VGPU_MSG_FUNCTION_UVM_METHOD_STREAM_GUEST_PAGES_OPERATION: _bindgen_ty_2 = 166;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL: _bindgen_ty_2 = 167;
+pub const NV_VGPU_MSG_FUNCTION_DCE_RM_INIT: _bindgen_ty_2 = 168;
+pub const NV_VGPU_MSG_FUNCTION_REGISTER_VIRTUAL_EVENT_BUFFER: _bindgen_ty_2 = 169;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_EVENT_BUFFER_UPDATE_GET: _bindgen_ty_2 = 170;
+pub const NV_VGPU_MSG_FUNCTION_GET_PLCABLE_ADDRESS_KIND: _bindgen_ty_2 = 171;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_LIMITS_SET_STATUS_V2: _bindgen_ty_2 = 172;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM: _bindgen_ty_2 = 173;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_MMU_DEBUG_MODE: _bindgen_ty_2 = 174;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_PROMOTE_FAULT_METHOD_BUFFERS: _bindgen_ty_2 = 175;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FLCN_GET_CTX_BUFFER_SIZE: _bindgen_ty_2 = 176;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FLCN_GET_CTX_BUFFER_INFO: _bindgen_ty_2 = 177;
+pub const NV_VGPU_MSG_FUNCTION_DISABLE_CHANNELS: _bindgen_ty_2 = 178;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FABRIC_MEMORY_DESCRIBE: _bindgen_ty_2 = 179;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FABRIC_MEM_STATS: _bindgen_ty_2 = 180;
+pub const NV_VGPU_MSG_FUNCTION_SAVE_HIBERNATION_DATA: _bindgen_ty_2 = 181;
+pub const NV_VGPU_MSG_FUNCTION_RESTORE_HIBERNATION_DATA: _bindgen_ty_2 = 182;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_MEMSYS_SET_ZBC_REFERENCED: _bindgen_ty_2 = 183;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_CREATE: _bindgen_ty_2 = 184;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_DELETE: _bindgen_ty_2 = 185;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_GET_WORK_SUBMIT_TOKEN: _bindgen_ty_2 = 186;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_SET_WORK_SUBMIT_TOKEN_NOTIF_INDEX: _bindgen_ty_2 = 187;
+pub const NV_VGPU_MSG_FUNCTION_PMA_SCRUBBER_SHARED_BUFFER_GUEST_PAGES_OPERATION: _bindgen_ty_2 =
+ 188;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK:
+ _bindgen_ty_2 = 189;
+pub const NV_VGPU_MSG_FUNCTION_SET_SYSMEM_DIRTY_PAGE_TRACKING_BUFFER: _bindgen_ty_2 = 190;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_P2P_CAPS: _bindgen_ty_2 = 191;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_BUS_SET_P2P_MAPPING: _bindgen_ty_2 = 192;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_BUS_UNSET_P2P_MAPPING: _bindgen_ty_2 = 193;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_FLA_SETUP_INSTANCE_MEM_BLOCK: _bindgen_ty_2 = 194;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_MIGRATABLE_OPS: _bindgen_ty_2 = 195;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_TOTAL_HS_CREDITS: _bindgen_ty_2 = 196;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_HS_CREDITS: _bindgen_ty_2 = 197;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_HS_CREDITS: _bindgen_ty_2 = 198;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_PM_AREA_PC_SAMPLER: _bindgen_ty_2 = 199;
+pub const NV_VGPU_MSG_FUNCTION_INVALIDATE_TLB: _bindgen_ty_2 = 200;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_QUERY_ECC_STATUS: _bindgen_ty_2 = 201;
+pub const NV_VGPU_MSG_FUNCTION_ECC_NOTIFIER_WRITE_ACK: _bindgen_ty_2 = 202;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_GET_MODE_MMU_DEBUG: _bindgen_ty_2 = 203;
+pub const NV_VGPU_MSG_FUNCTION_RM_API_CONTROL: _bindgen_ty_2 = 204;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_INTERNAL_GPU_START_FABRIC_PROBE: _bindgen_ty_2 = 205;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_NVLINK_GET_INBAND_RECEIVED_DATA: _bindgen_ty_2 = 206;
+pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_DATA: _bindgen_ty_2 = 207;
+pub const NV_VGPU_MSG_FUNCTION_RESERVED_208: _bindgen_ty_2 = 208;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_GET_INFO_V2: _bindgen_ty_2 = 209;
+pub const NV_VGPU_MSG_FUNCTION_GET_BRAND_CAPS: _bindgen_ty_2 = 210;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_NVLINK_INBAND_SEND_DATA: _bindgen_ty_2 = 211;
+pub const NV_VGPU_MSG_FUNCTION_UPDATE_GPM_GUEST_BUFFER_INFO: _bindgen_ty_2 = 212;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_INTERNAL_CONTROL_GSP_TRACE: _bindgen_ty_2 = 213;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_STENCIL_CLEAR: _bindgen_ty_2 = 214;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_VGPU_HEAP_STATS: _bindgen_ty_2 = 215;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_LIBOS_HEAP_STATS: _bindgen_ty_2 = 216;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_MMU_GCC_DEBUG: _bindgen_ty_2 = 217;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_GET_MODE_MMU_GCC_DEBUG: _bindgen_ty_2 = 218;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_HES: _bindgen_ty_2 = 219;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_RELEASE_HES: _bindgen_ty_2 = 220;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_CCU_PROF: _bindgen_ty_2 = 221;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_RELEASE_CCU_PROF: _bindgen_ty_2 = 222;
+pub const NV_VGPU_MSG_FUNCTION_RESERVED: _bindgen_ty_2 = 223;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_GET_CHIPLET_HS_CREDIT_POOL: _bindgen_ty_2 = 224;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_GET_HS_CREDITS_MAPPING: _bindgen_ty_2 = 225;
+pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_EXPORT: _bindgen_ty_2 = 226;
+pub const NV_VGPU_MSG_FUNCTION_NUM_FUNCTIONS: _bindgen_ty_2 = 227;
+pub type _bindgen_ty_2 = ffi::c_uint;
+pub const NV_VGPU_MSG_EVENT_FIRST_EVENT: _bindgen_ty_3 = 4096;
+pub const NV_VGPU_MSG_EVENT_GSP_INIT_DONE: _bindgen_ty_3 = 4097;
+pub const NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER: _bindgen_ty_3 = 4098;
+pub const NV_VGPU_MSG_EVENT_POST_EVENT: _bindgen_ty_3 = 4099;
+pub const NV_VGPU_MSG_EVENT_RC_TRIGGERED: _bindgen_ty_3 = 4100;
+pub const NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED: _bindgen_ty_3 = 4101;
+pub const NV_VGPU_MSG_EVENT_OS_ERROR_LOG: _bindgen_ty_3 = 4102;
+pub const NV_VGPU_MSG_EVENT_RG_LINE_INTR: _bindgen_ty_3 = 4103;
+pub const NV_VGPU_MSG_EVENT_GPUACCT_PERFMON_UTIL_SAMPLES: _bindgen_ty_3 = 4104;
+pub const NV_VGPU_MSG_EVENT_SIM_READ: _bindgen_ty_3 = 4105;
+pub const NV_VGPU_MSG_EVENT_SIM_WRITE: _bindgen_ty_3 = 4106;
+pub const NV_VGPU_MSG_EVENT_SEMAPHORE_SCHEDULE_CALLBACK: _bindgen_ty_3 = 4107;
+pub const NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT: _bindgen_ty_3 = 4108;
+pub const NV_VGPU_MSG_EVENT_VGPU_GSP_PLUGIN_TRIGGERED: _bindgen_ty_3 = 4109;
+pub const NV_VGPU_MSG_EVENT_PERF_GPU_BOOST_SYNC_LIMITS_CALLBACK: _bindgen_ty_3 = 4110;
+pub const NV_VGPU_MSG_EVENT_PERF_BRIDGELESS_INFO_UPDATE: _bindgen_ty_3 = 4111;
+pub const NV_VGPU_MSG_EVENT_VGPU_CONFIG: _bindgen_ty_3 = 4112;
+pub const NV_VGPU_MSG_EVENT_DISPLAY_MODESET: _bindgen_ty_3 = 4113;
+pub const NV_VGPU_MSG_EVENT_EXTDEV_INTR_SERVICE: _bindgen_ty_3 = 4114;
+pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_256: _bindgen_ty_3 = 4115;
+pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_512: _bindgen_ty_3 = 4116;
+pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_1024: _bindgen_ty_3 = 4117;
+pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_2048: _bindgen_ty_3 = 4118;
+pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_4096: _bindgen_ty_3 = 4119;
+pub const NV_VGPU_MSG_EVENT_TIMED_SEMAPHORE_RELEASE: _bindgen_ty_3 = 4120;
+pub const NV_VGPU_MSG_EVENT_NVLINK_IS_GPU_DEGRADED: _bindgen_ty_3 = 4121;
+pub const NV_VGPU_MSG_EVENT_PFM_REQ_HNDLR_STATE_SYNC_CALLBACK: _bindgen_ty_3 = 4122;
+pub const NV_VGPU_MSG_EVENT_NVLINK_FAULT_UP: _bindgen_ty_3 = 4123;
+pub const NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE: _bindgen_ty_3 = 4124;
+pub const NV_VGPU_MSG_EVENT_MIG_CI_CONFIG_UPDATE: _bindgen_ty_3 = 4125;
+pub const NV_VGPU_MSG_EVENT_UPDATE_GSP_TRACE: _bindgen_ty_3 = 4126;
+pub const NV_VGPU_MSG_EVENT_NVLINK_FATAL_ERROR_RECOVERY: _bindgen_ty_3 = 4127;
+pub const NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD: _bindgen_ty_3 = 4128;
+pub const NV_VGPU_MSG_EVENT_FECS_ERROR: _bindgen_ty_3 = 4129;
+pub const NV_VGPU_MSG_EVENT_RECOVERY_ACTION: _bindgen_ty_3 = 4130;
+pub const NV_VGPU_MSG_EVENT_NUM_EVENTS: _bindgen_ty_3 = 4131;
+pub type _bindgen_ty_3 = ffi::c_uint;
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS {
+ pub totalVFs: u32_,
+ pub firstVfOffset: u32_,
+ pub vfFeatureMask: u32_,
+ pub __bindgen_padding_0: [u8; 4usize],
+ pub FirstVFBar0Address: u64_,
+ pub FirstVFBar1Address: u64_,
+ pub FirstVFBar2Address: u64_,
+ pub bar0Size: u64_,
+ pub bar1Size: u64_,
+ pub bar2Size: u64_,
+ pub b64bitBar0: u8_,
+ pub b64bitBar1: u8_,
+ pub b64bitBar2: u8_,
+ pub bSriovEnabled: u8_,
+ pub bSriovHeavyEnabled: u8_,
+ pub bEmulateVFBar0TlbInvalidationRegister: u8_,
+ pub bClientRmAllocatedCtxBuffer: u8_,
+ pub bNonPowerOf2ChannelCountSupported: u8_,
+ pub bVfResizableBAR1Supported: u8_,
+ pub __bindgen_padding_1: [u8; 7usize],
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS {
+ pub BoardID: u32_,
+ pub chipSKU: [ffi::c_char; 9usize],
+ pub chipSKUMod: [ffi::c_char; 5usize],
+ pub __bindgen_padding_0: [u8; 2usize],
+ pub skuConfigVersion: u32_,
+ pub project: [ffi::c_char; 5usize],
+ pub projectSKU: [ffi::c_char; 5usize],
+ pub CDP: [ffi::c_char; 6usize],
+ pub projectSKUMod: [ffi::c_char; 2usize],
+ pub __bindgen_padding_1: [u8; 2usize],
+ pub businessCycle: u32_,
+}
+pub type NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG = [u8_; 17usize];
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO {
+ pub base: u64_,
+ pub limit: u64_,
+ pub reserved: u64_,
+ pub performance: u32_,
+ pub supportCompressed: u8_,
+ pub supportISO: u8_,
+ pub bProtected: u8_,
+ pub blackList: NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS {
+ pub numFBRegions: u32_,
+ pub __bindgen_padding_0: [u8; 4usize],
+ pub fbRegion: [NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO; 16usize],
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone, MaybeZeroable)]
+pub struct NV2080_CTRL_GPU_GET_GID_INFO_PARAMS {
+ pub index: u32_,
+ pub flags: u32_,
+ pub length: u32_,
+ pub data: [u8_; 256usize],
+}
+impl Default for NV2080_CTRL_GPU_GET_GID_INFO_PARAMS {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct DOD_METHOD_DATA {
+ pub status: u32_,
+ pub acpiIdListLen: u32_,
+ pub acpiIdList: [u32_; 16usize],
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct JT_METHOD_DATA {
+ pub status: u32_,
+ pub jtCaps: u32_,
+ pub jtRevId: u16_,
+ pub bSBIOSCaps: u8_,
+ pub __bindgen_padding_0: u8,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct MUX_METHOD_DATA_ELEMENT {
+ pub acpiId: u32_,
+ pub mode: u32_,
+ pub status: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct MUX_METHOD_DATA {
+ pub tableLen: u32_,
+ pub acpiIdMuxModeTable: [MUX_METHOD_DATA_ELEMENT; 16usize],
+ pub acpiIdMuxPartTable: [MUX_METHOD_DATA_ELEMENT; 16usize],
+ pub acpiIdMuxStateTable: [MUX_METHOD_DATA_ELEMENT; 16usize],
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct CAPS_METHOD_DATA {
+ pub status: u32_,
+ pub optimusCaps: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct ACPI_METHOD_DATA {
+ pub bValid: u8_,
+ pub __bindgen_padding_0: [u8; 3usize],
+ pub dodMethodData: DOD_METHOD_DATA,
+ pub jtMethodData: JT_METHOD_DATA,
+ pub muxMethodData: MUX_METHOD_DATA,
+ pub capsMethodData: CAPS_METHOD_DATA,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct VIRTUAL_DISPLAY_GET_MAX_RESOLUTION_PARAMS {
+ pub headIndex: u32_,
+ pub maxHResolution: u32_,
+ pub maxVResolution: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct VIRTUAL_DISPLAY_GET_NUM_HEADS_PARAMS {
+ pub numHeads: u32_,
+ pub maxNumHeads: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct BUSINFO {
+ pub deviceID: u16_,
+ pub vendorID: u16_,
+ pub subdeviceID: u16_,
+ pub subvendorID: u16_,
+ pub revisionID: u8_,
+ pub __bindgen_padding_0: u8,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_VF_INFO {
+ pub totalVFs: u32_,
+ pub firstVFOffset: u32_,
+ pub FirstVFBar0Address: u64_,
+ pub FirstVFBar1Address: u64_,
+ pub FirstVFBar2Address: u64_,
+ pub b64bitBar0: u8_,
+ pub b64bitBar1: u8_,
+ pub b64bitBar2: u8_,
+ pub __bindgen_padding_0: [u8; 5usize],
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_PCIE_CONFIG_REG {
+ pub linkCap: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct EcidManufacturingInfo {
+ pub ecidLow: u32_,
+ pub ecidHigh: u32_,
+ pub ecidExtended: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct FW_WPR_LAYOUT_OFFSET {
+ pub nonWprHeapOffset: u64_,
+ pub frtsOffset: u64_,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone, MaybeZeroable)]
+pub struct GspStaticConfigInfo_t {
+ pub grCapsBits: [u8_; 23usize],
+ pub __bindgen_padding_0: u8,
+ pub gidInfo: NV2080_CTRL_GPU_GET_GID_INFO_PARAMS,
+ pub SKUInfo: NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS,
+ pub __bindgen_padding_1: [u8; 4usize],
+ pub fbRegionInfoParams: NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS,
+ pub sriovCaps: NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS,
+ pub sriovMaxGfid: u32_,
+ pub engineCaps: [u32_; 3usize],
+ pub poisonFuseEnabled: u8_,
+ pub __bindgen_padding_2: [u8; 7usize],
+ pub fb_length: u64_,
+ pub fbio_mask: u64_,
+ pub fb_bus_width: u32_,
+ pub fb_ram_type: u32_,
+ pub fbp_mask: u64_,
+ pub l2_cache_size: u32_,
+ pub gpuNameString: [u8_; 64usize],
+ pub gpuShortNameString: [u8_; 64usize],
+ pub gpuNameString_Unicode: [u16_; 64usize],
+ pub bGpuInternalSku: u8_,
+ pub bIsQuadroGeneric: u8_,
+ pub bIsQuadroAd: u8_,
+ pub bIsNvidiaNvs: u8_,
+ pub bIsVgx: u8_,
+ pub bGeforceSmb: u8_,
+ pub bIsTitan: u8_,
+ pub bIsTesla: u8_,
+ pub bIsMobile: u8_,
+ pub bIsGc6Rtd3Allowed: u8_,
+ pub bIsGc8Rtd3Allowed: u8_,
+ pub bIsGcOffRtd3Allowed: u8_,
+ pub bIsGcoffLegacyAllowed: u8_,
+ pub bIsMigSupported: u8_,
+ pub RTD3GC6TotalBoardPower: u16_,
+ pub RTD3GC6PerstDelay: u16_,
+ pub __bindgen_padding_3: [u8; 2usize],
+ pub bar1PdeBase: u64_,
+ pub bar2PdeBase: u64_,
+ pub bVbiosValid: u8_,
+ pub __bindgen_padding_4: [u8; 3usize],
+ pub vbiosSubVendor: u32_,
+ pub vbiosSubDevice: u32_,
+ pub bPageRetirementSupported: u8_,
+ pub bSplitVasBetweenServerClientRm: u8_,
+ pub bClRootportNeedsNosnoopWAR: u8_,
+ pub __bindgen_padding_5: u8,
+ pub displaylessMaxHeads: VIRTUAL_DISPLAY_GET_NUM_HEADS_PARAMS,
+ pub displaylessMaxResolution: VIRTUAL_DISPLAY_GET_MAX_RESOLUTION_PARAMS,
+ pub __bindgen_padding_6: [u8; 4usize],
+ pub displaylessMaxPixels: u64_,
+ pub hInternalClient: u32_,
+ pub hInternalDevice: u32_,
+ pub hInternalSubdevice: u32_,
+ pub bSelfHostedMode: u8_,
+ pub bAtsSupported: u8_,
+ pub bIsGpuUefi: u8_,
+ pub bIsEfiInit: u8_,
+ pub ecidInfo: [EcidManufacturingInfo; 2usize],
+ pub fwWprLayoutOffset: FW_WPR_LAYOUT_OFFSET,
+}
+impl Default for GspStaticConfigInfo_t {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GspSystemInfo {
+ pub gpuPhysAddr: u64_,
+ pub gpuPhysFbAddr: u64_,
+ pub gpuPhysInstAddr: u64_,
+ pub gpuPhysIoAddr: u64_,
+ pub nvDomainBusDeviceFunc: u64_,
+ pub simAccessBufPhysAddr: u64_,
+ pub notifyOpSharedSurfacePhysAddr: u64_,
+ pub pcieAtomicsOpMask: u64_,
+ pub consoleMemSize: u64_,
+ pub maxUserVa: u64_,
+ pub pciConfigMirrorBase: u32_,
+ pub pciConfigMirrorSize: u32_,
+ pub PCIDeviceID: u32_,
+ pub PCISubDeviceID: u32_,
+ pub PCIRevisionID: u32_,
+ pub pcieAtomicsCplDeviceCapMask: u32_,
+ pub oorArch: u8_,
+ pub __bindgen_padding_0: [u8; 7usize],
+ pub clPdbProperties: u64_,
+ pub Chipset: u32_,
+ pub bGpuBehindBridge: u8_,
+ pub bFlrSupported: u8_,
+ pub b64bBar0Supported: u8_,
+ pub bMnocAvailable: u8_,
+ pub chipsetL1ssEnable: u32_,
+ pub bUpstreamL0sUnsupported: u8_,
+ pub bUpstreamL1Unsupported: u8_,
+ pub bUpstreamL1PorSupported: u8_,
+ pub bUpstreamL1PorMobileOnly: u8_,
+ pub bSystemHasMux: u8_,
+ pub upstreamAddressValid: u8_,
+ pub FHBBusInfo: BUSINFO,
+ pub chipsetIDInfo: BUSINFO,
+ pub __bindgen_padding_1: [u8; 2usize],
+ pub acpiMethodData: ACPI_METHOD_DATA,
+ pub hypervisorType: u32_,
+ pub bIsPassthru: u8_,
+ pub __bindgen_padding_2: [u8; 7usize],
+ pub sysTimerOffsetNs: u64_,
+ pub gspVFInfo: GSP_VF_INFO,
+ pub bIsPrimary: u8_,
+ pub isGridBuild: u8_,
+ pub __bindgen_padding_3: [u8; 2usize],
+ pub pcieConfigReg: GSP_PCIE_CONFIG_REG,
+ pub gridBuildCsp: u32_,
+ pub bPreserveVideoMemoryAllocations: u8_,
+ pub bTdrEventSupported: u8_,
+ pub bFeatureStretchVblankCapable: u8_,
+ pub bEnableDynamicGranularityPageArrays: u8_,
+ pub bClockBoostSupported: u8_,
+ pub bRouteDispIntrsToCPU: u8_,
+ pub __bindgen_padding_4: [u8; 6usize],
+ pub hostPageSize: u64_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct MESSAGE_QUEUE_INIT_ARGUMENTS {
+ pub sharedMemPhysAddr: u64_,
+ pub pageTableEntryCount: u32_,
+ pub __bindgen_padding_0: [u8; 4usize],
+ pub cmdQueueOffset: u64_,
+ pub statQueueOffset: u64_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_SR_INIT_ARGUMENTS {
+ pub oldLevel: u32_,
+ pub flags: u32_,
+ pub bInPMTransition: u8_,
+ pub __bindgen_padding_0: [u8; 3usize],
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_ARGUMENTS_CACHED {
+ pub messageQueueInitArguments: MESSAGE_QUEUE_INIT_ARGUMENTS,
+ pub srInitArguments: GSP_SR_INIT_ARGUMENTS,
+ pub gpuInstance: u32_,
+ pub bDmemStack: u8_,
+ pub __bindgen_padding_0: [u8; 7usize],
+ pub profilerArgs: GSP_ARGUMENTS_CACHED__bindgen_ty_1,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_ARGUMENTS_CACHED__bindgen_ty_1 {
+ pub pa: u64_,
+ pub size: u64_,
+}
+#[repr(C)]
+#[derive(Copy, Clone, MaybeZeroable)]
+pub union rpc_message_rpc_union_field_v03_00 {
+ pub spare: u32_,
+ pub cpuRmGfid: u32_,
+}
+impl Default for rpc_message_rpc_union_field_v03_00 {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+pub type rpc_message_rpc_union_field_v = rpc_message_rpc_union_field_v03_00;
+#[repr(C)]
+#[derive(MaybeZeroable)]
+pub struct rpc_message_header_v03_00 {
+ pub header_version: u32_,
+ pub signature: u32_,
+ pub length: u32_,
+ pub function: u32_,
+ pub rpc_result: u32_,
+ pub rpc_result_private: u32_,
+ pub sequence: u32_,
+ pub u: rpc_message_rpc_union_field_v,
+ pub rpc_message_data: __IncompleteArrayField<u8_>,
+}
+impl Default for rpc_message_header_v03_00 {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+pub type rpc_message_header_v = rpc_message_header_v03_00;
+#[repr(C)]
+#[derive(Copy, Clone, MaybeZeroable)]
+pub struct GspFwWprMeta {
+ pub magic: u64_,
+ pub revision: u64_,
+ pub sysmemAddrOfRadix3Elf: u64_,
+ pub sizeOfRadix3Elf: u64_,
+ pub sysmemAddrOfBootloader: u64_,
+ pub sizeOfBootloader: u64_,
+ pub bootloaderCodeOffset: u64_,
+ pub bootloaderDataOffset: u64_,
+ pub bootloaderManifestOffset: u64_,
+ pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_1,
+ pub gspFwRsvdStart: u64_,
+ pub nonWprHeapOffset: u64_,
+ pub nonWprHeapSize: u64_,
+ pub gspFwWprStart: u64_,
+ pub gspFwHeapOffset: u64_,
+ pub gspFwHeapSize: u64_,
+ pub gspFwOffset: u64_,
+ pub bootBinOffset: u64_,
+ pub frtsOffset: u64_,
+ pub frtsSize: u64_,
+ pub gspFwWprEnd: u64_,
+ pub fbSize: u64_,
+ pub vgaWorkspaceOffset: u64_,
+ pub vgaWorkspaceSize: u64_,
+ pub bootCount: u64_,
+ pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_2,
+ pub gspFwHeapVfPartitionCount: u8_,
+ pub flags: u8_,
+ pub padding: [u8_; 2usize],
+ pub pmuReservedSize: u32_,
+ pub verified: u64_,
+}
+#[repr(C)]
+#[derive(Copy, Clone, MaybeZeroable)]
+pub union GspFwWprMeta__bindgen_ty_1 {
+ pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_1__bindgen_ty_1,
+ pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_1__bindgen_ty_2,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_1 {
+ pub sysmemAddrOfSignature: u64_,
+ pub sizeOfSignature: u64_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_2 {
+ pub gspFwHeapFreeListWprOffset: u32_,
+ pub unused0: u32_,
+ pub unused1: u64_,
+}
+impl Default for GspFwWprMeta__bindgen_ty_1 {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+#[repr(C)]
+#[derive(Copy, Clone, MaybeZeroable)]
+pub union GspFwWprMeta__bindgen_ty_2 {
+ pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_2__bindgen_ty_1,
+ pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_2__bindgen_ty_2,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_1 {
+ pub partitionRpcAddr: u64_,
+ pub partitionRpcRequestOffset: u16_,
+ pub partitionRpcReplyOffset: u16_,
+ pub elfCodeOffset: u32_,
+ pub elfDataOffset: u32_,
+ pub elfCodeSize: u32_,
+ pub elfDataSize: u32_,
+ pub lsUcodeVersion: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_2 {
+ pub partitionRpcPadding: [u32_; 4usize],
+ pub sysmemAddrOfCrashReportQueue: u64_,
+ pub sizeOfCrashReportQueue: u32_,
+ pub lsUcodeVersionPadding: [u32_; 1usize],
+}
+impl Default for GspFwWprMeta__bindgen_ty_2 {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+impl Default for GspFwWprMeta {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+pub type LibosAddress = u64_;
+pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_NONE: LibosMemoryRegionKind = 0;
+pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS: LibosMemoryRegionKind = 1;
+pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_RADIX3: LibosMemoryRegionKind = 2;
+pub type LibosMemoryRegionKind = ffi::c_uint;
+pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_NONE: LibosMemoryRegionLoc = 0;
+pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM: LibosMemoryRegionLoc = 1;
+pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_FB: LibosMemoryRegionLoc = 2;
+pub type LibosMemoryRegionLoc = ffi::c_uint;
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct LibosMemoryRegionInitArgument {
+ pub id8: LibosAddress,
+ pub pa: LibosAddress,
+ pub size: LibosAddress,
+ pub kind: u8_,
+ pub loc: u8_,
+ pub __bindgen_padding_0: [u8; 6usize],
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct PACKED_REGISTRY_ENTRY {
+ pub nameOffset: u32_,
+ pub type_: u8_,
+ pub __bindgen_padding_0: [u8; 3usize],
+ pub data: u32_,
+ pub length: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, MaybeZeroable)]
+pub struct PACKED_REGISTRY_TABLE {
+ pub size: u32_,
+ pub numEntries: u32_,
+ pub entries: __IncompleteArrayField<PACKED_REGISTRY_ENTRY>,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct msgqTxHeader {
+ pub version: u32_,
+ pub size: u32_,
+ pub msgSize: u32_,
+ pub msgCount: u32_,
+ pub writePtr: u32_,
+ pub flags: u32_,
+ pub rxHdrOff: u32_,
+ pub entryOff: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct msgqRxHeader {
+ pub readPtr: u32_,
+}
+#[repr(C)]
+#[repr(align(8))]
+#[derive(MaybeZeroable)]
+pub struct GSP_MSG_QUEUE_ELEMENT {
+ pub authTagBuffer: [u8_; 16usize],
+ pub aadBuffer: [u8_; 16usize],
+ pub checkSum: u32_,
+ pub seqNum: u32_,
+ pub elemCount: u32_,
+ pub __bindgen_padding_0: [u8; 4usize],
+ pub rpc: rpc_message_header_v,
+}
+impl Default for GSP_MSG_QUEUE_ELEMENT {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+#[repr(C)]
+#[derive(Debug, Default, MaybeZeroable)]
+pub struct rpc_run_cpu_sequencer_v17_00 {
+ pub bufferSizeDWord: u32_,
+ pub cmdIndex: u32_,
+ pub regSaveArea: [u32_; 8usize],
+ pub commandBuffer: __IncompleteArrayField<u32_>,
+}
+pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE: GSP_SEQ_BUF_OPCODE = 0;
+pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY: GSP_SEQ_BUF_OPCODE = 1;
+pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL: GSP_SEQ_BUF_OPCODE = 2;
+pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US: GSP_SEQ_BUF_OPCODE = 3;
+pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE: GSP_SEQ_BUF_OPCODE = 4;
+pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET: GSP_SEQ_BUF_OPCODE = 5;
+pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START: GSP_SEQ_BUF_OPCODE = 6;
+pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT: GSP_SEQ_BUF_OPCODE = 7;
+pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME: GSP_SEQ_BUF_OPCODE = 8;
+pub type GSP_SEQ_BUF_OPCODE = ffi::c_uint;
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_SEQ_BUF_PAYLOAD_REG_WRITE {
+ pub addr: u32_,
+ pub val: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_SEQ_BUF_PAYLOAD_REG_MODIFY {
+ pub addr: u32_,
+ pub mask: u32_,
+ pub val: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_SEQ_BUF_PAYLOAD_REG_POLL {
+ pub addr: u32_,
+ pub mask: u32_,
+ pub val: u32_,
+ pub timeout: u32_,
+ pub error: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_SEQ_BUF_PAYLOAD_DELAY_US {
+ pub val: u32_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, MaybeZeroable)]
+pub struct GSP_SEQ_BUF_PAYLOAD_REG_STORE {
+ pub addr: u32_,
+ pub index: u32_,
+}
+#[repr(C)]
+#[derive(Copy, Clone, MaybeZeroable)]
+pub struct GSP_SEQUENCER_BUFFER_CMD {
+ pub opCode: GSP_SEQ_BUF_OPCODE,
+ pub payload: GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1,
+}
+#[repr(C)]
+#[derive(Copy, Clone, MaybeZeroable)]
+pub union GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1 {
+ pub regWrite: GSP_SEQ_BUF_PAYLOAD_REG_WRITE,
+ pub regModify: GSP_SEQ_BUF_PAYLOAD_REG_MODIFY,
+ pub regPoll: GSP_SEQ_BUF_PAYLOAD_REG_POLL,
+ pub delayUs: GSP_SEQ_BUF_PAYLOAD_DELAY_US,
+ pub regStore: GSP_SEQ_BUF_PAYLOAD_REG_STORE,
+}
+impl Default for GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1 {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
+impl Default for GSP_SEQUENCER_BUFFER_CMD {
+ fn default() -> Self {
+ let mut s = ::core::mem::MaybeUninit::<Self>::uninit();
+ unsafe {
+ ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+ s.assume_init()
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs
new file mode 100644
index 000000000000..474e4c8021db
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/sequencer.rs
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! GSP Sequencer implementation for Pre-hopper GSP boot sequence.
+
+use core::array;
+
+use kernel::{
+ device,
+ io::{
+ poll::read_poll_timeout,
+ Io, //
+ },
+ prelude::*,
+ sync::aref::ARef,
+ time::{
+ delay::fsleep,
+ Delta, //
+ },
+ transmute::FromBytes, //
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ gsp::Gsp,
+ sec2::Sec2,
+ Falcon, //
+ },
+ gsp::{
+ cmdq::{
+ Cmdq,
+ MessageFromGsp, //
+ },
+ fw,
+ },
+ num::FromSafeCast,
+ sbuffer::SBufferIter,
+};
+
+/// GSP Sequencer information containing the command sequence and data.
+struct GspSequence {
+ /// Current command index for error reporting.
+ cmd_index: u32,
+ /// Command data buffer containing the sequence of commands.
+ cmd_data: KVec<u8>,
+}
+
+impl MessageFromGsp for GspSequence {
+ const FUNCTION: fw::MsgFunction = fw::MsgFunction::GspRunCpuSequencer;
+ type InitError = Error;
+ type Message = fw::RunCpuSequencer;
+
+ fn read(
+ msg: &Self::Message,
+ sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>,
+ ) -> Result<Self, Self::InitError> {
+ let cmd_data = sbuffer.flush_into_kvec(GFP_KERNEL)?;
+ Ok(GspSequence {
+ cmd_index: msg.cmd_index(),
+ cmd_data,
+ })
+ }
+}
+
+const CMD_SIZE: usize = size_of::<fw::SequencerBufferCmd>();
+
+/// GSP Sequencer Command types with payload data.
+/// Commands have an opcode and an opcode-dependent struct.
+#[allow(clippy::enum_variant_names)]
+#[derive(Debug)]
+pub(crate) enum GspSeqCmd {
+ RegWrite(fw::RegWritePayload),
+ RegModify(fw::RegModifyPayload),
+ RegPoll(fw::RegPollPayload),
+ DelayUs(fw::DelayUsPayload),
+ RegStore(fw::RegStorePayload),
+ CoreReset,
+ CoreStart,
+ CoreWaitForHalt,
+ CoreResume,
+}
+
+impl GspSeqCmd {
+ /// Creates a new `GspSeqCmd` from raw data returning the command and its size in bytes.
+ pub(crate) fn new(data: &[u8], dev: &device::Device) -> Result<(Self, usize)> {
+ let fw_cmd = fw::SequencerBufferCmd::from_bytes(data).ok_or(EINVAL)?;
+ let opcode_size = core::mem::size_of::<u32>();
+
+ let (cmd, size) = match fw_cmd.opcode()? {
+ fw::SeqBufOpcode::RegWrite => {
+ let payload = fw_cmd.reg_write_payload()?;
+ let size = opcode_size + size_of_val(&payload);
+ (GspSeqCmd::RegWrite(payload), size)
+ }
+ fw::SeqBufOpcode::RegModify => {
+ let payload = fw_cmd.reg_modify_payload()?;
+ let size = opcode_size + size_of_val(&payload);
+ (GspSeqCmd::RegModify(payload), size)
+ }
+ fw::SeqBufOpcode::RegPoll => {
+ let payload = fw_cmd.reg_poll_payload()?;
+ let size = opcode_size + size_of_val(&payload);
+ (GspSeqCmd::RegPoll(payload), size)
+ }
+ fw::SeqBufOpcode::DelayUs => {
+ let payload = fw_cmd.delay_us_payload()?;
+ let size = opcode_size + size_of_val(&payload);
+ (GspSeqCmd::DelayUs(payload), size)
+ }
+ fw::SeqBufOpcode::RegStore => {
+ let payload = fw_cmd.reg_store_payload()?;
+ let size = opcode_size + size_of_val(&payload);
+ (GspSeqCmd::RegStore(payload), size)
+ }
+ fw::SeqBufOpcode::CoreReset => (GspSeqCmd::CoreReset, opcode_size),
+ fw::SeqBufOpcode::CoreStart => (GspSeqCmd::CoreStart, opcode_size),
+ fw::SeqBufOpcode::CoreWaitForHalt => (GspSeqCmd::CoreWaitForHalt, opcode_size),
+ fw::SeqBufOpcode::CoreResume => (GspSeqCmd::CoreResume, opcode_size),
+ };
+
+ if data.len() < size {
+ dev_err!(dev, "Data is not enough for command\n");
+ return Err(EINVAL);
+ }
+
+ Ok((cmd, size))
+ }
+}
+
+/// GSP Sequencer for executing firmware commands during boot.
+pub(crate) struct GspSequencer<'a> {
+ /// Sequencer information with command data.
+ seq_info: GspSequence,
+ /// `Bar0` for register access.
+ bar: &'a Bar0,
+ /// SEC2 falcon for core operations.
+ sec2_falcon: &'a Falcon<Sec2>,
+ /// GSP falcon for core operations.
+ gsp_falcon: &'a Falcon<Gsp>,
+ /// LibOS DMA handle address.
+ libos_dma_handle: u64,
+ /// Bootloader application version.
+ bootloader_app_version: u32,
+ /// Device for logging.
+ dev: ARef<device::Device>,
+}
+
+impl fw::RegWritePayload {
+ fn run(&self, sequencer: &GspSequencer<'_>) -> Result {
+ let addr = usize::from_safe_cast(self.addr());
+
+ sequencer.bar.try_write32(self.val(), addr)
+ }
+}
+
+impl fw::RegModifyPayload {
+ fn run(&self, sequencer: &GspSequencer<'_>) -> Result {
+ let addr = usize::from_safe_cast(self.addr());
+
+ sequencer.bar.try_read32(addr).and_then(|val| {
+ sequencer
+ .bar
+ .try_write32((val & !self.mask()) | self.val(), addr)
+ })
+ }
+}
+
+impl fw::RegPollPayload {
+ fn run(&self, sequencer: &GspSequencer<'_>) -> Result {
+ let addr = usize::from_safe_cast(self.addr());
+
+ // Default timeout to 4 seconds.
+ let timeout_us = if self.timeout() == 0 {
+ 4_000_000
+ } else {
+ i64::from(self.timeout())
+ };
+
+ // First read.
+ sequencer.bar.try_read32(addr)?;
+
+ // Poll the requested register with requested timeout.
+ read_poll_timeout(
+ || sequencer.bar.try_read32(addr),
+ |current| (current & self.mask()) == self.val(),
+ Delta::ZERO,
+ Delta::from_micros(timeout_us),
+ )
+ .map(|_| ())
+ }
+}
+
+impl fw::DelayUsPayload {
+ fn run(&self, _sequencer: &GspSequencer<'_>) -> Result {
+ fsleep(Delta::from_micros(i64::from(self.val())));
+ Ok(())
+ }
+}
+
+impl fw::RegStorePayload {
+ fn run(&self, sequencer: &GspSequencer<'_>) -> Result {
+ let addr = usize::from_safe_cast(self.addr());
+
+ sequencer.bar.try_read32(addr).map(|_| ())
+ }
+}
+
+impl GspSeqCmd {
+ fn run(&self, seq: &GspSequencer<'_>) -> Result {
+ match self {
+ GspSeqCmd::RegWrite(cmd) => cmd.run(seq),
+ GspSeqCmd::RegModify(cmd) => cmd.run(seq),
+ GspSeqCmd::RegPoll(cmd) => cmd.run(seq),
+ GspSeqCmd::DelayUs(cmd) => cmd.run(seq),
+ GspSeqCmd::RegStore(cmd) => cmd.run(seq),
+ GspSeqCmd::CoreReset => {
+ seq.gsp_falcon.reset(seq.bar)?;
+ seq.gsp_falcon.dma_reset(seq.bar);
+ Ok(())
+ }
+ GspSeqCmd::CoreStart => {
+ seq.gsp_falcon.start(seq.bar)?;
+ Ok(())
+ }
+ GspSeqCmd::CoreWaitForHalt => {
+ seq.gsp_falcon.wait_till_halted(seq.bar)?;
+ Ok(())
+ }
+ GspSeqCmd::CoreResume => {
+ // At this point, 'SEC2-RTOS' has been loaded into SEC2 by the sequencer
+ // but neither SEC2-RTOS nor GSP-RM is running yet. This part of the
+ // sequencer will start both.
+
+ // Reset the GSP to prepare it for resuming.
+ seq.gsp_falcon.reset(seq.bar)?;
+
+ // Write the libOS DMA handle to GSP mailboxes.
+ seq.gsp_falcon.write_mailboxes(
+ seq.bar,
+ Some(seq.libos_dma_handle as u32),
+ Some((seq.libos_dma_handle >> 32) as u32),
+ );
+
+ // Start the SEC2 falcon which will trigger GSP-RM to resume on the GSP.
+ seq.sec2_falcon.start(seq.bar)?;
+
+ // Poll until GSP-RM reload/resume has completed (up to 2 seconds).
+ seq.gsp_falcon
+ .check_reload_completed(seq.bar, Delta::from_secs(2))?;
+
+ // Verify SEC2 completed successfully by checking its mailbox for errors.
+ let mbox0 = seq.sec2_falcon.read_mailbox0(seq.bar);
+ if mbox0 != 0 {
+ dev_err!(seq.dev, "Sequencer: sec2 errors: {:?}\n", mbox0);
+ return Err(EIO);
+ }
+
+ // Configure GSP with the bootloader version.
+ seq.gsp_falcon
+ .write_os_version(seq.bar, seq.bootloader_app_version);
+
+ // Verify the GSP's RISC-V core is active indicating successful GSP boot.
+ if !seq.gsp_falcon.is_riscv_active(seq.bar) {
+ dev_err!(seq.dev, "Sequencer: RISC-V core is not active\n");
+ return Err(EIO);
+ }
+ Ok(())
+ }
+ }
+ }
+}
+
+/// Iterator over GSP sequencer commands.
+pub(crate) struct GspSeqIter<'a> {
+ /// Command data buffer.
+ cmd_data: &'a [u8],
+ /// Current position in the buffer.
+ current_offset: usize,
+ /// Total number of commands to process.
+ total_cmds: u32,
+ /// Number of commands processed so far.
+ cmds_processed: u32,
+ /// Device for logging.
+ dev: ARef<device::Device>,
+}
+
+impl<'a> Iterator for GspSeqIter<'a> {
+ type Item = Result<GspSeqCmd>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ // Stop if we've processed all commands or reached the end of data.
+ if self.cmds_processed >= self.total_cmds || self.current_offset >= self.cmd_data.len() {
+ return None;
+ }
+
+ // Check if we have enough data for opcode.
+ if self.current_offset + core::mem::size_of::<u32>() > self.cmd_data.len() {
+ return Some(Err(EIO));
+ }
+
+ let offset = self.current_offset;
+
+ // Handle command creation based on available data,
+ // zero-pad if necessary (since last command may not be full size).
+ let mut buffer = [0u8; CMD_SIZE];
+ let copy_len = if offset + CMD_SIZE <= self.cmd_data.len() {
+ CMD_SIZE
+ } else {
+ self.cmd_data.len() - offset
+ };
+ buffer[..copy_len].copy_from_slice(&self.cmd_data[offset..offset + copy_len]);
+ let cmd_result = GspSeqCmd::new(&buffer, &self.dev);
+
+ cmd_result.map_or_else(
+ |_err| {
+ dev_err!(self.dev, "Error parsing command at offset {}\n", offset);
+ None
+ },
+ |(cmd, size)| {
+ self.current_offset += size;
+ self.cmds_processed += 1;
+ Some(Ok(cmd))
+ },
+ )
+ }
+}
+
+impl<'a> GspSequencer<'a> {
+ fn iter(&self) -> GspSeqIter<'_> {
+ let cmd_data = &self.seq_info.cmd_data[..];
+
+ GspSeqIter {
+ cmd_data,
+ current_offset: 0,
+ total_cmds: self.seq_info.cmd_index,
+ cmds_processed: 0,
+ dev: self.dev.clone(),
+ }
+ }
+}
+
+/// Parameters for running the GSP sequencer.
+pub(crate) struct GspSequencerParams<'a> {
+ /// Bootloader application version.
+ pub(crate) bootloader_app_version: u32,
+ /// LibOS DMA handle address.
+ pub(crate) libos_dma_handle: u64,
+ /// GSP falcon for core operations.
+ pub(crate) gsp_falcon: &'a Falcon<Gsp>,
+ /// SEC2 falcon for core operations.
+ pub(crate) sec2_falcon: &'a Falcon<Sec2>,
+ /// Device for logging.
+ pub(crate) dev: ARef<device::Device>,
+ /// BAR0 for register access.
+ pub(crate) bar: &'a Bar0,
+}
+
+impl<'a> GspSequencer<'a> {
+ pub(crate) fn run(cmdq: &Cmdq, params: GspSequencerParams<'a>) -> Result {
+ let seq_info = loop {
+ match cmdq.receive_msg::<GspSequence>(Cmdq::RECEIVE_TIMEOUT) {
+ Ok(seq_info) => break seq_info,
+ Err(ERANGE) => continue,
+ Err(e) => return Err(e),
+ }
+ };
+
+ let sequencer = GspSequencer {
+ seq_info,
+ bar: params.bar,
+ sec2_falcon: params.sec2_falcon,
+ gsp_falcon: params.gsp_falcon,
+ libos_dma_handle: params.libos_dma_handle,
+ bootloader_app_version: params.bootloader_app_version,
+ dev: params.dev,
+ };
+
+ dev_dbg!(sequencer.dev, "Running CPU Sequencer commands\n");
+
+ for cmd_result in sequencer.iter() {
+ match cmd_result {
+ Ok(cmd) => cmd.run(&sequencer)?,
+ Err(e) => {
+ dev_err!(
+ sequencer.dev,
+ "Error running command at index {}\n",
+ sequencer.seq_info.cmd_index
+ );
+ return Err(e);
+ }
+ }
+ }
+
+ dev_dbg!(
+ sequencer.dev,
+ "CPU Sequencer commands completed successfully\n"
+ );
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs
new file mode 100644
index 000000000000..04a1fa6b25f8
--- /dev/null
+++ b/drivers/gpu/nova-core/nova_core.rs
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Nova Core GPU Driver
+
+use kernel::{
+ debugfs,
+ driver::Registration,
+ pci,
+ prelude::*,
+ InPlaceModule, //
+};
+
+#[macro_use]
+mod bitfield;
+
+mod driver;
+mod falcon;
+mod fb;
+mod firmware;
+mod gfw;
+mod gpu;
+mod gsp;
+#[macro_use]
+mod num;
+mod regs;
+mod sbuffer;
+mod vbios;
+
+pub(crate) const MODULE_NAME: &core::ffi::CStr = <LocalModule as kernel::ModuleMetadata>::NAME;
+
+// TODO: Move this into per-module data once that exists.
+static mut DEBUGFS_ROOT: Option<debugfs::Dir> = None;
+
+/// Guard that clears `DEBUGFS_ROOT` when dropped.
+struct DebugfsRootGuard;
+
+impl Drop for DebugfsRootGuard {
+ fn drop(&mut self) {
+ // SAFETY: This guard is dropped after `_driver` (due to field order),
+ // so the driver is unregistered and no probe() can be running.
+ unsafe { DEBUGFS_ROOT = None };
+ }
+}
+
+#[pin_data]
+struct NovaCoreModule {
+ // Fields are dropped in declaration order, so `_driver` is dropped first,
+ // then `_debugfs_guard` clears `DEBUGFS_ROOT`.
+ #[pin]
+ _driver: Registration<pci::Adapter<driver::NovaCore>>,
+ _debugfs_guard: DebugfsRootGuard,
+}
+
+impl InPlaceModule for NovaCoreModule {
+ fn init(module: &'static kernel::ThisModule) -> impl PinInit<Self, Error> {
+ let dir = debugfs::Dir::new(kernel::c_str!("nova_core"));
+
+ // SAFETY: We are the only driver code running during init, so there
+ // cannot be any concurrent access to `DEBUGFS_ROOT`.
+ unsafe { DEBUGFS_ROOT = Some(dir) };
+
+ try_pin_init!(Self {
+ _driver <- Registration::new(MODULE_NAME, module),
+ _debugfs_guard: DebugfsRootGuard,
+ })
+ }
+}
+
+module! {
+ type: NovaCoreModule,
+ name: "NovaCore",
+ authors: ["Danilo Krummrich"],
+ description: "Nova Core GPU driver",
+ license: "GPL v2",
+ firmware: [],
+}
+
+kernel::module_firmware!(firmware::ModInfoBuilder);
diff --git a/drivers/gpu/nova-core/num.rs b/drivers/gpu/nova-core/num.rs
new file mode 100644
index 000000000000..6c824b8d7b97
--- /dev/null
+++ b/drivers/gpu/nova-core/num.rs
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Numerical helpers functions and traits.
+//!
+//! This is essentially a staging module for code to mature until it can be moved to the `kernel`
+//! crate.
+
+use kernel::{
+ macros::paste,
+ prelude::*, //
+};
+
+/// Implements safe `as` conversion functions from a given type into a series of target types.
+///
+/// These functions can be used in place of `as`, with the guarantee that they will be lossless.
+macro_rules! impl_safe_as {
+ ($from:ty as { $($into:ty),* }) => {
+ $(
+ paste! {
+ #[doc = ::core::concat!(
+ "Losslessly converts a [`",
+ ::core::stringify!($from),
+ "`] into a [`",
+ ::core::stringify!($into),
+ "`].")]
+ ///
+ /// This conversion is allowed as it is always lossless. Prefer this over the `as`
+ /// keyword to ensure no lossy casts are performed.
+ ///
+ /// This is for use from a `const` context. For non `const` use, prefer the
+ /// [`FromSafeCast`] and [`IntoSafeCast`] traits.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use crate::num;
+ ///
+ #[doc = ::core::concat!(
+ "assert_eq!(num::",
+ ::core::stringify!($from),
+ "_as_",
+ ::core::stringify!($into),
+ "(1",
+ ::core::stringify!($from),
+ "), 1",
+ ::core::stringify!($into),
+ ");")]
+ /// ```
+ #[allow(unused)]
+ #[inline(always)]
+ pub(crate) const fn [<$from _as_ $into>](value: $from) -> $into {
+ kernel::static_assert!(size_of::<$into>() >= size_of::<$from>());
+
+ value as $into
+ }
+ }
+ )*
+ };
+}
+
+impl_safe_as!(u8 as { u16, u32, u64, usize });
+impl_safe_as!(u16 as { u32, u64, usize });
+impl_safe_as!(u32 as { u64, usize } );
+// `u64` and `usize` have the same size on 64-bit platforms.
+#[cfg(CONFIG_64BIT)]
+impl_safe_as!(u64 as { usize } );
+
+// A `usize` fits into a `u64` on 32 and 64-bit platforms.
+#[cfg(any(CONFIG_32BIT, CONFIG_64BIT))]
+impl_safe_as!(usize as { u64 });
+
+// A `usize` fits into a `u32` on 32-bit platforms.
+#[cfg(CONFIG_32BIT)]
+impl_safe_as!(usize as { u32 });
+
+/// Extension trait providing guaranteed lossless cast to `Self` from `T`.
+///
+/// The standard library's `From` implementations do not cover conversions that are not portable or
+/// future-proof. For instance, even though it is safe today, `From<usize>` is not implemented for
+/// [`u64`] because of the possibility to support larger-than-64bit architectures in the future.
+///
+/// The workaround is to either deal with the error handling of [`TryFrom`] for an operation that
+/// technically cannot fail, or to use the `as` keyword, which can silently strip data if the
+/// destination type is smaller than the source.
+///
+/// Both options are hardly acceptable for the kernel. It is also a much more architecture
+/// dependent environment, supporting only 32 and 64 bit architectures, with some modules
+/// explicitly depending on a specific bus width that could greatly benefit from infallible
+/// conversion operations.
+///
+/// Thus this extension trait that provides, for the architecture the kernel is built for, safe
+/// conversion between types for which such cast is lossless.
+///
+/// In other words, this trait is implemented if, for the current build target and with `t: T`, the
+/// `t as Self` operation is completely lossless.
+///
+/// Prefer this over the `as` keyword to ensure no lossy casts are performed.
+///
+/// If you need to perform a conversion in `const` context, use [`u64_as_usize`], [`u32_as_usize`],
+/// [`usize_as_u64`], etc.
+///
+/// # Examples
+///
+/// ```
+/// use crate::num::FromSafeCast;
+///
+/// assert_eq!(usize::from_safe_cast(0xf00u32), 0xf00u32 as usize);
+/// ```
+pub(crate) trait FromSafeCast<T> {
+ /// Create a `Self` from `value`. This operation is guaranteed to be lossless.
+ fn from_safe_cast(value: T) -> Self;
+}
+
+impl FromSafeCast<usize> for u64 {
+ fn from_safe_cast(value: usize) -> Self {
+ usize_as_u64(value)
+ }
+}
+
+#[cfg(CONFIG_32BIT)]
+impl FromSafeCast<usize> for u32 {
+ fn from_safe_cast(value: usize) -> Self {
+ usize_as_u32(value)
+ }
+}
+
+impl FromSafeCast<u32> for usize {
+ fn from_safe_cast(value: u32) -> Self {
+ u32_as_usize(value)
+ }
+}
+
+#[cfg(CONFIG_64BIT)]
+impl FromSafeCast<u64> for usize {
+ fn from_safe_cast(value: u64) -> Self {
+ u64_as_usize(value)
+ }
+}
+
+/// Counterpart to the [`FromSafeCast`] trait, i.e. this trait is to [`FromSafeCast`] what [`Into`]
+/// is to [`From`].
+///
+/// See the documentation of [`FromSafeCast`] for the motivation.
+///
+/// # Examples
+///
+/// ```
+/// use crate::num::IntoSafeCast;
+///
+/// assert_eq!(0xf00u32.into_safe_cast(), 0xf00u32 as usize);
+/// ```
+pub(crate) trait IntoSafeCast<T> {
+ /// Convert `self` into a `T`. This operation is guaranteed to be lossless.
+ fn into_safe_cast(self) -> T;
+}
+
+/// Reverse operation for types implementing [`FromSafeCast`].
+impl<S, T> IntoSafeCast<T> for S
+where
+ T: FromSafeCast<S>,
+{
+ fn into_safe_cast(self) -> T {
+ T::from_safe_cast(self)
+ }
+}
+
+/// Implements lossless conversion of a constant from a larger type into a smaller one.
+macro_rules! impl_const_into {
+ ($from:ty => { $($into:ty),* }) => {
+ $(
+ paste! {
+ #[doc = ::core::concat!(
+ "Performs a build-time safe conversion of a [`",
+ ::core::stringify!($from),
+ "`] constant value into a [`",
+ ::core::stringify!($into),
+ "`].")]
+ ///
+ /// This checks at compile-time that the conversion is lossless, and triggers a build
+ /// error if it isn't.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use crate::num;
+ ///
+ /// // Succeeds because the value of the source fits into the destination's type.
+ #[doc = ::core::concat!(
+ "assert_eq!(num::",
+ ::core::stringify!($from),
+ "_into_",
+ ::core::stringify!($into),
+ "::<1",
+ ::core::stringify!($from),
+ ">(), 1",
+ ::core::stringify!($into),
+ ");")]
+ /// ```
+ #[allow(unused)]
+ pub(crate) const fn [<$from _into_ $into>]<const N: $from>() -> $into {
+ // Make sure that the target type is smaller than the source one.
+ static_assert!($from::BITS >= $into::BITS);
+ // CAST: we statically enforced above that `$from` is larger than `$into`, so the
+ // `as` conversion will be lossless.
+ build_assert!(N >= $into::MIN as $from && N <= $into::MAX as $from);
+
+ N as $into
+ }
+ }
+ )*
+ };
+}
+
+impl_const_into!(usize => { u8, u16, u32 });
+impl_const_into!(u64 => { u8, u16, u32 });
+impl_const_into!(u32 => { u8, u16 });
+impl_const_into!(u16 => { u8 });
+
+/// Creates an enum type associated to a [`Bounded`](kernel::num::Bounded), with a [`From`]
+/// conversion to the associated `Bounded` and either a [`TryFrom`] or `From` conversion from the
+/// associated `Bounded`.
+// TODO[FPRI]: This is a temporary solution to be replaced with the corresponding derive macros
+// once they land.
+#[macro_export]
+macro_rules! bounded_enum {
+ (
+ $(#[$enum_meta:meta])*
+ $vis:vis enum $enum_type:ident with $from_impl:ident<Bounded<$width:ty, $length:literal>> {
+ $( $(#[doc = $variant_doc:expr])* $variant:ident = $value:expr),* $(,)*
+ }
+ ) => {
+ $(#[$enum_meta])*
+ $vis enum $enum_type {
+ $(
+ $(#[doc = $variant_doc])*
+ $variant = $value
+ ),*
+ }
+
+ impl core::convert::From<$enum_type> for kernel::num::Bounded<$width, $length> {
+ fn from(value: $enum_type) -> Self {
+ match value {
+ $($enum_type::$variant =>
+ kernel::num::Bounded::<$width, _>::new::<{ $value }>()),*
+ }
+ }
+ }
+
+ bounded_enum!(@impl_from $enum_type with $from_impl<Bounded<$width, $length>> {
+ $($variant = $value),*
+ });
+ };
+
+ // `TryFrom` implementation from associated `Bounded` to enum type.
+ (@impl_from $enum_type:ident with TryFrom<Bounded<$width:ty, $length:literal>> {
+ $($variant:ident = $value:expr),* $(,)*
+ }) => {
+ impl core::convert::TryFrom<kernel::num::Bounded<$width, $length>> for $enum_type {
+ type Error = kernel::error::Error;
+
+ fn try_from(
+ value: kernel::num::Bounded<$width, $length>
+ ) -> kernel::error::Result<Self> {
+ match value.get() {
+ $(
+ $value => Ok($enum_type::$variant),
+ )*
+ _ => Err(kernel::error::code::EINVAL),
+ }
+ }
+ }
+ };
+
+ // `From` implementation from associated `Bounded` to enum type. Triggers a build-time error if
+ // all possible values of the `Bounded` are not covered by the enum type.
+ (@impl_from $enum_type:ident with From<Bounded<$width:ty, $length:literal>> {
+ $($variant:ident = $value:expr),* $(,)*
+ }) => {
+ impl core::convert::From<kernel::num::Bounded<$width, $length>> for $enum_type {
+ fn from(value: kernel::num::Bounded<$width, $length>) -> Self {
+ const MAX: $width = 1 << $length;
+
+ // Makes the compiler optimizer aware of the possible range of values.
+ let value = value.get() & ((1 << $length) - 1);
+ match value {
+ $(
+ $value => $enum_type::$variant,
+ )*
+ // PANIC: we cannot reach this arm as all possible variants are handled by the
+ // match arms above. It is here to make the compiler complain if `$enum_type`
+ // does not cover all values of the `0..MAX` range.
+ MAX.. => unreachable!(),
+ }
+ }
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
new file mode 100644
index 000000000000..2f171a4ff9ba
--- /dev/null
+++ b/drivers/gpu/nova-core/regs.rs
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ io::{
+ register,
+ register::WithBase,
+ Io, //
+ },
+ prelude::*,
+ time, //
+};
+
+use crate::{
+ driver::Bar0,
+ falcon::{
+ DmaTrfCmdSize,
+ FalconCoreRev,
+ FalconCoreRevSubversion,
+ FalconEngine,
+ FalconFbifMemType,
+ FalconFbifTarget,
+ FalconMem,
+ FalconModSelAlgo,
+ FalconSecurityModel,
+ PFalcon2Base,
+ PFalconBase,
+ PeregrineCoreSelect, //
+ },
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
+ num::FromSafeCast,
+};
+
+// PMC
+
+register! {
+ /// Basic revision information about the GPU.
+ pub(crate) NV_PMC_BOOT_0(u32) @ 0x00000000 {
+ /// Lower bits of the architecture.
+ 28:24 architecture_0;
+ /// Implementation version of the architecture.
+ 23:20 implementation;
+ /// MSB of the architecture.
+ 8:8 architecture_1;
+ /// Major revision of the chip.
+ 7:4 major_revision;
+ /// Minor revision of the chip.
+ 3:0 minor_revision;
+ }
+
+ /// Extended architecture information.
+ pub(crate) NV_PMC_BOOT_42(u32) @ 0x00000a00 {
+ /// Architecture value.
+ 29:24 architecture ?=> Architecture;
+ /// Implementation version of the architecture.
+ 23:20 implementation;
+ /// Major revision of the chip.
+ 19:16 major_revision;
+ /// Minor revision of the chip.
+ 15:12 minor_revision;
+ }
+}
+
+impl NV_PMC_BOOT_0 {
+ pub(crate) fn is_older_than_fermi(self) -> bool {
+ // From https://github.com/NVIDIA/open-gpu-doc/tree/master/manuals :
+ const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u32 = 0xc;
+
+ // Older chips left arch1 zeroed out. That, combined with an arch0 value that is less than
+ // GF100, means "older than Fermi".
+ self.architecture_1() == 0 && self.architecture_0() < NV_PMC_BOOT_0_ARCHITECTURE_GF100
+ }
+}
+
+impl NV_PMC_BOOT_42 {
+ /// Combines `architecture` and `implementation` to obtain a code unique to the chipset.
+ pub(crate) fn chipset(self) -> Result<Chipset> {
+ self.architecture()
+ .map(|arch| {
+ ((arch as u32) << Self::IMPLEMENTATION_RANGE.len())
+ | u32::from(self.implementation())
+ })
+ .and_then(Chipset::try_from)
+ }
+
+ /// Returns the raw architecture value from the register.
+ fn architecture_raw(self) -> u8 {
+ ((self.into_raw() >> Self::ARCHITECTURE_RANGE.start())
+ & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1)) as u8
+ }
+}
+
+impl kernel::fmt::Display for NV_PMC_BOOT_42 {
+ fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result {
+ write!(
+ f,
+ "boot42 = 0x{:08x} (architecture 0x{:x}, implementation 0x{:x})",
+ self.inner,
+ self.architecture_raw(),
+ self.implementation()
+ )
+ }
+}
+
+// PBUS
+
+register! {
+ pub(crate) NV_PBUS_SW_SCRATCH(u32)[64] @ 0x00001400 {}
+
+ /// Scratch register 0xe used as FRTS firmware error code.
+ pub(crate) NV_PBUS_SW_SCRATCH_0E_FRTS_ERR(u32) => NV_PBUS_SW_SCRATCH[0xe] {
+ 31:16 frts_err_code;
+ }
+}
+
+// PFB
+
+register! {
+ /// Low bits of the physical system memory address used by the GPU to perform sysmembar
+ /// operations (see [`crate::fb::SysmemFlush`]).
+ pub(crate) NV_PFB_NISO_FLUSH_SYSMEM_ADDR(u32) @ 0x00100c10 {
+ 31:0 adr_39_08;
+ }
+
+ /// High bits of the physical system memory address used by the GPU to perform sysmembar
+ /// operations (see [`crate::fb::SysmemFlush`]).
+ pub(crate) NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x00100c40 {
+ 23:0 adr_63_40;
+ }
+
+ pub(crate) NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE(u32) @ 0x00100ce0 {
+ 30:30 ecc_mode_enabled => bool;
+ 9:4 lower_mag;
+ 3:0 lower_scale;
+ }
+
+ pub(crate) NV_PFB_PRI_MMU_WPR2_ADDR_LO(u32) @ 0x001fa824 {
+ /// Bits 12..40 of the lower (inclusive) bound of the WPR2 region.
+ 31:4 lo_val;
+ }
+
+ pub(crate) NV_PFB_PRI_MMU_WPR2_ADDR_HI(u32) @ 0x001fa828 {
+ /// Bits 12..40 of the higher (exclusive) bound of the WPR2 region.
+ 31:4 hi_val;
+ }
+}
+
+impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE {
+ /// Returns the usable framebuffer size, in bytes.
+ pub(crate) fn usable_fb_size(self) -> u64 {
+ let size = (u64::from(self.lower_mag()) << u64::from(self.lower_scale()))
+ * u64::from_safe_cast(kernel::sizes::SZ_1M);
+
+ if self.ecc_mode_enabled() {
+ // Remove the amount of memory reserved for ECC (one per 16 units).
+ size / 16 * 15
+ } else {
+ size
+ }
+ }
+}
+
+impl NV_PFB_PRI_MMU_WPR2_ADDR_LO {
+ /// Returns the lower (inclusive) bound of the WPR2 region.
+ pub(crate) fn lower_bound(self) -> u64 {
+ u64::from(self.lo_val()) << 12
+ }
+}
+
+impl NV_PFB_PRI_MMU_WPR2_ADDR_HI {
+ /// Returns the higher (exclusive) bound of the WPR2 region.
+ ///
+ /// A value of zero means the WPR2 region is not set.
+ pub(crate) fn higher_bound(self) -> u64 {
+ u64::from(self.hi_val()) << 12
+ }
+}
+
+// PGSP
+
+register! {
+ pub(crate) NV_PGSP_QUEUE_HEAD(u32) @ 0x00110c00 {
+ 31:0 address;
+ }
+}
+
+// PGC6 register space.
+//
+// `GC6` is a GPU low-power state where VRAM is in self-refresh and the GPU is powered down (except
+// for power rails needed to keep self-refresh working and important registers and hardware
+// blocks).
+//
+// These scratch registers remain powered on even in a low-power state and have a designated group
+// number.
+
+register! {
+ /// Boot Sequence Interface (BSI) register used to determine
+ /// if GSP reload/resume has completed during the boot process.
+ pub(crate) NV_PGC6_BSI_SECURE_SCRATCH_14(u32) @ 0x001180f8 {
+ 26:26 boot_stage_3_handoff => bool;
+ }
+
+ /// Privilege level mask register. It dictates whether the host CPU has privilege to access the
+ /// `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT).
+ pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK(u32) @ 0x00118128 {
+ /// Set after FWSEC lowers its protection level.
+ 0:0 read_protection_level0 => bool;
+ }
+
+ /// OpenRM defines this as a register array, but doesn't specify its size and only uses its
+ /// first element. Be conservative until we know the actual size or need to use more registers.
+ pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05(u32)[1] @ 0x00118234 {}
+
+ /// Scratch group 05 register 0 used as GFW boot progress indicator.
+ pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT(u32)
+ => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0] {
+ /// Progress of GFW boot (0xff means completed).
+ 7:0 progress;
+ }
+
+ pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_42(u32) @ 0x001183a4 {
+ 31:0 value;
+ }
+
+ /// Scratch group 42 register used as framebuffer size.
+ pub(crate) NV_USABLE_FB_SIZE_IN_MB(u32) => NV_PGC6_AON_SECURE_SCRATCH_GROUP_42 {
+ /// Usable framebuffer size, in megabytes.
+ 31:0 value;
+ }
+}
+
+impl NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT {
+ /// Returns `true` if GFW boot is completed.
+ pub(crate) fn completed(self) -> bool {
+ self.progress() == 0xff
+ }
+}
+
+impl NV_USABLE_FB_SIZE_IN_MB {
+ /// Returns the usable framebuffer size, in bytes.
+ pub(crate) fn usable_fb_size(self) -> u64 {
+ u64::from(self.value()) * u64::from_safe_cast(kernel::sizes::SZ_1M)
+ }
+}
+
+// PDISP
+
+register! {
+ pub(crate) NV_PDISP_VGA_WORKSPACE_BASE(u32) @ 0x00625f04 {
+ /// VGA workspace base address divided by 0x10000.
+ 31:8 addr;
+ /// Set if the `addr` field is valid.
+ 3:3 status_valid => bool;
+ }
+}
+
+impl NV_PDISP_VGA_WORKSPACE_BASE {
+ /// Returns the base address of the VGA workspace, or `None` if none exists.
+ pub(crate) fn vga_workspace_addr(self) -> Option<u64> {
+ if self.status_valid() {
+ Some(u64::from(self.addr()) << 16)
+ } else {
+ None
+ }
+ }
+}
+
+// FUSE
+
+pub(crate) const NV_FUSE_OPT_FPF_SIZE: usize = 16;
+
+register! {
+ pub(crate) NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x00824100 {
+ 15:0 data => u16;
+ }
+
+ pub(crate) NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x00824140 {
+ 15:0 data => u16;
+ }
+
+ pub(crate) NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x008241c0 {
+ 15:0 data => u16;
+ }
+}
+
+// PFALCON
+
+register! {
+ pub(crate) NV_PFALCON_FALCON_IRQSCLR(u32) @ PFalconBase + 0x00000004 {
+ 6:6 swgen0 => bool;
+ 4:4 halt => bool;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_MAILBOX0(u32) @ PFalconBase + 0x00000040 {
+ 31:0 value => u32;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_MAILBOX1(u32) @ PFalconBase + 0x00000044 {
+ 31:0 value => u32;
+ }
+
+ /// Used to store version information about the firmware running
+ /// on the Falcon processor.
+ pub(crate) NV_PFALCON_FALCON_OS(u32) @ PFalconBase + 0x00000080 {
+ 31:0 value => u32;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_RM(u32) @ PFalconBase + 0x00000084 {
+ 31:0 value => u32;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_HWCFG2(u32) @ PFalconBase + 0x000000f4 {
+ /// Signal indicating that reset is completed (GA102+).
+ 31:31 reset_ready => bool;
+ /// Set to 0 after memory scrubbing is completed.
+ 12:12 mem_scrubbing => bool;
+ 10:10 riscv => bool;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_CPUCTL(u32) @ PFalconBase + 0x00000100 {
+ 6:6 alias_en => bool;
+ 4:4 halted => bool;
+ 1:1 startcpu => bool;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_BOOTVEC(u32) @ PFalconBase + 0x00000104 {
+ 31:0 value => u32;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_DMACTL(u32) @ PFalconBase + 0x0000010c {
+ 7:7 secure_stat => bool;
+ 6:3 dmaq_num;
+ 2:2 imem_scrubbing => bool;
+ 1:1 dmem_scrubbing => bool;
+ 0:0 require_ctx => bool;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_DMATRFBASE(u32) @ PFalconBase + 0x00000110 {
+ 31:0 base => u32;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_DMATRFMOFFS(u32) @ PFalconBase + 0x00000114 {
+ 23:0 offs;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_DMATRFCMD(u32) @ PFalconBase + 0x00000118 {
+ 16:16 set_dmtag;
+ 14:12 ctxdma;
+ 10:8 size ?=> DmaTrfCmdSize;
+ 5:5 is_write => bool;
+ 4:4 imem => bool;
+ 3:2 sec;
+ 1:1 idle => bool;
+ 0:0 full => bool;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_DMATRFFBOFFS(u32) @ PFalconBase + 0x0000011c {
+ 31:0 offs => u32;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_DMATRFBASE1(u32) @ PFalconBase + 0x00000128 {
+ 8:0 base;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_HWCFG1(u32) @ PFalconBase + 0x0000012c {
+ /// Core revision subversion.
+ 7:6 core_rev_subversion => FalconCoreRevSubversion;
+ /// Security model.
+ 5:4 security_model ?=> FalconSecurityModel;
+ /// Core revision.
+ 3:0 core_rev ?=> FalconCoreRev;
+ }
+
+ pub(crate) NV_PFALCON_FALCON_CPUCTL_ALIAS(u32) @ PFalconBase + 0x00000130 {
+ 1:1 startcpu => bool;
+ }
+
+ /// IMEM access control register. Up to 4 ports are available for IMEM access.
+ pub(crate) NV_PFALCON_FALCON_IMEMC(u32)[4, stride = 16] @ PFalconBase + 0x00000180 {
+ /// Access secure IMEM.
+ 28:28 secure => bool;
+ /// Auto-increment on write.
+ 24:24 aincw => bool;
+ /// IMEM block and word offset.
+ 15:0 offs;
+ }
+
+ /// IMEM data register. Reading/writing this register accesses IMEM at the address
+ /// specified by the corresponding IMEMC register.
+ pub(crate) NV_PFALCON_FALCON_IMEMD(u32)[4, stride = 16] @ PFalconBase + 0x00000184 {
+ 31:0 data;
+ }
+
+ /// IMEM tag register. Used to set the tag for the current IMEM block.
+ pub(crate) NV_PFALCON_FALCON_IMEMT(u32)[4, stride = 16] @ PFalconBase + 0x00000188 {
+ 15:0 tag;
+ }
+
+ /// DMEM access control register. Up to 8 ports are available for DMEM access.
+ pub(crate) NV_PFALCON_FALCON_DMEMC(u32)[8, stride = 8] @ PFalconBase + 0x000001c0 {
+ /// Auto-increment on write.
+ 24:24 aincw => bool;
+ /// DMEM block and word offset.
+ 15:0 offs;
+ }
+
+ /// DMEM data register. Reading/writing this register accesses DMEM at the address
+ /// specified by the corresponding DMEMC register.
+ pub(crate) NV_PFALCON_FALCON_DMEMD(u32)[8, stride = 8] @ PFalconBase + 0x000001c4 {
+ 31:0 data;
+ }
+
+ /// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the
+ /// falcon instance.
+ pub(crate) NV_PFALCON_FALCON_ENGINE(u32) @ PFalconBase + 0x000003c0 {
+ 0:0 reset => bool;
+ }
+
+ pub(crate) NV_PFALCON_FBIF_TRANSCFG(u32)[8] @ PFalconBase + 0x00000600 {
+ 2:2 mem_type => FalconFbifMemType;
+ 1:0 target ?=> FalconFbifTarget;
+ }
+
+ pub(crate) NV_PFALCON_FBIF_CTL(u32) @ PFalconBase + 0x00000624 {
+ 7:7 allow_phys_no_ctx => bool;
+ }
+}
+
+impl NV_PFALCON_FALCON_DMACTL {
+ /// Returns `true` if memory scrubbing is completed.
+ pub(crate) fn mem_scrubbing_done(self) -> bool {
+ !self.dmem_scrubbing() && !self.imem_scrubbing()
+ }
+}
+
+impl NV_PFALCON_FALCON_DMATRFCMD {
+ /// Programs the `imem` and `sec` fields for the given FalconMem
+ pub(crate) fn with_falcon_mem(self, mem: FalconMem) -> Self {
+ let this = self.with_imem(mem != FalconMem::Dmem);
+
+ match mem {
+ FalconMem::ImemSecure => this.with_const_sec::<1>(),
+ _ => this.with_const_sec::<0>(),
+ }
+ }
+}
+
+impl NV_PFALCON_FALCON_ENGINE {
+ /// Resets the falcon
+ pub(crate) fn reset_engine<E: FalconEngine>(bar: &Bar0) {
+ bar.update(Self::of::<E>(), |r| r.with_reset(true));
+
+ // TIMEOUT: falcon engine should not take more than 10us to reset.
+ time::delay::fsleep(time::Delta::from_micros(10));
+
+ bar.update(Self::of::<E>(), |r| r.with_reset(false));
+ }
+}
+
+impl NV_PFALCON_FALCON_HWCFG2 {
+ /// Returns `true` if memory scrubbing is completed.
+ pub(crate) fn mem_scrubbing_done(self) -> bool {
+ !self.mem_scrubbing()
+ }
+}
+
+/* PFALCON2 */
+
+register! {
+ pub(crate) NV_PFALCON2_FALCON_MOD_SEL(u32) @ PFalcon2Base + 0x00000180 {
+ 7:0 algo ?=> FalconModSelAlgo;
+ }
+
+ pub(crate) NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID(u32) @ PFalcon2Base + 0x00000198 {
+ 7:0 ucode_id => u8;
+ }
+
+ pub(crate) NV_PFALCON2_FALCON_BROM_ENGIDMASK(u32) @ PFalcon2Base + 0x0000019c {
+ 31:0 value => u32;
+ }
+
+ /// OpenRM defines this as a register array, but doesn't specify its size and only uses its
+ /// first element. Be conservative until we know the actual size or need to use more registers.
+ pub(crate) NV_PFALCON2_FALCON_BROM_PARAADDR(u32)[1] @ PFalcon2Base + 0x00000210 {
+ 31:0 value => u32;
+ }
+}
+
+// PRISCV
+
+register! {
+ /// RISC-V status register for debug (Turing and GA100 only).
+ /// Reflects current RISC-V core status.
+ pub(crate) NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS(u32) @ PFalcon2Base + 0x00000240 {
+ /// RISC-V core active/inactive status.
+ 0:0 active_stat => bool;
+ }
+
+ /// GA102 and later.
+ pub(crate) NV_PRISCV_RISCV_CPUCTL(u32) @ PFalcon2Base + 0x00000388 {
+ 7:7 active_stat => bool;
+ 0:0 halted => bool;
+ }
+
+ /// GA102 and later.
+ pub(crate) NV_PRISCV_RISCV_BCR_CTRL(u32) @ PFalcon2Base + 0x00000668 {
+ 8:8 br_fetch => bool;
+ 4:4 core_select => PeregrineCoreSelect;
+ 0:0 valid => bool;
+ }
+}
+
+// The modules below provide registers that are not identical on all supported chips. They should
+// only be used in HAL modules.
+
+pub(crate) mod gm107 {
+ use kernel::io::register;
+
+ // FUSE
+
+ register! {
+ pub(crate) NV_FUSE_STATUS_OPT_DISPLAY(u32) @ 0x00021c04 {
+ 0:0 display_disabled => bool;
+ }
+ }
+}
+
+pub(crate) mod ga100 {
+ use kernel::io::register;
+
+ // FUSE
+
+ register! {
+ pub(crate) NV_FUSE_STATUS_OPT_DISPLAY(u32) @ 0x00820c04 {
+ 0:0 display_disabled => bool;
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/sbuffer.rs b/drivers/gpu/nova-core/sbuffer.rs
new file mode 100644
index 000000000000..3a41d224c77a
--- /dev/null
+++ b/drivers/gpu/nova-core/sbuffer.rs
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use core::ops::Deref;
+
+use kernel::prelude::*;
+
+/// A buffer abstraction for discontiguous byte slices.
+///
+/// This allows you to treat multiple non-contiguous `&mut [u8]` slices
+/// of the same length as a single stream-like read/write buffer.
+///
+/// # Examples
+///
+/// ```
+// let mut buf1 = [0u8; 5];
+/// let mut buf2 = [0u8; 5];
+/// let mut sbuffer = SBufferIter::new_writer([&mut buf1[..], &mut buf2[..]]);
+///
+/// let data = b"hi world!";
+/// sbuffer.write_all(data)?;
+/// drop(sbuffer);
+///
+/// assert_eq!(buf1, *b"hi wo");
+/// assert_eq!(buf2, *b"rld!\0");
+///
+/// # Ok::<(), Error>(())
+/// ```
+pub(crate) struct SBufferIter<I: Iterator> {
+ // [`Some`] if we are not at the end of the data yet.
+ cur_slice: Option<I::Item>,
+ // All the slices remaining after `cur_slice`.
+ slices: I,
+}
+
+impl<'a, I> SBufferIter<I>
+where
+ I: Iterator,
+{
+ /// Creates a reader buffer for a discontiguous set of byte slices.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let buf1: [u8; 5] = [0, 1, 2, 3, 4];
+ /// let buf2: [u8; 5] = [5, 6, 7, 8, 9];
+ /// let sbuffer = SBufferIter::new_reader([&buf1[..], &buf2[..]]);
+ /// let sum: u8 = sbuffer.sum();
+ /// assert_eq!(sum, 45);
+ /// ```
+ pub(crate) fn new_reader(slices: impl IntoIterator<IntoIter = I>) -> Self
+ where
+ I: Iterator<Item = &'a [u8]>,
+ {
+ Self::new(slices)
+ }
+
+ /// Creates a writeable buffer for a discontiguous set of byte slices.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut buf1 = [0u8; 5];
+ /// let mut buf2 = [0u8; 5];
+ /// let mut sbuffer = SBufferIter::new_writer([&mut buf1[..], &mut buf2[..]]);
+ /// sbuffer.write_all(&[0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9][..])?;
+ /// drop(sbuffer);
+ /// assert_eq!(buf1, [0, 1, 2, 3, 4]);
+ /// assert_eq!(buf2, [5, 6, 7, 8, 9]);
+ ///
+ /// ```
+ pub(crate) fn new_writer(slices: impl IntoIterator<IntoIter = I>) -> Self
+ where
+ I: Iterator<Item = &'a mut [u8]>,
+ {
+ Self::new(slices)
+ }
+
+ fn new(slices: impl IntoIterator<IntoIter = I>) -> Self
+ where
+ I::Item: Deref<Target = [u8]>,
+ {
+ let mut slices = slices.into_iter();
+
+ Self {
+ // Skip empty slices.
+ cur_slice: slices.find(|s| !s.deref().is_empty()),
+ slices,
+ }
+ }
+
+ /// Returns a slice of at most `len` bytes, or [`None`] if we are at the end of the data.
+ ///
+ /// If a slice shorter than `len` bytes has been returned, the caller can call this method
+ /// again until it returns [`None`] to try and obtain the remainder of the data.
+ ///
+ /// The closure `f` should split the slice received in it's first parameter
+ /// at the position given in the second parameter.
+ fn get_slice_internal(
+ &mut self,
+ len: usize,
+ mut f: impl FnMut(I::Item, usize) -> (I::Item, I::Item),
+ ) -> Option<I::Item>
+ where
+ I::Item: Deref<Target = [u8]>,
+ {
+ match self.cur_slice.take() {
+ None => None,
+ Some(cur_slice) => {
+ if len >= cur_slice.len() {
+ // Caller requested more data than is in the current slice, return it entirely
+ // and prepare the following slice for being used. Skip empty slices to avoid
+ // trouble.
+ self.cur_slice = self.slices.find(|s| !s.is_empty());
+
+ Some(cur_slice)
+ } else {
+ // The current slice can satisfy the request, split it and return a slice of
+ // the requested size.
+ let (ret, next) = f(cur_slice, len);
+ self.cur_slice = Some(next);
+
+ Some(ret)
+ }
+ }
+ }
+ }
+
+ /// Returns whether this buffer still has data available.
+ pub(crate) fn is_empty(&self) -> bool {
+ self.cur_slice.is_none()
+ }
+}
+
+/// Provides a way to get non-mutable slices of data to read from.
+impl<'a, I> SBufferIter<I>
+where
+ I: Iterator<Item = &'a [u8]>,
+{
+ /// Returns a slice of at most `len` bytes, or [`None`] if we are at the end of the data.
+ ///
+ /// If a slice shorter than `len` bytes has been returned, the caller can call this method
+ /// again until it returns [`None`] to try and obtain the remainder of the data.
+ fn get_slice(&mut self, len: usize) -> Option<&'a [u8]> {
+ self.get_slice_internal(len, |s, pos| s.split_at(pos))
+ }
+
+ /// Ideally we would implement `Read`, but it is not available in `core`.
+ /// So mimic `std::io::Read::read_exact`.
+ #[expect(unused)]
+ pub(crate) fn read_exact(&mut self, mut dst: &mut [u8]) -> Result {
+ while !dst.is_empty() {
+ match self.get_slice(dst.len()) {
+ None => return Err(EINVAL),
+ Some(src) => {
+ let dst_slice;
+ (dst_slice, dst) = dst.split_at_mut(src.len());
+ dst_slice.copy_from_slice(src);
+ }
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Read all the remaining data into a [`KVec`].
+ ///
+ /// `self` will be empty after this operation.
+ pub(crate) fn flush_into_kvec(&mut self, flags: kernel::alloc::Flags) -> Result<KVec<u8>> {
+ let mut buf = KVec::<u8>::new();
+
+ if let Some(slice) = core::mem::take(&mut self.cur_slice) {
+ buf.extend_from_slice(slice, flags)?;
+ }
+ for slice in &mut self.slices {
+ buf.extend_from_slice(slice, flags)?;
+ }
+
+ Ok(buf)
+ }
+}
+
+/// Provides a way to get mutable slices of data to write into.
+impl<'a, I> SBufferIter<I>
+where
+ I: Iterator<Item = &'a mut [u8]>,
+{
+ /// Returns a mutable slice of at most `len` bytes, or [`None`] if we are at the end of the
+ /// data.
+ ///
+ /// If a slice shorter than `len` bytes has been returned, the caller can call this method
+ /// again until it returns `None` to try and obtain the remainder of the data.
+ fn get_slice_mut(&mut self, len: usize) -> Option<&'a mut [u8]> {
+ self.get_slice_internal(len, |s, pos| s.split_at_mut(pos))
+ }
+
+ /// Ideally we would implement [`Write`], but it is not available in `core`.
+ /// So mimic `std::io::Write::write_all`.
+ pub(crate) fn write_all(&mut self, mut src: &[u8]) -> Result {
+ while !src.is_empty() {
+ match self.get_slice_mut(src.len()) {
+ None => return Err(ETOOSMALL),
+ Some(dst) => {
+ let src_slice;
+ (src_slice, src) = src.split_at(dst.len());
+ dst.copy_from_slice(src_slice);
+ }
+ }
+ }
+
+ Ok(())
+ }
+}
+
+impl<'a, I> Iterator for SBufferIter<I>
+where
+ I: Iterator<Item = &'a [u8]>,
+{
+ type Item = u8;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ // Returned slices are guaranteed to not be empty so we can safely index the first entry.
+ self.get_slice(1).map(|s| s[0])
+ }
+}
diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs
new file mode 100644
index 000000000000..ebda28e596c5
--- /dev/null
+++ b/drivers/gpu/nova-core/vbios.rs
@@ -0,0 +1,1086 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! VBIOS extraction and parsing.
+
+use core::convert::TryFrom;
+
+use kernel::{
+ device,
+ io::Io,
+ prelude::*,
+ ptr::{
+ Alignable,
+ Alignment, //
+ },
+ sync::aref::ARef,
+ transmute::FromBytes,
+};
+
+use crate::{
+ driver::Bar0,
+ firmware::{
+ fwsec::Bcrt30Rsa3kSignature,
+ FalconUCodeDesc,
+ FalconUCodeDescV2,
+ FalconUCodeDescV3, //
+ },
+ num::FromSafeCast,
+};
+
+/// The offset of the VBIOS ROM in the BAR0 space.
+const ROM_OFFSET: usize = 0x300000;
+/// The maximum length of the VBIOS ROM to scan into.
+const BIOS_MAX_SCAN_LEN: usize = 0x100000;
+/// The size to read ahead when parsing initial BIOS image headers.
+const BIOS_READ_AHEAD_SIZE: usize = 1024;
+/// The bit in the last image indicator byte for the PCI Data Structure that
+/// indicates the last image. Bit 0-6 are reserved, bit 7 is last image bit.
+const LAST_IMAGE_BIT_MASK: u8 = 0x80;
+
+/// BIOS Image Type from PCI Data Structure code_type field.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(u8)]
+enum BiosImageType {
+ /// PC-AT compatible BIOS image (x86 legacy)
+ PciAt = 0x00,
+ /// EFI (Extensible Firmware Interface) BIOS image
+ Efi = 0x03,
+ /// NBSI (Notebook System Information) BIOS image
+ Nbsi = 0x70,
+ /// FwSec (Firmware Security) BIOS image
+ FwSec = 0xE0,
+}
+
+impl TryFrom<u8> for BiosImageType {
+ type Error = Error;
+
+ fn try_from(code: u8) -> Result<Self> {
+ match code {
+ 0x00 => Ok(Self::PciAt),
+ 0x03 => Ok(Self::Efi),
+ 0x70 => Ok(Self::Nbsi),
+ 0xE0 => Ok(Self::FwSec),
+ _ => Err(EINVAL),
+ }
+ }
+}
+
+// PMU lookup table entry types. Used to locate PMU table entries
+// in the Fwsec image, corresponding to falcon ucodes.
+#[expect(dead_code)]
+const FALCON_UCODE_ENTRY_APPID_FIRMWARE_SEC_LIC: u8 = 0x05;
+#[expect(dead_code)]
+const FALCON_UCODE_ENTRY_APPID_FWSEC_DBG: u8 = 0x45;
+const FALCON_UCODE_ENTRY_APPID_FWSEC_PROD: u8 = 0x85;
+
+/// Vbios Reader for constructing the VBIOS data.
+struct VbiosIterator<'a> {
+ dev: &'a device::Device,
+ bar0: &'a Bar0,
+ /// VBIOS data vector: As BIOS images are scanned, they are added to this vector for reference
+ /// or copying into other data structures. It is the entire scanned contents of the VBIOS which
+ /// progressively extends. It is used so that we do not re-read any contents that are already
+ /// read as we use the cumulative length read so far, and re-read any gaps as we extend the
+ /// length.
+ data: KVec<u8>,
+ /// Current offset of the [`Iterator`].
+ current_offset: usize,
+ /// Indicate whether the last image has been found.
+ last_found: bool,
+}
+
+impl<'a> VbiosIterator<'a> {
+ fn new(dev: &'a device::Device, bar0: &'a Bar0) -> Result<Self> {
+ Ok(Self {
+ dev,
+ bar0,
+ data: KVec::new(),
+ current_offset: 0,
+ last_found: false,
+ })
+ }
+
+ /// Read bytes from the ROM at the current end of the data vector.
+ fn read_more(&mut self, len: usize) -> Result {
+ let current_len = self.data.len();
+ let start = ROM_OFFSET + current_len;
+
+ // Ensure length is a multiple of 4 for 32-bit reads
+ if len % core::mem::size_of::<u32>() != 0 {
+ dev_err!(
+ self.dev,
+ "VBIOS read length {} is not a multiple of 4\n",
+ len
+ );
+ return Err(EINVAL);
+ }
+
+ self.data.reserve(len, GFP_KERNEL)?;
+ // Read ROM data bytes and push directly to `data`.
+ for addr in (start..start + len).step_by(core::mem::size_of::<u32>()) {
+ // Read 32-bit word from the VBIOS ROM
+ let word = self.bar0.try_read32(addr)?;
+
+ // Convert the `u32` to a 4 byte array and push each byte.
+ word.to_ne_bytes()
+ .iter()
+ .try_for_each(|&b| self.data.push(b, GFP_KERNEL))?;
+ }
+
+ Ok(())
+ }
+
+ /// Read bytes at a specific offset, filling any gap.
+ fn read_more_at_offset(&mut self, offset: usize, len: usize) -> Result {
+ if offset > BIOS_MAX_SCAN_LEN {
+ dev_err!(self.dev, "Error: exceeded BIOS scan limit.\n");
+ return Err(EINVAL);
+ }
+
+ // If `offset` is beyond current data size, fill the gap first.
+ let current_len = self.data.len();
+ let gap_bytes = offset.saturating_sub(current_len);
+
+ // Now read the requested bytes at the offset.
+ self.read_more(gap_bytes + len)
+ }
+
+ /// Read a BIOS image at a specific offset and create a [`BiosImage`] from it.
+ ///
+ /// `self.data` is extended as needed and a new [`BiosImage`] is returned.
+ /// `context` is a string describing the operation for error reporting.
+ fn read_bios_image_at_offset(
+ &mut self,
+ offset: usize,
+ len: usize,
+ context: &str,
+ ) -> Result<BiosImage> {
+ let data_len = self.data.len();
+ if offset + len > data_len {
+ self.read_more_at_offset(offset, len).inspect_err(|e| {
+ dev_err!(
+ self.dev,
+ "Failed to read more at offset {:#x}: {:?}\n",
+ offset,
+ e
+ )
+ })?;
+ }
+
+ BiosImage::new(self.dev, &self.data[offset..offset + len]).inspect_err(|err| {
+ dev_err!(
+ self.dev,
+ "Failed to {} at offset {:#x}: {:?}\n",
+ context,
+ offset,
+ err
+ )
+ })
+ }
+}
+
+impl<'a> Iterator for VbiosIterator<'a> {
+ type Item = Result<BiosImage>;
+
+ /// Iterate over all VBIOS images until the last image is detected or offset
+ /// exceeds scan limit.
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.last_found {
+ return None;
+ }
+
+ if self.current_offset > BIOS_MAX_SCAN_LEN {
+ dev_err!(self.dev, "Error: exceeded BIOS scan limit, stopping scan\n");
+ return None;
+ }
+
+ // Parse image headers first to get image size.
+ let image_size = match self.read_bios_image_at_offset(
+ self.current_offset,
+ BIOS_READ_AHEAD_SIZE,
+ "parse initial BIOS image headers",
+ ) {
+ Ok(image) => image.image_size_bytes(),
+ Err(e) => return Some(Err(e)),
+ };
+
+ // Now create a new `BiosImage` with the full image data.
+ let full_image = match self.read_bios_image_at_offset(
+ self.current_offset,
+ image_size,
+ "parse full BIOS image",
+ ) {
+ Ok(image) => image,
+ Err(e) => return Some(Err(e)),
+ };
+
+ self.last_found = full_image.is_last();
+
+ // Advance to next image (aligned to 512 bytes).
+ self.current_offset += image_size;
+ self.current_offset = self.current_offset.align_up(Alignment::new::<512>())?;
+
+ Some(Ok(full_image))
+ }
+}
+
+pub(crate) struct Vbios {
+ fwsec_image: FwSecBiosImage,
+}
+
+impl Vbios {
+ /// Probe for VBIOS extraction.
+ ///
+ /// Once the VBIOS object is built, `bar0` is not read for [`Vbios`] purposes anymore.
+ pub(crate) fn new(dev: &device::Device, bar0: &Bar0) -> Result<Vbios> {
+ // Images to extract from iteration
+ let mut pci_at_image: Option<PciAtBiosImage> = None;
+ let mut first_fwsec_image: Option<FwSecBiosBuilder> = None;
+ let mut second_fwsec_image: Option<FwSecBiosBuilder> = None;
+
+ // Parse all VBIOS images in the ROM
+ for image_result in VbiosIterator::new(dev, bar0)? {
+ let image = image_result?;
+
+ dev_dbg!(
+ dev,
+ "Found BIOS image: size: {:#x}, type: {:?}, last: {}\n",
+ image.image_size_bytes(),
+ image.image_type(),
+ image.is_last()
+ );
+
+ // Convert to a specific image type
+ match BiosImageType::try_from(image.pcir.code_type) {
+ Ok(BiosImageType::PciAt) => {
+ pci_at_image = Some(PciAtBiosImage::try_from(image)?);
+ }
+ Ok(BiosImageType::FwSec) => {
+ let fwsec = FwSecBiosBuilder {
+ base: image,
+ falcon_data_offset: None,
+ pmu_lookup_table: None,
+ falcon_ucode_offset: None,
+ };
+ if first_fwsec_image.is_none() {
+ first_fwsec_image = Some(fwsec);
+ } else {
+ second_fwsec_image = Some(fwsec);
+ }
+ }
+ _ => {
+ // Ignore other image types or unknown types
+ }
+ }
+ }
+
+ // Using all the images, setup the falcon data pointer in Fwsec.
+ if let (Some(mut second), Some(first), Some(pci_at)) =
+ (second_fwsec_image, first_fwsec_image, pci_at_image)
+ {
+ second
+ .setup_falcon_data(&pci_at, &first)
+ .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?;
+ Ok(Vbios {
+ fwsec_image: second.build()?,
+ })
+ } else {
+ dev_err!(
+ dev,
+ "Missing required images for falcon data setup, skipping\n"
+ );
+ Err(EINVAL)
+ }
+ }
+
+ pub(crate) fn fwsec_image(&self) -> &FwSecBiosImage {
+ &self.fwsec_image
+ }
+}
+
+/// PCI Data Structure as defined in PCI Firmware Specification
+#[derive(Debug, Clone)]
+#[repr(C)]
+struct PcirStruct {
+ /// PCI Data Structure signature ("PCIR" or "NPDS")
+ signature: [u8; 4],
+ /// PCI Vendor ID (e.g., 0x10DE for NVIDIA)
+ vendor_id: u16,
+ /// PCI Device ID
+ device_id: u16,
+ /// Device List Pointer
+ device_list_ptr: u16,
+ /// PCI Data Structure Length
+ pci_data_struct_len: u16,
+ /// PCI Data Structure Revision
+ pci_data_struct_rev: u8,
+ /// Class code (3 bytes, 0x03 for display controller)
+ class_code: [u8; 3],
+ /// Size of this image in 512-byte blocks
+ image_len: u16,
+ /// Revision Level of the Vendor's ROM
+ vendor_rom_rev: u16,
+ /// ROM image type (0x00 = PC-AT compatible, 0x03 = EFI, 0x70 = NBSI)
+ code_type: u8,
+ /// Last image indicator (0x00 = Not last image, 0x80 = Last image)
+ last_image: u8,
+ /// Maximum Run-time Image Length (units of 512 bytes)
+ max_runtime_image_len: u16,
+}
+
+// SAFETY: all bit patterns are valid for `PcirStruct`.
+unsafe impl FromBytes for PcirStruct {}
+
+impl PcirStruct {
+ fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
+ let (pcir, _) = PcirStruct::from_bytes_copy_prefix(data).ok_or(EINVAL)?;
+
+ // Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e).
+ if &pcir.signature != b"PCIR" && &pcir.signature != b"NPDS" {
+ dev_err!(
+ dev,
+ "Invalid signature for PcirStruct: {:?}\n",
+ pcir.signature
+ );
+ return Err(EINVAL);
+ }
+
+ if pcir.image_len == 0 {
+ dev_err!(dev, "Invalid image length: 0\n");
+ return Err(EINVAL);
+ }
+
+ Ok(pcir)
+ }
+
+ /// Check if this is the last image in the ROM.
+ fn is_last(&self) -> bool {
+ self.last_image & LAST_IMAGE_BIT_MASK != 0
+ }
+
+ /// Calculate image size in bytes from 512-byte blocks.
+ fn image_size_bytes(&self) -> usize {
+ usize::from(self.image_len) * 512
+ }
+}
+
+/// BIOS Information Table (BIT) Header.
+///
+/// This is the head of the BIT table, that is used to locate the Falcon data. The BIT table (with
+/// its header) is in the [`PciAtBiosImage`] and the falcon data it is pointing to is in the
+/// [`FwSecBiosImage`].
+#[derive(Debug, Clone, Copy)]
+#[repr(C)]
+struct BitHeader {
+ /// 0h: BIT Header Identifier (BMP=0x7FFF/BIT=0xB8FF)
+ id: u16,
+ /// 2h: BIT Header Signature ("BIT\0")
+ signature: [u8; 4],
+ /// 6h: Binary Coded Decimal Version, ex: 0x0100 is 1.00.
+ bcd_version: u16,
+ /// 8h: Size of BIT Header (in bytes)
+ header_size: u8,
+ /// 9h: Size of BIT Tokens (in bytes)
+ token_size: u8,
+ /// 10h: Number of token entries that follow
+ token_entries: u8,
+ /// 11h: BIT Header Checksum
+ checksum: u8,
+}
+
+// SAFETY: all bit patterns are valid for `BitHeader`.
+unsafe impl FromBytes for BitHeader {}
+
+impl BitHeader {
+ fn new(data: &[u8]) -> Result<Self> {
+ let (header, _) = BitHeader::from_bytes_copy_prefix(data).ok_or(EINVAL)?;
+
+ // Check header ID and signature
+ if header.id != 0xB8FF || &header.signature != b"BIT\0" {
+ return Err(EINVAL);
+ }
+
+ Ok(header)
+ }
+}
+
+/// BIT Token Entry: Records in the BIT table followed by the BIT header.
+#[derive(Debug, Clone, Copy)]
+#[expect(dead_code)]
+struct BitToken {
+ /// 00h: Token identifier
+ id: u8,
+ /// 01h: Version of the token data
+ data_version: u8,
+ /// 02h: Size of token data in bytes
+ data_size: u16,
+ /// 04h: Offset to the token data
+ data_offset: u16,
+}
+
+// Define the token ID for the Falcon data
+const BIT_TOKEN_ID_FALCON_DATA: u8 = 0x70;
+
+impl BitToken {
+ /// Find a BIT token entry by BIT ID in a PciAtBiosImage
+ fn from_id(image: &PciAtBiosImage, token_id: u8) -> Result<Self> {
+ let header = &image.bit_header;
+
+ // Offset to the first token entry
+ let tokens_start = image.bit_offset + usize::from(header.header_size);
+
+ for i in 0..usize::from(header.token_entries) {
+ let entry_offset = tokens_start + (i * usize::from(header.token_size));
+
+ // Make sure we don't go out of bounds
+ if entry_offset + usize::from(header.token_size) > image.base.data.len() {
+ return Err(EINVAL);
+ }
+
+ // Check if this token has the requested ID
+ if image.base.data[entry_offset] == token_id {
+ return Ok(BitToken {
+ id: image.base.data[entry_offset],
+ data_version: image.base.data[entry_offset + 1],
+ data_size: u16::from_le_bytes([
+ image.base.data[entry_offset + 2],
+ image.base.data[entry_offset + 3],
+ ]),
+ data_offset: u16::from_le_bytes([
+ image.base.data[entry_offset + 4],
+ image.base.data[entry_offset + 5],
+ ]),
+ });
+ }
+ }
+
+ // Token not found
+ Err(ENOENT)
+ }
+}
+
+/// PCI ROM Expansion Header as defined in PCI Firmware Specification.
+///
+/// This is header is at the beginning of every image in the set of images in the ROM. It contains
+/// a pointer to the PCI Data Structure which describes the image. For "NBSI" images (NoteBook
+/// System Information), the ROM header deviates from the standard and contains an offset to the
+/// NBSI image however we do not yet parse that in this module and keep it for future reference.
+#[derive(Debug, Clone, Copy)]
+#[expect(dead_code)]
+struct PciRomHeader {
+ /// 00h: Signature (0xAA55)
+ signature: u16,
+ /// 02h: Reserved bytes for processor architecture unique data (20 bytes)
+ reserved: [u8; 20],
+ /// 16h: NBSI Data Offset (NBSI-specific, offset from header to NBSI image)
+ nbsi_data_offset: Option<u16>,
+ /// 18h: Pointer to PCI Data Structure (offset from start of ROM image)
+ pci_data_struct_offset: u16,
+ /// 1Ah: Size of block (this is NBSI-specific)
+ size_of_block: Option<u32>,
+}
+
+impl PciRomHeader {
+ fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
+ if data.len() < 26 {
+ // Need at least 26 bytes to read pciDataStrucPtr and sizeOfBlock.
+ return Err(EINVAL);
+ }
+
+ let signature = u16::from_le_bytes([data[0], data[1]]);
+
+ // Check for valid ROM signatures.
+ match signature {
+ 0xAA55 | 0xBB77 | 0x4E56 => {}
+ _ => {
+ dev_err!(dev, "ROM signature unknown {:#x}\n", signature);
+ return Err(EINVAL);
+ }
+ }
+
+ // Read the pointer to the PCI Data Structure at offset 0x18.
+ let pci_data_struct_ptr = u16::from_le_bytes([data[24], data[25]]);
+
+ // Try to read optional fields if enough data.
+ let mut size_of_block = None;
+ let mut nbsi_data_offset = None;
+
+ if data.len() >= 30 {
+ // Read size_of_block at offset 0x1A.
+ size_of_block = Some(u32::from_le_bytes([data[26], data[27], data[28], data[29]]));
+ }
+
+ // For NBSI images, try to read the nbsiDataOffset at offset 0x16.
+ if data.len() >= 24 {
+ nbsi_data_offset = Some(u16::from_le_bytes([data[22], data[23]]));
+ }
+
+ Ok(PciRomHeader {
+ signature,
+ reserved: [0u8; 20],
+ pci_data_struct_offset: pci_data_struct_ptr,
+ size_of_block,
+ nbsi_data_offset,
+ })
+ }
+}
+
+/// NVIDIA PCI Data Extension Structure.
+///
+/// This is similar to the PCI Data Structure, but is Nvidia-specific and is placed right after the
+/// PCI Data Structure. It contains some fields that are redundant with the PCI Data Structure, but
+/// are needed for traversing the BIOS images. It is expected to be present in all BIOS images
+/// except for NBSI images.
+#[derive(Debug, Clone)]
+#[repr(C)]
+struct NpdeStruct {
+ /// 00h: Signature ("NPDE")
+ signature: [u8; 4],
+ /// 04h: NVIDIA PCI Data Extension Revision
+ npci_data_ext_rev: u16,
+ /// 06h: NVIDIA PCI Data Extension Length
+ npci_data_ext_len: u16,
+ /// 08h: Sub-image Length (in 512-byte units)
+ subimage_len: u16,
+ /// 0Ah: Last image indicator flag
+ last_image: u8,
+}
+
+// SAFETY: all bit patterns are valid for `NpdeStruct`.
+unsafe impl FromBytes for NpdeStruct {}
+
+impl NpdeStruct {
+ fn new(dev: &device::Device, data: &[u8]) -> Option<Self> {
+ let (npde, _) = NpdeStruct::from_bytes_copy_prefix(data)?;
+
+ // Signature should be "NPDE" (0x4544504E).
+ if &npde.signature != b"NPDE" {
+ dev_dbg!(
+ dev,
+ "Invalid signature for NpdeStruct: {:?}\n",
+ npde.signature
+ );
+ return None;
+ }
+
+ if npde.subimage_len == 0 {
+ dev_dbg!(dev, "Invalid subimage length: 0\n");
+ return None;
+ }
+
+ Some(npde)
+ }
+
+ /// Check if this is the last image in the ROM.
+ fn is_last(&self) -> bool {
+ self.last_image & LAST_IMAGE_BIT_MASK != 0
+ }
+
+ /// Calculate image size in bytes from 512-byte blocks.
+ fn image_size_bytes(&self) -> usize {
+ usize::from(self.subimage_len) * 512
+ }
+
+ /// Try to find NPDE in the data, the NPDE is right after the PCIR.
+ fn find_in_data(
+ dev: &device::Device,
+ data: &[u8],
+ rom_header: &PciRomHeader,
+ pcir: &PcirStruct,
+ ) -> Option<Self> {
+ // Calculate the offset where NPDE might be located
+ // NPDE should be right after the PCIR structure, aligned to 16 bytes
+ let pcir_offset = usize::from(rom_header.pci_data_struct_offset);
+ let npde_start = (pcir_offset + usize::from(pcir.pci_data_struct_len) + 0x0F) & !0x0F;
+
+ // Check if we have enough data
+ if npde_start + core::mem::size_of::<Self>() > data.len() {
+ dev_dbg!(dev, "Not enough data for NPDE\n");
+ return None;
+ }
+
+ // Try to create NPDE from the data
+ NpdeStruct::new(dev, &data[npde_start..])
+ }
+}
+
+/// The PciAt BIOS image is typically the first BIOS image type found in the BIOS image chain.
+///
+/// It contains the BIT header and the BIT tokens.
+struct PciAtBiosImage {
+ base: BiosImage,
+ bit_header: BitHeader,
+ bit_offset: usize,
+}
+
+#[expect(dead_code)]
+struct EfiBiosImage {
+ base: BiosImage,
+ // EFI-specific fields can be added here in the future.
+}
+
+#[expect(dead_code)]
+struct NbsiBiosImage {
+ base: BiosImage,
+ // NBSI-specific fields can be added here in the future.
+}
+
+struct FwSecBiosBuilder {
+ base: BiosImage,
+ /// These are temporary fields that are used during the construction of the
+ /// [`FwSecBiosBuilder`].
+ ///
+ /// Once FwSecBiosBuilder is constructed, the `falcon_ucode_offset` will be copied into a new
+ /// [`FwSecBiosImage`].
+ ///
+ /// The offset of the Falcon data from the start of Fwsec image.
+ falcon_data_offset: Option<usize>,
+ /// The [`PmuLookupTable`] starts at the offset of the falcon data pointer.
+ pmu_lookup_table: Option<PmuLookupTable>,
+ /// The offset of the Falcon ucode.
+ falcon_ucode_offset: Option<usize>,
+}
+
+/// The [`FwSecBiosImage`] structure contains the PMU table and the Falcon Ucode.
+///
+/// The PMU table contains voltage/frequency tables as well as a pointer to the Falcon Ucode.
+pub(crate) struct FwSecBiosImage {
+ base: BiosImage,
+ /// The offset of the Falcon ucode.
+ falcon_ucode_offset: usize,
+}
+
+/// BIOS Image structure containing various headers and reference fields to all BIOS images.
+///
+/// A BiosImage struct is embedded into all image types and implements common operations.
+#[expect(dead_code)]
+struct BiosImage {
+ /// Used for logging.
+ dev: ARef<device::Device>,
+ /// PCI ROM Expansion Header
+ rom_header: PciRomHeader,
+ /// PCI Data Structure
+ pcir: PcirStruct,
+ /// NVIDIA PCI Data Extension (optional)
+ npde: Option<NpdeStruct>,
+ /// Image data (includes ROM header and PCIR)
+ data: KVec<u8>,
+}
+
+impl BiosImage {
+ /// Get the image size in bytes.
+ fn image_size_bytes(&self) -> usize {
+ // Prefer NPDE image size if available
+ if let Some(ref npde) = self.npde {
+ npde.image_size_bytes()
+ } else {
+ // Otherwise, fall back to the PCIR image size
+ self.pcir.image_size_bytes()
+ }
+ }
+
+ /// Get the BIOS image type.
+ fn image_type(&self) -> Result<BiosImageType> {
+ BiosImageType::try_from(self.pcir.code_type)
+ }
+
+ /// Check if this is the last image.
+ fn is_last(&self) -> bool {
+ // For NBSI images, return true as they're considered the last image.
+ if self.image_type() == Ok(BiosImageType::Nbsi) {
+ return true;
+ }
+
+ // For other image types, check the NPDE first if available
+ if let Some(ref npde) = self.npde {
+ return npde.is_last();
+ }
+
+ // Otherwise, fall back to checking the PCIR last_image flag
+ self.pcir.is_last()
+ }
+
+ /// Creates a new BiosImage from raw byte data.
+ fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
+ // Ensure we have enough data for the ROM header.
+ if data.len() < 26 {
+ dev_err!(dev, "Not enough data for ROM header\n");
+ return Err(EINVAL);
+ }
+
+ // Parse the ROM header.
+ let rom_header = PciRomHeader::new(dev, &data[0..26])
+ .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?;
+
+ // Get the PCI Data Structure using the pointer from the ROM header.
+ let pcir_offset = usize::from(rom_header.pci_data_struct_offset);
+ let pcir_data = data
+ .get(pcir_offset..pcir_offset + core::mem::size_of::<PcirStruct>())
+ .ok_or(EINVAL)
+ .inspect_err(|_| {
+ dev_err!(
+ dev,
+ "PCIR offset {:#x} out of bounds (data length: {})\n",
+ pcir_offset,
+ data.len()
+ );
+ dev_err!(
+ dev,
+ "Consider reading more data for construction of BiosImage\n"
+ );
+ })?;
+
+ let pcir = PcirStruct::new(dev, pcir_data)
+ .inspect_err(|e| dev_err!(dev, "Failed to create PcirStruct: {:?}\n", e))?;
+
+ // Look for NPDE structure if this is not an NBSI image (type != 0x70).
+ let npde = NpdeStruct::find_in_data(dev, data, &rom_header, &pcir);
+
+ // Create a copy of the data.
+ let mut data_copy = KVec::new();
+ data_copy.extend_from_slice(data, GFP_KERNEL)?;
+
+ Ok(BiosImage {
+ dev: dev.into(),
+ rom_header,
+ pcir,
+ npde,
+ data: data_copy,
+ })
+ }
+}
+
+impl PciAtBiosImage {
+ /// Find a byte pattern in a slice.
+ fn find_byte_pattern(haystack: &[u8], needle: &[u8]) -> Result<usize> {
+ haystack
+ .windows(needle.len())
+ .position(|window| window == needle)
+ .ok_or(EINVAL)
+ }
+
+ /// Find the BIT header in the [`PciAtBiosImage`].
+ fn find_bit_header(data: &[u8]) -> Result<(BitHeader, usize)> {
+ let bit_pattern = [0xff, 0xb8, b'B', b'I', b'T', 0x00];
+ let bit_offset = Self::find_byte_pattern(data, &bit_pattern)?;
+ let bit_header = BitHeader::new(&data[bit_offset..])?;
+
+ Ok((bit_header, bit_offset))
+ }
+
+ /// Get a BIT token entry from the BIT table in the [`PciAtBiosImage`]
+ fn get_bit_token(&self, token_id: u8) -> Result<BitToken> {
+ BitToken::from_id(self, token_id)
+ }
+
+ /// Find the Falcon data pointer structure in the [`PciAtBiosImage`].
+ ///
+ /// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC
+ /// image.
+ fn falcon_data_ptr(&self) -> Result<u32> {
+ let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?;
+
+ // Make sure we don't go out of bounds
+ if usize::from(token.data_offset) + 4 > self.base.data.len() {
+ return Err(EINVAL);
+ }
+
+ // read the 4 bytes at the offset specified in the token
+ let offset = usize::from(token.data_offset);
+ let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| {
+ dev_err!(self.base.dev, "Failed to convert data slice to array\n");
+ EINVAL
+ })?;
+
+ let data_ptr = u32::from_le_bytes(bytes);
+
+ if (usize::from_safe_cast(data_ptr)) < self.base.data.len() {
+ dev_err!(self.base.dev, "Falcon data pointer out of bounds\n");
+ return Err(EINVAL);
+ }
+
+ Ok(data_ptr)
+ }
+}
+
+impl TryFrom<BiosImage> for PciAtBiosImage {
+ type Error = Error;
+
+ fn try_from(base: BiosImage) -> Result<Self> {
+ let data_slice = &base.data;
+ let (bit_header, bit_offset) = PciAtBiosImage::find_bit_header(data_slice)?;
+
+ Ok(PciAtBiosImage {
+ base,
+ bit_header,
+ bit_offset,
+ })
+ }
+}
+
+/// The [`PmuLookupTableEntry`] structure is a single entry in the [`PmuLookupTable`].
+///
+/// See the [`PmuLookupTable`] description for more information.
+#[repr(C, packed)]
+struct PmuLookupTableEntry {
+ application_id: u8,
+ target_id: u8,
+ data: u32,
+}
+
+impl PmuLookupTableEntry {
+ fn new(data: &[u8]) -> Result<Self> {
+ if data.len() < core::mem::size_of::<Self>() {
+ return Err(EINVAL);
+ }
+
+ Ok(PmuLookupTableEntry {
+ application_id: data[0],
+ target_id: data[1],
+ data: u32::from_le_bytes(data[2..6].try_into().map_err(|_| EINVAL)?),
+ })
+ }
+}
+
+#[repr(C)]
+struct PmuLookupTableHeader {
+ version: u8,
+ header_len: u8,
+ entry_len: u8,
+ entry_count: u8,
+}
+
+// SAFETY: all bit patterns are valid for `PmuLookupTableHeader`.
+unsafe impl FromBytes for PmuLookupTableHeader {}
+
+/// The [`PmuLookupTableEntry`] structure is used to find the [`PmuLookupTableEntry`] for a given
+/// application ID.
+///
+/// The table of entries is pointed to by the falcon data pointer in the BIT table, and is used to
+/// locate the Falcon Ucode.
+struct PmuLookupTable {
+ header: PmuLookupTableHeader,
+ table_data: KVec<u8>,
+}
+
+impl PmuLookupTable {
+ fn new(dev: &device::Device, data: &[u8]) -> Result<Self> {
+ let (header, _) = PmuLookupTableHeader::from_bytes_copy_prefix(data).ok_or(EINVAL)?;
+
+ let header_len = usize::from(header.header_len);
+ let entry_len = usize::from(header.entry_len);
+ let entry_count = usize::from(header.entry_count);
+
+ let required_bytes = header_len + (entry_count * entry_len);
+
+ if data.len() < required_bytes {
+ dev_err!(dev, "PmuLookupTable data length less than required\n");
+ return Err(EINVAL);
+ }
+
+ // Create a copy of only the table data
+ let table_data = {
+ let mut ret = KVec::new();
+ ret.extend_from_slice(&data[header_len..required_bytes], GFP_KERNEL)?;
+ ret
+ };
+
+ Ok(PmuLookupTable { header, table_data })
+ }
+
+ fn lookup_index(&self, idx: u8) -> Result<PmuLookupTableEntry> {
+ if idx >= self.header.entry_count {
+ return Err(EINVAL);
+ }
+
+ let index = (usize::from(idx)) * usize::from(self.header.entry_len);
+ PmuLookupTableEntry::new(&self.table_data[index..])
+ }
+
+ // find entry by type value
+ fn find_entry_by_type(&self, entry_type: u8) -> Result<PmuLookupTableEntry> {
+ for i in 0..self.header.entry_count {
+ let entry = self.lookup_index(i)?;
+ if entry.application_id == entry_type {
+ return Ok(entry);
+ }
+ }
+
+ Err(EINVAL)
+ }
+}
+
+impl FwSecBiosBuilder {
+ fn setup_falcon_data(
+ &mut self,
+ pci_at_image: &PciAtBiosImage,
+ first_fwsec: &FwSecBiosBuilder,
+ ) -> Result {
+ let mut offset = usize::from_safe_cast(pci_at_image.falcon_data_ptr()?);
+ let mut pmu_in_first_fwsec = false;
+
+ // The falcon data pointer assumes that the PciAt and FWSEC images
+ // are contiguous in memory. However, testing shows the EFI image sits in
+ // between them. So calculate the offset from the end of the PciAt image
+ // rather than the start of it. Compensate.
+ offset -= pci_at_image.base.data.len();
+
+ // The offset is now from the start of the first Fwsec image, however
+ // the offset points to a location in the second Fwsec image. Since
+ // the fwsec images are contiguous, subtract the length of the first Fwsec
+ // image from the offset to get the offset to the start of the second
+ // Fwsec image.
+ if offset < first_fwsec.base.data.len() {
+ pmu_in_first_fwsec = true;
+ } else {
+ offset -= first_fwsec.base.data.len();
+ }
+
+ self.falcon_data_offset = Some(offset);
+
+ if pmu_in_first_fwsec {
+ self.pmu_lookup_table = Some(PmuLookupTable::new(
+ &self.base.dev,
+ &first_fwsec.base.data[offset..],
+ )?);
+ } else {
+ self.pmu_lookup_table = Some(PmuLookupTable::new(
+ &self.base.dev,
+ &self.base.data[offset..],
+ )?);
+ }
+
+ match self
+ .pmu_lookup_table
+ .as_ref()
+ .ok_or(EINVAL)?
+ .find_entry_by_type(FALCON_UCODE_ENTRY_APPID_FWSEC_PROD)
+ {
+ Ok(entry) => {
+ let mut ucode_offset = usize::from_safe_cast(entry.data);
+ ucode_offset -= pci_at_image.base.data.len();
+ if ucode_offset < first_fwsec.base.data.len() {
+ dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n");
+ return Err(EINVAL);
+ }
+ ucode_offset -= first_fwsec.base.data.len();
+ self.falcon_ucode_offset = Some(ucode_offset);
+ }
+ Err(e) => {
+ dev_err!(
+ self.base.dev,
+ "PmuLookupTableEntry not found, error: {:?}\n",
+ e
+ );
+ return Err(EINVAL);
+ }
+ }
+ Ok(())
+ }
+
+ /// Build the final FwSecBiosImage from this builder
+ fn build(self) -> Result<FwSecBiosImage> {
+ let ret = FwSecBiosImage {
+ base: self.base,
+ falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?,
+ };
+
+ if cfg!(debug_assertions) {
+ // Print the desc header for debugging
+ let desc = ret.header()?;
+ dev_dbg!(ret.base.dev, "PmuLookupTableEntry desc: {:#?}\n", desc);
+ }
+
+ Ok(ret)
+ }
+}
+
+impl FwSecBiosImage {
+ /// Get the FwSec header ([`FalconUCodeDesc`]).
+ pub(crate) fn header(&self) -> Result<FalconUCodeDesc> {
+ // Get the falcon ucode offset that was found in setup_falcon_data.
+ let falcon_ucode_offset = self.falcon_ucode_offset;
+
+ // Read the first 4 bytes to get the version.
+ let hdr_bytes: [u8; 4] = self.base.data[falcon_ucode_offset..falcon_ucode_offset + 4]
+ .try_into()
+ .map_err(|_| EINVAL)?;
+ let hdr = u32::from_le_bytes(hdr_bytes);
+ let ver = (hdr & 0xff00) >> 8;
+
+ let data = self.base.data.get(falcon_ucode_offset..).ok_or(EINVAL)?;
+ match ver {
+ 2 => {
+ let v2 = FalconUCodeDescV2::from_bytes_copy_prefix(data)
+ .ok_or(EINVAL)?
+ .0;
+ Ok(FalconUCodeDesc::V2(v2))
+ }
+ 3 => {
+ let v3 = FalconUCodeDescV3::from_bytes_copy_prefix(data)
+ .ok_or(EINVAL)?
+ .0;
+ Ok(FalconUCodeDesc::V3(v3))
+ }
+ _ => {
+ dev_err!(self.base.dev, "invalid fwsec firmware version: {:?}\n", ver);
+ Err(EINVAL)
+ }
+ }
+ }
+
+ /// Get the ucode data as a byte slice
+ pub(crate) fn ucode(&self, desc: &FalconUCodeDesc) -> Result<&[u8]> {
+ let falcon_ucode_offset = self.falcon_ucode_offset;
+
+ // The ucode data follows the descriptor.
+ let ucode_data_offset = falcon_ucode_offset + desc.size();
+ let size = usize::from_safe_cast(desc.imem_load_size() + desc.dmem_load_size());
+
+ // Get the data slice, checking bounds in a single operation.
+ self.base
+ .data
+ .get(ucode_data_offset..ucode_data_offset + size)
+ .ok_or(ERANGE)
+ .inspect_err(|_| {
+ dev_err!(
+ self.base.dev,
+ "fwsec ucode data not contained within BIOS bounds\n"
+ )
+ })
+ }
+
+ /// Get the signatures as a byte slice
+ pub(crate) fn sigs(&self, desc: &FalconUCodeDesc) -> Result<&[Bcrt30Rsa3kSignature]> {
+ let hdr_size = match desc {
+ FalconUCodeDesc::V2(_v2) => core::mem::size_of::<FalconUCodeDescV2>(),
+ FalconUCodeDesc::V3(_v3) => core::mem::size_of::<FalconUCodeDescV3>(),
+ };
+ // The signatures data follows the descriptor.
+ let sigs_data_offset = self.falcon_ucode_offset + hdr_size;
+ let sigs_count = usize::from(desc.signature_count());
+ let sigs_size = sigs_count * core::mem::size_of::<Bcrt30Rsa3kSignature>();
+
+ // Make sure the data is within bounds.
+ if sigs_data_offset + sigs_size > self.base.data.len() {
+ dev_err!(
+ self.base.dev,
+ "fwsec signatures data not contained within BIOS bounds\n"
+ );
+ return Err(ERANGE);
+ }
+
+ // SAFETY: we checked that `data + sigs_data_offset + (signature_count *
+ // sizeof::<Bcrt30Rsa3kSignature>()` is within the bounds of `data`.
+ Ok(unsafe {
+ core::slice::from_raw_parts(
+ self.base
+ .data
+ .as_ptr()
+ .add(sigs_data_offset)
+ .cast::<Bcrt30Rsa3kSignature>(),
+ sigs_count,
+ )
+ })
+ }
+}