diff options
Diffstat (limited to 'drivers/gpu/nova-core')
38 files changed, 11046 insertions, 0 deletions
diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig new file mode 100644 index 000000000000..a4f2380654e2 --- /dev/null +++ b/drivers/gpu/nova-core/Kconfig @@ -0,0 +1,16 @@ +config NOVA_CORE + tristate "Nova Core GPU driver" + depends on 64BIT + depends on PCI + depends on RUST + select AUXILIARY_BUS + select RUST_FW_LOADER_ABSTRACTIONS + default n + help + Choose this if you want to build the Nova Core driver for Nvidia + GPUs based on the GPU System Processor (GSP). This is true for Turing + and later GPUs. + + This driver is work in progress and may not be functional. + + If M is selected, the module will be called nova_core. diff --git a/drivers/gpu/nova-core/Makefile b/drivers/gpu/nova-core/Makefile new file mode 100644 index 000000000000..2d78c50126e1 --- /dev/null +++ b/drivers/gpu/nova-core/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_NOVA_CORE) += nova_core.o diff --git a/drivers/gpu/nova-core/bitfield.rs b/drivers/gpu/nova-core/bitfield.rs new file mode 100644 index 000000000000..02efdcf78d89 --- /dev/null +++ b/drivers/gpu/nova-core/bitfield.rs @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Bitfield library for Rust structures +//! +//! Support for defining bitfields in Rust structures. Also used by the [`register!`] macro. + +/// Defines a struct with accessors to access bits within an inner unsigned integer. 
+/// +/// # Syntax +/// +/// ```rust +/// use nova_core::bitfield; +/// +/// #[derive(Debug, Clone, Copy, Default)] +/// enum Mode { +/// #[default] +/// Low = 0, +/// High = 1, +/// Auto = 2, +/// } +/// +/// impl TryFrom<u8> for Mode { +/// type Error = u8; +/// fn try_from(value: u8) -> Result<Self, Self::Error> { +/// match value { +/// 0 => Ok(Mode::Low), +/// 1 => Ok(Mode::High), +/// 2 => Ok(Mode::Auto), +/// _ => Err(value), +/// } +/// } +/// } +/// +/// impl From<Mode> for u8 { +/// fn from(mode: Mode) -> u8 { +/// mode as u8 +/// } +/// } +/// +/// #[derive(Debug, Clone, Copy, Default)] +/// enum State { +/// #[default] +/// Inactive = 0, +/// Active = 1, +/// } +/// +/// impl From<bool> for State { +/// fn from(value: bool) -> Self { +/// if value { State::Active } else { State::Inactive } +/// } +/// } +/// +/// impl From<State> for bool { +/// fn from(state: State) -> bool { +/// match state { +/// State::Inactive => false, +/// State::Active => true, +/// } +/// } +/// } +/// +/// bitfield! { +/// pub struct ControlReg(u32) { +/// 7:7 state as bool => State; +/// 3:0 mode as u8 ?=> Mode; +/// } +/// } +/// ``` +/// +/// This generates a struct with: +/// - Field accessors: `mode()`, `state()`, etc. +/// - Field setters: `set_mode()`, `set_state()`, etc. (supports chaining with builder pattern). +/// Note that the compiler will error out if the size of the setter's arg exceeds the +/// struct's storage size. +/// - Debug and Default implementations. +/// +/// Note: Field accessors and setters inherit the same visibility as the struct itself. +/// In the example above, both `mode()` and `set_mode()` methods will be `pub`. +/// +/// Fields are defined as follows: +/// +/// - `as <type>` simply returns the field value cast to <type>, typically `u32`, `u16`, `u8` or +/// `bool`. Note that `bool` fields must have a range of 1 bit. +/// - `as <type> => <into_type>` calls `<into_type>`'s `From::<<type>>` implementation and returns +/// the result. 
+/// - `as <type> ?=> <try_into_type>` calls `<try_into_type>`'s `TryFrom::<<type>>` implementation +/// and returns the result. This is useful with fields for which not all values are valid. +macro_rules! bitfield { + // Main entry point - defines the bitfield struct with fields + ($vis:vis struct $name:ident($storage:ty) $(, $comment:literal)? { $($fields:tt)* }) => { + bitfield!(@core $vis $name $storage $(, $comment)? { $($fields)* }); + }; + + // All rules below are helpers. + + // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, + // `Default`, and conversion to the value type) and field accessor methods. + (@core $vis:vis $name:ident $storage:ty $(, $comment:literal)? { $($fields:tt)* }) => { + $( + #[doc=$comment] + )? + #[repr(transparent)] + #[derive(Clone, Copy)] + $vis struct $name($storage); + + impl ::core::convert::From<$name> for $storage { + fn from(val: $name) -> $storage { + val.0 + } + } + + bitfield!(@fields_dispatcher $vis $name $storage { $($fields)* }); + }; + + // Captures the fields and passes them to all the implementers that require field information. + // + // Used to simplify the matching rules for implementers, so they don't need to match the entire + // complex fields rule even though they only make use of part of it. + (@fields_dispatcher $vis:vis $name:ident $storage:ty { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? + ; + )* + } + ) => { + bitfield!(@field_accessors $vis $name $storage { + $( + $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? + ; + )* + }); + bitfield!(@debug $name { $($field;)* }); + bitfield!(@default $name { $($field;)* }); + }; + + // Defines all the field getter/setter methods for `$name`. + ( + @field_accessors $vis:vis $name:ident $storage:ty { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? 
+ ; + )* + } + ) => { + $( + bitfield!(@check_field_bounds $hi:$lo $field as $type); + )* + + #[allow(dead_code)] + impl $name { + $( + bitfield!(@field_accessor $vis $name $storage, $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? + ; + ); + )* + } + }; + + // Boolean fields must have `$hi == $lo`. + (@check_field_bounds $hi:tt:$lo:tt $field:ident as bool) => { + #[allow(clippy::eq_op)] + const _: () = { + ::kernel::build_assert!( + $hi == $lo, + concat!("boolean field `", stringify!($field), "` covers more than one bit") + ); + }; + }; + + // Non-boolean fields must have `$hi >= $lo`. + (@check_field_bounds $hi:tt:$lo:tt $field:ident as $type:tt) => { + #[allow(clippy::eq_op)] + const _: () = { + ::kernel::build_assert!( + $hi >= $lo, + concat!("field `", stringify!($field), "`'s MSB is smaller than its LSB") + ); + }; + }; + + // Catches fields defined as `bool` and convert them into a boolean value. + ( + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool + => $into_type:ty $(, $comment:literal)?; + ) => { + bitfield!( + @leaf_accessor $vis $name $storage, $hi:$lo $field + { |f| <$into_type>::from(f != 0) } + bool $into_type => $into_type $(, $comment)?; + ); + }; + + // Shortcut for fields defined as `bool` without the `=>` syntax. + ( + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as bool + $(, $comment:literal)?; + ) => { + bitfield!( + @field_accessor $vis $name $storage, $hi:$lo $field as bool => bool $(, $comment)?; + ); + }; + + // Catches the `?=>` syntax for non-boolean fields. 
+ ( + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + ?=> $try_into_type:ty $(, $comment:literal)?; + ) => { + bitfield!(@leaf_accessor $vis $name $storage, $hi:$lo $field + { |f| <$try_into_type>::try_from(f as $type) } $type $try_into_type => + ::core::result::Result< + $try_into_type, + <$try_into_type as ::core::convert::TryFrom<$type>>::Error + > + $(, $comment)?;); + }; + + // Catches the `=>` syntax for non-boolean fields. + ( + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + => $into_type:ty $(, $comment:literal)?; + ) => { + bitfield!(@leaf_accessor $vis $name $storage, $hi:$lo $field + { |f| <$into_type>::from(f as $type) } $type $into_type => $into_type $(, $comment)?;); + }; + + // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. + ( + @field_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident as $type:tt + $(, $comment:literal)?; + ) => { + bitfield!( + @field_accessor $vis $name $storage, $hi:$lo $field as $type => $type $(, $comment)?; + ); + }; + + // Generates the accessor methods for a single field. + ( + @leaf_accessor $vis:vis $name:ident $storage:ty, $hi:tt:$lo:tt $field:ident + { $process:expr } $prim_type:tt $to_type:ty => $res_type:ty $(, $comment:literal)?; + ) => { + ::kernel::macros::paste!( + const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; + const [<$field:upper _MASK>]: $storage = { + // Generate mask for shifting + match ::core::mem::size_of::<$storage>() { + 1 => ::kernel::bits::genmask_u8($lo..=$hi) as $storage, + 2 => ::kernel::bits::genmask_u16($lo..=$hi) as $storage, + 4 => ::kernel::bits::genmask_u32($lo..=$hi) as $storage, + 8 => ::kernel::bits::genmask_u64($lo..=$hi) as $storage, + _ => ::kernel::build_error!("Unsupported storage type size") + } + }; + const [<$field:upper _SHIFT>]: u32 = $lo; + ); + + $( + #[doc="Returns the value of this field:"] + #[doc=$comment] + )? 
+ #[inline(always)] + $vis fn $field(self) -> $res_type { + ::kernel::macros::paste!( + const MASK: $storage = $name::[<$field:upper _MASK>]; + const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; + ); + let field = ((self.0 & MASK) >> SHIFT); + + $process(field) + } + + ::kernel::macros::paste!( + $( + #[doc="Sets the value of this field:"] + #[doc=$comment] + )? + #[inline(always)] + $vis fn [<set_ $field>](mut self, value: $to_type) -> Self { + const MASK: $storage = $name::[<$field:upper _MASK>]; + const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; + let value = ($storage::from($prim_type::from(value)) << SHIFT) & MASK; + self.0 = (self.0 & !MASK) | value; + + self + } + ); + }; + + // Generates the `Debug` implementation for `$name`. + (@debug $name:ident { $($field:ident;)* }) => { + impl ::kernel::fmt::Debug for $name { + fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { + f.debug_struct(stringify!($name)) + .field("<raw>", &::kernel::prelude::fmt!("{:#x}", &self.0)) + $( + .field(stringify!($field), &self.$field()) + )* + .finish() + } + } + }; + + // Generates the `Default` implementation for `$name`. + (@default $name:ident { $($field:ident;)* }) => { + /// Returns a value for the bitfield where all fields are set to their default value. 
+ impl ::core::default::Default for $name { + fn default() -> Self { + let value = Self(Default::default()); + + ::kernel::macros::paste!( + $( + let value = value.[<set_ $field>](Default::default()); + )* + ); + + value + } + } + }; +} diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs new file mode 100644 index 000000000000..84b0e1703150 --- /dev/null +++ b/drivers/gpu/nova-core/driver.rs @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + auxiliary, + device::Core, + devres::Devres, + dma::Device, + dma::DmaMask, + pci, + pci::{ + Class, + ClassMask, + Vendor, // + }, + prelude::*, + sizes::SZ_16M, + sync::{ + atomic::{ + Atomic, + Relaxed, // + }, + Arc, + }, +}; + +use crate::gpu::Gpu; + +/// Counter for generating unique auxiliary device IDs. +static AUXILIARY_ID_COUNTER: Atomic<u32> = Atomic::new(0); + +#[pin_data] +pub(crate) struct NovaCore { + #[pin] + pub(crate) gpu: Gpu, + #[pin] + _reg: Devres<auxiliary::Registration>, +} + +const BAR0_SIZE: usize = SZ_16M; + +// For now we only support Ampere which can use up to 47-bit DMA addresses. +// +// TODO: Add an abstraction for this to support newer GPUs which may support +// larger DMA addresses. Limiting these GPUs to smaller address widths won't +// have any adverse effects, unless installed on systems which require larger +// DMA addresses. These systems should be quite rare. +const GPU_DMA_BITS: u32 = 47; + +pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>; + +kernel::pci_device_table!( + PCI_TABLE, + MODULE_PCI_TABLE, + <NovaCore as pci::Driver>::IdInfo, + [ + // Modern NVIDIA GPUs will show up as either VGA or 3D controllers. 
+ ( + pci::DeviceId::from_class_and_vendor( + Class::DISPLAY_VGA, + ClassMask::ClassSubclass, + Vendor::NVIDIA + ), + () + ), + ( + pci::DeviceId::from_class_and_vendor( + Class::DISPLAY_3D, + ClassMask::ClassSubclass, + Vendor::NVIDIA + ), + () + ), + ] +); + +impl pci::Driver for NovaCore { + type IdInfo = (); + const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE; + + fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, Error> { + pin_init::pin_init_scope(move || { + dev_dbg!(pdev, "Probe Nova Core GPU driver.\n"); + + pdev.enable_device_mem()?; + pdev.set_master(); + + // SAFETY: No concurrent DMA allocations or mappings can be made because + // the device is still being probed and therefore isn't being used by + // other threads of execution. + unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? }; + + let bar = Arc::pin_init( + pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0"), + GFP_KERNEL, + )?; + + Ok(try_pin_init!(Self { + gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?), + _reg <- auxiliary::Registration::new( + pdev.as_ref(), + c"nova-drm", + // TODO[XARR]: Use XArray or perhaps IDA for proper ID allocation/recycling. For + // now, use a simple atomic counter that never recycles IDs. + AUXILIARY_ID_COUNTER.fetch_add(1, Relaxed), + crate::MODULE_NAME + ), + })) + }) + } + + fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) { + this.gpu.unbind(pdev.as_ref()); + } +} diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs new file mode 100644 index 000000000000..33927af4134c --- /dev/null +++ b/drivers/gpu/nova-core/falcon.rs @@ -0,0 +1,789 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! 
Falcon microprocessor base support + +use hal::FalconHal; + +use kernel::{ + device::{ + self, + Device, // + }, + dma::{ + Coherent, + CoherentBox, + DmaAddress, + DmaMask, // + }, + io::{ + poll::read_poll_timeout, + register::{ + RegisterBase, + WithBase, // + }, + Io, + }, + prelude::*, + sync::aref::ARef, + time::Delta, +}; + +use crate::{ + bounded_enum, + driver::Bar0, + falcon::hal::LoadMethod, + gpu::Chipset, + num::{ + self, + FromSafeCast, // + }, + regs, +}; + +pub(crate) mod gsp; +mod hal; +pub(crate) mod sec2; + +/// Alignment (in bytes) of falcon memory blocks. +pub(crate) const MEM_BLOCK_ALIGNMENT: usize = 256; + +bounded_enum! { + /// Revision number of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] + /// register. + #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconCoreRev with TryFrom<Bounded<u32, 4>> { + Rev1 = 1, + Rev2 = 2, + Rev3 = 3, + Rev4 = 4, + Rev5 = 5, + Rev6 = 6, + Rev7 = 7, + } +} + +bounded_enum! { + /// Revision subversion number of a falcon core, used in the + /// [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] register. + #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconCoreRevSubversion with From<Bounded<u32, 2>> { + Subversion0 = 0, + Subversion1 = 1, + Subversion2 = 2, + Subversion3 = 3, + } +} + +bounded_enum! { + /// Security mode of the Falcon microprocessor. + /// + /// See `falcon.rst` for more details. + #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconSecurityModel with TryFrom<Bounded<u32, 2>> { + /// Non-Secure: runs unsigned code without privileges. + None = 0, + /// Light-Secured (LS): Runs signed code with some privileges. + /// Entry into this mode is only possible from 'Heavy-secure' mode, which verifies the + /// code's signature. + /// + /// Also known as Low-Secure, Privilege Level 2 or PL2. + Light = 2, + /// Heavy-Secured (HS): Runs signed code with full privileges. + /// The code's signature is verified by the Falcon Boot ROM (BROM). 
+ /// + /// Also known as High-Secure, Privilege Level 3 or PL3. + Heavy = 3, + } +} + +bounded_enum! { + /// Signing algorithm for a given firmware, used in the + /// [`crate::regs::NV_PFALCON2_FALCON_MOD_SEL`] register. It is passed to the Falcon Boot ROM + /// (BROM) as a parameter. + #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconModSelAlgo with TryFrom<Bounded<u32, 8>> { + /// AES. + Aes = 0, + /// RSA3K. + Rsa3k = 1, + } +} + +bounded_enum! { + /// Valid values for the `size` field of the [`crate::regs::NV_PFALCON_FALCON_DMATRFCMD`] + /// register. + #[derive(Debug, Copy, Clone)] + pub(crate) enum DmaTrfCmdSize with TryFrom<Bounded<u32, 3>> { + /// 256 bytes transfer. + Size256B = 0x6, + } +} + +bounded_enum! { + /// Currently active core on a dual falcon/riscv (Peregrine) controller. + #[derive(Debug, Copy, Clone, PartialEq, Eq)] + pub(crate) enum PeregrineCoreSelect with From<Bounded<u32, 1>> { + /// Falcon core is active. + Falcon = 0, + /// RISC-V core is active. + Riscv = 1, + } +} + +/// Different types of memory present in a falcon core. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub(crate) enum FalconMem { + /// Secure Instruction Memory. + ImemSecure, + /// Non-Secure Instruction Memory. + #[expect(unused)] + ImemNonSecure, + /// Data Memory. + Dmem, +} + +bounded_enum! { + /// Defines the Framebuffer Interface (FBIF) aperture type. + /// This determines the memory type for external memory access during a DMA transfer, which is + /// performed by the Falcon's Framebuffer DMA (FBDMA) engine. See falcon.rst for more details. + #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconFbifTarget with TryFrom<Bounded<u32, 2>> { + /// Local Framebuffer (GPU's VRAM memory). + LocalFb = 0, + /// Coherent system memory (System DRAM). + CoherentSysmem = 1, + /// Non-coherent system memory (System DRAM). + NoncoherentSysmem = 2, + } +} + +bounded_enum! { + /// Type of memory addresses to use. 
+ #[derive(Debug, Copy, Clone)] + pub(crate) enum FalconFbifMemType with From<Bounded<u32, 1>> { + /// Virtual memory addresses. + Virtual = 0, + /// Physical memory addresses. + Physical = 1, + } +} + +/// Type used to represent the `PFALCON` registers address base for a given falcon engine. +pub(crate) struct PFalconBase(()); + +/// Type used to represent the `PFALCON2` registers address base for a given falcon engine. +pub(crate) struct PFalcon2Base(()); + +/// Trait defining the parameters of a given Falcon engine. +/// +/// Each engine provides one base for `PFALCON` and `PFALCON2` registers. +pub(crate) trait FalconEngine: + Send + Sync + RegisterBase<PFalconBase> + RegisterBase<PFalcon2Base> + Sized +{ +} + +/// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM) +/// using DMA. +#[derive(Debug, Clone)] +pub(crate) struct FalconDmaLoadTarget { + /// Offset from the start of the source object to copy from. + pub(crate) src_start: u32, + /// Offset from the start of the destination memory to copy into. + pub(crate) dst_start: u32, + /// Number of bytes to copy. + pub(crate) len: u32, +} + +/// Parameters for the falcon boot ROM. +#[derive(Debug, Clone)] +pub(crate) struct FalconBromParams { + /// Offset in `DMEM` of the firmware's signature. + pub(crate) pkc_data_offset: u32, + /// Mask of engines valid for this firmware. + pub(crate) engine_id_mask: u16, + /// ID of the ucode used to infer a fuse register to validate the signature. + pub(crate) ucode_id: u8, +} + +/// Trait implemented by falcon firmwares that can be loaded using DMA. +pub(crate) trait FalconDmaLoadable { + /// Returns the firmware data as a slice of bytes. + fn as_slice(&self) -> &[u8]; + + /// Returns the load parameters for Secure `IMEM`. + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget; + + /// Returns the load parameters for Non-Secure `IMEM`, + /// used only on Turing and GA100. 
+ fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget>; + + /// Returns the load parameters for `DMEM`. + fn dmem_load_params(&self) -> FalconDmaLoadTarget; + + /// Returns an adapter that provides the required parameter to load this firmware using PIO. + /// + /// This can only fail if some `u32` fields cannot be converted to `u16`, or if the indices in + /// the headers are invalid. + fn try_as_pio_loadable(&self) -> Result<FalconDmaFirmwarePioAdapter<'_, Self>> { + let new_pio_imem = |params: FalconDmaLoadTarget, secure| { + let start = usize::from_safe_cast(params.src_start); + let end = start + usize::from_safe_cast(params.len); + let data = self.as_slice().get(start..end).ok_or(EINVAL)?; + + let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?; + + Ok::<_, Error>(FalconPioImemLoadTarget { + data, + dst_start, + secure, + start_tag: dst_start >> 8, + }) + }; + + let imem_sec = new_pio_imem(self.imem_sec_load_params(), true)?; + + let imem_ns = if let Some(params) = self.imem_ns_load_params() { + Some(new_pio_imem(params, false)?) + } else { + None + }; + + let dmem = { + let params = self.dmem_load_params(); + let start = usize::from_safe_cast(params.src_start); + let end = start + usize::from_safe_cast(params.len); + let data = self.as_slice().get(start..end).ok_or(EINVAL)?; + + let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?; + + FalconPioDmemLoadTarget { data, dst_start } + }; + + Ok(FalconDmaFirmwarePioAdapter { + fw: self, + imem_sec, + imem_ns, + dmem, + }) + } +} + +/// Represents a portion of the firmware to be loaded into IMEM using PIO. +#[derive(Clone)] +pub(crate) struct FalconPioImemLoadTarget<'a> { + pub(crate) data: &'a [u8], + pub(crate) dst_start: u16, + pub(crate) secure: bool, + pub(crate) start_tag: u16, +} + +/// Represents a portion of the firmware to be loaded into DMEM using PIO. 
+#[derive(Clone)] +pub(crate) struct FalconPioDmemLoadTarget<'a> { + pub(crate) data: &'a [u8], + pub(crate) dst_start: u16, +} + +/// Trait for providing PIO load parameters of falcon firmwares. +pub(crate) trait FalconPioLoadable { + /// Returns the load parameters for Secure `IMEM`, if any. + fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>; + + /// Returns the load parameters for Non-Secure `IMEM`, if any. + fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>; + + /// Returns the load parameters for `DMEM`. + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_>; +} + +/// Adapter type that makes any DMA-loadable firmware also loadable via PIO. +/// +/// Created using [`FalconDmaLoadable::try_as_pio_loadable`]. +pub(crate) struct FalconDmaFirmwarePioAdapter<'a, T: FalconDmaLoadable + ?Sized> { + /// Reference to the DMA firmware. + fw: &'a T, + /// Validated secure IMEM parameters. + imem_sec: FalconPioImemLoadTarget<'a>, + /// Validated non-secure IMEM parameters. + imem_ns: Option<FalconPioImemLoadTarget<'a>>, + /// Validated DMEM parameters. + dmem: FalconPioDmemLoadTarget<'a>, +} + +impl<'a, T> FalconPioLoadable for FalconDmaFirmwarePioAdapter<'a, T> +where + T: FalconDmaLoadable + ?Sized, +{ + fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { + Some(self.imem_sec.clone()) + } + + fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { + self.imem_ns.clone() + } + + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> { + self.dmem.clone() + } +} + +impl<'a, T> FalconFirmware for FalconDmaFirmwarePioAdapter<'a, T> +where + T: FalconDmaLoadable + FalconFirmware + ?Sized, +{ + type Target = <T as FalconFirmware>::Target; + + fn brom_params(&self) -> FalconBromParams { + self.fw.brom_params() + } + + fn boot_addr(&self) -> u32 { + self.fw.boot_addr() + } +} + +/// Trait for a falcon firmware. +/// +/// A falcon firmware can be loaded on a given engine. 
+pub(crate) trait FalconFirmware { + /// Engine on which this firmware is to be loaded. + type Target: FalconEngine; + + /// Returns the parameters to write into the BROM registers. + fn brom_params(&self) -> FalconBromParams; + + /// Returns the start address of the firmware. + fn boot_addr(&self) -> u32; +} + +/// Contains the base parameters common to all Falcon instances. +pub(crate) struct Falcon<E: FalconEngine> { + hal: KBox<dyn FalconHal<E>>, + dev: ARef<device::Device>, +} + +impl<E: FalconEngine + 'static> Falcon<E> { + /// Create a new falcon instance. + pub(crate) fn new(dev: &device::Device, chipset: Chipset) -> Result<Self> { + Ok(Self { + hal: hal::falcon_hal(chipset)?, + dev: dev.into(), + }) + } + + /// Resets DMA-related registers. + pub(crate) fn dma_reset(&self, bar: &Bar0) { + bar.update(regs::NV_PFALCON_FBIF_CTL::of::<E>(), |v| { + v.with_allow_phys_no_ctx(true) + }); + + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_DMACTL::zeroed(), + ); + } + + /// Reset the controller, select the falcon core, and wait for memory scrubbing to complete. + pub(crate) fn reset(&self, bar: &Bar0) -> Result { + self.hal.reset_eng(bar)?; + self.hal.select_core(self, bar)?; + self.hal.reset_wait_mem_scrubbing(bar)?; + + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_RM::from(bar.read(regs::NV_PMC_BOOT_0).into_raw()), + ); + + Ok(()) + } + + /// Falcons support up to four ports, but we only ever use one, so just hard-code it. + const PIO_PORT: usize = 0; + + /// Write a slice to Falcon IMEM memory using programmed I/O (PIO). + /// + /// Returns `EINVAL` if `load_offsets.data.len()` is not a multiple of 4. + fn pio_wr_imem_slice(&self, bar: &Bar0, load_offsets: FalconPioImemLoadTarget<'_>) -> Result { + // Rejecting misaligned images here allows us to avoid checking + // inside the loops. 
+ if load_offsets.data.len() % 4 != 0 { + return Err(EINVAL); + } + + bar.write( + WithBase::of::<E>().at(Self::PIO_PORT), + regs::NV_PFALCON_FALCON_IMEMC::zeroed() + .with_secure(load_offsets.secure) + .with_aincw(true) + .with_offs(load_offsets.dst_start), + ); + + for (n, block) in load_offsets.data.chunks(MEM_BLOCK_ALIGNMENT).enumerate() { + let n = u16::try_from(n)?; + let tag: u16 = load_offsets.start_tag.checked_add(n).ok_or(ERANGE)?; + bar.write( + WithBase::of::<E>().at(Self::PIO_PORT), + regs::NV_PFALCON_FALCON_IMEMT::zeroed().with_tag(tag), + ); + for word in block.chunks_exact(4) { + let w = [word[0], word[1], word[2], word[3]]; + bar.write( + WithBase::of::<E>().at(Self::PIO_PORT), + regs::NV_PFALCON_FALCON_IMEMD::zeroed().with_data(u32::from_le_bytes(w)), + ); + } + } + + Ok(()) + } + + /// Write a slice to Falcon DMEM memory using programmed I/O (PIO). + /// + /// Returns `EINVAL` if `load_offsets.data.len()` is not a multiple of 4. + fn pio_wr_dmem_slice(&self, bar: &Bar0, load_offsets: FalconPioDmemLoadTarget<'_>) -> Result { + // Rejecting misaligned images here allows us to avoid checking + // inside the loop. + if load_offsets.data.len() % 4 != 0 { + return Err(EINVAL); + } + + bar.write( + WithBase::of::<E>().at(Self::PIO_PORT), + regs::NV_PFALCON_FALCON_DMEMC::zeroed() + .with_aincw(true) + .with_offs(load_offsets.dst_start), + ); + + for word in load_offsets.data.chunks_exact(4) { + let w = [word[0], word[1], word[2], word[3]]; + bar.write( + WithBase::of::<E>().at(Self::PIO_PORT), + regs::NV_PFALCON_FALCON_DMEMD::zeroed().with_data(u32::from_le_bytes(w)), + ); + } + + Ok(()) + } + + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. 
+ pub(crate) fn pio_load<F: FalconFirmware<Target = E> + FalconPioLoadable>( + &self, + bar: &Bar0, + fw: &F, + ) -> Result { + bar.update(regs::NV_PFALCON_FBIF_CTL::of::<E>(), |v| { + v.with_allow_phys_no_ctx(true) + }); + + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_DMACTL::zeroed(), + ); + + if let Some(imem_ns) = fw.imem_ns_load_params() { + self.pio_wr_imem_slice(bar, imem_ns)?; + } + if let Some(imem_sec) = fw.imem_sec_load_params() { + self.pio_wr_imem_slice(bar, imem_sec)?; + } + self.pio_wr_dmem_slice(bar, fw.dmem_load_params())?; + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_BOOTVEC::zeroed().with_value(fw.boot_addr()), + ); + + Ok(()) + } + + /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's + /// `target_mem`. + /// + /// `sec` is set if the loaded firmware is expected to run in secure mode. + fn dma_wr( + &self, + bar: &Bar0, + dma_obj: &Coherent<[u8]>, + target_mem: FalconMem, + load_offsets: FalconDmaLoadTarget, + ) -> Result { + const DMA_LEN: u32 = num::usize_into_u32::<{ MEM_BLOCK_ALIGNMENT }>(); + + // For IMEM, we want to use the start offset as a virtual address tag for each page, since + // code addresses in the firmware (and the boot vector) are virtual. + // + // For DMEM we can fold the start offset into the DMA handle. + let (src_start, dma_start) = match target_mem { + FalconMem::ImemSecure | FalconMem::ImemNonSecure => { + (load_offsets.src_start, dma_obj.dma_handle()) + } + FalconMem::Dmem => ( + 0, + dma_obj.dma_handle() + DmaAddress::from(load_offsets.src_start), + ), + }; + if dma_start % DmaAddress::from(DMA_LEN) > 0 { + dev_err!( + self.dev, + "DMA transfer start addresses must be a multiple of {}\n", + DMA_LEN + ); + return Err(EINVAL); + } + + // The DMATRFBASE/1 register pair only supports a 49-bit address. 
+ if dma_start > DmaMask::new::<49>().value() { + dev_err!(self.dev, "DMA address {:#x} exceeds 49 bits\n", dma_start); + return Err(ERANGE); + } + + // DMA transfers can only be done in units of 256 bytes. Compute how many such transfers we + // need to perform. + let num_transfers = load_offsets.len.div_ceil(DMA_LEN); + + // Check that the area we are about to transfer is within the bounds of the DMA object. + // Upper limit of transfer is `(num_transfers * DMA_LEN) + load_offsets.src_start`. + match num_transfers + .checked_mul(DMA_LEN) + .and_then(|size| size.checked_add(load_offsets.src_start)) + { + None => { + dev_err!(self.dev, "DMA transfer length overflow\n"); + return Err(EOVERFLOW); + } + Some(upper_bound) if usize::from_safe_cast(upper_bound) > dma_obj.size() => { + dev_err!(self.dev, "DMA transfer goes beyond range of DMA object\n"); + return Err(EINVAL); + } + Some(_) => (), + }; + + // Set up the base source DMA address. + + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_DMATRFBASE::zeroed().with_base( + // CAST: `as u32` is used on purpose since we do want to strip the upper bits, + // which will be written to `NV_PFALCON_FALCON_DMATRFBASE1`. + (dma_start >> 8) as u32, + ), + ); + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_DMATRFBASE1::zeroed().try_with_base(dma_start >> 40)?, + ); + + let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::zeroed() + .with_size(DmaTrfCmdSize::Size256B) + .with_falcon_mem(target_mem); + + for pos in (0..num_transfers).map(|i| i * DMA_LEN) { + // Perform a transfer of size `DMA_LEN`. + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_DMATRFMOFFS::zeroed() + .try_with_offs(load_offsets.dst_start + pos)?, + ); + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_DMATRFFBOFFS::zeroed().with_offs(src_start + pos), + ); + + bar.write(WithBase::of::<E>(), cmd); + + // Wait for the transfer to complete. 
+ // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories + // should ever take that long. + read_poll_timeout( + || Ok(bar.read(regs::NV_PFALCON_FALCON_DMATRFCMD::of::<E>())), + |r| r.idle(), + Delta::ZERO, + Delta::from_secs(2), + )?; + } + + Ok(()) + } + + /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. + fn dma_load<F: FalconFirmware<Target = E> + FalconDmaLoadable>( + &self, + dev: &Device<device::Bound>, + bar: &Bar0, + fw: &F, + ) -> Result { + // DMA object with firmware content as the source of the DMA engine. + let dma_obj = { + let fw_slice = fw.as_slice(); + + // DMA copies are done in chunks of `MEM_BLOCK_ALIGNMENT`, so pad the length + // accordingly and fill with `0`. + let mut dma_obj = CoherentBox::zeroed_slice( + dev, + fw_slice.len().next_multiple_of(MEM_BLOCK_ALIGNMENT), + GFP_KERNEL, + )?; + + // PANIC: `dma_obj` has been created with a length equal to or larger than + // `fw_slice.len()`, so the range `..fw_slice.len()` is valid. + dma_obj[..fw_slice.len()].copy_from_slice(fw_slice); + + dma_obj.into() + }; + + self.dma_reset(bar); + bar.update(regs::NV_PFALCON_FBIF_TRANSCFG::of::<E>().at(0), |v| { + v.with_target(FalconFbifTarget::CoherentSysmem) + .with_mem_type(FalconFbifMemType::Physical) + }); + + self.dma_wr( + bar, + &dma_obj, + FalconMem::ImemSecure, + fw.imem_sec_load_params(), + )?; + self.dma_wr(bar, &dma_obj, FalconMem::Dmem, fw.dmem_load_params())?; + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + // Set `BootVec` to start of non-secure code. + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_BOOTVEC::zeroed().with_value(fw.boot_addr()), + ); + + Ok(()) + } + + /// Wait until the falcon CPU is halted. + pub(crate) fn wait_till_halted(&self, bar: &Bar0) -> Result<()> { + // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. 
+ read_poll_timeout( + || Ok(bar.read(regs::NV_PFALCON_FALCON_CPUCTL::of::<E>())), + |r| r.halted(), + Delta::ZERO, + Delta::from_secs(2), + )?; + + Ok(()) + } + + /// Start the falcon CPU. + pub(crate) fn start(&self, bar: &Bar0) -> Result<()> { + match bar + .read(regs::NV_PFALCON_FALCON_CPUCTL::of::<E>()) + .alias_en() + { + true => bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::zeroed().with_startcpu(true), + ), + false => bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_CPUCTL::zeroed().with_startcpu(true), + ), + } + + Ok(()) + } + + /// Writes values to the mailbox registers if provided. + pub(crate) fn write_mailboxes(&self, bar: &Bar0, mbox0: Option<u32>, mbox1: Option<u32>) { + if let Some(mbox0) = mbox0 { + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_MAILBOX0::zeroed().with_value(mbox0), + ); + } + + if let Some(mbox1) = mbox1 { + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_MAILBOX1::zeroed().with_value(mbox1), + ); + } + } + + /// Reads the value from `mbox0` register. + pub(crate) fn read_mailbox0(&self, bar: &Bar0) -> u32 { + bar.read(regs::NV_PFALCON_FALCON_MAILBOX0::of::<E>()) + .value() + } + + /// Reads the value from `mbox1` register. + pub(crate) fn read_mailbox1(&self, bar: &Bar0) -> u32 { + bar.read(regs::NV_PFALCON_FALCON_MAILBOX1::of::<E>()) + .value() + } + + /// Reads values from both mailbox registers. + pub(crate) fn read_mailboxes(&self, bar: &Bar0) -> (u32, u32) { + let mbox0 = self.read_mailbox0(bar); + let mbox1 = self.read_mailbox1(bar); + + (mbox0, mbox1) + } + + /// Start running the loaded firmware. + /// + /// `mbox0` and `mbox1` are optional parameters to write into the `MBOX0` and `MBOX1` registers + /// prior to running. + /// + /// Wait up to two seconds for the firmware to complete, and return its exit status read from + /// the `MBOX0` and `MBOX1` registers. 
+ pub(crate) fn boot( + &self, + bar: &Bar0, + mbox0: Option<u32>, + mbox1: Option<u32>, + ) -> Result<(u32, u32)> { + self.write_mailboxes(bar, mbox0, mbox1); + self.start(bar)?; + self.wait_till_halted(bar)?; + Ok(self.read_mailboxes(bar)) + } + + /// Returns the fused version of the signature to use in order to run a HS firmware on this + /// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header. + pub(crate) fn signature_reg_fuse_version( + &self, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result<u32> { + self.hal + .signature_reg_fuse_version(self, bar, engine_id_mask, ucode_id) + } + + /// Check if the RISC-V core is active. + /// + /// Returns `true` if the RISC-V core is active, `false` otherwise. + pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool { + self.hal.is_riscv_active(bar) + } + + /// Load a firmware image into Falcon memory, using the preferred method for the current + /// chipset. + pub(crate) fn load<F: FalconFirmware<Target = E> + FalconDmaLoadable>( + &self, + dev: &Device<device::Bound>, + bar: &Bar0, + fw: &F, + ) -> Result { + match self.hal.load_method() { + LoadMethod::Dma => self.dma_load(dev, bar, fw), + LoadMethod::Pio => self.pio_load(bar, &fw.try_as_pio_loadable()?), + } + } + + /// Write the application version to the OS register. 
+ pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) { + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON_FALCON_OS::zeroed().with_value(app_version), + ); + } +} diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs new file mode 100644 index 000000000000..df6d5a382c7a --- /dev/null +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + io::{ + poll::read_poll_timeout, + register::{ + RegisterBase, + WithBase, // + }, + Io, + }, + prelude::*, + time::Delta, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + Falcon, + FalconEngine, + PFalcon2Base, + PFalconBase, // + }, + regs, +}; + +/// Type specifying the `Gsp` falcon engine. Cannot be instantiated. +pub(crate) struct Gsp(()); + +impl RegisterBase<PFalconBase> for Gsp { + const BASE: usize = 0x00110000; +} + +impl RegisterBase<PFalcon2Base> for Gsp { + const BASE: usize = 0x00111000; +} + +impl FalconEngine for Gsp {} + +impl Falcon<Gsp> { + /// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to + /// allow GSP to signal CPU for processing new messages in message queue. + pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) { + bar.write( + WithBase::of::<Gsp>(), + regs::NV_PFALCON_FALCON_IRQSCLR::zeroed().with_swgen0(true), + ); + } + + /// Checks if GSP reload/resume has completed during the boot process. 
+ pub(crate) fn check_reload_completed(&self, bar: &Bar0, timeout: Delta) -> Result<bool> { + read_poll_timeout( + || Ok(bar.read(regs::NV_PGC6_BSI_SECURE_SCRATCH_14)), + |val| val.boot_stage_3_handoff(), + Delta::ZERO, + timeout, + ) + .map(|_| true) + } +} diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs new file mode 100644 index 000000000000..a7e5ea8d0272 --- /dev/null +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::{ + driver::Bar0, + falcon::{ + Falcon, + FalconBromParams, + FalconEngine, // + }, + gpu::Chipset, +}; + +mod ga102; +mod tu102; + +/// Method used to load data into falcon memory. Some GPU architectures need +/// PIO and others can use DMA. +pub(crate) enum LoadMethod { + /// Programmed I/O + Pio, + /// Direct Memory Access + Dma, +} + +/// Hardware Abstraction Layer for Falcon cores. +/// +/// Implements chipset-specific low-level operations. The trait is generic against [`FalconEngine`] +/// so its `BASE` parameter can be used in order to avoid runtime bound checks when accessing +/// registers. +pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync { + /// Activates the Falcon core if the engine is a riscv/falcon dual engine. + fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result { + Ok(()) + } + + /// Returns the fused version of the signature to use in order to run a HS firmware on this + /// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header. + fn signature_reg_fuse_version( + &self, + falcon: &Falcon<E>, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result<u32>; + + /// Program the boot ROM registers prior to starting a secure firmware. + fn program_brom(&self, falcon: &Falcon<E>, bar: &Bar0, params: &FalconBromParams) -> Result; + + /// Check if the RISC-V core is active. + /// Returns `true` if the RISC-V core is active, `false` otherwise. 
+ fn is_riscv_active(&self, bar: &Bar0) -> bool; + + /// Wait for memory scrubbing to complete. + fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result; + + /// Reset the falcon engine. + fn reset_eng(&self, bar: &Bar0) -> Result; + + /// Returns the method used to load data into the falcon's memory. + /// + /// The only chipsets supporting PIO are those < GA102, and PIO is the preferred method for + /// these. For anything above, the PIO registers appear to be masked to the CPU, so DMA is the + /// only usable method. + fn load_method(&self) -> LoadMethod; +} + +/// Returns a boxed falcon HAL adequate for `chipset`. +/// +/// We use a heap-allocated trait object instead of a statically defined one because the +/// generic `FalconEngine` argument makes it difficult to define all the combinations +/// statically. +pub(super) fn falcon_hal<E: FalconEngine + 'static>( + chipset: Chipset, +) -> Result<KBox<dyn FalconHal<E>>> { + use Chipset::*; + + let hal = match chipset { + TU102 | TU104 | TU106 | TU116 | TU117 => { + KBox::new(tu102::Tu102::<E>::new(), GFP_KERNEL)? as KBox<dyn FalconHal<E>> + } + GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => { + KBox::new(ga102::Ga102::<E>::new(), GFP_KERNEL)? 
as KBox<dyn FalconHal<E>> + } + _ => return Err(ENOTSUPP), + }; + + Ok(hal) +} diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs new file mode 100644 index 000000000000..8368a61ddeef --- /dev/null +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::marker::PhantomData; + +use kernel::{ + device, + io::{ + poll::read_poll_timeout, + register::{ + Array, + WithBase, // + }, + Io, // + }, + prelude::*, + time::Delta, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + hal::LoadMethod, + Falcon, + FalconBromParams, + FalconEngine, + FalconModSelAlgo, + PeregrineCoreSelect, // + }, + regs, +}; + +use super::FalconHal; + +fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result { + let bcr_ctrl = bar.read(regs::NV_PRISCV_RISCV_BCR_CTRL::of::<E>()); + if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon { + bar.write( + WithBase::of::<E>(), + regs::NV_PRISCV_RISCV_BCR_CTRL::zeroed().with_core_select(PeregrineCoreSelect::Falcon), + ); + + // TIMEOUT: falcon core should take less than 10ms to report being enabled. + read_poll_timeout( + || Ok(bar.read(regs::NV_PRISCV_RISCV_BCR_CTRL::of::<E>())), + |r| r.valid(), + Delta::ZERO, + Delta::from_millis(10), + )?; + } + + Ok(()) +} + +fn signature_reg_fuse_version_ga102( + dev: &device::Device, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, +) -> Result<u32> { + // Each engine has 16 ucode version registers numbered from 1 to 16. + let ucode_idx = match usize::from(ucode_id) { + ucode_id @ 1..=regs::NV_FUSE_OPT_FPF_SIZE => ucode_id - 1, + _ => { + dev_err!(dev, "invalid ucode id {:#x}\n", ucode_id); + return Err(EINVAL); + } + }; + + // `ucode_idx` is guaranteed to be in the range [0..15], making the `read` calls provably valid + // at build-time. 
+ let reg_fuse_version: u16 = if engine_id_mask & 0x0001 != 0 { + bar.read(regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::at(ucode_idx)) + .data() + } else if engine_id_mask & 0x0004 != 0 { + bar.read(regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::at(ucode_idx)) + .data() + } else if engine_id_mask & 0x0400 != 0 { + bar.read(regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::at(ucode_idx)) + .data() + } else { + dev_err!(dev, "unexpected engine_id_mask {:#x}\n", engine_id_mask); + return Err(EINVAL); + }; + + // TODO[NUMM]: replace with `last_set_bit` once it lands. + Ok(u16::BITS - reg_fuse_version.leading_zeros()) +} + +fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) -> Result { + bar.write( + WithBase::of::<E>().at(0), + regs::NV_PFALCON2_FALCON_BROM_PARAADDR::zeroed().with_value(params.pkc_data_offset), + ); + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::zeroed() + .with_value(u32::from(params.engine_id_mask)), + ); + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::zeroed().with_ucode_id(params.ucode_id), + ); + bar.write( + WithBase::of::<E>(), + regs::NV_PFALCON2_FALCON_MOD_SEL::zeroed().with_algo(FalconModSelAlgo::Rsa3k), + ); + + Ok(()) +} + +pub(super) struct Ga102<E: FalconEngine>(PhantomData<E>); + +impl<E: FalconEngine> Ga102<E> { + pub(super) fn new() -> Self { + Self(PhantomData) + } +} + +impl<E: FalconEngine> FalconHal<E> for Ga102<E> { + fn select_core(&self, _falcon: &Falcon<E>, bar: &Bar0) -> Result { + select_core_ga102::<E>(bar) + } + + fn signature_reg_fuse_version( + &self, + falcon: &Falcon<E>, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result<u32> { + signature_reg_fuse_version_ga102(&falcon.dev, bar, engine_id_mask, ucode_id) + } + + fn program_brom(&self, _falcon: &Falcon<E>, bar: &Bar0, params: &FalconBromParams) -> Result { + program_brom_ga102::<E>(bar, params) + } + + fn is_riscv_active(&self, bar: &Bar0) -> bool { + 
bar.read(regs::NV_PRISCV_RISCV_CPUCTL::of::<E>()) + .active_stat() + } + + fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { + // TIMEOUT: memory scrubbing should complete in less than 20ms. + read_poll_timeout( + || Ok(bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>())), + |r| r.mem_scrubbing_done(), + Delta::ZERO, + Delta::from_millis(20), + ) + .map(|_| ()) + } + + fn reset_eng(&self, bar: &Bar0) -> Result { + let _ = bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>()); + + // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set + // RESET_READY so a non-failing timeout is used. + let _ = read_poll_timeout( + || Ok(bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<E>())), + |r| r.reset_ready(), + Delta::ZERO, + Delta::from_micros(150), + ); + + regs::NV_PFALCON_FALCON_ENGINE::reset_engine::<E>(bar); + self.reset_wait_mem_scrubbing(bar)?; + + Ok(()) + } + + fn load_method(&self) -> LoadMethod { + LoadMethod::Dma + } +} diff --git a/drivers/gpu/nova-core/falcon/hal/tu102.rs b/drivers/gpu/nova-core/falcon/hal/tu102.rs new file mode 100644 index 000000000000..c7a90266cb44 --- /dev/null +++ b/drivers/gpu/nova-core/falcon/hal/tu102.rs @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::marker::PhantomData; + +use kernel::{ + io::{ + poll::read_poll_timeout, + register::WithBase, + Io, // + }, + prelude::*, + time::Delta, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + hal::LoadMethod, + Falcon, + FalconBromParams, + FalconEngine, // + }, + regs, // +}; + +use super::FalconHal; + +pub(super) struct Tu102<E: FalconEngine>(PhantomData<E>); + +impl<E: FalconEngine> Tu102<E> { + pub(super) fn new() -> Self { + Self(PhantomData) + } +} + +impl<E: FalconEngine> FalconHal<E> for Tu102<E> { + fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result { + Ok(()) + } + + fn signature_reg_fuse_version( + &self, + _falcon: &Falcon<E>, + _bar: &Bar0, + _engine_id_mask: u16, + _ucode_id: u8, + ) -> Result<u32> { + 
Ok(0) + } + + fn program_brom(&self, _falcon: &Falcon<E>, _bar: &Bar0, _params: &FalconBromParams) -> Result { + Ok(()) + } + + fn is_riscv_active(&self, bar: &Bar0) -> bool { + bar.read(regs::NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS::of::<E>()) + .active_stat() + } + + fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { + // TIMEOUT: memory scrubbing should complete in less than 10ms. + read_poll_timeout( + || Ok(bar.read(regs::NV_PFALCON_FALCON_DMACTL::of::<E>())), + |r| r.mem_scrubbing_done(), + Delta::ZERO, + Delta::from_millis(10), + ) + .map(|_| ()) + } + + fn reset_eng(&self, bar: &Bar0) -> Result { + regs::NV_PFALCON_FALCON_ENGINE::reset_engine::<E>(bar); + self.reset_wait_mem_scrubbing(bar)?; + + Ok(()) + } + + fn load_method(&self) -> LoadMethod { + LoadMethod::Pio + } +} diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs new file mode 100644 index 000000000000..91ec7d49c1f5 --- /dev/null +++ b/drivers/gpu/nova-core/falcon/sec2.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::io::register::RegisterBase; + +use crate::falcon::{ + FalconEngine, + PFalcon2Base, + PFalconBase, // +}; + +/// Type specifying the `Sec2` falcon engine. Cannot be instantiated. 
+pub(crate) struct Sec2(()); + +impl RegisterBase<PFalconBase> for Sec2 { + const BASE: usize = 0x00840000; +} + +impl RegisterBase<PFalcon2Base> for Sec2 { + const BASE: usize = 0x00841000; +} + +impl FalconEngine for Sec2 {} diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs new file mode 100644 index 000000000000..bdd5eed760e1 --- /dev/null +++ b/drivers/gpu/nova-core/fb.rs @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::ops::{ + Deref, + Range, // +}; + +use kernel::{ + device, + dma::CoherentHandle, + fmt, + io::Io, + prelude::*, + ptr::{ + Alignable, + Alignment, // + }, + sizes::*, + sync::aref::ARef, // +}; + +use crate::{ + driver::Bar0, + firmware::gsp::GspFirmware, + gpu::Chipset, + gsp, + num::{ + usize_as_u64, + FromSafeCast, // + }, + regs, +}; + +mod hal; + +/// Type holding the sysmem flush memory page, a page of memory to be written into the +/// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR*` registers and used to maintain memory coherency. +/// +/// A system memory page is required for `sysmembar`, which is a GPU-initiated hardware +/// memory-barrier operation that flushes all pending GPU-side memory writes that were done through +/// PCIE to system memory. It is required for falcons to be reset as the reset operation involves a +/// reset handshake. When the falcon acknowledges a reset, it writes into system memory. To ensure +/// this write is visible to the host and prevent driver timeouts, the falcon must perform a +/// sysmembar operation to flush its writes. +/// +/// Because of this, the sysmem flush memory page must be registered as early as possible during +/// driver initialization, and before any falcon is reset. +/// +/// Users are responsible for manually calling [`Self::unregister`] before dropping this object, +/// otherwise the GPU might still use it even after it has been freed. +pub(crate) struct SysmemFlush { + /// Chipset we are operating on. 
+ chipset: Chipset, + device: ARef<device::Device>, + /// Keep the page alive as long as we need it. + page: CoherentHandle, +} + +impl SysmemFlush { + /// Allocate a memory page and register it as the sysmem flush page. + pub(crate) fn register( + dev: &device::Device<device::Bound>, + bar: &Bar0, + chipset: Chipset, + ) -> Result<Self> { + let page = CoherentHandle::alloc(dev, kernel::page::PAGE_SIZE, GFP_KERNEL)?; + + hal::fb_hal(chipset).write_sysmem_flush_page(bar, page.dma_handle())?; + + Ok(Self { + chipset, + device: dev.into(), + page, + }) + } + + /// Unregister the managed sysmem flush page. + /// + /// In order to gracefully tear down the GPU, users must make sure to call this method before + /// dropping the object. + pub(crate) fn unregister(&self, bar: &Bar0) { + let hal = hal::fb_hal(self.chipset); + + if hal.read_sysmem_flush_page(bar) == self.page.dma_handle() { + let _ = hal.write_sysmem_flush_page(bar, 0).inspect_err(|e| { + dev_warn!( + &self.device, + "failed to unregister sysmem flush page: {:?}\n", + e + ) + }); + } else { + // Another page has been registered after us for some reason - warn as this is a bug. + dev_warn!( + &self.device, + "attempt to unregister a sysmem flush page that is not active\n" + ); + } + } +} + +pub(crate) struct FbRange(Range<u64>); + +impl FbRange { + pub(crate) fn len(&self) -> u64 { + self.0.end - self.0.start + } +} + +impl From<Range<u64>> for FbRange { + fn from(range: Range<u64>) -> Self { + Self(range) + } +} + +impl Deref for FbRange { + type Target = Range<u64>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl fmt::Debug for FbRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Use alternate format ({:#?}) to include size, compact format ({:?}) for just the range. 
+ if f.alternate() { + let size = self.len(); + + if size < usize_as_u64(SZ_1M) { + let size_kib = size / usize_as_u64(SZ_1K); + f.write_fmt(fmt!( + "{:#x}..{:#x} ({} KiB)", + self.0.start, + self.0.end, + size_kib + )) + } else { + let size_mib = size / usize_as_u64(SZ_1M); + f.write_fmt(fmt!( + "{:#x}..{:#x} ({} MiB)", + self.0.start, + self.0.end, + size_mib + )) + } + } else { + f.write_fmt(fmt!("{:#x}..{:#x}", self.0.start, self.0.end)) + } + } +} + +/// Layout of the GPU framebuffer memory. +/// +/// Contains ranges of GPU memory reserved for a given purpose during the GSP boot process. +#[derive(Debug)] +pub(crate) struct FbLayout { + /// Range of the framebuffer. Starts at `0`. + pub(crate) fb: FbRange, + /// VGA workspace, small area of reserved memory at the end of the framebuffer. + pub(crate) vga_workspace: FbRange, + /// FRTS range. + pub(crate) frts: FbRange, + /// Memory area containing the GSP bootloader image. + pub(crate) boot: FbRange, + /// Memory area containing the GSP firmware image. + pub(crate) elf: FbRange, + /// WPR2 heap. + pub(crate) wpr2_heap: FbRange, + /// WPR2 region range, starting with an instance of `GspFwWprMeta`. + pub(crate) wpr2: FbRange, + pub(crate) heap: FbRange, + pub(crate) vf_partition_count: u8, +} + +impl FbLayout { + /// Computes the FB layout for `chipset` required to run the `gsp_fw` GSP firmware. + pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw: &GspFirmware) -> Result<Self> { + let hal = hal::fb_hal(chipset); + + let fb = { + let fb_size = hal.vidmem_size(bar); + + FbRange(0..fb_size) + }; + + let vga_workspace = { + let vga_base = { + const NV_PRAMIN_SIZE: u64 = usize_as_u64(SZ_1M); + let base = fb.end - NV_PRAMIN_SIZE; + + if hal.supports_display(bar) { + match bar + .read(regs::NV_PDISP_VGA_WORKSPACE_BASE) + .vga_workspace_addr() + { + Some(addr) => { + if addr < base { + const VBIOS_WORKSPACE_SIZE: u64 = usize_as_u64(SZ_128K); + + // Point workspace address to end of framebuffer. 
+ fb.end - VBIOS_WORKSPACE_SIZE + } else { + addr + } + } + None => base, + } + } else { + base + } + }; + + FbRange(vga_base..fb.end) + }; + + let frts = { + const FRTS_DOWN_ALIGN: Alignment = Alignment::new::<SZ_128K>(); + const FRTS_SIZE: u64 = usize_as_u64(SZ_1M); + let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE; + + FbRange(frts_base..frts_base + FRTS_SIZE) + }; + + let boot = { + const BOOTLOADER_DOWN_ALIGN: Alignment = Alignment::new::<SZ_4K>(); + let bootloader_size = u64::from_safe_cast(gsp_fw.bootloader.ucode.size()); + let bootloader_base = (frts.start - bootloader_size).align_down(BOOTLOADER_DOWN_ALIGN); + + FbRange(bootloader_base..bootloader_base + bootloader_size) + }; + + let elf = { + const ELF_DOWN_ALIGN: Alignment = Alignment::new::<SZ_64K>(); + let elf_size = u64::from_safe_cast(gsp_fw.size); + let elf_addr = (boot.start - elf_size).align_down(ELF_DOWN_ALIGN); + + FbRange(elf_addr..elf_addr + elf_size) + }; + + let wpr2_heap = { + const WPR2_HEAP_DOWN_ALIGN: Alignment = Alignment::new::<SZ_1M>(); + let wpr2_heap_size = + gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end); + let wpr2_heap_addr = (elf.start - wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN); + + FbRange(wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN)) + }; + + let wpr2 = { + const WPR2_DOWN_ALIGN: Alignment = Alignment::new::<SZ_1M>(); + let wpr2_addr = (wpr2_heap.start - u64::from_safe_cast(size_of::<gsp::GspFwWprMeta>())) + .align_down(WPR2_DOWN_ALIGN); + + FbRange(wpr2_addr..frts.end) + }; + + let heap = { + const HEAP_SIZE: u64 = usize_as_u64(SZ_1M); + + FbRange(wpr2.start - HEAP_SIZE..wpr2.start) + }; + + Ok(Self { + fb, + vga_workspace, + frts, + boot, + elf, + wpr2_heap, + wpr2, + heap, + vf_partition_count: 0, + }) + } +} diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs new file mode 100644 index 000000000000..aba0abd8ee00 --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal.rs @@ -0,0 
+1,41 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::{ + driver::Bar0, + gpu::Chipset, // +}; + +mod ga100; +mod ga102; +mod tu102; + +pub(crate) trait FbHal { + /// Returns the address of the currently-registered sysmem flush page. + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64; + + /// Register `addr` as the address of the sysmem flush page. + /// + /// This might fail if the address is too large for the receiving register. + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result; + + /// Returns `true` if display is supported. + fn supports_display(&self, bar: &Bar0) -> bool; + + /// Returns the VRAM size, in bytes. + fn vidmem_size(&self, bar: &Bar0) -> u64; +} + +/// Returns the HAL corresponding to `chipset`. +pub(super) fn fb_hal(chipset: Chipset) -> &'static dyn FbHal { + use Chipset::*; + + match chipset { + TU102 | TU104 | TU106 | TU117 | TU116 => tu102::TU102_HAL, + GA100 => ga100::GA100_HAL, + GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => { + ga102::GA102_HAL + } + } +} diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs new file mode 100644 index 000000000000..1c03783cddef --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/ga100.rs @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + io::Io, + num::Bounded, + prelude::*, // +}; + +use crate::{ + driver::Bar0, + fb::hal::FbHal, + regs, // +}; + +use super::tu102::FLUSH_SYSMEM_ADDR_SHIFT; + +struct Ga100; + +pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { + u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT + | u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI).adr_63_40()) + << FLUSH_SYSMEM_ADDR_SHIFT_HI +} + +pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) { + bar.write_reg( + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr_63_40( + Bounded::<u64, _>::from(addr) + 
.shr::<FLUSH_SYSMEM_ADDR_SHIFT_HI, _>() + .cast(), + ), + ); + + bar.write_reg( + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::zeroed() + // CAST: `as u32` is used on purpose since we want to strip the upper bits that have + // been written to `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`. + .with_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32), + ); +} + +pub(super) fn display_enabled_ga100(bar: &Bar0) -> bool { + !bar.read(regs::ga100::NV_FUSE_STATUS_OPT_DISPLAY) + .display_disabled() +} + +/// Shift applied to the sysmem address before it is written into +/// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`. +const FLUSH_SYSMEM_ADDR_SHIFT_HI: u32 = 40; + +impl FbHal for Ga100 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + read_sysmem_flush_page_ga100(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + write_sysmem_flush_page_ga100(bar, addr); + + Ok(()) + } + + fn supports_display(&self, bar: &Bar0) -> bool { + display_enabled_ga100(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + super::tu102::vidmem_size_gp102(bar) + } +} + +const GA100: Ga100 = Ga100; +pub(super) const GA100_HAL: &dyn FbHal = &GA100; diff --git a/drivers/gpu/nova-core/fb/hal/ga102.rs b/drivers/gpu/nova-core/fb/hal/ga102.rs new file mode 100644 index 000000000000..4b9f0f74d0e7 --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/ga102.rs @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + io::Io, + prelude::*, // +}; + +use crate::{ + driver::Bar0, + fb::hal::FbHal, + regs, // +}; + +fn vidmem_size_ga102(bar: &Bar0) -> u64 { + bar.read(regs::NV_USABLE_FB_SIZE_IN_MB).usable_fb_size() +} + +struct Ga102; + +impl FbHal for Ga102 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + super::ga100::read_sysmem_flush_page_ga100(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + super::ga100::write_sysmem_flush_page_ga100(bar, addr); + + Ok(()) + } + + fn supports_display(&self, bar: &Bar0) -> bool { + 
super::ga100::display_enabled_ga100(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + vidmem_size_ga102(bar) + } +} + +const GA102: Ga102 = Ga102; +pub(super) const GA102_HAL: &dyn FbHal = &GA102; diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs new file mode 100644 index 000000000000..281bb796e198 --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/tu102.rs @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + io::Io, + prelude::*, // +}; + +use crate::{ + driver::Bar0, + fb::hal::FbHal, + regs, // +}; + +/// Shift applied to the sysmem address before it is written into `NV_PFB_NISO_FLUSH_SYSMEM_ADDR`, +/// to be used by HALs. +pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8; + +pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 { + u64::from(bar.read(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT +} + +pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { + // Check that the address doesn't overflow the receiving 32-bit register. 
+ u32::try_from(addr >> FLUSH_SYSMEM_ADDR_SHIFT) + .map_err(|_| EINVAL) + .map(|addr| { + bar.write_reg(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::zeroed().with_adr_39_08(addr)) + }) +} + +pub(super) fn display_enabled_gm107(bar: &Bar0) -> bool { + !bar.read(regs::gm107::NV_FUSE_STATUS_OPT_DISPLAY) + .display_disabled() +} + +pub(super) fn vidmem_size_gp102(bar: &Bar0) -> u64 { + bar.read(regs::NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE) + .usable_fb_size() +} + +struct Tu102; + +impl FbHal for Tu102 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + read_sysmem_flush_page_gm107(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + write_sysmem_flush_page_gm107(bar, addr) + } + + fn supports_display(&self, bar: &Bar0) -> bool { + display_enabled_gm107(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + vidmem_size_gp102(bar) + } +} + +const TU102: Tu102 = Tu102; +pub(super) const TU102_HAL: &dyn FbHal = &TU102; diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs new file mode 100644 index 000000000000..6c2ab69cb605 --- /dev/null +++ b/drivers/gpu/nova-core/firmware.rs @@ -0,0 +1,537 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Contains structures and functions dedicated to the parsing, building and patching of firmwares +//! to be loaded into a given execution unit. + +use core::marker::PhantomData; +use core::ops::Deref; + +use kernel::{ + device, + firmware, + prelude::*, + str::CString, + transmute::FromBytes, // +}; + +use crate::{ + falcon::{ + FalconDmaLoadTarget, + FalconFirmware, // + }, + gpu, + num::{ + FromSafeCast, + IntoSafeCast, // + }, +}; + +pub(crate) mod booter; +pub(crate) mod fwsec; +pub(crate) mod gsp; +pub(crate) mod riscv; + +pub(crate) const FIRMWARE_VERSION: &str = "570.144"; + +/// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`. 
+fn request_firmware( + dev: &device::Device, + chipset: gpu::Chipset, + name: &str, + ver: &str, +) -> Result<firmware::Firmware> { + let chip_name = chipset.name(); + + CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name}-{ver}.bin")) + .and_then(|path| firmware::Firmware::request(&path, dev)) +} + +/// Structure used to describe some firmwares, notably FWSEC-FRTS. +#[repr(C)] +#[derive(Debug, Clone)] +pub(crate) struct FalconUCodeDescV2 { + /// Header defined by `NV_BIT_FALCON_UCODE_DESC_HEADER_VDESC*` in OpenRM. + hdr: u32, + /// Stored size of the ucode after the header, compressed or uncompressed. + stored_size: u32, + /// Uncompressed size of the ucode. If `stored_size` == `uncompressed_size`, then the ucode + /// is not compressed. + pub(crate) uncompressed_size: u32, + /// Code entry point. + pub(crate) virtual_entry: u32, + /// Offset after the code segment at which the Application Interface Table headers are located. + pub(crate) interface_offset: u32, + /// Base address at which to load the code segment into `IMEM`. + pub(crate) imem_phys_base: u32, + /// Size in bytes of the code to copy into `IMEM` (includes both secure and non-secure + /// segments). + pub(crate) imem_load_size: u32, + /// Virtual `IMEM` address (i.e. `tag`) at which the code should start. + pub(crate) imem_virt_base: u32, + /// Virtual address of secure IMEM segment. + pub(crate) imem_sec_base: u32, + /// Size of secure IMEM segment. + pub(crate) imem_sec_size: u32, + /// Offset into stored (uncompressed) image at which DMEM begins. + pub(crate) dmem_offset: u32, + /// Base address at which to load the data segment into `DMEM`. + pub(crate) dmem_phys_base: u32, + /// Size in bytes of the data to copy into `DMEM`. + pub(crate) dmem_load_size: u32, + /// "Alternate" Size of data to load into IMEM. + pub(crate) alt_imem_load_size: u32, + /// "Alternate" Size of data to load into DMEM. 
+ pub(crate) alt_dmem_load_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for FalconUCodeDescV2 {} + +/// Structure used to describe some firmwares, notably FWSEC-FRTS. +#[repr(C)] +#[derive(Debug, Clone)] +pub(crate) struct FalconUCodeDescV3 { + /// Header defined by `NV_BIT_FALCON_UCODE_DESC_HEADER_VDESC*` in OpenRM. + hdr: u32, + /// Stored size of the ucode after the header. + stored_size: u32, + /// Offset in `DMEM` at which the signature is expected to be found. + pub(crate) pkc_data_offset: u32, + /// Offset after the code segment at which the app headers are located. + pub(crate) interface_offset: u32, + /// Base address at which to load the code segment into `IMEM`. + pub(crate) imem_phys_base: u32, + /// Size in bytes of the code to copy into `IMEM`. + pub(crate) imem_load_size: u32, + /// Virtual `IMEM` address (i.e. `tag`) at which the code should start. + pub(crate) imem_virt_base: u32, + /// Base address at which to load the data segment into `DMEM`. + pub(crate) dmem_phys_base: u32, + /// Size in bytes of the data to copy into `DMEM`. + pub(crate) dmem_load_size: u32, + /// Mask of the falcon engines on which this firmware can run. + pub(crate) engine_id_mask: u16, + /// ID of the ucode used to infer a fuse register to validate the signature. + pub(crate) ucode_id: u8, + /// Number of signatures in this firmware. + pub(crate) signature_count: u8, + /// Versions of the signatures, used to infer a valid signature to use. + pub(crate) signature_versions: u16, + _reserved: u16, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use +// interior mutability. +unsafe impl FromBytes for FalconUCodeDescV3 {} + +/// Enum wrapping the different versions of Falcon microcode descriptors. 
+/// +/// This allows handling both V2 and V3 descriptor formats through a +/// unified type, providing version-agnostic access to firmware metadata +/// via the [`FalconUCodeDescriptor`] trait. +#[derive(Debug, Clone)] +pub(crate) enum FalconUCodeDesc { + V2(FalconUCodeDescV2), + V3(FalconUCodeDescV3), +} + +impl Deref for FalconUCodeDesc { + type Target = dyn FalconUCodeDescriptor; + + fn deref(&self) -> &Self::Target { + match self { + FalconUCodeDesc::V2(v2) => v2, + FalconUCodeDesc::V3(v3) => v3, + } + } +} + +/// Trait providing a common interface for accessing Falcon microcode descriptor fields. +/// +/// This trait abstracts over the different descriptor versions ([`FalconUCodeDescV2`] and +/// [`FalconUCodeDescV3`]), allowing code to work with firmware metadata without needing to +/// know the specific descriptor version. Fields not present return zero. +pub(crate) trait FalconUCodeDescriptor { + fn hdr(&self) -> u32; + fn imem_load_size(&self) -> u32; + fn interface_offset(&self) -> u32; + fn dmem_load_size(&self) -> u32; + fn pkc_data_offset(&self) -> u32; + fn engine_id_mask(&self) -> u16; + fn ucode_id(&self) -> u8; + fn signature_count(&self) -> u8; + fn signature_versions(&self) -> u16; + + /// Returns the size in bytes of the header. 
+ fn size(&self) -> usize { + let hdr = self.hdr(); + + const HDR_SIZE_SHIFT: u32 = 16; + const HDR_SIZE_MASK: u32 = 0xffff0000; + ((hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT).into_safe_cast() + } + + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget; + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget>; + fn dmem_load_params(&self) -> FalconDmaLoadTarget; +} + +impl FalconUCodeDescriptor for FalconUCodeDescV2 { + fn hdr(&self) -> u32 { + self.hdr + } + fn imem_load_size(&self) -> u32 { + self.imem_load_size + } + fn interface_offset(&self) -> u32 { + self.interface_offset + } + fn dmem_load_size(&self) -> u32 { + self.dmem_load_size + } + fn pkc_data_offset(&self) -> u32 { + 0 + } + fn engine_id_mask(&self) -> u16 { + 0 + } + fn ucode_id(&self) -> u8 { + 0 + } + fn signature_count(&self) -> u8 { + 0 + } + fn signature_versions(&self) -> u16 { + 0 + } + + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { + // `imem_sec_base` is the *virtual* start address of the secure IMEM segment, so subtract + // `imem_virt_base` to get its physical offset. + let imem_sec_start = self.imem_sec_base.saturating_sub(self.imem_virt_base); + + FalconDmaLoadTarget { + src_start: imem_sec_start, + dst_start: self.imem_phys_base.saturating_add(imem_sec_start), + len: self.imem_sec_size, + } + } + + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> { + Some(FalconDmaLoadTarget { + // Non-secure code always starts at offset 0. + src_start: 0, + dst_start: self.imem_phys_base, + // `imem_load_size` includes the size of the secure segment, so subtract it to + // get the correct amount of data to copy. 
+ len: self.imem_load_size.saturating_sub(self.imem_sec_size), + }) + } + + fn dmem_load_params(&self) -> FalconDmaLoadTarget { + FalconDmaLoadTarget { + src_start: self.dmem_offset, + dst_start: self.dmem_phys_base, + len: self.dmem_load_size, + } + } +} + +impl FalconUCodeDescriptor for FalconUCodeDescV3 { + fn hdr(&self) -> u32 { + self.hdr + } + fn imem_load_size(&self) -> u32 { + self.imem_load_size + } + fn interface_offset(&self) -> u32 { + self.interface_offset + } + fn dmem_load_size(&self) -> u32 { + self.dmem_load_size + } + fn pkc_data_offset(&self) -> u32 { + self.pkc_data_offset + } + fn engine_id_mask(&self) -> u16 { + self.engine_id_mask + } + fn ucode_id(&self) -> u8 { + self.ucode_id + } + fn signature_count(&self) -> u8 { + self.signature_count + } + fn signature_versions(&self) -> u16 { + self.signature_versions + } + + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { + FalconDmaLoadTarget { + // IMEM segment always starts at offset 0. + src_start: 0, + dst_start: self.imem_phys_base, + len: self.imem_load_size, + } + } + + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> { + // Not used on V3 platforms + None + } + + fn dmem_load_params(&self) -> FalconDmaLoadTarget { + FalconDmaLoadTarget { + // DMEM segment starts right after the IMEM one. + src_start: self.imem_load_size, + dst_start: self.dmem_phys_base, + len: self.dmem_load_size, + } + } +} + +/// Trait implemented by types defining the signed state of a firmware. +trait SignedState {} + +/// Type indicating that the firmware must be signed before it can be used. +struct Unsigned; +impl SignedState for Unsigned {} + +/// Type indicating that the firmware is signed and ready to be loaded. +struct Signed; +impl SignedState for Signed {} + +/// Microcode to be loaded into a specific falcon. +/// +/// This is module-local and meant for sub-modules to use internally. 
+/// +/// After construction, a firmware is [`Unsigned`], and must generally be patched with a signature +/// before it can be loaded (with an exception for development hardware). The +/// [`Self::patch_signature`] and [`Self::no_patch_signature`] methods are used to transition the +/// firmware to its [`Signed`] state. +// TODO: Consider replacing this with a coherent memory object once `CoherentAllocation` supports +// temporary CPU-exclusive access to the object without unsafe methods. +struct FirmwareObject<F: FalconFirmware, S: SignedState>(KVVec<u8>, PhantomData<(F, S)>); + +/// Trait for signatures to be patched directly into a given firmware. +/// +/// This is module-local and meant for sub-modules to use internally. +trait FirmwareSignature<F: FalconFirmware>: AsRef<[u8]> {} + +impl<F: FalconFirmware> FirmwareObject<F, Unsigned> { + /// Patches the firmware at offset `signature_start` with `signature`. + fn patch_signature<S: FirmwareSignature<F>>( + mut self, + signature: &S, + signature_start: usize, + ) -> Result<FirmwareObject<F, Signed>> { + let signature_bytes = signature.as_ref(); + let signature_end = signature_start + .checked_add(signature_bytes.len()) + .ok_or(EOVERFLOW)?; + let dst = self + .0 + .get_mut(signature_start..signature_end) + .ok_or(EINVAL)?; + + // PANIC: `dst` and `signature_bytes` have the same length. + dst.copy_from_slice(signature_bytes); + + Ok(FirmwareObject(self.0, PhantomData)) + } + + /// Mark the firmware as signed without patching it. + /// + /// This method is used to explicitly confirm that we do not need to sign the firmware, while + /// allowing us to continue as if it was. This is typically only needed for development + /// hardware. + fn no_patch_signature(self) -> FirmwareObject<F, Signed> { + FirmwareObject(self.0, PhantomData) + } +} + +/// Header common to most firmware files. +#[repr(C)] +#[derive(Debug, Clone)] +struct BinHdr { + /// Magic number, must be `0x10de`. 
+ bin_magic: u32, + /// Version of the header. + bin_ver: u32, + /// Size in bytes of the binary (to be ignored). + bin_size: u32, + /// Offset of the start of the application-specific header. + header_offset: u32, + /// Offset of the start of the data payload. + data_offset: u32, + /// Size in bytes of the data payload. + data_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for BinHdr {} + +/// A firmware blob starting with a [`BinHdr`]. +struct BinFirmware<'a> { + hdr: BinHdr, + fw: &'a [u8], +} + +impl<'a> BinFirmware<'a> { + /// Interprets `fw` as a firmware image starting with a [`BinHdr`], and returns the + /// corresponding [`BinFirmware`] that can be used to extract its payload. + fn new(fw: &'a firmware::Firmware) -> Result<Self> { + const BIN_MAGIC: u32 = 0x10de; + let fw = fw.data(); + + fw.get(0..size_of::<BinHdr>()) + // Extract header. + .and_then(BinHdr::from_bytes_copy) + // Validate header. + .and_then(|hdr| { + if hdr.bin_magic == BIN_MAGIC { + Some(hdr) + } else { + None + } + }) + .map(|hdr| Self { hdr, fw }) + .ok_or(EINVAL) + } + + /// Returns the data payload of the firmware, or `None` if the data range is out of bounds of + /// the firmware image. 
+ fn data(&self) -> Option<&[u8]> { + let fw_start = usize::from_safe_cast(self.hdr.data_offset); + let fw_size = usize::from_safe_cast(self.hdr.data_size); + let fw_end = fw_start.checked_add(fw_size)?; + + self.fw.get(fw_start..fw_end) + } +} + +pub(crate) struct ModInfoBuilder<const N: usize>(firmware::ModInfoBuilder<N>); + +impl<const N: usize> ModInfoBuilder<N> { + const fn make_entry_file(self, chipset: &str, fw: &str) -> Self { + ModInfoBuilder( + self.0 + .new_entry() + .push("nvidia/") + .push(chipset) + .push("/gsp/") + .push(fw) + .push("-") + .push(FIRMWARE_VERSION) + .push(".bin"), + ) + } + + const fn make_entry_chipset(self, chipset: gpu::Chipset) -> Self { + let name = chipset.name(); + + let this = self + .make_entry_file(name, "booter_load") + .make_entry_file(name, "booter_unload") + .make_entry_file(name, "bootloader") + .make_entry_file(name, "gsp"); + + if chipset.needs_fwsec_bootloader() { + this.make_entry_file(name, "gen_bootloader") + } else { + this + } + } + + pub(crate) const fn create( + module_name: &'static core::ffi::CStr, + ) -> firmware::ModInfoBuilder<N> { + let mut this = Self(firmware::ModInfoBuilder::new(module_name)); + let mut i = 0; + + while i < gpu::Chipset::ALL.len() { + this = this.make_entry_chipset(gpu::Chipset::ALL[i]); + i += 1; + } + + this.0 + } +} + +/// Ad-hoc and temporary module to extract sections from ELF images. +/// +/// Some firmware images are currently packaged as ELF files, where sections names are used as keys +/// to specific and related bits of data. Future firmware versions are scheduled to move away from +/// that scheme before nova-core becomes stable, which means this module will eventually be +/// removed. +mod elf { + use core::mem::size_of; + + use kernel::{ + bindings, + str::CStr, + transmute::FromBytes, // + }; + + /// Newtype to provide a [`FromBytes`] implementation. 
+ #[repr(transparent)] + struct Elf64Hdr(bindings::elf64_hdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64Hdr {} + + #[repr(transparent)] + struct Elf64SHdr(bindings::elf64_shdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64SHdr {} + + /// Returns a NUL-terminated string from the ELF image at `offset`. + fn elf_str(elf: &[u8], offset: u64) -> Option<&str> { + let idx = usize::try_from(offset).ok()?; + let bytes = elf.get(idx..)?; + CStr::from_bytes_until_nul(bytes).ok()?.to_str().ok() + } + + /// Tries to extract the section named `name` from the ELF64 image `elf`, and returns it. + pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> { + let hdr = &elf + .get(0..size_of::<bindings::elf64_hdr>()) + .and_then(Elf64Hdr::from_bytes)? + .0; + + // Get all the section headers. + let mut shdr = { + let shdr_num = usize::from(hdr.e_shnum); + let shdr_start = usize::try_from(hdr.e_shoff).ok()?; + let shdr_end = shdr_num + .checked_mul(size_of::<Elf64SHdr>()) + .and_then(|v| v.checked_add(shdr_start))?; + + elf.get(shdr_start..shdr_end) + .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))? + }; + + // Get the string table. + let strhdr = shdr + .clone() + .nth(usize::from(hdr.e_shstrndx)) + .and_then(Elf64SHdr::from_bytes)?; + + // Find the section whose name matches `name` and return it. 
+ shdr.find_map(|sh| { + let hdr = Elf64SHdr::from_bytes(sh)?; + let name_offset = strhdr.0.sh_offset.checked_add(u64::from(hdr.0.sh_name))?; + let section_name = elf_str(elf, name_offset)?; + + if section_name != name { + return None; + } + + let start = usize::try_from(hdr.0.sh_offset).ok()?; + let end = usize::try_from(hdr.0.sh_size) + .ok() + .and_then(|sh_size| start.checked_add(sh_size))?; + + elf.get(start..end) + }) + } +} diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs new file mode 100644 index 000000000000..de2a4536b532 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for loading and patching the `Booter` firmware. `Booter` is a Heavy Secured firmware +//! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon +//! (and optionally unload it through a separate firmware image). + +use core::marker::PhantomData; + +use kernel::{ + device, + prelude::*, + transmute::FromBytes, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + sec2::Sec2, + Falcon, + FalconBromParams, + FalconDmaLoadTarget, + FalconDmaLoadable, + FalconFirmware, // + }, + firmware::{ + BinFirmware, + FirmwareObject, + FirmwareSignature, + Signed, + Unsigned, // + }, + gpu::Chipset, + num::{ + FromSafeCast, + IntoSafeCast, // + }, +}; + +/// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at +/// `offset` in `slice`. +fn frombytes_at<S: FromBytes + Sized>(slice: &[u8], offset: usize) -> Result<S> { + let end = offset.checked_add(size_of::<S>()).ok_or(EINVAL)?; + slice + .get(offset..end) + .and_then(S::from_bytes_copy) + .ok_or(EINVAL) +} + +/// Heavy-Secured firmware header. +/// +/// Such firmwares have an application-specific payload that needs to be patched with a given +/// signature. 
+#[repr(C)] +#[derive(Debug, Clone)] +struct HsHeaderV2 { + /// Offset to the start of the signatures. + sig_prod_offset: u32, + /// Size in bytes of the signatures. + sig_prod_size: u32, + /// Offset to a `u32` containing the location at which to patch the signature in the microcode + /// image. + patch_loc_offset: u32, + /// Offset to a `u32` containing the index of the signature to patch. + patch_sig_offset: u32, + /// Start offset to the signature metadata. + meta_data_offset: u32, + /// Size in bytes of the signature metadata. + meta_data_size: u32, + /// Offset to a `u32` containing the number of signatures in the signatures section. + num_sig_offset: u32, + /// Offset of the application-specific header. + header_offset: u32, + /// Size in bytes of the application-specific header. + header_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsHeaderV2 {} + +/// Heavy-Secured Firmware image container. +/// +/// This provides convenient access to the fields of [`HsHeaderV2`] that are actually indices to +/// read from in the firmware data. +struct HsFirmwareV2<'a> { + hdr: HsHeaderV2, + fw: &'a [u8], +} + +impl<'a> HsFirmwareV2<'a> { + /// Interprets the header of `bin_fw` as a [`HsHeaderV2`] and returns an instance of + /// `HsFirmwareV2` for further parsing. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'a>) -> Result<Self> { + frombytes_at::<HsHeaderV2>(bin_fw.fw, bin_fw.hdr.header_offset.into_safe_cast()) + .map(|hdr| Self { hdr, fw: bin_fw.fw }) + } + + /// Returns the location at which the signatures should be patched in the microcode image. + /// + /// Fails if the offset of the patch location is outside the bounds of the firmware + /// image. 
+ fn patch_location(&self) -> Result<u32> { + frombytes_at::<u32>(self.fw, self.hdr.patch_loc_offset.into_safe_cast()) + } + + /// Returns an iterator to the signatures of the firmware. The iterator can be empty if the + /// firmware is unsigned. + /// + /// Fails if the pointed signatures are outside the bounds of the firmware image. + fn signatures_iter(&'a self) -> Result<impl Iterator<Item = BooterSignature<'a>>> { + let num_sig = frombytes_at::<u32>(self.fw, self.hdr.num_sig_offset.into_safe_cast())?; + let iter = match self.hdr.sig_prod_size.checked_div(num_sig) { + // If there are no signatures, return an iterator that will yield zero elements. + None => (&[] as &[u8]).chunks_exact(1), + Some(sig_size) => { + let patch_sig = + frombytes_at::<u32>(self.fw, self.hdr.patch_sig_offset.into_safe_cast())?; + + let signatures_start = self + .hdr + .sig_prod_offset + .checked_add(patch_sig) + .map(usize::from_safe_cast) + .ok_or(EINVAL)?; + + let signatures_end = signatures_start + .checked_add(usize::from_safe_cast(self.hdr.sig_prod_size)) + .ok_or(EINVAL)?; + + self.fw + // Get signatures range. + .get(signatures_start..signatures_end) + .ok_or(EINVAL)? + .chunks_exact(sig_size.into_safe_cast()) + } + }; + + // Map the byte slices into signatures. + Ok(iter.map(BooterSignature)) + } +} + +/// Signature parameters, as defined in the firmware. +#[repr(C)] +struct HsSignatureParams { + /// Fuse version to use. + fuse_ver: u32, + /// Mask of engine IDs this firmware applies to. + engine_id_mask: u32, + /// ID of the microcode. + ucode_id: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsSignatureParams {} + +impl HsSignatureParams { + /// Returns the signature parameters contained in `hs_fw`. + /// + /// Fails if the meta data parameter of `hs_fw` is outside the bounds of the firmware image, or + /// if its size doesn't match that of [`HsSignatureParams`]. 
+ fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { + let start = usize::from_safe_cast(hs_fw.hdr.meta_data_offset); + let end = start + .checked_add(hs_fw.hdr.meta_data_size.into_safe_cast()) + .ok_or(EINVAL)?; + + hs_fw + .fw + .get(start..end) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// Header for code and data load offsets. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2 { + // Offset at which the code starts. + os_code_offset: u32, + // Total size of the code, for all apps. + os_code_size: u32, + // Offset at which the data starts. + os_data_offset: u32, + // Size of the data. + os_data_size: u32, + // Number of apps following this header. Each app is described by a [`HsLoadHeaderV2App`]. + num_apps: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2 {} + +impl HsLoadHeaderV2 { + /// Returns the load header contained in `hs_fw`. + /// + /// Fails if the header pointed at by `hs_fw` is not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { + frombytes_at::<Self>(hs_fw.fw, hs_fw.hdr.header_offset.into_safe_cast()) + } +} + +/// Header for app code loader. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2App { + /// Offset at which to load the app code. + offset: u32, + /// Length in bytes of the app code. + len: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2App {} + +impl HsLoadHeaderV2App { + /// Returns the [`HsLoadHeaderV2App`] for app `idx` of `hs_fw`. + /// + /// Fails if `idx` is larger than the number of apps declared in `hs_fw`, or if the header is + /// not within the bounds of the firmware image. 
+ fn new(hs_fw: &HsFirmwareV2<'_>, idx: u32) -> Result<Self> { + let load_hdr = HsLoadHeaderV2::new(hs_fw)?; + if idx >= load_hdr.num_apps { + Err(EINVAL) + } else { + frombytes_at::<Self>( + hs_fw.fw, + usize::from_safe_cast(hs_fw.hdr.header_offset) + // Skip the load header... + .checked_add(size_of::<HsLoadHeaderV2>()) + // ... and jump to app header `idx`. + .and_then(|offset| { + offset + .checked_add(usize::from_safe_cast(idx).checked_mul(size_of::<Self>())?) + }) + .ok_or(EINVAL)?, + ) + } + } +} + +/// Signature for Booter firmware. Its size is encoded into the header and not known at compile +/// time, so we just wrap a byte slice on which we can implement [`FirmwareSignature`]. +struct BooterSignature<'a>(&'a [u8]); + +impl<'a> AsRef<[u8]> for BooterSignature<'a> { + fn as_ref(&self) -> &[u8] { + self.0 + } +} + +impl<'a> FirmwareSignature<BooterFirmware> for BooterSignature<'a> {} + +/// The `Booter` loader firmware, responsible for loading the GSP. +pub(crate) struct BooterFirmware { + // Load parameters for Secure `IMEM` falcon memory. + imem_sec_load_target: FalconDmaLoadTarget, + // Load parameters for Non-Secure `IMEM` falcon memory, + // used only on Turing and GA100. + imem_ns_load_target: Option<FalconDmaLoadTarget>, + // Load parameters for `DMEM` falcon memory. + dmem_load_target: FalconDmaLoadTarget, + // BROM falcon parameters. + brom_params: FalconBromParams, + // Device-mapped firmware image. + ucode: FirmwareObject<Self, Signed>, +} + +impl FirmwareObject<BooterFirmware, Unsigned> { + fn new_booter(data: &[u8]) -> Result<Self> { + let mut ucode = KVVec::new(); + ucode.extend_from_slice(data, GFP_KERNEL)?; + + Ok(Self(ucode, PhantomData)) + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) enum BooterKind { + Loader, + #[expect(unused)] + Unloader, +} + +impl BooterFirmware { + /// Parses the Booter firmware contained in `fw`, and patches the correct signature so it is + /// ready to be loaded and run on `falcon`. 
+ pub(crate) fn new( + dev: &device::Device<device::Bound>, + kind: BooterKind, + chipset: Chipset, + ver: &str, + falcon: &Falcon<<Self as FalconFirmware>::Target>, + bar: &Bar0, + ) -> Result<Self> { + let fw_name = match kind { + BooterKind::Loader => "booter_load", + BooterKind::Unloader => "booter_unload", + }; + let fw = super::request_firmware(dev, chipset, fw_name, ver)?; + let bin_fw = BinFirmware::new(&fw)?; + + // The binary firmware embeds a Heavy-Secured firmware. + let hs_fw = HsFirmwareV2::new(&bin_fw)?; + + // The Heavy-Secured firmware embeds a firmware load descriptor. + let load_hdr = HsLoadHeaderV2::new(&hs_fw)?; + + // Offset in `ucode` where to patch the signature. + let patch_loc = hs_fw.patch_location()?; + + let sig_params = HsSignatureParams::new(&hs_fw)?; + let brom_params = FalconBromParams { + // `load_hdr.os_data_offset` is an absolute index, but `pkc_data_offset` is from the + // signature patch location. + pkc_data_offset: patch_loc + .checked_sub(load_hdr.os_data_offset) + .ok_or(EINVAL)?, + engine_id_mask: u16::try_from(sig_params.engine_id_mask).map_err(|_| EINVAL)?, + ucode_id: u8::try_from(sig_params.ucode_id).map_err(|_| EINVAL)?, + }; + let app0 = HsLoadHeaderV2App::new(&hs_fw, 0)?; + + // Object containing the firmware microcode to be signature-patched. + let ucode = bin_fw + .data() + .ok_or(EINVAL) + .and_then(FirmwareObject::<Self, _>::new_booter)?; + + let ucode_signed = { + let mut signatures = hs_fw.signatures_iter()?.peekable(); + + if signatures.peek().is_none() { + // If there are no signatures, then the firmware is unsigned. + ucode.no_patch_signature() + } else { + // Obtain the version from the fuse register, and extract the corresponding + // signature. + let reg_fuse_version = falcon.signature_reg_fuse_version( + bar, + brom_params.engine_id_mask, + brom_params.ucode_id, + )?; + + // `0` means the last signature should be used. 
+ const FUSE_VERSION_USE_LAST_SIG: u32 = 0; + let signature = match reg_fuse_version { + FUSE_VERSION_USE_LAST_SIG => signatures.last(), + // Otherwise hardware fuse version needs to be subtracted to obtain the index. + reg_fuse_version => { + let Some(idx) = sig_params.fuse_ver.checked_sub(reg_fuse_version) else { + dev_err!(dev, "invalid fuse version for Booter firmware\n"); + return Err(EINVAL); + }; + signatures.nth(idx.into_safe_cast()) + } + } + .ok_or(EINVAL)?; + + ucode.patch_signature(&signature, patch_loc.into_safe_cast())? + } + }; + + // There are two versions of Booter, one for Turing/GA100, and another for + // GA102+. The extraction of the IMEM sections differs between the two + // versions. Unfortunately, the file names are the same, and the headers + // don't indicate the versions. The only way to differentiate is by the Chipset. + let (imem_sec_dst_start, imem_ns_load_target) = if chipset <= Chipset::GA100 { + ( + app0.offset, + Some(FalconDmaLoadTarget { + src_start: 0, + dst_start: load_hdr.os_code_offset, + len: load_hdr.os_code_size, + }), + ) + } else { + (0, None) + }; + + Ok(Self { + imem_sec_load_target: FalconDmaLoadTarget { + src_start: app0.offset, + dst_start: imem_sec_dst_start, + len: app0.len, + }, + imem_ns_load_target, + dmem_load_target: FalconDmaLoadTarget { + src_start: load_hdr.os_data_offset, + dst_start: 0, + len: load_hdr.os_data_size, + }, + brom_params, + ucode: ucode_signed, + }) + } +} + +impl FalconDmaLoadable for BooterFirmware { + fn as_slice(&self) -> &[u8] { + self.ucode.0.as_slice() + } + + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { + self.imem_sec_load_target.clone() + } + + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> { + self.imem_ns_load_target.clone() + } + + fn dmem_load_params(&self) -> FalconDmaLoadTarget { + self.dmem_load_target.clone() + } +} + +impl FalconFirmware for BooterFirmware { + type Target = Sec2; + + fn brom_params(&self) -> FalconBromParams { + 
self.brom_params.clone() + } + + fn boot_addr(&self) -> u32 { + if let Some(ns_target) = &self.imem_ns_load_target { + ns_target.dst_start + } else { + self.imem_sec_load_target.src_start + } + } +} diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs new file mode 100644 index 000000000000..8810cb49db67 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! FWSEC is a High Secure firmware that is extracted from the BIOS and performs the first step of +//! the GSP startup by creating the WPR2 memory region and copying critical areas of the VBIOS into +//! it after authenticating them, ensuring they haven't been tampered with. It runs on the GSP +//! falcon. +//! +//! Before being run, it needs to be patched in two areas: +//! +//! - The command to be run, as this firmware can perform several tasks ; +//! - The ucode signature, so the GSP falcon can run FWSEC in HS mode. + +pub(crate) mod bootloader; + +use core::marker::PhantomData; + +use kernel::{ + device::{ + self, + Device, // + }, + prelude::*, + transmute::{ + AsBytes, + FromBytes, // + }, +}; + +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp, + Falcon, + FalconBromParams, + FalconDmaLoadTarget, + FalconDmaLoadable, + FalconFirmware, // + }, + firmware::{ + FalconUCodeDesc, + FirmwareObject, + FirmwareSignature, + Signed, + Unsigned, // + }, + num::FromSafeCast, + vbios::Vbios, +}; + +const NVFW_FALCON_APPIF_ID_DMEMMAPPER: u32 = 0x4; + +#[repr(C)] +#[derive(Debug)] +struct FalconAppifHdrV1 { + version: u8, + header_size: u8, + entry_size: u8, + entry_count: u8, +} +// SAFETY: Any byte sequence is valid for this struct. +unsafe impl FromBytes for FalconAppifHdrV1 {} + +#[repr(C, packed)] +#[derive(Debug)] +struct FalconAppifV1 { + id: u32, + dmem_base: u32, +} +// SAFETY: Any byte sequence is valid for this struct. 
+unsafe impl FromBytes for FalconAppifV1 {} + +#[derive(Debug)] +#[repr(C, packed)] +struct FalconAppifDmemmapperV3 { + signature: u32, + version: u16, + size: u16, + cmd_in_buffer_offset: u32, + cmd_in_buffer_size: u32, + cmd_out_buffer_offset: u32, + cmd_out_buffer_size: u32, + nvf_img_data_buffer_offset: u32, + nvf_img_data_buffer_size: u32, + printf_buffer_hdr: u32, + ucode_build_time_stamp: u32, + ucode_signature: u32, + init_cmd: u32, + ucode_feature: u32, + ucode_cmd_mask0: u32, + ucode_cmd_mask1: u32, + multi_tgt_tbl: u32, +} +// SAFETY: Any byte sequence is valid for this struct. +unsafe impl FromBytes for FalconAppifDmemmapperV3 {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for FalconAppifDmemmapperV3 {} + +#[derive(Debug)] +#[repr(C, packed)] +struct ReadVbios { + ver: u32, + hdr: u32, + addr: u64, + size: u32, + flags: u32, +} +// SAFETY: Any byte sequence is valid for this struct. +unsafe impl FromBytes for ReadVbios {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for ReadVbios {} + +#[derive(Debug)] +#[repr(C, packed)] +struct FrtsRegion { + ver: u32, + hdr: u32, + addr: u32, + size: u32, + ftype: u32, +} +// SAFETY: Any byte sequence is valid for this struct. +unsafe impl FromBytes for FrtsRegion {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for FrtsRegion {} + +const NVFW_FRTS_CMD_REGION_TYPE_FB: u32 = 2; + +#[repr(C, packed)] +struct FrtsCmd { + read_vbios: ReadVbios, + frts_region: FrtsRegion, +} +// SAFETY: Any byte sequence is valid for this struct. +unsafe impl FromBytes for FrtsCmd {} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. 
+unsafe impl AsBytes for FrtsCmd {} + +const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS: u32 = 0x15; +const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB: u32 = 0x19; + +/// Command for the [`FwsecFirmware`] to execute. +pub(crate) enum FwsecCommand { + /// Asks [`FwsecFirmware`] to carve out the WPR2 area and place a verified copy of the VBIOS + /// image into it. + Frts { frts_addr: u64, frts_size: u64 }, + /// Asks [`FwsecFirmware`] to load pre-OS apps on the PMU. + #[expect(dead_code)] + Sb, +} + +/// Size of the signatures used in FWSEC. +const BCRT30_RSA3K_SIG_SIZE: usize = 384; + +/// A single signature that can be patched into a FWSEC image. +#[repr(transparent)] +pub(crate) struct Bcrt30Rsa3kSignature([u8; BCRT30_RSA3K_SIG_SIZE]); + +/// SAFETY: A signature is just an array of bytes. +unsafe impl FromBytes for Bcrt30Rsa3kSignature {} + +impl From<[u8; BCRT30_RSA3K_SIG_SIZE]> for Bcrt30Rsa3kSignature { + fn from(sig: [u8; BCRT30_RSA3K_SIG_SIZE]) -> Self { + Self(sig) + } +} + +impl AsRef<[u8]> for Bcrt30Rsa3kSignature { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl FirmwareSignature<FwsecFirmware> for Bcrt30Rsa3kSignature {} + +/// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. +/// +/// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow. +pub(crate) struct FwsecFirmware { + /// Descriptor of the firmware. + desc: FalconUCodeDesc, + /// Object containing the firmware binary. 
+ ucode: FirmwareObject<Self, Signed>, +} + +impl FalconDmaLoadable for FwsecFirmware { + fn as_slice(&self) -> &[u8] { + self.ucode.0.as_slice() + } + + fn imem_sec_load_params(&self) -> FalconDmaLoadTarget { + self.desc.imem_sec_load_params() + } + + fn imem_ns_load_params(&self) -> Option<FalconDmaLoadTarget> { + self.desc.imem_ns_load_params() + } + + fn dmem_load_params(&self) -> FalconDmaLoadTarget { + self.desc.dmem_load_params() + } +} + +impl FalconFirmware for FwsecFirmware { + type Target = Gsp; + + fn brom_params(&self) -> FalconBromParams { + FalconBromParams { + pkc_data_offset: self.desc.pkc_data_offset(), + engine_id_mask: self.desc.engine_id_mask(), + ucode_id: self.desc.ucode_id(), + } + } + + fn boot_addr(&self) -> u32 { + 0 + } +} + +impl FirmwareObject<FwsecFirmware, Unsigned> { + fn new_fwsec(bios: &Vbios, cmd: FwsecCommand) -> Result<Self> { + let desc = bios.fwsec_image().header()?; + let mut ucode = KVVec::new(); + ucode.extend_from_slice(bios.fwsec_image().ucode(&desc)?, GFP_KERNEL)?; + + let hdr_offset = desc + .imem_load_size() + .checked_add(desc.interface_offset()) + .map(usize::from_safe_cast) + .ok_or(EINVAL)?; + + let hdr = ucode + .get(hdr_offset..) + .and_then(FalconAppifHdrV1::from_bytes_prefix) + .ok_or(EINVAL)? + .0; + + if hdr.version != 1 { + return Err(EINVAL); + } + + // Find the DMEM mapper section in the firmware. + for i in 0..usize::from(hdr.entry_count) { + // CALC: hdr_offset + header_size + i * entry_size. + let entry_offset = hdr_offset + .checked_add(usize::from(hdr.header_size)) + .and_then(|o| o.checked_add(i.checked_mul(usize::from(hdr.entry_size))?)) + .ok_or(EINVAL)?; + + let app = ucode + .get(entry_offset..) + .and_then(FalconAppifV1::from_bytes_prefix) + .ok_or(EINVAL)? 
+ .0; + + if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER { + continue; + } + let dmem_base = app.dmem_base; + + let dmem_mapper_offset = desc + .imem_load_size() + .checked_add(dmem_base) + .map(usize::from_safe_cast) + .ok_or(EINVAL)?; + + let dmem_mapper = ucode + .get_mut(dmem_mapper_offset..) + .and_then(FalconAppifDmemmapperV3::from_bytes_mut_prefix) + .ok_or(EINVAL)? + .0; + + dmem_mapper.init_cmd = match cmd { + FwsecCommand::Frts { .. } => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS, + FwsecCommand::Sb => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB, + }; + let cmd_in_buffer_offset = dmem_mapper.cmd_in_buffer_offset; + + let frts_cmd_offset = desc + .imem_load_size() + .checked_add(cmd_in_buffer_offset) + .map(usize::from_safe_cast) + .ok_or(EINVAL)?; + + let frts_cmd = ucode + .get_mut(frts_cmd_offset..) + .and_then(FrtsCmd::from_bytes_mut_prefix) + .ok_or(EINVAL)? + .0; + + frts_cmd.read_vbios = ReadVbios { + ver: 1, + hdr: u32::try_from(size_of::<ReadVbios>())?, + addr: 0, + size: 0, + flags: 2, + }; + if let FwsecCommand::Frts { + frts_addr, + frts_size, + } = cmd + { + frts_cmd.frts_region = FrtsRegion { + ver: 1, + hdr: u32::try_from(size_of::<FrtsRegion>())?, + addr: u32::try_from(frts_addr >> 12)?, + size: u32::try_from(frts_size >> 12)?, + ftype: NVFW_FRTS_CMD_REGION_TYPE_FB, + }; + } + + // Return early as we found and patched the DMEMMAPPER region. + return Ok(Self(ucode, PhantomData)); + } + + Err(ENOTSUPP) + } +} + +impl FwsecFirmware { + /// Extract the Fwsec firmware from `bios` and patch it to run on `falcon` with the `cmd` + /// command. + pub(crate) fn new( + dev: &Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + bios: &Vbios, + cmd: FwsecCommand, + ) -> Result<Self> { + let ucode_dma = FirmwareObject::<Self, _>::new_fwsec(bios, cmd)?; + + // Patch signature if needed. 
+ let desc = bios.fwsec_image().header()?; + let ucode_signed = if desc.signature_count() != 0 { + let sig_base_img = desc + .imem_load_size() + .checked_add(desc.pkc_data_offset()) + .map(usize::from_safe_cast) + .ok_or(EINVAL)?; + let desc_sig_versions = u32::from(desc.signature_versions()); + let reg_fuse_version = + falcon.signature_reg_fuse_version(bar, desc.engine_id_mask(), desc.ucode_id())?; + dev_dbg!( + dev, + "desc_sig_versions: {:#x}, reg_fuse_version: {}\n", + desc_sig_versions, + reg_fuse_version + ); + let signature_idx = { + let reg_fuse_version_bit = 1 << reg_fuse_version; + + // Check if the fuse version is supported by the firmware. + if desc_sig_versions & reg_fuse_version_bit == 0 { + dev_err!( + dev, + "no matching signature: {:#x} {:#x}\n", + reg_fuse_version_bit, + desc_sig_versions, + ); + return Err(EINVAL); + } + + // `desc_sig_versions` has one bit set per included signature. Thus, the index of + // the signature to patch is the number of bits in `desc_sig_versions` set to `1` + // before `reg_fuse_version_bit`. + + // Mask of the bits of `desc_sig_versions` to preserve. + let reg_fuse_version_mask = reg_fuse_version_bit.wrapping_sub(1); + + usize::from_safe_cast((desc_sig_versions & reg_fuse_version_mask).count_ones()) + }; + + dev_dbg!(dev, "patching signature with index {}\n", signature_idx); + let signature = bios + .fwsec_image() + .sigs(&desc) + .and_then(|sigs| sigs.get(signature_idx).ok_or(EINVAL))?; + + ucode_dma.patch_signature(signature, sig_base_img)? + } else { + ucode_dma.no_patch_signature() + }; + + Ok(FwsecFirmware { + desc, + ucode: ucode_signed, + }) + } + + /// Loads the FWSEC firmware into `falcon` and execute it. + /// + /// This must only be called on chipsets that do not need the FWSEC bootloader (i.e., where + /// [`Chipset::needs_fwsec_bootloader()`](crate::gpu::Chipset::needs_fwsec_bootloader) returns + /// `false`). On chipsets that do, use [`bootloader::FwsecFirmwareWithBl`] instead. 
+ pub(crate) fn run( + &self, + dev: &Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + ) -> Result<()> { + // Reset falcon, load the firmware, and run it. + falcon + .reset(bar) + .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; + falcon + .load(dev, bar, self) + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + let (mbox0, _) = falcon + .boot(bar, Some(0), None) + .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?; + if mbox0 != 0 { + dev_err!(dev, "FWSEC firmware returned error {}\n", mbox0); + Err(EIO) + } else { + Ok(()) + } + } +} diff --git a/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs b/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs new file mode 100644 index 000000000000..bcb713a868e2 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/fwsec/bootloader.rs @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Bootloader support for the FWSEC firmware. +//! +//! On Turing, the FWSEC firmware is not loaded directly, but is instead loaded through a small +//! bootloader program that performs the required DMA operations. This bootloader itself needs to +//! be loaded using PIO. + +use kernel::{ + alloc::KVec, + device::{ + self, + Device, // + }, + dma::Coherent, + io::{ + register::WithBase, // + Io, + }, + prelude::*, + ptr::{ + Alignable, + Alignment, // + }, + sizes, + transmute::{ + AsBytes, + FromBytes, // + }, +}; + +use crate::{ + driver::Bar0, + falcon::{ + self, + gsp::Gsp, + Falcon, + FalconBromParams, + FalconDmaLoadable, + FalconFbifMemType, + FalconFbifTarget, + FalconFirmware, + FalconPioDmemLoadTarget, + FalconPioImemLoadTarget, + FalconPioLoadable, // + }, + firmware::{ + fwsec::FwsecFirmware, + request_firmware, + BinHdr, + FIRMWARE_VERSION, // + }, + gpu::Chipset, + num::FromSafeCast, + regs, +}; + +/// Descriptor used by RM to figure out the requirements of the boot loader. 
+/// +/// Most of its fields appear to be legacy and carry incorrect values, so they are left unused. +#[repr(C)] +#[derive(Debug, Clone)] +struct BootloaderDesc { + /// Starting tag of bootloader. + start_tag: u32, + /// DMEM load offset - unused here as we always load at offset `0`. + _dmem_load_off: u32, + /// Offset of code section in the image. Unused as there is only one section in the bootloader + /// binary. + _code_off: u32, + /// Size of code section in the image. + code_size: u32, + /// Offset of data section in the image. Unused as we build the data section ourselves. + _data_off: u32, + /// Size of data section in the image. Unused as we build the data section ourselves. + _data_size: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for BootloaderDesc {} + +/// Structure used by the boot-loader to load the rest of the code. +/// +/// This has to be filled by the GPU driver and copied into DMEM at offset +/// [`BootloaderDesc::_dmem_load_off`]. +#[repr(C, packed)] +#[derive(Debug, Clone)] +struct BootloaderDmemDescV2 { + /// Reserved, should always be first element. + reserved: [u32; 4], + /// 16B signature for secure code, 0s if no secure code. + signature: [u32; 4], + /// DMA context used by the bootloader while loading code/data. + ctx_dma: u32, + /// 256B-aligned physical FB address where code is located. + code_dma_base: u64, + /// Offset from `code_dma_base` where the non-secure code is located. + /// + /// Also used as destination IMEM offset of non-secure code as the DMA firmware object is + /// expected to be a mirror image of its loaded state. + /// + /// Must be multiple of 256. + non_sec_code_off: u32, + /// Size of the non-secure code part. + non_sec_code_size: u32, + /// Offset from `code_dma_base` where the secure code is located (must be multiple of 256). 
+ /// + /// Also used as destination IMEM offset of secure code as the DMA firmware object is expected + /// to be a mirror image of its loaded state. + /// + /// Must be multiple of 256. + sec_code_off: u32, + /// Size of the secure code part. + sec_code_size: u32, + /// Code entry point invoked by the bootloader after code is loaded. + code_entry_point: u32, + /// 256B-aligned physical FB address where data is located. + data_dma_base: u64, + /// Size of data block (should be multiple of 256B). + data_size: u32, + /// Number of arguments to be passed to the target firmware being loaded. + argc: u32, + /// Arguments to be passed to the target firmware being loaded. + argv: u32, +} +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability. +unsafe impl AsBytes for BootloaderDmemDescV2 {} + +/// Wrapper for [`FwsecFirmware`] that includes the bootloader performing the actual load +/// operation. +pub(crate) struct FwsecFirmwareWithBl { + /// DMA object the bootloader will copy the firmware from. + _firmware_dma: Coherent<[u8]>, + /// Code of the bootloader to be loaded into non-secure IMEM. + ucode: KVec<u8>, + /// Descriptor to be loaded into DMEM for the bootloader to read. + dmem_desc: BootloaderDmemDescV2, + /// Range-validated start offset of the firmware code in IMEM. + imem_dst_start: u16, + /// BROM parameters of the loaded firmware. + brom_params: FalconBromParams, + /// Range-validated `desc.start_tag`. + start_tag: u16, +} + +impl FwsecFirmwareWithBl { + /// Loads the bootloader firmware for `dev` and `chipset`, and wrap `firmware` so it can be + /// loaded using it. 
+ pub(crate) fn new( + firmware: FwsecFirmware, + dev: &Device<device::Bound>, + chipset: Chipset, + ) -> Result<Self> { + let fw = request_firmware(dev, chipset, "gen_bootloader", FIRMWARE_VERSION)?; + let hdr = fw + .data() + .get(0..size_of::<BinHdr>()) + .and_then(BinHdr::from_bytes_copy) + .ok_or(EINVAL)?; + + let desc = { + let desc_offset = usize::from_safe_cast(hdr.header_offset); + + fw.data() + .get(desc_offset..) + .and_then(BootloaderDesc::from_bytes_copy_prefix) + .ok_or(EINVAL)? + .0 + }; + + let ucode = { + let ucode_start = usize::from_safe_cast(hdr.data_offset); + let code_size = usize::from_safe_cast(desc.code_size); + // Align to falcon block size (256 bytes). + let aligned_code_size = code_size + .align_up(Alignment::new::<{ falcon::MEM_BLOCK_ALIGNMENT }>()) + .ok_or(EINVAL)?; + + let mut ucode = KVec::with_capacity(aligned_code_size, GFP_KERNEL)?; + ucode.extend_from_slice( + fw.data() + .get(ucode_start..ucode_start.checked_add(code_size).ok_or(EINVAL)?) + .ok_or(EINVAL)?, + GFP_KERNEL, + )?; + ucode.resize(aligned_code_size, 0, GFP_KERNEL)?; + + ucode + }; + + // `BootloaderDmemDescV2` expects the source to be a mirror image of the destination and + // uses the same offset parameter for both. + // + // Thus, the start of the source object needs to be padded with the difference between the + // destination and source offsets. + // + // In practice, this is expected to always be zero but is required for code correctness. + let (align_padding, firmware_dma) = { + let align_padding = { + let imem_sec = firmware.imem_sec_load_params(); + + imem_sec + .dst_start + .checked_sub(imem_sec.src_start) + .map(usize::from_safe_cast) + .ok_or(EOVERFLOW)? 
+ }; + + let mut firmware_obj = KVVec::new(); + firmware_obj.extend_with(align_padding, 0u8, GFP_KERNEL)?; + firmware_obj.extend_from_slice(firmware.ucode.0.as_slice(), GFP_KERNEL)?; + + ( + align_padding, + Coherent::from_slice(dev, firmware_obj.as_slice(), GFP_KERNEL)?, + ) + }; + + let dmem_desc = { + // Bootloader payload is in non-coherent system memory. + const FALCON_DMAIDX_PHYS_SYS_NCOH: u32 = 4; + + let imem_sec = firmware.imem_sec_load_params(); + let imem_ns = firmware.imem_ns_load_params().ok_or(EINVAL)?; + let dmem = firmware.dmem_load_params(); + + // The bootloader does not have a data destination offset field and copies the data at + // the start of DMEM, so it can only be used if the destination offset of the firmware + // is 0. + if dmem.dst_start != 0 { + return Err(EINVAL); + } + + BootloaderDmemDescV2 { + reserved: [0; 4], + signature: [0; 4], + ctx_dma: FALCON_DMAIDX_PHYS_SYS_NCOH, + code_dma_base: firmware_dma.dma_handle(), + // `dst_start` is also valid as the source offset since the firmware DMA object is + // a mirror image of the target IMEM layout. + non_sec_code_off: imem_ns.dst_start, + non_sec_code_size: imem_ns.len, + // `dst_start` is also valid as the source offset since the firmware DMA object is + // a mirror image of the target IMEM layout. + sec_code_off: imem_sec.dst_start, + sec_code_size: imem_sec.len, + code_entry_point: 0, + // Start of data section is the added padding + the DMEM `src_start` field. + data_dma_base: firmware_dma + .dma_handle() + .checked_add(u64::from_safe_cast(align_padding)) + .and_then(|offset| offset.checked_add(dmem.src_start.into())) + .ok_or(EOVERFLOW)?, + data_size: dmem.len, + argc: 0, + argv: 0, + } + }; + + // The bootloader's code must be loaded in the area right below the first 64K of IMEM. 
+ const BOOTLOADER_LOAD_CEILING: usize = sizes::SZ_64K; + let imem_dst_start = BOOTLOADER_LOAD_CEILING + .checked_sub(ucode.len()) + .ok_or(EOVERFLOW)?; + + Ok(Self { + _firmware_dma: firmware_dma, + ucode, + dmem_desc, + brom_params: firmware.brom_params(), + imem_dst_start: u16::try_from(imem_dst_start)?, + start_tag: u16::try_from(desc.start_tag)?, + }) + } + + /// Loads the bootloader into `falcon` and execute it. + /// + /// The bootloader will load the FWSEC firmware and then execute it. This function returns + /// after FWSEC has reached completion. + pub(crate) fn run( + &self, + dev: &Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + ) -> Result<()> { + // Reset falcon, load the firmware, and run it. + falcon + .reset(bar) + .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; + falcon + .pio_load(bar, self) + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + + // Configure DMA index for the bootloader to fetch the FWSEC firmware from system memory. + bar.update( + regs::NV_PFALCON_FBIF_TRANSCFG::of::<Gsp>() + .try_at(usize::from_safe_cast(self.dmem_desc.ctx_dma)) + .ok_or(EINVAL)?, + |v| { + v.with_target(FalconFbifTarget::CoherentSysmem) + .with_mem_type(FalconFbifMemType::Physical) + }, + ); + + let (mbox0, _) = falcon + .boot(bar, Some(0), None) + .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?; + if mbox0 != 0 { + dev_err!(dev, "FWSEC firmware returned error {}\n", mbox0); + Err(EIO) + } else { + Ok(()) + } + } +} + +impl FalconFirmware for FwsecFirmwareWithBl { + type Target = Gsp; + + fn brom_params(&self) -> FalconBromParams { + self.brom_params.clone() + } + + fn boot_addr(&self) -> u32 { + // On V2 platforms, the boot address is extracted from the generic bootloader, because the + // gbl is what actually copies FWSEC into memory, so that is what needs to be booted. 
+ u32::from(self.start_tag) << 8 + } +} + +impl FalconPioLoadable for FwsecFirmwareWithBl { + fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { + None + } + + fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { + Some(FalconPioImemLoadTarget { + data: self.ucode.as_ref(), + dst_start: self.imem_dst_start, + secure: false, + start_tag: self.start_tag, + }) + } + + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> { + FalconPioDmemLoadTarget { + data: self.dmem_desc.as_bytes(), + dst_start: 0, + } + } +} diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs new file mode 100644 index 000000000000..2fcc255c3bc8 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + device, + dma::{ + Coherent, + CoherentBox, + DataDirection, + DmaAddress, // + }, + prelude::*, + scatterlist::{ + Owned, + SGTable, // + }, +}; + +use crate::{ + firmware::{ + elf, + riscv::RiscvFirmware, // + }, + gpu::{ + Architecture, + Chipset, // + }, + gsp::GSP_PAGE_SIZE, + num::FromSafeCast, +}; + +/// GSP firmware with 3-level radix page tables for the GSP bootloader. +/// +/// The bootloader expects firmware to be mapped starting at address 0 in GSP's virtual address +/// space: +/// +/// ```text +/// Level 0: 1 page, 1 entry -> points to first level 1 page +/// Level 1: Multiple pages/entries -> each entry points to a level 2 page +/// Level 2: Multiple pages/entries -> each entry points to a firmware page +/// ``` +/// +/// Each page is 4KB, each entry is 8 bytes (64-bit DMA address). +/// Also known as "Radix3" firmware. +#[pin_data] +pub(crate) struct GspFirmware { + /// The GSP firmware inside a [`VVec`], device-mapped via a SG table. + #[pin] + fw: SGTable<Owned<VVec<u8>>>, + /// Level 2 page table whose entries contain DMA addresses of firmware pages. 
+ #[pin] + level2: SGTable<Owned<VVec<u8>>>, + /// Level 1 page table whose entries contain DMA addresses of level 2 pages. + #[pin] + level1: SGTable<Owned<VVec<u8>>>, + /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page. + level0: Coherent<[u64]>, + /// Size in bytes of the firmware contained in [`Self::fw`]. + pub(crate) size: usize, + /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. + pub(crate) signatures: Coherent<[u8]>, + /// GSP bootloader, verifies the GSP firmware before loading and running it. + pub(crate) bootloader: RiscvFirmware, +} + +impl GspFirmware { + /// Loads the GSP firmware binaries, map them into `dev`'s address-space, and creates the page + /// tables expected by the GSP bootloader to load it. + pub(crate) fn new<'a>( + dev: &'a device::Device<device::Bound>, + chipset: Chipset, + ver: &'a str, + ) -> impl PinInit<Self, Error> + 'a { + pin_init::pin_init_scope(move || { + let firmware = super::request_firmware(dev, chipset, "gsp", ver)?; + + let fw_section = elf::elf64_section(firmware.data(), ".fwimage").ok_or(EINVAL)?; + + let size = fw_section.len(); + + // Move the firmware into a vmalloc'd vector and map it into the device address + // space. + let fw_vvec = VVec::with_capacity(fw_section.len(), GFP_KERNEL) + .and_then(|mut v| { + v.extend_from_slice(fw_section, GFP_KERNEL)?; + Ok(v) + }) + .map_err(|_| ENOMEM)?; + + Ok(try_pin_init!(Self { + fw <- SGTable::new(dev, fw_vvec, DataDirection::ToDevice, GFP_KERNEL), + level2 <- { + // Allocate the level 2 page table, map the firmware onto it, and map it into + // the device address space. + VVec::<u8>::with_capacity( + fw.iter().count() * core::mem::size_of::<u64>(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level2| map_into_lvl(&fw, level2)) + .map(|level2| SGTable::new(dev, level2, DataDirection::ToDevice, GFP_KERNEL))? 
+ }, + level1 <- { + // Allocate the level 1 page table, map the level 2 page table onto it, and map + // it into the device address space. + VVec::<u8>::with_capacity( + level2.iter().count() * core::mem::size_of::<u64>(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level1| map_into_lvl(&level2, level1)) + .map(|level1| SGTable::new(dev, level1, DataDirection::ToDevice, GFP_KERNEL))? + }, + level0: { + // Allocate the level 0 page table as a device-visible DMA object, and map the + // level 1 page table onto it. + + // Fill level 1 page entry. + let level1_entry = level1.iter().next().ok_or(EINVAL)?; + let level1_entry_addr = level1_entry.dma_address(); + + // Create level 0 page table data and fill its first entry with the level 1 + // table. + let mut level0 = CoherentBox::<[u64]>::zeroed_slice( + dev, + GSP_PAGE_SIZE / size_of::<u64>(), + GFP_KERNEL + )?; + level0[0] = level1_entry_addr.to_le(); + + level0.into() + }, + size, + signatures: { + let sigs_section = match chipset.arch() { + Architecture::Turing + if matches!(chipset, Chipset::TU116 | Chipset::TU117) => + { + ".fwsignature_tu11x" + } + Architecture::Turing => ".fwsignature_tu10x", + // GA100 uses the same firmware as Turing + Architecture::Ampere if chipset == Chipset::GA100 => ".fwsignature_tu10x", + Architecture::Ampere => ".fwsignature_ga10x", + Architecture::Ada => ".fwsignature_ad10x", + }; + + elf::elf64_section(firmware.data(), sigs_section) + .ok_or(EINVAL) + .and_then(|data| Coherent::from_slice(dev, data, GFP_KERNEL))? + }, + bootloader: { + let bl = super::request_firmware(dev, chipset, "bootloader", ver)?; + + RiscvFirmware::new(dev, &bl)? + }, + })) + }) + } + + /// Returns the DMA handle of the radix3 level 0 page table. + pub(crate) fn radix3_dma_handle(&self) -> DmaAddress { + self.level0.dma_handle() + } +} + +/// Build a page table from a scatter-gather list. 
+/// +/// Takes each DMA-mapped region from `sg_table` and writes page table entries +/// for all 4KB pages within that region. For example, a 16KB SG entry becomes +/// 4 consecutive page table entries. +fn map_into_lvl(sg_table: &SGTable<Owned<VVec<u8>>>, mut dst: VVec<u8>) -> Result<VVec<u8>> { + for sg_entry in sg_table.iter() { + // Number of pages we need to map. + let num_pages = usize::from_safe_cast(sg_entry.dma_len()).div_ceil(GSP_PAGE_SIZE); + + for i in 0..num_pages { + let entry = sg_entry.dma_address() + + (u64::from_safe_cast(i) * u64::from_safe_cast(GSP_PAGE_SIZE)); + dst.extend_from_slice(&entry.to_le_bytes(), GFP_KERNEL)?; + } + } + + Ok(dst) +} diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs new file mode 100644 index 000000000000..2afa7f36404e --- /dev/null +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for firmware binaries designed to run on a RISC-V core. Such firmware files have a +//! dedicated header. + +use kernel::{ + device, + dma::Coherent, + firmware::Firmware, + prelude::*, + transmute::FromBytes, // +}; + +use crate::{ + firmware::BinFirmware, + num::FromSafeCast, // +}; + +/// Descriptor for microcode running on a RISC-V core. +#[repr(C)] +#[derive(Debug)] +struct RmRiscvUCodeDesc { + version: u32, + bootloader_offset: u32, + bootloader_size: u32, + bootloader_param_offset: u32, + bootloader_param_size: u32, + riscv_elf_offset: u32, + riscv_elf_size: u32, + app_version: u32, + manifest_offset: u32, + manifest_size: u32, + monitor_data_offset: u32, + monitor_data_size: u32, + monitor_code_offset: u32, + monitor_code_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for RmRiscvUCodeDesc {} + +impl RmRiscvUCodeDesc { + /// Interprets the header of `bin_fw` as a [`RmRiscvUCodeDesc`] and returns it. 
+ /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'_>) -> Result<Self> { + let offset = usize::from_safe_cast(bin_fw.hdr.header_offset); + let end = offset.checked_add(size_of::<Self>()).ok_or(EINVAL)?; + + bin_fw + .fw + .get(offset..end) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// A parsed firmware for a RISC-V core, ready to be loaded and run. +pub(crate) struct RiscvFirmware { + /// Offset at which the code starts in the firmware image. + pub(crate) code_offset: u32, + /// Offset at which the data starts in the firmware image. + pub(crate) data_offset: u32, + /// Offset at which the manifest starts in the firmware image. + pub(crate) manifest_offset: u32, + /// Application version. + pub(crate) app_version: u32, + /// Device-mapped firmware image. + pub(crate) ucode: Coherent<[u8]>, +} + +impl RiscvFirmware { + /// Parses the RISC-V firmware image contained in `fw`. + pub(crate) fn new(dev: &device::Device<device::Bound>, fw: &Firmware) -> Result<Self> { + let bin_fw = BinFirmware::new(fw)?; + + let riscv_desc = RmRiscvUCodeDesc::new(&bin_fw)?; + + let ucode = { + let start = usize::from_safe_cast(bin_fw.hdr.data_offset); + let len = usize::from_safe_cast(bin_fw.hdr.data_size); + let end = start.checked_add(len).ok_or(EINVAL)?; + + Coherent::from_slice(dev, fw.data().get(start..end).ok_or(EINVAL)?, GFP_KERNEL)? + }; + + Ok(Self { + ucode, + code_offset: riscv_desc.monitor_code_offset, + data_offset: riscv_desc.monitor_data_offset, + manifest_offset: riscv_desc.manifest_offset, + app_version: riscv_desc.app_version, + }) + } +} diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs new file mode 100644 index 000000000000..fb75dd10a172 --- /dev/null +++ b/drivers/gpu/nova-core/gfw.rs @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! GPU Firmware (`GFW`) support, a.k.a `devinit`. +//! +//! 
Upon reset, the GPU runs some firmware code from the BIOS to set up its core parameters. Most of +//! the GPU is considered unusable until this step is completed, so we must wait on it before +//! performing driver initialization. +//! +//! A clarification about devinit terminology: devinit is a sequence of register reads/writes after +//! reset that performs tasks such as: +//! 1. Programming VRAM memory controller timings. +//! 2. Power sequencing. +//! 3. Clock and PLL configuration. +//! 4. Thermal management. +//! +//! devinit itself is a 'script' which is interpreted by an interpreter program typically running +//! on the PMU microcontroller. +//! +//! Note that the devinit sequence also needs to run during suspend/resume. + +use kernel::{ + io::{ + poll::read_poll_timeout, + Io, // + }, + prelude::*, + time::Delta, // +}; + +use crate::{ + driver::Bar0, + regs, // +}; + +/// Wait for the `GFW` (GPU firmware) boot completion signal (`GFW_BOOT`), or a 4-second timeout. +/// +/// Upon GPU reset, several microcontrollers (such as PMU, SEC2, GSP, etc.) run some firmware code to +/// set up the GPU's core parameters. Most of the GPU is considered unusable until this step is completed, +/// so it must be waited on very early during driver initialization. +/// +/// The `GFW` code includes several components that need to execute before the driver loads. These +/// components are located in the VBIOS ROM and executed in a sequence on these different +/// microcontrollers. The devinit sequence typically runs on the PMU, and the FWSEC runs on the +/// GSP. +/// +/// This function waits for a signal indicating that core initialization is complete. Before this +/// signal is received, little can be done with the GPU. This signal is set by the FWSEC running on +/// the GSP in Heavy-secured mode. 
+pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result { + // Before accessing the completion status in `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05`, we must + // first check `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK`. This is because + // `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05` becomes accessible only after the secure firmware + // (FWSEC) lowers the privilege level to allow CPU (LS/Light-secured) access. We can only + // safely read the status register from CPU (LS/Light-secured) once the mask indicates + // that the privilege level has been lowered. + // + // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put out of + // reset, and should complete in less time than that. + read_poll_timeout( + || { + Ok( + // Check that FWSEC has lowered its protection level before reading the GFW_BOOT + // status. + bar.read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK) + .read_protection_level0() + && bar + .read(regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT) + .completed(), + ) + }, + |&gfw_booted| gfw_booted, + Delta::from_millis(1), + Delta::from_secs(4), + ) + .map(|_| ()) +} diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs new file mode 100644 index 000000000000..0f6fe9a1b955 --- /dev/null +++ b/drivers/gpu/nova-core/gpu.rs @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + device, + devres::Devres, + fmt, + io::Io, + num::Bounded, + pci, + prelude::*, + sync::Arc, // +}; + +use crate::{ + bounded_enum, + driver::Bar0, + falcon::{ + gsp::Gsp as GspFalcon, + sec2::Sec2 as Sec2Falcon, + Falcon, // + }, + fb::SysmemFlush, + gfw, + gsp::Gsp, + regs, +}; + +macro_rules! define_chipset { + ({ $($variant:ident = $value:expr),* $(,)* }) => + { + /// Enum representation of the GPU chipset. 
+ #[derive(fmt::Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq)] + pub(crate) enum Chipset { + $($variant = $value),*, + } + + impl Chipset { + pub(crate) const ALL: &'static [Chipset] = &[ + $( Chipset::$variant, )* + ]; + + ::kernel::macros::paste!( + /// Returns the name of this chipset, in lowercase. + /// + /// # Examples + /// + /// ``` + /// let chipset = Chipset::GA102; + /// assert_eq!(chipset.name(), "ga102"); + /// ``` + pub(crate) const fn name(&self) -> &'static str { + match *self { + $( + Chipset::$variant => stringify!([<$variant:lower>]), + )* + } + } + ); + } + + // TODO[FPRI]: replace with something like derive(FromPrimitive) + impl TryFrom<u32> for Chipset { + type Error = kernel::error::Error; + + fn try_from(value: u32) -> Result<Self, Self::Error> { + match value { + $( $value => Ok(Chipset::$variant), )* + _ => Err(ENODEV), + } + } + } + } +} + +define_chipset!({ + // Turing + TU102 = 0x162, + TU104 = 0x164, + TU106 = 0x166, + TU117 = 0x167, + TU116 = 0x168, + // Ampere + GA100 = 0x170, + GA102 = 0x172, + GA103 = 0x173, + GA104 = 0x174, + GA106 = 0x176, + GA107 = 0x177, + // Ada + AD102 = 0x192, + AD103 = 0x193, + AD104 = 0x194, + AD106 = 0x196, + AD107 = 0x197, +}); + +impl Chipset { + pub(crate) const fn arch(self) -> Architecture { + match self { + Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => { + Architecture::Turing + } + Self::GA100 | Self::GA102 | Self::GA103 | Self::GA104 | Self::GA106 | Self::GA107 => { + Architecture::Ampere + } + Self::AD102 | Self::AD103 | Self::AD104 | Self::AD106 | Self::AD107 => { + Architecture::Ada + } + } + } + + /// Returns `true` if this chipset requires the PIO-loaded bootloader in order to boot FWSEC. + /// + /// This includes all chipsets < GA102. 
+ pub(crate) const fn needs_fwsec_bootloader(self) -> bool { + matches!(self.arch(), Architecture::Turing) || matches!(self, Self::GA100) + } +} + +// TODO +// +// The resulting strings are used to generate firmware paths, hence the +// generated strings have to be stable. +// +// Hence, replace with something like strum_macros derive(Display). +// +// For now, redirect to fmt::Debug for convenience. +impl fmt::Display for Chipset { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self:?}") + } +} + +bounded_enum! { + /// Enum representation of the GPU generation. + #[derive(fmt::Debug, Copy, Clone)] + pub(crate) enum Architecture with TryFrom<Bounded<u32, 6>> { + Turing = 0x16, + Ampere = 0x17, + Ada = 0x19, + } +} + +pub(crate) struct Revision { + major: Bounded<u8, 4>, + minor: Bounded<u8, 4>, +} + +impl From<regs::NV_PMC_BOOT_42> for Revision { + fn from(boot0: regs::NV_PMC_BOOT_42) -> Self { + Self { + major: boot0.major_revision().cast(), + minor: boot0.minor_revision().cast(), + } + } +} + +impl fmt::Display for Revision { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:x}.{:x}", self.major, self.minor) + } +} + +/// Structure holding a basic description of the GPU: `Chipset` and `Revision`. +pub(crate) struct Spec { + chipset: Chipset, + revision: Revision, +} + +impl Spec { + fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> { + // Some brief notes about boot0 and boot42, in chronological order: + // + // NV04 through NV50: + // + // Not supported by Nova. boot0 is necessary and sufficient to identify these GPUs. + // boot42 may not even exist on some of these GPUs. + // + // Fermi through Volta: + // + // Not supported by Nova. boot0 is still sufficient to identify these GPUs, but boot42 + // is also guaranteed to be both present and accurate. + // + // Turing and later: + // + // Supported by Nova. 
Identified by first checking boot0 to ensure that the GPU is not + // from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU. + // Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs. + + let boot0 = bar.read(regs::NV_PMC_BOOT_0); + + if boot0.is_older_than_fermi() { + return Err(ENODEV); + } + + let boot42 = bar.read(regs::NV_PMC_BOOT_42); + Spec::try_from(boot42).inspect_err(|_| { + dev_err!(dev, "Unsupported chipset: {}\n", boot42); + }) + } +} + +impl TryFrom<regs::NV_PMC_BOOT_42> for Spec { + type Error = Error; + + fn try_from(boot42: regs::NV_PMC_BOOT_42) -> Result<Self> { + Ok(Self { + chipset: boot42.chipset()?, + revision: boot42.into(), + }) + } +} + +impl fmt::Display for Spec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_fmt(fmt!( + "Chipset: {}, Architecture: {:?}, Revision: {}", + self.chipset, + self.chipset.arch(), + self.revision + )) + } +} + +/// Structure holding the resources required to operate the GPU. +#[pin_data] +pub(crate) struct Gpu { + spec: Spec, + /// MMIO mapping of PCI BAR 0 + bar: Arc<Devres<Bar0>>, + /// System memory page required for flushing all pending GPU-side memory writes done through + /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation). + sysmem_flush: SysmemFlush, + /// GSP falcon instance, used for GSP boot up and cleanup. + gsp_falcon: Falcon<GspFalcon>, + /// SEC2 falcon instance, used for GSP boot up and cleanup. + sec2_falcon: Falcon<Sec2Falcon>, + /// GSP runtime data. Temporarily an empty placeholder. 
+ #[pin] + gsp: Gsp, +} + +impl Gpu { + pub(crate) fn new<'a>( + pdev: &'a pci::Device<device::Bound>, + devres_bar: Arc<Devres<Bar0>>, + bar: &'a Bar0, + ) -> impl PinInit<Self, Error> + 'a { + try_pin_init!(Self { + spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| { + dev_info!(pdev,"NVIDIA ({})\n", spec); + })?, + + // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. + _: { + gfw::wait_gfw_boot_completion(bar) + .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?; + }, + + sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, + + gsp_falcon: Falcon::new( + pdev.as_ref(), + spec.chipset, + ) + .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, + + sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?, + + gsp <- Gsp::new(pdev), + + _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? }, + + bar: devres_bar, + }) + } + + /// Called when the corresponding [`Device`](device::Device) is unbound. + /// + /// Note: This method must only be called from `Driver::unbind`. 
+ pub(crate) fn unbind(&self, dev: &device::Device<device::Core>) { + kernel::warn_on!(self + .bar + .access(dev) + .inspect(|bar| self.sysmem_flush.unregister(bar)) + .is_err()); + } +} diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs new file mode 100644 index 000000000000..ba5b7f990031 --- /dev/null +++ b/drivers/gpu/nova-core/gsp.rs @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod boot; + +use kernel::{ + debugfs, + device, + dma::{ + Coherent, + CoherentBox, + DmaAddress, // + }, + pci, + prelude::*, + transmute::{ + AsBytes, + FromBytes, // + }, // +}; + +pub(crate) mod cmdq; +pub(crate) mod commands; +mod fw; +mod sequencer; + +pub(crate) use fw::{ + GspFwWprMeta, + LibosParams, // +}; + +use crate::{ + gsp::cmdq::Cmdq, + gsp::fw::{ + GspArgumentsPadded, + LibosMemoryRegionInitArgument, // + }, + num, +}; + +pub(crate) const GSP_PAGE_SHIFT: usize = 12; +pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; + +/// Number of GSP pages to use in a RM log buffer. +const RM_LOG_BUFFER_NUM_PAGES: usize = 0x10; +const LOG_BUFFER_SIZE: usize = RM_LOG_BUFFER_NUM_PAGES * GSP_PAGE_SIZE; + +/// Array of page table entries, as understood by the GSP bootloader. +#[repr(C)] +struct PteArray<const NUM_ENTRIES: usize>([u64; NUM_ENTRIES]); + +/// SAFETY: arrays of `u64` implement `FromBytes` and we are but a wrapper around one. +unsafe impl<const NUM_ENTRIES: usize> FromBytes for PteArray<NUM_ENTRIES> {} + +/// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around one. +unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {} + +impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> { + /// Returns the page table entry for `index`, for a mapping starting at `start`. + // TODO: Replace with `IoView` projection once available. 
+ fn entry(start: DmaAddress, index: usize) -> Result<u64> { + start + .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW) + } +} + +/// The logging buffers are byte queues that contain encoded printf-like +/// messages from GSP-RM. They need to be decoded by a special application +/// that can parse the buffers. +/// +/// The 'loginit' buffer contains logs from early GSP-RM init and +/// exception dumps. The 'logrm' buffer contains the subsequent logs. Both are +/// written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE. +/// +/// The physical address map for the log buffer is stored in the buffer +/// itself, starting with offset 1. Offset 0 contains the "put" pointer (pp). +/// Initially, pp is equal to 0. If the buffer has valid logging data in it, +/// then pp points to index into the buffer where the next logging entry will +/// be written. Therefore, the logging data is valid if: +/// 1 <= pp < sizeof(buffer)/sizeof(u64) +struct LogBuffer(Coherent<[u8; LOG_BUFFER_SIZE]>); + +impl LogBuffer { + /// Creates a new `LogBuffer` mapped on `dev`. + fn new(dev: &device::Device<device::Bound>) -> Result<Self> { + let obj = Self(Coherent::zeroed(dev, GFP_KERNEL)?); + + let start_addr = obj.0.dma_handle(); + + // SAFETY: `obj` has just been created and we are its sole user. + let pte_region = unsafe { + &mut obj.0.as_mut()[size_of::<u64>()..][..RM_LOG_BUFFER_NUM_PAGES * size_of::<u64>()] + }; + + // Write values one by one to avoid an on-stack instance of `PteArray`. + for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() { + let pte_value = PteArray::<0>::entry(start_addr, i)?; + + chunk.copy_from_slice(&pte_value.to_ne_bytes()); + } + + Ok(obj) + } +} + +struct LogBuffers { + /// Init log buffer. + loginit: LogBuffer, + /// Interrupts log buffer. + logintr: LogBuffer, + /// RM log buffer. + logrm: LogBuffer, +} + +/// GSP runtime data. +#[pin_data] +pub(crate) struct Gsp { + /// Libos arguments. 
+ pub(crate) libos: Coherent<[LibosMemoryRegionInitArgument]>, + /// Log buffers, optionally exposed via debugfs. + #[pin] + logs: debugfs::Scope<LogBuffers>, + /// Command queue. + #[pin] + pub(crate) cmdq: Cmdq, + /// RM arguments. + rmargs: Coherent<GspArgumentsPadded>, +} + +impl Gsp { + // Creates an in-place initializer for a `Gsp` manager for `pdev`. + pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> impl PinInit<Self, Error> + '_ { + pin_init::pin_init_scope(move || { + let dev = pdev.as_ref(); + + let loginit = LogBuffer::new(dev)?; + let logintr = LogBuffer::new(dev)?; + let logrm = LogBuffer::new(dev)?; + + // Initialise the logging structures. The OpenRM equivalents are in: + // _kgspInitLibosLoggingStructures (allocates memory for buffers) + // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) + Ok(try_pin_init!(Self { + cmdq <- Cmdq::new(dev), + rmargs: Coherent::init(dev, GFP_KERNEL, GspArgumentsPadded::new(&cmdq))?, + libos: { + let mut libos = CoherentBox::zeroed_slice( + dev, + GSP_PAGE_SIZE / size_of::<LibosMemoryRegionInitArgument>(), + GFP_KERNEL, + )?; + + libos.init_at(0, LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0))?; + libos.init_at(1, LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0))?; + libos.init_at(2, LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0))?; + libos.init_at(3, LibosMemoryRegionInitArgument::new("RMARGS", rmargs))?; + + libos.into() + }, + logs <- { + let log_buffers = LogBuffers { + loginit, + logintr, + logrm, + }; + + #[allow(static_mut_refs)] + // SAFETY: `DEBUGFS_ROOT` is created before driver registration and cleared + // after driver unregistration, so no probe() can race with its modification. + // + // PANIC: `DEBUGFS_ROOT` cannot be `None` here. It is set before driver + // registration and cleared after driver unregistration, so it is always + // `Some` for the entire lifetime that probe() can be called. 
+ let log_parent: &debugfs::Dir = unsafe { crate::DEBUGFS_ROOT.as_ref() } + .expect("DEBUGFS_ROOT not initialized"); + + log_parent.scope(log_buffers, dev.name(), |logs, dir| { + dir.read_binary_file(c"loginit", &logs.loginit.0); + dir.read_binary_file(c"logintr", &logs.logintr.0); + dir.read_binary_file(c"logrm", &logs.logrm.0); + }) + }, + })) + }) + } +} diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs new file mode 100644 index 000000000000..18f356c9178e --- /dev/null +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + device, + dma::Coherent, + io::poll::read_poll_timeout, + io::Io, + pci, + prelude::*, + time::Delta, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp, + sec2::Sec2, + Falcon, // + }, + fb::FbLayout, + firmware::{ + booter::{ + BooterFirmware, + BooterKind, // + }, + fwsec::{ + bootloader::FwsecFirmwareWithBl, + FwsecCommand, + FwsecFirmware, // + }, + gsp::GspFirmware, + FIRMWARE_VERSION, // + }, + gpu::Chipset, + gsp::{ + commands, + sequencer::{ + GspSequencer, + GspSequencerParams, // + }, + GspFwWprMeta, // + }, + regs, + vbios::Vbios, +}; + +impl super::Gsp { + /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly + /// created the WPR2 region. + fn run_fwsec_frts( + dev: &device::Device<device::Bound>, + chipset: Chipset, + falcon: &Falcon<Gsp>, + bar: &Bar0, + bios: &Vbios, + fb_layout: &FbLayout, + ) -> Result<()> { + // Check that the WPR2 region does not already exist - if it does, we cannot run + // FWSEC-FRTS until the GPU is reset. + if bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound() != 0 { + dev_err!( + dev, + "WPR2 region already exists - GPU needs to be reset to proceed\n" + ); + return Err(EBUSY); + } + + // FWSEC-FRTS will create the WPR2 region.
+ let fwsec_frts = FwsecFirmware::new( + dev, + falcon, + bar, + bios, + FwsecCommand::Frts { + frts_addr: fb_layout.frts.start, + frts_size: fb_layout.frts.len(), + }, + )?; + + if chipset.needs_fwsec_bootloader() { + let fwsec_frts_bl = FwsecFirmwareWithBl::new(fwsec_frts, dev, chipset)?; + // Load and run the bootloader, which will load FWSEC-FRTS and run it. + fwsec_frts_bl.run(dev, falcon, bar)?; + } else { + // Load and run FWSEC-FRTS directly. + fwsec_frts.run(dev, falcon, bar)?; + } + + // SCRATCH_0E contains the error code for FWSEC-FRTS. + let frts_status = bar + .read(regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR) + .frts_err_code(); + if frts_status != 0 { + dev_err!( + dev, + "FWSEC-FRTS returned with error code {:#x}\n", + frts_status + ); + + return Err(EIO); + } + + // Check that the WPR2 region has been created as we requested. + let (wpr2_lo, wpr2_hi) = ( + bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO).lower_bound(), + bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound(), + ); + + match (wpr2_lo, wpr2_hi) { + (_, 0) => { + dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n"); + + Err(EIO) + } + (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => { + dev_err!( + dev, + "WPR2 region created at unexpected address {:#x}; expected {:#x}\n", + wpr2_lo, + fb_layout.frts.start, + ); + + Err(EIO) + } + (wpr2_lo, wpr2_hi) => { + dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); + dev_dbg!(dev, "GPU instance built\n"); + + Ok(()) + } + } + } + + /// Attempt to boot the GSP. + /// + /// This is a GPU-dependent and complex procedure that involves loading firmware files from + /// user-space, patching them with signatures, and building firmware-specific intricate data + /// structures that the GSP will use at runtime. + /// + /// Upon successful return, the GSP is up and running.
+ pub(crate) fn boot( + self: Pin<&mut Self>, + pdev: &pci::Device<device::Bound>, + bar: &Bar0, + chipset: Chipset, + gsp_falcon: &Falcon<Gsp>, + sec2_falcon: &Falcon<Sec2>, + ) -> Result { + let dev = pdev.as_ref(); + + let bios = Vbios::new(dev, bar)?; + + let gsp_fw = KBox::pin_init(GspFirmware::new(dev, chipset, FIRMWARE_VERSION), GFP_KERNEL)?; + + let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?; + dev_dbg!(dev, "{:#x?}\n", fb_layout); + + Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, &fb_layout)?; + + let booter_loader = BooterFirmware::new( + dev, + BooterKind::Loader, + chipset, + FIRMWARE_VERSION, + sec2_falcon, + bar, + )?; + + let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?; + + self.cmdq + .send_command_no_wait(bar, commands::SetSystemInfo::new(pdev))?; + self.cmdq + .send_command_no_wait(bar, commands::SetRegistry::new())?; + + gsp_falcon.reset(bar)?; + let libos_handle = self.libos.dma_handle(); + let (mbox0, mbox1) = gsp_falcon.boot( + bar, + Some(libos_handle as u32), + Some((libos_handle >> 32) as u32), + )?; + dev_dbg!(pdev, "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1); + + dev_dbg!( + pdev, + "Using SEC2 to load and run the booter_load firmware...\n" + ); + + sec2_falcon.reset(bar)?; + sec2_falcon.load(dev, bar, &booter_loader)?; + let wpr_handle = wpr_meta.dma_handle(); + let (mbox0, mbox1) = sec2_falcon.boot( + bar, + Some(wpr_handle as u32), + Some((wpr_handle >> 32) as u32), + )?; + dev_dbg!(pdev, "SEC2 MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1); + + if mbox0 != 0 { + dev_err!(pdev, "Booter-load failed with error {:#x}\n", mbox0); + return Err(ENODEV); + } + + gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version); + + // Poll for RISC-V to become active before running sequencer + read_poll_timeout( + || Ok(gsp_falcon.is_riscv_active(bar)), + |val: &bool| *val, + Delta::from_millis(10), + Delta::from_secs(5), + )?; + + dev_dbg!(pdev, "RISC-V active? 
{}\n", gsp_falcon.is_riscv_active(bar),); + + // Create and run the GSP sequencer. + let seq_params = GspSequencerParams { + bootloader_app_version: gsp_fw.bootloader.app_version, + libos_dma_handle: libos_handle, + gsp_falcon, + sec2_falcon, + dev: pdev.as_ref().into(), + bar, + }; + GspSequencer::run(&self.cmdq, seq_params)?; + + // Wait until GSP is fully initialized. + commands::wait_gsp_init_done(&self.cmdq)?; + + // Obtain and display basic GPU information. + let info = commands::get_gsp_info(&self.cmdq, bar)?; + match info.gpu_name() { + Ok(name) => dev_info!(pdev, "GPU name: {}\n", name), + Err(e) => dev_warn!(pdev, "GPU name unavailable: {:?}\n", e), + } + + Ok(()) + } +} diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs new file mode 100644 index 000000000000..275da9b1ee0e --- /dev/null +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -0,0 +1,849 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod continuation; + +use core::mem; + +use kernel::{ + device, + dma::{ + Coherent, + DmaAddress, // + }, + dma_write, + io::{ + poll::read_poll_timeout, + Io, // + }, + new_mutex, + prelude::*, + ptr, + sync::{ + aref::ARef, + Mutex, // + }, + time::Delta, + transmute::{ + AsBytes, + FromBytes, // + }, +}; + +use continuation::{ + ContinuationRecord, + SplitState, // +}; + +use pin_init::pin_init_scope; + +use crate::{ + driver::Bar0, + gsp::{ + fw::{ + GspMsgElement, + MsgFunction, + MsgqRxHeader, + MsgqTxHeader, + GSP_MSG_QUEUE_ELEMENT_SIZE_MAX, // + }, + PteArray, + GSP_PAGE_SHIFT, + GSP_PAGE_SIZE, // + }, + num, + regs, + sbuffer::SBufferIter, // +}; + +/// Marker type representing the absence of a reply for a command. Commands using this as their +/// reply type are sent using [`Cmdq::send_command_no_wait`]. +pub(crate) struct NoReply; + +/// Trait implemented by types representing a command to send to the GSP. +/// +/// The main purpose of this trait is to provide [`Cmdq`] with the information it needs to send +/// a given command. 
+/// +/// [`CommandToGsp::init`] in particular is responsible for initializing the command directly +/// into the space reserved for it in the command queue buffer. +/// +/// Some commands may be followed by a variable-length payload. For these, the +/// [`CommandToGsp::variable_payload_len`] and [`CommandToGsp::init_variable_payload`] need to be +/// defined as well. +pub(crate) trait CommandToGsp { + /// Function identifying this command to the GSP. + const FUNCTION: MsgFunction; + + /// Type generated by [`CommandToGsp::init`], to be written into the command queue buffer. + type Command: FromBytes + AsBytes; + + /// Type of the reply expected from the GSP, or [`NoReply`] for commands that don't + /// have a reply. + type Reply; + + /// Error type returned by [`CommandToGsp::init`]. + type InitError; + + /// In-place command initializer responsible for filling the command in the command queue + /// buffer. + fn init(&self) -> impl Init<Self::Command, Self::InitError>; + + /// Size of the variable-length payload following the command structure generated by + /// [`CommandToGsp::init`]. + /// + /// Most commands don't have a variable-length payload, so this is zero by default. + fn variable_payload_len(&self) -> usize { + 0 + } + + /// Method initializing the variable-length payload. + /// + /// The command buffer is circular, which means that we may need to jump back to its beginning + /// while in the middle of a command. For this reason, the variable-length payload is + /// initialized using a [`SBufferIter`]. + /// + /// This method will receive a buffer of the length returned by + /// [`CommandToGsp::variable_payload_len`], and must write every single byte of it. Leaving + /// unwritten space will lead to an error. + /// + /// Most commands don't have a variable-length payload, so this does nothing by default. 
+ fn init_variable_payload( + &self, + _dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>, + ) -> Result { + Ok(()) + } + + /// Total size of the command (including its variable-length payload) without the + /// [`GspMsgElement`] header. + fn size(&self) -> usize { + size_of::<Self::Command>() + self.variable_payload_len() + } +} + +/// Trait representing messages received from the GSP. +/// +/// This trait tells [`Cmdq::receive_msg`] how it can receive a given type of message. +pub(crate) trait MessageFromGsp: Sized { + /// Function identifying this message from the GSP. + const FUNCTION: MsgFunction; + + /// Error type returned by [`MessageFromGsp::read`]. + type InitError; + + /// Type containing the raw message to be read from the message queue. + type Message: FromBytes; + + /// Method reading the message from the message queue and returning it. + /// + /// From a `Self::Message` and a [`SBufferIter`], constructs an instance of `Self` and returns + /// it. + fn read( + msg: &Self::Message, + sbuffer: &mut SBufferIter<core::array::IntoIter<&[u8], 2>>, + ) -> Result<Self, Self::InitError>; +} + +/// Number of GSP pages making the [`Msgq`]. +pub(crate) const MSGQ_NUM_PAGES: u32 = 0x3f; + +/// Circular buffer of a [`Msgq`]. +/// +/// This area of memory is to be shared between the driver and the GSP to exchange commands or +/// messages. +#[repr(C, align(0x1000))] +#[derive(Debug)] +struct MsgqData { + data: [[u8; GSP_PAGE_SIZE]; num::u32_as_usize(MSGQ_NUM_PAGES)], +} + +// Annoyingly we are forced to use a literal to specify the alignment of +// `MsgqData`, so check that it corresponds to the actual GSP page size here. +static_assert!(align_of::<MsgqData>() == GSP_PAGE_SIZE); + +/// Unidirectional message queue. +/// +/// Contains the data for a message queue, that either the driver or GSP writes to. 
+/// +/// Note that while the write pointer of `tx` corresponds to the `msgq` of the same instance, the +/// read pointer of `rx` actually refers to the `Msgq` owned by the other side. +/// This design ensures that only the driver or GSP ever writes to a given instance of this struct. +#[repr(C)] +// There is no struct defined for this in the open-gpu-kernel-source headers. +// Instead it is defined by code in `GspMsgQueuesInit()`. +// TODO: Revert to private once `IoView` projections replace the `gsp_mem` module. +pub(super) struct Msgq { + /// Header for sending messages, including the write pointer. + pub(super) tx: MsgqTxHeader, + /// Header for receiving messages, including the read pointer. + pub(super) rx: MsgqRxHeader, + /// The message queue proper. + msgq: MsgqData, +} + +/// Structure shared between the driver and the GSP and containing the command and message queues. +#[repr(C)] +// TODO: Revert to private once `IoView` projections replace the `gsp_mem` module. +pub(super) struct GspMem { + /// Self-mapping page table entries. + ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, + /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the + /// write and read pointers that the CPU updates. This means that the read pointer here is an + /// index into the GSP queue. + /// + /// This member is read-only for the GSP. + pub(super) cpuq: Msgq, + /// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the + /// write and read pointers that the GSP updates. This means that the read pointer here is an + /// index into the CPU queue. + /// + /// This member is read-only for the driver. + pub(super) gspq: Msgq, +} + +impl GspMem { + const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::<u64>(); +} + +// SAFETY: These structs don't meet the no-padding requirements of AsBytes but +// that is not a problem because they are not used outside the kernel. 
+unsafe impl AsBytes for GspMem {} + +// SAFETY: These structs don't meet the no-padding requirements of FromBytes but +// that is not a problem because they are not used outside the kernel. +unsafe impl FromBytes for GspMem {} + +/// Wrapper around [`GspMem`] to share it with the GPU using a [`Coherent`]. +/// +/// This provides the low-level functionality to communicate with the GSP, including allocation of +/// queue space to write messages to and management of read/write pointers. +/// +/// This is shared with the GSP, with clear ownership rules regarding the command queues: +/// +/// * The driver owns (i.e. can write to) the part of the CPU message queue between the CPU write +/// pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`]. +/// * The driver owns (i.e. can read from) the part of the GSP message queue between the CPU read +/// pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`]. +struct DmaGspMem(Coherent<GspMem>); + +impl DmaGspMem { + /// Allocate a new instance and map it for `dev`. + fn new(dev: &device::Device<device::Bound>) -> Result<Self> { + const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::<Msgq>() }>(); + const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>(); + + let gsp_mem = Coherent::<GspMem>::zeroed(dev, GFP_KERNEL)?; + + let start = gsp_mem.dma_handle(); + // Write values one by one to avoid an on-stack instance of `PteArray`. + for i in 0..GspMem::PTE_ARRAY_SIZE { + dma_write!(gsp_mem, .ptes.0[i], PteArray::<0>::entry(start, i)?); + } + + dma_write!( + gsp_mem, + .cpuq.tx, + MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES) + ); + dma_write!(gsp_mem, .cpuq.rx, MsgqRxHeader::new()); + + Ok(Self(gsp_mem)) + } + + /// Returns the region of the CPU message queue that the driver is currently allowed to write + /// to. + /// + /// As the message queue is a circular buffer, the region may be discontiguous in memory. 
In + /// that case the second slice will have a non-zero length. + fn driver_write_area(&mut self) -> (&mut [[u8; GSP_PAGE_SIZE]], &mut [[u8; GSP_PAGE_SIZE]]) { + let tx = self.cpu_write_ptr(); + let rx = self.gsp_read_ptr(); + + // Pointer to the first entry of the CPU message queue. + let data = ptr::project!(mut self.0.as_mut_ptr(), .cpuq.msgq.data[0]); + + let (tail_end, wrap_end) = if rx == 0 { + // The write area is non-wrapping, and stops at the second-to-last entry of the command + // queue (to leave the last one empty). + (MSGQ_NUM_PAGES - 1, 0) + } else if rx <= tx { + // The write area wraps and continues until `rx - 1`. + (MSGQ_NUM_PAGES, rx - 1) + } else { + // The write area doesn't wrap and stops at `rx - 1`. + (rx - 1, 0) + }; + + // SAFETY: + // - `data` was created from a valid pointer, and `rx` and `tx` are in the + // `0..MSGQ_NUM_PAGES` range per the invariants of `cpu_write_ptr` and `gsp_read_ptr`, + // thus the created slices are valid. + // - The area starting at `tx` and ending at `rx - 2` modulo `MSGQ_NUM_PAGES`, + // inclusive, belongs to the driver for writing and is not accessed concurrently by + // the GSP. + // - The caller holds a reference to `self` for as long as the returned slices are live, + // meaning the CPU write pointer cannot be advanced and thus that the returned area + // remains exclusive to the CPU for the duration of the slices. + // - The created slices point to non-overlapping sub-ranges of `data` in all + // branches (in the `rx <= tx` case, the second slice ends at `rx - 1` which is strictly + // less than `tx` where the first slice starts; in the other cases the second slice is + // empty), so creating two `&mut` references from them does not violate aliasing rules. 
+ unsafe { + ( + core::slice::from_raw_parts_mut( + data.add(num::u32_as_usize(tx)), + num::u32_as_usize(tail_end - tx), + ), + core::slice::from_raw_parts_mut(data, num::u32_as_usize(wrap_end)), + ) + } + } + + /// Returns the size of the region of the CPU message queue that the driver is currently allowed + /// to write to, in bytes. + fn driver_write_area_size(&self) -> usize { + let tx = self.cpu_write_ptr(); + let rx = self.gsp_read_ptr(); + + // `rx` and `tx` are both in `0..MSGQ_NUM_PAGES` per the invariants of `gsp_read_ptr` and + // `cpu_write_ptr`. The minimum value case is where `rx == 0` and `tx == MSGQ_NUM_PAGES - + // 1`, which gives `0 + MSGQ_NUM_PAGES - (MSGQ_NUM_PAGES - 1) - 1 == 0`. + let slots = (rx + MSGQ_NUM_PAGES - tx - 1) % MSGQ_NUM_PAGES; + num::u32_as_usize(slots) * GSP_PAGE_SIZE + } + + /// Returns the region of the GSP message queue that the driver is currently allowed to read + /// from. + /// + /// As the message queue is a circular buffer, the region may be discontiguous in memory. In + /// that case the second slice will have a non-zero length. + fn driver_read_area(&self) -> (&[[u8; GSP_PAGE_SIZE]], &[[u8; GSP_PAGE_SIZE]]) { + let tx = self.gsp_write_ptr(); + let rx = self.cpu_read_ptr(); + + // Pointer to the first entry of the GSP message queue. + let data = ptr::project!(self.0.as_ptr(), .gspq.msgq.data[0]); + + let (tail_end, wrap_end) = if rx <= tx { + // Read area is non-wrapping and stops right before `tx`. + (tx, 0) + } else { + // Read area is wrapping and stops right before `tx`. + (MSGQ_NUM_PAGES, tx) + }; + + // SAFETY: + // - `data` was created from a valid pointer, and `rx` and `tx` are in the + // `0..MSGQ_NUM_PAGES` range per the invariants of `gsp_write_ptr` and `cpu_read_ptr`, + // thus the created slices are valid. + // - The area starting at `rx` and ending at `tx - 1` modulo `MSGQ_NUM_PAGES`, + // inclusive, belongs to the driver for reading and is not accessed concurrently by + // the GSP. 
+ // - The caller holds a reference to `self` for as long as the returned slices are live, + // meaning the CPU read pointer cannot be advanced and thus that the returned area + // remains exclusive to the CPU for the duration of the slices. + unsafe { + ( + core::slice::from_raw_parts( + data.add(num::u32_as_usize(rx)), + num::u32_as_usize(tail_end - rx), + ), + core::slice::from_raw_parts(data, num::u32_as_usize(wrap_end)), + ) + } + } + + /// Allocates a region on the command queue that is large enough to send a command of `size` + /// bytes, waiting for space to become available based on the provided timeout. + /// + /// This returns a [`GspCommand`] ready to be written to by the caller. + /// + /// # Errors + /// + /// - `EMSGSIZE` if the command is larger than [`GSP_MSG_QUEUE_ELEMENT_SIZE_MAX`]. + /// - `ETIMEDOUT` if space does not become available within the timeout. + /// - `EIO` if the command header is not properly aligned. + fn allocate_command(&mut self, size: usize, timeout: Delta) -> Result<GspCommand<'_>> { + if size_of::<GspMsgElement>() + size > GSP_MSG_QUEUE_ELEMENT_SIZE_MAX { + return Err(EMSGSIZE); + } + read_poll_timeout( + || Ok(self.driver_write_area_size()), + |available_bytes| *available_bytes >= size_of::<GspMsgElement>() + size, + Delta::from_micros(1), + timeout, + )?; + + // Get the current writable area as an array of bytes. + let (slice_1, slice_2) = { + let (slice_1, slice_2) = self.driver_write_area(); + + (slice_1.as_flattened_mut(), slice_2.as_flattened_mut()) + }; + + // Extract area for the `GspMsgElement`. + let (header, slice_1) = GspMsgElement::from_bytes_mut_prefix(slice_1).ok_or(EIO)?; + + // Create the contents area. + let (slice_1, slice_2) = if slice_1.len() > size { + // Contents fits entirely in `slice_1`. + (&mut slice_1[..size], &mut slice_2[0..0]) + } else { + // Need all of `slice_1` and some of `slice_2`. 
+ let slice_2_len = size - slice_1.len(); + (slice_1, &mut slice_2[..slice_2_len]) + }; + + Ok(GspCommand { + header, + contents: (slice_1, slice_2), + }) + } + + // Returns the index of the memory page the GSP will write the next message to. + // + // # Invariants + // + // - The returned value is within `0..MSGQ_NUM_PAGES`. + fn gsp_write_ptr(&self) -> u32 { + super::fw::gsp_mem::gsp_write_ptr(&self.0) + } + + // Returns the index of the memory page the GSP will read the next command from. + // + // # Invariants + // + // - The returned value is within `0..MSGQ_NUM_PAGES`. + fn gsp_read_ptr(&self) -> u32 { + super::fw::gsp_mem::gsp_read_ptr(&self.0) + } + + // Returns the index of the memory page the CPU can read the next message from. + // + // # Invariants + // + // - The returned value is within `0..MSGQ_NUM_PAGES`. + fn cpu_read_ptr(&self) -> u32 { + super::fw::gsp_mem::cpu_read_ptr(&self.0) + } + + // Informs the GSP that it can send `elem_count` new pages into the message queue. + fn advance_cpu_read_ptr(&mut self, elem_count: u32) { + super::fw::gsp_mem::advance_cpu_read_ptr(&self.0, elem_count) + } + + // Returns the index of the memory page the CPU can write the next command to. + // + // # Invariants + // + // - The returned value is within `0..MSGQ_NUM_PAGES`. + fn cpu_write_ptr(&self) -> u32 { + super::fw::gsp_mem::cpu_write_ptr(&self.0) + } + + // Informs the GSP that it can process `elem_count` new pages from the command queue. + fn advance_cpu_write_ptr(&mut self, elem_count: u32) { + super::fw::gsp_mem::advance_cpu_write_ptr(&self.0, elem_count) + } +} + +/// A command ready to be sent on the command queue. +/// +/// This is the type returned by [`DmaGspMem::allocate_command`]. +struct GspCommand<'a> { + // Writable reference to the header of the command. + header: &'a mut GspMsgElement, + // Writable slices to the contents of the command. The second slice is zero unless the command + // loops over the command queue. 
+ contents: (&'a mut [u8], &'a mut [u8]), +} + +/// A message ready to be processed from the message queue. +/// +/// This is the type returned by [`Cmdq::wait_for_msg`]. +struct GspMessage<'a> { + // Reference to the header of the message. + header: &'a GspMsgElement, + // Slices to the contents of the message. The second slice is zero unless the message loops + // over the message queue. + contents: (&'a [u8], &'a [u8]), +} + +/// GSP command queue. +/// +/// Provides the ability to send commands and receive messages from the GSP using a shared memory +/// area. +#[pin_data] +pub(crate) struct Cmdq { + /// Inner mutex-protected state. + #[pin] + inner: Mutex<CmdqInner>, + /// DMA handle of the command queue's shared memory region. + pub(super) dma_handle: DmaAddress, +} + +impl Cmdq { + /// Offset of the data after the PTEs. + const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq); + + /// Offset of command queue ring buffer. + pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq) + + core::mem::offset_of!(Msgq, msgq) + - Self::POST_PTE_OFFSET; + + /// Offset of message queue ring buffer. + pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq) + + core::mem::offset_of!(Msgq, msgq) + - Self::POST_PTE_OFFSET; + + /// Number of page table entries for the GSP shared region. + pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT; + + /// Default timeout for receiving a message from the GSP. + pub(super) const RECEIVE_TIMEOUT: Delta = Delta::from_secs(5); + + /// Creates a new command queue for `dev`. + pub(crate) fn new(dev: &device::Device<device::Bound>) -> impl PinInit<Self, Error> + '_ { + pin_init_scope(move || { + let gsp_mem = DmaGspMem::new(dev)?; + + Ok(try_pin_init!(Self { + dma_handle: gsp_mem.0.dma_handle(), + inner <- new_mutex!(CmdqInner { + dev: dev.into(), + gsp_mem, + seq: 0, + }), + })) + }) + } + + /// Computes the checksum for the message pointed to by `it`. 
+ /// + /// A message is made of several parts, so `it` is an iterator over the individual bytes of + /// these parts. + fn calculate_checksum<T: Iterator<Item = u8>>(it: T) -> u32 { + let sum64 = it + .enumerate() + .map(|(idx, byte)| (((idx % 8) * 8) as u32, byte)) + .fold(0, |acc, (rol, byte)| acc ^ u64::from(byte).rotate_left(rol)); + + ((sum64 >> 32) as u32) ^ (sum64 as u32) + } + + /// Notifies the GSP that we have updated the command queue pointers. + fn notify_gsp(bar: &Bar0) { + bar.write_reg(regs::NV_PGSP_QUEUE_HEAD::zeroed().with_address(0u32)); + } + + /// Sends `command` to the GSP and waits for the reply. + /// + /// Messages with non-matching function codes are silently consumed until the expected reply + /// arrives. + /// + /// The queue is locked for the entire send+receive cycle to ensure that no other command can + /// be interleaved. + /// + /// # Errors + /// + /// - `ETIMEDOUT` if space does not become available to send the command, or if the reply is + /// not received within the timeout. + /// - `EIO` if the variable payload requested by the command has not been entirely + /// written to by its [`CommandToGsp::init_variable_payload`] method. + /// + /// Error codes returned by the command and reply initializers are propagated as-is. + pub(crate) fn send_command<M>(&self, bar: &Bar0, command: M) -> Result<M::Reply> + where + M: CommandToGsp, + M::Reply: MessageFromGsp, + Error: From<M::InitError>, + Error: From<<M::Reply as MessageFromGsp>::InitError>, + { + let mut inner = self.inner.lock(); + inner.send_command(bar, command)?; + + loop { + match inner.receive_msg::<M::Reply>(Self::RECEIVE_TIMEOUT) { + Ok(reply) => break Ok(reply), + Err(ERANGE) => continue, + Err(e) => break Err(e), + } + } + } + + /// Sends `command` to the GSP without waiting for a reply. + /// + /// # Errors + /// + /// - `ETIMEDOUT` if space does not become available within the timeout.
+ /// - `EIO` if the variable payload requested by the command has not been entirely + /// written to by its [`CommandToGsp::init_variable_payload`] method. + /// + /// Error codes returned by the command initializers are propagated as-is. + pub(crate) fn send_command_no_wait<M>(&self, bar: &Bar0, command: M) -> Result + where + M: CommandToGsp<Reply = NoReply>, + Error: From<M::InitError>, + { + self.inner.lock().send_command(bar, command) + } + + /// Receive a message from the GSP. + /// + /// See [`CmdqInner::receive_msg`] for details. + pub(crate) fn receive_msg<M: MessageFromGsp>(&self, timeout: Delta) -> Result<M> + where + // This allows all error types, including `Infallible`, to be used for `M::InitError`. + Error: From<M::InitError>, + { + self.inner.lock().receive_msg(timeout) + } +} + +/// Inner mutex protected state of [`Cmdq`]. +struct CmdqInner { + /// Device this command queue belongs to. + dev: ARef<device::Device>, + /// Current command sequence number. + seq: u32, + /// Memory area shared with the GSP for communicating commands and messages. + gsp_mem: DmaGspMem, +} + +impl CmdqInner { + /// Timeout for waiting for space on the command queue. + const ALLOCATE_TIMEOUT: Delta = Delta::from_secs(1); + + /// Sends `command` to the GSP, without splitting it. + /// + /// # Errors + /// + /// - `EMSGSIZE` if the command exceeds the maximum queue element size. + /// - `ETIMEDOUT` if space does not become available within the timeout. + /// - `EIO` if the variable payload requested by the command has not been entirely + /// written to by its [`CommandToGsp::init_variable_payload`] method. + /// + /// Error codes returned by the command initializers are propagated as-is. + fn send_single_command<M>(&mut self, bar: &Bar0, command: M) -> Result + where + M: CommandToGsp, + // This allows all error types, including `Infallible`, to be used for `M::InitError`. 
+ Error: From<M::InitError>, + { + let size_in_bytes = command.size(); + let dst = self + .gsp_mem + .allocate_command(size_in_bytes, Self::ALLOCATE_TIMEOUT)?; + + // Extract area for the command itself. The GSP message header and the command header + // together are guaranteed to fit entirely into a single page, so it's ok to only look + // at `dst.contents.0` here. + let (cmd, payload_1) = M::Command::from_bytes_mut_prefix(dst.contents.0).ok_or(EIO)?; + + // Fill the header and command in-place. + let msg_element = GspMsgElement::init(self.seq, size_in_bytes, M::FUNCTION); + // SAFETY: `msg_header` and `cmd` are valid references, and not touched if the initializer + // fails. + unsafe { + msg_element.__init(core::ptr::from_mut(dst.header))?; + command.init().__init(core::ptr::from_mut(cmd))?; + } + + // Fill the variable-length payload, which may be empty. + let mut sbuffer = SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]); + command.init_variable_payload(&mut sbuffer)?; + + if !sbuffer.is_empty() { + return Err(EIO); + } + drop(sbuffer); + + // Compute checksum now that the whole message is ready. + dst.header + .set_checksum(Cmdq::calculate_checksum(SBufferIter::new_reader([ + dst.header.as_bytes(), + dst.contents.0, + dst.contents.1, + ]))); + + dev_dbg!( + &self.dev, + "GSP RPC: send: seq# {}, function={:?}, length=0x{:x}\n", + self.seq, + M::FUNCTION, + dst.header.length(), + ); + + // All set - update the write pointer and inform the GSP of the new command. + let elem_count = dst.header.element_count(); + self.seq += 1; + self.gsp_mem.advance_cpu_write_ptr(elem_count); + Cmdq::notify_gsp(bar); + + Ok(()) + } + + /// Sends `command` to the GSP. + /// + /// The command may be split into multiple messages if it is large. + /// + /// # Errors + /// + /// - `ETIMEDOUT` if space does not become available within the timeout. 
/// - `EIO` if the variable payload requested by the command has not been entirely
    ///   written to by its [`CommandToGsp::init_variable_payload`] method.
    ///
    /// Error codes returned by the command initializers are propagated as-is.
    fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result
    where
        M: CommandToGsp,
        Error: From<M::InitError>,
    {
        match SplitState::new(command)? {
            // The command fits in a single queue element: send it as-is.
            SplitState::Single(command) => self.send_single_command(bar, command),
            // The command was truncated: send it first, then the rest of its payload as
            // continuation records, in order.
            SplitState::Split(command, mut continuations) => {
                self.send_single_command(bar, command)?;

                while let Some(continuation) = continuations.next() {
                    // Turbofish needed because the compiler cannot infer M here.
                    self.send_single_command::<ContinuationRecord<'_>>(bar, continuation)?;
                }

                Ok(())
            }
        }
    }

    /// Wait for a message to become available on the message queue.
    ///
    /// This works purely at the transport layer and does not interpret the message: only the
    /// length advertised in its [`GspMsgElement`] and its checksum are validated.
    ///
    /// The read pointer is not advanced by this method.
    ///
    /// This method returns:
    ///
    /// - A reference to the [`GspMsgElement`] of the message,
    /// - Two byte slices with the contents of the message. The second slice is empty unless the
    ///   message loops across the message queue.
    ///
    /// # Errors
    ///
    /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available.
    /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised, or bad
    ///   checksum) on the message queue.
    fn wait_for_msg(&self, timeout: Delta) -> Result<GspMessage<'_>> {
        // Wait for a message to arrive from the GSP.
        let (slice_1, slice_2) = read_poll_timeout(
            || Ok(self.gsp_mem.driver_read_area()),
            |driver_area| !driver_area.0.is_empty(),
            Delta::from_millis(1),
            timeout,
        )
        .map(|(slice_1, slice_2)| (slice_1.as_flattened(), slice_2.as_flattened()))?;

        // Extract the `GspMsgElement`.
        let (header, slice_1) = GspMsgElement::from_bytes_prefix(slice_1).ok_or(EIO)?;

        dev_dbg!(
            &self.dev,
            "GSP RPC: receive: seq# {}, function={:?}, length=0x{:x}\n",
            header.sequence(),
            header.function(),
            header.length(),
        );

        let payload_length = header.payload_length();

        // Check that the driver read area is large enough for the message.
        if slice_1.len() + slice_2.len() < payload_length {
            return Err(EIO);
        }

        // Cut the message slices down to the actual length of the message.
        let (slice_1, slice_2) = if slice_1.len() > payload_length {
            // PANIC: this branch is only taken when `slice_1` is longer than `payload_length`.
            (slice_1.split_at(payload_length).0, &slice_2[0..0])
        } else {
            (
                slice_1,
                // PANIC: we checked above that `slice_1.len() + slice_2.len()` is at least as
                // large as `payload_length`.
                slice_2.split_at(payload_length - slice_1.len()).0,
            )
        };

        // Validate checksum. The checksum of a valid message, computed over its header and
        // contents, is expected to be zero.
        if Cmdq::calculate_checksum(SBufferIter::new_reader([
            header.as_bytes(),
            slice_1,
            slice_2,
        ])) != 0
        {
            dev_err!(
                &self.dev,
                "GSP RPC: receive: Call {} - bad checksum\n",
                header.sequence()
            );
            return Err(EIO);
        }

        Ok(GspMessage {
            header,
            contents: (slice_1, slice_2),
        })
    }

    /// Receive a message from the GSP.
    ///
    /// The expected message type is specified using the `M` generic parameter. If the pending
    /// message has a different function code, `ERANGE` is returned and the message is consumed.
    ///
    /// The read pointer is always advanced past the message, regardless of whether it matched.
    ///
    /// # Errors
    ///
    /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available.
    /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the
    ///   message queue.
    /// - `EINVAL` if the function code of the message was not recognized.
    /// - `ERANGE` if the message had a recognized but non-matching function code.
+ /// + /// Error codes returned by [`MessageFromGsp::read`] are propagated as-is. + fn receive_msg<M: MessageFromGsp>(&mut self, timeout: Delta) -> Result<M> + where + // This allows all error types, including `Infallible`, to be used for `M::InitError`. + Error: From<M::InitError>, + { + let message = self.wait_for_msg(timeout)?; + let function = message.header.function().map_err(|_| EINVAL)?; + + // Extract the message. Store the result as we want to advance the read pointer even in + // case of failure. + let result = if function == M::FUNCTION { + let (cmd, contents_1) = M::Message::from_bytes_prefix(message.contents.0).ok_or(EIO)?; + let mut sbuffer = SBufferIter::new_reader([contents_1, message.contents.1]); + + M::read(cmd, &mut sbuffer) + .map_err(|e| e.into()) + .inspect(|_| { + if !sbuffer.is_empty() { + dev_warn!( + &self.dev, + "GSP message {:?} has unprocessed data\n", + function + ); + } + }) + } else { + Err(ERANGE) + }; + + // Advance the read pointer past this message. + self.gsp_mem.advance_cpu_read_ptr(u32::try_from( + message.header.length().div_ceil(GSP_PAGE_SIZE), + )?); + + result + } +} diff --git a/drivers/gpu/nova-core/gsp/cmdq/continuation.rs b/drivers/gpu/nova-core/gsp/cmdq/continuation.rs new file mode 100644 index 000000000000..05e904f18097 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/cmdq/continuation.rs @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for splitting large GSP commands across continuation records. + +use core::convert::Infallible; + +use kernel::prelude::*; + +use super::{ + CommandToGsp, + NoReply, // +}; + +use crate::{ + gsp::fw::{ + GspMsgElement, + MsgFunction, + GSP_MSG_QUEUE_ELEMENT_SIZE_MAX, // + }, + sbuffer::SBufferIter, +}; + +/// Maximum command size that fits in a single queue element. +const MAX_CMD_SIZE: usize = GSP_MSG_QUEUE_ELEMENT_SIZE_MAX - size_of::<GspMsgElement>(); + +/// Acts as an iterator over the continuation records for a split command. 
pub(super) struct ContinuationRecords {
    /// Payload bytes left to send after the first command.
    payload: KVVec<u8>,
    /// Offset in `payload` of the first byte that has not been handed out yet.
    offset: usize,
}

impl ContinuationRecords {
    /// Builds an iterator that hands out `payload` as a sequence of continuation records.
    fn new(payload: KVVec<u8>) -> Self {
        Self { payload, offset: 0 }
    }

    /// Returns the next continuation record, or [`None`] once the whole payload has been
    /// handed out.
    ///
    /// Each record carries at most `MAX_CMD_SIZE` bytes.
    pub(super) fn next(&mut self) -> Option<ContinuationRecord<'_>> {
        let start = self.offset;
        let remaining = self.payload.len() - start;

        if remaining == 0 {
            return None;
        }

        let end = start + remaining.min(MAX_CMD_SIZE);
        self.offset = end;

        Some(ContinuationRecord::new(&self.payload[start..end]))
    }
}

/// The [`ContinuationRecord`] command.
///
/// Carries one chunk of the payload of a command that did not fit in a single queue element.
pub(super) struct ContinuationRecord<'a> {
    /// Chunk of payload sent by this record.
    data: &'a [u8],
}

impl<'a> ContinuationRecord<'a> {
    /// Wraps `data` into a [`ContinuationRecord`] command.
    fn new(data: &'a [u8]) -> Self {
        Self { data }
    }
}

impl<'a> CommandToGsp for ContinuationRecord<'a> {
    const FUNCTION: MsgFunction = MsgFunction::ContinuationRecord;
    // A continuation record has no command header of its own, only a variable payload.
    type Command = ();
    type Reply = NoReply;
    type InitError = Infallible;

    fn init(&self) -> impl Init<Self::Command, Self::InitError> {
        <()>::init_zeroed()
    }

    fn variable_payload_len(&self) -> usize {
        self.data.len()
    }

    fn init_variable_payload(
        &self,
        dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
    ) -> Result {
        dst.write_all(self.data)
    }
}

/// Outcome of checking whether a command fits in a single queue element.
pub(super) enum SplitState<C: CommandToGsp> {
    /// The command fits in a single queue element and is left untouched.
    Single(C),
    /// The command was too large: a truncated command plus its continuation records.
    Split(SplitCommand<C>, ContinuationRecords),
}

impl<C: CommandToGsp> SplitState<C> {
    /// Maximum variable payload size that fits in the first command alongside the command header.
+ const MAX_FIRST_PAYLOAD: usize = MAX_CMD_SIZE - size_of::<C::Command>(); + + /// Creates a new [`SplitState`] for the given command. + /// + /// If the command is too large, it will be split into a main command and some number of + /// continuation records. + pub(super) fn new(command: C) -> Result<Self> { + let payload_len = command.variable_payload_len(); + + if command.size() > MAX_CMD_SIZE { + let mut command_payload = + KVVec::<u8>::from_elem(0u8, payload_len.min(Self::MAX_FIRST_PAYLOAD), GFP_KERNEL)?; + let mut continuation_payload = + KVVec::<u8>::from_elem(0u8, payload_len - command_payload.len(), GFP_KERNEL)?; + let mut sbuffer = SBufferIter::new_writer([ + command_payload.as_mut_slice(), + continuation_payload.as_mut_slice(), + ]); + + command.init_variable_payload(&mut sbuffer)?; + if !sbuffer.is_empty() { + return Err(EIO); + } + drop(sbuffer); + + Ok(Self::Split( + SplitCommand::new(command, command_payload), + ContinuationRecords::new(continuation_payload), + )) + } else { + Ok(Self::Single(command)) + } + } +} + +/// A command that has been truncated to maximum accepted length of the command queue. +/// +/// The remainder of its payload is expected to be sent using [`ContinuationRecords`]. +pub(super) struct SplitCommand<C: CommandToGsp> { + command: C, + payload: KVVec<u8>, +} + +impl<C: CommandToGsp> SplitCommand<C> { + /// Creates a new [`SplitCommand`] wrapping `command` with the given truncated payload. 
fn new(command: C, payload: KVVec<u8>) -> Self {
        Self { command, payload }
    }
}

// A `SplitCommand` behaves like the command it wraps, except that its variable payload is the
// truncated one.
impl<C: CommandToGsp> CommandToGsp for SplitCommand<C> {
    const FUNCTION: MsgFunction = C::FUNCTION;
    type Command = C::Command;
    type Reply = C::Reply;
    type InitError = C::InitError;

    fn init(&self) -> impl Init<Self::Command, Self::InitError> {
        self.command.init()
    }

    fn variable_payload_len(&self) -> usize {
        self.payload.len()
    }

    fn init_variable_payload(
        &self,
        dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
    ) -> Result {
        dst.write_all(&self.payload)
    }
}

#[kunit_tests(nova_core_gsp_continuation)]
mod tests {
    use super::*;

    use kernel::transmute::{
        AsBytes,
        FromBytes, //
    };

    /// Non-zero-sized command header for testing.
    #[repr(C)]
    #[derive(Clone, Copy, Zeroable)]
    struct TestHeader([u8; 64]);

    // SAFETY: `TestHeader` is a plain array of bytes for which all bit patterns are valid.
    unsafe impl FromBytes for TestHeader {}

    // SAFETY: `TestHeader` is a plain array of bytes for which all bit patterns are valid.
    unsafe impl AsBytes for TestHeader {}

    /// Test command made of a [`TestHeader`] and a variable payload of arbitrary size.
    struct TestPayload {
        data: KVVec<u8>,
    }

    impl TestPayload {
        /// Returns `len` bytes of a deterministic pattern.
        fn generate_pattern(len: usize) -> Result<KVVec<u8>> {
            let mut data = KVVec::with_capacity(len, GFP_KERNEL)?;
            for i in 0..len {
                // Mix in higher bits so the pattern does not repeat every 256 bytes.
                data.push((i ^ (i >> 8)) as u8, GFP_KERNEL)?;
            }
            Ok(data)
        }

        /// Creates a test command whose payload is `len` bytes of the test pattern.
        fn new(len: usize) -> Result<Self> {
            Ok(Self {
                data: Self::generate_pattern(len)?,
            })
        }
    }

    impl CommandToGsp for TestPayload {
        const FUNCTION: MsgFunction = MsgFunction::Nop;
        type Command = TestHeader;
        type Reply = NoReply;
        type InitError = Infallible;

        fn init(&self) -> impl Init<Self::Command, Self::InitError> {
            TestHeader::init_zeroed()
        }

        fn variable_payload_len(&self) -> usize {
            self.data.len()
        }

        fn init_variable_payload(
            &self,
            dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
        ) -> Result {
            dst.write_all(self.data.as_slice())
        }
    }

    /// Maximum variable payload size that fits in the first command alongside the header.
    const MAX_FIRST_PAYLOAD: usize = SplitState::<TestPayload>::MAX_FIRST_PAYLOAD;

    /// Returns the variable payload that `cmd` writes, as a newly allocated vector.
    fn read_payload(cmd: impl CommandToGsp) -> Result<KVVec<u8>> {
        let len = cmd.variable_payload_len();
        let mut buf = KVVec::from_elem(0u8, len, GFP_KERNEL)?;
        let mut sbuf = SBufferIter::new_writer([buf.as_mut_slice(), &mut []]);
        cmd.init_variable_payload(&mut sbuf)?;
        drop(sbuf);
        Ok(buf)
    }

    /// One test case for [`check_split`].
    struct SplitTest {
        /// Size in bytes of the variable payload of the command to split.
        payload_size: usize,
        /// Number of continuation records the command is expected to produce.
        num_continuations: usize,
    }

    /// Splits a [`TestPayload`] of `t.payload_size` bytes and checks that it produces the
    /// expected number of continuation records, that no part exceeds `MAX_CMD_SIZE`, and that
    /// the parts put back together are identical to the original payload.
    fn check_split(t: SplitTest) -> Result {
        let payload = TestPayload::new(t.payload_size)?;
        let mut num_continuations = 0;

        let buf = match SplitState::new(payload)? {
            SplitState::Single(cmd) => read_payload(cmd)?,
            SplitState::Split(cmd, mut continuations) => {
                let mut buf = read_payload(cmd)?;
                assert!(size_of::<TestHeader>() + buf.len() <= MAX_CMD_SIZE);

                while let Some(cont) = continuations.next() {
                    let payload = read_payload(cont)?;
                    assert!(payload.len() <= MAX_CMD_SIZE);
                    buf.extend_from_slice(&payload, GFP_KERNEL)?;
                    num_continuations += 1;
                }

                buf
            }
        };

        assert_eq!(num_continuations, t.num_continuations);
        assert_eq!(
            buf.as_slice(),
            TestPayload::generate_pattern(t.payload_size)?.as_slice()
        );
        Ok(())
    }

    #[test]
    fn split_command() -> Result {
        // Empty payload: no split.
        check_split(SplitTest {
            payload_size: 0,
            num_continuations: 0,
        })?;
        // Largest payload that still fits next to the header: no split.
        check_split(SplitTest {
            payload_size: MAX_FIRST_PAYLOAD,
            num_continuations: 0,
        })?;
        // One byte too many: a single continuation record.
        check_split(SplitTest {
            payload_size: MAX_FIRST_PAYLOAD + 1,
            num_continuations: 1,
        })?;
        // Exactly one full continuation record.
        check_split(SplitTest {
            payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE,
            num_continuations: 1,
        })?;
        // One byte more than a full continuation record: a second record is needed.
        check_split(SplitTest {
            payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE + 1,
            num_continuations: 2,
        })?;
        // Three full continuation records followed by a half-full one.
        check_split(SplitTest {
            payload_size: MAX_FIRST_PAYLOAD + MAX_CMD_SIZE * 3 + MAX_CMD_SIZE / 2,
            num_continuations: 4,
        })?;
        Ok(())
    }
}
diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
new file mode 100644
index 000000000000..c89c7b57a751
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -0,0 +1,239 @@
// SPDX-License-Identifier: GPL-2.0

use core::{
    array,
    convert::Infallible,
    ffi::FromBytesUntilNulError,
    str::Utf8Error, //
};

use kernel::{
    device,
    pci,
    prelude::*,
    transmute::{
        AsBytes,
        FromBytes, //
    }, //
};

use crate::{
    driver::Bar0,
    gsp::{
        cmdq::{
            Cmdq,
            CommandToGsp,
            MessageFromGsp,
            NoReply, //
        },
        fw::{
            commands::*,
            MsgFunction, //
        },
    },
    sbuffer::SBufferIter,
};

/// The `GspSetSystemInfo` command.
+pub(crate) struct SetSystemInfo<'a> { + pdev: &'a pci::Device<device::Bound>, +} + +impl<'a> SetSystemInfo<'a> { + /// Creates a new `GspSetSystemInfo` command using the parameters of `pdev`. + pub(crate) fn new(pdev: &'a pci::Device<device::Bound>) -> Self { + Self { pdev } + } +} + +impl<'a> CommandToGsp for SetSystemInfo<'a> { + const FUNCTION: MsgFunction = MsgFunction::GspSetSystemInfo; + type Command = GspSetSystemInfo; + type Reply = NoReply; + type InitError = Error; + + fn init(&self) -> impl Init<Self::Command, Self::InitError> { + GspSetSystemInfo::init(self.pdev) + } +} + +struct RegistryEntry { + key: &'static str, + value: u32, +} + +/// The `SetRegistry` command. +pub(crate) struct SetRegistry { + entries: [RegistryEntry; Self::NUM_ENTRIES], +} + +impl SetRegistry { + // For now we hard-code the registry entries. Future work will allow others to + // be added as module parameters. + const NUM_ENTRIES: usize = 3; + + /// Creates a new `SetRegistry` command, using a set of hardcoded entries. + pub(crate) fn new() -> Self { + Self { + entries: [ + // RMSecBusResetEnable - enables PCI secondary bus reset + RegistryEntry { + key: "RMSecBusResetEnable", + value: 1, + }, + // RMForcePcieConfigSave - forces GSP-RM to preserve PCI configuration registers on + // any PCI reset. + RegistryEntry { + key: "RMForcePcieConfigSave", + value: 1, + }, + // RMDevidCheckIgnore - allows GSP-RM to boot even if the PCI dev ID is not found + // in the internal product name database. 
+ RegistryEntry { + key: "RMDevidCheckIgnore", + value: 1, + }, + ], + } + } +} + +impl CommandToGsp for SetRegistry { + const FUNCTION: MsgFunction = MsgFunction::SetRegistry; + type Command = PackedRegistryTable; + type Reply = NoReply; + type InitError = Infallible; + + fn init(&self) -> impl Init<Self::Command, Self::InitError> { + PackedRegistryTable::init(Self::NUM_ENTRIES as u32, self.variable_payload_len() as u32) + } + + fn variable_payload_len(&self) -> usize { + let mut key_size = 0; + for i in 0..Self::NUM_ENTRIES { + key_size += self.entries[i].key.len() + 1; // +1 for NULL terminator + } + Self::NUM_ENTRIES * size_of::<PackedRegistryEntry>() + key_size + } + + fn init_variable_payload( + &self, + dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>, + ) -> Result { + let string_data_start_offset = + size_of::<PackedRegistryTable>() + Self::NUM_ENTRIES * size_of::<PackedRegistryEntry>(); + + // Array for string data. + let mut string_data = KVec::new(); + + for entry in self.entries.iter().take(Self::NUM_ENTRIES) { + dst.write_all( + PackedRegistryEntry::new( + (string_data_start_offset + string_data.len()) as u32, + entry.value, + ) + .as_bytes(), + )?; + + let key_bytes = entry.key.as_bytes(); + string_data.extend_from_slice(key_bytes, GFP_KERNEL)?; + string_data.push(0, GFP_KERNEL)?; + } + + dst.write_all(string_data.as_slice()) + } +} + +/// Message type for GSP initialization done notification. +struct GspInitDone; + +// SAFETY: `GspInitDone` is a zero-sized type with no bytes, therefore it +// trivially has no uninitialized bytes. +unsafe impl FromBytes for GspInitDone {} + +impl MessageFromGsp for GspInitDone { + const FUNCTION: MsgFunction = MsgFunction::GspInitDone; + type InitError = Infallible; + type Message = (); + + fn read( + _msg: &Self::Message, + _sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>, + ) -> Result<Self, Self::InitError> { + Ok(GspInitDone) + } +} + +/// Waits for GSP initialization to complete. 
+pub(crate) fn wait_gsp_init_done(cmdq: &Cmdq) -> Result { + loop { + match cmdq.receive_msg::<GspInitDone>(Cmdq::RECEIVE_TIMEOUT) { + Ok(_) => break Ok(()), + Err(ERANGE) => continue, + Err(e) => break Err(e), + } + } +} + +/// The `GetGspStaticInfo` command. +struct GetGspStaticInfo; + +impl CommandToGsp for GetGspStaticInfo { + const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo; + type Command = GspStaticConfigInfo; + type Reply = GetGspStaticInfoReply; + type InitError = Infallible; + + fn init(&self) -> impl Init<Self::Command, Self::InitError> { + GspStaticConfigInfo::init_zeroed() + } +} + +/// The reply from the GSP to the [`GetGspInfo`] command. +pub(crate) struct GetGspStaticInfoReply { + gpu_name: [u8; 64], +} + +impl MessageFromGsp for GetGspStaticInfoReply { + const FUNCTION: MsgFunction = MsgFunction::GetGspStaticInfo; + type Message = GspStaticConfigInfo; + type InitError = Infallible; + + fn read( + msg: &Self::Message, + _sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>, + ) -> Result<Self, Self::InitError> { + Ok(GetGspStaticInfoReply { + gpu_name: msg.gpu_name_str(), + }) + } +} + +/// Error type for [`GetGspStaticInfoReply::gpu_name`]. +#[derive(Debug)] +pub(crate) enum GpuNameError { + /// The GPU name string does not contain a null terminator. + NoNullTerminator(FromBytesUntilNulError), + + /// The GPU name string contains invalid UTF-8. + #[expect(dead_code)] + InvalidUtf8(Utf8Error), +} + +impl GetGspStaticInfoReply { + /// Returns the name of the GPU as a string. + /// + /// Returns an error if the string given by the GSP does not contain a null terminator or + /// contains invalid UTF-8. + pub(crate) fn gpu_name(&self) -> core::result::Result<&str, GpuNameError> { + CStr::from_bytes_until_nul(&self.gpu_name) + .map_err(GpuNameError::NoNullTerminator)? + .to_str() + .map_err(GpuNameError::InvalidUtf8) + } +} + +/// Send the [`GetGspInfo`] command and awaits for its reply. 
pub(crate) fn get_gsp_info(cmdq: &Cmdq, bar: &Bar0) -> Result<GetGspStaticInfoReply> {
    cmdq.send_command(bar, GetGspStaticInfo)
}
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
new file mode 100644
index 000000000000..0c8a74f0e8ac
--- /dev/null
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -0,0 +1,922 @@
// SPDX-License-Identifier: GPL-2.0

pub(crate) mod commands;
mod r570_144;

// Alias to avoid repeating the version number with every use.
use r570_144 as bindings;

use core::ops::Range;

use kernel::{
    dma::Coherent,
    prelude::*,
    ptr::{
        Alignable,
        Alignment,
        KnownSize, //
    },
    sizes::{
        SZ_128K,
        SZ_1M, //
    },
    transmute::{
        AsBytes,
        FromBytes, //
    },
};

use crate::{
    fb::FbLayout,
    firmware::gsp::GspFirmware,
    gpu::Chipset,
    gsp::{
        cmdq::Cmdq, //
        GSP_PAGE_SIZE,
    },
    num::{
        self,
        FromSafeCast, //
    },
};

// Accessors for the read and write pointers stored in the shared `GspMem` area. All pointers are
// reduced modulo `MSGQ_NUM_PAGES` when read and when written.
//
// TODO: Replace with `IoView` projections once available.
pub(super) mod gsp_mem {
    use core::sync::atomic::{
        fence,
        Ordering, //
    };

    use kernel::{
        dma::Coherent,
        dma_read,
        dma_write, //
    };

    use crate::gsp::cmdq::{
        GspMem,
        MSGQ_NUM_PAGES, //
    };

    /// Reads `gspq.tx.writePtr`, reduced modulo `MSGQ_NUM_PAGES`.
    pub(in crate::gsp) fn gsp_write_ptr(qs: &Coherent<GspMem>) -> u32 {
        dma_read!(qs, .gspq.tx.0.writePtr) % MSGQ_NUM_PAGES
    }

    /// Reads `gspq.rx.readPtr`, reduced modulo `MSGQ_NUM_PAGES`.
    pub(in crate::gsp) fn gsp_read_ptr(qs: &Coherent<GspMem>) -> u32 {
        dma_read!(qs, .gspq.rx.0.readPtr) % MSGQ_NUM_PAGES
    }

    /// Reads `cpuq.rx.readPtr`, reduced modulo `MSGQ_NUM_PAGES`.
    pub(in crate::gsp) fn cpu_read_ptr(qs: &Coherent<GspMem>) -> u32 {
        dma_read!(qs, .cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES
    }

    /// Advances `cpuq.rx.readPtr` by `count`, wrapping at `MSGQ_NUM_PAGES`.
    ///
    /// A sequentially consistent fence is issued before the new value is written.
    pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &Coherent<GspMem>, count: u32) {
        let rptr = cpu_read_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES;

        // Ensure read pointer is properly ordered.
        fence(Ordering::SeqCst);

        dma_write!(qs, .cpuq.rx.0.readPtr, rptr);
    }

    /// Reads `cpuq.tx.writePtr`, reduced modulo `MSGQ_NUM_PAGES`.
    pub(in crate::gsp) fn cpu_write_ptr(qs: &Coherent<GspMem>) -> u32 {
        dma_read!(qs, .cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES
    }

    /// Advances `cpuq.tx.writePtr` by `count`, wrapping at `MSGQ_NUM_PAGES`.
    ///
    /// A sequentially consistent fence is issued after the new value is written.
    pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &Coherent<GspMem>, count: u32) {
        let wptr = cpu_write_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES;

        dma_write!(qs, .cpuq.tx.0.writePtr, wptr);

        // Ensure all command data is visible before triggering the GSP read.
        fence(Ordering::SeqCst);
    }
}

/// Maximum size of a single GSP message queue element in bytes.
pub(crate) const GSP_MSG_QUEUE_ELEMENT_SIZE_MAX: usize =
    num::u32_as_usize(bindings::GSP_MSG_QUEUE_ELEMENT_SIZE_MAX);

/// Empty type to group methods related to heap parameters for running the GSP firmware.
enum GspFwHeapParams {}

/// Minimum required alignment for the GSP heap.
///
/// `1 << 20` bytes, i.e. 1 MiB.
const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>();

impl GspFwHeapParams {
    /// Returns the amount of GSP-RM heap memory used during GSP-RM boot and initialization (up to
    /// and including the first client subdevice allocation).
    ///
    /// The chipset is currently ignored: the same value is returned for all of them.
    fn base_rm_size(_chipset: Chipset) -> u64 {
        // TODO: this needs to be updated to return the correct value for Hopper+ once support for
        // them is added:
        // u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100)
        u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X)
    }

    /// Returns the amount of heap memory required to support a single channel allocation.
    ///
    /// The value is aligned up to [`GSP_HEAP_ALIGNMENT`], or `u64::MAX` if the alignment fails.
    fn client_alloc_size() -> u64 {
        u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE)
            .align_up(GSP_HEAP_ALIGNMENT)
            .unwrap_or(u64::MAX)
    }

    /// Returns the amount of memory to reserve for management purposes for a framebuffer of size
    /// `fb_size`.
fn management_overhead(fb_size: u64) -> u64 {
        // The overhead is accounted per GiB of framebuffer, any started GiB counting as a full
        // one.
        let per_gb = u64::from(bindings::GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB);
        let fb_size_gb = fb_size.div_ceil(u64::from_safe_cast(kernel::sizes::SZ_1G));
        let overhead = per_gb.saturating_mul(fb_size_gb);

        // Saturate if the aligned value cannot be represented.
        overhead.align_up(GSP_HEAP_ALIGNMENT).unwrap_or(u64::MAX)
    }
}

/// Heap requirements and limits that depend on the version of the GSP LIBOS.
pub(crate) struct LibosParams {
    /// Base amount of heap, in bytes, that the GSP operating system requires.
    carveout_size: u64,
    /// Range of sizes, in bytes, that the GSP FW heap is allowed to have.
    allowed_heap_size: Range<u64>,
}

impl LibosParams {
    /// Parameters for version 2 of the GSP LIBOS (Turing and GA100).
    const LIBOS2: LibosParams = LibosParams {
        carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2),
        // The bindings give the bounds in MiB.
        allowed_heap_size: num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB)
            * num::usize_as_u64(SZ_1M)
            ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB)
                * num::usize_as_u64(SZ_1M),
    };

    /// Parameters for version 3 of the GSP LIBOS (GA102+).
    const LIBOS3: LibosParams = LibosParams {
        carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL),
        // The bindings give the bounds in MiB.
        allowed_heap_size: num::u32_as_u64(
            bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB,
        ) * num::usize_as_u64(SZ_1M)
            ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB)
                * num::usize_as_u64(SZ_1M),
    };

    /// Selects the libos parameters to use for `chipset`: [`Self::LIBOS2`] for chipsets that
    /// compare lower than `GA102`, [`Self::LIBOS3`] for all the others.
    pub(crate) fn from_chipset(chipset: Chipset) -> &'static LibosParams {
        if chipset < Chipset::GA102 {
            return &Self::LIBOS2;
        }

        &Self::LIBOS3
    }

    /// Returns the amount of memory (in bytes) to allocate for the WPR heap for a framebuffer size
    /// of `fb_size` (in bytes) for `chipset`.
+ pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> u64 { + // The WPR heap will contain the following: + // LIBOS carveout, + self.carveout_size + // RM boot working memory, + .saturating_add(GspFwHeapParams::base_rm_size(chipset)) + // One RM client, + .saturating_add(GspFwHeapParams::client_alloc_size()) + // Overhead for memory management. + .saturating_add(GspFwHeapParams::management_overhead(fb_size)) + // Clamp to the supported heap sizes. + .clamp(self.allowed_heap_size.start, self.allowed_heap_size.end - 1) + } +} + +/// Structure passed to the GSP bootloader, containing the framebuffer layout as well as the DMA +/// addresses of the GSP bootloader and firmware. +#[repr(transparent)] +pub(crate) struct GspFwWprMeta { + inner: bindings::GspFwWprMeta, +} + +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for GspFwWprMeta {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for GspFwWprMeta {} + +type GspFwWprMetaBootResumeInfo = bindings::GspFwWprMeta__bindgen_ty_1; +type GspFwWprMetaBootInfo = bindings::GspFwWprMeta__bindgen_ty_1__bindgen_ty_1; + +impl GspFwWprMeta { + /// Returns an initializer for a `GspFwWprMeta` suitable for booting `gsp_firmware` using the + /// `fb_layout` layout. + pub(crate) fn new<'a>( + gsp_firmware: &'a GspFirmware, + fb_layout: &'a FbLayout, + ) -> impl Init<Self> + 'a { + #[allow(non_snake_case)] + let init_inner = init!(bindings::GspFwWprMeta { + // CAST: we want to store the bits of `GSP_FW_WPR_META_MAGIC` unmodified. 
+ magic: bindings::GSP_FW_WPR_META_MAGIC as u64, + revision: u64::from(bindings::GSP_FW_WPR_META_REVISION), + sysmemAddrOfRadix3Elf: gsp_firmware.radix3_dma_handle(), + sizeOfRadix3Elf: u64::from_safe_cast(gsp_firmware.size), + sysmemAddrOfBootloader: gsp_firmware.bootloader.ucode.dma_handle(), + sizeOfBootloader: u64::from_safe_cast(gsp_firmware.bootloader.ucode.size()), + bootloaderCodeOffset: u64::from(gsp_firmware.bootloader.code_offset), + bootloaderDataOffset: u64::from(gsp_firmware.bootloader.data_offset), + bootloaderManifestOffset: u64::from(gsp_firmware.bootloader.manifest_offset), + __bindgen_anon_1: GspFwWprMetaBootResumeInfo { + __bindgen_anon_1: GspFwWprMetaBootInfo { + sysmemAddrOfSignature: gsp_firmware.signatures.dma_handle(), + sizeOfSignature: u64::from_safe_cast(gsp_firmware.signatures.size()), + }, + }, + gspFwRsvdStart: fb_layout.heap.start, + nonWprHeapOffset: fb_layout.heap.start, + nonWprHeapSize: fb_layout.heap.end - fb_layout.heap.start, + gspFwWprStart: fb_layout.wpr2.start, + gspFwHeapOffset: fb_layout.wpr2_heap.start, + gspFwHeapSize: fb_layout.wpr2_heap.end - fb_layout.wpr2_heap.start, + gspFwOffset: fb_layout.elf.start, + bootBinOffset: fb_layout.boot.start, + frtsOffset: fb_layout.frts.start, + frtsSize: fb_layout.frts.end - fb_layout.frts.start, + gspFwWprEnd: fb_layout + .vga_workspace + .start + .align_down(Alignment::new::<SZ_128K>()), + gspFwHeapVfPartitionCount: fb_layout.vf_partition_count, + fbSize: fb_layout.fb.end - fb_layout.fb.start, + vgaWorkspaceOffset: fb_layout.vga_workspace.start, + vgaWorkspaceSize: fb_layout.vga_workspace.end - fb_layout.vga_workspace.start, + ..Zeroable::init_zeroed() + }); + + init!(GspFwWprMeta { + inner <- init_inner, + }) + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +#[repr(u32)] +pub(crate) enum MsgFunction { + // Common function codes + AllocChannelDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA, + AllocCtxDma = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA, + AllocDevice = 
bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE, + AllocMemory = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY, + AllocObject = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT, + AllocRoot = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT, + BindCtxDma = bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA, + ContinuationRecord = bindings::NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD, + Free = bindings::NV_VGPU_MSG_FUNCTION_FREE, + GetGspStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO, + GetStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO, + GspInitPostObjGpu = bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU, + GspRmControl = bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL, + GspSetSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO, + Log = bindings::NV_VGPU_MSG_FUNCTION_LOG, + MapMemory = bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY, + Nop = bindings::NV_VGPU_MSG_FUNCTION_NOP, + SetGuestSystemInfo = bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO, + SetRegistry = bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY, + + // Event codes + GspInitDone = bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE, + GspLockdownNotice = bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE, + GspPostNoCat = bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD, + GspRunCpuSequencer = bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER, + MmuFaultQueued = bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED, + OsErrorLog = bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG, + PostEvent = bindings::NV_VGPU_MSG_EVENT_POST_EVENT, + RcTriggered = bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED, + UcodeLibOsPrint = bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, +} + +impl TryFrom<u32> for MsgFunction { + type Error = kernel::error::Error; + + fn try_from(value: u32) -> Result<MsgFunction> { + match value { + // Common function codes + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA => Ok(MsgFunction::AllocChannelDma), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA => Ok(MsgFunction::AllocCtxDma), + 
bindings::NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE => Ok(MsgFunction::AllocDevice), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY => Ok(MsgFunction::AllocMemory), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT => Ok(MsgFunction::AllocObject), + bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT => Ok(MsgFunction::AllocRoot), + bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA => Ok(MsgFunction::BindCtxDma), + bindings::NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD => { + Ok(MsgFunction::ContinuationRecord) + } + bindings::NV_VGPU_MSG_FUNCTION_FREE => Ok(MsgFunction::Free), + bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO => Ok(MsgFunction::GetGspStaticInfo), + bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO => Ok(MsgFunction::GetStaticInfo), + bindings::NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU => { + Ok(MsgFunction::GspInitPostObjGpu) + } + bindings::NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL => Ok(MsgFunction::GspRmControl), + bindings::NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO => Ok(MsgFunction::GspSetSystemInfo), + bindings::NV_VGPU_MSG_FUNCTION_LOG => Ok(MsgFunction::Log), + bindings::NV_VGPU_MSG_FUNCTION_MAP_MEMORY => Ok(MsgFunction::MapMemory), + bindings::NV_VGPU_MSG_FUNCTION_NOP => Ok(MsgFunction::Nop), + bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO => { + Ok(MsgFunction::SetGuestSystemInfo) + } + bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY => Ok(MsgFunction::SetRegistry), + + // Event codes + bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE => Ok(MsgFunction::GspInitDone), + bindings::NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE => Ok(MsgFunction::GspLockdownNotice), + bindings::NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD => Ok(MsgFunction::GspPostNoCat), + bindings::NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER => { + Ok(MsgFunction::GspRunCpuSequencer) + } + bindings::NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED => Ok(MsgFunction::MmuFaultQueued), + bindings::NV_VGPU_MSG_EVENT_OS_ERROR_LOG => Ok(MsgFunction::OsErrorLog), + bindings::NV_VGPU_MSG_EVENT_POST_EVENT => Ok(MsgFunction::PostEvent), + 
bindings::NV_VGPU_MSG_EVENT_RC_TRIGGERED => Ok(MsgFunction::RcTriggered), + bindings::NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT => Ok(MsgFunction::UcodeLibOsPrint), + _ => Err(EINVAL), + } + } +} + +impl From<MsgFunction> for u32 { + fn from(value: MsgFunction) -> Self { + // CAST: `MsgFunction` is `repr(u32)` and can thus be cast losslessly. + value as u32 + } +} + +/// Sequencer buffer opcode for GSP sequencer commands. +#[derive(Copy, Clone, Debug, PartialEq)] +#[repr(u32)] +pub(crate) enum SeqBufOpcode { + // Core operation opcodes + CoreReset = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET, + CoreResume = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME, + CoreStart = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START, + CoreWaitForHalt = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT, + + // Delay opcode + DelayUs = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US, + + // Register operation opcodes + RegModify = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY, + RegPoll = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL, + RegStore = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE, + RegWrite = bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE, +} + +impl TryFrom<u32> for SeqBufOpcode { + type Error = kernel::error::Error; + + fn try_from(value: u32) -> Result<SeqBufOpcode> { + match value { + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET => { + Ok(SeqBufOpcode::CoreReset) + } + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME => { + Ok(SeqBufOpcode::CoreResume) + } + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START => { + Ok(SeqBufOpcode::CoreStart) + } + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT => { + Ok(SeqBufOpcode::CoreWaitForHalt) + } + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US => Ok(SeqBufOpcode::DelayUs), + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY => { + 
Ok(SeqBufOpcode::RegModify) + } + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL => Ok(SeqBufOpcode::RegPoll), + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE => Ok(SeqBufOpcode::RegStore), + bindings::GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE => Ok(SeqBufOpcode::RegWrite), + _ => Err(EINVAL), + } + } +} + +impl From<SeqBufOpcode> for u32 { + fn from(value: SeqBufOpcode) -> Self { + // CAST: `SeqBufOpcode` is `repr(u32)` and can thus be cast losslessly. + value as u32 + } +} + +/// Wrapper for GSP sequencer register write payload. +#[repr(transparent)] +#[derive(Copy, Clone, Debug)] +pub(crate) struct RegWritePayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_WRITE); + +impl RegWritePayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the value to write. + pub(crate) fn val(&self) -> u32 { + self.0.val + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegWritePayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RegWritePayload {} + +/// Wrapper for GSP sequencer register modify payload. +#[repr(transparent)] +#[derive(Copy, Clone, Debug)] +pub(crate) struct RegModifyPayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_MODIFY); + +impl RegModifyPayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the mask to apply. + pub(crate) fn mask(&self) -> u32 { + self.0.mask + } + + /// Returns the value to write. + pub(crate) fn val(&self) -> u32 { + self.0.val + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegModifyPayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RegModifyPayload {} + +/// Wrapper for GSP sequencer register poll payload. 
+#[repr(transparent)] +#[derive(Copy, Clone, Debug)] +pub(crate) struct RegPollPayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_POLL); + +impl RegPollPayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the mask to apply. + pub(crate) fn mask(&self) -> u32 { + self.0.mask + } + + /// Returns the expected value. + pub(crate) fn val(&self) -> u32 { + self.0.val + } + + /// Returns the timeout in microseconds. + pub(crate) fn timeout(&self) -> u32 { + self.0.timeout + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegPollPayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RegPollPayload {} + +/// Wrapper for GSP sequencer delay payload. +#[repr(transparent)] +#[derive(Copy, Clone, Debug)] +pub(crate) struct DelayUsPayload(bindings::GSP_SEQ_BUF_PAYLOAD_DELAY_US); + +impl DelayUsPayload { + /// Returns the delay value in microseconds. + pub(crate) fn val(&self) -> u32 { + self.0.val + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for DelayUsPayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for DelayUsPayload {} + +/// Wrapper for GSP sequencer register store payload. +#[repr(transparent)] +#[derive(Copy, Clone, Debug)] +pub(crate) struct RegStorePayload(bindings::GSP_SEQ_BUF_PAYLOAD_REG_STORE); + +impl RegStorePayload { + /// Returns the register address. + pub(crate) fn addr(&self) -> u32 { + self.0.addr + } + + /// Returns the storage index. + #[allow(unused)] + pub(crate) fn index(&self) -> u32 { + self.0.index + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RegStorePayload {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. 
+unsafe impl AsBytes for RegStorePayload {} + +/// Wrapper for GSP sequencer buffer command. +#[repr(transparent)] +pub(crate) struct SequencerBufferCmd(bindings::GSP_SEQUENCER_BUFFER_CMD); + +impl SequencerBufferCmd { + /// Returns the opcode as a `SeqBufOpcode` enum, or error if invalid. + pub(crate) fn opcode(&self) -> Result<SeqBufOpcode> { + self.0.opCode.try_into() + } + + /// Returns the register write payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegWrite`. + pub(crate) fn reg_write_payload(&self) -> Result<RegWritePayload> { + if self.opcode()? != SeqBufOpcode::RegWrite { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegWrite`, so union contains valid `RegWritePayload`. + Ok(RegWritePayload(unsafe { self.0.payload.regWrite })) + } + + /// Returns the register modify payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegModify`. + pub(crate) fn reg_modify_payload(&self) -> Result<RegModifyPayload> { + if self.opcode()? != SeqBufOpcode::RegModify { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegModify`, so union contains valid `RegModifyPayload`. + Ok(RegModifyPayload(unsafe { self.0.payload.regModify })) + } + + /// Returns the register poll payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegPoll`. + pub(crate) fn reg_poll_payload(&self) -> Result<RegPollPayload> { + if self.opcode()? != SeqBufOpcode::RegPoll { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegPoll`, so union contains valid `RegPollPayload`. + Ok(RegPollPayload(unsafe { self.0.payload.regPoll })) + } + + /// Returns the delay payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::DelayUs`. + pub(crate) fn delay_us_payload(&self) -> Result<DelayUsPayload> { + if self.opcode()? 
!= SeqBufOpcode::DelayUs { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `DelayUs`, so union contains valid `DelayUsPayload`. + Ok(DelayUsPayload(unsafe { self.0.payload.delayUs })) + } + + /// Returns the register store payload by value. + /// + /// Returns an error if the opcode is not `SeqBufOpcode::RegStore`. + pub(crate) fn reg_store_payload(&self) -> Result<RegStorePayload> { + if self.opcode()? != SeqBufOpcode::RegStore { + return Err(EINVAL); + } + // SAFETY: Opcode is verified to be `RegStore`, so union contains valid `RegStorePayload`. + Ok(RegStorePayload(unsafe { self.0.payload.regStore })) + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for SequencerBufferCmd {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for SequencerBufferCmd {} + +/// Wrapper for GSP run CPU sequencer RPC. +#[repr(transparent)] +pub(crate) struct RunCpuSequencer(bindings::rpc_run_cpu_sequencer_v17_00); + +impl RunCpuSequencer { + /// Returns the command index. + pub(crate) fn cmd_index(&self) -> u32 { + self.0.cmdIndex + } +} + +// SAFETY: This struct only contains integer types for which all bit patterns are valid. +unsafe impl FromBytes for RunCpuSequencer {} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for RunCpuSequencer {} + +/// Struct containing the arguments required to pass a memory buffer to the GSP +/// for use during initialisation. +/// +/// The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is +/// configured for a larger page size (e.g. 64K pages), we need to give +/// the GSP an array of 4K pages. Since we only create physically contiguous +/// buffers the math to calculate the addresses is simple. +/// +/// The buffers must be a multiple of GSP_PAGE_SIZE. 
GSP-RM also currently +/// ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the +/// buffers to be physically contiguous anyway. +/// +/// The memory allocated for the arguments must remain until the GSP sends the +/// init_done RPC. +#[repr(transparent)] +pub(crate) struct LibosMemoryRegionInitArgument { + inner: bindings::LibosMemoryRegionInitArgument, +} + +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for LibosMemoryRegionInitArgument {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for LibosMemoryRegionInitArgument {} + +impl LibosMemoryRegionInitArgument { + pub(crate) fn new<'a, A: AsBytes + FromBytes + KnownSize + ?Sized>( + name: &'static str, + obj: &'a Coherent<A>, + ) -> impl Init<Self> + 'a { + /// Generates the `ID8` identifier required for some GSP objects. + fn id8(name: &str) -> u64 { + let mut bytes = [0u8; core::mem::size_of::<u64>()]; + + for (c, b) in name.bytes().rev().zip(&mut bytes) { + *b = c; + } + + u64::from_ne_bytes(bytes) + } + + #[allow(non_snake_case)] + let init_inner = init!(bindings::LibosMemoryRegionInitArgument { + id8: id8(name), + pa: obj.dma_handle(), + size: num::usize_as_u64(obj.size()), + kind: num::u32_into_u8::< + { bindings::LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS }, + >(), + loc: num::u32_into_u8::< + { bindings::LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM }, + >(), + ..Zeroable::init_zeroed() + }); + + init!(LibosMemoryRegionInitArgument { + inner <- init_inner, + }) + } +} + +/// TX header for setting up a message queue with the GSP. +#[repr(transparent)] +pub(crate) struct MsgqTxHeader(bindings::msgqTxHeader); + +impl MsgqTxHeader { + /// Create a new TX queue header. + /// + /// # Arguments + /// + /// * `msgq_size` - Total size of the message queue structure, in bytes. 
+ /// * `rx_hdr_offset` - Offset, in bytes, of the start of the RX header in the message queue + /// structure. + /// * `msg_count` - Number of messages that can be sent, i.e. the number of memory pages + /// allocated for the message queue in the message queue structure. + pub(crate) fn new(msgq_size: u32, rx_hdr_offset: u32, msg_count: u32) -> Self { + Self(bindings::msgqTxHeader { + version: 0, + size: msgq_size, + msgSize: num::usize_into_u32::<GSP_PAGE_SIZE>(), + msgCount: msg_count, + writePtr: 0, + flags: 1, + rxHdrOff: rx_hdr_offset, + entryOff: num::usize_into_u32::<GSP_PAGE_SIZE>(), + }) + } +} + +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for MsgqTxHeader {} + +/// RX header for setting up a message queue with the GSP. +#[repr(transparent)] +pub(crate) struct MsgqRxHeader(bindings::msgqRxHeader); + +/// Header for the message RX queue. +impl MsgqRxHeader { + /// Creates a new RX queue header. + pub(crate) fn new() -> Self { + Self(Default::default()) + } +} + +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for MsgqRxHeader {} + +bitfield! 
{ + struct MsgHeaderVersion(u32) { + 31:24 major as u8; + 23:16 minor as u8; + } +} + +impl MsgHeaderVersion { + const MAJOR_TOT: u8 = 3; + const MINOR_TOT: u8 = 0; + + fn new() -> Self { + Self::default() + .set_major(Self::MAJOR_TOT) + .set_minor(Self::MINOR_TOT) + } +} + +impl bindings::rpc_message_header_v { + fn init(cmd_size: usize, function: MsgFunction) -> impl Init<Self, Error> { + type RpcMessageHeader = bindings::rpc_message_header_v; + + try_init!(RpcMessageHeader { + header_version: MsgHeaderVersion::new().into(), + signature: bindings::NV_VGPU_MSG_SIGNATURE_VALID, + function: function.into(), + length: size_of::<Self>() + .checked_add(cmd_size) + .ok_or(EOVERFLOW) + .and_then(|v| v.try_into().map_err(|_| EINVAL))?, + rpc_result: 0xffffffff, + rpc_result_private: 0xffffffff, + ..Zeroable::init_zeroed() + }) + } +} + +/// GSP Message Element. +/// +/// This is essentially a message header expected to be followed by the message data. +#[repr(transparent)] +pub(crate) struct GspMsgElement { + inner: bindings::GSP_MSG_QUEUE_ELEMENT, +} + +impl GspMsgElement { + /// Creates a new message element. + /// + /// # Arguments + /// + /// * `sequence` - Sequence number of the message. + /// * `cmd_size` - Size of the command (not including the message element), in bytes. + /// * `function` - Function of the message. + #[allow(non_snake_case)] + pub(crate) fn init( + sequence: u32, + cmd_size: usize, + function: MsgFunction, + ) -> impl Init<Self, Error> { + type RpcMessageHeader = bindings::rpc_message_header_v; + type InnerGspMsgElement = bindings::GSP_MSG_QUEUE_ELEMENT; + let init_inner = try_init!(InnerGspMsgElement { + seqNum: sequence, + elemCount: size_of::<Self>() + .checked_add(cmd_size) + .ok_or(EOVERFLOW)? 
+ .div_ceil(GSP_PAGE_SIZE) + .try_into() + .map_err(|_| EOVERFLOW)?, + rpc <- RpcMessageHeader::init(cmd_size, function), + ..Zeroable::init_zeroed() + }); + + try_init!(GspMsgElement { + inner <- init_inner, + }) + } + + /// Sets the checksum of this message. + /// + /// Since the header is also part of the checksum, this is usually called after the whole + /// message has been written to the shared memory area. + pub(crate) fn set_checksum(&mut self, checksum: u32) { + self.inner.checkSum = checksum; + } + + /// Returns the length of the message's payload. + pub(crate) fn payload_length(&self) -> usize { + // `rpc.length` includes the length of the RPC message header. + num::u32_as_usize(self.inner.rpc.length) + .saturating_sub(size_of::<bindings::rpc_message_header_v>()) + } + + /// Returns the total length of the message, message and RPC headers included. + pub(crate) fn length(&self) -> usize { + size_of::<Self>() + self.payload_length() + } + + // Returns the sequence number of the message. + pub(crate) fn sequence(&self) -> u32 { + self.inner.rpc.sequence + } + + // Returns the function of the message, if it is valid, or the invalid function number as an + // error. + pub(crate) fn function(&self) -> Result<MsgFunction, u32> { + self.inner + .rpc + .function + .try_into() + .map_err(|_| self.inner.rpc.function) + } + + // Returns the number of elements (i.e. memory pages) used by this message. + pub(crate) fn element_count(&self) -> u32 { + self.inner.elemCount + } +} + +// SAFETY: Padding is explicit and does not contain uninitialized data. +unsafe impl AsBytes for GspMsgElement {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for GspMsgElement {} + +/// Arguments for GSP startup. 
+#[repr(transparent)] +#[derive(Zeroable)] +pub(crate) struct GspArgumentsCached { + inner: bindings::GSP_ARGUMENTS_CACHED, +} + +impl GspArgumentsCached { + /// Creates the arguments for starting the GSP up using `cmdq` as its command queue. + pub(crate) fn new(cmdq: &Cmdq) -> impl Init<Self> + '_ { + #[allow(non_snake_case)] + let init_inner = init!(bindings::GSP_ARGUMENTS_CACHED { + messageQueueInitArguments <- MessageQueueInitArguments::new(cmdq), + bDmemStack: 1, + ..Zeroable::init_zeroed() + }); + + init!(GspArgumentsCached { + inner <- init_inner, + }) + } +} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for GspArgumentsCached {} + +/// On Turing and GA100, the entries in the `LibosMemoryRegionInitArgument` +/// must all be a multiple of GSP_PAGE_SIZE in size, so add padding to force it +/// to that size. +#[repr(C)] +#[derive(Zeroable)] +pub(crate) struct GspArgumentsPadded { + pub(crate) inner: GspArgumentsCached, + _padding: [u8; GSP_PAGE_SIZE - core::mem::size_of::<bindings::GSP_ARGUMENTS_CACHED>()], +} + +impl GspArgumentsPadded { + pub(crate) fn new(cmdq: &Cmdq) -> impl Init<Self> + '_ { + init!(GspArgumentsPadded { + inner <- GspArgumentsCached::new(cmdq), + ..Zeroable::init_zeroed() + }) + } +} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for GspArgumentsPadded {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for GspArgumentsPadded {} + +/// Init arguments for the message queue. +type MessageQueueInitArguments = bindings::MESSAGE_QUEUE_INIT_ARGUMENTS; + +impl MessageQueueInitArguments { + /// Creates a new init arguments structure for `cmdq`. 
+ #[allow(non_snake_case)] + fn new(cmdq: &Cmdq) -> impl Init<Self> + '_ { + init!(MessageQueueInitArguments { + sharedMemPhysAddr: cmdq.dma_handle, + pageTableEntryCount: num::usize_into_u32::<{ Cmdq::NUM_PTES }>(), + cmdQueueOffset: num::usize_as_u64(Cmdq::CMDQ_OFFSET), + statQueueOffset: num::usize_as_u64(Cmdq::STATQ_OFFSET), + ..Zeroable::init_zeroed() + }) + } +} diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs new file mode 100644 index 000000000000..db46276430be --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/commands.rs @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + device, + pci, + prelude::*, + transmute::{ + AsBytes, + FromBytes, // + }, // +}; + +use crate::gsp::GSP_PAGE_SIZE; + +use super::bindings; + +/// Payload of the `GspSetSystemInfo` command. +#[repr(transparent)] +pub(crate) struct GspSetSystemInfo { + inner: bindings::GspSystemInfo, +} +static_assert!(size_of::<GspSetSystemInfo>() < GSP_PAGE_SIZE); + +impl GspSetSystemInfo { + /// Returns an in-place initializer for the `GspSetSystemInfo` command. + #[allow(non_snake_case)] + pub(crate) fn init<'a>(dev: &'a pci::Device<device::Bound>) -> impl Init<Self, Error> + 'a { + type InnerGspSystemInfo = bindings::GspSystemInfo; + let init_inner = try_init!(InnerGspSystemInfo { + gpuPhysAddr: dev.resource_start(0)?, + gpuPhysFbAddr: dev.resource_start(1)?, + gpuPhysInstAddr: dev.resource_start(3)?, + nvDomainBusDeviceFunc: u64::from(dev.dev_id()), + + // Using TASK_SIZE in r535_gsp_rpc_set_system_info() seems wrong because + // TASK_SIZE is per-task. That's probably a design issue in GSP-RM though. 
+ maxUserVa: (1 << 47) - 4096, + pciConfigMirrorBase: 0x088000, + pciConfigMirrorSize: 0x001000, + + PCIDeviceID: (u32::from(dev.device_id()) << 16) | u32::from(dev.vendor_id().as_raw()), + PCISubDeviceID: (u32::from(dev.subsystem_device_id()) << 16) + | u32::from(dev.subsystem_vendor_id()), + PCIRevisionID: u32::from(dev.revision_id()), + bIsPrimary: 0, + bPreserveVideoMemoryAllocations: 0, + ..Zeroable::init_zeroed() + }); + + try_init!(GspSetSystemInfo { + inner <- init_inner, + }) + } +} + +// SAFETY: These structs don't meet the no-padding requirements of AsBytes but +// that is not a problem because they are not used outside the kernel. +unsafe impl AsBytes for GspSetSystemInfo {} + +// SAFETY: These structs don't meet the no-padding requirements of FromBytes but +// that is not a problem because they are not used outside the kernel. +unsafe impl FromBytes for GspSetSystemInfo {} + +#[repr(transparent)] +pub(crate) struct PackedRegistryEntry(bindings::PACKED_REGISTRY_ENTRY); + +impl PackedRegistryEntry { + pub(crate) fn new(offset: u32, value: u32) -> Self { + Self({ + bindings::PACKED_REGISTRY_ENTRY { + nameOffset: offset, + + // We only support DWORD types for now. Support for other types + // will come later if required. + type_: bindings::REGISTRY_TABLE_ENTRY_TYPE_DWORD as u8, + __bindgen_padding_0: Default::default(), + data: value, + length: 0, + } + }) + } +} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for PackedRegistryEntry {} + +/// Payload of the `SetRegistry` command. 
+#[repr(transparent)] +pub(crate) struct PackedRegistryTable { + inner: bindings::PACKED_REGISTRY_TABLE, +} + +impl PackedRegistryTable { + #[allow(non_snake_case)] + pub(crate) fn init(num_entries: u32, size: u32) -> impl Init<Self> { + type InnerPackedRegistryTable = bindings::PACKED_REGISTRY_TABLE; + let init_inner = init!(InnerPackedRegistryTable { + numEntries: num_entries, + size, + entries: Default::default() + }); + + init!(PackedRegistryTable { inner <- init_inner }) + } +} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for PackedRegistryTable {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for PackedRegistryTable {} + +/// Payload of the `GetGspStaticInfo` command and message. +#[repr(transparent)] +#[derive(Zeroable)] +pub(crate) struct GspStaticConfigInfo(bindings::GspStaticConfigInfo_t); + +impl GspStaticConfigInfo { + /// Returns a bytes array containing the (hopefully) zero-terminated name of this GPU. + pub(crate) fn gpu_name_str(&self) -> [u8; 64] { + self.0.gpuNameString + } +} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for GspStaticConfigInfo {} + +// SAFETY: This struct only contains integer types for which all bit patterns +// are valid. +unsafe impl FromBytes for GspStaticConfigInfo {} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs new file mode 100644 index 000000000000..2e6f0d298756 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Firmware bindings. +//! +//! Imports the generated bindings by `bindgen`. +//! +//! This module may not be directly used. Please abstract or re-export the needed symbols in the +//! parent module instead. 
+ +#![allow( + dead_code, + clippy::all, + clippy::undocumented_unsafe_blocks, + clippy::ptr_as_ptr, + clippy::ref_as_ptr, + missing_docs, + non_camel_case_types, + non_upper_case_globals, + non_snake_case, + improper_ctypes, + unreachable_pub, + unsafe_op_in_unsafe_fn +)] +use kernel::ffi; +use pin_init::MaybeZeroable; + +include!("r570_144/bindings.rs"); + +// SAFETY: This type has a size of zero, so its inclusion into another type should not affect their +// ability to implement `Zeroable`. +unsafe impl<T> kernel::prelude::Zeroable for __IncompleteArrayField<T> {} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs new file mode 100644 index 000000000000..334e8be5fde8 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -0,0 +1,965 @@ +// SPDX-License-Identifier: GPL-2.0 + +#[repr(C)] +#[derive(Default)] +pub struct __IncompleteArrayField<T>(::core::marker::PhantomData<T>, [T; 0]); +impl<T> __IncompleteArrayField<T> { + #[inline] + pub const fn new() -> Self { + __IncompleteArrayField(::core::marker::PhantomData, []) + } + #[inline] + pub fn as_ptr(&self) -> *const T { + self as *const _ as *const T + } + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + self as *mut _ as *mut T + } + #[inline] + pub unsafe fn as_slice(&self, len: usize) -> &[T] { + ::core::slice::from_raw_parts(self.as_ptr(), len) + } + #[inline] + pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] { + ::core::slice::from_raw_parts_mut(self.as_mut_ptr(), len) + } +} +impl<T> ::core::fmt::Debug for __IncompleteArrayField<T> { + fn fmt(&self, fmt: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { + fmt.write_str("__IncompleteArrayField") + } +} +pub const NV_VGPU_MSG_SIGNATURE_VALID: u32 = 1129337430; +pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2: u32 = 0; +pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL: u32 = 23068672; +pub const GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X: u32 = 8388608; +pub 
const GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB: u32 = 98304; +pub const GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE: u32 = 100663296; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MIN_MB: u32 = 64; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS2_MAX_MB: u32 = 256; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB: u32 = 88; +pub const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB: u32 = 280; +pub const GSP_FW_WPR_META_REVISION: u32 = 1; +pub const GSP_FW_WPR_META_MAGIC: i64 = -2577556379034558285; +pub const REGISTRY_TABLE_ENTRY_TYPE_DWORD: u32 = 1; +pub const GSP_MSG_QUEUE_ELEMENT_SIZE_MAX: u32 = 65536; +pub type __u8 = ffi::c_uchar; +pub type __u16 = ffi::c_ushort; +pub type __u32 = ffi::c_uint; +pub type __u64 = ffi::c_ulonglong; +pub type u8_ = __u8; +pub type u16_ = __u16; +pub type u32_ = __u32; +pub type u64_ = __u64; +pub const NV_VGPU_MSG_FUNCTION_NOP: _bindgen_ty_2 = 0; +pub const NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO: _bindgen_ty_2 = 1; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_ROOT: _bindgen_ty_2 = 2; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DEVICE: _bindgen_ty_2 = 3; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY: _bindgen_ty_2 = 4; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_CTX_DMA: _bindgen_ty_2 = 5; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_CHANNEL_DMA: _bindgen_ty_2 = 6; +pub const NV_VGPU_MSG_FUNCTION_MAP_MEMORY: _bindgen_ty_2 = 7; +pub const NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA: _bindgen_ty_2 = 8; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT: _bindgen_ty_2 = 9; +pub const NV_VGPU_MSG_FUNCTION_FREE: _bindgen_ty_2 = 10; +pub const NV_VGPU_MSG_FUNCTION_LOG: _bindgen_ty_2 = 11; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_VIDMEM: _bindgen_ty_2 = 12; +pub const NV_VGPU_MSG_FUNCTION_UNMAP_MEMORY: _bindgen_ty_2 = 13; +pub const NV_VGPU_MSG_FUNCTION_MAP_MEMORY_DMA: _bindgen_ty_2 = 14; +pub const NV_VGPU_MSG_FUNCTION_UNMAP_MEMORY_DMA: _bindgen_ty_2 = 15; +pub const NV_VGPU_MSG_FUNCTION_GET_EDID: _bindgen_ty_2 = 16; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DISP_CHANNEL: _bindgen_ty_2 = 
17; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DISP_OBJECT: _bindgen_ty_2 = 18; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_SUBDEVICE: _bindgen_ty_2 = 19; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_DYNAMIC_MEMORY: _bindgen_ty_2 = 20; +pub const NV_VGPU_MSG_FUNCTION_DUP_OBJECT: _bindgen_ty_2 = 21; +pub const NV_VGPU_MSG_FUNCTION_IDLE_CHANNELS: _bindgen_ty_2 = 22; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_EVENT: _bindgen_ty_2 = 23; +pub const NV_VGPU_MSG_FUNCTION_SEND_EVENT: _bindgen_ty_2 = 24; +pub const NV_VGPU_MSG_FUNCTION_REMAPPER_CONTROL: _bindgen_ty_2 = 25; +pub const NV_VGPU_MSG_FUNCTION_DMA_CONTROL: _bindgen_ty_2 = 26; +pub const NV_VGPU_MSG_FUNCTION_DMA_FILL_PTE_MEM: _bindgen_ty_2 = 27; +pub const NV_VGPU_MSG_FUNCTION_MANAGE_HW_RESOURCE: _bindgen_ty_2 = 28; +pub const NV_VGPU_MSG_FUNCTION_BIND_ARBITRARY_CTX_DMA: _bindgen_ty_2 = 29; +pub const NV_VGPU_MSG_FUNCTION_CREATE_FB_SEGMENT: _bindgen_ty_2 = 30; +pub const NV_VGPU_MSG_FUNCTION_DESTROY_FB_SEGMENT: _bindgen_ty_2 = 31; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_SHARE_DEVICE: _bindgen_ty_2 = 32; +pub const NV_VGPU_MSG_FUNCTION_DEFERRED_API_CONTROL: _bindgen_ty_2 = 33; +pub const NV_VGPU_MSG_FUNCTION_REMOVE_DEFERRED_API: _bindgen_ty_2 = 34; +pub const NV_VGPU_MSG_FUNCTION_SIM_ESCAPE_READ: _bindgen_ty_2 = 35; +pub const NV_VGPU_MSG_FUNCTION_SIM_ESCAPE_WRITE: _bindgen_ty_2 = 36; +pub const NV_VGPU_MSG_FUNCTION_SIM_MANAGE_DISPLAY_CONTEXT_DMA: _bindgen_ty_2 = 37; +pub const NV_VGPU_MSG_FUNCTION_FREE_VIDMEM_VIRT: _bindgen_ty_2 = 38; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_PSTATE_INFO: _bindgen_ty_2 = 39; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_PERFMON_SAMPLE: _bindgen_ty_2 = 40; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_VIRTUAL_PSTATE_INFO: _bindgen_ty_2 = 41; +pub const NV_VGPU_MSG_FUNCTION_PERF_GET_LEVEL_INFO: _bindgen_ty_2 = 42; +pub const NV_VGPU_MSG_FUNCTION_MAP_SEMA_MEMORY: _bindgen_ty_2 = 43; +pub const NV_VGPU_MSG_FUNCTION_UNMAP_SEMA_MEMORY: _bindgen_ty_2 = 44; +pub const NV_VGPU_MSG_FUNCTION_SET_SURFACE_PROPERTIES: 
_bindgen_ty_2 = 45; +pub const NV_VGPU_MSG_FUNCTION_CLEANUP_SURFACE: _bindgen_ty_2 = 46; +pub const NV_VGPU_MSG_FUNCTION_UNLOADING_GUEST_DRIVER: _bindgen_ty_2 = 47; +pub const NV_VGPU_MSG_FUNCTION_TDR_SET_TIMEOUT_STATE: _bindgen_ty_2 = 48; +pub const NV_VGPU_MSG_FUNCTION_SWITCH_TO_VGA: _bindgen_ty_2 = 49; +pub const NV_VGPU_MSG_FUNCTION_GPU_EXEC_REG_OPS: _bindgen_ty_2 = 50; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO: _bindgen_ty_2 = 51; +pub const NV_VGPU_MSG_FUNCTION_ALLOC_VIRTMEM: _bindgen_ty_2 = 52; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_PDE_2: _bindgen_ty_2 = 53; +pub const NV_VGPU_MSG_FUNCTION_SET_PAGE_DIRECTORY: _bindgen_ty_2 = 54; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_PSTATE_INFO: _bindgen_ty_2 = 55; +pub const NV_VGPU_MSG_FUNCTION_TRANSLATE_GUEST_GPU_PTES: _bindgen_ty_2 = 56; +pub const NV_VGPU_MSG_FUNCTION_RESERVED_57: _bindgen_ty_2 = 57; +pub const NV_VGPU_MSG_FUNCTION_RESET_CURRENT_GR_CONTEXT: _bindgen_ty_2 = 58; +pub const NV_VGPU_MSG_FUNCTION_SET_SEMA_MEM_VALIDATION_STATE: _bindgen_ty_2 = 59; +pub const NV_VGPU_MSG_FUNCTION_GET_ENGINE_UTILIZATION: _bindgen_ty_2 = 60; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_GPU_PDES: _bindgen_ty_2 = 61; +pub const NV_VGPU_MSG_FUNCTION_GET_ENCODER_CAPACITY: _bindgen_ty_2 = 62; +pub const NV_VGPU_MSG_FUNCTION_VGPU_PF_REG_READ32: _bindgen_ty_2 = 63; +pub const NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM_INFO_EXT: _bindgen_ty_2 = 64; +pub const NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO: _bindgen_ty_2 = 65; +pub const NV_VGPU_MSG_FUNCTION_RMFS_INIT: _bindgen_ty_2 = 66; +pub const NV_VGPU_MSG_FUNCTION_RMFS_CLOSE_QUEUE: _bindgen_ty_2 = 67; +pub const NV_VGPU_MSG_FUNCTION_RMFS_CLEANUP: _bindgen_ty_2 = 68; +pub const NV_VGPU_MSG_FUNCTION_RMFS_TEST: _bindgen_ty_2 = 69; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_BAR_PDE: _bindgen_ty_2 = 70; +pub const NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD: _bindgen_ty_2 = 71; +pub const NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO: _bindgen_ty_2 = 72; +pub const NV_VGPU_MSG_FUNCTION_SET_REGISTRY: 
_bindgen_ty_2 = 73; +pub const NV_VGPU_MSG_FUNCTION_GSP_INIT_POST_OBJGPU: _bindgen_ty_2 = 74; +pub const NV_VGPU_MSG_FUNCTION_SUBDEV_EVENT_SET_NOTIFICATION: _bindgen_ty_2 = 75; +pub const NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL: _bindgen_ty_2 = 76; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO2: _bindgen_ty_2 = 77; +pub const NV_VGPU_MSG_FUNCTION_DUMP_PROTOBUF_COMPONENT: _bindgen_ty_2 = 78; +pub const NV_VGPU_MSG_FUNCTION_UNSET_PAGE_DIRECTORY: _bindgen_ty_2 = 79; +pub const NV_VGPU_MSG_FUNCTION_GET_CONSOLIDATED_STATIC_INFO: _bindgen_ty_2 = 80; +pub const NV_VGPU_MSG_FUNCTION_GMMU_REGISTER_FAULT_BUFFER: _bindgen_ty_2 = 81; +pub const NV_VGPU_MSG_FUNCTION_GMMU_UNREGISTER_FAULT_BUFFER: _bindgen_ty_2 = 82; +pub const NV_VGPU_MSG_FUNCTION_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER: _bindgen_ty_2 = 83; +pub const NV_VGPU_MSG_FUNCTION_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER: _bindgen_ty_2 = 84; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_VGPU_FB_USAGE: _bindgen_ty_2 = 85; +pub const NV_VGPU_MSG_FUNCTION_CTRL_NVFBC_SW_SESSION_UPDATE_INFO: _bindgen_ty_2 = 86; +pub const NV_VGPU_MSG_FUNCTION_CTRL_NVENC_SW_SESSION_UPDATE_INFO: _bindgen_ty_2 = 87; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESET_CHANNEL: _bindgen_ty_2 = 88; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESET_ISOLATED_CHANNEL: _bindgen_ty_2 = 89; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_HANDLE_VF_PRI_FAULT: _bindgen_ty_2 = 90; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CLK_GET_EXTENDED_INFO: _bindgen_ty_2 = 91; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_BOOST: _bindgen_ty_2 = 92; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_VPSTATES_GET_CONTROL: _bindgen_ty_2 = 93; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_ZBC_CLEAR_TABLE: _bindgen_ty_2 = 94; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_COLOR_CLEAR: _bindgen_ty_2 = 95; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_DEPTH_CLEAR: _bindgen_ty_2 = 96; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_SCHEDULE: _bindgen_ty_2 = 97; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_TIMESLICE: _bindgen_ty_2 = 98; 
+pub const NV_VGPU_MSG_FUNCTION_CTRL_PREEMPT: _bindgen_ty_2 = 99; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_DISABLE_CHANNELS: _bindgen_ty_2 = 100; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_TSG_INTERLEAVE_LEVEL: _bindgen_ty_2 = 101; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_CHANNEL_INTERLEAVE_LEVEL: _bindgen_ty_2 = 102; +pub const NV_VGPU_MSG_FUNCTION_GSP_RM_ALLOC: _bindgen_ty_2 = 103; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS_V2: _bindgen_ty_2 = 104; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_AES_ENCRYPT: _bindgen_ty_2 = 105; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_SESSION_KEY: _bindgen_ty_2 = 106; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CIPHER_SESSION_KEY_STATUS: _bindgen_ty_2 = 107; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_CLEAR_ALL_SM_ERROR_STATES: _bindgen_ty_2 = 108; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_READ_ALL_SM_ERROR_STATES: _bindgen_ty_2 = 109; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_EXCEPTION_MASK: _bindgen_ty_2 = 110; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_PROMOTE_CTX: _bindgen_ty_2 = 111; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_CTXSW_PREEMPTION_BIND: _bindgen_ty_2 = 112; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_SET_CTXSW_PREEMPTION_MODE: _bindgen_ty_2 = 113; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_CTXSW_ZCULL_BIND: _bindgen_ty_2 = 114; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_INITIALIZE_CTX: _bindgen_ty_2 = 115; +pub const NV_VGPU_MSG_FUNCTION_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES: _bindgen_ty_2 = 116; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_CLEAR_FAULTED_BIT: _bindgen_ty_2 = 117; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_LATEST_ECC_ADDRESSES: _bindgen_ty_2 = 118; +pub const NV_VGPU_MSG_FUNCTION_CTRL_MC_SERVICE_INTERRUPTS: _bindgen_ty_2 = 119; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DMA_SET_DEFAULT_VASPACE: _bindgen_ty_2 = 120; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_CE_PCE_MASK: _bindgen_ty_2 = 121; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_ZBC_CLEAR_TABLE_ENTRY: _bindgen_ty_2 = 122; +pub const 
NV_VGPU_MSG_FUNCTION_CTRL_GET_NVLINK_PEER_ID_MASK: _bindgen_ty_2 = 123; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_NVLINK_STATUS: _bindgen_ty_2 = 124; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS: _bindgen_ty_2 = 125; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_P2P_CAPS_MATRIX: _bindgen_ty_2 = 126; +pub const NV_VGPU_MSG_FUNCTION_RESERVED_0: _bindgen_ty_2 = 127; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_PM_AREA_SMPC: _bindgen_ty_2 = 128; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_HWPM_LEGACY: _bindgen_ty_2 = 129; +pub const NV_VGPU_MSG_FUNCTION_CTRL_B0CC_EXEC_REG_OPS: _bindgen_ty_2 = 130; +pub const NV_VGPU_MSG_FUNCTION_CTRL_BIND_PM_RESOURCES: _bindgen_ty_2 = 131; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SUSPEND_CONTEXT: _bindgen_ty_2 = 132; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_RESUME_CONTEXT: _bindgen_ty_2 = 133; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_EXEC_REG_OPS: _bindgen_ty_2 = 134; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_MMU_DEBUG: _bindgen_ty_2 = 135; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_READ_SINGLE_SM_ERROR_STATE: _bindgen_ty_2 = 136; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_CLEAR_SINGLE_SM_ERROR_STATE: _bindgen_ty_2 = 137; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_ERRBAR_DEBUG: _bindgen_ty_2 = 138; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_NEXT_STOP_TRIGGER_TYPE: _bindgen_ty_2 = 139; +pub const NV_VGPU_MSG_FUNCTION_CTRL_ALLOC_PMA_STREAM: _bindgen_ty_2 = 140; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PMA_STREAM_UPDATE_GET_PUT: _bindgen_ty_2 = 141; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FB_GET_INFO_V2: _bindgen_ty_2 = 142; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_SET_CHANNEL_PROPERTIES: _bindgen_ty_2 = 143; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_GET_CTX_BUFFER_INFO: _bindgen_ty_2 = 144; +pub const NV_VGPU_MSG_FUNCTION_CTRL_KGR_GET_CTX_BUFFER_PTES: _bindgen_ty_2 = 145; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_EVICT_CTX: _bindgen_ty_2 = 146; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FB_GET_FS_INFO: _bindgen_ty_2 = 147; +pub const 
NV_VGPU_MSG_FUNCTION_CTRL_GRMGR_GET_GR_FS_INFO: _bindgen_ty_2 = 148; +pub const NV_VGPU_MSG_FUNCTION_CTRL_STOP_CHANNEL: _bindgen_ty_2 = 149; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_PC_SAMPLING_MODE: _bindgen_ty_2 = 150; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_RATED_TDP_GET_STATUS: _bindgen_ty_2 = 151; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PERF_RATED_TDP_SET_CONTROL: _bindgen_ty_2 = 152; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FREE_PMA_STREAM: _bindgen_ty_2 = 153; +pub const NV_VGPU_MSG_FUNCTION_CTRL_TIMER_SET_GR_TICK_FREQ: _bindgen_ty_2 = 154; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB: _bindgen_ty_2 = 155; +pub const NV_VGPU_MSG_FUNCTION_GET_CONSOLIDATED_GR_STATIC_INFO: _bindgen_ty_2 = 156; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_SINGLE_SM_SINGLE_STEP: _bindgen_ty_2 = 157; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_GET_TPC_PARTITION_MODE: _bindgen_ty_2 = 158; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GR_SET_TPC_PARTITION_MODE: _bindgen_ty_2 = 159; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_ALLOCATE: _bindgen_ty_2 = 160; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_DESTROY: _bindgen_ty_2 = 161; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_MAP: _bindgen_ty_2 = 162; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_UNMAP: _bindgen_ty_2 = 163; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_PUSH_STREAM: _bindgen_ty_2 = 164; +pub const NV_VGPU_MSG_FUNCTION_UVM_PAGING_CHANNEL_SET_HANDLES: _bindgen_ty_2 = 165; +pub const NV_VGPU_MSG_FUNCTION_UVM_METHOD_STREAM_GUEST_PAGES_OPERATION: _bindgen_ty_2 = 166; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL: _bindgen_ty_2 = 167; +pub const NV_VGPU_MSG_FUNCTION_DCE_RM_INIT: _bindgen_ty_2 = 168; +pub const NV_VGPU_MSG_FUNCTION_REGISTER_VIRTUAL_EVENT_BUFFER: _bindgen_ty_2 = 169; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EVENT_BUFFER_UPDATE_GET: _bindgen_ty_2 = 170; +pub const NV_VGPU_MSG_FUNCTION_GET_PLCABLE_ADDRESS_KIND: _bindgen_ty_2 = 171; +pub const 
NV_VGPU_MSG_FUNCTION_CTRL_PERF_LIMITS_SET_STATUS_V2: _bindgen_ty_2 = 172; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_SRIOV_PROMOTE_PMA_STREAM: _bindgen_ty_2 = 173; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_MMU_DEBUG_MODE: _bindgen_ty_2 = 174; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_PROMOTE_FAULT_METHOD_BUFFERS: _bindgen_ty_2 = 175; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FLCN_GET_CTX_BUFFER_SIZE: _bindgen_ty_2 = 176; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FLCN_GET_CTX_BUFFER_INFO: _bindgen_ty_2 = 177; +pub const NV_VGPU_MSG_FUNCTION_DISABLE_CHANNELS: _bindgen_ty_2 = 178; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FABRIC_MEMORY_DESCRIBE: _bindgen_ty_2 = 179; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FABRIC_MEM_STATS: _bindgen_ty_2 = 180; +pub const NV_VGPU_MSG_FUNCTION_SAVE_HIBERNATION_DATA: _bindgen_ty_2 = 181; +pub const NV_VGPU_MSG_FUNCTION_RESTORE_HIBERNATION_DATA: _bindgen_ty_2 = 182; +pub const NV_VGPU_MSG_FUNCTION_CTRL_INTERNAL_MEMSYS_SET_ZBC_REFERENCED: _bindgen_ty_2 = 183; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_CREATE: _bindgen_ty_2 = 184; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_DELETE: _bindgen_ty_2 = 185; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_GET_WORK_SUBMIT_TOKEN: _bindgen_ty_2 = 186; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPFIFO_SET_WORK_SUBMIT_TOKEN_NOTIF_INDEX: _bindgen_ty_2 = 187; +pub const NV_VGPU_MSG_FUNCTION_PMA_SCRUBBER_SHARED_BUFFER_GUEST_PAGES_OPERATION: _bindgen_ty_2 = + 188; +pub const NV_VGPU_MSG_FUNCTION_CTRL_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK: + _bindgen_ty_2 = 189; +pub const NV_VGPU_MSG_FUNCTION_SET_SYSMEM_DIRTY_PAGE_TRACKING_BUFFER: _bindgen_ty_2 = 190; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_P2P_CAPS: _bindgen_ty_2 = 191; +pub const NV_VGPU_MSG_FUNCTION_CTRL_BUS_SET_P2P_MAPPING: _bindgen_ty_2 = 192; +pub const NV_VGPU_MSG_FUNCTION_CTRL_BUS_UNSET_P2P_MAPPING: _bindgen_ty_2 = 193; +pub const NV_VGPU_MSG_FUNCTION_CTRL_FLA_SETUP_INSTANCE_MEM_BLOCK: _bindgen_ty_2 = 194; +pub const 
NV_VGPU_MSG_FUNCTION_CTRL_GPU_MIGRATABLE_OPS: _bindgen_ty_2 = 195; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_TOTAL_HS_CREDITS: _bindgen_ty_2 = 196; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GET_HS_CREDITS: _bindgen_ty_2 = 197; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_HS_CREDITS: _bindgen_ty_2 = 198; +pub const NV_VGPU_MSG_FUNCTION_CTRL_PM_AREA_PC_SAMPLER: _bindgen_ty_2 = 199; +pub const NV_VGPU_MSG_FUNCTION_INVALIDATE_TLB: _bindgen_ty_2 = 200; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_QUERY_ECC_STATUS: _bindgen_ty_2 = 201; +pub const NV_VGPU_MSG_FUNCTION_ECC_NOTIFIER_WRITE_ACK: _bindgen_ty_2 = 202; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_GET_MODE_MMU_DEBUG: _bindgen_ty_2 = 203; +pub const NV_VGPU_MSG_FUNCTION_RM_API_CONTROL: _bindgen_ty_2 = 204; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_INTERNAL_GPU_START_FABRIC_PROBE: _bindgen_ty_2 = 205; +pub const NV_VGPU_MSG_FUNCTION_CTRL_NVLINK_GET_INBAND_RECEIVED_DATA: _bindgen_ty_2 = 206; +pub const NV_VGPU_MSG_FUNCTION_GET_STATIC_DATA: _bindgen_ty_2 = 207; +pub const NV_VGPU_MSG_FUNCTION_RESERVED_208: _bindgen_ty_2 = 208; +pub const NV_VGPU_MSG_FUNCTION_CTRL_GPU_GET_INFO_V2: _bindgen_ty_2 = 209; +pub const NV_VGPU_MSG_FUNCTION_GET_BRAND_CAPS: _bindgen_ty_2 = 210; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_NVLINK_INBAND_SEND_DATA: _bindgen_ty_2 = 211; +pub const NV_VGPU_MSG_FUNCTION_UPDATE_GPM_GUEST_BUFFER_INFO: _bindgen_ty_2 = 212; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_INTERNAL_CONTROL_GSP_TRACE: _bindgen_ty_2 = 213; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SET_ZBC_STENCIL_CLEAR: _bindgen_ty_2 = 214; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_VGPU_HEAP_STATS: _bindgen_ty_2 = 215; +pub const NV_VGPU_MSG_FUNCTION_CTRL_SUBDEVICE_GET_LIBOS_HEAP_STATS: _bindgen_ty_2 = 216; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_SET_MODE_MMU_GCC_DEBUG: _bindgen_ty_2 = 217; +pub const NV_VGPU_MSG_FUNCTION_CTRL_DBG_GET_MODE_MMU_GCC_DEBUG: _bindgen_ty_2 = 218; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_HES: _bindgen_ty_2 = 219; +pub const 
NV_VGPU_MSG_FUNCTION_CTRL_RELEASE_HES: _bindgen_ty_2 = 220; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RESERVE_CCU_PROF: _bindgen_ty_2 = 221; +pub const NV_VGPU_MSG_FUNCTION_CTRL_RELEASE_CCU_PROF: _bindgen_ty_2 = 222; +pub const NV_VGPU_MSG_FUNCTION_RESERVED: _bindgen_ty_2 = 223; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_GET_CHIPLET_HS_CREDIT_POOL: _bindgen_ty_2 = 224; +pub const NV_VGPU_MSG_FUNCTION_CTRL_CMD_GET_HS_CREDITS_MAPPING: _bindgen_ty_2 = 225; +pub const NV_VGPU_MSG_FUNCTION_CTRL_EXEC_PARTITIONS_EXPORT: _bindgen_ty_2 = 226; +pub const NV_VGPU_MSG_FUNCTION_NUM_FUNCTIONS: _bindgen_ty_2 = 227; +pub type _bindgen_ty_2 = ffi::c_uint; +pub const NV_VGPU_MSG_EVENT_FIRST_EVENT: _bindgen_ty_3 = 4096; +pub const NV_VGPU_MSG_EVENT_GSP_INIT_DONE: _bindgen_ty_3 = 4097; +pub const NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER: _bindgen_ty_3 = 4098; +pub const NV_VGPU_MSG_EVENT_POST_EVENT: _bindgen_ty_3 = 4099; +pub const NV_VGPU_MSG_EVENT_RC_TRIGGERED: _bindgen_ty_3 = 4100; +pub const NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED: _bindgen_ty_3 = 4101; +pub const NV_VGPU_MSG_EVENT_OS_ERROR_LOG: _bindgen_ty_3 = 4102; +pub const NV_VGPU_MSG_EVENT_RG_LINE_INTR: _bindgen_ty_3 = 4103; +pub const NV_VGPU_MSG_EVENT_GPUACCT_PERFMON_UTIL_SAMPLES: _bindgen_ty_3 = 4104; +pub const NV_VGPU_MSG_EVENT_SIM_READ: _bindgen_ty_3 = 4105; +pub const NV_VGPU_MSG_EVENT_SIM_WRITE: _bindgen_ty_3 = 4106; +pub const NV_VGPU_MSG_EVENT_SEMAPHORE_SCHEDULE_CALLBACK: _bindgen_ty_3 = 4107; +pub const NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT: _bindgen_ty_3 = 4108; +pub const NV_VGPU_MSG_EVENT_VGPU_GSP_PLUGIN_TRIGGERED: _bindgen_ty_3 = 4109; +pub const NV_VGPU_MSG_EVENT_PERF_GPU_BOOST_SYNC_LIMITS_CALLBACK: _bindgen_ty_3 = 4110; +pub const NV_VGPU_MSG_EVENT_PERF_BRIDGELESS_INFO_UPDATE: _bindgen_ty_3 = 4111; +pub const NV_VGPU_MSG_EVENT_VGPU_CONFIG: _bindgen_ty_3 = 4112; +pub const NV_VGPU_MSG_EVENT_DISPLAY_MODESET: _bindgen_ty_3 = 4113; +pub const NV_VGPU_MSG_EVENT_EXTDEV_INTR_SERVICE: _bindgen_ty_3 = 4114; +pub const 
NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_256: _bindgen_ty_3 = 4115; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_512: _bindgen_ty_3 = 4116; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_1024: _bindgen_ty_3 = 4117; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_2048: _bindgen_ty_3 = 4118; +pub const NV_VGPU_MSG_EVENT_NVLINK_INBAND_RECEIVED_DATA_4096: _bindgen_ty_3 = 4119; +pub const NV_VGPU_MSG_EVENT_TIMED_SEMAPHORE_RELEASE: _bindgen_ty_3 = 4120; +pub const NV_VGPU_MSG_EVENT_NVLINK_IS_GPU_DEGRADED: _bindgen_ty_3 = 4121; +pub const NV_VGPU_MSG_EVENT_PFM_REQ_HNDLR_STATE_SYNC_CALLBACK: _bindgen_ty_3 = 4122; +pub const NV_VGPU_MSG_EVENT_NVLINK_FAULT_UP: _bindgen_ty_3 = 4123; +pub const NV_VGPU_MSG_EVENT_GSP_LOCKDOWN_NOTICE: _bindgen_ty_3 = 4124; +pub const NV_VGPU_MSG_EVENT_MIG_CI_CONFIG_UPDATE: _bindgen_ty_3 = 4125; +pub const NV_VGPU_MSG_EVENT_UPDATE_GSP_TRACE: _bindgen_ty_3 = 4126; +pub const NV_VGPU_MSG_EVENT_NVLINK_FATAL_ERROR_RECOVERY: _bindgen_ty_3 = 4127; +pub const NV_VGPU_MSG_EVENT_GSP_POST_NOCAT_RECORD: _bindgen_ty_3 = 4128; +pub const NV_VGPU_MSG_EVENT_FECS_ERROR: _bindgen_ty_3 = 4129; +pub const NV_VGPU_MSG_EVENT_RECOVERY_ACTION: _bindgen_ty_3 = 4130; +pub const NV_VGPU_MSG_EVENT_NUM_EVENTS: _bindgen_ty_3 = 4131; +pub type _bindgen_ty_3 = ffi::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS { + pub totalVFs: u32_, + pub firstVfOffset: u32_, + pub vfFeatureMask: u32_, + pub __bindgen_padding_0: [u8; 4usize], + pub FirstVFBar0Address: u64_, + pub FirstVFBar1Address: u64_, + pub FirstVFBar2Address: u64_, + pub bar0Size: u64_, + pub bar1Size: u64_, + pub bar2Size: u64_, + pub b64bitBar0: u8_, + pub b64bitBar1: u8_, + pub b64bitBar2: u8_, + pub bSriovEnabled: u8_, + pub bSriovHeavyEnabled: u8_, + pub bEmulateVFBar0TlbInvalidationRegister: u8_, + pub bClientRmAllocatedCtxBuffer: u8_, + pub bNonPowerOf2ChannelCountSupported: u8_, + pub 
bVfResizableBAR1Supported: u8_, + pub __bindgen_padding_1: [u8; 7usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS { + pub BoardID: u32_, + pub chipSKU: [ffi::c_char; 9usize], + pub chipSKUMod: [ffi::c_char; 5usize], + pub __bindgen_padding_0: [u8; 2usize], + pub skuConfigVersion: u32_, + pub project: [ffi::c_char; 5usize], + pub projectSKU: [ffi::c_char; 5usize], + pub CDP: [ffi::c_char; 6usize], + pub projectSKUMod: [ffi::c_char; 2usize], + pub __bindgen_padding_1: [u8; 2usize], + pub businessCycle: u32_, +} +pub type NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG = [u8_; 17usize]; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO { + pub base: u64_, + pub limit: u64_, + pub reserved: u64_, + pub performance: u32_, + pub supportCompressed: u8_, + pub supportISO: u8_, + pub bProtected: u8_, + pub blackList: NV2080_CTRL_CMD_FB_GET_FB_REGION_SURFACE_MEM_TYPE_FLAG, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS { + pub numFBRegions: u32_, + pub __bindgen_padding_0: [u8; 4usize], + pub fbRegion: [NV2080_CTRL_CMD_FB_GET_FB_REGION_FB_REGION_INFO; 16usize], +} +#[repr(C)] +#[derive(Debug, Copy, Clone, MaybeZeroable)] +pub struct NV2080_CTRL_GPU_GET_GID_INFO_PARAMS { + pub index: u32_, + pub flags: u32_, + pub length: u32_, + pub data: [u8_; 256usize], +} +impl Default for NV2080_CTRL_GPU_GET_GID_INFO_PARAMS { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct DOD_METHOD_DATA { + pub status: u32_, + pub acpiIdListLen: u32_, + pub acpiIdList: [u32_; 16usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub 
struct JT_METHOD_DATA { + pub status: u32_, + pub jtCaps: u32_, + pub jtRevId: u16_, + pub bSBIOSCaps: u8_, + pub __bindgen_padding_0: u8, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct MUX_METHOD_DATA_ELEMENT { + pub acpiId: u32_, + pub mode: u32_, + pub status: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct MUX_METHOD_DATA { + pub tableLen: u32_, + pub acpiIdMuxModeTable: [MUX_METHOD_DATA_ELEMENT; 16usize], + pub acpiIdMuxPartTable: [MUX_METHOD_DATA_ELEMENT; 16usize], + pub acpiIdMuxStateTable: [MUX_METHOD_DATA_ELEMENT; 16usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct CAPS_METHOD_DATA { + pub status: u32_, + pub optimusCaps: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct ACPI_METHOD_DATA { + pub bValid: u8_, + pub __bindgen_padding_0: [u8; 3usize], + pub dodMethodData: DOD_METHOD_DATA, + pub jtMethodData: JT_METHOD_DATA, + pub muxMethodData: MUX_METHOD_DATA, + pub capsMethodData: CAPS_METHOD_DATA, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct VIRTUAL_DISPLAY_GET_MAX_RESOLUTION_PARAMS { + pub headIndex: u32_, + pub maxHResolution: u32_, + pub maxVResolution: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct VIRTUAL_DISPLAY_GET_NUM_HEADS_PARAMS { + pub numHeads: u32_, + pub maxNumHeads: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct BUSINFO { + pub deviceID: u16_, + pub vendorID: u16_, + pub subdeviceID: u16_, + pub subvendorID: u16_, + pub revisionID: u8_, + pub __bindgen_padding_0: u8, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GSP_VF_INFO { + pub totalVFs: u32_, + pub firstVFOffset: u32_, + pub FirstVFBar0Address: u64_, + pub FirstVFBar1Address: u64_, + pub FirstVFBar2Address: u64_, + pub b64bitBar0: u8_, + pub b64bitBar1: u8_, + pub b64bitBar2: u8_, + 
pub __bindgen_padding_0: [u8; 5usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GSP_PCIE_CONFIG_REG { + pub linkCap: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct EcidManufacturingInfo { + pub ecidLow: u32_, + pub ecidHigh: u32_, + pub ecidExtended: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct FW_WPR_LAYOUT_OFFSET { + pub nonWprHeapOffset: u64_, + pub frtsOffset: u64_, +} +#[repr(C)] +#[derive(Debug, Copy, Clone, MaybeZeroable)] +pub struct GspStaticConfigInfo_t { + pub grCapsBits: [u8_; 23usize], + pub __bindgen_padding_0: u8, + pub gidInfo: NV2080_CTRL_GPU_GET_GID_INFO_PARAMS, + pub SKUInfo: NV2080_CTRL_BIOS_GET_SKU_INFO_PARAMS, + pub __bindgen_padding_1: [u8; 4usize], + pub fbRegionInfoParams: NV2080_CTRL_CMD_FB_GET_FB_REGION_INFO_PARAMS, + pub sriovCaps: NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS, + pub sriovMaxGfid: u32_, + pub engineCaps: [u32_; 3usize], + pub poisonFuseEnabled: u8_, + pub __bindgen_padding_2: [u8; 7usize], + pub fb_length: u64_, + pub fbio_mask: u64_, + pub fb_bus_width: u32_, + pub fb_ram_type: u32_, + pub fbp_mask: u64_, + pub l2_cache_size: u32_, + pub gpuNameString: [u8_; 64usize], + pub gpuShortNameString: [u8_; 64usize], + pub gpuNameString_Unicode: [u16_; 64usize], + pub bGpuInternalSku: u8_, + pub bIsQuadroGeneric: u8_, + pub bIsQuadroAd: u8_, + pub bIsNvidiaNvs: u8_, + pub bIsVgx: u8_, + pub bGeforceSmb: u8_, + pub bIsTitan: u8_, + pub bIsTesla: u8_, + pub bIsMobile: u8_, + pub bIsGc6Rtd3Allowed: u8_, + pub bIsGc8Rtd3Allowed: u8_, + pub bIsGcOffRtd3Allowed: u8_, + pub bIsGcoffLegacyAllowed: u8_, + pub bIsMigSupported: u8_, + pub RTD3GC6TotalBoardPower: u16_, + pub RTD3GC6PerstDelay: u16_, + pub __bindgen_padding_3: [u8; 2usize], + pub bar1PdeBase: u64_, + pub bar2PdeBase: u64_, + pub bVbiosValid: u8_, + pub __bindgen_padding_4: [u8; 3usize], + pub vbiosSubVendor: u32_, + pub vbiosSubDevice: u32_, + pub 
bPageRetirementSupported: u8_, + pub bSplitVasBetweenServerClientRm: u8_, + pub bClRootportNeedsNosnoopWAR: u8_, + pub __bindgen_padding_5: u8, + pub displaylessMaxHeads: VIRTUAL_DISPLAY_GET_NUM_HEADS_PARAMS, + pub displaylessMaxResolution: VIRTUAL_DISPLAY_GET_MAX_RESOLUTION_PARAMS, + pub __bindgen_padding_6: [u8; 4usize], + pub displaylessMaxPixels: u64_, + pub hInternalClient: u32_, + pub hInternalDevice: u32_, + pub hInternalSubdevice: u32_, + pub bSelfHostedMode: u8_, + pub bAtsSupported: u8_, + pub bIsGpuUefi: u8_, + pub bIsEfiInit: u8_, + pub ecidInfo: [EcidManufacturingInfo; 2usize], + pub fwWprLayoutOffset: FW_WPR_LAYOUT_OFFSET, +} +impl Default for GspStaticConfigInfo_t { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GspSystemInfo { + pub gpuPhysAddr: u64_, + pub gpuPhysFbAddr: u64_, + pub gpuPhysInstAddr: u64_, + pub gpuPhysIoAddr: u64_, + pub nvDomainBusDeviceFunc: u64_, + pub simAccessBufPhysAddr: u64_, + pub notifyOpSharedSurfacePhysAddr: u64_, + pub pcieAtomicsOpMask: u64_, + pub consoleMemSize: u64_, + pub maxUserVa: u64_, + pub pciConfigMirrorBase: u32_, + pub pciConfigMirrorSize: u32_, + pub PCIDeviceID: u32_, + pub PCISubDeviceID: u32_, + pub PCIRevisionID: u32_, + pub pcieAtomicsCplDeviceCapMask: u32_, + pub oorArch: u8_, + pub __bindgen_padding_0: [u8; 7usize], + pub clPdbProperties: u64_, + pub Chipset: u32_, + pub bGpuBehindBridge: u8_, + pub bFlrSupported: u8_, + pub b64bBar0Supported: u8_, + pub bMnocAvailable: u8_, + pub chipsetL1ssEnable: u32_, + pub bUpstreamL0sUnsupported: u8_, + pub bUpstreamL1Unsupported: u8_, + pub bUpstreamL1PorSupported: u8_, + pub bUpstreamL1PorMobileOnly: u8_, + pub bSystemHasMux: u8_, + pub upstreamAddressValid: u8_, + pub FHBBusInfo: BUSINFO, + pub chipsetIDInfo: BUSINFO, + pub __bindgen_padding_1: [u8; 
2usize], + pub acpiMethodData: ACPI_METHOD_DATA, + pub hypervisorType: u32_, + pub bIsPassthru: u8_, + pub __bindgen_padding_2: [u8; 7usize], + pub sysTimerOffsetNs: u64_, + pub gspVFInfo: GSP_VF_INFO, + pub bIsPrimary: u8_, + pub isGridBuild: u8_, + pub __bindgen_padding_3: [u8; 2usize], + pub pcieConfigReg: GSP_PCIE_CONFIG_REG, + pub gridBuildCsp: u32_, + pub bPreserveVideoMemoryAllocations: u8_, + pub bTdrEventSupported: u8_, + pub bFeatureStretchVblankCapable: u8_, + pub bEnableDynamicGranularityPageArrays: u8_, + pub bClockBoostSupported: u8_, + pub bRouteDispIntrsToCPU: u8_, + pub __bindgen_padding_4: [u8; 6usize], + pub hostPageSize: u64_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct MESSAGE_QUEUE_INIT_ARGUMENTS { + pub sharedMemPhysAddr: u64_, + pub pageTableEntryCount: u32_, + pub __bindgen_padding_0: [u8; 4usize], + pub cmdQueueOffset: u64_, + pub statQueueOffset: u64_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GSP_SR_INIT_ARGUMENTS { + pub oldLevel: u32_, + pub flags: u32_, + pub bInPMTransition: u8_, + pub __bindgen_padding_0: [u8; 3usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GSP_ARGUMENTS_CACHED { + pub messageQueueInitArguments: MESSAGE_QUEUE_INIT_ARGUMENTS, + pub srInitArguments: GSP_SR_INIT_ARGUMENTS, + pub gpuInstance: u32_, + pub bDmemStack: u8_, + pub __bindgen_padding_0: [u8; 7usize], + pub profilerArgs: GSP_ARGUMENTS_CACHED__bindgen_ty_1, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GSP_ARGUMENTS_CACHED__bindgen_ty_1 { + pub pa: u64_, + pub size: u64_, +} +#[repr(C)] +#[derive(Copy, Clone, MaybeZeroable)] +pub union rpc_message_rpc_union_field_v03_00 { + pub spare: u32_, + pub cpuRmGfid: u32_, +} +impl Default for rpc_message_rpc_union_field_v03_00 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 
1); + s.assume_init() + } + } +} +pub type rpc_message_rpc_union_field_v = rpc_message_rpc_union_field_v03_00; +#[repr(C)] +#[derive(MaybeZeroable)] +pub struct rpc_message_header_v03_00 { + pub header_version: u32_, + pub signature: u32_, + pub length: u32_, + pub function: u32_, + pub rpc_result: u32_, + pub rpc_result_private: u32_, + pub sequence: u32_, + pub u: rpc_message_rpc_union_field_v, + pub rpc_message_data: __IncompleteArrayField<u8_>, +} +impl Default for rpc_message_header_v03_00 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +pub type rpc_message_header_v = rpc_message_header_v03_00; +#[repr(C)] +#[derive(Copy, Clone, MaybeZeroable)] +pub struct GspFwWprMeta { + pub magic: u64_, + pub revision: u64_, + pub sysmemAddrOfRadix3Elf: u64_, + pub sizeOfRadix3Elf: u64_, + pub sysmemAddrOfBootloader: u64_, + pub sizeOfBootloader: u64_, + pub bootloaderCodeOffset: u64_, + pub bootloaderDataOffset: u64_, + pub bootloaderManifestOffset: u64_, + pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_1, + pub gspFwRsvdStart: u64_, + pub nonWprHeapOffset: u64_, + pub nonWprHeapSize: u64_, + pub gspFwWprStart: u64_, + pub gspFwHeapOffset: u64_, + pub gspFwHeapSize: u64_, + pub gspFwOffset: u64_, + pub bootBinOffset: u64_, + pub frtsOffset: u64_, + pub frtsSize: u64_, + pub gspFwWprEnd: u64_, + pub fbSize: u64_, + pub vgaWorkspaceOffset: u64_, + pub vgaWorkspaceSize: u64_, + pub bootCount: u64_, + pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_2, + pub gspFwHeapVfPartitionCount: u8_, + pub flags: u8_, + pub padding: [u8_; 2usize], + pub pmuReservedSize: u32_, + pub verified: u64_, +} +#[repr(C)] +#[derive(Copy, Clone, MaybeZeroable)] +pub union GspFwWprMeta__bindgen_ty_1 { + pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_1__bindgen_ty_1, + pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_1__bindgen_ty_2, +} +#[repr(C)] +#[derive(Debug, Default, 
Copy, Clone, MaybeZeroable)] +pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_1 { + pub sysmemAddrOfSignature: u64_, + pub sizeOfSignature: u64_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GspFwWprMeta__bindgen_ty_1__bindgen_ty_2 { + pub gspFwHeapFreeListWprOffset: u32_, + pub unused0: u32_, + pub unused1: u64_, +} +impl Default for GspFwWprMeta__bindgen_ty_1 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +#[repr(C)] +#[derive(Copy, Clone, MaybeZeroable)] +pub union GspFwWprMeta__bindgen_ty_2 { + pub __bindgen_anon_1: GspFwWprMeta__bindgen_ty_2__bindgen_ty_1, + pub __bindgen_anon_2: GspFwWprMeta__bindgen_ty_2__bindgen_ty_2, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_1 { + pub partitionRpcAddr: u64_, + pub partitionRpcRequestOffset: u16_, + pub partitionRpcReplyOffset: u16_, + pub elfCodeOffset: u32_, + pub elfDataOffset: u32_, + pub elfCodeSize: u32_, + pub elfDataSize: u32_, + pub lsUcodeVersion: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GspFwWprMeta__bindgen_ty_2__bindgen_ty_2 { + pub partitionRpcPadding: [u32_; 4usize], + pub sysmemAddrOfCrashReportQueue: u64_, + pub sizeOfCrashReportQueue: u32_, + pub lsUcodeVersionPadding: [u32_; 1usize], +} +impl Default for GspFwWprMeta__bindgen_ty_2 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +impl Default for GspFwWprMeta { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +pub type LibosAddress = u64_; +pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_NONE: LibosMemoryRegionKind = 
0; +pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS: LibosMemoryRegionKind = 1; +pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_RADIX3: LibosMemoryRegionKind = 2; +pub type LibosMemoryRegionKind = ffi::c_uint; +pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_NONE: LibosMemoryRegionLoc = 0; +pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM: LibosMemoryRegionLoc = 1; +pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_FB: LibosMemoryRegionLoc = 2; +pub type LibosMemoryRegionLoc = ffi::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct LibosMemoryRegionInitArgument { + pub id8: LibosAddress, + pub pa: LibosAddress, + pub size: LibosAddress, + pub kind: u8_, + pub loc: u8_, + pub __bindgen_padding_0: [u8; 6usize], +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct PACKED_REGISTRY_ENTRY { + pub nameOffset: u32_, + pub type_: u8_, + pub __bindgen_padding_0: [u8; 3usize], + pub data: u32_, + pub length: u32_, +} +#[repr(C)] +#[derive(Debug, Default, MaybeZeroable)] +pub struct PACKED_REGISTRY_TABLE { + pub size: u32_, + pub numEntries: u32_, + pub entries: __IncompleteArrayField<PACKED_REGISTRY_ENTRY>, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct msgqTxHeader { + pub version: u32_, + pub size: u32_, + pub msgSize: u32_, + pub msgCount: u32_, + pub writePtr: u32_, + pub flags: u32_, + pub rxHdrOff: u32_, + pub entryOff: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct msgqRxHeader { + pub readPtr: u32_, +} +#[repr(C)] +#[repr(align(8))] +#[derive(MaybeZeroable)] +pub struct GSP_MSG_QUEUE_ELEMENT { + pub authTagBuffer: [u8_; 16usize], + pub aadBuffer: [u8_; 16usize], + pub checkSum: u32_, + pub seqNum: u32_, + pub elemCount: u32_, + pub __bindgen_padding_0: [u8; 4usize], + pub rpc: rpc_message_header_v, +} +impl Default for GSP_MSG_QUEUE_ELEMENT { + fn default() -> Self { + let mut s = 
::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +#[repr(C)] +#[derive(Debug, Default, MaybeZeroable)] +pub struct rpc_run_cpu_sequencer_v17_00 { + pub bufferSizeDWord: u32_, + pub cmdIndex: u32_, + pub regSaveArea: [u32_; 8usize], + pub commandBuffer: __IncompleteArrayField<u32_>, +} +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_WRITE: GSP_SEQ_BUF_OPCODE = 0; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_MODIFY: GSP_SEQ_BUF_OPCODE = 1; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_POLL: GSP_SEQ_BUF_OPCODE = 2; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_DELAY_US: GSP_SEQ_BUF_OPCODE = 3; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_REG_STORE: GSP_SEQ_BUF_OPCODE = 4; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESET: GSP_SEQ_BUF_OPCODE = 5; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_START: GSP_SEQ_BUF_OPCODE = 6; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT: GSP_SEQ_BUF_OPCODE = 7; +pub const GSP_SEQ_BUF_OPCODE_GSP_SEQ_BUF_OPCODE_CORE_RESUME: GSP_SEQ_BUF_OPCODE = 8; +pub type GSP_SEQ_BUF_OPCODE = ffi::c_uint; +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_WRITE { + pub addr: u32_, + pub val: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_MODIFY { + pub addr: u32_, + pub mask: u32_, + pub val: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_POLL { + pub addr: u32_, + pub mask: u32_, + pub val: u32_, + pub timeout: u32_, + pub error: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GSP_SEQ_BUF_PAYLOAD_DELAY_US { + pub val: u32_, +} +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct GSP_SEQ_BUF_PAYLOAD_REG_STORE { + pub addr: u32_, + pub index: u32_, +} +#[repr(C)] 
+#[derive(Copy, Clone, MaybeZeroable)] +pub struct GSP_SEQUENCER_BUFFER_CMD { + pub opCode: GSP_SEQ_BUF_OPCODE, + pub payload: GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone, MaybeZeroable)] +pub union GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1 { + pub regWrite: GSP_SEQ_BUF_PAYLOAD_REG_WRITE, + pub regModify: GSP_SEQ_BUF_PAYLOAD_REG_MODIFY, + pub regPoll: GSP_SEQ_BUF_PAYLOAD_REG_POLL, + pub delayUs: GSP_SEQ_BUF_PAYLOAD_DELAY_US, + pub regStore: GSP_SEQ_BUF_PAYLOAD_REG_STORE, +} +impl Default for GSP_SEQUENCER_BUFFER_CMD__bindgen_ty_1 { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} +impl Default for GSP_SEQUENCER_BUFFER_CMD { + fn default() -> Self { + let mut s = ::core::mem::MaybeUninit::<Self>::uninit(); + unsafe { + ::core::ptr::write_bytes(s.as_mut_ptr(), 0, 1); + s.assume_init() + } + } +} diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs new file mode 100644 index 000000000000..474e4c8021db --- /dev/null +++ b/drivers/gpu/nova-core/gsp/sequencer.rs @@ -0,0 +1,400 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! GSP Sequencer implementation for Pre-hopper GSP boot sequence. + +use core::array; + +use kernel::{ + device, + io::{ + poll::read_poll_timeout, + Io, // + }, + prelude::*, + sync::aref::ARef, + time::{ + delay::fsleep, + Delta, // + }, + transmute::FromBytes, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp, + sec2::Sec2, + Falcon, // + }, + gsp::{ + cmdq::{ + Cmdq, + MessageFromGsp, // + }, + fw, + }, + num::FromSafeCast, + sbuffer::SBufferIter, +}; + +/// GSP Sequencer information containing the command sequence and data. +struct GspSequence { + /// Current command index for error reporting. + cmd_index: u32, + /// Command data buffer containing the sequence of commands. 
+ cmd_data: KVec<u8>, +} + +impl MessageFromGsp for GspSequence { + const FUNCTION: fw::MsgFunction = fw::MsgFunction::GspRunCpuSequencer; + type InitError = Error; + type Message = fw::RunCpuSequencer; + + fn read( + msg: &Self::Message, + sbuffer: &mut SBufferIter<array::IntoIter<&[u8], 2>>, + ) -> Result<Self, Self::InitError> { + let cmd_data = sbuffer.flush_into_kvec(GFP_KERNEL)?; + Ok(GspSequence { + cmd_index: msg.cmd_index(), + cmd_data, + }) + } +} + +const CMD_SIZE: usize = size_of::<fw::SequencerBufferCmd>(); + +/// GSP Sequencer Command types with payload data. +/// Commands have an opcode and an opcode-dependent struct. +#[allow(clippy::enum_variant_names)] +#[derive(Debug)] +pub(crate) enum GspSeqCmd { + RegWrite(fw::RegWritePayload), + RegModify(fw::RegModifyPayload), + RegPoll(fw::RegPollPayload), + DelayUs(fw::DelayUsPayload), + RegStore(fw::RegStorePayload), + CoreReset, + CoreStart, + CoreWaitForHalt, + CoreResume, +} + +impl GspSeqCmd { + /// Creates a new `GspSeqCmd` from raw data returning the command and its size in bytes. + pub(crate) fn new(data: &[u8], dev: &device::Device) -> Result<(Self, usize)> { + let fw_cmd = fw::SequencerBufferCmd::from_bytes(data).ok_or(EINVAL)?; + let opcode_size = core::mem::size_of::<u32>(); + + let (cmd, size) = match fw_cmd.opcode()? 
{ + fw::SeqBufOpcode::RegWrite => { + let payload = fw_cmd.reg_write_payload()?; + let size = opcode_size + size_of_val(&payload); + (GspSeqCmd::RegWrite(payload), size) + } + fw::SeqBufOpcode::RegModify => { + let payload = fw_cmd.reg_modify_payload()?; + let size = opcode_size + size_of_val(&payload); + (GspSeqCmd::RegModify(payload), size) + } + fw::SeqBufOpcode::RegPoll => { + let payload = fw_cmd.reg_poll_payload()?; + let size = opcode_size + size_of_val(&payload); + (GspSeqCmd::RegPoll(payload), size) + } + fw::SeqBufOpcode::DelayUs => { + let payload = fw_cmd.delay_us_payload()?; + let size = opcode_size + size_of_val(&payload); + (GspSeqCmd::DelayUs(payload), size) + } + fw::SeqBufOpcode::RegStore => { + let payload = fw_cmd.reg_store_payload()?; + let size = opcode_size + size_of_val(&payload); + (GspSeqCmd::RegStore(payload), size) + } + fw::SeqBufOpcode::CoreReset => (GspSeqCmd::CoreReset, opcode_size), + fw::SeqBufOpcode::CoreStart => (GspSeqCmd::CoreStart, opcode_size), + fw::SeqBufOpcode::CoreWaitForHalt => (GspSeqCmd::CoreWaitForHalt, opcode_size), + fw::SeqBufOpcode::CoreResume => (GspSeqCmd::CoreResume, opcode_size), + }; + + if data.len() < size { + dev_err!(dev, "Data is not enough for command\n"); + return Err(EINVAL); + } + + Ok((cmd, size)) + } +} + +/// GSP Sequencer for executing firmware commands during boot. +pub(crate) struct GspSequencer<'a> { + /// Sequencer information with command data. + seq_info: GspSequence, + /// `Bar0` for register access. + bar: &'a Bar0, + /// SEC2 falcon for core operations. + sec2_falcon: &'a Falcon<Sec2>, + /// GSP falcon for core operations. + gsp_falcon: &'a Falcon<Gsp>, + /// LibOS DMA handle address. + libos_dma_handle: u64, + /// Bootloader application version. + bootloader_app_version: u32, + /// Device for logging. 
+ dev: ARef<device::Device>, +} + +impl fw::RegWritePayload { + fn run(&self, sequencer: &GspSequencer<'_>) -> Result { + let addr = usize::from_safe_cast(self.addr()); + + sequencer.bar.try_write32(self.val(), addr) + } +} + +impl fw::RegModifyPayload { + fn run(&self, sequencer: &GspSequencer<'_>) -> Result { + let addr = usize::from_safe_cast(self.addr()); + + sequencer.bar.try_read32(addr).and_then(|val| { + sequencer + .bar + .try_write32((val & !self.mask()) | self.val(), addr) + }) + } +} + +impl fw::RegPollPayload { + fn run(&self, sequencer: &GspSequencer<'_>) -> Result { + let addr = usize::from_safe_cast(self.addr()); + + // Default timeout to 4 seconds. + let timeout_us = if self.timeout() == 0 { + 4_000_000 + } else { + i64::from(self.timeout()) + }; + + // First read. + sequencer.bar.try_read32(addr)?; + + // Poll the requested register with requested timeout. + read_poll_timeout( + || sequencer.bar.try_read32(addr), + |current| (current & self.mask()) == self.val(), + Delta::ZERO, + Delta::from_micros(timeout_us), + ) + .map(|_| ()) + } +} + +impl fw::DelayUsPayload { + fn run(&self, _sequencer: &GspSequencer<'_>) -> Result { + fsleep(Delta::from_micros(i64::from(self.val()))); + Ok(()) + } +} + +impl fw::RegStorePayload { + fn run(&self, sequencer: &GspSequencer<'_>) -> Result { + let addr = usize::from_safe_cast(self.addr()); + + sequencer.bar.try_read32(addr).map(|_| ()) + } +} + +impl GspSeqCmd { + fn run(&self, seq: &GspSequencer<'_>) -> Result { + match self { + GspSeqCmd::RegWrite(cmd) => cmd.run(seq), + GspSeqCmd::RegModify(cmd) => cmd.run(seq), + GspSeqCmd::RegPoll(cmd) => cmd.run(seq), + GspSeqCmd::DelayUs(cmd) => cmd.run(seq), + GspSeqCmd::RegStore(cmd) => cmd.run(seq), + GspSeqCmd::CoreReset => { + seq.gsp_falcon.reset(seq.bar)?; + seq.gsp_falcon.dma_reset(seq.bar); + Ok(()) + } + GspSeqCmd::CoreStart => { + seq.gsp_falcon.start(seq.bar)?; + Ok(()) + } + GspSeqCmd::CoreWaitForHalt => { + seq.gsp_falcon.wait_till_halted(seq.bar)?; + Ok(()) 
+ } + GspSeqCmd::CoreResume => { + // At this point, 'SEC2-RTOS' has been loaded into SEC2 by the sequencer + // but neither SEC2-RTOS nor GSP-RM is running yet. This part of the + // sequencer will start both. + + // Reset the GSP to prepare it for resuming. + seq.gsp_falcon.reset(seq.bar)?; + + // Write the libOS DMA handle to GSP mailboxes. + seq.gsp_falcon.write_mailboxes( + seq.bar, + Some(seq.libos_dma_handle as u32), + Some((seq.libos_dma_handle >> 32) as u32), + ); + + // Start the SEC2 falcon which will trigger GSP-RM to resume on the GSP. + seq.sec2_falcon.start(seq.bar)?; + + // Poll until GSP-RM reload/resume has completed (up to 2 seconds). + seq.gsp_falcon + .check_reload_completed(seq.bar, Delta::from_secs(2))?; + + // Verify SEC2 completed successfully by checking its mailbox for errors. + let mbox0 = seq.sec2_falcon.read_mailbox0(seq.bar); + if mbox0 != 0 { + dev_err!(seq.dev, "Sequencer: sec2 errors: {:?}\n", mbox0); + return Err(EIO); + } + + // Configure GSP with the bootloader version. + seq.gsp_falcon + .write_os_version(seq.bar, seq.bootloader_app_version); + + // Verify the GSP's RISC-V core is active indicating successful GSP boot. + if !seq.gsp_falcon.is_riscv_active(seq.bar) { + dev_err!(seq.dev, "Sequencer: RISC-V core is not active\n"); + return Err(EIO); + } + Ok(()) + } + } + } +} + +/// Iterator over GSP sequencer commands. +pub(crate) struct GspSeqIter<'a> { + /// Command data buffer. + cmd_data: &'a [u8], + /// Current position in the buffer. + current_offset: usize, + /// Total number of commands to process. + total_cmds: u32, + /// Number of commands processed so far. + cmds_processed: u32, + /// Device for logging. + dev: ARef<device::Device>, +} + +impl<'a> Iterator for GspSeqIter<'a> { + type Item = Result<GspSeqCmd>; + + fn next(&mut self) -> Option<Self::Item> { + // Stop if we've processed all commands or reached the end of data. 
+ if self.cmds_processed >= self.total_cmds || self.current_offset >= self.cmd_data.len() { + return None; + } + + // Check if we have enough data for opcode. + if self.current_offset + core::mem::size_of::<u32>() > self.cmd_data.len() { + return Some(Err(EIO)); + } + + let offset = self.current_offset; + + // Handle command creation based on available data, + // zero-pad if necessary (since last command may not be full size). + let mut buffer = [0u8; CMD_SIZE]; + let copy_len = if offset + CMD_SIZE <= self.cmd_data.len() { + CMD_SIZE + } else { + self.cmd_data.len() - offset + }; + buffer[..copy_len].copy_from_slice(&self.cmd_data[offset..offset + copy_len]); + let cmd_result = GspSeqCmd::new(&buffer, &self.dev); + + cmd_result.map_or_else( + |_err| { + dev_err!(self.dev, "Error parsing command at offset {}\n", offset); + None + }, + |(cmd, size)| { + self.current_offset += size; + self.cmds_processed += 1; + Some(Ok(cmd)) + }, + ) + } +} + +impl<'a> GspSequencer<'a> { + fn iter(&self) -> GspSeqIter<'_> { + let cmd_data = &self.seq_info.cmd_data[..]; + + GspSeqIter { + cmd_data, + current_offset: 0, + total_cmds: self.seq_info.cmd_index, + cmds_processed: 0, + dev: self.dev.clone(), + } + } +} + +/// Parameters for running the GSP sequencer. +pub(crate) struct GspSequencerParams<'a> { + /// Bootloader application version. + pub(crate) bootloader_app_version: u32, + /// LibOS DMA handle address. + pub(crate) libos_dma_handle: u64, + /// GSP falcon for core operations. + pub(crate) gsp_falcon: &'a Falcon<Gsp>, + /// SEC2 falcon for core operations. + pub(crate) sec2_falcon: &'a Falcon<Sec2>, + /// Device for logging. + pub(crate) dev: ARef<device::Device>, + /// BAR0 for register access. 
+ pub(crate) bar: &'a Bar0, +} + +impl<'a> GspSequencer<'a> { + pub(crate) fn run(cmdq: &Cmdq, params: GspSequencerParams<'a>) -> Result { + let seq_info = loop { + match cmdq.receive_msg::<GspSequence>(Cmdq::RECEIVE_TIMEOUT) { + Ok(seq_info) => break seq_info, + Err(ERANGE) => continue, + Err(e) => return Err(e), + } + }; + + let sequencer = GspSequencer { + seq_info, + bar: params.bar, + sec2_falcon: params.sec2_falcon, + gsp_falcon: params.gsp_falcon, + libos_dma_handle: params.libos_dma_handle, + bootloader_app_version: params.bootloader_app_version, + dev: params.dev, + }; + + dev_dbg!(sequencer.dev, "Running CPU Sequencer commands\n"); + + for cmd_result in sequencer.iter() { + match cmd_result { + Ok(cmd) => cmd.run(&sequencer)?, + Err(e) => { + dev_err!( + sequencer.dev, + "Error running command at index {}\n", + sequencer.seq_info.cmd_index + ); + return Err(e); + } + } + } + + dev_dbg!( + sequencer.dev, + "CPU Sequencer commands completed successfully\n" + ); + Ok(()) + } +} diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs new file mode 100644 index 000000000000..04a1fa6b25f8 --- /dev/null +++ b/drivers/gpu/nova-core/nova_core.rs @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Nova Core GPU Driver + +use kernel::{ + debugfs, + driver::Registration, + pci, + prelude::*, + InPlaceModule, // +}; + +#[macro_use] +mod bitfield; + +mod driver; +mod falcon; +mod fb; +mod firmware; +mod gfw; +mod gpu; +mod gsp; +#[macro_use] +mod num; +mod regs; +mod sbuffer; +mod vbios; + +pub(crate) const MODULE_NAME: &core::ffi::CStr = <LocalModule as kernel::ModuleMetadata>::NAME; + +// TODO: Move this into per-module data once that exists. +static mut DEBUGFS_ROOT: Option<debugfs::Dir> = None; + +/// Guard that clears `DEBUGFS_ROOT` when dropped. 
+struct DebugfsRootGuard; + +impl Drop for DebugfsRootGuard { + fn drop(&mut self) { + // SAFETY: This guard is dropped after `_driver` (due to field order), + // so the driver is unregistered and no probe() can be running. + unsafe { DEBUGFS_ROOT = None }; + } +} + +#[pin_data] +struct NovaCoreModule { + // Fields are dropped in declaration order, so `_driver` is dropped first, + // then `_debugfs_guard` clears `DEBUGFS_ROOT`. + #[pin] + _driver: Registration<pci::Adapter<driver::NovaCore>>, + _debugfs_guard: DebugfsRootGuard, +} + +impl InPlaceModule for NovaCoreModule { + fn init(module: &'static kernel::ThisModule) -> impl PinInit<Self, Error> { + let dir = debugfs::Dir::new(kernel::c_str!("nova_core")); + + // SAFETY: We are the only driver code running during init, so there + // cannot be any concurrent access to `DEBUGFS_ROOT`. + unsafe { DEBUGFS_ROOT = Some(dir) }; + + try_pin_init!(Self { + _driver <- Registration::new(MODULE_NAME, module), + _debugfs_guard: DebugfsRootGuard, + }) + } +} + +module! { + type: NovaCoreModule, + name: "NovaCore", + authors: ["Danilo Krummrich"], + description: "Nova Core GPU driver", + license: "GPL v2", + firmware: [], +} + +kernel::module_firmware!(firmware::ModInfoBuilder); diff --git a/drivers/gpu/nova-core/num.rs b/drivers/gpu/nova-core/num.rs new file mode 100644 index 000000000000..6c824b8d7b97 --- /dev/null +++ b/drivers/gpu/nova-core/num.rs @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Numerical helpers functions and traits. +//! +//! This is essentially a staging module for code to mature until it can be moved to the `kernel` +//! crate. + +use kernel::{ + macros::paste, + prelude::*, // +}; + +/// Implements safe `as` conversion functions from a given type into a series of target types. +/// +/// These functions can be used in place of `as`, with the guarantee that they will be lossless. +macro_rules! impl_safe_as { + ($from:ty as { $($into:ty),* }) => { + $( + paste! 
{ + #[doc = ::core::concat!( + "Losslessly converts a [`", + ::core::stringify!($from), + "`] into a [`", + ::core::stringify!($into), + "`].")] + /// + /// This conversion is allowed as it is always lossless. Prefer this over the `as` + /// keyword to ensure no lossy casts are performed. + /// + /// This is for use from a `const` context. For non `const` use, prefer the + /// [`FromSafeCast`] and [`IntoSafeCast`] traits. + /// + /// # Examples + /// + /// ``` + /// use crate::num; + /// + #[doc = ::core::concat!( + "assert_eq!(num::", + ::core::stringify!($from), + "_as_", + ::core::stringify!($into), + "(1", + ::core::stringify!($from), + "), 1", + ::core::stringify!($into), + ");")] + /// ``` + #[allow(unused)] + #[inline(always)] + pub(crate) const fn [<$from _as_ $into>](value: $from) -> $into { + kernel::static_assert!(size_of::<$into>() >= size_of::<$from>()); + + value as $into + } + } + )* + }; +} + +impl_safe_as!(u8 as { u16, u32, u64, usize }); +impl_safe_as!(u16 as { u32, u64, usize }); +impl_safe_as!(u32 as { u64, usize } ); +// `u64` and `usize` have the same size on 64-bit platforms. +#[cfg(CONFIG_64BIT)] +impl_safe_as!(u64 as { usize } ); + +// A `usize` fits into a `u64` on 32 and 64-bit platforms. +#[cfg(any(CONFIG_32BIT, CONFIG_64BIT))] +impl_safe_as!(usize as { u64 }); + +// A `usize` fits into a `u32` on 32-bit platforms. +#[cfg(CONFIG_32BIT)] +impl_safe_as!(usize as { u32 }); + +/// Extension trait providing guaranteed lossless cast to `Self` from `T`. +/// +/// The standard library's `From` implementations do not cover conversions that are not portable or +/// future-proof. For instance, even though it is safe today, `From<usize>` is not implemented for +/// [`u64`] because of the possibility to support larger-than-64bit architectures in the future. 
// `usize_as_u64` is generated by `impl_safe_as!(usize as { u64 })`; its static assertion
// guarantees that a `u64` is at least as large as a `usize`.
impl FromSafeCast<usize> for u64 {
    fn from_safe_cast(value: usize) -> Self {
        usize_as_u64(value)
    }
}

// Only available on 32-bit builds, like the `usize_as_u32` function it relies on.
#[cfg(CONFIG_32BIT)]
impl FromSafeCast<usize> for u32 {
    fn from_safe_cast(value: usize) -> Self {
        usize_as_u32(value)
    }
}

// `u32_as_usize` is generated by `impl_safe_as!(u32 as { u64, usize })`; its static assertion
// guarantees that a `usize` is at least as large as a `u32`.
impl FromSafeCast<u32> for usize {
    fn from_safe_cast(value: u32) -> Self {
        u32_as_usize(value)
    }
}

// Only available on 64-bit builds, like the `u64_as_usize` function it relies on.
#[cfg(CONFIG_64BIT)]
impl FromSafeCast<u64> for usize {
    fn from_safe_cast(value: u64) -> Self {
        u64_as_usize(value)
    }
}
this trait is to [`FromSafeCast`] what [`Into`] +/// is to [`From`]. +/// +/// See the documentation of [`FromSafeCast`] for the motivation. +/// +/// # Examples +/// +/// ``` +/// use crate::num::IntoSafeCast; +/// +/// assert_eq!(0xf00u32.into_safe_cast(), 0xf00u32 as usize); +/// ``` +pub(crate) trait IntoSafeCast<T> { + /// Convert `self` into a `T`. This operation is guaranteed to be lossless. + fn into_safe_cast(self) -> T; +} + +/// Reverse operation for types implementing [`FromSafeCast`]. +impl<S, T> IntoSafeCast<T> for S +where + T: FromSafeCast<S>, +{ + fn into_safe_cast(self) -> T { + T::from_safe_cast(self) + } +} + +/// Implements lossless conversion of a constant from a larger type into a smaller one. +macro_rules! impl_const_into { + ($from:ty => { $($into:ty),* }) => { + $( + paste! { + #[doc = ::core::concat!( + "Performs a build-time safe conversion of a [`", + ::core::stringify!($from), + "`] constant value into a [`", + ::core::stringify!($into), + "`].")] + /// + /// This checks at compile-time that the conversion is lossless, and triggers a build + /// error if it isn't. + /// + /// # Examples + /// + /// ``` + /// use crate::num; + /// + /// // Succeeds because the value of the source fits into the destination's type. + #[doc = ::core::concat!( + "assert_eq!(num::", + ::core::stringify!($from), + "_into_", + ::core::stringify!($into), + "::<1", + ::core::stringify!($from), + ">(), 1", + ::core::stringify!($into), + ");")] + /// ``` + #[allow(unused)] + pub(crate) const fn [<$from _into_ $into>]<const N: $from>() -> $into { + // Make sure that the target type is smaller than the source one. + static_assert!($from::BITS >= $into::BITS); + // CAST: we statically enforced above that `$from` is larger than `$into`, so the + // `as` conversion will be lossless. 
+ build_assert!(N >= $into::MIN as $from && N <= $into::MAX as $from); + + N as $into + } + } + )* + }; +} + +impl_const_into!(usize => { u8, u16, u32 }); +impl_const_into!(u64 => { u8, u16, u32 }); +impl_const_into!(u32 => { u8, u16 }); +impl_const_into!(u16 => { u8 }); + +/// Creates an enum type associated to a [`Bounded`](kernel::num::Bounded), with a [`From`] +/// conversion to the associated `Bounded` and either a [`TryFrom`] or `From` conversion from the +/// associated `Bounded`. +// TODO[FPRI]: This is a temporary solution to be replaced with the corresponding derive macros +// once they land. +#[macro_export] +macro_rules! bounded_enum { + ( + $(#[$enum_meta:meta])* + $vis:vis enum $enum_type:ident with $from_impl:ident<Bounded<$width:ty, $length:literal>> { + $( $(#[doc = $variant_doc:expr])* $variant:ident = $value:expr),* $(,)* + } + ) => { + $(#[$enum_meta])* + $vis enum $enum_type { + $( + $(#[doc = $variant_doc])* + $variant = $value + ),* + } + + impl core::convert::From<$enum_type> for kernel::num::Bounded<$width, $length> { + fn from(value: $enum_type) -> Self { + match value { + $($enum_type::$variant => + kernel::num::Bounded::<$width, _>::new::<{ $value }>()),* + } + } + } + + bounded_enum!(@impl_from $enum_type with $from_impl<Bounded<$width, $length>> { + $($variant = $value),* + }); + }; + + // `TryFrom` implementation from associated `Bounded` to enum type. + (@impl_from $enum_type:ident with TryFrom<Bounded<$width:ty, $length:literal>> { + $($variant:ident = $value:expr),* $(,)* + }) => { + impl core::convert::TryFrom<kernel::num::Bounded<$width, $length>> for $enum_type { + type Error = kernel::error::Error; + + fn try_from( + value: kernel::num::Bounded<$width, $length> + ) -> kernel::error::Result<Self> { + match value.get() { + $( + $value => Ok($enum_type::$variant), + )* + _ => Err(kernel::error::code::EINVAL), + } + } + } + }; + + // `From` implementation from associated `Bounded` to enum type. 
Triggers a build-time error if + // all possible values of the `Bounded` are not covered by the enum type. + (@impl_from $enum_type:ident with From<Bounded<$width:ty, $length:literal>> { + $($variant:ident = $value:expr),* $(,)* + }) => { + impl core::convert::From<kernel::num::Bounded<$width, $length>> for $enum_type { + fn from(value: kernel::num::Bounded<$width, $length>) -> Self { + const MAX: $width = 1 << $length; + + // Makes the compiler optimizer aware of the possible range of values. + let value = value.get() & ((1 << $length) - 1); + match value { + $( + $value => $enum_type::$variant, + )* + // PANIC: we cannot reach this arm as all possible variants are handled by the + // match arms above. It is here to make the compiler complain if `$enum_type` + // does not cover all values of the `0..MAX` range. + MAX.. => unreachable!(), + } + } + } + } +} diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs new file mode 100644 index 000000000000..2f171a4ff9ba --- /dev/null +++ b/drivers/gpu/nova-core/regs.rs @@ -0,0 +1,540 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + io::{ + register, + register::WithBase, + Io, // + }, + prelude::*, + time, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + DmaTrfCmdSize, + FalconCoreRev, + FalconCoreRevSubversion, + FalconEngine, + FalconFbifMemType, + FalconFbifTarget, + FalconMem, + FalconModSelAlgo, + FalconSecurityModel, + PFalcon2Base, + PFalconBase, + PeregrineCoreSelect, // + }, + gpu::{ + Architecture, + Chipset, // + }, + num::FromSafeCast, +}; + +// PMC + +register! { + /// Basic revision information about the GPU. + pub(crate) NV_PMC_BOOT_0(u32) @ 0x00000000 { + /// Lower bits of the architecture. + 28:24 architecture_0; + /// Implementation version of the architecture. + 23:20 implementation; + /// MSB of the architecture. + 8:8 architecture_1; + /// Major revision of the chip. + 7:4 major_revision; + /// Minor revision of the chip. 
+ 3:0 minor_revision; + } + + /// Extended architecture information. + pub(crate) NV_PMC_BOOT_42(u32) @ 0x00000a00 { + /// Architecture value. + 29:24 architecture ?=> Architecture; + /// Implementation version of the architecture. + 23:20 implementation; + /// Major revision of the chip. + 19:16 major_revision; + /// Minor revision of the chip. + 15:12 minor_revision; + } +} + +impl NV_PMC_BOOT_0 { + pub(crate) fn is_older_than_fermi(self) -> bool { + // From https://github.com/NVIDIA/open-gpu-doc/tree/master/manuals : + const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u32 = 0xc; + + // Older chips left arch1 zeroed out. That, combined with an arch0 value that is less than + // GF100, means "older than Fermi". + self.architecture_1() == 0 && self.architecture_0() < NV_PMC_BOOT_0_ARCHITECTURE_GF100 + } +} + +impl NV_PMC_BOOT_42 { + /// Combines `architecture` and `implementation` to obtain a code unique to the chipset. + pub(crate) fn chipset(self) -> Result<Chipset> { + self.architecture() + .map(|arch| { + ((arch as u32) << Self::IMPLEMENTATION_RANGE.len()) + | u32::from(self.implementation()) + }) + .and_then(Chipset::try_from) + } + + /// Returns the raw architecture value from the register. + fn architecture_raw(self) -> u8 { + ((self.into_raw() >> Self::ARCHITECTURE_RANGE.start()) + & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1)) as u8 + } +} + +impl kernel::fmt::Display for NV_PMC_BOOT_42 { + fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result { + write!( + f, + "boot42 = 0x{:08x} (architecture 0x{:x}, implementation 0x{:x})", + self.inner, + self.architecture_raw(), + self.implementation() + ) + } +} + +// PBUS + +register! { + pub(crate) NV_PBUS_SW_SCRATCH(u32)[64] @ 0x00001400 {} + + /// Scratch register 0xe used as FRTS firmware error code. + pub(crate) NV_PBUS_SW_SCRATCH_0E_FRTS_ERR(u32) => NV_PBUS_SW_SCRATCH[0xe] { + 31:16 frts_err_code; + } +} + +// PFB + +register! 
{ + /// Low bits of the physical system memory address used by the GPU to perform sysmembar + /// operations (see [`crate::fb::SysmemFlush`]). + pub(crate) NV_PFB_NISO_FLUSH_SYSMEM_ADDR(u32) @ 0x00100c10 { + 31:0 adr_39_08; + } + + /// High bits of the physical system memory address used by the GPU to perform sysmembar + /// operations (see [`crate::fb::SysmemFlush`]). + pub(crate) NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x00100c40 { + 23:0 adr_63_40; + } + + pub(crate) NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE(u32) @ 0x00100ce0 { + 30:30 ecc_mode_enabled => bool; + 9:4 lower_mag; + 3:0 lower_scale; + } + + pub(crate) NV_PFB_PRI_MMU_WPR2_ADDR_LO(u32) @ 0x001fa824 { + /// Bits 12..40 of the lower (inclusive) bound of the WPR2 region. + 31:4 lo_val; + } + + pub(crate) NV_PFB_PRI_MMU_WPR2_ADDR_HI(u32) @ 0x001fa828 { + /// Bits 12..40 of the higher (exclusive) bound of the WPR2 region. + 31:4 hi_val; + } +} + +impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { + /// Returns the usable framebuffer size, in bytes. + pub(crate) fn usable_fb_size(self) -> u64 { + let size = (u64::from(self.lower_mag()) << u64::from(self.lower_scale())) + * u64::from_safe_cast(kernel::sizes::SZ_1M); + + if self.ecc_mode_enabled() { + // Remove the amount of memory reserved for ECC (one per 16 units). + size / 16 * 15 + } else { + size + } + } +} + +impl NV_PFB_PRI_MMU_WPR2_ADDR_LO { + /// Returns the lower (inclusive) bound of the WPR2 region. + pub(crate) fn lower_bound(self) -> u64 { + u64::from(self.lo_val()) << 12 + } +} + +impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { + /// Returns the higher (exclusive) bound of the WPR2 region. + /// + /// A value of zero means the WPR2 region is not set. + pub(crate) fn higher_bound(self) -> u64 { + u64::from(self.hi_val()) << 12 + } +} + +// PGSP + +register! { + pub(crate) NV_PGSP_QUEUE_HEAD(u32) @ 0x00110c00 { + 31:0 address; + } +} + +// PGC6 register space. 
+// +// `GC6` is a GPU low-power state where VRAM is in self-refresh and the GPU is powered down (except +// for power rails needed to keep self-refresh working and important registers and hardware +// blocks). +// +// These scratch registers remain powered on even in a low-power state and have a designated group +// number. + +register! { + /// Boot Sequence Interface (BSI) register used to determine + /// if GSP reload/resume has completed during the boot process. + pub(crate) NV_PGC6_BSI_SECURE_SCRATCH_14(u32) @ 0x001180f8 { + 26:26 boot_stage_3_handoff => bool; + } + + /// Privilege level mask register. It dictates whether the host CPU has privilege to access the + /// `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK(u32) @ 0x00118128 { + /// Set after FWSEC lowers its protection level. + 0:0 read_protection_level0 => bool; + } + + /// OpenRM defines this as a register array, but doesn't specify its size and only uses its + /// first element. Be conservative until we know the actual size or need to use more registers. + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05(u32)[1] @ 0x00118234 {} + + /// Scratch group 05 register 0 used as GFW boot progress indicator. + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT(u32) + => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0] { + /// Progress of GFW boot (0xff means completed). + 7:0 progress; + } + + pub(crate) NV_PGC6_AON_SECURE_SCRATCH_GROUP_42(u32) @ 0x001183a4 { + 31:0 value; + } + + /// Scratch group 42 register used as framebuffer size. + pub(crate) NV_USABLE_FB_SIZE_IN_MB(u32) => NV_PGC6_AON_SECURE_SCRATCH_GROUP_42 { + /// Usable framebuffer size, in megabytes. + 31:0 value; + } +} + +impl NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT { + /// Returns `true` if GFW boot is completed. 
+ pub(crate) fn completed(self) -> bool { + self.progress() == 0xff + } +} + +impl NV_USABLE_FB_SIZE_IN_MB { + /// Returns the usable framebuffer size, in bytes. + pub(crate) fn usable_fb_size(self) -> u64 { + u64::from(self.value()) * u64::from_safe_cast(kernel::sizes::SZ_1M) + } +} + +// PDISP + +register! { + pub(crate) NV_PDISP_VGA_WORKSPACE_BASE(u32) @ 0x00625f04 { + /// VGA workspace base address divided by 0x10000. + 31:8 addr; + /// Set if the `addr` field is valid. + 3:3 status_valid => bool; + } +} + +impl NV_PDISP_VGA_WORKSPACE_BASE { + /// Returns the base address of the VGA workspace, or `None` if none exists. + pub(crate) fn vga_workspace_addr(self) -> Option<u64> { + if self.status_valid() { + Some(u64::from(self.addr()) << 16) + } else { + None + } + } +} + +// FUSE + +pub(crate) const NV_FUSE_OPT_FPF_SIZE: usize = 16; + +register! { + pub(crate) NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x00824100 { + 15:0 data => u16; + } + + pub(crate) NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x00824140 { + 15:0 data => u16; + } + + pub(crate) NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION(u32)[NV_FUSE_OPT_FPF_SIZE] @ 0x008241c0 { + 15:0 data => u16; + } +} + +// PFALCON + +register! { + pub(crate) NV_PFALCON_FALCON_IRQSCLR(u32) @ PFalconBase + 0x00000004 { + 6:6 swgen0 => bool; + 4:4 halt => bool; + } + + pub(crate) NV_PFALCON_FALCON_MAILBOX0(u32) @ PFalconBase + 0x00000040 { + 31:0 value => u32; + } + + pub(crate) NV_PFALCON_FALCON_MAILBOX1(u32) @ PFalconBase + 0x00000044 { + 31:0 value => u32; + } + + /// Used to store version information about the firmware running + /// on the Falcon processor. + pub(crate) NV_PFALCON_FALCON_OS(u32) @ PFalconBase + 0x00000080 { + 31:0 value => u32; + } + + pub(crate) NV_PFALCON_FALCON_RM(u32) @ PFalconBase + 0x00000084 { + 31:0 value => u32; + } + + pub(crate) NV_PFALCON_FALCON_HWCFG2(u32) @ PFalconBase + 0x000000f4 { + /// Signal indicating that reset is completed (GA102+). 
+ 31:31 reset_ready => bool; + /// Set to 0 after memory scrubbing is completed. + 12:12 mem_scrubbing => bool; + 10:10 riscv => bool; + } + + pub(crate) NV_PFALCON_FALCON_CPUCTL(u32) @ PFalconBase + 0x00000100 { + 6:6 alias_en => bool; + 4:4 halted => bool; + 1:1 startcpu => bool; + } + + pub(crate) NV_PFALCON_FALCON_BOOTVEC(u32) @ PFalconBase + 0x00000104 { + 31:0 value => u32; + } + + pub(crate) NV_PFALCON_FALCON_DMACTL(u32) @ PFalconBase + 0x0000010c { + 7:7 secure_stat => bool; + 6:3 dmaq_num; + 2:2 imem_scrubbing => bool; + 1:1 dmem_scrubbing => bool; + 0:0 require_ctx => bool; + } + + pub(crate) NV_PFALCON_FALCON_DMATRFBASE(u32) @ PFalconBase + 0x00000110 { + 31:0 base => u32; + } + + pub(crate) NV_PFALCON_FALCON_DMATRFMOFFS(u32) @ PFalconBase + 0x00000114 { + 23:0 offs; + } + + pub(crate) NV_PFALCON_FALCON_DMATRFCMD(u32) @ PFalconBase + 0x00000118 { + 16:16 set_dmtag; + 14:12 ctxdma; + 10:8 size ?=> DmaTrfCmdSize; + 5:5 is_write => bool; + 4:4 imem => bool; + 3:2 sec; + 1:1 idle => bool; + 0:0 full => bool; + } + + pub(crate) NV_PFALCON_FALCON_DMATRFFBOFFS(u32) @ PFalconBase + 0x0000011c { + 31:0 offs => u32; + } + + pub(crate) NV_PFALCON_FALCON_DMATRFBASE1(u32) @ PFalconBase + 0x00000128 { + 8:0 base; + } + + pub(crate) NV_PFALCON_FALCON_HWCFG1(u32) @ PFalconBase + 0x0000012c { + /// Core revision subversion. + 7:6 core_rev_subversion => FalconCoreRevSubversion; + /// Security model. + 5:4 security_model ?=> FalconSecurityModel; + /// Core revision. + 3:0 core_rev ?=> FalconCoreRev; + } + + pub(crate) NV_PFALCON_FALCON_CPUCTL_ALIAS(u32) @ PFalconBase + 0x00000130 { + 1:1 startcpu => bool; + } + + /// IMEM access control register. Up to 4 ports are available for IMEM access. + pub(crate) NV_PFALCON_FALCON_IMEMC(u32)[4, stride = 16] @ PFalconBase + 0x00000180 { + /// Access secure IMEM. + 28:28 secure => bool; + /// Auto-increment on write. + 24:24 aincw => bool; + /// IMEM block and word offset. + 15:0 offs; + } + + /// IMEM data register. 
Reading/writing this register accesses IMEM at the address + /// specified by the corresponding IMEMC register. + pub(crate) NV_PFALCON_FALCON_IMEMD(u32)[4, stride = 16] @ PFalconBase + 0x00000184 { + 31:0 data; + } + + /// IMEM tag register. Used to set the tag for the current IMEM block. + pub(crate) NV_PFALCON_FALCON_IMEMT(u32)[4, stride = 16] @ PFalconBase + 0x00000188 { + 15:0 tag; + } + + /// DMEM access control register. Up to 8 ports are available for DMEM access. + pub(crate) NV_PFALCON_FALCON_DMEMC(u32)[8, stride = 8] @ PFalconBase + 0x000001c0 { + /// Auto-increment on write. + 24:24 aincw => bool; + /// DMEM block and word offset. + 15:0 offs; + } + + /// DMEM data register. Reading/writing this register accesses DMEM at the address + /// specified by the corresponding DMEMC register. + pub(crate) NV_PFALCON_FALCON_DMEMD(u32)[8, stride = 8] @ PFalconBase + 0x000001c4 { + 31:0 data; + } + + /// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the + /// falcon instance. + pub(crate) NV_PFALCON_FALCON_ENGINE(u32) @ PFalconBase + 0x000003c0 { + 0:0 reset => bool; + } + + pub(crate) NV_PFALCON_FBIF_TRANSCFG(u32)[8] @ PFalconBase + 0x00000600 { + 2:2 mem_type => FalconFbifMemType; + 1:0 target ?=> FalconFbifTarget; + } + + pub(crate) NV_PFALCON_FBIF_CTL(u32) @ PFalconBase + 0x00000624 { + 7:7 allow_phys_no_ctx => bool; + } +} + +impl NV_PFALCON_FALCON_DMACTL { + /// Returns `true` if memory scrubbing is completed. 
+ pub(crate) fn mem_scrubbing_done(self) -> bool { + !self.dmem_scrubbing() && !self.imem_scrubbing() + } +} + +impl NV_PFALCON_FALCON_DMATRFCMD { + /// Programs the `imem` and `sec` fields for the given FalconMem + pub(crate) fn with_falcon_mem(self, mem: FalconMem) -> Self { + let this = self.with_imem(mem != FalconMem::Dmem); + + match mem { + FalconMem::ImemSecure => this.with_const_sec::<1>(), + _ => this.with_const_sec::<0>(), + } + } +} + +impl NV_PFALCON_FALCON_ENGINE { + /// Resets the falcon + pub(crate) fn reset_engine<E: FalconEngine>(bar: &Bar0) { + bar.update(Self::of::<E>(), |r| r.with_reset(true)); + + // TIMEOUT: falcon engine should not take more than 10us to reset. + time::delay::fsleep(time::Delta::from_micros(10)); + + bar.update(Self::of::<E>(), |r| r.with_reset(false)); + } +} + +impl NV_PFALCON_FALCON_HWCFG2 { + /// Returns `true` if memory scrubbing is completed. + pub(crate) fn mem_scrubbing_done(self) -> bool { + !self.mem_scrubbing() + } +} + +/* PFALCON2 */ + +register! { + pub(crate) NV_PFALCON2_FALCON_MOD_SEL(u32) @ PFalcon2Base + 0x00000180 { + 7:0 algo ?=> FalconModSelAlgo; + } + + pub(crate) NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID(u32) @ PFalcon2Base + 0x00000198 { + 7:0 ucode_id => u8; + } + + pub(crate) NV_PFALCON2_FALCON_BROM_ENGIDMASK(u32) @ PFalcon2Base + 0x0000019c { + 31:0 value => u32; + } + + /// OpenRM defines this as a register array, but doesn't specify its size and only uses its + /// first element. Be conservative until we know the actual size or need to use more registers. + pub(crate) NV_PFALCON2_FALCON_BROM_PARAADDR(u32)[1] @ PFalcon2Base + 0x00000210 { + 31:0 value => u32; + } +} + +// PRISCV + +register! { + /// RISC-V status register for debug (Turing and GA100 only). + /// Reflects current RISC-V core status. + pub(crate) NV_PRISCV_RISCV_CORE_SWITCH_RISCV_STATUS(u32) @ PFalcon2Base + 0x00000240 { + /// RISC-V core active/inactive status. + 0:0 active_stat => bool; + } + + /// GA102 and later. 
+ pub(crate) NV_PRISCV_RISCV_CPUCTL(u32) @ PFalcon2Base + 0x00000388 { + 7:7 active_stat => bool; + 0:0 halted => bool; + } + + /// GA102 and later. + pub(crate) NV_PRISCV_RISCV_BCR_CTRL(u32) @ PFalcon2Base + 0x00000668 { + 8:8 br_fetch => bool; + 4:4 core_select => PeregrineCoreSelect; + 0:0 valid => bool; + } +} + +// The modules below provide registers that are not identical on all supported chips. They should +// only be used in HAL modules. + +pub(crate) mod gm107 { + use kernel::io::register; + + // FUSE + + register! { + pub(crate) NV_FUSE_STATUS_OPT_DISPLAY(u32) @ 0x00021c04 { + 0:0 display_disabled => bool; + } + } +} + +pub(crate) mod ga100 { + use kernel::io::register; + + // FUSE + + register! { + pub(crate) NV_FUSE_STATUS_OPT_DISPLAY(u32) @ 0x00820c04 { + 0:0 display_disabled => bool; + } + } +} diff --git a/drivers/gpu/nova-core/sbuffer.rs b/drivers/gpu/nova-core/sbuffer.rs new file mode 100644 index 000000000000..3a41d224c77a --- /dev/null +++ b/drivers/gpu/nova-core/sbuffer.rs @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::ops::Deref; + +use kernel::prelude::*; + +/// A buffer abstraction for discontiguous byte slices. +/// +/// This allows you to treat multiple non-contiguous `&mut [u8]` slices +/// of the same length as a single stream-like read/write buffer. +/// +/// # Examples +/// +/// ``` +// let mut buf1 = [0u8; 5]; +/// let mut buf2 = [0u8; 5]; +/// let mut sbuffer = SBufferIter::new_writer([&mut buf1[..], &mut buf2[..]]); +/// +/// let data = b"hi world!"; +/// sbuffer.write_all(data)?; +/// drop(sbuffer); +/// +/// assert_eq!(buf1, *b"hi wo"); +/// assert_eq!(buf2, *b"rld!\0"); +/// +/// # Ok::<(), Error>(()) +/// ``` +pub(crate) struct SBufferIter<I: Iterator> { + // [`Some`] if we are not at the end of the data yet. + cur_slice: Option<I::Item>, + // All the slices remaining after `cur_slice`. 
+ slices: I, +} + +impl<'a, I> SBufferIter<I> +where + I: Iterator, +{ + /// Creates a reader buffer for a discontiguous set of byte slices. + /// + /// # Examples + /// + /// ``` + /// let buf1: [u8; 5] = [0, 1, 2, 3, 4]; + /// let buf2: [u8; 5] = [5, 6, 7, 8, 9]; + /// let sbuffer = SBufferIter::new_reader([&buf1[..], &buf2[..]]); + /// let sum: u8 = sbuffer.sum(); + /// assert_eq!(sum, 45); + /// ``` + pub(crate) fn new_reader(slices: impl IntoIterator<IntoIter = I>) -> Self + where + I: Iterator<Item = &'a [u8]>, + { + Self::new(slices) + } + + /// Creates a writeable buffer for a discontiguous set of byte slices. + /// + /// # Examples + /// + /// ``` + /// let mut buf1 = [0u8; 5]; + /// let mut buf2 = [0u8; 5]; + /// let mut sbuffer = SBufferIter::new_writer([&mut buf1[..], &mut buf2[..]]); + /// sbuffer.write_all(&[0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9][..])?; + /// drop(sbuffer); + /// assert_eq!(buf1, [0, 1, 2, 3, 4]); + /// assert_eq!(buf2, [5, 6, 7, 8, 9]); + /// + /// ``` + pub(crate) fn new_writer(slices: impl IntoIterator<IntoIter = I>) -> Self + where + I: Iterator<Item = &'a mut [u8]>, + { + Self::new(slices) + } + + fn new(slices: impl IntoIterator<IntoIter = I>) -> Self + where + I::Item: Deref<Target = [u8]>, + { + let mut slices = slices.into_iter(); + + Self { + // Skip empty slices. + cur_slice: slices.find(|s| !s.deref().is_empty()), + slices, + } + } + + /// Returns a slice of at most `len` bytes, or [`None`] if we are at the end of the data. + /// + /// If a slice shorter than `len` bytes has been returned, the caller can call this method + /// again until it returns [`None`] to try and obtain the remainder of the data. + /// + /// The closure `f` should split the slice received in it's first parameter + /// at the position given in the second parameter. 
+ fn get_slice_internal( + &mut self, + len: usize, + mut f: impl FnMut(I::Item, usize) -> (I::Item, I::Item), + ) -> Option<I::Item> + where + I::Item: Deref<Target = [u8]>, + { + match self.cur_slice.take() { + None => None, + Some(cur_slice) => { + if len >= cur_slice.len() { + // Caller requested more data than is in the current slice, return it entirely + // and prepare the following slice for being used. Skip empty slices to avoid + // trouble. + self.cur_slice = self.slices.find(|s| !s.is_empty()); + + Some(cur_slice) + } else { + // The current slice can satisfy the request, split it and return a slice of + // the requested size. + let (ret, next) = f(cur_slice, len); + self.cur_slice = Some(next); + + Some(ret) + } + } + } + } + + /// Returns whether this buffer still has data available. + pub(crate) fn is_empty(&self) -> bool { + self.cur_slice.is_none() + } +} + +/// Provides a way to get non-mutable slices of data to read from. +impl<'a, I> SBufferIter<I> +where + I: Iterator<Item = &'a [u8]>, +{ + /// Returns a slice of at most `len` bytes, or [`None`] if we are at the end of the data. + /// + /// If a slice shorter than `len` bytes has been returned, the caller can call this method + /// again until it returns [`None`] to try and obtain the remainder of the data. + fn get_slice(&mut self, len: usize) -> Option<&'a [u8]> { + self.get_slice_internal(len, |s, pos| s.split_at(pos)) + } + + /// Ideally we would implement `Read`, but it is not available in `core`. + /// So mimic `std::io::Read::read_exact`. + #[expect(unused)] + pub(crate) fn read_exact(&mut self, mut dst: &mut [u8]) -> Result { + while !dst.is_empty() { + match self.get_slice(dst.len()) { + None => return Err(EINVAL), + Some(src) => { + let dst_slice; + (dst_slice, dst) = dst.split_at_mut(src.len()); + dst_slice.copy_from_slice(src); + } + } + } + + Ok(()) + } + + /// Read all the remaining data into a [`KVec`]. + /// + /// `self` will be empty after this operation. 
+ pub(crate) fn flush_into_kvec(&mut self, flags: kernel::alloc::Flags) -> Result<KVec<u8>> { + let mut buf = KVec::<u8>::new(); + + if let Some(slice) = core::mem::take(&mut self.cur_slice) { + buf.extend_from_slice(slice, flags)?; + } + for slice in &mut self.slices { + buf.extend_from_slice(slice, flags)?; + } + + Ok(buf) + } +} + +/// Provides a way to get mutable slices of data to write into. +impl<'a, I> SBufferIter<I> +where + I: Iterator<Item = &'a mut [u8]>, +{ + /// Returns a mutable slice of at most `len` bytes, or [`None`] if we are at the end of the + /// data. + /// + /// If a slice shorter than `len` bytes has been returned, the caller can call this method + /// again until it returns `None` to try and obtain the remainder of the data. + fn get_slice_mut(&mut self, len: usize) -> Option<&'a mut [u8]> { + self.get_slice_internal(len, |s, pos| s.split_at_mut(pos)) + } + + /// Ideally we would implement [`Write`], but it is not available in `core`. + /// So mimic `std::io::Write::write_all`. + pub(crate) fn write_all(&mut self, mut src: &[u8]) -> Result { + while !src.is_empty() { + match self.get_slice_mut(src.len()) { + None => return Err(ETOOSMALL), + Some(dst) => { + let src_slice; + (src_slice, src) = src.split_at(dst.len()); + dst.copy_from_slice(src_slice); + } + } + } + + Ok(()) + } +} + +impl<'a, I> Iterator for SBufferIter<I> +where + I: Iterator<Item = &'a [u8]>, +{ + type Item = u8; + + fn next(&mut self) -> Option<Self::Item> { + // Returned slices are guaranteed to not be empty so we can safely index the first entry. + self.get_slice(1).map(|s| s[0]) + } +} diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs new file mode 100644 index 000000000000..ebda28e596c5 --- /dev/null +++ b/drivers/gpu/nova-core/vbios.rs @@ -0,0 +1,1086 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! VBIOS extraction and parsing. 
+ +use core::convert::TryFrom; + +use kernel::{ + device, + io::Io, + prelude::*, + ptr::{ + Alignable, + Alignment, // + }, + sync::aref::ARef, + transmute::FromBytes, +}; + +use crate::{ + driver::Bar0, + firmware::{ + fwsec::Bcrt30Rsa3kSignature, + FalconUCodeDesc, + FalconUCodeDescV2, + FalconUCodeDescV3, // + }, + num::FromSafeCast, +}; + +/// The offset of the VBIOS ROM in the BAR0 space. +const ROM_OFFSET: usize = 0x300000; +/// The maximum length of the VBIOS ROM to scan into. +const BIOS_MAX_SCAN_LEN: usize = 0x100000; +/// The size to read ahead when parsing initial BIOS image headers. +const BIOS_READ_AHEAD_SIZE: usize = 1024; +/// The bit in the last image indicator byte for the PCI Data Structure that +/// indicates the last image. Bit 0-6 are reserved, bit 7 is last image bit. +const LAST_IMAGE_BIT_MASK: u8 = 0x80; + +/// BIOS Image Type from PCI Data Structure code_type field. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +enum BiosImageType { + /// PC-AT compatible BIOS image (x86 legacy) + PciAt = 0x00, + /// EFI (Extensible Firmware Interface) BIOS image + Efi = 0x03, + /// NBSI (Notebook System Information) BIOS image + Nbsi = 0x70, + /// FwSec (Firmware Security) BIOS image + FwSec = 0xE0, +} + +impl TryFrom<u8> for BiosImageType { + type Error = Error; + + fn try_from(code: u8) -> Result<Self> { + match code { + 0x00 => Ok(Self::PciAt), + 0x03 => Ok(Self::Efi), + 0x70 => Ok(Self::Nbsi), + 0xE0 => Ok(Self::FwSec), + _ => Err(EINVAL), + } + } +} + +// PMU lookup table entry types. Used to locate PMU table entries +// in the Fwsec image, corresponding to falcon ucodes. +#[expect(dead_code)] +const FALCON_UCODE_ENTRY_APPID_FIRMWARE_SEC_LIC: u8 = 0x05; +#[expect(dead_code)] +const FALCON_UCODE_ENTRY_APPID_FWSEC_DBG: u8 = 0x45; +const FALCON_UCODE_ENTRY_APPID_FWSEC_PROD: u8 = 0x85; + +/// Vbios Reader for constructing the VBIOS data. 
+struct VbiosIterator<'a> { + dev: &'a device::Device, + bar0: &'a Bar0, + /// VBIOS data vector: As BIOS images are scanned, they are added to this vector for reference + /// or copying into other data structures. It is the entire scanned contents of the VBIOS which + /// progressively extends. It is used so that we do not re-read any contents that are already + /// read as we use the cumulative length read so far, and re-read any gaps as we extend the + /// length. + data: KVec<u8>, + /// Current offset of the [`Iterator`]. + current_offset: usize, + /// Indicate whether the last image has been found. + last_found: bool, +} + +impl<'a> VbiosIterator<'a> { + fn new(dev: &'a device::Device, bar0: &'a Bar0) -> Result<Self> { + Ok(Self { + dev, + bar0, + data: KVec::new(), + current_offset: 0, + last_found: false, + }) + } + + /// Read bytes from the ROM at the current end of the data vector. + fn read_more(&mut self, len: usize) -> Result { + let current_len = self.data.len(); + let start = ROM_OFFSET + current_len; + + // Ensure length is a multiple of 4 for 32-bit reads + if len % core::mem::size_of::<u32>() != 0 { + dev_err!( + self.dev, + "VBIOS read length {} is not a multiple of 4\n", + len + ); + return Err(EINVAL); + } + + self.data.reserve(len, GFP_KERNEL)?; + // Read ROM data bytes and push directly to `data`. + for addr in (start..start + len).step_by(core::mem::size_of::<u32>()) { + // Read 32-bit word from the VBIOS ROM + let word = self.bar0.try_read32(addr)?; + + // Convert the `u32` to a 4 byte array and push each byte. + word.to_ne_bytes() + .iter() + .try_for_each(|&b| self.data.push(b, GFP_KERNEL))?; + } + + Ok(()) + } + + /// Read bytes at a specific offset, filling any gap. + fn read_more_at_offset(&mut self, offset: usize, len: usize) -> Result { + if offset > BIOS_MAX_SCAN_LEN { + dev_err!(self.dev, "Error: exceeded BIOS scan limit.\n"); + return Err(EINVAL); + } + + // If `offset` is beyond current data size, fill the gap first. 
+ let current_len = self.data.len(); + let gap_bytes = offset.saturating_sub(current_len); + + // Now read the requested bytes at the offset. + self.read_more(gap_bytes + len) + } + + /// Read a BIOS image at a specific offset and create a [`BiosImage`] from it. + /// + /// `self.data` is extended as needed and a new [`BiosImage`] is returned. + /// `context` is a string describing the operation for error reporting. + fn read_bios_image_at_offset( + &mut self, + offset: usize, + len: usize, + context: &str, + ) -> Result<BiosImage> { + let data_len = self.data.len(); + if offset + len > data_len { + self.read_more_at_offset(offset, len).inspect_err(|e| { + dev_err!( + self.dev, + "Failed to read more at offset {:#x}: {:?}\n", + offset, + e + ) + })?; + } + + BiosImage::new(self.dev, &self.data[offset..offset + len]).inspect_err(|err| { + dev_err!( + self.dev, + "Failed to {} at offset {:#x}: {:?}\n", + context, + offset, + err + ) + }) + } +} + +impl<'a> Iterator for VbiosIterator<'a> { + type Item = Result<BiosImage>; + + /// Iterate over all VBIOS images until the last image is detected or offset + /// exceeds scan limit. + fn next(&mut self) -> Option<Self::Item> { + if self.last_found { + return None; + } + + if self.current_offset > BIOS_MAX_SCAN_LEN { + dev_err!(self.dev, "Error: exceeded BIOS scan limit, stopping scan\n"); + return None; + } + + // Parse image headers first to get image size. + let image_size = match self.read_bios_image_at_offset( + self.current_offset, + BIOS_READ_AHEAD_SIZE, + "parse initial BIOS image headers", + ) { + Ok(image) => image.image_size_bytes(), + Err(e) => return Some(Err(e)), + }; + + // Now create a new `BiosImage` with the full image data. + let full_image = match self.read_bios_image_at_offset( + self.current_offset, + image_size, + "parse full BIOS image", + ) { + Ok(image) => image, + Err(e) => return Some(Err(e)), + }; + + self.last_found = full_image.is_last(); + + // Advance to next image (aligned to 512 bytes). 
+ self.current_offset += image_size; + self.current_offset = self.current_offset.align_up(Alignment::new::<512>())?; + + Some(Ok(full_image)) + } +} + +pub(crate) struct Vbios { + fwsec_image: FwSecBiosImage, +} + +impl Vbios { + /// Probe for VBIOS extraction. + /// + /// Once the VBIOS object is built, `bar0` is not read for [`Vbios`] purposes anymore. + pub(crate) fn new(dev: &device::Device, bar0: &Bar0) -> Result<Vbios> { + // Images to extract from iteration + let mut pci_at_image: Option<PciAtBiosImage> = None; + let mut first_fwsec_image: Option<FwSecBiosBuilder> = None; + let mut second_fwsec_image: Option<FwSecBiosBuilder> = None; + + // Parse all VBIOS images in the ROM + for image_result in VbiosIterator::new(dev, bar0)? { + let image = image_result?; + + dev_dbg!( + dev, + "Found BIOS image: size: {:#x}, type: {:?}, last: {}\n", + image.image_size_bytes(), + image.image_type(), + image.is_last() + ); + + // Convert to a specific image type + match BiosImageType::try_from(image.pcir.code_type) { + Ok(BiosImageType::PciAt) => { + pci_at_image = Some(PciAtBiosImage::try_from(image)?); + } + Ok(BiosImageType::FwSec) => { + let fwsec = FwSecBiosBuilder { + base: image, + falcon_data_offset: None, + pmu_lookup_table: None, + falcon_ucode_offset: None, + }; + if first_fwsec_image.is_none() { + first_fwsec_image = Some(fwsec); + } else { + second_fwsec_image = Some(fwsec); + } + } + _ => { + // Ignore other image types or unknown types + } + } + } + + // Using all the images, setup the falcon data pointer in Fwsec. 
+ if let (Some(mut second), Some(first), Some(pci_at)) = + (second_fwsec_image, first_fwsec_image, pci_at_image) + { + second + .setup_falcon_data(&pci_at, &first) + .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?; + Ok(Vbios { + fwsec_image: second.build()?, + }) + } else { + dev_err!( + dev, + "Missing required images for falcon data setup, skipping\n" + ); + Err(EINVAL) + } + } + + pub(crate) fn fwsec_image(&self) -> &FwSecBiosImage { + &self.fwsec_image + } +} + +/// PCI Data Structure as defined in PCI Firmware Specification +#[derive(Debug, Clone)] +#[repr(C)] +struct PcirStruct { + /// PCI Data Structure signature ("PCIR" or "NPDS") + signature: [u8; 4], + /// PCI Vendor ID (e.g., 0x10DE for NVIDIA) + vendor_id: u16, + /// PCI Device ID + device_id: u16, + /// Device List Pointer + device_list_ptr: u16, + /// PCI Data Structure Length + pci_data_struct_len: u16, + /// PCI Data Structure Revision + pci_data_struct_rev: u8, + /// Class code (3 bytes, 0x03 for display controller) + class_code: [u8; 3], + /// Size of this image in 512-byte blocks + image_len: u16, + /// Revision Level of the Vendor's ROM + vendor_rom_rev: u16, + /// ROM image type (0x00 = PC-AT compatible, 0x03 = EFI, 0x70 = NBSI) + code_type: u8, + /// Last image indicator (0x00 = Not last image, 0x80 = Last image) + last_image: u8, + /// Maximum Run-time Image Length (units of 512 bytes) + max_runtime_image_len: u16, +} + +// SAFETY: all bit patterns are valid for `PcirStruct`. +unsafe impl FromBytes for PcirStruct {} + +impl PcirStruct { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + let (pcir, _) = PcirStruct::from_bytes_copy_prefix(data).ok_or(EINVAL)?; + + // Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e). 
+ if &pcir.signature != b"PCIR" && &pcir.signature != b"NPDS" { + dev_err!( + dev, + "Invalid signature for PcirStruct: {:?}\n", + pcir.signature + ); + return Err(EINVAL); + } + + if pcir.image_len == 0 { + dev_err!(dev, "Invalid image length: 0\n"); + return Err(EINVAL); + } + + Ok(pcir) + } + + /// Check if this is the last image in the ROM. + fn is_last(&self) -> bool { + self.last_image & LAST_IMAGE_BIT_MASK != 0 + } + + /// Calculate image size in bytes from 512-byte blocks. + fn image_size_bytes(&self) -> usize { + usize::from(self.image_len) * 512 + } +} + +/// BIOS Information Table (BIT) Header. +/// +/// This is the head of the BIT table, that is used to locate the Falcon data. The BIT table (with +/// its header) is in the [`PciAtBiosImage`] and the falcon data it is pointing to is in the +/// [`FwSecBiosImage`]. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +struct BitHeader { + /// 0h: BIT Header Identifier (BMP=0x7FFF/BIT=0xB8FF) + id: u16, + /// 2h: BIT Header Signature ("BIT\0") + signature: [u8; 4], + /// 6h: Binary Coded Decimal Version, ex: 0x0100 is 1.00. + bcd_version: u16, + /// 8h: Size of BIT Header (in bytes) + header_size: u8, + /// 9h: Size of BIT Tokens (in bytes) + token_size: u8, + /// 10h: Number of token entries that follow + token_entries: u8, + /// 11h: BIT Header Checksum + checksum: u8, +} + +// SAFETY: all bit patterns are valid for `BitHeader`. +unsafe impl FromBytes for BitHeader {} + +impl BitHeader { + fn new(data: &[u8]) -> Result<Self> { + let (header, _) = BitHeader::from_bytes_copy_prefix(data).ok_or(EINVAL)?; + + // Check header ID and signature + if header.id != 0xB8FF || &header.signature != b"BIT\0" { + return Err(EINVAL); + } + + Ok(header) + } +} + +/// BIT Token Entry: Records in the BIT table followed by the BIT header. 
+#[derive(Debug, Clone, Copy)] +#[expect(dead_code)] +struct BitToken { + /// 00h: Token identifier + id: u8, + /// 01h: Version of the token data + data_version: u8, + /// 02h: Size of token data in bytes + data_size: u16, + /// 04h: Offset to the token data + data_offset: u16, +} + +// Define the token ID for the Falcon data +const BIT_TOKEN_ID_FALCON_DATA: u8 = 0x70; + +impl BitToken { + /// Find a BIT token entry by BIT ID in a PciAtBiosImage + fn from_id(image: &PciAtBiosImage, token_id: u8) -> Result<Self> { + let header = &image.bit_header; + + // Offset to the first token entry + let tokens_start = image.bit_offset + usize::from(header.header_size); + + for i in 0..usize::from(header.token_entries) { + let entry_offset = tokens_start + (i * usize::from(header.token_size)); + + // Make sure we don't go out of bounds + if entry_offset + usize::from(header.token_size) > image.base.data.len() { + return Err(EINVAL); + } + + // Check if this token has the requested ID + if image.base.data[entry_offset] == token_id { + return Ok(BitToken { + id: image.base.data[entry_offset], + data_version: image.base.data[entry_offset + 1], + data_size: u16::from_le_bytes([ + image.base.data[entry_offset + 2], + image.base.data[entry_offset + 3], + ]), + data_offset: u16::from_le_bytes([ + image.base.data[entry_offset + 4], + image.base.data[entry_offset + 5], + ]), + }); + } + } + + // Token not found + Err(ENOENT) + } +} + +/// PCI ROM Expansion Header as defined in PCI Firmware Specification. +/// +/// This is header is at the beginning of every image in the set of images in the ROM. It contains +/// a pointer to the PCI Data Structure which describes the image. For "NBSI" images (NoteBook +/// System Information), the ROM header deviates from the standard and contains an offset to the +/// NBSI image however we do not yet parse that in this module and keep it for future reference. 
+#[derive(Debug, Clone, Copy)] +#[expect(dead_code)] +struct PciRomHeader { + /// 00h: Signature (0xAA55) + signature: u16, + /// 02h: Reserved bytes for processor architecture unique data (20 bytes) + reserved: [u8; 20], + /// 16h: NBSI Data Offset (NBSI-specific, offset from header to NBSI image) + nbsi_data_offset: Option<u16>, + /// 18h: Pointer to PCI Data Structure (offset from start of ROM image) + pci_data_struct_offset: u16, + /// 1Ah: Size of block (this is NBSI-specific) + size_of_block: Option<u32>, +} + +impl PciRomHeader { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + if data.len() < 26 { + // Need at least 26 bytes to read pciDataStrucPtr and sizeOfBlock. + return Err(EINVAL); + } + + let signature = u16::from_le_bytes([data[0], data[1]]); + + // Check for valid ROM signatures. + match signature { + 0xAA55 | 0xBB77 | 0x4E56 => {} + _ => { + dev_err!(dev, "ROM signature unknown {:#x}\n", signature); + return Err(EINVAL); + } + } + + // Read the pointer to the PCI Data Structure at offset 0x18. + let pci_data_struct_ptr = u16::from_le_bytes([data[24], data[25]]); + + // Try to read optional fields if enough data. + let mut size_of_block = None; + let mut nbsi_data_offset = None; + + if data.len() >= 30 { + // Read size_of_block at offset 0x1A. + size_of_block = Some(u32::from_le_bytes([data[26], data[27], data[28], data[29]])); + } + + // For NBSI images, try to read the nbsiDataOffset at offset 0x16. + if data.len() >= 24 { + nbsi_data_offset = Some(u16::from_le_bytes([data[22], data[23]])); + } + + Ok(PciRomHeader { + signature, + reserved: [0u8; 20], + pci_data_struct_offset: pci_data_struct_ptr, + size_of_block, + nbsi_data_offset, + }) + } +} + +/// NVIDIA PCI Data Extension Structure. +/// +/// This is similar to the PCI Data Structure, but is Nvidia-specific and is placed right after the +/// PCI Data Structure. 
It contains some fields that are redundant with the PCI Data Structure, but +/// are needed for traversing the BIOS images. It is expected to be present in all BIOS images +/// except for NBSI images. +#[derive(Debug, Clone)] +#[repr(C)] +struct NpdeStruct { + /// 00h: Signature ("NPDE") + signature: [u8; 4], + /// 04h: NVIDIA PCI Data Extension Revision + npci_data_ext_rev: u16, + /// 06h: NVIDIA PCI Data Extension Length + npci_data_ext_len: u16, + /// 08h: Sub-image Length (in 512-byte units) + subimage_len: u16, + /// 0Ah: Last image indicator flag + last_image: u8, +} + +// SAFETY: all bit patterns are valid for `NpdeStruct`. +unsafe impl FromBytes for NpdeStruct {} + +impl NpdeStruct { + fn new(dev: &device::Device, data: &[u8]) -> Option<Self> { + let (npde, _) = NpdeStruct::from_bytes_copy_prefix(data)?; + + // Signature should be "NPDE" (0x4544504E). + if &npde.signature != b"NPDE" { + dev_dbg!( + dev, + "Invalid signature for NpdeStruct: {:?}\n", + npde.signature + ); + return None; + } + + if npde.subimage_len == 0 { + dev_dbg!(dev, "Invalid subimage length: 0\n"); + return None; + } + + Some(npde) + } + + /// Check if this is the last image in the ROM. + fn is_last(&self) -> bool { + self.last_image & LAST_IMAGE_BIT_MASK != 0 + } + + /// Calculate image size in bytes from 512-byte blocks. + fn image_size_bytes(&self) -> usize { + usize::from(self.subimage_len) * 512 + } + + /// Try to find NPDE in the data, the NPDE is right after the PCIR. 
+ fn find_in_data( + dev: &device::Device, + data: &[u8], + rom_header: &PciRomHeader, + pcir: &PcirStruct, + ) -> Option<Self> { + // Calculate the offset where NPDE might be located + // NPDE should be right after the PCIR structure, aligned to 16 bytes + let pcir_offset = usize::from(rom_header.pci_data_struct_offset); + let npde_start = (pcir_offset + usize::from(pcir.pci_data_struct_len) + 0x0F) & !0x0F; + + // Check if we have enough data + if npde_start + core::mem::size_of::<Self>() > data.len() { + dev_dbg!(dev, "Not enough data for NPDE\n"); + return None; + } + + // Try to create NPDE from the data + NpdeStruct::new(dev, &data[npde_start..]) + } +} + +/// The PciAt BIOS image is typically the first BIOS image type found in the BIOS image chain. +/// +/// It contains the BIT header and the BIT tokens. +struct PciAtBiosImage { + base: BiosImage, + bit_header: BitHeader, + bit_offset: usize, +} + +#[expect(dead_code)] +struct EfiBiosImage { + base: BiosImage, + // EFI-specific fields can be added here in the future. +} + +#[expect(dead_code)] +struct NbsiBiosImage { + base: BiosImage, + // NBSI-specific fields can be added here in the future. +} + +struct FwSecBiosBuilder { + base: BiosImage, + /// These are temporary fields that are used during the construction of the + /// [`FwSecBiosBuilder`]. + /// + /// Once FwSecBiosBuilder is constructed, the `falcon_ucode_offset` will be copied into a new + /// [`FwSecBiosImage`]. + /// + /// The offset of the Falcon data from the start of Fwsec image. + falcon_data_offset: Option<usize>, + /// The [`PmuLookupTable`] starts at the offset of the falcon data pointer. + pmu_lookup_table: Option<PmuLookupTable>, + /// The offset of the Falcon ucode. + falcon_ucode_offset: Option<usize>, +} + +/// The [`FwSecBiosImage`] structure contains the PMU table and the Falcon Ucode. +/// +/// The PMU table contains voltage/frequency tables as well as a pointer to the Falcon Ucode. 
+pub(crate) struct FwSecBiosImage { + base: BiosImage, + /// The offset of the Falcon ucode. + falcon_ucode_offset: usize, +} + +/// BIOS Image structure containing various headers and reference fields to all BIOS images. +/// +/// A BiosImage struct is embedded into all image types and implements common operations. +#[expect(dead_code)] +struct BiosImage { + /// Used for logging. + dev: ARef<device::Device>, + /// PCI ROM Expansion Header + rom_header: PciRomHeader, + /// PCI Data Structure + pcir: PcirStruct, + /// NVIDIA PCI Data Extension (optional) + npde: Option<NpdeStruct>, + /// Image data (includes ROM header and PCIR) + data: KVec<u8>, +} + +impl BiosImage { + /// Get the image size in bytes. + fn image_size_bytes(&self) -> usize { + // Prefer NPDE image size if available + if let Some(ref npde) = self.npde { + npde.image_size_bytes() + } else { + // Otherwise, fall back to the PCIR image size + self.pcir.image_size_bytes() + } + } + + /// Get the BIOS image type. + fn image_type(&self) -> Result<BiosImageType> { + BiosImageType::try_from(self.pcir.code_type) + } + + /// Check if this is the last image. + fn is_last(&self) -> bool { + // For NBSI images, return true as they're considered the last image. + if self.image_type() == Ok(BiosImageType::Nbsi) { + return true; + } + + // For other image types, check the NPDE first if available + if let Some(ref npde) = self.npde { + return npde.is_last(); + } + + // Otherwise, fall back to checking the PCIR last_image flag + self.pcir.is_last() + } + + /// Creates a new BiosImage from raw byte data. + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + // Ensure we have enough data for the ROM header. + if data.len() < 26 { + dev_err!(dev, "Not enough data for ROM header\n"); + return Err(EINVAL); + } + + // Parse the ROM header. 
+ let rom_header = PciRomHeader::new(dev, &data[0..26]) + .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?; + + // Get the PCI Data Structure using the pointer from the ROM header. + let pcir_offset = usize::from(rom_header.pci_data_struct_offset); + let pcir_data = data + .get(pcir_offset..pcir_offset + core::mem::size_of::<PcirStruct>()) + .ok_or(EINVAL) + .inspect_err(|_| { + dev_err!( + dev, + "PCIR offset {:#x} out of bounds (data length: {})\n", + pcir_offset, + data.len() + ); + dev_err!( + dev, + "Consider reading more data for construction of BiosImage\n" + ); + })?; + + let pcir = PcirStruct::new(dev, pcir_data) + .inspect_err(|e| dev_err!(dev, "Failed to create PcirStruct: {:?}\n", e))?; + + // Look for NPDE structure if this is not an NBSI image (type != 0x70). + let npde = NpdeStruct::find_in_data(dev, data, &rom_header, &pcir); + + // Create a copy of the data. + let mut data_copy = KVec::new(); + data_copy.extend_from_slice(data, GFP_KERNEL)?; + + Ok(BiosImage { + dev: dev.into(), + rom_header, + pcir, + npde, + data: data_copy, + }) + } +} + +impl PciAtBiosImage { + /// Find a byte pattern in a slice. + fn find_byte_pattern(haystack: &[u8], needle: &[u8]) -> Result<usize> { + haystack + .windows(needle.len()) + .position(|window| window == needle) + .ok_or(EINVAL) + } + + /// Find the BIT header in the [`PciAtBiosImage`]. + fn find_bit_header(data: &[u8]) -> Result<(BitHeader, usize)> { + let bit_pattern = [0xff, 0xb8, b'B', b'I', b'T', 0x00]; + let bit_offset = Self::find_byte_pattern(data, &bit_pattern)?; + let bit_header = BitHeader::new(&data[bit_offset..])?; + + Ok((bit_header, bit_offset)) + } + + /// Get a BIT token entry from the BIT table in the [`PciAtBiosImage`] + fn get_bit_token(&self, token_id: u8) -> Result<BitToken> { + BitToken::from_id(self, token_id) + } + + /// Find the Falcon data pointer structure in the [`PciAtBiosImage`]. 
+ /// + /// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC + /// image. + fn falcon_data_ptr(&self) -> Result<u32> { + let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?; + + // Make sure we don't go out of bounds + if usize::from(token.data_offset) + 4 > self.base.data.len() { + return Err(EINVAL); + } + + // read the 4 bytes at the offset specified in the token + let offset = usize::from(token.data_offset); + let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| { + dev_err!(self.base.dev, "Failed to convert data slice to array\n"); + EINVAL + })?; + + let data_ptr = u32::from_le_bytes(bytes); + + if (usize::from_safe_cast(data_ptr)) < self.base.data.len() { + dev_err!(self.base.dev, "Falcon data pointer out of bounds\n"); + return Err(EINVAL); + } + + Ok(data_ptr) + } +} + +impl TryFrom<BiosImage> for PciAtBiosImage { + type Error = Error; + + fn try_from(base: BiosImage) -> Result<Self> { + let data_slice = &base.data; + let (bit_header, bit_offset) = PciAtBiosImage::find_bit_header(data_slice)?; + + Ok(PciAtBiosImage { + base, + bit_header, + bit_offset, + }) + } +} + +/// The [`PmuLookupTableEntry`] structure is a single entry in the [`PmuLookupTable`]. +/// +/// See the [`PmuLookupTable`] description for more information. +#[repr(C, packed)] +struct PmuLookupTableEntry { + application_id: u8, + target_id: u8, + data: u32, +} + +impl PmuLookupTableEntry { + fn new(data: &[u8]) -> Result<Self> { + if data.len() < core::mem::size_of::<Self>() { + return Err(EINVAL); + } + + Ok(PmuLookupTableEntry { + application_id: data[0], + target_id: data[1], + data: u32::from_le_bytes(data[2..6].try_into().map_err(|_| EINVAL)?), + }) + } +} + +#[repr(C)] +struct PmuLookupTableHeader { + version: u8, + header_len: u8, + entry_len: u8, + entry_count: u8, +} + +// SAFETY: all bit patterns are valid for `PmuLookupTableHeader`. 
+unsafe impl FromBytes for PmuLookupTableHeader {} + +/// The [`PmuLookupTableEntry`] structure is used to find the [`PmuLookupTableEntry`] for a given +/// application ID. +/// +/// The table of entries is pointed to by the falcon data pointer in the BIT table, and is used to +/// locate the Falcon Ucode. +struct PmuLookupTable { + header: PmuLookupTableHeader, + table_data: KVec<u8>, +} + +impl PmuLookupTable { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + let (header, _) = PmuLookupTableHeader::from_bytes_copy_prefix(data).ok_or(EINVAL)?; + + let header_len = usize::from(header.header_len); + let entry_len = usize::from(header.entry_len); + let entry_count = usize::from(header.entry_count); + + let required_bytes = header_len + (entry_count * entry_len); + + if data.len() < required_bytes { + dev_err!(dev, "PmuLookupTable data length less than required\n"); + return Err(EINVAL); + } + + // Create a copy of only the table data + let table_data = { + let mut ret = KVec::new(); + ret.extend_from_slice(&data[header_len..required_bytes], GFP_KERNEL)?; + ret + }; + + Ok(PmuLookupTable { header, table_data }) + } + + fn lookup_index(&self, idx: u8) -> Result<PmuLookupTableEntry> { + if idx >= self.header.entry_count { + return Err(EINVAL); + } + + let index = (usize::from(idx)) * usize::from(self.header.entry_len); + PmuLookupTableEntry::new(&self.table_data[index..]) + } + + // find entry by type value + fn find_entry_by_type(&self, entry_type: u8) -> Result<PmuLookupTableEntry> { + for i in 0..self.header.entry_count { + let entry = self.lookup_index(i)?; + if entry.application_id == entry_type { + return Ok(entry); + } + } + + Err(EINVAL) + } +} + +impl FwSecBiosBuilder { + fn setup_falcon_data( + &mut self, + pci_at_image: &PciAtBiosImage, + first_fwsec: &FwSecBiosBuilder, + ) -> Result { + let mut offset = usize::from_safe_cast(pci_at_image.falcon_data_ptr()?); + let mut pmu_in_first_fwsec = false; + + // The falcon data pointer assumes that the 
PciAt and FWSEC images + // are contiguous in memory. However, testing shows the EFI image sits in + // between them. So calculate the offset from the end of the PciAt image + // rather than the start of it. Compensate. + offset -= pci_at_image.base.data.len(); + + // The offset is now from the start of the first Fwsec image, however + // the offset points to a location in the second Fwsec image. Since + // the fwsec images are contiguous, subtract the length of the first Fwsec + // image from the offset to get the offset to the start of the second + // Fwsec image. + if offset < first_fwsec.base.data.len() { + pmu_in_first_fwsec = true; + } else { + offset -= first_fwsec.base.data.len(); + } + + self.falcon_data_offset = Some(offset); + + if pmu_in_first_fwsec { + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &first_fwsec.base.data[offset..], + )?); + } else { + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &self.base.data[offset..], + )?); + } + + match self + .pmu_lookup_table + .as_ref() + .ok_or(EINVAL)? 
+ .find_entry_by_type(FALCON_UCODE_ENTRY_APPID_FWSEC_PROD) + { + Ok(entry) => { + let mut ucode_offset = usize::from_safe_cast(entry.data); + ucode_offset -= pci_at_image.base.data.len(); + if ucode_offset < first_fwsec.base.data.len() { + dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n"); + return Err(EINVAL); + } + ucode_offset -= first_fwsec.base.data.len(); + self.falcon_ucode_offset = Some(ucode_offset); + } + Err(e) => { + dev_err!( + self.base.dev, + "PmuLookupTableEntry not found, error: {:?}\n", + e + ); + return Err(EINVAL); + } + } + Ok(()) + } + + /// Build the final FwSecBiosImage from this builder + fn build(self) -> Result<FwSecBiosImage> { + let ret = FwSecBiosImage { + base: self.base, + falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?, + }; + + if cfg!(debug_assertions) { + // Print the desc header for debugging + let desc = ret.header()?; + dev_dbg!(ret.base.dev, "PmuLookupTableEntry desc: {:#?}\n", desc); + } + + Ok(ret) + } +} + +impl FwSecBiosImage { + /// Get the FwSec header ([`FalconUCodeDesc`]). + pub(crate) fn header(&self) -> Result<FalconUCodeDesc> { + // Get the falcon ucode offset that was found in setup_falcon_data. + let falcon_ucode_offset = self.falcon_ucode_offset; + + // Read the first 4 bytes to get the version. + let hdr_bytes: [u8; 4] = self.base.data[falcon_ucode_offset..falcon_ucode_offset + 4] + .try_into() + .map_err(|_| EINVAL)?; + let hdr = u32::from_le_bytes(hdr_bytes); + let ver = (hdr & 0xff00) >> 8; + + let data = self.base.data.get(falcon_ucode_offset..).ok_or(EINVAL)?; + match ver { + 2 => { + let v2 = FalconUCodeDescV2::from_bytes_copy_prefix(data) + .ok_or(EINVAL)? + .0; + Ok(FalconUCodeDesc::V2(v2)) + } + 3 => { + let v3 = FalconUCodeDescV3::from_bytes_copy_prefix(data) + .ok_or(EINVAL)? 
+ .0; + Ok(FalconUCodeDesc::V3(v3)) + } + _ => { + dev_err!(self.base.dev, "invalid fwsec firmware version: {:?}\n", ver); + Err(EINVAL) + } + } + } + + /// Get the ucode data as a byte slice + pub(crate) fn ucode(&self, desc: &FalconUCodeDesc) -> Result<&[u8]> { + let falcon_ucode_offset = self.falcon_ucode_offset; + + // The ucode data follows the descriptor. + let ucode_data_offset = falcon_ucode_offset + desc.size(); + let size = usize::from_safe_cast(desc.imem_load_size() + desc.dmem_load_size()); + + // Get the data slice, checking bounds in a single operation. + self.base + .data + .get(ucode_data_offset..ucode_data_offset + size) + .ok_or(ERANGE) + .inspect_err(|_| { + dev_err!( + self.base.dev, + "fwsec ucode data not contained within BIOS bounds\n" + ) + }) + } + + /// Get the signatures as a byte slice + pub(crate) fn sigs(&self, desc: &FalconUCodeDesc) -> Result<&[Bcrt30Rsa3kSignature]> { + let hdr_size = match desc { + FalconUCodeDesc::V2(_v2) => core::mem::size_of::<FalconUCodeDescV2>(), + FalconUCodeDesc::V3(_v3) => core::mem::size_of::<FalconUCodeDescV3>(), + }; + // The signatures data follows the descriptor. + let sigs_data_offset = self.falcon_ucode_offset + hdr_size; + let sigs_count = usize::from(desc.signature_count()); + let sigs_size = sigs_count * core::mem::size_of::<Bcrt30Rsa3kSignature>(); + + // Make sure the data is within bounds. + if sigs_data_offset + sigs_size > self.base.data.len() { + dev_err!( + self.base.dev, + "fwsec signatures data not contained within BIOS bounds\n" + ); + return Err(ERANGE); + } + + // SAFETY: we checked that `data + sigs_data_offset + (signature_count * + // sizeof::<Bcrt30Rsa3kSignature>()` is within the bounds of `data`. + Ok(unsafe { + core::slice::from_raw_parts( + self.base + .data + .as_ptr() + .add(sigs_data_offset) + .cast::<Bcrt30Rsa3kSignature>(), + sigs_count, + ) + }) + } +} |
