summaryrefslogtreecommitdiff
path: root/Documentation/driver-api
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/driver-api')
-rw-r--r--Documentation/driver-api/80211/index.rst7
-rw-r--r--Documentation/driver-api/acpi/acpi-drivers.rst80
-rw-r--r--Documentation/driver-api/acpi/index.rst1
-rw-r--r--Documentation/driver-api/basics.rst20
-rw-r--r--Documentation/driver-api/coco/index.rst10
-rw-r--r--Documentation/driver-api/coco/measurement-registers.rst12
-rw-r--r--Documentation/driver-api/crypto/iaa/iaa-crypto.rst2
-rw-r--r--Documentation/driver-api/crypto/iaa/index.rst7
-rw-r--r--Documentation/driver-api/crypto/index.rst7
-rw-r--r--Documentation/driver-api/cxl/access-coordinates.rst91
-rw-r--r--Documentation/driver-api/cxl/allocation/dax.rst60
-rw-r--r--Documentation/driver-api/cxl/allocation/hugepages.rst32
-rw-r--r--Documentation/driver-api/cxl/allocation/page-allocator.rst54
-rw-r--r--Documentation/driver-api/cxl/allocation/reclaim.rst51
-rw-r--r--Documentation/driver-api/cxl/conventions.rst18
-rw-r--r--Documentation/driver-api/cxl/conventions/cxl-atl.rst304
-rw-r--r--Documentation/driver-api/cxl/conventions/cxl-lmh.rst135
-rw-r--r--Documentation/driver-api/cxl/conventions/template.rst37
-rw-r--r--Documentation/driver-api/cxl/devices/device-types.rst165
-rw-r--r--Documentation/driver-api/cxl/index.rst48
-rw-r--r--Documentation/driver-api/cxl/linux/access-coordinates.rst178
-rw-r--r--Documentation/driver-api/cxl/linux/cxl-driver.rst630
-rw-r--r--Documentation/driver-api/cxl/linux/dax-driver.rst43
-rw-r--r--Documentation/driver-api/cxl/linux/early-boot.rst137
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/hb-interleave.rst314
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/intra-hb-interleave.rst291
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/multi-interleave.rst401
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/single-device.rst246
-rw-r--r--Documentation/driver-api/cxl/linux/memory-hotplug.rst78
-rw-r--r--Documentation/driver-api/cxl/linux/overview.rst103
-rw-r--r--Documentation/driver-api/cxl/maturity-map.rst8
-rw-r--r--Documentation/driver-api/cxl/platform/acpi.rst76
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/cedt.rst62
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/dsdt.rst28
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/hmat.rst32
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/slit.rst21
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/srat.rst71
-rw-r--r--Documentation/driver-api/cxl/platform/bios-and-efi.rst285
-rw-r--r--Documentation/driver-api/cxl/platform/cdat.rst118
-rw-r--r--Documentation/driver-api/cxl/platform/device-hotplug.rst130
-rw-r--r--Documentation/driver-api/cxl/platform/example-configs.rst13
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/flexible.rst296
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/hb-interleave.rst107
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/multi-dev-per-hb.rst90
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/one-dev-per-hb.rst136
-rw-r--r--Documentation/driver-api/cxl/theory-of-operation.rst (renamed from Documentation/driver-api/cxl/memory-devices.rst)39
-rw-r--r--Documentation/driver-api/device-io.rst4
-rw-r--r--Documentation/driver-api/dma-buf.rst5
-rw-r--r--Documentation/driver-api/dmaengine/index.rst7
-rw-r--r--Documentation/driver-api/dmaengine/provider.rst17
-rw-r--r--Documentation/driver-api/dmaengine/pxa_dma.rst2
-rw-r--r--Documentation/driver-api/dpll.rst117
-rw-r--r--Documentation/driver-api/driver-model/binding.rst57
-rw-r--r--Documentation/driver-api/driver-model/design-patterns.rst2
-rw-r--r--Documentation/driver-api/driver-model/devres.rst10
-rw-r--r--Documentation/driver-api/driver-model/index.rst7
-rw-r--r--Documentation/driver-api/driver-model/overview.rst2
-rw-r--r--Documentation/driver-api/driver-model/platform.rst2
-rw-r--r--Documentation/driver-api/early-userspace/buffer-format.rst39
-rw-r--r--Documentation/driver-api/early-userspace/early_userspace_support.rst2
-rw-r--r--Documentation/driver-api/early-userspace/index.rst7
-rw-r--r--Documentation/driver-api/eisa.rst6
-rw-r--r--Documentation/driver-api/firmware/efi/index.rst11
-rw-r--r--Documentation/driver-api/firmware/firmware-usage-guidelines.rst5
-rw-r--r--Documentation/driver-api/firmware/index.rst7
-rw-r--r--Documentation/driver-api/generic-counter.rst4
-rw-r--r--Documentation/driver-api/generic_pt.rst137
-rw-r--r--Documentation/driver-api/gpio/board.rst94
-rw-r--r--Documentation/driver-api/gpio/driver.rst29
-rw-r--r--Documentation/driver-api/gpio/index.rst4
-rw-r--r--Documentation/driver-api/gpio/legacy-boards.rst316
-rw-r--r--Documentation/driver-api/gpio/pca953x.rst639
-rw-r--r--Documentation/driver-api/hw-recoverable-errors.rst60
-rw-r--r--Documentation/driver-api/i3c/protocol.rst4
-rw-r--r--Documentation/driver-api/iio/core.rst2
-rw-r--r--Documentation/driver-api/index.rst10
-rw-r--r--Documentation/driver-api/infiniband.rst16
-rw-r--r--Documentation/driver-api/interconnect.rst16
-rw-r--r--Documentation/driver-api/ipmi.rst33
-rw-r--r--Documentation/driver-api/libata.rst25
-rw-r--r--Documentation/driver-api/mailbox.rst2
-rw-r--r--Documentation/driver-api/media/camera-sensor.rst24
-rw-r--r--Documentation/driver-api/media/drivers/zoran.rst2
-rw-r--r--Documentation/driver-api/media/index.rst1
-rw-r--r--Documentation/driver-api/media/maintainer-entry-profile.rst463
-rw-r--r--Documentation/driver-api/media/media-committers.rst203
-rw-r--r--Documentation/driver-api/media/tx-rx.rst30
-rw-r--r--Documentation/driver-api/media/v4l2-controls.rst9
-rw-r--r--Documentation/driver-api/media/v4l2-core.rst1
-rw-r--r--Documentation/driver-api/media/v4l2-dev.rst8
-rw-r--r--Documentation/driver-api/media/v4l2-fh.rst59
-rw-r--r--Documentation/driver-api/media/v4l2-isp.rst49
-rw-r--r--Documentation/driver-api/memory-devices/index.rst7
-rw-r--r--Documentation/driver-api/mtdnand.rst4
-rw-r--r--Documentation/driver-api/ntb.rst2
-rw-r--r--Documentation/driver-api/nvdimm/btt.rst2
-rw-r--r--Documentation/driver-api/nvdimm/nvdimm.rst8
-rw-r--r--Documentation/driver-api/nvmem.rst14
-rw-r--r--Documentation/driver-api/parport-lowlevel.rst5
-rw-r--r--Documentation/driver-api/pci/index.rst8
-rw-r--r--Documentation/driver-api/pci/p2pdma.rst97
-rw-r--r--Documentation/driver-api/pci/pci.rst3
-rw-r--r--Documentation/driver-api/pci/tsm.rst21
-rw-r--r--Documentation/driver-api/phy/index.rst8
-rw-r--r--Documentation/driver-api/phy/phy.rst5
-rw-r--r--Documentation/driver-api/pin-control.rst71
-rw-r--r--Documentation/driver-api/pldmfw/index.rst1
-rw-r--r--Documentation/driver-api/pm/devices.rst6
-rw-r--r--Documentation/driver-api/pm/index.rst7
-rw-r--r--Documentation/driver-api/pps.rst27
-rw-r--r--Documentation/driver-api/pwm.rst13
-rw-r--r--Documentation/driver-api/reset.rst2
-rw-r--r--Documentation/driver-api/scsi.rst4
-rw-r--r--Documentation/driver-api/serial/driver.rst11
-rw-r--r--Documentation/driver-api/serial/index.rst7
-rw-r--r--Documentation/driver-api/soundwire/bra.rst336
-rw-r--r--Documentation/driver-api/soundwire/bra_cadence.rst66
-rw-r--r--Documentation/driver-api/soundwire/index.rst9
-rw-r--r--Documentation/driver-api/soundwire/stream.rst2
-rw-r--r--Documentation/driver-api/soundwire/summary.rst8
-rw-r--r--Documentation/driver-api/spi.rst2
-rw-r--r--Documentation/driver-api/surface_aggregator/clients/index.rst7
-rw-r--r--Documentation/driver-api/surface_aggregator/index.rst7
-rw-r--r--Documentation/driver-api/tee.rst18
-rw-r--r--Documentation/driver-api/thermal/exynos_thermal_emulation.rst14
-rw-r--r--Documentation/driver-api/thermal/intel_dptf.rst56
-rw-r--r--Documentation/driver-api/thermal/sysfs-api.rst25
-rw-r--r--Documentation/driver-api/tty/tty_driver.rst4
-rw-r--r--Documentation/driver-api/tty/tty_ldisc.rst2
-rw-r--r--Documentation/driver-api/tty/tty_port.rst5
-rw-r--r--Documentation/driver-api/tty/tty_struct.rst2
-rw-r--r--Documentation/driver-api/usb/anchors.rst11
-rw-r--r--Documentation/driver-api/usb/gadget.rst2
-rw-r--r--Documentation/driver-api/usb/hotplug.rst2
-rw-r--r--Documentation/driver-api/usb/index.rst8
-rw-r--r--Documentation/driver-api/usb/usb.rst5
-rw-r--r--Documentation/driver-api/usb/writing_musb_glue_layer.rst4
-rw-r--r--Documentation/driver-api/vme.rst2
-rw-r--r--Documentation/driver-api/wmi.rst5
-rw-r--r--Documentation/driver-api/xilinx/index.rst7
140 files changed, 7938 insertions, 627 deletions
diff --git a/Documentation/driver-api/80211/index.rst b/Documentation/driver-api/80211/index.rst
index af210859d3e1..62305e9c3113 100644
--- a/Documentation/driver-api/80211/index.rst
+++ b/Documentation/driver-api/80211/index.rst
@@ -8,10 +8,3 @@ Linux 802.11 Driver Developer's Guide
cfg80211
mac80211
mac80211-advanced
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/acpi/acpi-drivers.rst b/Documentation/driver-api/acpi/acpi-drivers.rst
new file mode 100644
index 000000000000..376b6d8a678c
--- /dev/null
+++ b/Documentation/driver-api/acpi/acpi-drivers.rst
@@ -0,0 +1,80 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=========================================
+Why using ACPI drivers is not a good idea
+=========================================
+
+:Copyright: |copy| 2026, Intel Corporation
+
+:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+Even though binding drivers directly to struct acpi_device objects, also
+referred to as "ACPI device nodes", allows basic functionality to be provided
+at least in some cases, there are problems with it, related to general
+consistency, sysfs layout, power management operation ordering, and code
+cleanliness.
+
+First of all, ACPI device nodes represent firmware entities rather than
+hardware and in many cases they provide auxiliary information on devices
+enumerated independently (like PCI devices or CPUs). It is therefore generally
+questionable to assign resources to them because the entities represented by
+them do not decode addresses in the memory or I/O address spaces and do not
+generate interrupts or similar (all of that is done by hardware).
+
+Second, as a general rule, a struct acpi_device can only be a parent of another
+struct acpi_device. If that is not the case, the location of the child device
+in the device hierarchy is at least confusing and it may not be straightforward
+to identify the piece of hardware providing functionality represented by it.
+However, binding a driver directly to an ACPI device node may cause that to
+happen if the given driver registers input devices or wakeup sources under it,
+for example.
+
+Next, using system suspend and resume callbacks directly on ACPI device nodes
+is also questionable because it may cause ordering problems to appear. Namely,
+ACPI device nodes are registered before enumerating hardware corresponding to
+them and they land on the PM list in front of the majority of other device
+objects. Consequently, the execution ordering of their PM callbacks may be
+different from what is generally expected. Also, in general, dependencies
+returned by _DEP objects do not affect ACPI device nodes themselves, but the
+"physical" devices associated with them, which potentially is one more source
+of inconsistency related to treating ACPI device nodes as "real" device
+representation.
+
+All of the above means that binding drivers to ACPI device nodes should
+generally be avoided and so struct acpi_driver objects should not be used.
+
+Moreover, a device ID is necessary to bind a driver directly to an ACPI device
+node, but device IDs are not generally associated with all of them. Some of
+them contain alternative information allowing the corresponding pieces of
+hardware to be identified, for example represented by an _ADR object return
+value, and device IDs are not used in those cases. In consequence, confusingly
+enough, binding an ACPI driver to an ACPI device node may even be impossible.
+
+When that happens, the piece of hardware corresponding to the given ACPI device
+node is represented by another device object, like a struct pci_dev, and the
+ACPI device node is the "ACPI companion" of that device, accessible through its
+fwnode pointer used by the ACPI_COMPANION() macro. The ACPI companion holds
+additional information on the device configuration and possibly some "recipes"
+on device manipulation in the form of AML (ACPI Machine Language) bytecode
+provided by the platform firmware. Thus the role of the ACPI device node is
+similar to the role of a struct device_node on a system where Device Tree is
+used for platform description.
+
+For consistency, this approach has been extended to the cases in which ACPI
+device IDs are used. Namely, in those cases, an additional device object is
+created to represent the piece of hardware corresponding to a given ACPI device
+node. By default, it is a platform device, but it may also be a PNP device, a
+CPU device, or another type of device, depending on what the given piece of
+hardware actually is. There are even cases in which multiple devices are
+"backed" or "accompanied" by one ACPI device node (e.g. ACPI device nodes
+corresponding to GPUs that may provide firmware interfaces for backlight
+brightness control in addition to GPU configuration information).
+
+This means that it really should never be necessary to bind a driver directly to
+an ACPI device node because there is a "proper" device object representing the
+corresponding piece of hardware that can be bound to by a "proper" driver using
+the given ACPI device node as the device's ACPI companion. Thus, in principle,
+there is no reason to use ACPI drivers and if they all were replaced with other
+driver types (for example, platform drivers), some code could be dropped and
+some complexity would go away.
diff --git a/Documentation/driver-api/acpi/index.rst b/Documentation/driver-api/acpi/index.rst
index ace0008e54c2..2b10d83f9994 100644
--- a/Documentation/driver-api/acpi/index.rst
+++ b/Documentation/driver-api/acpi/index.rst
@@ -7,3 +7,4 @@ ACPI Support
linuxized-acpica
scan_handlers
+ acpi-drivers
diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst
index d78b7c328ff7..8b6a5888cb11 100644
--- a/Documentation/driver-api/basics.rst
+++ b/Documentation/driver-api/basics.rst
@@ -108,13 +108,31 @@ Kernel objects manipulation
.. kernel-doc:: lib/kobject.c
:export:
+.. kernel-doc:: lib/kobject_uevent.c
+ :export:
+
Kernel utility functions
------------------------
-.. kernel-doc:: include/linux/kernel.h
+.. kernel-doc:: include/linux/array_size.h
+ :internal:
+
+.. kernel-doc:: include/linux/container_of.h
+ :internal:
+
+.. kernel-doc:: include/linux/kstrtox.h
:internal:
:no-identifiers: kstrtol kstrtoul
+.. kernel-doc:: include/linux/stddef.h
+ :internal:
+
+.. kernel-doc:: include/linux/util_macros.h
+ :internal:
+
+.. kernel-doc:: include/linux/wordpart.h
+ :internal:
+
.. kernel-doc:: kernel/printk/printk.c
:export:
:no-identifiers: printk
diff --git a/Documentation/driver-api/coco/index.rst b/Documentation/driver-api/coco/index.rst
new file mode 100644
index 000000000000..783c8b033547
--- /dev/null
+++ b/Documentation/driver-api/coco/index.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+Confidential Computing
+======================
+
+.. toctree::
+ :maxdepth: 1
+
+ measurement-registers
diff --git a/Documentation/driver-api/coco/measurement-registers.rst b/Documentation/driver-api/coco/measurement-registers.rst
new file mode 100644
index 000000000000..962a44efa2c0
--- /dev/null
+++ b/Documentation/driver-api/coco/measurement-registers.rst
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=====================
+Measurement Registers
+=====================
+
+.. kernel-doc:: include/linux/tsm-mr.h
+ :internal:
+
+.. kernel-doc:: drivers/virt/coco/guest/tsm-mr.c
+ :export:
diff --git a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst
index 8e50b900d51c..f815d4fd8372 100644
--- a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst
+++ b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst
@@ -476,7 +476,6 @@ Use the following commands to enable zswap::
# echo 0 > /sys/module/zswap/parameters/enabled
# echo 50 > /sys/module/zswap/parameters/max_pool_percent
# echo deflate-iaa > /sys/module/zswap/parameters/compressor
- # echo zsmalloc > /sys/module/zswap/parameters/zpool
# echo 1 > /sys/module/zswap/parameters/enabled
# echo 100 > /proc/sys/vm/swappiness
# echo never > /sys/kernel/mm/transparent_hugepage/enabled
@@ -625,7 +624,6 @@ the 'fixed' compression mode::
echo 0 > /sys/module/zswap/parameters/enabled
echo 50 > /sys/module/zswap/parameters/max_pool_percent
echo deflate-iaa > /sys/module/zswap/parameters/compressor
- echo zsmalloc > /sys/module/zswap/parameters/zpool
echo 1 > /sys/module/zswap/parameters/enabled
echo 100 > /proc/sys/vm/swappiness
diff --git a/Documentation/driver-api/crypto/iaa/index.rst b/Documentation/driver-api/crypto/iaa/index.rst
index aa6837e27264..463f7da569c5 100644
--- a/Documentation/driver-api/crypto/iaa/index.rst
+++ b/Documentation/driver-api/crypto/iaa/index.rst
@@ -11,10 +11,3 @@ API.
:maxdepth: 1
iaa-crypto
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/crypto/index.rst b/Documentation/driver-api/crypto/index.rst
index fb9709b98bea..bba669014cb2 100644
--- a/Documentation/driver-api/crypto/index.rst
+++ b/Documentation/driver-api/crypto/index.rst
@@ -11,10 +11,3 @@ configuration.
:maxdepth: 1
iaa/index
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/cxl/access-coordinates.rst b/Documentation/driver-api/cxl/access-coordinates.rst
deleted file mode 100644
index b07950ea30c9..000000000000
--- a/Documentation/driver-api/cxl/access-coordinates.rst
+++ /dev/null
@@ -1,91 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-.. include:: <isonum.txt>
-
-==================================
-CXL Access Coordinates Computation
-==================================
-
-Shared Upstream Link Calculation
-================================
-For certain CXL region construction with endpoints behind CXL switches (SW) or
-Root Ports (RP), there is the possibility of the total bandwidth for all
-the endpoints behind a switch being more than the switch upstream link.
-A similar situation can occur within the host, upstream of the root ports.
-The CXL driver performs an additional pass after all the targets have
-arrived for a region in order to recalculate the bandwidths with possible
-upstream link being a limiting factor in mind.
-
-The algorithm assumes the configuration is a symmetric topology as that
-maximizes performance. When asymmetric topology is detected, the calculation
-is aborted. An asymmetric topology is detected during topology walk where the
-number of RPs detected as a grandparent is not equal to the number of devices
-iterated in the same iteration loop. The assumption is made that subtle
-asymmetry in properties does not happen and all paths to EPs are equal.
-
-There can be multiple switches under an RP. There can be multiple RPs under
-a CXL Host Bridge (HB). There can be multiple HBs under a CXL Fixed Memory
-Window Structure (CFMWS).
-
-An example hierarchy:
-
-> CFMWS 0
-> |
-> _________|_________
-> | |
-> ACPI0017-0 ACPI0017-1
-> GP0/HB0/ACPI0016-0 GP1/HB1/ACPI0016-1
-> | | | |
-> RP0 RP1 RP2 RP3
-> | | | |
-> SW 0 SW 1 SW 2 SW 3
-> | | | | | | | |
-> EP0 EP1 EP2 EP3 EP4 EP5 EP6 EP7
-
-Computation for the example hierarchy:
-
-Min (GP0 to CPU BW,
- Min(SW 0 Upstream Link to RP0 BW,
- Min(SW0SSLBIS for SW0DSP0 (EP0), EP0 DSLBIS, EP0 Upstream Link) +
- Min(SW0SSLBIS for SW0DSP1 (EP1), EP1 DSLBIS, EP1 Upstream link)) +
- Min(SW 1 Upstream Link to RP1 BW,
- Min(SW1SSLBIS for SW1DSP0 (EP2), EP2 DSLBIS, EP2 Upstream Link) +
- Min(SW1SSLBIS for SW1DSP1 (EP3), EP3 DSLBIS, EP3 Upstream link))) +
-Min (GP1 to CPU BW,
- Min(SW 2 Upstream Link to RP2 BW,
- Min(SW2SSLBIS for SW2DSP0 (EP4), EP4 DSLBIS, EP4 Upstream Link) +
- Min(SW2SSLBIS for SW2DSP1 (EP5), EP5 DSLBIS, EP5 Upstream link)) +
- Min(SW 3 Upstream Link to RP3 BW,
- Min(SW3SSLBIS for SW3DSP0 (EP6), EP6 DSLBIS, EP6 Upstream Link) +
- Min(SW3SSLBIS for SW3DSP1 (EP7), EP7 DSLBIS, EP7 Upstream link))))
-
-The calculation starts at cxl_region_shared_upstream_perf_update(). A xarray
-is created to collect all the endpoint bandwidths via the
-cxl_endpoint_gather_bandwidth() function. The min() of bandwidth from the
-endpoint CDAT and the upstream link bandwidth is calculated. If the endpoint
-has a CXL switch as a parent, then min() of calculated bandwidth and the
-bandwidth from the SSLBIS for the switch downstream port that is associated
-with the endpoint is calculated. The final bandwidth is stored in a
-'struct cxl_perf_ctx' in the xarray indexed by a device pointer. If the
-endpoint is direct attached to a root port (RP), the device pointer would be an
-RP device. If the endpoint is behind a switch, the device pointer would be the
-upstream device of the parent switch.
-
-At the next stage, the code walks through one or more switches if they exist
-in the topology. For endpoints directly attached to RPs, this step is skipped.
-If there is another switch upstream, the code takes the min() of the current
-gathered bandwidth and the upstream link bandwidth. If there's a switch
-upstream, then the SSLBIS of the upstream switch.
-
-Once the topology walk reaches the RP, whether it's direct attached endpoints
-or walking through the switch(es), cxl_rp_gather_bandwidth() is called. At
-this point all the bandwidths are aggregated per each host bridge, which is
-also the index for the resulting xarray.
-
-The next step is to take the min() of the per host bridge bandwidth and the
-bandwidth from the Generic Port (GP). The bandwidths for the GP is retrieved
-via ACPI tables SRAT/HMAT. The min bandwidth are aggregated under the same
-ACPI0017 device to form a new xarray.
-
-Finally, the cxl_region_update_bandwidth() is called and the aggregated
-bandwidth from all the members of the last xarray is updated for the
-access coordinates residing in the cxl region (cxlr) context.
diff --git a/Documentation/driver-api/cxl/allocation/dax.rst b/Documentation/driver-api/cxl/allocation/dax.rst
new file mode 100644
index 000000000000..c6f7a5da832f
--- /dev/null
+++ b/Documentation/driver-api/cxl/allocation/dax.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+DAX Devices
+===========
+CXL capacity exposed as a DAX device can be accessed directly via mmap.
+Users may wish to use this interface mechanism to write their own userland
+CXL allocator, or to managed shared or persistent memory regions across multiple
+hosts.
+
+If the capacity is shared across hosts or persistent, appropriate flushing
+mechanisms must be employed unless the region supports Snoop Back-Invalidate.
+
+Note that mappings must be aligned (size and base) to the dax device's base
+alignment, which is typically 2MB - but maybe be configured larger.
+
+::
+
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <stdint.h>
+ #include <sys/mman.h>
+ #include <fcntl.h>
+ #include <unistd.h>
+
+ #define DEVICE_PATH "/dev/dax0.0" // Replace DAX device path
+ #define DEVICE_SIZE (4ULL * 1024 * 1024 * 1024) // 4GB
+
+ int main() {
+ int fd;
+ void* mapped_addr;
+
+ /* Open the DAX device */
+ fd = open(DEVICE_PATH, O_RDWR);
+ if (fd < 0) {
+ perror("open");
+ return -1;
+ }
+
+ /* Map the device into memory */
+ mapped_addr = mmap(NULL, DEVICE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (mapped_addr == MAP_FAILED) {
+ perror("mmap");
+ close(fd);
+ return -1;
+ }
+
+ printf("Mapped address: %p\n", mapped_addr);
+
+ /* You can now access the device through the mapped address */
+ uint64_t* ptr = (uint64_t*)mapped_addr;
+ *ptr = 0x1234567890abcdef; // Write a value to the device
+ printf("Value at address %p: 0x%016llx\n", ptr, *ptr);
+
+ /* Clean up */
+ munmap(mapped_addr, DEVICE_SIZE);
+ close(fd);
+ return 0;
+ }
diff --git a/Documentation/driver-api/cxl/allocation/hugepages.rst b/Documentation/driver-api/cxl/allocation/hugepages.rst
new file mode 100644
index 000000000000..1023c6922829
--- /dev/null
+++ b/Documentation/driver-api/cxl/allocation/hugepages.rst
@@ -0,0 +1,32 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+Huge Pages
+==========
+
+Contiguous Memory Allocator
+===========================
+CXL Memory onlined as SystemRAM during early boot is eligible for use by CMA,
+as the NUMA node hosting that capacity will be `Online` at the time CMA
+carves out contiguous capacity.
+
+CXL Memory deferred to the CXL Driver for configuration cannot have its
+capacity allocated by CMA - as the NUMA node hosting the capacity is `Offline`
+at :code:`__init` time - when CMA carves out contiguous capacity.
+
+HugeTLB
+=======
+Different huge page sizes allow different memory configurations.
+
+2MB Huge Pages
+--------------
+All CXL capacity regardless of configuration time or memory zone is eligible
+for use as 2MB huge pages.
+
+1GB Huge Pages
+--------------
+CXL capacity onlined in :code:`ZONE_NORMAL` is eligible for 1GB Gigantic Page
+allocation.
+
+CXL capacity onlined in :code:`ZONE_MOVABLE` is not eligible for 1GB Gigantic
+Page allocation.
diff --git a/Documentation/driver-api/cxl/allocation/page-allocator.rst b/Documentation/driver-api/cxl/allocation/page-allocator.rst
new file mode 100644
index 000000000000..3fa584a248bd
--- /dev/null
+++ b/Documentation/driver-api/cxl/allocation/page-allocator.rst
@@ -0,0 +1,54 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+The Page Allocator
+==================
+
+The kernel page allocator services all general page allocation requests, such
+as :code:`kmalloc`. CXL configuration steps affect the behavior of the page
+allocator based on the selected `Memory Zone` and `NUMA node` the capacity is
+placed in.
+
+This section mostly focuses on how these configurations affect the page
+allocator (as of Linux v6.15) rather than the overall page allocator behavior.
+
+NUMA nodes and mempolicy
+========================
+Unless a task explicitly registers a mempolicy, the default memory policy
+of the linux kernel is to allocate memory from the `local NUMA node` first,
+and fall back to other nodes only if the local node is pressured.
+
+Generally, we expect to see local DRAM and CXL memory on separate NUMA nodes,
+with the CXL memory being non-local. Technically, however, it is possible
+for a compute node to have no local DRAM, and for CXL memory to be the
+`local` capacity for that compute node.
+
+
+Memory Zones
+============
+CXL capacity may be onlined in :code:`ZONE_NORMAL` or :code:`ZONE_MOVABLE`.
+
+As of v6.15, the page allocator attempts to allocate from the highest
+available and compatible ZONE for an allocation from the local node first.
+
+An example of a `zone incompatibility` is attempting to service an allocation
+marked :code:`GFP_KERNEL` from :code:`ZONE_MOVABLE`. Kernel allocations are
+typically not migratable, and as a result can only be serviced from
+:code:`ZONE_NORMAL` or lower.
+
+To simplify this, the page allocator will prefer :code:`ZONE_MOVABLE` over
+:code:`ZONE_NORMAL` by default, but if :code:`ZONE_MOVABLE` is depleted, it
+will fallback to allocate from :code:`ZONE_NORMAL`.
+
+
+CGroups and CPUSets
+===================
+Finally, assuming CXL memory is reachable via the page allocation (i.e. onlined
+in :code:`ZONE_NORMAL`), the :code:`cpusets.mems_allowed` may be used by
+containers to limit the accessibility of certain NUMA nodes for tasks in that
+container. Users may wish to utilize this in multi-tenant systems where some
+tasks prefer not to use slower memory.
+
+In the reclaim section we'll discuss some limitations of this interface to
+prevent demotions of shared data to CXL memory (if demotions are enabled).
+
diff --git a/Documentation/driver-api/cxl/allocation/reclaim.rst b/Documentation/driver-api/cxl/allocation/reclaim.rst
new file mode 100644
index 000000000000..f40f1cae391a
--- /dev/null
+++ b/Documentation/driver-api/cxl/allocation/reclaim.rst
@@ -0,0 +1,51 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+Reclaim
+=======
+Another way CXL memory can be utilized *indirectly* is via the reclaim system
+in :code:`mm/vmscan.c`. Reclaim is engaged when memory capacity on the system
+becomes pressured based on global and cgroup-local `watermark` settings.
+
+In this section we won't discuss the `watermark` configurations, just how CXL
+memory can be consumed by various pieces of reclaim system.
+
+Demotion
+========
+By default, the reclaim system will prefer swap (or zswap) when reclaiming
+memory. Enabling :code:`kernel/mm/numa/demotion_enabled` will cause vmscan
+to opportunistically prefer distant NUMA nodes to swap or zswap, if capacity
+is available.
+
+Demotion engages the :code:`mm/memory_tier.c` component to determine the
+next demotion node. The next demotion node is based on the :code:`HMAT`
+or :code:`CDAT` performance data.
+
+cpusets.mems_allowed quirk
+--------------------------
+In Linux v6.15 and below, demotion does not respect :code:`cpusets.mems_allowed`
+when migrating pages. As a result, if demotion is enabled, vmscan cannot
+guarantee isolation of a container's memory from nodes not set in mems_allowed.
+
+In Linux v6.XX and up, demotion does attempt to respect
+:code:`cpusets.mems_allowed`; however, certain classes of shared memory
+originally instantiated by another cgroup (such as common libraries - e.g.
+libc) may still be demoted. As a result, the mems_allowed interface still
+cannot provide perfect isolation from the remote nodes.
+
+ZSwap and Node Preference
+=========================
+In Linux v6.15 and below, ZSwap allocates memory from the local node of the
+processor for the new pages being compressed. Since pages being compressed
+are typically cold, the result is a cold page becomes promoted - only to
+be later demoted as it ages off the LRU.
+
+In Linux v6.XX, ZSwap tries to prefer the node of the page being compressed
+as the allocation target for the compression page. This helps prevent
+thrashing.
+
+Demotion with ZSwap
+===================
+When enabling both Demotion and ZSwap, you create a situation where ZSwap
+will prefer the slowest form of CXL memory by default until that tier of
+memory is exhausted.
diff --git a/Documentation/driver-api/cxl/conventions.rst b/Documentation/driver-api/cxl/conventions.rst
new file mode 100644
index 000000000000..0d2e07279ad9
--- /dev/null
+++ b/Documentation/driver-api/cxl/conventions.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Compute Express Link: Linux Conventions
+#######################################
+
+There exists shipping platforms that bend or break CXL specification
+expectations. Record the details and the rationale for those deviations.
+Borrow the ACPI Code First template format to capture the assumptions
+and tradeoffs such that multiple platform implementations can follow the
+same convention.
+
+.. toctree::
+ :maxdepth: 1
+ :caption: Contents
+
+ conventions/cxl-lmh.rst
+ conventions/cxl-atl.rst
+ conventions/template.rst
diff --git a/Documentation/driver-api/cxl/conventions/cxl-atl.rst b/Documentation/driver-api/cxl/conventions/cxl-atl.rst
new file mode 100644
index 000000000000..3a36a84743d0
--- /dev/null
+++ b/Documentation/driver-api/cxl/conventions/cxl-atl.rst
@@ -0,0 +1,304 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+ACPI PRM CXL Address Translation
+================================
+
+Document
+--------
+
+CXL Revision 3.2, Version 1.0
+
+License
+-------
+
+SPDX-License Identifier: CC-BY-4.0
+
+Creator/Contributors
+--------------------
+
+- Robert Richter, AMD et al.
+
+Summary of the Change
+---------------------
+
+The CXL Fixed Memory Window Structures (CFMWS) describe zero or more Host
+Physical Address (HPA) windows associated with one or more CXL Host Bridges.
+Each HPA range of a CXL Host Bridge is represented by a CFMWS entry. An HPA
+range may include addresses currently assigned to CXL.mem devices, or an OS may
+assign ranges from an address window to a device.
+
+Host-managed Device Memory is Device-attached memory that is mapped to system
+coherent address space and accessible to the Host using standard write-back
+semantics. The managed address range is configured in the CXL HDM Decoder
+registers of the device. An HDM Decoder in a device is responsible for
+converting HPA into DPA by stripping off specific address bits.
+
+CXL devices and CXL bridges use the same HPA space. It is common across all
+components that belong to the same host domain. The view of the address region
+must be consistent on the CXL.mem path between the Host and the Device.
+
+This is described in the *CXL 3.2 specification* (Table 1-1, 3.3.1,
+8.2.4.20, 9.13.1, 9.18.1.3). [#cxl-spec-3.2]_
+
+Depending on the interconnect architecture of the platform, components attached
+to a host may not share the same host physical address space. Those platforms
+need address translation to convert an HPA between the host and the attached
+component, such as a CXL device. The translation mechanism is host-specific and
+implementation dependent.
+
+For example, x86 AMD platforms use a Data Fabric that manages access to physical
+memory. Devices have their own memory space and can be configured to use
+'Normalized addresses' different from System Physical Addresses (SPA). Address
+translation is then needed. For details, see
+:doc:`x86 AMD Address Translation </admin-guide/RAS/address-translation>`.
+
+Those AMD platforms provide PRM [#prm-spec]_ handlers in firmware to perform
+various types of address translation, including for CXL endpoints. AMD Zen5
+systems implement the ACPI PRM CXL Address Translation firmware call. The ACPI
+PRM handler has a specific GUID to uniquely identify platforms with support for
+Normalized addressing. This is documented in the *ACPI v6.5 Porting Guide*
+(Address Translation - CXL DPA to System Physical Address). [#amd-ppr-58088]_
+
+When in Normalized address mode, HDM decoder address ranges must be configured
+and handled differently. Hardware addresses used in the HDM decoder
+configurations of an endpoint are not SPA and need to be translated from the
+address range of the endpoint to that of the CXL host bridge. This is especially
+important for finding an endpoint's associated CXL Host Bridge and HPA window
+described in the CFMWS. Additionally, the interleave decoding is done by the
+Data Fabric and the endpoint does not perform decoding when converting HPA to
+DPA. Instead, interleaving is switched off for the endpoint (1-way). Finally,
+address translation might also be needed to inspect the endpoint's hardware
+addresses, such as during profiling, tracing, or error handling.
+
+For example, with Normalized addressing the HDM decoders could look as follows::
+
+ -------------------------------
+ | Root Decoder (CFMWS) |
+ | SPA Range: 0x850000000 |
+ | Size: 0x8000000000 (512 GB) |
+ | Interleave Ways: 1 |
+ -------------------------------
+ |
+ v
+ -------------------------------
+ | Host Bridge Decoder (HDM) |
+ | SPA Range: 0x850000000 |
+ | Size: 0x8000000000 (512 GB) |
+ | Interleave Ways: 4 |
+ | Targets: endpoint5,8,11,13 |
+ | Granularity: 256 |
+ -------------------------------
+ |
+ -----------------------------+------------------------------
+ | | | |
+ v v v v
+ ------------------- ------------------- ------------------- -------------------
+ | endpoint5 | | endpoint8 | | endpoint11 | | endpoint13 |
+ | decoder5.0 | | decoder8.0 | | decoder11.0 | | decoder13.0 |
+ | PCIe: | | PCIe: | | PCIe: | | PCIe: |
+ | 0000:e2:00.0 | | 0000:e3:00.0 | | 0000:e4:00.0 | | 0000:e1:00.0 |
+ | DPA: | | DPA: | | DPA: | | DPA: |
+ | Start: 0x0 | | Start: 0x0 | | Start: 0x0 | | Start: 0x0 |
+ | Size: | | Size: | | Size: | | Size: |
+ | 0x2000000000 | | 0x2000000000 | | 0x2000000000 | | 0x2000000000 |
+ | (128 GB) | | (128 GB) | | (128 GB) | | (128 GB) |
+ | Interleaving: | | Interleaving: | | Interleaving: | | Interleaving: |
+ | Ways: 1 | | Ways: 1 | | Ways: 1 | | Ways: 1 |
+ | Gran: 256 | | Gran: 256 | | Gran: 256 | | Gran: 256 |
+ ------------------- ------------------- ------------------- -------------------
+ | | | |
+ v v v v
+ DPA DPA DPA DPA
+
+This shows the representation in sysfs:
+
+.. code-block:: none
+
+ /sys/bus/cxl/devices/endpoint5/decoder5.0/interleave_granularity:256
+ /sys/bus/cxl/devices/endpoint5/decoder5.0/interleave_ways:1
+ /sys/bus/cxl/devices/endpoint5/decoder5.0/size:0x2000000000
+ /sys/bus/cxl/devices/endpoint5/decoder5.0/start:0x0
+ /sys/bus/cxl/devices/endpoint8/decoder8.0/interleave_granularity:256
+ /sys/bus/cxl/devices/endpoint8/decoder8.0/interleave_ways:1
+ /sys/bus/cxl/devices/endpoint8/decoder8.0/size:0x2000000000
+ /sys/bus/cxl/devices/endpoint8/decoder8.0/start:0x0
+ /sys/bus/cxl/devices/endpoint11/decoder11.0/interleave_granularity:256
+ /sys/bus/cxl/devices/endpoint11/decoder11.0/interleave_ways:1
+ /sys/bus/cxl/devices/endpoint11/decoder11.0/size:0x2000000000
+ /sys/bus/cxl/devices/endpoint11/decoder11.0/start:0x0
+ /sys/bus/cxl/devices/endpoint13/decoder13.0/interleave_granularity:256
+ /sys/bus/cxl/devices/endpoint13/decoder13.0/interleave_ways:1
+ /sys/bus/cxl/devices/endpoint13/decoder13.0/size:0x2000000000
+ /sys/bus/cxl/devices/endpoint13/decoder13.0/start:0x0
+
+Note that the endpoint interleaving configurations use direct mapping (1-way).
+
+With PRM calls, the kernel can determine the following mappings:
+
+.. code-block:: none
+
+ cxl decoder5.0: address mapping found for 0000:e2:00.0 (hpa -> spa):
+ 0x0+0x2000000000 -> 0x850000000+0x8000000000 ways:4 granularity:256
+ cxl decoder8.0: address mapping found for 0000:e3:00.0 (hpa -> spa):
+ 0x0+0x2000000000 -> 0x850000000+0x8000000000 ways:4 granularity:256
+ cxl decoder11.0: address mapping found for 0000:e4:00.0 (hpa -> spa):
+ 0x0+0x2000000000 -> 0x850000000+0x8000000000 ways:4 granularity:256
+ cxl decoder13.0: address mapping found for 0000:e1:00.0 (hpa -> spa):
+ 0x0+0x2000000000 -> 0x850000000+0x8000000000 ways:4 granularity:256
+
+The corresponding CXL host bridge (HDM) decoders and root decoder (CFMWS) match
+the calculated endpoint mappings shown:
+
+.. code-block:: none
+
+ /sys/bus/cxl/devices/port1/decoder1.0/interleave_granularity:256
+ /sys/bus/cxl/devices/port1/decoder1.0/interleave_ways:4
+ /sys/bus/cxl/devices/port1/decoder1.0/size:0x8000000000
+ /sys/bus/cxl/devices/port1/decoder1.0/start:0x850000000
+ /sys/bus/cxl/devices/port1/decoder1.0/target_list:0,1,2,3
+ /sys/bus/cxl/devices/port1/decoder1.0/target_type:expander
+ /sys/bus/cxl/devices/root0/decoder0.0/interleave_granularity:256
+ /sys/bus/cxl/devices/root0/decoder0.0/interleave_ways:1
+ /sys/bus/cxl/devices/root0/decoder0.0/size:0x8000000000
+ /sys/bus/cxl/devices/root0/decoder0.0/start:0x850000000
+ /sys/bus/cxl/devices/root0/decoder0.0/target_list:7
+
+The following changes to the specification are needed:
+
+* Allow a CXL device to be in an HPA space other than the host's address space.
+
+* Allow the platform to use implementation-specific address translation when
+ crossing memory domains on the CXL.mem path between the host and the device.
+
+* Define a PRM handler method for converting device addresses to SPAs.
+
+* Specify that the platform shall provide the PRM handler method to the
+ Operating System to detect Normalized addressing and for determining Endpoint
+ SPA ranges and interleaving configurations.
+
+* Add reference to:
+
+ | Platform Runtime Mechanism Specification, Version 1.1 – November 2020
+ | https://uefi.org/sites/default/files/resources/PRM_Platform_Runtime_Mechanism_1_1_release_candidate.pdf
+
+Benefits of the Change
+----------------------
+
+Without the change, the Operating System may be unable to determine the memory
+region and Root Decoder for an Endpoint and its corresponding HDM decoder.
+Region creation would fail. Platforms with a different interconnect architecture
+would fail to set up and use CXL.
+
+References
+----------
+
+.. [#cxl-spec-3.2] Compute Express Link Specification, Revision 3.2, Version 1.0,
+ https://www.computeexpresslink.org/
+
+.. [#amd-ppr-58088] AMD Family 1Ah Models 00h–0Fh and Models 10h–1Fh,
+ ACPI v6.5 Porting Guide, Publication # 58088,
+ https://www.amd.com/en/search/documentation/hub.html
+
+.. [#prm-spec] Platform Runtime Mechanism, Version: 1.1,
+ https://uefi.org/sites/default/files/resources/PRM_Platform_Runtime_Mechanism_1_1_release_candidate.pdf
+
+Detailed Description of the Change
+----------------------------------
+
+The following describes the necessary changes to the *CXL 3.2 specification*
+[#cxl-spec-3.2]_:
+
+Add the following reference to the table:
+
+Table 1-2. Reference Documents
+
++----------------------------+-------------------+---------------------------+
+| Document | Chapter Reference | Document No./Location |
++============================+===================+===========================+
+| Platform Runtime Mechanism | Chapter 8, 9 | https://www.uefi.org/acpi |
+| Version: 1.1 | | |
++----------------------------+-------------------+---------------------------+
+
+Add the following paragraphs to the end of the section:
+
+**8.2.4.20 CXL HDM Decoder Capability Structure**
+
+"A device may use an HPA space that is not common to other components of the
+host domain. The platform is responsible for address translation when crossing
+HPA spaces. The Operating System must determine the interleaving configuration
+and perform address translation to the HPA ranges of the HDM decoders as needed.
+The translation mechanism is host-specific and implementation dependent.
+
+The platform indicates support of independent HPA spaces and the need for
+address translation by providing a Platform Runtime Mechanism (PRM) handler. The
+OS shall use that handler to perform the necessary translations from the DPA
+space to the HPA space. The handler is defined in Section 9.18.4 *PRM Handler
+for CXL DPA to System Physical Address Translation*."
+
+Add the following section and sub-section including tables:
+
+**9.18.4 PRM Handler for CXL DPA to System Physical Address Translation**
+
+"A platform may be configured to use 'Normalized addresses'. Host physical
+address (HPA) spaces are component-specific and differ from system physical
+addresses (SPAs). The endpoint has its own physical address space. All requests
+presented to the device already use Device Physical Addresses (DPAs). The CXL
+endpoint decoders have interleaving disabled (1-way interleaving) and the device
+does not perform HPA decoding to determine a DPA.
+
+The platform provides a PRM handler for CXL DPA to System Physical Address
+Translation. The PRM handler translates a Device Physical Address (DPA) to a
+System Physical Address (SPA) for a specified CXL endpoint. In the address space
+of the host, SPA and HPA are equivalent, and the OS shall use this handler to
+determine the HPA that corresponds to a device address, for example when
+configuring HDM decoders on platforms with Normalized addressing. The GUID and
+the parameter buffer format of the handler are specified in section 9.18.4.1. If
+the OS identifies the PRM handler, the platform supports Normalized addressing
+and the OS must perform DPA address translation as needed."
+
+**9.18.4.1 PRM Handler Invocation**
+
+"The OS calls the PRM handler for CXL DPA to System Physical Address Translation
+using the direct invocation mechanism. Details of calling a PRM handler are
+described in the Platform Runtime Mechanism (PRM) specification.
+
+The PRM handler is identified by the following GUID:
+
+ EE41B397-25D4-452C-AD54-48C6E3480B94
+
+The caller allocates and prepares a Parameter Buffer, then passes the PRM
+handler GUID and a pointer to the Parameter Buffer to invoke the handler. The
+Parameter Buffer is described in Table 9-32."
+
+**Table 9-32. PRM Parameter Buffer used for CXL DPA to System Physical Address Translation**
+
++-------------+-----------+------------------------------------------------------------------------+
+| Byte Offset | Length in | Description |
+| | Bytes | |
++=============+===========+========================================================================+
+| 00h | 8 | **CXL Device Physical Address (DPA)**: CXL DPA (e.g., from |
+| | | CXL Component Event Log) |
++-------------+-----------+------------------------------------------------------------------------+
+| 08h | 4 | **CXL Endpoint SBDF**: |
+| | | |
+| | | - Byte 3 - PCIe Segment |
+| | | - Byte 2 - Bus Number |
+| | | - Byte 1: |
+| | | - Device Number Bits[7:3] |
+| | | - Function Number Bits[2:0] |
+| | | - Byte 0 - RESERVED (MBZ) |
+| | | |
++-------------+-----------+------------------------------------------------------------------------+
+| 0Ch | 8 | **Output Buffer**: Virtual Address Pointer to the buffer, |
+| | | as defined in Table 9-33. |
++-------------+-----------+------------------------------------------------------------------------+
+
+**Table 9-33. PRM Output Buffer used for CXL DPA to System Physical Address Translation**
+
++-------------+-----------+------------------------------------------------------------------------+
+| Byte Offset | Length in | Description |
+| | Bytes | |
++=============+===========+========================================================================+
+| 00h | 8 | **System Physical Address (SPA)**: The SPA converted |
+| | | from the CXL DPA. |
++-------------+-----------+------------------------------------------------------------------------+
diff --git a/Documentation/driver-api/cxl/conventions/cxl-lmh.rst b/Documentation/driver-api/cxl/conventions/cxl-lmh.rst
new file mode 100644
index 000000000000..baece5c35345
--- /dev/null
+++ b/Documentation/driver-api/cxl/conventions/cxl-lmh.rst
@@ -0,0 +1,135 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Resolve conflict between CFMWS, Platform Memory Holes, and Endpoint Decoders
+============================================================================
+
+Document
+--------
+
+CXL Revision 3.2, Version 1.0
+
+License
+-------
+
+SPDX-License Identifier: CC-BY-4.0
+
+Creator/Contributors
+--------------------
+
+- Fabio M. De Francesco, Intel
+- Dan J. Williams, Intel
+- Mahesh Natu, Intel
+
+Summary of the Change
+---------------------
+
+According to the current Compute Express Link (CXL) Specifications (Revision
+3.2, Version 1.0), the CXL Fixed Memory Window Structure (CFMWS) describes zero
+or more Host Physical Address (HPA) windows associated with each CXL Host
+Bridge. Each window represents a contiguous HPA range that may be interleaved
+across one or more targets, including CXL Host Bridges. Each window has a set
+of restrictions that govern its usage. It is the Operating System-directed
+configuration and Power Management (OSPM) responsibility to utilize each window
+for the specified use.
+
+Table 9-22 of the current CXL Specifications states that the Window Size field
+contains the total number of consecutive bytes of HPA this window describes.
+This value must be a multiple of the Number of Interleave Ways (NIW) * 256 MB.
+
+Platform Firmware (BIOS) might reserve physical addresses below 4 GB where a
+memory gap such as the Low Memory Hole for PCIe MMIO may exist. In such cases,
+the CFMWS Range Size may not adhere to the NIW * 256 MB rule.
+
+The HPA represents the actual physical memory address space that the CXL devices
+can decode and respond to, while the System Physical Address (SPA), a related
+but distinct concept, represents the system-visible address space that users can
+direct transaction to and so it excludes reserved regions.
+
+BIOS publishes CFMWS to communicate the active SPA ranges that, on platforms
+with LMH's, map to a strict subset of the HPA. The SPA range trims out the hole,
+resulting in lost capacity in the Endpoints with no SPA to map to that part of
+the HPA range that intersects the hole.
+
+E.g, an x86 platform with two CFMWS and an LMH starting at 2 GB:
+
+ +--------+------------+-------------------+------------------+-------------------+------+
+ | Window | CFMWS Base | CFMWS Size | HDM Decoder Base | HDM Decoder Size | Ways |
+ +========+============+===================+==================+===================+======+
+ |  0 | 0 GB | 2 GB | 0 GB | 3 GB | 12 |
+ +--------+------------+-------------------+------------------+-------------------+------+
+ |  1 | 4 GB | NIW*256MB Aligned | 4 GB | NIW*256MB Aligned | 12 |
+ +--------+------------+-------------------+------------------+-------------------+------+
+
+HDM decoder base and HDM decoder size represent all the 12 Endpoint Decoders of
+a 12 ways region and all the intermediate Switch Decoders. They are configured
+by the BIOS according to the NIW * 256MB rule, resulting in a HPA range size of
+3GB. Instead, the CFMWS Base and CFMWS Size are used to configure the Root
+Decoder HPA range that results smaller (2GB) than that of the Switch and
+Endpoint Decoders in the hierarchy (3GB).
+
+This creates 2 issues which lead to a failure to construct a region:
+
+1) A mismatch in region size between root and any HDM decoder. The root decoders
+ will always be smaller due to the trim.
+
+2) The trim causes the root decoder to violate the (NIW * 256MB) rule.
+
+This change allows a region with a base address of 0GB to bypass these checks to
+allow for region creation with the trimmed root decoder address range.
+
+This change does not allow for any other arbitrary region to violate these
+checks - it is intended exclusively to enable x86 platforms which map CXL memory
+under 4GB.
+
+Despite the HDM decoders covering the PCIE hole HPA region, it is expected that
+the platform will never route address accesses to the CXL complex because the
+root decoder only covers the trimmed region (which excludes this). This is
+outside the ability of Linux to enforce.
+
+On the example platform, only the first 2GB will be potentially usable, but
+Linux, aiming to adhere to the current specifications, fails to construct
+Regions and attach Endpoint and intermediate Switch Decoders to them.
+
+There are several points of failure that due to the expectation that the Root
+Decoder HPA size, that is equal to the CFMWS from which it is configured, has
+to be greater or equal to the matching Switch and Endpoint HDM Decoders.
+
+In order to succeed with construction and attachment, Linux must construct a
+Region with Root Decoder HPA range size, and then attach to that all the
+intermediate Switch Decoders and Endpoint Decoders that belong to the hierarchy
+regardless of their range sizes.
+
+Benefits of the Change
+----------------------
+
+Without the change, the OSPM wouldn't match intermediate Switch and Endpoint
+Decoders with Root Decoders configured with CFMWS HPA sizes that don't align
+with the NIW * 256MB constraint, and so it leads to lost memdev capacity.
+
+This change allows the OSPM to construct Regions and attach intermediate Switch
+and Endpoint Decoders to them, so that the addressable part of the memory
+devices total capacity is made available to the users.
+
+References
+----------
+
+Compute Express Link Specification Revision 3.2, Version 1.0
+<https://www.computeexpresslink.org/>
+
+Detailed Description of the Change
+----------------------------------
+
+The description of the Window Size field in table 9-22 needs to account for
+platforms with Low Memory Holes, where SPA ranges might be subsets of the
+endpoints HPA. Therefore, it has to be changed to the following:
+
+"The total number of consecutive bytes of HPA this window represents. This value
+shall be a multiple of NIW * 256 MB.
+
+On platforms that reserve physical addresses below 4 GB, such as the Low Memory
+Hole for PCIe MMIO on x86, an instance of CFMWS whose Base HPA range is 0 might
+have a size that doesn't align with the NIW * 256 MB constraint.
+
+Note that the matching intermediate Switch Decoders and the Endpoint Decoders
+HPA range sizes must still align to the above-mentioned rule, but the memory
+capacity that exceeds the CFMWS window size won't be accessible.".
diff --git a/Documentation/driver-api/cxl/conventions/template.rst b/Documentation/driver-api/cxl/conventions/template.rst
new file mode 100644
index 000000000000..ff2fcf1b5e24
--- /dev/null
+++ b/Documentation/driver-api/cxl/conventions/template.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. :: Template Title here:
+
+Template File
+=============
+
+Document
+--------
+CXL Revision <rev>, Version <ver>
+
+License
+-------
+SPDX-License Identifier: CC-BY-4.0
+
+Creator/Contributors
+--------------------
+
+Summary of the Change
+---------------------
+
+<Detail the conflict with the specification and where available the
+assumptions and tradeoffs taken by the hardware platform.>
+
+Benefits of the Change
+----------------------
+
+<Detail what happens if platforms and Linux do not adopt this
+convention.>
+
+References
+----------
+
+Detailed Description of the Change
+----------------------------------
+
+<Propose spec language that corrects the conflict.>
diff --git a/Documentation/driver-api/cxl/devices/device-types.rst b/Documentation/driver-api/cxl/devices/device-types.rst
new file mode 100644
index 000000000000..7f69dfa4509b
--- /dev/null
+++ b/Documentation/driver-api/cxl/devices/device-types.rst
@@ -0,0 +1,165 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Devices and Protocols
+=====================
+
+The type of CXL device (Memory, Accelerator, etc) dictates many configuration steps. This section
+covers some basic background on device types and on-device resources used by the platform and OS
+which impact configuration.
+
+Protocols
+=========
+
+There are three core protocols to CXL. For the purpose of this documentation,
+we will only discuss very high level definitions as the specific hardware
+details are largely abstracted away from Linux. See the CXL specification
+for more details.
+
+CXL.io
+------
+The basic interaction protocol, similar to PCIe configuration mechanisms.
+Typically used for initialization, configuration, and I/O access for anything
+other than memory (CXL.mem) or cache (CXL.cache) operations.
+
+The Linux CXL driver exposes access to .io functionality via the various sysfs
+interfaces and /dev/cxl/ devices (which exposes direct access to device
+mailboxes).
+
+CXL.cache
+---------
+The mechanism by which a device may coherently access and cache host memory.
+
+Largely transparent to Linux once configured.
+
+CXL.mem
+---------
+The mechanism by which the CPU may coherently access and cache device memory.
+
+Largely transparent to Linux once configured.
+
+
+Device Types
+============
+
+Type-1
+------
+
+A Type-1 CXL device:
+
+* Supports cxl.io and cxl.cache protocols
+* Implements a fully coherent cache
+* Allows Device-to-Host coherence and Host-to-Device snoops.
+* Does NOT have host-managed device memory (HDM)
+
+Typical examples of type-1 devices is a Smart NIC - which may want to
+directly operate on host-memory (DMA) to store incoming packets. These
+devices largely rely on CPU-attached memory.
+
+Type-2
+------
+
+A Type-2 CXL Device:
+
+* Supports cxl.io, cxl.cache, and cxl.mem protocols
+* Optionally implements coherent cache and Host-Managed Device Memory
+* Is typically an accelerator device with high bandwidth memory.
+
+The primary difference between a type-1 and type-2 device is the presence
+of host-managed device memory, which allows the device to operate on a
+local memory bank - while the CPU still has coherent DMA to the same memory.
+
+This allows things like GPUs to expose their memory via DAX devices or file
+descriptors, allows drivers and programs direct access to device memory
+rather than use block-transfer semantics.
+
+Type-3
+------
+
+A Type-3 CXL Device
+
+* Supports cxl.io and cxl.mem
+* Implements Host-Managed Device Memory
+* May provide either Volatile or Persistent memory capacity (or both).
+
+A basic example of a type-3 device is a simple memory expander, whose
+local memory capacity is exposed to the CPU for access directly via
+basic coherent DMA.
+
+Switch
+------
+
+A CXL switch is a device capable of routing any CXL (and by extension, PCIe)
+protocol between an upstream, downstream, or peer devices. Many devices, such
+as Multi-Logical Devices, imply the presence of switching in some manner.
+
+Logical Devices and Heads
+-------------------------
+
+A CXL device may present one or more "Logical Devices" to one or more hosts
+(via physical "Heads").
+
+A Single-Logical Device (SLD) is a device which presents a single device to
+one or more heads.
+
+A Multi-Logical Device (MLD) is a device which may present multiple devices
+to one or more upstream devices.
+
+A Single-Headed Device exposes only a single physical connection.
+
+A Multi-Headed Device exposes multiple physical connections.
+
+MHSLD
+~~~~~
+A Multi-Headed Single-Logical Device (MHSLD) exposes a single logical
+device to multiple heads which may be connected to one or more discrete
+hosts. An example of this would be a simple memory-pool which may be
+statically configured (prior to boot) to expose portions of its memory
+to Linux via :doc:`CEDT <../platform/acpi/cedt>`.
+
+MHMLD
+~~~~~
+A Multi-Headed Multi-Logical Device (MHMLD) exposes multiple logical
+devices to multiple heads which may be connected to one or more discrete
+hosts. An example of this would be a Dynamic Capacity Device or which
+may be configured at runtime to expose portions of its memory to Linux.
+
+Example Devices
+===============
+
+Memory Expander
+---------------
+The simplest form of Type-3 device is a memory expander. A memory expander
+exposes Host-Managed Device Memory (HDM) to Linux. This memory may be
+Volatile or Non-Volatile (Persistent).
+
+Memory Expanders will typically be considered a form of Single-Headed,
+Single-Logical Device - as its form factor will typically be an add-in-card
+(AIC) or some other similar form-factor.
+
+The Linux CXL driver provides support for static or dynamic configuration of
+basic memory expanders. The platform may program decoders prior to OS init
+(e.g. auto-decoders), or the user may program the fabric if the platform
+defers these operations to the OS.
+
+Multiple Memory Expanders may be added to an external chassis and exposed to
+a host via a head attached to a CXL switch. This is a "memory pool", and
+would be considered an MHSLD or MHMLD depending on the management capabilities
+provided by the switch platform.
+
+As of v6.14, Linux does not provide a formalized interface to manage non-DCD
+MHSLD or MHMLD devices.
+
+Dynamic Capacity Device (DCD)
+-----------------------------
+
+A Dynamic Capacity Device is a Type-3 device which provides dynamic management
+of memory capacity. The basic premise of a DCD to provide an allocator-like
+interface for physical memory capacity to a "Fabric Manager" (an external,
+privileged host with privileges to change configurations for other hosts).
+
+A DCD manages "Memory Extents", which may be volatile or persistent. Extents
+may also be exclusive to a single host or shared across multiple hosts.
+
+As of v6.14, Linux does not provide a formalized interface to manage DCD
+devices, however there is active work on LKML targeting future release.
diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst
index 965ba90e8fb7..3dfae1d310ca 100644
--- a/Documentation/driver-api/cxl/index.rst
+++ b/Documentation/driver-api/cxl/index.rst
@@ -4,12 +4,50 @@
Compute Express Link
====================
-.. toctree::
- :maxdepth: 1
+CXL device configuration has a complex handoff between platform (Hardware,
+BIOS, EFI), OS (early boot, core kernel, driver), and user policy decisions
+that have impacts on each other. The docs here break up configurations steps.
- memory-devices
- access-coordinates
+.. toctree::
+ :maxdepth: 2
+ :caption: Overview
+ theory-of-operation
maturity-map
+ conventions
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Device Reference
+
+ devices/device-types
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Platform Configuration
+
+ platform/bios-and-efi
+ platform/acpi
+ platform/cdat
+ platform/example-configs
+ platform/device-hotplug
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Linux Kernel Configuration
+
+ linux/overview
+ linux/early-boot
+ linux/cxl-driver
+ linux/dax-driver
+ linux/memory-hotplug
+ linux/access-coordinates
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Memory Allocation
-.. only:: subproject and html
+ allocation/dax
+ allocation/page-allocator
+ allocation/reclaim
+ allocation/hugepages.rst
diff --git a/Documentation/driver-api/cxl/linux/access-coordinates.rst b/Documentation/driver-api/cxl/linux/access-coordinates.rst
new file mode 100644
index 000000000000..341a7c682043
--- /dev/null
+++ b/Documentation/driver-api/cxl/linux/access-coordinates.rst
@@ -0,0 +1,178 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==================================
+CXL Access Coordinates Computation
+==================================
+
+Latency and Bandwidth Calculation
+=================================
+A memory region performance coordinates (latency and bandwidth) are typically
+provided via ACPI tables :doc:`SRAT <../platform/acpi/srat>` and
+:doc:`HMAT <../platform/acpi/hmat>`. However, the platform firmware (BIOS) is
+not able to annotate those for CXL devices that are hot-plugged since they do
+not exist during platform firmware initialization. The CXL driver can compute
+the performance coordinates by retrieving data from several components.
+
+The :doc:`SRAT <../platform/acpi/srat>` provides a Generic Port Affinity
+subtable that ties a proximity domain to a device handle, which in this case
+would be the CXL hostbridge. Using this association, the performance
+coordinates for the Generic Port can be retrieved from the
+:doc:`HMAT <../platform/acpi/hmat>` subtable. This piece represents the
+performance coordinates between a CPU and a Generic Port (CXL hostbridge).
+
+The :doc:`CDAT <../platform/cdat>` provides the performance coordinates for
+the CXL device itself. That is the bandwidth and latency to access that device's
+memory region. The DSMAS subtable provides a DSMADHandle that is tied to a
+Device Physical Address (DPA) range. The DSLBIS subtable provides the
+performance coordinates that's tied to a DSMADhandle and this ties the two
+table entries together to provide the performance coordinates for each DPA
+region. For example, if a device exports a DRAM region and a PMEM region,
+then there would be different performance characteristsics for each of those
+regions.
+
+If there's a CXL switch in the topology, then the performance coordinates for the
+switch is provided by SSLBIS subtable. This provides the bandwidth and latency
+for traversing the switch between the switch upstream port and the switch
+downstream port that points to the endpoint device.
+
+Simple topology example::
+
+ GP0/HB0/ACPI0016-0
+ RP0
+ |
+ | L0
+ |
+ SW 0 / USP0
+ SW 0 / DSP0
+ |
+ | L1
+ |
+ EP0
+
+In this example, there is a CXL switch between an endpoint and a root port.
+Latency in this example is calculated as such:
+L(EP0) - Latency from EP0 CDAT DSMAS+DSLBIS
+L(L1) - Link latency between EP0 and SW0DSP0
+L(SW0) - Latency for the switch from SW0 CDAT SSLBIS.
+L(L0) - Link latency between SW0 and RP0
+L(RP0) - Latency from root port to CPU via SRAT and HMAT (Generic Port).
+Total read and write latencies are the sum of all these parts.
+
+Bandwidth in this example is calculated as such:
+B(EP0) - Bandwidth from EP0 CDAT DSMAS+DSLBIS
+B(L1) - Link bandwidth between EP0 and SW0DSP0
+B(SW0) - Bandwidth for the switch from SW0 CDAT SSLBIS.
+B(L0) - Link bandwidth between SW0 and RP0
+B(RP0) - Bandwidth from root port to CPU via SRAT and HMAT (Generic Port).
+The total read and write bandwidth is the min() of all these parts.
+
+To calculate the link bandwidth:
+LinkOperatingFrequency (GT/s) is the current negotiated link speed.
+DataRatePerLink (MB/s) = LinkOperatingFrequency / 8
+Bandwidth (MB/s) = PCIeCurrentLinkWidth * DataRatePerLink
+Where PCIeCurrentLinkWidth is the number of lanes in the link.
+
+To calculate the link latency:
+LinkLatency (picoseconds) = FlitSize / LinkBandwidth (MB/s)
+
+See `CXL Memory Device SW Guide r1.0 <https://www.intel.com/content/www/us/en/content-details/643805/cxl-memory-device-software-guide.html>`_,
+section 2.11.3 and 2.11.4 for details.
+
+In the end, the access coordinates for a constructed memory region is calculated from one
+or more memory partitions from each of the CXL device(s).
+
+Shared Upstream Link Calculation
+================================
+For certain CXL region construction with endpoints behind CXL switches (SW) or
+Root Ports (RP), there is the possibility of the total bandwidth for all
+the endpoints behind a switch being more than the switch upstream link.
+A similar situation can occur within the host, upstream of the root ports.
+The CXL driver performs an additional pass after all the targets have
+arrived for a region in order to recalculate the bandwidths with possible
+upstream link being a limiting factor in mind.
+
+The algorithm assumes the configuration is a symmetric topology as that
+maximizes performance. When asymmetric topology is detected, the calculation
+is aborted. An asymmetric topology is detected during topology walk where the
+number of RPs detected as a grandparent is not equal to the number of devices
+iterated in the same iteration loop. The assumption is made that subtle
+asymmetry in properties does not happen and all paths to EPs are equal.
+
+There can be multiple switches under an RP. There can be multiple RPs under
+a CXL Host Bridge (HB). There can be multiple HBs under a CXL Fixed Memory
+Window Structure (CFMWS) in the :doc:`CEDT <../platform/acpi/cedt>`.
+
+An example hierarchy::
+
+ CFMWS 0
+ |
+ _________|_________
+ | |
+ ACPI0017-0 ACPI0017-1
+ GP0/HB0/ACPI0016-0 GP1/HB1/ACPI0016-1
+ | | | |
+ RP0 RP1 RP2 RP3
+ | | | |
+ SW 0 SW 1 SW 2 SW 3
+ | | | | | | | |
+ EP0 EP1 EP2 EP3 EP4 EP5 EP6 EP7
+
+Computation for the example hierarchy:
+
+Min (GP0 to CPU BW,
+ Min(SW 0 Upstream Link to RP0 BW,
+ Min(SW0SSLBIS for SW0DSP0 (EP0), EP0 DSLBIS, EP0 Upstream Link) +
+ Min(SW0SSLBIS for SW0DSP1 (EP1), EP1 DSLBIS, EP1 Upstream link)) +
+ Min(SW 1 Upstream Link to RP1 BW,
+ Min(SW1SSLBIS for SW1DSP0 (EP2), EP2 DSLBIS, EP2 Upstream Link) +
+ Min(SW1SSLBIS for SW1DSP1 (EP3), EP3 DSLBIS, EP3 Upstream link))) +
+Min (GP1 to CPU BW,
+ Min(SW 2 Upstream Link to RP2 BW,
+ Min(SW2SSLBIS for SW2DSP0 (EP4), EP4 DSLBIS, EP4 Upstream Link) +
+ Min(SW2SSLBIS for SW2DSP1 (EP5), EP5 DSLBIS, EP5 Upstream link)) +
+ Min(SW 3 Upstream Link to RP3 BW,
+ Min(SW3SSLBIS for SW3DSP0 (EP6), EP6 DSLBIS, EP6 Upstream Link) +
+ Min(SW3SSLBIS for SW3DSP1 (EP7), EP7 DSLBIS, EP7 Upstream link))))
+
+The calculation starts at cxl_region_shared_upstream_perf_update(). A xarray
+is created to collect all the endpoint bandwidths via the
+cxl_endpoint_gather_bandwidth() function. The min() of bandwidth from the
+endpoint CDAT and the upstream link bandwidth is calculated. If the endpoint
+has a CXL switch as a parent, then min() of calculated bandwidth and the
+bandwidth from the SSLBIS for the switch downstream port that is associated
+with the endpoint is calculated. The final bandwidth is stored in a
+'struct cxl_perf_ctx' in the xarray indexed by a device pointer. If the
+endpoint is direct attached to a root port (RP), the device pointer would be an
+RP device. If the endpoint is behind a switch, the device pointer would be the
+upstream device of the parent switch.
+
+At the next stage, the code walks through one or more switches if they exist
+in the topology. For endpoints directly attached to RPs, this step is skipped.
+If there is another switch upstream, the code takes the min() of the current
+gathered bandwidth and the upstream link bandwidth. If there's a switch
+upstream, then the SSLBIS of the upstream switch.
+
+Once the topology walk reaches the RP, whether it's direct attached endpoints
+or walking through the switch(es), cxl_rp_gather_bandwidth() is called. At
+this point all the bandwidths are aggregated per each host bridge, which is
+also the index for the resulting xarray.
+
+The next step is to take the min() of the per host bridge bandwidth and the
+bandwidth from the Generic Port (GP). The bandwidths for the GP are retrieved
+via ACPI tables (:doc:`SRAT <../platform/acpi/srat>` and
+:doc:`HMAT <../platform/acpi/hmat>`). The minimum bandwidth are aggregated
+under the same ACPI0017 device to form a new xarray.
+
+Finally, the cxl_region_update_bandwidth() is called and the aggregated
+bandwidth from all the members of the last xarray is updated for the
+access coordinates residing in the cxl region (cxlr) context.
+
+QTG ID
+======
+Each :doc:`CEDT <../platform/acpi/cedt>` has a QTG ID field. This field provides
+the ID that associates with a QoS Throttling Group (QTG) for the CFMWS window.
+Once the access coordinates are calculated, an ACPI Device Specific Method can
+be issued to the ACPI0016 device to retrieve the QTG ID depends on the access
+coordinates provided. The QTG ID for the device can be used as guidance to match
+to the CFMWS to setup the best Linux root decoder for the device performance.
diff --git a/Documentation/driver-api/cxl/linux/cxl-driver.rst b/Documentation/driver-api/cxl/linux/cxl-driver.rst
new file mode 100644
index 000000000000..dd6dd17dc536
--- /dev/null
+++ b/Documentation/driver-api/cxl/linux/cxl-driver.rst
@@ -0,0 +1,630 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+CXL Driver Operation
+====================
+
+The devices described in this section are present in ::
+
+ /sys/bus/cxl/devices/
+ /dev/cxl/
+
+The :code:`cxl-cli` library, maintained as part of the NDTCL project, may
+be used to script interactions with these devices.
+
+Drivers
+=======
+The CXL driver is split into a number of drivers.
+
+* cxl_core - fundamental init interface and core object creation
+* cxl_port - initializes root and provides port enumeration interface.
+* cxl_acpi - initializes root decoders and interacts with ACPI data.
+* cxl_p/mem - initializes memory devices
+* cxl_pci - uses cxl_port to enumerate the actual fabric hierarchy.
+
+Driver Devices
+==============
+Here is an example from a single-socket system with 4 host bridges. Two host
+bridges have a single memory device attached, and the devices are interleaved
+into a single memory region. The memory region has been converted to dax. ::
+
+ # ls /sys/bus/cxl/devices/
+ dax_region0 decoder3.0 decoder6.0 mem0 port3
+ decoder0.0 decoder4.0 decoder6.1 mem1 port4
+ decoder1.0 decoder5.0 endpoint5 port1 region0
+ decoder2.0 decoder5.1 endpoint6 port2 root0
+
+
+.. kernel-render:: DOT
+ :alt: Digraph of CXL fabric describing host-bridge interleaving
+ :caption: Diagraph of CXL fabric with a host-bridge interleave memory region
+
+ digraph foo {
+ "root0" -> "port1";
+ "root0" -> "port3";
+ "root0" -> "decoder0.0";
+ "port1" -> "endpoint5";
+ "port3" -> "endpoint6";
+ "port1" -> "decoder1.0";
+ "port3" -> "decoder3.0";
+ "endpoint5" -> "decoder5.0";
+ "endpoint6" -> "decoder6.0";
+ "decoder0.0" -> "region0";
+ "decoder0.0" -> "decoder1.0";
+ "decoder0.0" -> "decoder3.0";
+ "decoder1.0" -> "decoder5.0";
+ "decoder3.0" -> "decoder6.0";
+ "decoder5.0" -> "region0";
+ "decoder6.0" -> "region0";
+ "region0" -> "dax_region0";
+ "dax_region0" -> "dax0.0";
+ }
+
+For this section we'll explore the devices present in this configuration, but
+we'll explore more configurations in-depth in example configurations below.
+
+Base Devices
+------------
+Most devices in a CXL fabric are a `port` of some kind (because each
+device mostly routes request from one device to the next, rather than
+provide a direct service).
+
+Root
+~~~~
+The `CXL Root` is logical object created by the `cxl_acpi` driver during
+:code:`cxl_acpi_probe` - if the :code:`ACPI0017` `Compute Express Link
+Root Object` Device Class is found.
+
+The Root contains links to:
+
+* `Host Bridge Ports` defined by CHBS in the :doc:`CEDT<../platform/acpi/cedt>`
+
+* `Downstream Ports` typically connected to `Host Bridge Ports`.
+
+* `Root Decoders` defined by CFMWS the :doc:`CEDT<../platform/acpi/cedt>`
+
+::
+
+ # ls /sys/bus/cxl/devices/root0
+ decoder0.0 dport0 dport5 port2 subsystem
+ decoders_committed dport1 modalias port3 uevent
+ devtype dport4 port1 port4 uport
+
+ # cat /sys/bus/cxl/devices/root0/devtype
+ cxl_port
+
+ # cat port1/devtype
+ cxl_port
+
+ # cat decoder0.0/devtype
+ cxl_decoder_root
+
+The root is first `logical port` in the CXL fabric, as presented by the Linux
+CXL driver. The `CXL root` is a special type of `switch port`, in that it
+only has downstream port connections.
+
+Port
+~~~~
+A `port` object is better described as a `switch port`. It may represent a
+host bridge to the root or an actual switch port on a switch. A `switch port`
+contains one or more decoders used to route memory requests downstream ports,
+which may be connected to another `switch port` or an `endpoint port`.
+
+::
+
+ # ls /sys/bus/cxl/devices/port1
+ decoder1.0 dport0 driver parent_dport uport
+ decoders_committed dport113 endpoint5 subsystem
+ devtype dport2 modalias uevent
+
+ # cat devtype
+ cxl_port
+
+ # cat decoder1.0/devtype
+ cxl_decoder_switch
+
+ # cat endpoint5/devtype
+ cxl_port
+
+CXL `Host Bridges` in the fabric are probed during :code:`cxl_acpi_probe` at
+the time the `CXL Root` is probed. The allows for the immediate logical
+connection to between the root and host bridge.
+
+* The root has a downstream port connection to a host bridge
+
+* The host bridge has an upstream port connection to the root.
+
+* The host bridge has one or more downstream port connections to switch
+ or endpoint ports.
+
+A `Host Bridge` is a special type of CXL `switch port`. It is explicitly
+defined in the ACPI specification via `ACPI0016` ID. `Host Bridge` ports
+will be probed at `acpi_probe` time, while similar ports on an actual switch
+will be probed later. Otherwise, switch and host bridge ports look very
+similar - the both contain switch decoders which route accesses between
+upstream and downstream ports.
+
+Endpoint
+~~~~~~~~
+An `endpoint` is a terminal port in the fabric. This is a `logical device`,
+and may be one of many `logical devices` presented by a memory device. It
+is still considered a type of `port` in the fabric.
+
+An `endpoint` contains `endpoint decoders` and the device's Coherent Device
+Attribute Table (which describes the device's capabilities). ::
+
+ # ls /sys/bus/cxl/devices/endpoint5
+ CDAT decoders_committed modalias uevent
+ decoder5.0 devtype parent_dport uport
+ decoder5.1 driver subsystem
+
+ # cat /sys/bus/cxl/devices/endpoint5/devtype
+ cxl_port
+
+ # cat /sys/bus/cxl/devices/endpoint5/decoder5.0/devtype
+ cxl_decoder_endpoint
+
+
+Memory Device (memdev)
+~~~~~~~~~~~~~~~~~~~~~~
+A `memdev` is probed and added by the `cxl_pci` driver in :code:`cxl_pci_probe`
+and is managed by the `cxl_mem` driver. It primarily provides the `IOCTL`
+interface to a memory device, via :code:`/dev/cxl/memN`, and exposes various
+device configuration data. ::
+
+ # ls /sys/bus/cxl/devices/mem0
+ dev firmware_version payload_max security uevent
+ driver label_storage_size pmem serial
+ firmware numa_node ram subsystem
+
+A Memory Device is a discrete base object that is not a port. While the
+physical device it belongs to may also host an `endpoint`, the relationship
+between an `endpoint` and a `memdev` is not captured in sysfs.
+
+Port Relationships
+~~~~~~~~~~~~~~~~~~
+In our example described above, there are four host bridges attached to the
+root, and two of the host bridges have one endpoint attached.
+
+.. kernel-render:: DOT
+ :alt: Digraph of CXL fabric describing host-bridge interleaving
+ :caption: Diagraph of CXL fabric with a host-bridge interleave memory region
+
+ digraph foo {
+ "root0" -> "port1";
+ "root0" -> "port2";
+ "root0" -> "port3";
+ "root0" -> "port4";
+ "port1" -> "endpoint5";
+ "port3" -> "endpoint6";
+ }
+
+Decoders
+--------
+A `Decoder` is short for a CXL Host-Managed Device Memory (HDM) Decoder. It is
+a device that routes accesses through the CXL fabric to an endpoint, and at
+the endpoint translates a `Host Physical` to `Device Physical` Addressing.
+
+The CXL 3.1 specification heavily implies that only endpoint decoders should
+engage in translation of `Host Physical Address` to `Device Physical Address`.
+::
+
+ 8.2.4.20 CXL HDM Decoder Capability Structure
+
+ IMPLEMENTATION NOTE
+ CXL Host Bridge and Upstream Switch Port Decode Flow
+
+ IMPLEMENTATION NOTE
+ Device Decode Logic
+
+These notes imply that there are two logical groups of decoders.
+
+* Routing Decoder - a decoder which routes accesses but does not translate
+ addresses from HPA to DPA.
+
+* Translating Decoder - a decoder which translates accesses from HPA to DPA
+ for an endpoint to service.
+
+The CXL drivers distinguish 3 decoder types: root, switch, and endpoint. Only
+endpoint decoders are Translating Decoders, all others are Routing Decoders.
+
+.. note:: PLATFORM VENDORS BE AWARE
+
+ Linux makes a strong assumption that endpoint decoders are the only decoder
+ in the fabric that actively translates HPA to DPA. Linux assumes routing
+ decoders pass the HPA unchanged to the next decoder in the fabric.
+
+ It is therefore assumed that any given decoder in the fabric will have an
+ address range that is a subset of its upstream port decoder. Any deviation
+ from this scheme undefined per the specification. Linux prioritizes
+ spec-defined / architectural behavior.
+
+Decoders may have one or more `Downstream Targets` if configured to interleave
+memory accesses. This will be presented in sysfs via the :code:`target_list`
+parameter.
+
+Root Decoder
+~~~~~~~~~~~~
+A `Root Decoder` is logical construct of the physical address and interleave
+configurations present in the CFMWS field of the :doc:`CEDT
+<../platform/acpi/cedt>`.
+Linux presents this information as a decoder present in the `CXL Root`. We
+consider this a `Root Decoder`, though technically it exists on the boundary
+of the CXL specification and platform-specific CXL root implementations.
+
+Linux considers these logical decoders a type of `Routing Decoder`, and is the
+first decoder in the CXL fabric to receive a memory access from the platform's
+memory controllers.
+
+`Root Decoders` are created during :code:`cxl_acpi_probe`. One root decoder
+is created per CFMWS entry in the :doc:`CEDT <../platform/acpi/cedt>`.
+
+The :code:`target_list` parameter is filled by the CFMWS target fields. Targets
+of a root decoder are `Host Bridges`, which means interleave done at the root
+decoder level is an `Inter-Host-Bridge Interleave`.
+
+Only root decoders are capable of `Inter-Host-Bridge Interleave`.
+
+Such interleaves must be configured by the platform and described in the ACPI
+CEDT CFMWS, as the target CXL host bridge UIDs in the CFMWS must match the CXL
+host bridge UIDs in the CHBS field of the :doc:`CEDT
+<../platform/acpi/cedt>` and the UID field of CXL Host Bridges defined in
+the :doc:`DSDT <../platform/acpi/dsdt>`.
+
+Interleave settings in a root decoder describe how to interleave accesses among
+the *immediate downstream targets*, not the entire interleave set.
+
+The memory range described in the root decoder is used to
+
+1) Create a memory region (:code:`region0` in this example), and
+
+2) Associate the region with an IO Memory Resource (:code:`kernel/resource.c`)
+
+::
+
+ # ls /sys/bus/cxl/devices/decoder0.0/
+ cap_pmem devtype region0
+ cap_ram interleave_granularity size
+ cap_type2 interleave_ways start
+ cap_type3 locked subsystem
+ create_ram_region modalias target_list
+ delete_region qos_class uevent
+
+ # cat /sys/bus/cxl/devices/decoder0.0/region0/resource
+ 0xc050000000
+
+The IO Memory Resource is created during early boot when the CFMWS region is
+identified in the EFI Memory Map or E820 table (on x86).
+
+Root decoders are defined as a separate devtype, but are also a type
+of `Switch Decoder` due to having downstream targets. ::
+
+ # cat /sys/bus/cxl/devices/decoder0.0/devtype
+ cxl_decoder_root
+
+Switch Decoder
+~~~~~~~~~~~~~~
+Any non-root, translating decoder is considered a `Switch Decoder`, and will
+present with the type :code:`cxl_decoder_switch`. Both `Host Bridge` and `CXL
+Switch` (device) decoders are of type :code:`cxl_decoder_switch`. ::
+
+ # ls /sys/bus/cxl/devices/decoder1.0/
+ devtype locked size target_list
+ interleave_granularity modalias start target_type
+ interleave_ways region subsystem uevent
+
+ # cat /sys/bus/cxl/devices/decoder1.0/devtype
+ cxl_decoder_switch
+
+ # cat /sys/bus/cxl/devices/decoder1.0/region
+ region0
+
+A `Switch Decoder` has associations between a region defined by a root
+decoder and downstream target ports. Interleaving done within a switch decoder
+is a multi-downstream-port interleave (or `Intra-Host-Bridge Interleave` for
+host bridges).
+
+Interleave settings in a switch decoder describe how to interleave accesses
+among the *immediate downstream targets*, not the entire interleave set.
+
+Switch decoders are created during :code:`cxl_switch_port_probe` in the
+:code:`cxl_port` driver, and is created based on a PCI device's DVSEC
+registers.
+
+Switch decoder programming is validated during probe if the platform programs
+them during boot (See `Auto Decoders` below), or on commit if programmed at
+runtime (See `Runtime Programming` below).
+
+
+Endpoint Decoder
+~~~~~~~~~~~~~~~~
+Any decoder attached to a *terminal* point in the CXL fabric (`An Endpoint`) is
+considered an `Endpoint Decoder`. Endpoint decoders are of type
+:code:`cxl_decoder_endpoint`. ::
+
+ # ls /sys/bus/cxl/devices/decoder5.0
+ devtype locked start
+ dpa_resource modalias subsystem
+ dpa_size mode target_type
+ interleave_granularity region uevent
+ interleave_ways size
+
+ # cat /sys/bus/cxl/devices/decoder5.0/devtype
+ cxl_decoder_endpoint
+
+ # cat /sys/bus/cxl/devices/decoder5.0/region
+ region0
+
+An `Endpoint Decoder` has an association with a region defined by a root
+decoder and describes the device-local resource associated with this region.
+
+Unlike root and switch decoders, endpoint decoders translate `Host Physical` to
+`Device Physical` address ranges. The interleave settings on an endpoint
+therefore describe the entire *interleave set*.
+
+`Device Physical Address` regions must be committed in-order. For example, the
+DPA region starting at 0x80000000 cannot be committed before the DPA region
+starting at 0x0.
+
+As of Linux v6.15, Linux does not support *imbalanced* interleave setups, all
+endpoints in an interleave set are expected to have the same interleave
+settings (granularity and ways must be the same).
+
+Endpoint decoders are created during :code:`cxl_endpoint_port_probe` in the
+:code:`cxl_port` driver, and is created based on a PCI device's DVSEC registers.
+
+Decoder Relationships
+~~~~~~~~~~~~~~~~~~~~~
+In our example described above, there is one root decoder which routes memory
+accesses over two host bridges. Each host bridge has a decoder which routes
+access to their singular endpoint targets. Each endpoint has a decoder which
+translates HPA to DPA and services the memory request.
+
+The driver validates relationships between ports by decoder programming, so
+we can think of decoders being related in a similarly hierarchical fashion to
+ports.
+
+.. kernel-render:: DOT
+ :alt: Digraph of hierarchical relationship between root, switch, and endpoint decoders.
+ :caption: Diagraph of CXL root, switch, and endpoint decoders.
+
+ digraph foo {
+ "root0" -> "decoder0.0";
+ "decoder0.0" -> "decoder1.0";
+ "decoder0.0" -> "decoder3.0";
+ "decoder1.0" -> "decoder5.0";
+ "decoder3.0" -> "decoder6.0";
+ }
+
+Regions
+-------
+
+Memory Region
+~~~~~~~~~~~~~
+A `Memory Region` is a logical construct that connects a set of CXL ports in
+the fabric to an IO Memory Resource. It is ultimately used to expose the memory
+on these devices to the DAX subsystem via a `DAX Region`.
+
+An example RAM region: ::
+
+ # ls /sys/bus/cxl/devices/region0/
+ access0 devtype modalias subsystem uuid
+ access1 driver mode target0
+ commit interleave_granularity resource target1
+ dax_region0 interleave_ways size uevent
+
+A memory region can be constructed during endpoint probe, if decoders were
+programmed by BIOS/EFI (see `Auto Decoders`), or by creating a region manually
+via a `Root Decoder`'s :code:`create_ram_region` or :code:`create_pmem_region`
+interfaces.
+
+The interleave settings in a `Memory Region` describe the configuration of the
+`Interleave Set` - and are what can be expected to be seen in the endpoint
+interleave settings.
+
+.. kernel-render:: DOT
+ :alt: Digraph of CXL memory region relationships between root and endpoint decoders.
+ :caption: Regions are created based on root decoder configurations. Endpoint decoders
+ must be programmed with the same interleave settings as the region.
+
+ digraph foo {
+ "root0" -> "decoder0.0";
+ "decoder0.0" -> "region0";
+ "region0" -> "decoder5.0";
+ "region0" -> "decoder6.0";
+ }
+
+DAX Region
+~~~~~~~~~~
+A `DAX Region` is used to convert a CXL `Memory Region` to a DAX device. A
+DAX device may then be accessed directly via a file descriptor interface, or
+converted to System RAM via the DAX kmem driver. See the DAX driver section
+for more details. ::
+
+ # ls /sys/bus/cxl/devices/dax_region0/
+ dax0.0 devtype modalias uevent
+ dax_region driver subsystem
+
+Mailbox Interfaces
+------------------
+A mailbox command interface for each device is exposed in ::
+
+ /dev/cxl/mem0
+ /dev/cxl/mem1
+
+These mailboxes may receive any specification-defined command. Raw commands
+(custom commands) can only be sent to these interfaces if the build config
+:code:`CXL_MEM_RAW_COMMANDS` is set. This is considered a debug and/or
+development interface, not an officially supported mechanism for creation
+of vendor-specific commands (see the `fwctl` subsystem for that).
+
+Decoder Programming
+===================
+
+Runtime Programming
+-------------------
+During probe, the only decoders *required* to be programmed are `Root Decoders`.
+In reality, `Root Decoders` are a logical construct to describe the memory
+region and interleave configuration at the host bridge level - as described
+in the ACPI CEDT CFMWS.
+
+All other `Switch` and `Endpoint` decoders may be programmed by the user
+at runtime - if the platform supports such configurations.
+
+This interaction is what creates a `Software Defined Memory` environment.
+
+See the :code:`cxl-cli` documentation for more information about how to
+configure CXL decoders at runtime.
+
+Auto Decoders
+-------------
+Auto Decoders are decoders programmed by BIOS/EFI at boot time, and are
+almost always locked (cannot be changed). This is done by a platform
+which may have a static configuration - or certain quirks which may prevent
+dynamic runtime changes to the decoders (such as requiring additional
+controller programming within the CPU complex outside the scope of CXL).
+
+Auto Decoders are probed automatically as long as the devices and memory
+regions they are associated with probe without issue. When probing Auto
+Decoders, the driver's primary responsibility is to ensure the fabric is
+sane - as-if validating runtime programmed regions and decoders.
+
+If Linux cannot validate auto-decoder configuration, the memory will not
+be surfaced as a DAX device - and therefore not be exposed to the page
+allocator - effectively stranding it.
+
+Interleave
+----------
+
+The Linux CXL driver supports `Cross-Link First` interleave. This dictates
+how interleave is programmed at each decoder step, as the driver validates
+the relationships between a decoder and it's parent.
+
+For example, in a `Cross-Link First` interleave setup with 16 endpoints
+attached to 4 host bridges, linux expects the following ways/granularity
+across the root, host bridge, and endpoints respectively.
+
+.. flat-table:: 4x4 cross-link first interleave settings
+
+ * - decoder
+ - ways
+ - granularity
+
+ * - root
+ - 4
+ - 256
+
+ * - host bridge
+ - 4
+ - 1024
+
+ * - endpoint
+ - 16
+ - 256
+
+At the root, every a given access will be routed to the
+:code:`((HPA / 256) % 4)th` target host bridge. Within a host bridge, every
+:code:`((HPA / 1024) % 4)th` target endpoint. Each endpoint translates based
+on the entire 16 device interleave set.
+
+Unbalanced interleave sets are not supported - decoders at a similar point
+in the hierarchy (e.g. all host bridge decoders) must have the same ways and
+granularity configuration.
+
+At Root
+~~~~~~~
+Root decoder interleave is defined by CFMWS field of the :doc:`CEDT
+<../platform/acpi/cedt>`. The CEDT may actually define multiple CFMWS
+configurations to describe the same physical capacity, with the intent to allow
+users to decide at runtime whether to online memory as interleaved or
+non-interleaved. ::
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Window base address : 0000000100000000
+ Window size : 0000000100000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ First Target : 00000007
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Window base address : 0000000200000000
+ Window size : 0000000100000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ First Target : 00000006
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Window base address : 0000000300000000
+ Window size : 0000000200000000
+ Interleave Members (2^n) : 01
+ Interleave Arithmetic : 00
+ First Target : 00000007
+ Next Target : 00000006
+
+In this example, the CFMWS defines two discrete non-interleaved 4GB regions
+for each host bridge, and one interleaved 8GB region that targets both. This
+would result in 3 root decoders presenting in the root. ::
+
+ # ls /sys/bus/cxl/devices/root0/decoder*
+ decoder0.0 decoder0.1 decoder0.2
+
+ # cat /sys/bus/cxl/devices/decoder0.0/target_list start size
+ 7
+ 0x100000000
+ 0x100000000
+
+ # cat /sys/bus/cxl/devices/decoder0.1/target_list start size
+ 6
+ 0x200000000
+ 0x100000000
+
+ # cat /sys/bus/cxl/devices/decoder0.2/target_list start size
+ 7,6
+ 0x300000000
+ 0x200000000
+
+These decoders are not runtime programmable. They are used to generate a
+`Memory Region` to bring this memory online with runtime programmed settings
+at the `Switch` and `Endpoint` decoders.
+
+At Host Bridge or Switch
+~~~~~~~~~~~~~~~~~~~~~~~~
+`Host Bridge` and `Switch` decoders are programmable via the following fields:
+
+- :code:`start` - the HPA region associated with the memory region
+- :code:`size` - the size of the region
+- :code:`target_list` - the list of downstream ports
+- :code:`interleave_ways` - the number downstream ports to interleave across
+- :code:`interleave_granularity` - the granularity to interleave at.
+
+Linux expects the :code:`interleave_granularity` of switch decoders to be
+derived from their upstream port connections. In `Cross-Link First` interleave
+configurations, the :code:`interleave_granularity` of a decoder is equal to
+:code:`parent_interleave_granularity * parent_interleave_ways`.
+
+At Endpoint
+~~~~~~~~~~~
+`Endpoint Decoders` are programmed similar to Host Bridge and Switch decoders,
+with the exception that the ways and granularity are defined by the interleave
+set (e.g. the interleave settings defined by the associated `Memory Region`).
+
+- :code:`start` - the HPA region associated with the memory region
+- :code:`size` - the size of the region
+- :code:`interleave_ways` - the number endpoints in the interleave set
+- :code:`interleave_granularity` - the granularity to interleave at.
+
+These settings are used by endpoint decoders to *Translate* memory requests
+from HPA to DPA. This is why they must be aware of the entire interleave set.
+
+Linux does not support unbalanced interleave configurations. As a result, all
+endpoints in an interleave set must have the same ways and granularity.
+
+Example Configurations
+======================
+.. toctree::
+ :maxdepth: 1
+
+ example-configurations/single-device.rst
+ example-configurations/hb-interleave.rst
+ example-configurations/intra-hb-interleave.rst
+ example-configurations/multi-interleave.rst
diff --git a/Documentation/driver-api/cxl/linux/dax-driver.rst b/Documentation/driver-api/cxl/linux/dax-driver.rst
new file mode 100644
index 000000000000..10d953a2167b
--- /dev/null
+++ b/Documentation/driver-api/cxl/linux/dax-driver.rst
@@ -0,0 +1,43 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+DAX Driver Operation
+====================
+The `Direct Access Device` driver was originally designed to provide a
+memory-like access mechanism to memory-like block-devices. It was
+extended to support CXL Memory Devices, which provide user-configured
+memory devices.
+
+The CXL subsystem depends on the DAX subsystem to either:
+
+- Generate a file-like interface to userland via :code:`/dev/daxN.Y`, or
+- Engage the memory-hotplug interface to add CXL memory to page allocator.
+
+The DAX subsystem exposes this ability through the `cxl_dax_region` driver.
+A `dax_region` provides the translation between a CXL `memory_region` and
+a `DAX Device`.
+
+DAX Device
+==========
+A `DAX Device` is a file-like interface exposed in :code:`/dev/daxN.Y`. A
+memory region exposed via dax device can be accessed via userland software
+via the :code:`mmap()` system-call. The result is direct mappings to the
+CXL capacity in the task's page tables.
+
+Users wishing to manually handle allocation of CXL memory should use this
+interface.
+
+kmem conversion
+===============
+The :code:`dax_kmem` driver converts a `DAX Device` into a series of `hotplug
+memory blocks` managed by :code:`kernel/memory-hotplug.c`. This capacity
+will be exposed to the kernel page allocator in the user-selected memory
+zone.
+
+The :code:`memmap_on_memory` setting (both global and DAX device local)
+dictates where the kernell will allocate the :code:`struct folio` descriptors
+for this memory will come from. If :code:`memmap_on_memory` is set, memory
+hotplug will set aside a portion of the memory block capacity to allocate
+folios. If unset, the memory is allocated via a normal :code:`GFP_KERNEL`
+allocation - and as a result will most likely land on the local NUM node of the
+CPU executing the hotplug operation.
diff --git a/Documentation/driver-api/cxl/linux/early-boot.rst b/Documentation/driver-api/cxl/linux/early-boot.rst
new file mode 100644
index 000000000000..414481f33819
--- /dev/null
+++ b/Documentation/driver-api/cxl/linux/early-boot.rst
@@ -0,0 +1,137 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+Linux Init (Early Boot)
+=======================
+
+Linux configuration is split into two major steps: Early-Boot and everything else.
+
+During early boot, Linux sets up immutable resources (such as numa nodes), while
+later operations include things like driver probe and memory hotplug. Linux may
+read EFI and ACPI information throughout this process to configure logical
+representations of the devices.
+
+During Linux Early Boot stage (functions in the kernel that have the __init
+decorator), the system takes the resources created by EFI/BIOS
+(:doc:`ACPI tables <../platform/acpi>`) and turns them into resources that the
+kernel can consume.
+
+
+BIOS, Build and Boot Options
+============================
+
+There are 4 pre-boot options that need to be considered during kernel build
+which dictate how memory will be managed by Linux during early boot.
+
+* EFI_MEMORY_SP
+
+ * BIOS/EFI Option that dictates whether memory is SystemRAM or
+ Specific Purpose. Specific Purpose memory will be deferred to
+ drivers to manage - and not immediately exposed as system RAM.
+
+* CONFIG_EFI_SOFT_RESERVE
+
+ * Linux Build config option that dictates whether the kernel supports
+ Specific Purpose memory.
+
+* CONFIG_MHP_DEFAULT_ONLINE_TYPE
+
+ * Linux Build config that dictates whether and how Specific Purpose memory
+ converted to a dax device should be managed (left as DAX or onlined as
+ SystemRAM in ZONE_NORMAL or ZONE_MOVABLE).
+
+* nosoftreserve
+
+ * Linux kernel boot option that dictates whether Soft Reserve should be
+ supported. Similar to CONFIG_EFI_SOFT_RESERVE.
+
+Memory Map Creation
+===================
+
+While the kernel parses the EFI memory map, if :code:`Specific Purpose` memory
+is supported and detected, it will set this region aside as
+:code:`SOFT_RESERVED`.
+
+If :code:`EFI_MEMORY_SP=0`, :code:`CONFIG_EFI_SOFT_RESERVE=n`, or
+:code:`nosoftreserve=y` - Linux will default a CXL device memory region to
+SystemRAM. This will expose the memory to the kernel page allocator in
+:code:`ZONE_NORMAL`, making it available for use for most allocations (including
+:code:`struct page` and page tables).
+
+If `Specific Purpose` is set and supported, :code:`CONFIG_MHP_DEFAULT_ONLINE_TYPE_*`
+dictates whether the memory is onlined by default (:code:`_OFFLINE` or
+:code:`_ONLINE_*`), and if online which zone to online this memory to by default
+(:code:`_NORMAL` or :code:`_MOVABLE`).
+
+If placed in :code:`ZONE_MOVABLE`, the memory will not be available for most
+kernel allocations (such as :code:`struct page` or page tables). This may
+significant impact performance depending on the memory capacity of the system.
+
+
+NUMA Node Reservation
+=====================
+
+Linux refers to the proximity domains (:code:`PXM`) defined in the :doc:`SRAT
+<../platform/acpi/srat>` to create NUMA nodes in :code:`acpi_numa_init`.
+Typically, there is a 1:1 relation between :code:`PXM` and NUMA node IDs.
+
+The SRAT is the only ACPI defined way of defining Proximity Domains. Linux
+chooses to, at most, map those 1:1 with NUMA nodes.
+:doc:`CEDT <../platform/acpi/cedt>` adds a description of SPA ranges which
+Linux may map to one or more NUMA nodes.
+
+If there are CXL ranges in the CFMWS but not in SRAT, then a fake :code:`PXM`
+is created (as of v6.15). In the future, Linux may reject CFMWS not described
+by SRAT due to the ambiguity of proximity domain association.
+
+It is important to note that NUMA node creation cannot be done at runtime. All
+possible NUMA nodes are identified at :code:`__init` time, more specifically
+during :code:`mm_init`. The CEDT and SRAT must contain sufficient :code:`PXM`
+data for Linux to identify NUMA nodes their associated memory regions.
+
+The relevant code exists in: :code:`linux/drivers/acpi/numa/srat.c`.
+
+See :doc:`Example Platform Configurations <../platform/example-configs>`
+for more info.
+
+Memory Tiers Creation
+=====================
+Memory tiers are a collection of NUMA nodes grouped by performance characteristics.
+During :code:`__init`, Linux initializes the system with a default memory tier that
+contains all nodes marked :code:`N_MEMORY`.
+
+:code:`memory_tier_init` is called at boot for all nodes with memory online by
+default. :code:`memory_tier_late_init` is called during late-init for nodes setup
+during driver configuration.
+
+Nodes are only marked :code:`N_MEMORY` if they have *online* memory.
+
+Tier membership can be inspected in ::
+
+ /sys/devices/virtual/memory_tiering/memory_tierN/nodelist
+ 0-1
+
+If nodes are grouped which have clear difference in performance, check the
+:doc:`HMAT <../platform/acpi/hmat>` and CDAT information for the CXL nodes. All
+nodes default to the DRAM tier, unless HMAT/CDAT information is reported to the
+memory_tier component via `access_coordinates`.
+
+For more, see :doc:`CXL access coordinates documentation
+<../linux/access-coordinates>`.
+
+Contiguous Memory Allocation
+============================
+The contiguous memory allocator (CMA) enables reservation of contiguous memory
+regions on NUMA nodes during early boot. However, CMA cannot reserve memory
+on NUMA nodes that are not online during early boot. ::
+
+ void __init hugetlb_cma_reserve(void) {
+ if (!node_online(nid))
+ /* do not allow reservations */
+ }
+
+This means if users intend to defer management of CXL memory to the driver, CMA
+cannot be used to guarantee huge page allocations. If enabling CXL memory as
+SystemRAM in `ZONE_NORMAL` during early boot, CMA reservations per-node can be
+made with the :code:`cma_pernuma` or :code:`numa_cma` kernel command line
+parameters.
diff --git a/Documentation/driver-api/cxl/linux/example-configurations/hb-interleave.rst b/Documentation/driver-api/cxl/linux/example-configurations/hb-interleave.rst
new file mode 100644
index 000000000000..f071490763a2
--- /dev/null
+++ b/Documentation/driver-api/cxl/linux/example-configurations/hb-interleave.rst
@@ -0,0 +1,314 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Inter-Host-Bridge Interleave
+============================
+This cxl-cli configuration dump shows the following host configuration:
+
+* A single socket system with one CXL root
+* CXL Root has Four (4) CXL Host Bridges
+* Two CXL Host Bridges have a single CXL Memory Expander Attached
+* The CXL root is configured to interleave across the two host bridges.
+
+This output is generated by :code:`cxl list -v` and describes the relationships
+between objects exposed in :code:`/sys/bus/cxl/devices/`.
+
+::
+
+ [
+ {
+ "bus":"root0",
+ "provider":"ACPI.CXL",
+ "nr_dports":4,
+ "dports":[
+ {
+ "dport":"pci0000:00",
+ "alias":"ACPI0016:01",
+ "id":0
+ },
+ {
+ "dport":"pci0000:a8",
+ "alias":"ACPI0016:02",
+ "id":4
+ },
+ {
+ "dport":"pci0000:2a",
+ "alias":"ACPI0016:03",
+ "id":1
+ },
+ {
+ "dport":"pci0000:d2",
+ "alias":"ACPI0016:00",
+ "id":5
+ }
+ ],
+
+This chunk shows the CXL "bus" (root0) has 4 downstream ports attached to CXL
+Host Bridges. The `Root` can be considered the singular upstream port attached
+to the platform's memory controller - which routes memory requests to it.
+
+The `ports:root0` section lays out how each of these downstream ports are
+configured. If a port is not configured (id's 0 and 1), they are omitted.
+
+::
+
+ "ports:root0":[
+ {
+ "port":"port1",
+ "host":"pci0000:d2",
+ "depth":1,
+ "nr_dports":3,
+ "dports":[
+ {
+ "dport":"0000:d2:01.1",
+ "alias":"device:02",
+ "id":0
+ },
+ {
+ "dport":"0000:d2:01.3",
+ "alias":"device:05",
+ "id":2
+ },
+ {
+ "dport":"0000:d2:07.1",
+ "alias":"device:0d",
+ "id":113
+ }
+ ],
+
+This chunk shows the available downstream ports associated with the CXL Host
+Bridge :code:`port1`. In this case, :code:`port1` has 3 available downstream
+ports: :code:`dport1`, :code:`dport2`, and :code:`dport113`..
+
+::
+
+ "endpoints:port1":[
+ {
+ "endpoint":"endpoint5",
+ "host":"mem0",
+ "parent_dport":"0000:d2:01.1",
+ "depth":2,
+ "memdev":{
+ "memdev":"mem0",
+ "ram_size":137438953472,
+ "serial":0,
+ "numa_node":0,
+ "host":"0000:d3:00.0"
+ },
+ "decoders:endpoint5":[
+ {
+ "decoder":"decoder5.0",
+ "resource":825975898112,
+ "size":274877906944,
+ "interleave_ways":2,
+ "interleave_granularity":256,
+ "region":"region0",
+ "dpa_resource":0,
+ "dpa_size":137438953472,
+ "mode":"ram"
+ }
+ ]
+ }
+ ],
+
+This chunk shows the endpoints attached to the host bridge :code:`port1`.
+
+:code:`endpoint5` contains a single configured decoder :code:`decoder5.0`
+which has the same interleave configuration as :code:`region0` (shown later).
+
+Next we have the decodesr belonging to the host bridge:
+
+::
+
+ "decoders:port1":[
+ {
+ "decoder":"decoder1.0",
+ "resource":825975898112,
+ "size":274877906944,
+ "interleave_ways":1,
+ "region":"region0",
+ "nr_targets":1,
+ "targets":[
+ {
+ "target":"0000:d2:01.1",
+ "alias":"device:02",
+ "position":0,
+ "id":0
+ }
+ ]
+ }
+ ]
+ },
+
+Host Bridge :code:`port1` has a single decoder (:code:`decoder1.0`), whose only
+target is :code:`dport1` - which is attached to :code:`endpoint5`.
+
+The following chunk shows a similar configuration for Host Bridge :code:`port3`,
+the second host bridge with a memory device attached.
+
+::
+
+ {
+ "port":"port3",
+ "host":"pci0000:a8",
+ "depth":1,
+ "nr_dports":1,
+ "dports":[
+ {
+ "dport":"0000:a8:01.1",
+ "alias":"device:c3",
+ "id":0
+ }
+ ],
+ "endpoints:port3":[
+ {
+ "endpoint":"endpoint6",
+ "host":"mem1",
+ "parent_dport":"0000:a8:01.1",
+ "depth":2,
+ "memdev":{
+ "memdev":"mem1",
+ "ram_size":137438953472,
+ "serial":0,
+ "numa_node":0,
+ "host":"0000:a9:00.0"
+ },
+ "decoders:endpoint6":[
+ {
+ "decoder":"decoder6.0",
+ "resource":825975898112,
+ "size":274877906944,
+ "interleave_ways":2,
+ "interleave_granularity":256,
+ "region":"region0",
+ "dpa_resource":0,
+ "dpa_size":137438953472,
+ "mode":"ram"
+ }
+ ]
+ }
+ ],
+ "decoders:port3":[
+ {
+ "decoder":"decoder3.0",
+ "resource":825975898112,
+ "size":274877906944,
+ "interleave_ways":1,
+ "region":"region0",
+ "nr_targets":1,
+ "targets":[
+ {
+ "target":"0000:a8:01.1",
+ "alias":"device:c3",
+ "position":0,
+ "id":0
+ }
+ ]
+ }
+ ]
+ },
+
+
+The next chunk shows the two CXL host bridges without attached endpoints.
+
+::
+
+ {
+ "port":"port2",
+ "host":"pci0000:00",
+ "depth":1,
+ "nr_dports":2,
+ "dports":[
+ {
+ "dport":"0000:00:01.3",
+ "alias":"device:55",
+ "id":2
+ },
+ {
+ "dport":"0000:00:07.1",
+ "alias":"device:5d",
+ "id":113
+ }
+ ]
+ },
+ {
+ "port":"port4",
+ "host":"pci0000:2a",
+ "depth":1,
+ "nr_dports":1,
+ "dports":[
+ {
+ "dport":"0000:2a:01.1",
+ "alias":"device:d0",
+ "id":0
+ }
+ ]
+ }
+ ],
+
+Next we have the `Root Decoders` belonging to :code:`root0`. This root decoder
+applies the interleave across the downstream ports :code:`port1` and
+:code:`port3` - with a granularity of 256 bytes.
+
+This information is generated by the CXL driver reading the ACPI CEDT CMFWS.
+
+::
+
+ "decoders:root0":[
+ {
+ "decoder":"decoder0.0",
+ "resource":825975898112,
+ "size":274877906944,
+ "interleave_ways":2,
+ "interleave_granularity":256,
+ "max_available_extent":0,
+ "volatile_capable":true,
+ "nr_targets":2,
+ "targets":[
+ {
+ "target":"pci0000:a8",
+ "alias":"ACPI0016:02",
+ "position":1,
+ "id":4
+ },
+ {
+ "target":"pci0000:d2",
+ "alias":"ACPI0016:00",
+ "position":0,
+ "id":5
+ }
+ ],
+
+Finally we have the `Memory Region` associated with the `Root Decoder`
+:code:`decoder0.0`. This region describes the overall interleave configuration
+of the interleave set.
+
+::
+
+ "regions:decoder0.0":[
+ {
+ "region":"region0",
+ "resource":825975898112,
+ "size":274877906944,
+ "type":"ram",
+ "interleave_ways":2,
+ "interleave_granularity":256,
+ "decode_state":"commit",
+ "mappings":[
+ {
+ "position":1,
+ "memdev":"mem1",
+ "decoder":"decoder6.0"
+ },
+ {
+ "position":0,
+ "memdev":"mem0",
+ "decoder":"decoder5.0"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
diff --git a/Documentation/driver-api/cxl/linux/example-configurations/intra-hb-interleave.rst b/Documentation/driver-api/cxl/linux/example-configurations/intra-hb-interleave.rst
new file mode 100644
index 000000000000..077dfaf8458d
--- /dev/null
+++ b/Documentation/driver-api/cxl/linux/example-configurations/intra-hb-interleave.rst
@@ -0,0 +1,291 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Intra-Host-Bridge Interleave
+============================
+This cxl-cli configuration dump shows the following host configuration:
+
+* A single socket system with one CXL root
+* CXL Root has Four (4) CXL Host Bridges
+* One (1) CXL Host Bridges has two CXL Memory Expanders Attached
+* The Host bridge decoder is programmed to interleave across the expanders.
+
+This output is generated by :code:`cxl list -v` and describes the relationships
+between objects exposed in :code:`/sys/bus/cxl/devices/`.
+
+::
+
+ [
+ {
+ "bus":"root0",
+ "provider":"ACPI.CXL",
+ "nr_dports":4,
+ "dports":[
+ {
+ "dport":"pci0000:00",
+ "alias":"ACPI0016:01",
+ "id":0
+ },
+ {
+ "dport":"pci0000:a8",
+ "alias":"ACPI0016:02",
+ "id":4
+ },
+ {
+ "dport":"pci0000:2a",
+ "alias":"ACPI0016:03",
+ "id":1
+ },
+ {
+ "dport":"pci0000:d2",
+ "alias":"ACPI0016:00",
+ "id":5
+ }
+ ],
+
+This chunk shows the CXL "bus" (root0) has 4 downstream ports attached to CXL
+Host Bridges. The `Root` can be considered the singular upstream port attached
+to the platform's memory controller - which routes memory requests to it.
+
+The `ports:root0` section lays out how each of these downstream ports are
+configured. If a port is not configured (id's 0 and 1), they are omitted.
+
+::
+
+ "ports:root0":[
+ {
+ "port":"port1",
+ "host":"pci0000:d2",
+ "depth":1,
+ "nr_dports":3,
+ "dports":[
+ {
+ "dport":"0000:d2:01.1",
+ "alias":"device:02",
+ "id":0
+ },
+ {
+ "dport":"0000:d2:01.3",
+ "alias":"device:05",
+ "id":2
+ },
+ {
+ "dport":"0000:d2:07.1",
+ "alias":"device:0d",
+ "id":113
+ }
+ ],
+
+This chunk shows the available downstream ports associated with the CXL Host
+Bridge :code:`port1`. In this case, :code:`port1` has 3 available downstream
+ports: :code:`dport1`, :code:`dport2`, and :code:`dport113`..
+
+::
+
+ "endpoints:port1":[
+ {
+ "endpoint":"endpoint5",
+ "host":"mem0",
+ "parent_dport":"0000:d2:01.1",
+ "depth":2,
+ "memdev":{
+ "memdev":"mem0",
+ "ram_size":137438953472,
+ "serial":0,
+ "numa_node":0,
+ "host":"0000:d3:00.0"
+ },
+ "decoders:endpoint5":[
+ {
+ "decoder":"decoder5.0",
+ "resource":825975898112,
+ "size":274877906944,
+ "interleave_ways":2,
+ "interleave_granularity":256,
+ "region":"region0",
+ "dpa_resource":0,
+ "dpa_size":137438953472,
+ "mode":"ram"
+ }
+ ]
+ },
+ {
+ "endpoint":"endpoint6",
+ "host":"mem1",
+ "parent_dport":"0000:d2:01.3,
+ "depth":2,
+ "memdev":{
+ "memdev":"mem1",
+ "ram_size":137438953472,
+ "serial":0,
+ "numa_node":0,
+ "host":"0000:a9:00.0"
+ },
+ "decoders:endpoint6":[
+ {
+ "decoder":"decoder6.0",
+ "resource":825975898112,
+ "size":274877906944,
+ "interleave_ways":2,
+ "interleave_granularity":256,
+ "region":"region0",
+ "dpa_resource":0,
+ "dpa_size":137438953472,
+ "mode":"ram"
+ }
+ ]
+ }
+ ],
+
+This chunk shows the endpoints attached to the host bridge :code:`port1`.
+
+:code:`endpoint5` contains a single configured decoder :code:`decoder5.0`
+which has the same interleave configuration memory region they belong to
+(show later).
+
+Next we have the decoders belonging to the host bridge:
+
+::
+
+ "decoders:port1":[
+ {
+ "decoder":"decoder1.0",
+ "resource":825975898112,
+ "size":274877906944,
+ "interleave_ways":2,
+ "interleave_granularity":256,
+ "region":"region0",
+ "nr_targets":2,
+ "targets":[
+ {
+ "target":"0000:d2:01.1",
+ "alias":"device:02",
+ "position":0,
+ "id":0
+ },
+ {
+ "target":"0000:d2:01.3",
+ "alias":"device:05",
+ "position":1,
+ "id":0
+ }
+ ]
+ }
+ ]
+ },
+
+Host Bridge :code:`port1` has a single decoder (:code:`decoder1.0`) with two
+targets: :code:`dport1` and :code:`dport3` - which are attached to
+:code:`endpoint5` and :code:`endpoint6` respectively.
+
+The host bridge decoder interleaves these devices at a 256 byte granularity.
+
+The next chunk shows the three CXL host bridges without attached endpoints.
+
+::
+
+ {
+ "port":"port2",
+ "host":"pci0000:00",
+ "depth":1,
+ "nr_dports":2,
+ "dports":[
+ {
+ "dport":"0000:00:01.3",
+ "alias":"device:55",
+ "id":2
+ },
+ {
+ "dport":"0000:00:07.1",
+ "alias":"device:5d",
+ "id":113
+ }
+ ]
+ },
+ {
+ "port":"port3",
+ "host":"pci0000:a8",
+ "depth":1,
+ "nr_dports":1,
+ "dports":[
+ {
+ "dport":"0000:a8:01.1",
+ "alias":"device:c3",
+ "id":0
+ }
+ ],
+ },
+ {
+ "port":"port4",
+ "host":"pci0000:2a",
+ "depth":1,
+ "nr_dports":1,
+ "dports":[
+ {
+ "dport":"0000:2a:01.1",
+ "alias":"device:d0",
+ "id":0
+ }
+ ]
+ }
+ ],
+
+Next we have the `Root Decoders` belonging to :code:`root0`. This root decoder
+applies the interleave across the downstream ports :code:`port1` and
+:code:`port3` - with a granularity of 256 bytes.
+
+This information is generated by the CXL driver reading the ACPI CEDT CMFWS.
+
+::
+
+ "decoders:root0":[
+ {
+ "decoder":"decoder0.0",
+ "resource":825975898112,
+ "size":274877906944,
+ "interleave_ways":1,
+ "max_available_extent":0,
+ "volatile_capable":true,
+ "nr_targets":2,
+ "targets":[
+ {
+ "target":"pci0000:a8",
+ "alias":"ACPI0016:02",
+ "position":1,
+ "id":4
+ },
+ ],
+
+Finally we have the `Memory Region` associated with the `Root Decoder`
+:code:`decoder0.0`. This region describes the overall interleave configuration
+of the interleave set.
+
+::
+
+ "regions:decoder0.0":[
+ {
+ "region":"region0",
+ "resource":825975898112,
+ "size":274877906944,
+ "type":"ram",
+ "interleave_ways":2,
+ "interleave_granularity":256,
+ "decode_state":"commit",
+ "mappings":[
+ {
+ "position":1,
+ "memdev":"mem1",
+ "decoder":"decoder6.0"
+ },
+ {
+ "position":0,
+ "memdev":"mem0",
+ "decoder":"decoder5.0"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
diff --git a/Documentation/driver-api/cxl/linux/example-configurations/multi-interleave.rst b/Documentation/driver-api/cxl/linux/example-configurations/multi-interleave.rst
new file mode 100644
index 000000000000..008f9053c630
--- /dev/null
+++ b/Documentation/driver-api/cxl/linux/example-configurations/multi-interleave.rst
@@ -0,0 +1,401 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+Multi-Level Interleave
+======================
+This cxl-cli configuration dump shows the following host configuration:
+
+* A single socket system with one CXL root
+* CXL Root has Four (4) CXL Host Bridges
+* Two CXL Host Bridges have a two CXL Memory Expanders Attached each.
+* The CXL root is configured to interleave across the two host bridges.
+* Each host bridge with expanders interleaves across two endpoints.
+
+This output is generated by :code:`cxl list -v` and describes the relationships
+between objects exposed in :code:`/sys/bus/cxl/devices/`.
+
+::
+
+ [
+ {
+ "bus":"root0",
+ "provider":"ACPI.CXL",
+ "nr_dports":4,
+ "dports":[
+ {
+ "dport":"pci0000:00",
+ "alias":"ACPI0016:01",
+ "id":0
+ },
+ {
+ "dport":"pci0000:a8",
+ "alias":"ACPI0016:02",
+ "id":4
+ },
+ {
+ "dport":"pci0000:2a",
+ "alias":"ACPI0016:03",
+ "id":1
+ },
+ {
+ "dport":"pci0000:d2",
+ "alias":"ACPI0016:00",
+ "id":5
+ }
+ ],
+
+This chunk shows the CXL "bus" (root0) has 4 downstream ports attached to CXL
+Host Bridges. The `Root` can be considered the singular upstream port attached
+to the platform's memory controller - which routes memory requests to it.
+
+The `ports:root0` section lays out how each of these downstream ports are
+configured. If a port is not configured (id's 0 and 1), they are omitted.
+
+::
+
+ "ports:root0":[
+ {
+ "port":"port1",
+ "host":"pci0000:d2",
+ "depth":1,
+ "nr_dports":3,
+ "dports":[
+ {
+ "dport":"0000:d2:01.1",
+ "alias":"device:02",
+ "id":0
+ },
+ {
+ "dport":"0000:d2:01.3",
+ "alias":"device:05",
+ "id":2
+ },
+ {
+ "dport":"0000:d2:07.1",
+ "alias":"device:0d",
+ "id":113
+ }
+ ],
+
+This chunk shows the available downstream ports associated with the CXL Host
+Bridge :code:`port1`. In this case, :code:`port1` has 3 available downstream
+ports: :code:`dport0`, :code:`dport2`, and :code:`dport113`.
+
+::
+
+ "endpoints:port1":[
+ {
+ "endpoint":"endpoint5",
+ "host":"mem0",
+ "parent_dport":"0000:d2:01.1",
+ "depth":2,
+ "memdev":{
+ "memdev":"mem0",
+ "ram_size":137438953472,
+ "serial":0,
+ "numa_node":0,
+ "host":"0000:d3:00.0"
+ },
+ "decoders:endpoint5":[
+ {
+ "decoder":"decoder5.0",
+ "resource":825975898112,
+ "size":549755813888,
+ "interleave_ways":4,
+ "interleave_granularity":256,
+ "region":"region0",
+ "dpa_resource":0,
+ "dpa_size":137438953472,
+ "mode":"ram"
+ }
+ ]
+ },
+ {
+ "endpoint":"endpoint6",
+ "host":"mem1",
+ "parent_dport":"0000:d2:01.3",
+ "depth":2,
+ "memdev":{
+ "memdev":"mem1",
+ "ram_size":137438953472,
+ "serial":0,
+ "numa_node":0,
+ "host":"0000:d3:00.0"
+ },
+ "decoders:endpoint6":[
+ {
+ "decoder":"decoder6.0",
+ "resource":825975898112,
+ "size":549755813888,
+ "interleave_ways":4,
+ "interleave_granularity":256,
+ "region":"region0",
+ "dpa_resource":0,
+ "dpa_size":137438953472,
+ "mode":"ram"
+ }
+ ]
+ }
+ ],
+
+This chunk shows the endpoints attached to the host bridge :code:`port1`.
+
+:code:`endpoint5` contains a single configured decoder :code:`decoder5.0`
+which has the same interleave configuration as :code:`region0` (shown later).
+
+:code:`endpoint6` contains a single configured decoder :code:`decoder5.0`
+which has the same interleave configuration as :code:`region0` (shown later).
+
+Next we have the decoders belonging to the host bridge:
+
+::
+
+ "decoders:port1":[
+ {
+ "decoder":"decoder1.0",
+ "resource":825975898112,
+ "size":549755813888,
+ "interleave_ways":2,
+ "interleave_granularity":512,
+ "region":"region0",
+ "nr_targets":2,
+ "targets":[
+ {
+ "target":"0000:d2:01.1",
+ "alias":"device:02",
+ "position":0,
+ "id":0
+ },
+ {
+ "target":"0000:d2:01.3",
+ "alias":"device:05",
+ "position":2,
+ "id":0
+ }
+ ]
+ }
+ ]
+ },
+
+Host Bridge :code:`port1` has a single decoder (:code:`decoder1.0`), whose
+targets are :code:`dport0` and :code:`dport2` - which are attached to
+:code:`endpoint5` and :code:`endpoint6` respectively.
+
+The following chunk shows a similar configuration for Host Bridge :code:`port3`,
+the second host bridge with a memory device attached.
+
+::
+
+ {
+ "port":"port3",
+ "host":"pci0000:a8",
+ "depth":1,
+ "nr_dports":1,
+ "dports":[
+ {
+ "dport":"0000:a8:01.1",
+ "alias":"device:c3",
+ "id":0
+ },
+ {
+ "dport":"0000:a8:01.3",
+ "alias":"device:c5",
+ "id":0
+ }
+ ],
+ "endpoints:port3":[
+ {
+ "endpoint":"endpoint7",
+ "host":"mem2",
+ "parent_dport":"0000:a8:01.1",
+ "depth":2,
+ "memdev":{
+ "memdev":"mem2",
+ "ram_size":137438953472,
+ "serial":0,
+ "numa_node":0,
+ "host":"0000:a9:00.0"
+ },
+ "decoders:endpoint7":[
+ {
+ "decoder":"decoder7.0",
+ "resource":825975898112,
+ "size":549755813888,
+ "interleave_ways":4,
+ "interleave_granularity":256,
+ "region":"region0",
+ "dpa_resource":0,
+ "dpa_size":137438953472,
+ "mode":"ram"
+ }
+ ]
+ },
+ {
+ "endpoint":"endpoint8",
+ "host":"mem3",
+ "parent_dport":"0000:a8:01.3",
+ "depth":2,
+ "memdev":{
+ "memdev":"mem3",
+ "ram_size":137438953472,
+ "serial":0,
+ "numa_node":0,
+ "host":"0000:a9:00.0"
+ },
+ "decoders:endpoint8":[
+ {
+ "decoder":"decoder8.0",
+ "resource":825975898112,
+ "size":549755813888,
+ "interleave_ways":4,
+ "interleave_granularity":256,
+ "region":"region0",
+ "dpa_resource":0,
+ "dpa_size":137438953472,
+ "mode":"ram"
+ }
+ ]
+ }
+ ],
+ "decoders:port3":[
+ {
+ "decoder":"decoder3.0",
+ "resource":825975898112,
+ "size":549755813888,
+ "interleave_ways":2,
+ "interleave_granularity":512,
+ "region":"region0",
+ "nr_targets":1,
+ "targets":[
+ {
+ "target":"0000:a8:01.1",
+ "alias":"device:c3",
+ "position":1,
+ "id":0
+ },
+ {
+ "target":"0000:a8:01.3",
+ "alias":"device:c5",
+ "position":3,
+ "id":0
+ }
+ ]
+ }
+ ]
+ },
+
+
+The next chunk shows the two CXL host bridges without attached endpoints.
+
+::
+
+ {
+ "port":"port2",
+ "host":"pci0000:00",
+ "depth":1,
+ "nr_dports":2,
+ "dports":[
+ {
+ "dport":"0000:00:01.3",
+ "alias":"device:55",
+ "id":2
+ },
+ {
+ "dport":"0000:00:07.1",
+ "alias":"device:5d",
+ "id":113
+ }
+ ]
+ },
+ {
+ "port":"port4",
+ "host":"pci0000:2a",
+ "depth":1,
+ "nr_dports":1,
+ "dports":[
+ {
+ "dport":"0000:2a:01.1",
+ "alias":"device:d0",
+ "id":0
+ }
+ ]
+ }
+ ],
+
+Next we have the `Root Decoders` belonging to :code:`root0`. This root decoder
+applies the interleave across the downstream ports :code:`port1` and
+:code:`port3` - with a granularity of 256 bytes.
+
+This information is generated by the CXL driver reading the ACPI CEDT CMFWS.
+
+::
+
+ "decoders:root0":[
+ {
+ "decoder":"decoder0.0",
+ "resource":825975898112,
+ "size":549755813888,
+ "interleave_ways":2,
+ "interleave_granularity":256,
+ "max_available_extent":0,
+ "volatile_capable":true,
+ "nr_targets":2,
+ "targets":[
+ {
+ "target":"pci0000:a8",
+ "alias":"ACPI0016:02",
+ "position":1,
+ "id":4
+ },
+ {
+ "target":"pci0000:d2",
+ "alias":"ACPI0016:00",
+ "position":0,
+ "id":5
+ }
+ ],
+
+Finally we have the `Memory Region` associated with the `Root Decoder`
+:code:`decoder0.0`. This region describes the overall interleave configuration
+of the interleave set. So we see there are a total of :code:`4` interleave
+targets across 4 endpoint decoders.
+
+::
+
+ "regions:decoder0.0":[
+ {
+ "region":"region0",
+ "resource":825975898112,
+ "size":549755813888,
+ "type":"ram",
+ "interleave_ways":4,
+ "interleave_granularity":256,
+ "decode_state":"commit",
+ "mappings":[
+ {
+ "position":3,
+ "memdev":"mem3",
+ "decoder":"decoder8.0"
+ },
+ {
+ "position":2,
+ "memdev":"mem1",
+ "decoder":"decoder6.0"
+ }
+ {
+ "position":1,
+ "memdev":"mem2",
+ "decoder":"decoder7.0"
+ },
+ {
+ "position":0,
+ "memdev":"mem0",
+ "decoder":"decoder5.0"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
diff --git a/Documentation/driver-api/cxl/linux/example-configurations/single-device.rst b/Documentation/driver-api/cxl/linux/example-configurations/single-device.rst
new file mode 100644
index 000000000000..5fd38eb0aaf4
--- /dev/null
+++ b/Documentation/driver-api/cxl/linux/example-configurations/single-device.rst
@@ -0,0 +1,246 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Single Device
+=============
+This cxl-cli configuration dump shows the following host configuration:
+
+* A single socket system with one CXL root
+* CXL Root has Four (4) CXL Host Bridges
+* One CXL Host Bridges has a single CXL Memory Expander Attached
+* No interleave is present.
+
+This output is generated by :code:`cxl list -v` and describes the relationships
+between objects exposed in :code:`/sys/bus/cxl/devices/`.
+
+::
+
+ [
+ {
+ "bus":"root0",
+ "provider":"ACPI.CXL",
+ "nr_dports":4,
+ "dports":[
+ {
+ "dport":"pci0000:00",
+ "alias":"ACPI0016:01",
+ "id":0
+ },
+ {
+ "dport":"pci0000:a8",
+ "alias":"ACPI0016:02",
+ "id":4
+ },
+ {
+ "dport":"pci0000:2a",
+ "alias":"ACPI0016:03",
+ "id":1
+ },
+ {
+ "dport":"pci0000:d2",
+ "alias":"ACPI0016:00",
+ "id":5
+ }
+ ],
+
+This chunk shows the CXL "bus" (root0) has 4 downstream ports attached to CXL
+Host Bridges. The `Root` can be considered the singular upstream port attached
+to the platform's memory controller - which routes memory requests to it.
+
+The `ports:root0` section lays out how each of these downstream ports are
+configured. If a port is not configured (id's 0, 1, and 4), they are omitted.
+
+::
+
+ "ports:root0":[
+ {
+ "port":"port1",
+ "host":"pci0000:d2",
+ "depth":1,
+ "nr_dports":3,
+ "dports":[
+ {
+ "dport":"0000:d2:01.1",
+ "alias":"device:02",
+ "id":0
+ },
+ {
+ "dport":"0000:d2:01.3",
+ "alias":"device:05",
+ "id":2
+ },
+ {
+ "dport":"0000:d2:07.1",
+ "alias":"device:0d",
+ "id":113
+ }
+ ],
+
+This chunk shows the available downstream ports associated with the CXL Host
+Bridge :code:`port1`. In this case, :code:`port1` has 3 available downstream
+ports: :code:`dport1`, :code:`dport2`, and :code:`dport113`..
+
+::
+
+ "endpoints:port1":[
+ {
+ "endpoint":"endpoint5",
+ "host":"mem0",
+ "parent_dport":"0000:d2:01.1",
+ "depth":2,
+ "memdev":{
+ "memdev":"mem0",
+ "ram_size":137438953472,
+ "serial":0,
+ "numa_node":0,
+ "host":"0000:d3:00.0"
+ },
+ "decoders:endpoint5":[
+ {
+ "decoder":"decoder5.0",
+ "resource":825975898112,
+ "size":137438953472,
+ "interleave_ways":1,
+ "region":"region0",
+ "dpa_resource":0,
+ "dpa_size":137438953472,
+ "mode":"ram"
+ }
+ ]
+ }
+ ],
+
+This chunk shows the endpoints attached to the host bridge :code:`port1`.
+
+:code:`endpoint5` contains a single configured decoder :code:`decoder5.0`
+which has the same interleave configuration as :code:`region0` (shown later).
+
+Next we have the decoders belonging to the host bridge:
+
+::
+
+ "decoders:port1":[
+ {
+ "decoder":"decoder1.0",
+ "resource":825975898112,
+ "size":137438953472,
+ "interleave_ways":1,
+ "region":"region0",
+ "nr_targets":1,
+ "targets":[
+ {
+ "target":"0000:d2:01.1",
+ "alias":"device:02",
+ "position":0,
+ "id":0
+ }
+ ]
+ }
+ ]
+ },
+
+Host Bridge :code:`port1` has a single decoder (:code:`decoder1.0`), whose only
+target is :code:`dport1` - which is attached to :code:`endpoint5`.
+
+The next chunk shows the three CXL host bridges without attached endpoints.
+
+::
+
+ {
+ "port":"port2",
+ "host":"pci0000:00",
+ "depth":1,
+ "nr_dports":2,
+ "dports":[
+ {
+ "dport":"0000:00:01.3",
+ "alias":"device:55",
+ "id":2
+ },
+ {
+ "dport":"0000:00:07.1",
+ "alias":"device:5d",
+ "id":113
+ }
+ ]
+ },
+ {
+ "port":"port3",
+ "host":"pci0000:a8",
+ "depth":1,
+ "nr_dports":1,
+ "dports":[
+ {
+ "dport":"0000:a8:01.1",
+ "alias":"device:c3",
+ "id":0
+ }
+ ]
+ },
+ {
+ "port":"port4",
+ "host":"pci0000:2a",
+ "depth":1,
+ "nr_dports":1,
+ "dports":[
+ {
+ "dport":"0000:2a:01.1",
+ "alias":"device:d0",
+ "id":0
+ }
+ ]
+ }
+ ],
+
+Next we have the `Root Decoders` belonging to :code:`root0`. This root decoder
+is a pass-through decoder because :code:`interleave_ways` is set to :code:`1`.
+
+This information is generated by the CXL driver reading the ACPI CEDT CMFWS.
+
+::
+
+ "decoders:root0":[
+ {
+ "decoder":"decoder0.0",
+ "resource":825975898112,
+ "size":137438953472,
+ "interleave_ways":1,
+ "max_available_extent":0,
+ "volatile_capable":true,
+ "nr_targets":1,
+ "targets":[
+ {
+ "target":"pci0000:d2",
+ "alias":"ACPI0016:00",
+ "position":0,
+ "id":5
+ }
+ ],
+
+Finally we have the `Memory Region` associated with the `Root Decoder`
+:code:`decoder0.0`. This region describes the discrete region associated
+with the lone device.
+
+::
+
+ "regions:decoder0.0":[
+ {
+ "region":"region0",
+ "resource":825975898112,
+ "size":137438953472,
+ "type":"ram",
+ "interleave_ways":1,
+ "decode_state":"commit",
+ "mappings":[
+ {
+ "position":0,
+ "memdev":"mem0",
+ "decoder":"decoder5.0"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
diff --git a/Documentation/driver-api/cxl/linux/memory-hotplug.rst b/Documentation/driver-api/cxl/linux/memory-hotplug.rst
new file mode 100644
index 000000000000..af368c2bc9cf
--- /dev/null
+++ b/Documentation/driver-api/cxl/linux/memory-hotplug.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Memory Hotplug
+==============
+The final phase of surfacing CXL memory to the kernel page allocator is for
+the `DAX` driver to surface a `Driver Managed` memory region via the
+memory-hotplug component.
+
+There are four major configurations to consider:
+
+1) Default Online Behavior (on/off and zone)
+2) Hotplug Memory Block size
+3) Memory Map Resource location
+4) Driver-Managed Memory Designation
+
+Default Online Behavior
+=======================
+The default-online behavior of hotplug memory is dictated by the following,
+in order of precedence:
+
+- :code:`CONFIG_MHP_DEFAULT_ONLINE_TYPE` Build Configuration
+- :code:`memhp_default_state` Boot parameter
+- :code:`/sys/devices/system/memory/auto_online_blocks` value
+
+These dictate whether hotplugged memory blocks arrive in one of three states:
+
+1) Offline
+2) Online in :code:`ZONE_NORMAL`
+3) Online in :code:`ZONE_MOVABLE`
+
+:code:`ZONE_NORMAL` implies this capacity may be used for almost any allocation,
+while :code:`ZONE_MOVABLE` implies this capacity should only be used for
+migratable allocations.
+
+:code:`ZONE_MOVABLE` attempts to retain the hotplug-ability of a memory block
+so that it the entire region may be hot-unplugged at a later time. Any capacity
+onlined into :code:`ZONE_NORMAL` should be considered permanently attached to
+the page allocator.
+
+Hotplug Memory Block Size
+=========================
+By default, on most architectures, the Hotplug Memory Block Size is either
+128MB or 256MB. On x86, the block size increases up to 2GB as total memory
+capacity exceeds 64GB. As of v6.15, Linux does not take into account the
+size and alignment of the ACPI CEDT CFMWS regions (see Early Boot docs) when
+deciding the Hotplug Memory Block Size.
+
+Memory Map
+==========
+The location of :code:`struct folio` allocations to represent the hotplugged
+memory capacity are dictated by the following system settings:
+
+- :code:`/sys_module/memory_hotplug/parameters/memmap_on_memory`
+- :code:`/sys/bus/dax/devices/daxN.Y/memmap_on_memory`
+
+If both of these parameters are set to true, :code:`struct folio` for this
+capacity will be carved out of the memory block being onlined. This has
+performance implications if the memory is particularly high-latency and
+its :code:`struct folio` becomes hotly contended.
+
+If either parameter is set to false, :code:`struct folio` for this capacity
+will be allocated from the local node of the processor running the hotplug
+procedure. This capacity will be allocated from :code:`ZONE_NORMAL` on
+that node, as it is a :code:`GFP_KERNEL` allocation.
+
+Systems with extremely large amounts of :code:`ZONE_MOVABLE` memory (e.g.
+CXL memory pools) must ensure that there is sufficient local
+:code:`ZONE_NORMAL` capacity to host the memory map for the hotplugged capacity.
+
+Driver Managed Memory
+=====================
+The DAX driver surfaces this memory to memory-hotplug as "Driver Managed". This
+is not a configurable setting, but it's important to note that driver managed
+memory is explicitly excluded from use during kexec. This is required to ensure
+any reset or out-of-band operations that the CXL device may be subject to during
+a functional system-reboot (such as a reset-on-probe) will not cause portions of
+the kexec kernel to be overwritten.
diff --git a/Documentation/driver-api/cxl/linux/overview.rst b/Documentation/driver-api/cxl/linux/overview.rst
new file mode 100644
index 000000000000..648beb2c8c83
--- /dev/null
+++ b/Documentation/driver-api/cxl/linux/overview.rst
@@ -0,0 +1,103 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
+Overview
+========
+
+This section presents the configuration process of a CXL Type-3 memory device,
+and how it is ultimately exposed to users as either a :code:`DAX` device or
+normal memory pages via the kernel's page allocator.
+
+Portions marked with a bullet are points at which certain kernel objects
+are generated.
+
+1) Early Boot
+
+ a) BIOS, Build, and Boot Parameters
+
+ i) EFI_MEMORY_SP
+ ii) CONFIG_EFI_SOFT_RESERVE
+ iii) CONFIG_MHP_DEFAULT_ONLINE_TYPE
+ iv) nosoftreserve
+
+ b) Memory Map Creation
+
+ i) EFI Memory Map / E820 Consulted for Soft-Reserved
+
+ * CXL Memory is set aside to be handled by the CXL driver
+
+ * Soft-Reserved IO Resource created for CFMWS entry
+
+ c) NUMA Node Creation
+
+ * Nodes created from ACPI CEDT CFMWS and SRAT Proximity domains (PXM)
+
+ d) Memory Tier Creation
+
+ * A default memory_tier is created with all nodes.
+
+ e) Contiguous Memory Allocation
+
+ * Any requested CMA is allocated from Online nodes
+
+ f) Init Finishes, Drivers start probing
+
+2) ACPI and PCI Drivers
+
+ a) Detects PCI device is CXL, marking it for probe by CXL driver
+
+3) CXL Driver Operation
+
+ a) Base device creation
+
+ * root, port, and memdev devices created
+ * CEDT CFMWS IO Resource creation
+
+ b) Decoder creation
+
+ * root, switch, and endpoint decoders created
+
+ c) Logical device creation
+
+ * memory_region and endpoint devices created
+
+ d) Devices are associated with each other
+
+ * If auto-decoder (BIOS-programmed decoders), driver validates
+ configurations, builds associations, and locks configs at probe time.
+
+ * If user-configured, validation and associations are built at
+ decoder-commit time.
+
+ e) Regions surfaced as DAX region
+
+ * dax_region created
+
+ * DAX device created via DAX driver
+
+4) DAX Driver Operation
+
+ a) DAX driver surfaces DAX region as one of two dax device modes
+
+ * kmem - dax device is converted to hotplug memory blocks
+
+ * DAX kmem IO Resource creation
+
+ * hmem - dax device is left as daxdev to be accessed as a file.
+
+ * If hmem, journey ends here.
+
+ b) DAX kmem surfaces memory region to Memory Hotplug to add to page
+ allocator as "driver managed memory"
+
+5) Memory Hotplug
+
+ a) mhp component surfaces a dax device memory region as multiple memory
+ blocks to the page allocator
+
+ * blocks appear in :code:`/sys/bus/memory/devices` and linked to a NUMA node
+
+ b) blocks are onlined into the requested zone (NORMAL or MOVABLE)
+
+ * Memory is marked "Driver Managed" to avoid kexec from using it as region
+ for kernel updates
diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst
index df8e2ac2a320..282c1102dd81 100644
--- a/Documentation/driver-api/cxl/maturity-map.rst
+++ b/Documentation/driver-api/cxl/maturity-map.rst
@@ -51,9 +51,9 @@ in place, but there are several corner cases that are pending closure.
* [2] CXL Window Enumeration
- * [0] :ref:`Extended-linear memory-side cache <extended-linear>`
+ * [2] :ref:`Extended-linear memory-side cache <extended-linear>`
* [0] Low Memory-hole
- * [0] Hetero-interleave
+ * [X] Hetero-interleave
* [2] Switch Enumeration
@@ -130,7 +130,7 @@ Mailbox commands
* [0] Switch CCI
* [3] Timestamp
* [1] PMEM labels
-* [0] PMEM GPF / Dirty Shutdown
+* [3] PMEM GPF / Dirty Shutdown
* [0] Scan Media
PMU
@@ -173,7 +173,7 @@ Accelerator
User Flow Support
-----------------
-* [0] HPA->DPA Address translation (need xormaps export solution)
+* [2] Inject & clear poison by region offset
Details
=======
diff --git a/Documentation/driver-api/cxl/platform/acpi.rst b/Documentation/driver-api/cxl/platform/acpi.rst
new file mode 100644
index 000000000000..ee7e6bd4c43d
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/acpi.rst
@@ -0,0 +1,76 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+ACPI Tables
+===========
+
+ACPI is the "Advanced Configuration and Power Interface", which is a standard
+that defines how platforms and OS manage power and configure computer hardware.
+For the purpose of this theory of operation, when referring to "ACPI" we will
+usually refer to "ACPI Tables" - which are the way a platform (BIOS/EFI)
+communicates static configuration information to the operation system.
+
+The Following ACPI tables contain *static* configuration and performance data
+about CXL devices.
+
+.. toctree::
+ :maxdepth: 1
+
+ acpi/cedt.rst
+ acpi/srat.rst
+ acpi/hmat.rst
+ acpi/slit.rst
+ acpi/dsdt.rst
+
+The SRAT table may also contain generic port/initiator content that is intended
+to describe the generic port, but not information about the rest of the path to
+the endpoint.
+
+Linux uses these tables to configure kernel resources for statically configured
+(by BIOS/EFI) CXL devices, such as:
+
+- NUMA nodes
+- Memory Tiers
+- NUMA Abstract Distances
+- SystemRAM Memory Regions
+- Weighted Interleave Node Weights
+
+ACPI Debugging
+==============
+
+The :code:`acpidump -b` command dumps the ACPI tables into binary format.
+
+The :code:`iasl -d` command disassembles the files into human readable format.
+
+Example :code:`acpidump -b && iasl -d cedt.dat` ::
+
+ [000h 0000 4] Signature : "CEDT" [CXL Early Discovery Table]
+
+Common Issues
+-------------
+Most failures described here result in a failure of the driver to surface
+memory as a DAX device and/or kmem.
+
+* CEDT CFMWS targets list UIDs do not match CEDT CHBS UIDs.
+* CEDT CFMWS targets list UIDs do not match DSDT CXL Host Bridge UIDs.
+* CEDT CFMWS Restriction Bits are not correct.
+* CEDT CFMWS Memory regions are poorly aligned.
+* CEDT CFMWS Memory regions spans a platform memory hole.
+* CEDT CHBS UIDs do not match DSDT CXL Host Bridge UIDs.
+* CEDT CHBS Specification version is incorrect.
+* SRAT is missing regions described in CEDT CFMWS.
+
+ * Result: failure to create a NUMA node for the region, or
+ region is placed in wrong node.
+
+* HMAT is missing data for regions described in CEDT CFMWS.
+
+ * Result: NUMA node being placed in the wrong memory tier.
+
+* SLIT has bad data.
+
+ * Result: Lots of performance mechanisms in the kernel will be very unhappy.
+
+All of these issues will appear to users as if the driver is failing to
+support CXL - when in reality they are all the failure of a platform to
+configure the ACPI tables correctly.
diff --git a/Documentation/driver-api/cxl/platform/acpi/cedt.rst b/Documentation/driver-api/cxl/platform/acpi/cedt.rst
new file mode 100644
index 000000000000..217a75fb4881
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/acpi/cedt.rst
@@ -0,0 +1,62 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+CEDT - CXL Early Discovery Table
+================================
+
+The CXL Early Discovery Table is generated by BIOS to describe the CXL memory
+regions configured at boot by the BIOS.
+
+CHBS
+====
+The CXL Host Bridge Structure describes CXL host bridges. Other than describing
+device register information, it reports the specific host bridge UID for this
+host bridge. These host bridge ID's will be referenced in other tables.
+
+Example ::
+
+ Subtable Type : 00 [CXL Host Bridge Structure]
+ Reserved : 00
+ Length : 0020
+ Associated host bridge : 00000007 <- Host bridge _UID
+ Specification version : 00000001
+ Reserved : 00000000
+ Register base : 0000010370400000
+ Register length : 0000000000010000
+
+CFMWS
+=====
+The CXL Fixed Memory Window structure describes a memory region associated
+with one or more CXL host bridges (as described by the CHBS). It additionally
+describes any inter-host-bridge interleave configuration that may have been
+programmed by BIOS.
+
+Example ::
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 000000C050000000 <- Memory Region
+ Window size : 0000003CA0000000
+ Interleave Members (2^n) : 01 <- Interleave configuration
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000007 <- Host Bridge _UID
+ Next Target : 00000006 <- Host Bridge _UID
+
+The restriction field dictates what this SPA range may be used for (memory type,
+voltile vs persistent, etc). One or more bits may be set. ::
+
+ Bit[0]: CXL Type 2 Memory
+ Bit[1]: CXL Type 3 Memory
+ Bit[2]: Volatile Memory
+ Bit[3]: Persistent Memory
+ Bit[4]: Fixed Config (HPA cannot be reused)
+
+INTRA-host-bridge interleave (multiple devices on one host bridge) is NOT
+reported in this structure, and is solely defined via CXL device decoder
+programming (host bridge and endpoint decoders).
diff --git a/Documentation/driver-api/cxl/platform/acpi/dsdt.rst b/Documentation/driver-api/cxl/platform/acpi/dsdt.rst
new file mode 100644
index 000000000000..b4583b01d67d
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/acpi/dsdt.rst
@@ -0,0 +1,28 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================
+DSDT - Differentiated system Description Table
+==============================================
+
+This table describes what peripherals a machine has.
+
+This table's UIDs for CXL devices - specifically host bridges, must be
+consistent with the contents of the CEDT, otherwise the CXL driver will
+fail to probe correctly.
+
+Example Compute Express Link Host Bridge ::
+
+ Scope (_SB)
+ {
+ Device (S0D0)
+ {
+ Name (_HID, "ACPI0016" /* Compute Express Link Host Bridge */) // _HID: Hardware ID
+ Name (_CID, Package (0x02) // _CID: Compatible ID
+ {
+ EisaId ("PNP0A08") /* PCI Express Bus */,
+ EisaId ("PNP0A03") /* PCI Bus */
+ })
+ ...
+ Name (_UID, 0x05) // _UID: Unique ID
+ ...
+ }
diff --git a/Documentation/driver-api/cxl/platform/acpi/hmat.rst b/Documentation/driver-api/cxl/platform/acpi/hmat.rst
new file mode 100644
index 000000000000..095a26f02a37
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/acpi/hmat.rst
@@ -0,0 +1,32 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+HMAT - Heterogeneous Memory Attribute Table
+===========================================
+
+The Heterogeneous Memory Attributes Table contains information such as cache
+attributes and bandwidth and latency details for memory proximity domains.
+For the purpose of this document, we will only discuss the SSLIB entry.
+
+SLLBI
+=====
+The System Locality Latency and Bandwidth Information records latency and
+bandwidth information for proximity domains.
+
+This table is used by Linux to configure interleave weights and memory tiers.
+
+Example (Heavily truncated for brevity) ::
+
+ Structure Type : 0001 [SLLBI]
+ Data Type : 00 <- Latency
+ Target Proximity Domain List : 00000000
+ Target Proximity Domain List : 00000001
+ Entry : 0080 <- DRAM LTC
+ Entry : 0100 <- CXL LTC
+
+ Structure Type : 0001 [SLLBI]
+ Data Type : 03 <- Bandwidth
+ Target Proximity Domain List : 00000000
+ Target Proximity Domain List : 00000001
+ Entry : 1200 <- DRAM BW
+ Entry : 0200 <- CXL BW
diff --git a/Documentation/driver-api/cxl/platform/acpi/slit.rst b/Documentation/driver-api/cxl/platform/acpi/slit.rst
new file mode 100644
index 000000000000..a56768e8fe41
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/acpi/slit.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================================
+SLIT - System Locality Information Table
+========================================
+
+The system locality information table provides "abstract distances" between
+accessor and memory nodes. Node without initiators (cpus) are infinitely (FF)
+distance away from all other nodes.
+
+The abstract distance described in this table does not describe any real
+latency of bandwidth information.
+
+Example ::
+
+ Signature : "SLIT" [System Locality Information Table]
+ Localities : 0000000000000004
+ Locality 0 : 10 20 20 30
+ Locality 1 : 20 10 30 20
+ Locality 2 : FF FF 0A FF
+ Locality 3 : FF FF FF 0A
diff --git a/Documentation/driver-api/cxl/platform/acpi/srat.rst b/Documentation/driver-api/cxl/platform/acpi/srat.rst
new file mode 100644
index 000000000000..cc98ca0e508e
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/acpi/srat.rst
@@ -0,0 +1,71 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+SRAT - Static Resource Affinity Table
+=====================================
+
+The System/Static Resource Affinity Table describes resource (CPU, Memory)
+affinity to "Proximity Domains". This table is technically optional, but for
+performance information (see "HMAT") to be enumerated by linux it must be
+present.
+
+There is a careful dance between the CEDT and SRAT tables and how NUMA nodes are
+created. If things don't look quite the way you expect - check the SRAT Memory
+Affinity entries and CEDT CFMWS to determine what your platform actually
+supports in terms of flexible topologies.
+
+The SRAT may statically assign portions of a CFMWS SPA range to a specific
+proximity domains. See linux numa creation for more information about how
+this presents in the NUMA topology.
+
+Proximity Domain
+================
+A proximity domain is ROUGHLY equivalent to "NUMA Node" - though a 1-to-1
+mapping is not guaranteed. There are scenarios where "Proximity Domain 4" may
+map to "NUMA Node 3", for example. (See "NUMA Node Creation")
+
+Memory Affinity
+===============
+Generally speaking, if a host does any amount of CXL fabric (decoder)
+programming in BIOS - an SRAT entry for that memory needs to be present.
+
+Example ::
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000001 <- NUMA Node 1
+ Reserved1 : 0000
+ Base Address : 000000C050000000 <- Physical Memory Region
+ Address Length : 0000003CA0000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+
+Generic Port Affinity
+=====================
+The Generic Port Affinity subtable provides an association between a proximity
+domain and a device handle representing a Generic Port such as a CXL host
+bridge. With the association, latency and bandwidth numbers can be retrieved
+from the SRAT for the path between CPU(s) (initiator) and the Generic Port.
+This is used to construct performance coordinates for hotplugged CXL DEVICES,
+which cannot be enumerated at boot by platform firmware.
+
+Example ::
+
+ Subtable Type : 06 [Generic Port Affinity]
+ Length : 20 <- 32d, length of table
+ Reserved : 00
+ Device Handle Type : 00 <- 0 - ACPI, 1 - PCI
+ Proximity Domain : 00000001
+ Device Handle : ACPI0016:01
+ Flags : 00000001 <- Bit 0 (Enabled)
+ Reserved : 00000000
+
+The Proximity Domain is matched up to the :doc:`HMAT <hmat>` SSLBI Target
+Proximity Domain List for the related latency or bandwidth numbers. Those
+performance numbers are tied to a CXL host bridge via the Device Handle.
+The driver uses the association to retrieve the Generic Port performance
+numbers for the whole CXL path access coordinates calculation.
diff --git a/Documentation/driver-api/cxl/platform/bios-and-efi.rst b/Documentation/driver-api/cxl/platform/bios-and-efi.rst
new file mode 100644
index 000000000000..5d918b06f6c0
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/bios-and-efi.rst
@@ -0,0 +1,285 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+BIOS/EFI Configuration
+======================
+
+BIOS and EFI are largely responsible for configuring static information about
+devices (or potential future devices) such that Linux can build the appropriate
+logical representations of these devices.
+
+At a high level, this is what occurs during this phase of configuration.
+
+* The bootloader starts the BIOS/EFI.
+
+* BIOS/EFI do early device probe to determine static configuration
+
+* BIOS/EFI creates ACPI Tables that describe static config for the OS
+
+* BIOS/EFI create the system memory map (EFI Memory Map, E820, etc)
+
+* BIOS/EFI calls :code:`start_kernel` and begins the Linux Early Boot process.
+
+Much of what this section is concerned with is ACPI Table production and
+static memory map configuration. More detail on these tables can be found
+at :doc:`ACPI Tables <acpi>`.
+
+.. note::
+ Platform Vendors should read carefully, as this sections has recommendations
+ on physical memory region size and alignment, memory holes, HDM interleave,
+ and what linux expects of HDM decoders trying to work with these features.
+
+
+Linux Expectations of BIOS/EFI Software
+=======================================
+Linux expects BIOS/EFI software to construct sufficient ACPI tables (such as
+CEDT, SRAT, HMAT, etc) and platform-specific configurations (such as HPA spaces
+and host-bridge interleave configurations) to allow the Linux driver to
+subsequently configure the devices in the CXL fabric at runtime.
+
+Programming of HDM decoders and switch ports is not required, and may be
+deferred to the CXL driver based on admin policy (e.g. udev rules).
+
+Some platforms may require pre-programming HDM decoders and locking them
+due to quirks (see: Zen5 address translation), but this is not the normal,
+"expected" configuration path. This should be avoided if possible.
+
+Some platforms may wish to pre-configure these resources to bring memory
+up without requiring CXL driver support. These platform vendors should
+test their configurations with the existing CXL driver and provide driver
+support for their auto-configurations if features like RAS are required.
+
+Platforms requiring boot-time programming and/or locking of CXL fabric
+components may prevent features, such as device hot-plug, from working.
+
+UEFI Settings
+=============
+If your platform supports it, the :code:`uefisettings` command can be used to
+read/write EFI settings. Changes will be reflected on the next reboot. Kexec
+is not a sufficient reboot.
+
+One notable configuration here is the EFI_MEMORY_SP (Specific Purpose) bit.
+When this is enabled, this bit tells linux to defer management of a memory
+region to a driver (in this case, the CXL driver). Otherwise, the memory is
+treated as "normal memory", and is exposed to the page allocator during
+:code:`__init`.
+
+uefisettings examples
+---------------------
+
+:code:`uefisettings identify` ::
+
+ uefisettings identify
+
+ bios_vendor: xxx
+ bios_version: xxx
+ bios_release: xxx
+ bios_date: xxx
+ product_name: xxx
+ product_family: xxx
+ product_version: xxx
+
+On some AMD platforms, the :code:`EFI_MEMORY_SP` bit is set via the :code:`CXL
+Memory Attribute` field. This may be called something else on your platform.
+
+:code:`uefisettings get "CXL Memory Attribute"` ::
+
+ selector: xxx
+ ...
+ question: Question {
+ name: "CXL Memory Attribute",
+ answer: "Enabled",
+ ...
+ }
+
+Physical Memory Map
+===================
+
+Physical Address Region Alignment
+---------------------------------
+
+As of Linux v6.14, the hotplug memory system requires memory regions to be
+uniform in size and alignment. While the CXL specification allows for memory
+regions as small as 256MB, the supported memory block size and alignment for
+hotplugged memory is architecture-defined.
+
+A Linux memory blocks may be as small as 128MB and increase in powers of two.
+
+* On ARM, the default block size and alignment is either 128MB or 256MB.
+
+* On x86, the default block size is 256MB, and increases to 2GB as the
+ capacity of the system increases up to 64GB.
+
+For best support across versions, platform vendors should place CXL memory at
+a 2GB aligned base address, and regions should be 2GB aligned. This also helps
+prevent the creating thousands of memory devices (one per block).
+
+Memory Holes
+------------
+
+Holes in the memory map are tricky. Consider a 4GB device located at base
+address 0x100000000, but with the following memory map ::
+
+ ---------------------
+ | 0x100000000 |
+ | CXL |
+ | 0x1BFFFFFFF |
+ ---------------------
+ | 0x1C0000000 |
+ | MEMORY HOLE |
+ | 0x1FFFFFFFF |
+ ---------------------
+ | 0x200000000 |
+ | CXL CONT. |
+ | 0x23FFFFFFF |
+ ---------------------
+
+There are two issues to consider:
+
+* decoder programming, and
+* memory block alignment.
+
+If your architecture requires 2GB uniform size and aligned memory blocks, the
+only capacity Linux is capable of mapping (as of v6.14) would be the capacity
+from `0x100000000-0x180000000`. The remaining capacity will be stranded, as
+they are not of 2GB aligned length.
+
+Assuming your architecture and memory configuration allows 1GB memory blocks,
+this memory map is supported and this should be presented as multiple CFMWS
+in the CEDT that describe each side of the memory hole separately - along with
+matching decoders.
+
+Multiple decoders can (and should) be used to manage such a memory hole (see
+below), but each chunk of a memory hole should be aligned to a reasonable block
+size (larger alignment is always better). If you intend to have memory holes
+in the memory map, expect to use one decoder per contiguous chunk of host
+physical memory.
+
+As of v6.14, Linux does provide support for memory hotplug of multiple
+physical memory regions separated by a memory hole described by a single
+HDM decoder.
+
+
+Decoder Programming
+===================
+If BIOS/EFI intends to program the decoders to be statically configured,
+there are a few things to consider to avoid major pitfalls that will
+prevent Linux compatibility. Some of these recommendations are not
+required "per the specification", but Linux makes no guarantees of support
+otherwise.
+
+
+Translation Point
+-----------------
+Per the specification, the only decoders which **TRANSLATE** Host Physical
+Address (HPA) to Device Physical Address (DPA) are the **Endpoint Decoders**.
+All other decoders in the fabric are intended to route accesses without
+translating the addresses.
+
+This is heavily implied by the specification, see: ::
+
+ CXL Specification 3.1
+ 8.2.4.20: CXL HDM Decoder Capability Structure
+ - Implementation Note: CXL Host Bridge and Upstream Switch Port Decoder Flow
+ - Implementation Note: Device Decoder Logic
+
+Given this, Linux makes a strong assumption that decoders between CPU and
+endpoint will all be programmed with addresses ranges that are subsets of
+their parent decoder.
+
+Due to some ambiguity in how Architecture, ACPI, PCI, and CXL specifications
+"hand off" responsibility between domains, some early adopting platforms
+attempted to do translation at the originating memory controller or host
+bridge. This configuration requires a platform specific extension to the
+driver and is not officially endorsed - despite being supported.
+
+It is *highly recommended* **NOT** to do this; otherwise, you are on your own
+to implement driver support for your platform.
+
+Interleave and Configuration Flexibility
+----------------------------------------
+If providing cross-host-bridge interleave, a CFMWS entry in the :doc:`CEDT
+<acpi/cedt>` must be presented with target host-bridges for the interleaved
+device sets (there may be multiple behind each host bridge).
+
+If providing intra-host-bridge interleaving, only 1 CFMWS entry in the CEDT is
+required for that host bridge - if it covers the entire capacity of the devices
+behind the host bridge.
+
+If intending to provide users flexibility in programming decoders beyond the
+root, you may want to provide multiple CFMWS entries in the CEDT intended for
+different purposes. For example, you may want to consider adding:
+
+1) A CFMWS entry to cover all interleavable host bridges.
+2) A CFMWS entry to cover all devices on a single host bridge.
+3) A CFMWS entry to cover each device.
+
+A platform may choose to add all of these, or change the mode based on a BIOS
+setting. For each CFMWS entry, Linux expects descriptions of the described
+memory regions in the :doc:`SRAT <acpi/srat>` to determine the number of
+NUMA nodes it should reserve during early boot / init.
+
+As of v6.14, Linux will create a NUMA node for each CEDT CFMWS entry, even if
+a matching SRAT entry does not exist; however, this is not guaranteed in the
+future and such a configuration should be avoided.
+
+Memory Holes
+------------
+If your platform includes memory holes interspersed between your CXL memory, it
+is recommended to utilize multiple decoders to cover these regions of memory,
+rather than try to program the decoders to accept the entire range and expect
+Linux to manage the overlap.
+
+For example, consider the Memory Hole described above ::
+
+ ---------------------
+ | 0x100000000 |
+ | CXL |
+ | 0x1BFFFFFFF |
+ ---------------------
+ | 0x1C0000000 |
+ | MEMORY HOLE |
+ | 0x1FFFFFFFF |
+ ---------------------
+ | 0x200000000 |
+ | CXL CONT. |
+ | 0x23FFFFFFF |
+ ---------------------
+
+Assuming this is provided by a single device attached directly to a host bridge,
+Linux would expect the following decoder programming ::
+
+ ----------------------- -----------------------
+ | root-decoder-0 | | root-decoder-1 |
+ | base: 0x100000000 | | base: 0x200000000 |
+ | size: 0xC0000000 | | size: 0x40000000 |
+ ----------------------- -----------------------
+ | |
+ ----------------------- -----------------------
+ | HB-decoder-0 | | HB-decoder-1 |
+ | base: 0x100000000 | | base: 0x200000000 |
+ | size: 0xC0000000 | | size: 0x40000000 |
+ ----------------------- -----------------------
+ | |
+ ----------------------- -----------------------
+ | ep-decoder-0 | | ep-decoder-1 |
+ | base: 0x100000000 | | base: 0x200000000 |
+ | size: 0xC0000000 | | size: 0x40000000 |
+ ----------------------- -----------------------
+
+With a CEDT configuration with two CFMWS describing the above root decoders.
+
+Linux makes no guarantee of support for strange memory hole situations.
+
+Multi-Media Devices
+-------------------
+The CFMWS field of the CEDT has special restriction bits which describe whether
+the described memory region allows volatile or persistent memory (or both). If
+the platform intends to support either:
+
+1) A device with multiple media, or
+2) Using a persistent memory device as normal memory
+
+A platform may wish to create multiple CEDT CFMWS entries to describe the same
+memory, with the intent of allowing the end user flexibility in how that memory
+is configured. Linux does not presently have strong requirements in this area.
diff --git a/Documentation/driver-api/cxl/platform/cdat.rst b/Documentation/driver-api/cxl/platform/cdat.rst
new file mode 100644
index 000000000000..34bbe7264d71
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/cdat.rst
@@ -0,0 +1,118 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+Coherent Device Attribute Table (CDAT)
+======================================
+
+The CDAT provides functional and performance attributes of devices such
+as CXL accelerators, switches, or endpoints. The table formatting is
+similar to ACPI tables. CDAT data may be parsed by BIOS at boot or may
+be enumerated at runtime (after device hotplug, for example).
+
+Terminology:
+DPA - Device Physical Address, used by the CXL device to denote the address
+it supports for that device.
+
+DSMADHandle - A device unique handle that is associated with a DPA range
+defined by the DSMAS table.
+
+
+===============================================
+Device Scoped Memory Affinity Structure (DSMAS)
+===============================================
+
+The DSMAS contains information such as DSMADHandle, the DPA Base, and DPA
+Length.
+
+This table is used by Linux in conjunction with the Device Scoped Latency and
+Bandwidth Information Structure (DSLBIS) to determine the performance
+attributes of the CXL device itself.
+
+Example ::
+
+ Structure Type : 00 [DSMAS]
+ Reserved : 00
+ Length : 0018 <- 24d, size of structure
+ DSMADHandle : 01
+ Flags : 00
+ Reserved : 0000
+ DPA Base : 0000000040000000 <- 1GiB base
+ DPA Length : 0000000080000000 <- 2GiB size
+
+
+==================================================================
+Device Scoped Latency and Bandwidth Information Structure (DSLBIS)
+==================================================================
+
+This table is used by Linux in conjunction with DSMAS to determine the
+performance attributes of a CXL device. The DSLBIS contains latency
+and bandwidth information based on DSMADHandle matching.
+
+Example ::
+
+ Structure Type : 01 [DSLBIS]
+ Reserved : 00
+ Length : 18 <- 24d, size of structure
+ Handle : 0001 <- DSMAS handle
+ Flags : 00 <- Matches flag field for HMAT SLLBIS
+ Data Type : 00 <- Latency
+ Entry Basee Unit : 0000000000001000 <- Entry Base Unit field in HMAT SSLBIS
+ Entry : 010000000000 <- First byte used here, CXL LTC
+ Reserved : 0000
+
+ Structure Type : 01 [DSLBIS]
+ Reserved : 00
+ Length : 18 <- 24d, size of structure
+ Handle : 0001 <- DSMAS handle
+ Flags : 00 <- Matches flag field for HMAT SLLBIS
+ Data Type : 03 <- Bandwidth
+ Entry Basee Unit : 0000000000001000 <- Entry Base Unit field in HMAT SSLBIS
+ Entry : 020000000000 <- First byte used here, CXL BW
+ Reserved : 0000
+
+
+==================================================================
+Switch Scoped Latency and Bandwidth Information Structure (SSLBIS)
+==================================================================
+
+The SSLBIS contains information about the latency and bandwidth of a switch.
+
+The table is used by Linux to compute the performance coordinates of a CXL path
+from the device to the root port where a switch is part of the path.
+
+Example ::
+
+ Structure Type : 05 [SSLBIS]
+ Reserved : 00
+ Length : 20 <- 32d, length of record, including SSLB entries
+ Data Type : 00 <- Latency
+ Reserved : 000000
+ Entry Base Unit : 00000000000000001000 <- Matches Entry Base Unit in HMAT SSLBIS
+
+ <- SSLB Entry 0
+ Port X ID : 0100 <- First port, 0100h represents an upstream port
+ Port Y ID : 0000 <- Second port, downstream port 0
+ Latency : 0100 <- Port latency
+ Reserved : 0000
+ <- SSLB Entry 1
+ Port X ID : 0100
+ Port Y ID : 0001
+ Latency : 0100
+ Reserved : 0000
+
+
+ Structure Type : 05 [SSLBIS]
+ Reserved : 00
+ Length : 18 <- 24d, length of record, including SSLB entry
+ Data Type : 03 <- Bandwidth
+ Reserved : 000000
+ Entry Base Unit : 00000000000000001000 <- Matches Entry Base Unit in HMAT SSLBIS
+
+ <- SSLB Entry 0
+ Port X ID : 0100 <- First port, 0100h represents an upstream port
+ Port Y ID : FFFF <- Second port, FFFFh indicates any port
+ Bandwidth : 1200 <- Port bandwidth
+ Reserved : 0000
+
+The CXL driver uses a combination of CDAT, HMAT, SRAT, and other data to
+generate "whole path performance" data for a CXL device.
diff --git a/Documentation/driver-api/cxl/platform/device-hotplug.rst b/Documentation/driver-api/cxl/platform/device-hotplug.rst
new file mode 100644
index 000000000000..e4a065fdd3ec
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/device-hotplug.rst
@@ -0,0 +1,130 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+CXL Device Hotplug
+==================
+
+Device hotplug refers to *physical* hotplug of a device (addition or removal
+of a physical device from the machine).
+
+BIOS/EFI software is expected to configure sufficient resources **at boot
+time** to allow hotplugged devices to be configured by software (such as
+proximity domains, HPA regions, and host-bridge configurations).
+
+BIOS/EFI is not expected (**nor suggested**) to configure hotplugged
+devices at hotplug time (i.e. HDM decoders should be left unprogrammed).
+
+This document covers some examples of those resources, but should not
+be considered exhaustive.
+
+Hot-Remove
+==========
+Hot removal of a device typically requires careful removal of software
+constructs (memory regions, associated drivers) which manage these devices.
+
+Hard-removing a CXL.mem device without carefully tearing down driver stacks
+is likely to cause the system to machine-check (or at least SIGBUS if memory
+access is limited to user space).
+
+Memory Device Hot-Add
+=====================
+A device present at boot may be associated with a CXL Fixed Memory Window
+reported in :doc:`CEDT<acpi/cedt>`. That CFMWS may match the size of the
+device, but the construction of the CEDT CFMWS is platform-defined.
+
+Hot-adding a memory device requires this pre-defined, **static** CFMWS to
+have sufficient HPA space to describe that device.
+
+There are a few common scenarios to consider.
+
+Single-Endpoint Memory Device Present at Boot
+---------------------------------------------
+A device present at boot likely had its capacity reported in the
+:doc:`CEDT<acpi/cedt>`. If a device is removed and a new device hotplugged,
+the capacity of the new device will be limited to the original CFMWS capacity.
+
+Adding capacity larger than the original device will cause memory region
+creation to fail if the region size is greater than the CFMWS size.
+
+The CFMWS is **static** and cannot be adjusted. Platforms which may expect
+different sized devices to be hotplugged must allocate sufficient CFMWS space
+**at boot time** to cover all future expected devices.
+
+Multi-Endpoint Memory Device Present at Boot
+--------------------------------------------
+Non-switch-based Multi-Endpoint devices are outside the scope of what the
+CXL specification describes, but they are technically possible. We describe
+them here for instructive reasons only - this does not imply Linux support.
+
+A hot-plug capable CXL memory device, such as one which presents multiple
+expanders as a single large-capacity device, should report the **maximum
+possible capacity** for the device at boot. ::
+
+ HB0
+ RP0
+ |
+ [Multi-Endpoint Memory Device]
+ _____|_____
+ | |
+ [Endpoint0] [Empty]
+
+
+Limiting the size to the capacity preset at boot will limit hot-add support
+to replacing capacity that was present at boot.
+
+No CXL Device Present at Boot
+-----------------------------
+When no CXL memory device is present on boot, some platforms omit the CFMWS
+in the :doc:`CEDT<acpi/cedt>`. When this occurs, hot-add is not possible.
+
+This describes the base case for any given device not being present at boot.
+If a future possible device is not described in the CEDT at boot, hot-add
+of that device is either limited or not possible.
+
+For a platform to support hot-add of a full memory device, it must allocate
+a CEDT CFMWS region with sufficient memory capacity to cover all future
+potentially added capacity (along with any relevant CEDT CHBS entry).
+
+To support memory hotplug directly on the host bridge/root port, or on a switch
+downstream of the host bridge, a platform must construct a CEDT CFMWS at boot
+with sufficient resources to support the max possible (or expected) hotplug
+memory capacity. ::
+
+ HB0 HB1
+ RP0 RP1 RP2
+ | | |
+ Empty Empty USP
+ ________|________
+ | | | |
+ DSP DSP DSP DSP
+ | | | |
+ All Empty
+
+For example, a BIOS/EFI may expose an option to configure a CEDT CFMWS with
+a pre-configured amount of memory capacity (per host bridge, or host bridge
+interleave set), even if no device is attached to Root Ports or Downstream
+Ports at boot (as depicted in the figure above).
+
+
+Interleave Sets
+===============
+
+Host Bridge Interleave
+----------------------
+Host-bridge interleaved memory regions are defined **statically** in the
+:doc:`CEDT<acpi/cedt>`. To apply cross-host-bridge interleave, a CFMWS entry
+describing that interleave must have been provided **at boot**. Hotplugged
+devices cannot add host-bridge interleave capabilities at hotplug time.
+
+See the :doc:`Flexible CEDT Configuration<example-configurations/flexible>`
+example to see how a platform can provide this kind of flexibility regarding
+hotplugged memory devices. BIOS/EFI software should consider options to
+present flexible CEDT configurations with hotplug support.
+
+HDM Interleave
+--------------
+Decoder-applied interleave can flexibly handle hotplugged devices, as decoders
+can be re-programmed after hotplug.
+
+To add or remove a device to/from an existing HDM-applied interleaved region,
+that region must be torn down an re-created.
diff --git a/Documentation/driver-api/cxl/platform/example-configs.rst b/Documentation/driver-api/cxl/platform/example-configs.rst
new file mode 100644
index 000000000000..90a10d7473c6
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/example-configs.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Example Platform Configurations
+###############################
+
+.. toctree::
+ :maxdepth: 1
+ :caption: Contents
+
+ example-configurations/one-dev-per-hb.rst
+ example-configurations/multi-dev-per-hb.rst
+ example-configurations/hb-interleave.rst
+ example-configurations/flexible.rst
diff --git a/Documentation/driver-api/cxl/platform/example-configurations/flexible.rst b/Documentation/driver-api/cxl/platform/example-configurations/flexible.rst
new file mode 100644
index 000000000000..dab704b6fcc2
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/example-configurations/flexible.rst
@@ -0,0 +1,296 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Flexible Presentation
+=====================
+This system has a single socket with two CXL host bridges. Each host bridge
+has two CXL memory expanders with a 4GB of memory (32GB total).
+
+On this system, the platform designer wanted to provide the user flexibility
+to configure the memory devices in various interleave or NUMA node
+configurations. So they provided every combination.
+
+Things to note:
+
+* Cross-Bridge interleave is described in one CFMWS that covers all capacity.
+* One CFMWS is also described per-host bridge.
+* One CFMWS is also described per-device.
+* This SRAT describes one node for each of the above CFMWS.
+* The HMAT describes performance for each node in the SRAT.
+
+:doc:`CEDT <../acpi/cedt>`::
+
+ Subtable Type : 00 [CXL Host Bridge Structure]
+ Reserved : 00
+ Length : 0020
+ Associated host bridge : 00000007
+ Specification version : 00000001
+ Reserved : 00000000
+ Register base : 0000010370400000
+ Register length : 0000000000010000
+
+ Subtable Type : 00 [CXL Host Bridge Structure]
+ Reserved : 00
+ Length : 0020
+ Associated host bridge : 00000006
+ Specification version : 00000001
+ Reserved : 00000000
+ Register base : 0000010380800000
+ Register length : 0000000000010000
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000001000000000
+ Window size : 0000000400000000
+ Interleave Members (2^n) : 01
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000007
+ Second Target : 00000006
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000002000000000
+ Window size : 0000000200000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000007
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000002200000000
+ Window size : 0000000200000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000006
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000003000000000
+ Window size : 0000000100000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000007
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000003100000000
+ Window size : 0000000100000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000007
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000003200000000
+ Window size : 0000000100000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000006
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000003300000000
+ Window size : 0000000100000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000006
+
+:doc:`SRAT <../acpi/srat>`::
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000001
+ Reserved1 : 0000
+ Base Address : 0000001000000000
+ Address Length : 0000000400000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000002
+ Reserved1 : 0000
+ Base Address : 0000002000000000
+ Address Length : 0000000200000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000003
+ Reserved1 : 0000
+ Base Address : 0000002200000000
+ Address Length : 0000000200000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000004
+ Reserved1 : 0000
+ Base Address : 0000003000000000
+ Address Length : 0000000100000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000005
+ Reserved1 : 0000
+ Base Address : 0000003100000000
+ Address Length : 0000000100000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000006
+ Reserved1 : 0000
+ Base Address : 0000003200000000
+ Address Length : 0000000100000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000007
+ Reserved1 : 0000
+ Base Address : 0000003300000000
+ Address Length : 0000000100000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+:doc:`HMAT <../acpi/hmat>`::
+
+ Structure Type : 0001 [SLLBI]
+ Data Type : 00 [Latency]
+ Target Proximity Domain List : 00000000
+ Target Proximity Domain List : 00000001
+ Target Proximity Domain List : 00000002
+ Target Proximity Domain List : 00000003
+ Target Proximity Domain List : 00000004
+ Target Proximity Domain List : 00000005
+ Target Proximity Domain List : 00000006
+ Target Proximity Domain List : 00000007
+ Entry : 0080
+ Entry : 0100
+ Entry : 0100
+ Entry : 0100
+ Entry : 0100
+ Entry : 0100
+ Entry : 0100
+ Entry : 0100
+
+ Structure Type : 0001 [SLLBI]
+ Data Type : 03 [Bandwidth]
+ Target Proximity Domain List : 00000000
+ Target Proximity Domain List : 00000001
+ Target Proximity Domain List : 00000002
+ Target Proximity Domain List : 00000003
+ Target Proximity Domain List : 00000004
+ Target Proximity Domain List : 00000005
+ Target Proximity Domain List : 00000006
+ Target Proximity Domain List : 00000007
+ Entry : 1200
+ Entry : 0400
+ Entry : 0200
+ Entry : 0200
+ Entry : 0100
+ Entry : 0100
+ Entry : 0100
+ Entry : 0100
+
+:doc:`SLIT <../acpi/slit>`::
+
+ Signature : "SLIT" [System Locality Information Table]
+ Localities : 0000000000000003
+ Locality 0 : 10 20 20 20 20 20 20 20
+ Locality 1 : FF 0A FF FF FF FF FF FF
+ Locality 2 : FF FF 0A FF FF FF FF FF
+ Locality 3 : FF FF FF 0A FF FF FF FF
+ Locality 4 : FF FF FF FF 0A FF FF FF
+ Locality 5 : FF FF FF FF FF 0A FF FF
+ Locality 6 : FF FF FF FF FF FF 0A FF
+ Locality 7 : FF FF FF FF FF FF FF 0A
+
+:doc:`DSDT <../acpi/dsdt>`::
+
+ Scope (_SB)
+ {
+ Device (S0D0)
+ {
+ Name (_HID, "ACPI0016" /* Compute Express Link Host Bridge */) // _HID: Hardware ID
+ ...
+ Name (_UID, 0x07) // _UID: Unique ID
+ }
+ ...
+ Device (S0D5)
+ {
+ Name (_HID, "ACPI0016" /* Compute Express Link Host Bridge */) // _HID: Hardware ID
+ ...
+ Name (_UID, 0x06) // _UID: Unique ID
+ }
+ }
diff --git a/Documentation/driver-api/cxl/platform/example-configurations/hb-interleave.rst b/Documentation/driver-api/cxl/platform/example-configurations/hb-interleave.rst
new file mode 100644
index 000000000000..c474dcf09fb0
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/example-configurations/hb-interleave.rst
@@ -0,0 +1,107 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Cross-Host-Bridge Interleave
+============================
+This system has a single socket with two CXL host bridges. Each host bridge
+has a single CXL memory expander with a 4GB of memory.
+
+Things to note:
+
+* Cross-Bridge interleave is described.
+* The expanders are described by a single CFMWS.
+* This SRAT describes one node for both host bridges.
+* The HMAT describes a single node's performance.
+
+:doc:`CEDT <../acpi/cedt>`::
+
+ Subtable Type : 00 [CXL Host Bridge Structure]
+ Reserved : 00
+ Length : 0020
+ Associated host bridge : 00000007
+ Specification version : 00000001
+ Reserved : 00000000
+ Register base : 0000010370400000
+ Register length : 0000000000010000
+
+ Subtable Type : 00 [CXL Host Bridge Structure]
+ Reserved : 00
+ Length : 0020
+ Associated host bridge : 00000006
+ Specification version : 00000001
+ Reserved : 00000000
+ Register base : 0000010380800000
+ Register length : 0000000000010000
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000001000000000
+ Window size : 0000000200000000
+ Interleave Members (2^n) : 01
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000007
+ Second Target : 00000006
+
+:doc:`SRAT <../acpi/srat>`::
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000001
+ Reserved1 : 0000
+ Base Address : 0000001000000000
+ Address Length : 0000000200000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+:doc:`HMAT <../acpi/hmat>`::
+
+ Structure Type : 0001 [SLLBI]
+ Data Type : 00 [Latency]
+ Target Proximity Domain List : 00000000
+ Target Proximity Domain List : 00000001
+ Target Proximity Domain List : 00000002
+ Entry : 0080
+ Entry : 0100
+
+ Structure Type : 0001 [SLLBI]
+ Data Type : 03 [Bandwidth]
+ Target Proximity Domain List : 00000000
+ Target Proximity Domain List : 00000001
+ Target Proximity Domain List : 00000002
+ Entry : 1200
+ Entry : 0400
+
+:doc:`SLIT <../acpi/slit>`::
+
+ Signature : "SLIT" [System Locality Information Table]
+ Localities : 0000000000000003
+ Locality 0 : 10 20
+ Locality 1 : FF 0A
+
+:doc:`DSDT <../acpi/dsdt>`::
+
+ Scope (_SB)
+ {
+ Device (S0D0)
+ {
+ Name (_HID, "ACPI0016" /* Compute Express Link Host Bridge */) // _HID: Hardware ID
+ ...
+ Name (_UID, 0x07) // _UID: Unique ID
+ }
+ ...
+ Device (S0D5)
+ {
+ Name (_HID, "ACPI0016" /* Compute Express Link Host Bridge */) // _HID: Hardware ID
+ ...
+ Name (_UID, 0x06) // _UID: Unique ID
+ }
+ }
diff --git a/Documentation/driver-api/cxl/platform/example-configurations/multi-dev-per-hb.rst b/Documentation/driver-api/cxl/platform/example-configurations/multi-dev-per-hb.rst
new file mode 100644
index 000000000000..a7854a79dbbd
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/example-configurations/multi-dev-per-hb.rst
@@ -0,0 +1,90 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+Multiple Devices per Host Bridge
+================================
+
+In this example system we will have a single socket and one CXL host bridge.
+There are two CXL memory expanders with 4GB attached to the host bridge.
+
+Things to note:
+
+* Intra-Bridge interleave is not described here.
+* The expanders are described by a single CEDT/CFMWS.
+* This CEDT/SRAT describes one node for both devices.
+* There is only one proximity domain the HMAT for both devices.
+
+:doc:`CEDT <../acpi/cedt>`::
+
+ Subtable Type : 00 [CXL Host Bridge Structure]
+ Reserved : 00
+ Length : 0020
+ Associated host bridge : 00000007
+ Specification version : 00000001
+ Reserved : 00000000
+ Register base : 0000010370400000
+ Register length : 0000000000010000
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000001000000000
+ Window size : 0000000200000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000007
+
+:doc:`SRAT <../acpi/srat>`::
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000001
+ Reserved1 : 0000
+ Base Address : 0000001000000000
+ Address Length : 0000000200000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+:doc:`HMAT <../acpi/hmat>`::
+
+ Structure Type : 0001 [SLLBI]
+ Data Type : 00 [Latency]
+ Target Proximity Domain List : 00000000
+ Target Proximity Domain List : 00000001
+ Entry : 0080
+ Entry : 0100
+
+ Structure Type : 0001 [SLLBI]
+ Data Type : 03 [Bandwidth]
+ Target Proximity Domain List : 00000000
+ Target Proximity Domain List : 00000001
+ Entry : 1200
+ Entry : 0200
+
+:doc:`SLIT <../acpi/slit>`::
+
+ Signature : "SLIT" [System Locality Information Table]
+ Localities : 0000000000000003
+ Locality 0 : 10 20
+ Locality 1 : FF 0A
+
+:doc:`DSDT <../acpi/dsdt>`::
+
+ Scope (_SB)
+ {
+ Device (S0D0)
+ {
+ Name (_HID, "ACPI0016" /* Compute Express Link Host Bridge */) // _HID: Hardware ID
+ ...
+ Name (_UID, 0x07) // _UID: Unique ID
+ }
+ ...
+ }
diff --git a/Documentation/driver-api/cxl/platform/example-configurations/one-dev-per-hb.rst b/Documentation/driver-api/cxl/platform/example-configurations/one-dev-per-hb.rst
new file mode 100644
index 000000000000..a4c3fb51ea7d
--- /dev/null
+++ b/Documentation/driver-api/cxl/platform/example-configurations/one-dev-per-hb.rst
@@ -0,0 +1,136 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+One Device per Host Bridge
+==========================
+
+This system has a single socket with two CXL host bridges. Each host bridge
+has a single CXL memory expander with a 4GB of memory.
+
+Things to note:
+
+* Cross-Bridge interleave is not being used.
+* The expanders are in two separate but adjacent memory regions.
+* This CEDT/SRAT describes one node per device
+* The expanders have the same performance and will be in the same memory tier.
+
+:doc:`CEDT <../acpi/cedt>`::
+
+ Subtable Type : 00 [CXL Host Bridge Structure]
+ Reserved : 00
+ Length : 0020
+ Associated host bridge : 00000007
+ Specification version : 00000001
+ Reserved : 00000000
+ Register base : 0000010370400000
+ Register length : 0000000000010000
+
+ Subtable Type : 00 [CXL Host Bridge Structure]
+ Reserved : 00
+ Length : 0020
+ Associated host bridge : 00000006
+ Specification version : 00000001
+ Reserved : 00000000
+ Register base : 0000010380800000
+ Register length : 0000000000010000
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000001000000000
+ Window size : 0000000100000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000007
+
+ Subtable Type : 01 [CXL Fixed Memory Window Structure]
+ Reserved : 00
+ Length : 002C
+ Reserved : 00000000
+ Window base address : 0000001100000000
+ Window size : 0000000100000000
+ Interleave Members (2^n) : 00
+ Interleave Arithmetic : 00
+ Reserved : 0000
+ Granularity : 00000000
+ Restrictions : 0006
+ QtgId : 0001
+ First Target : 00000006
+
+:doc:`SRAT <../acpi/srat>`::
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000001
+ Reserved1 : 0000
+ Base Address : 0000001000000000
+ Address Length : 0000000100000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+ Subtable Type : 01 [Memory Affinity]
+ Length : 28
+ Proximity Domain : 00000002
+ Reserved1 : 0000
+ Base Address : 0000001100000000
+ Address Length : 0000000100000000
+ Reserved2 : 00000000
+ Flags (decoded below) : 0000000B
+ Enabled : 1
+ Hot Pluggable : 1
+ Non-Volatile : 0
+
+:doc:`HMAT <../acpi/hmat>`::
+
+ Structure Type : 0001 [SLLBI]
+ Data Type : 00 [Latency]
+ Target Proximity Domain List : 00000000
+ Target Proximity Domain List : 00000001
+ Target Proximity Domain List : 00000002
+ Entry : 0080
+ Entry : 0100
+ Entry : 0100
+
+ Structure Type : 0001 [SLLBI]
+ Data Type : 03 [Bandwidth]
+ Target Proximity Domain List : 00000000
+ Target Proximity Domain List : 00000001
+ Target Proximity Domain List : 00000002
+ Entry : 1200
+ Entry : 0200
+ Entry : 0200
+
+:doc:`SLIT <../acpi/slit>`::
+
+ Signature : "SLIT" [System Locality Information Table]
+ Localities : 0000000000000003
+ Locality 0 : 10 20 20
+ Locality 1 : FF 0A FF
+ Locality 2 : FF FF 0A
+
+:doc:`DSDT <../acpi/dsdt>`::
+
+ Scope (_SB)
+ {
+ Device (S0D0)
+ {
+ Name (_HID, "ACPI0016" /* Compute Express Link Host Bridge */) // _HID: Hardware ID
+ ...
+ Name (_UID, 0x07) // _UID: Unique ID
+ }
+ ...
+ Device (S0D5)
+ {
+ Name (_HID, "ACPI0016" /* Compute Express Link Host Bridge */) // _HID: Hardware ID
+ ...
+ Name (_UID, 0x06) // _UID: Unique ID
+ }
+ }
diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/theory-of-operation.rst
index d732c42526df..257f513e320c 100644
--- a/Documentation/driver-api/cxl/memory-devices.rst
+++ b/Documentation/driver-api/cxl/theory-of-operation.rst
@@ -1,9 +1,9 @@
.. SPDX-License-Identifier: GPL-2.0
.. include:: <isonum.txt>
-===================================
-Compute Express Link Memory Devices
-===================================
+===============================================
+Compute Express Link Driver Theory of Operation
+===============================================
A Compute Express Link Memory Device is a CXL component that implements the
CXL.mem protocol. It contains some amount of volatile memory, persistent memory,
@@ -14,8 +14,8 @@ that optionally define a device's contribution to an interleaved address
range across multiple devices underneath a host-bridge or interleaved
across host-bridges.
-CXL Bus: Theory of Operation
-============================
+The CXL Bus
+===========
Similar to how a RAID driver takes disk objects and assembles them into a new
logical device, the CXL subsystem is tasked to take PCIe and ACPI objects and
assemble them into a CXL.mem decode topology. The need for runtime configuration
@@ -29,8 +29,8 @@ Platform firmware enumerates a menu of interleave options at the "CXL root port"
(Linux term for the top of the CXL decode topology). From there, PCIe topology
dictates which endpoints can participate in which Host Bridge decode regimes.
Each PCIe Switch in the path between the root and an endpoint introduces a point
-at which the interleave can be split. For example platform firmware may say at a
-given range only decodes to 1 one Host Bridge, but that Host Bridge may in turn
+at which the interleave can be split. For example, platform firmware may say a
+given range only decodes to one Host Bridge, but that Host Bridge may in turn
interleave cycles across multiple Root Ports. An intervening Switch between a
port and an endpoint may interleave cycles across multiple Downstream Switch
Ports, etc.
@@ -187,7 +187,7 @@ decodes them to "ports", "ports" decode to "endpoints", and "endpoints"
represent the decode from SPA (System Physical Address) to DPA (Device Physical
Address).
-Continuing the RAID analogy, disks have both topology metadata and on device
+Continuing the RAID analogy, disks have both topology metadata and on-device
metadata that determine RAID set assembly. CXL Port topology and CXL Port link
status is metadata for CXL.mem set assembly. The CXL Port topology is enumerated
by the arrival of a CXL.mem device. I.e. unless and until the PCIe core attaches
@@ -197,7 +197,7 @@ the Linux PCI core to tear down switch-level CXL resources because the endpoint
->remove() event cleans up the port data that was established to support that
Memory Expander.
-The port metadata and potential decode schemes that a give memory device may
+The port metadata and potential decode schemes that a given memory device may
participate can be determined via a command like::
# cxl list -BDMu -d root -m mem3
@@ -249,8 +249,8 @@ participate can be determined via a command like::
...which queries the CXL topology to ask "given CXL Memory Expander with a kernel
device name of 'mem3' which platform level decode ranges may this device
participate". A given expander can participate in multiple CXL.mem interleave
-sets simultaneously depending on how many decoder resource it has. In this
-example mem3 can participate in one or more of a PMEM interleave that spans to
+sets simultaneously depending on how many decoder resources it has. In this
+example mem3 can participate in one or more of a PMEM interleave that spans two
Host Bridges, a PMEM interleave that targets a single Host Bridge, a Volatile
memory interleave that spans 2 Host Bridges, and a Volatile memory interleave
that only targets a single Host Bridge.
@@ -347,6 +347,9 @@ CXL Core
.. kernel-doc:: drivers/cxl/cxl.h
:internal:
+.. kernel-doc:: drivers/cxl/acpi.c
+ :identifiers: add_cxl_resources
+
.. kernel-doc:: drivers/cxl/core/hdm.c
:doc: cxl core hdm
@@ -371,12 +374,26 @@ CXL Core
.. kernel-doc:: drivers/cxl/core/pmem.c
:doc: cxl pmem
+.. kernel-doc:: drivers/cxl/core/pmem.c
+ :identifiers:
+
.. kernel-doc:: drivers/cxl/core/regs.c
:doc: cxl registers
+.. kernel-doc:: drivers/cxl/core/regs.c
+ :identifiers:
+
.. kernel-doc:: drivers/cxl/core/mbox.c
:doc: cxl mbox
+.. kernel-doc:: drivers/cxl/core/mbox.c
+ :identifiers:
+
+.. kernel-doc:: drivers/cxl/core/features.c
+ :doc: cxl features
+
+See :c:func:`devm_cxl_setup_features` for API details.
+
CXL Regions
-----------
.. kernel-doc:: drivers/cxl/core/region.c
diff --git a/Documentation/driver-api/device-io.rst b/Documentation/driver-api/device-io.rst
index 5c7e8194bef9..d1aaa961cac4 100644
--- a/Documentation/driver-api/device-io.rst
+++ b/Documentation/driver-api/device-io.rst
@@ -16,7 +16,7 @@ Bus-Independent Device Accesses
Introduction
============
-Linux provides an API which abstracts performing IO across all busses
+Linux provides an API which abstracts performing IO across all buses
and devices, allowing device drivers to be written independently of bus
type.
@@ -71,7 +71,7 @@ can be compiler optimised, you can use __readb() and friends to
indicate the relaxed ordering. Use this with care.
While the basic functions are defined to be synchronous with respect to
-each other and ordered with respect to each other the busses the devices
+each other and ordered with respect to each other the buses the devices
sit on may themselves have asynchronicity. In particular many authors
are burned by the fact that PCI bus writes are posted asynchronously. A
driver author must issue a read from the same device to ensure that
diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst
index 29abf1eebf9f..2f36c21d9948 100644
--- a/Documentation/driver-api/dma-buf.rst
+++ b/Documentation/driver-api/dma-buf.rst
@@ -125,11 +125,6 @@ Implicit Fence Poll Support
.. kernel-doc:: drivers/dma-buf/dma-buf.c
:doc: implicit fence polling
-DMA-BUF statistics
-~~~~~~~~~~~~~~~~~~
-.. kernel-doc:: drivers/dma-buf/dma-buf-sysfs-stats.c
- :doc: overview
-
DMA Buffer ioctls
~~~~~~~~~~~~~~~~~
diff --git a/Documentation/driver-api/dmaengine/index.rst b/Documentation/driver-api/dmaengine/index.rst
index bdc45d8b4cfb..e74677c664ac 100644
--- a/Documentation/driver-api/dmaengine/index.rst
+++ b/Documentation/driver-api/dmaengine/index.rst
@@ -46,10 +46,3 @@ This book adds some notes about PXA DMA
:maxdepth: 1
pxa_dma
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst
index 3085f8b460fa..f4ed98f701c9 100644
--- a/Documentation/driver-api/dmaengine/provider.rst
+++ b/Documentation/driver-api/dmaengine/provider.rst
@@ -172,8 +172,8 @@ Currently, the types available are:
- It's usually used for copying pixel data between host memory and
memory-mapped GPU device memory, such as found on modern PCI video graphics
cards. The most immediate example is the OpenGL API function
- ``glReadPielx()``, which might require a verbatim copy of a huge framebuffer
- from local device memory onto host memory.
+ ``glReadPixels()``, which might require a verbatim copy of a huge
+ framebuffer from local device memory onto host memory.
- DMA_XOR
@@ -217,10 +217,12 @@ Currently, the types available are:
- DMA_ASYNC_TX
- - Must not be set by the device, and will be set by the framework
- if needed
+ - The device supports asynchronous memory-to-memory operations,
+ including memcpy, memset, xor, pq, xor_val, and pq_val.
- - TODO: What is it about?
+ - This capability is automatically set by the DMA engine
+ framework and must not be configured manually by device
+ drivers.
- DMA_SLAVE
@@ -409,7 +411,7 @@ supported.
- This structure can be initialized using the function
``dma_async_tx_descriptor_init``.
- - You'll also need to set two fields in this structure:
+ - You'll also need to set following fields in this structure:
- flags:
TODO: Can it be modified by the driver itself, or
@@ -419,6 +421,9 @@ supported.
that is supposed to push the current transaction descriptor to a
pending queue, waiting for issue_pending to be called.
+ - phys: Physical address of the descriptor which is used later by
+ the dma engine to read the descriptor and initiate transfer.
+
- In this structure the function pointer callback_result can be
initialized in order for the submitter to be notified that a
transaction has completed. In the earlier code the function pointer
diff --git a/Documentation/driver-api/dmaengine/pxa_dma.rst b/Documentation/driver-api/dmaengine/pxa_dma.rst
index 442ee691a190..8f9da66b0bfa 100644
--- a/Documentation/driver-api/dmaengine/pxa_dma.rst
+++ b/Documentation/driver-api/dmaengine/pxa_dma.rst
@@ -40,7 +40,7 @@ Design
======
a) Virtual channels
Same concept as in sa11x0 driver, ie. a driver was assigned a "virtual
-channel" linked to the requestor line, and the physical DMA channel is
+channel" linked to the requester line, and the physical DMA channel is
assigned on the fly when the transfer is issued.
b) Transfer anatomy for a scatter-gather transfer
diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst
index e6855cd37e85..93c191b2d089 100644
--- a/Documentation/driver-api/dpll.rst
+++ b/Documentation/driver-api/dpll.rst
@@ -179,29 +179,47 @@ Phase offset measurement and adjustment
Device may provide ability to measure a phase difference between signals
on a pin and its parent dpll device. If pin-dpll phase offset measurement
is supported, it shall be provided with ``DPLL_A_PIN_PHASE_OFFSET``
-attribute for each parent dpll device.
+attribute for each parent dpll device. The reported phase offset may be
+computed as the average of prior values and the current measurement, using
+the following formula:
+
+.. math::
+ curr\_avg = prev\_avg * \frac{2^N-1}{2^N} + new\_val * \frac{1}{2^N}
+
+where `curr_avg` is the current reported phase offset, `prev_avg` is the
+previously reported value, `new_val` is the current measurement, and `N` is
+the averaging factor. Configured averaging factor value is provided with
+``DPLL_A_PHASE_OFFSET_AVG_FACTOR`` attribute of a device and value change can
+be requested with the same attribute with ``DPLL_CMD_DEVICE_SET`` command.
+
+ ================================== ======================================
+ ``DPLL_A_PHASE_OFFSET_AVG_FACTOR`` attr configured value of phase offset
+ averaging factor
+ ================================== ======================================
Device may also provide ability to adjust a signal phase on a pin.
-If pin phase adjustment is supported, minimal and maximal values that pin
-handle shall be provide to the user on ``DPLL_CMD_PIN_GET`` respond
-with ``DPLL_A_PIN_PHASE_ADJUST_MIN`` and ``DPLL_A_PIN_PHASE_ADJUST_MAX``
+If pin phase adjustment is supported, minimal and maximal values and
+granularity that pin handle shall be provided to the user on
+``DPLL_CMD_PIN_GET`` respond with ``DPLL_A_PIN_PHASE_ADJUST_MIN``,
+``DPLL_A_PIN_PHASE_ADJUST_MAX`` and ``DPLL_A_PIN_PHASE_ADJUST_GRAN``
attributes. Configured phase adjust value is provided with
``DPLL_A_PIN_PHASE_ADJUST`` attribute of a pin, and value change can be
requested with the same attribute with ``DPLL_CMD_PIN_SET`` command.
- =============================== ======================================
- ``DPLL_A_PIN_ID`` configured pin id
- ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase adjustment
- ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase adjustment
- ``DPLL_A_PIN_PHASE_ADJUST`` attr configured value of phase
- adjustment on parent dpll device
- ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting
- configuration on given parent dpll
- device
- ``DPLL_A_PIN_PARENT_ID`` parent dpll device id
- ``DPLL_A_PIN_PHASE_OFFSET`` attr measured phase difference
- between a pin and parent dpll device
- =============================== ======================================
+ ================================ ==========================================
+ ``DPLL_A_PIN_ID`` configured pin id
+ ``DPLL_A_PIN_PHASE_ADJUST_GRAN`` attr granularity of phase adjustment value
+ ``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase adjustment
+ ``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase adjustment
+ ``DPLL_A_PIN_PHASE_ADJUST`` attr configured value of phase
+ adjustment on parent dpll device
+ ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting
+ configuration on given parent dpll
+ device
+ ``DPLL_A_PIN_PARENT_ID`` parent dpll device id
+ ``DPLL_A_PIN_PHASE_OFFSET`` attr measured phase difference
+ between a pin and parent dpll device
+ ================================ ==========================================
All phase related values are provided in pico seconds, which represents
time difference between signals phase. The negative value means that
@@ -214,6 +232,42 @@ offset values are fractional with 3-digit decimal places and shell be
divided with ``DPLL_PIN_PHASE_OFFSET_DIVIDER`` to get integer part and
modulo divided to get fractional part.
+Phase offset monitor
+====================
+
+Phase offset measurement is typically performed against the current active
+source. However, some DPLL (Digital Phase-Locked Loop) devices may offer
+the capability to monitor phase offsets across all available inputs.
+The attribute and current feature state shall be included in the response
+message of the ``DPLL_CMD_DEVICE_GET`` command for supported DPLL devices.
+In such cases, users can also control the feature using the
+``DPLL_CMD_DEVICE_SET`` command by setting the ``enum dpll_feature_state``
+values for the attribute.
+Once enabled the phase offset measurements for the input shall be returned
+in the ``DPLL_A_PIN_PHASE_OFFSET`` attribute.
+
+ =============================== ========================
+ ``DPLL_A_PHASE_OFFSET_MONITOR`` attr state of a feature
+ =============================== ========================
+
+Frequency monitor
+=================
+
+Some DPLL devices may offer the capability to measure the actual
+frequency of all available input pins. The attribute and current feature state
+shall be included in the response message of the ``DPLL_CMD_DEVICE_GET``
+command for supported DPLL devices. In such cases, users can also control
+the feature using the ``DPLL_CMD_DEVICE_SET`` command by setting the
+``enum dpll_feature_state`` values for the attribute.
+Once enabled the measured input frequency for each input pin shall be
+returned in the ``DPLL_A_PIN_MEASURED_FREQUENCY`` attribute. The value
+is in millihertz (mHz), using ``DPLL_PIN_MEASURED_FREQUENCY_DIVIDER``
+as the divider.
+
+ =============================== ========================
+ ``DPLL_A_FREQUENCY_MONITOR`` attr state of a feature
+ =============================== ========================
+
Embedded SYNC
=============
@@ -235,6 +289,31 @@ the pin.
``DPLL_A_PIN_ESYNC_PULSE`` pulse type of Embedded SYNC
========================================= =================================
+Reference SYNC
+==============
+
+The device may support the Reference SYNC feature, which allows the combination
+of two inputs into a input pair. In this configuration, clock signals
+from both inputs are used to synchronize the DPLL device. The higher frequency
+signal is utilized for the loop bandwidth of the DPLL, while the lower frequency
+signal is used to syntonize the output signal of the DPLL device. This feature
+enables the provision of a high-quality loop bandwidth signal from an external
+source.
+
+A capable input provides a list of inputs that can be bound with to create
+Reference SYNC. To control this feature, the user must request a desired
+state for a target pin: use ``DPLL_PIN_STATE_CONNECTED`` to enable or
+``DPLL_PIN_STATE_DISCONNECTED`` to disable the feature. An input pin can be
+bound to only one other pin at any given time.
+
+ ============================== ==========================================
+ ``DPLL_A_PIN_REFERENCE_SYNC`` nested attribute for providing info or
+ requesting configuration of the Reference
+ SYNC feature
+ ``DPLL_A_PIN_ID`` target pin id for Reference SYNC feature
+ ``DPLL_A_PIN_STATE`` state of Reference SYNC connection
+ ============================== ==========================================
+
Configuration commands group
============================
@@ -325,6 +404,8 @@ according to attribute purpose.
frequencies
``DPLL_A_PIN_ANY_FREQUENCY_MIN`` attr minimum value of frequency
``DPLL_A_PIN_ANY_FREQUENCY_MAX`` attr maximum value of frequency
+ ``DPLL_A_PIN_PHASE_ADJUST_GRAN`` attr granularity of phase
+ adjustment value
``DPLL_A_PIN_PHASE_ADJUST_MIN`` attr minimum value of phase
adjustment
``DPLL_A_PIN_PHASE_ADJUST_MAX`` attr maximum value of phase
@@ -348,6 +429,8 @@ according to attribute purpose.
``DPLL_A_PIN_STATE`` attr state of pin on the parent
pin
``DPLL_A_PIN_CAPABILITIES`` attr bitmask of pin capabilities
+ ``DPLL_A_PIN_MEASURED_FREQUENCY`` attr measured frequency of
+ an input pin in mHz
==================================== ==================================
==================================== =================================
diff --git a/Documentation/driver-api/driver-model/binding.rst b/Documentation/driver-api/driver-model/binding.rst
index 7ea1d7a41e1d..fa0888c2b3b9 100644
--- a/Documentation/driver-api/driver-model/binding.rst
+++ b/Documentation/driver-api/driver-model/binding.rst
@@ -53,9 +53,12 @@ class's register_dev callback.
Driver
~~~~~~
-When a driver is attached to a device, the device is inserted into the
-driver's list of devices.
-
+When a driver is attached to a device, the driver's probe() function is
+called. Within probe(), the driver initializes the device and allocates
+and initializes per-device data structures. This per-device state is
+associated with the device object for as long as the driver remains bound
+to it. Conceptually, this per-device data together with the binding to
+the device can be thought of as an instance of the driver.
sysfs
~~~~~
@@ -96,3 +99,51 @@ of the driver is decremented. All symlinks between the two are removed.
When a driver is removed, the list of devices that it supports is
iterated over, and the driver's remove callback is called for each
one. The device is removed from that list and the symlinks removed.
+
+
+Driver Override
+~~~~~~~~~~~~~~~
+
+Userspace may override the standard matching by writing a driver name to
+a device's ``driver_override`` sysfs attribute. When set, only a driver
+whose name matches the override will be considered during binding. This
+bypasses all bus-specific matching (OF, ACPI, ID tables, etc.).
+
+The override may be cleared by writing an empty string, which returns
+the device to standard matching rules. Writing to ``driver_override``
+does not automatically unbind the device from its current driver or
+make any attempt to load the specified driver.
+
+Buses opt into this mechanism by setting the ``driver_override`` flag in
+their ``struct bus_type``::
+
+ const struct bus_type example_bus_type = {
+ ...
+ .driver_override = true,
+ };
+
+When the flag is set, the driver core automatically creates the
+``driver_override`` sysfs attribute for every device on that bus.
+
+The bus's ``match()`` callback should check the override before performing
+its own matching, using ``device_match_driver_override()``::
+
+ static int example_match(struct device *dev, const struct device_driver *drv)
+ {
+ int ret;
+
+ ret = device_match_driver_override(dev, drv);
+ if (ret >= 0)
+ return ret;
+
+ /* Fall through to bus-specific matching... */
+ }
+
+``device_match_driver_override()`` returns > 0 if the override matches
+the given driver, 0 if the override is set but does not match, or < 0 if
+no override is set at all.
+
+Additional helpers are available:
+
+- ``device_set_driver_override()`` - set or clear the override from kernel code.
+- ``device_has_driver_override()`` - check whether an override is set.
diff --git a/Documentation/driver-api/driver-model/design-patterns.rst b/Documentation/driver-api/driver-model/design-patterns.rst
index 41eb8f41f7dd..965b2b93be6f 100644
--- a/Documentation/driver-api/driver-model/design-patterns.rst
+++ b/Documentation/driver-api/driver-model/design-patterns.rst
@@ -103,7 +103,7 @@ The design pattern is the same for an hrtimer or something similar that will
return a single argument which is a pointer to a struct member in the
callback.
-container_of() is a macro defined in <linux/kernel.h>
+container_of() is a macro defined in <linux/container_of.h>
What container_of() does is to obtain a pointer to the containing struct from
a pointer to a member by a simple subtraction using the offsetof() macro from
diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index d75728eb05f8..017fb155a5bc 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -275,7 +275,6 @@ GPIO
devm_gpiod_put()
devm_gpiod_unhinge()
devm_gpiochip_add_data()
- devm_gpio_request()
devm_gpio_request_one()
I2C
@@ -384,19 +383,17 @@ NET
PER-CPU MEM
devm_alloc_percpu()
- devm_free_percpu()
PCI
devm_pci_alloc_host_bridge() : managed PCI host bridge allocation
devm_pci_remap_cfgspace() : ioremap PCI configuration space
devm_pci_remap_cfg_resource() : ioremap PCI configuration space resource
- pcim_enable_device() : after success, some PCI ops become managed
+ pcim_enable_device() : after success, the PCI device gets disabled automatically on driver detach
pcim_iomap() : do iomap() on a single BAR
pcim_iomap_regions() : do request_region() and iomap() on multiple BARs
pcim_iomap_table() : array of mapped addresses indexed by BAR
pcim_iounmap() : do iounmap() on a single BAR
- pcim_iounmap_regions() : do iounmap() and release_region() on multiple BARs
pcim_pin_device() : keep PCI device enabled after release
pcim_set_mwi() : enable Memory-Write-Invalidate PCI transaction
@@ -411,7 +408,6 @@ PINCTRL
devm_pinctrl_get_select()
devm_pinctrl_register()
devm_pinctrl_register_and_init()
- devm_pinctrl_unregister()
POWER
devm_reboot_mode_register()
@@ -468,3 +464,7 @@ SPI
WATCHDOG
devm_watchdog_register_device()
+
+WORKQUEUE
+ devm_alloc_workqueue()
+ devm_alloc_ordered_workqueue()
diff --git a/Documentation/driver-api/driver-model/index.rst b/Documentation/driver-api/driver-model/index.rst
index 4831bdd92e5c..abeb4b36636b 100644
--- a/Documentation/driver-api/driver-model/index.rst
+++ b/Documentation/driver-api/driver-model/index.rst
@@ -14,10 +14,3 @@ Driver Model
overview
platform
porting
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/driver-model/overview.rst b/Documentation/driver-api/driver-model/overview.rst
index e98d0ab4a9b6..b3f447bf9f07 100644
--- a/Documentation/driver-api/driver-model/overview.rst
+++ b/Documentation/driver-api/driver-model/overview.rst
@@ -22,7 +22,7 @@ uniformity across the different bus types.
The current driver model provides a common, uniform data model for describing
a bus and the devices that can appear under the bus. The unified bus
-model includes a set of common attributes which all busses carry, and a set
+model includes a set of common attributes which all buses carry, and a set
of common callbacks, such as device discovery during bus probing, bus
shutdown, bus power management, etc.
diff --git a/Documentation/driver-api/driver-model/platform.rst b/Documentation/driver-api/driver-model/platform.rst
index 7beb8a9648c5..cf5ff48d3115 100644
--- a/Documentation/driver-api/driver-model/platform.rst
+++ b/Documentation/driver-api/driver-model/platform.rst
@@ -4,7 +4,7 @@ Platform Devices and Drivers
See <linux/platform_device.h> for the driver model interface to the
platform bus: platform_device, and platform_driver. This pseudo-bus
-is used to connect devices on busses with minimal infrastructure,
+is used to connect devices on buses with minimal infrastructure,
like those used to integrate peripherals on many system-on-chip
processors, or some "legacy" PC interconnects; as opposed to large
formally specified ones like PCI or USB.
diff --git a/Documentation/driver-api/early-userspace/buffer-format.rst b/Documentation/driver-api/early-userspace/buffer-format.rst
index 7f74e301fdf3..4597a91100b7 100644
--- a/Documentation/driver-api/early-userspace/buffer-format.rst
+++ b/Documentation/driver-api/early-userspace/buffer-format.rst
@@ -4,20 +4,18 @@ initramfs buffer format
Al Viro, H. Peter Anvin
-Last revision: 2002-01-13
-
-Starting with kernel 2.5.x, the old "initial ramdisk" protocol is
-getting {replaced/complemented} with the new "initial ramfs"
-(initramfs) protocol. The initramfs contents is passed using the same
-memory buffer protocol used by the initrd protocol, but the contents
+With kernel 2.5.x, the old "initial ramdisk" protocol was complemented
+with an "initial ramfs" protocol. The initramfs content is passed
+using the same memory buffer protocol used by initrd, but the content
is different. The initramfs buffer contains an archive which is
-expanded into a ramfs filesystem; this document details the format of
-the initramfs buffer format.
+expanded into a ramfs filesystem; this document details the initramfs
+buffer format.
The initramfs buffer format is based around the "newc" or "crc" CPIO
formats, and can be created with the cpio(1) utility. The cpio
-archive can be compressed using gzip(1). One valid version of an
-initramfs buffer is thus a single .cpio.gz file.
+archive can be compressed using gzip(1), or any other algorithm provided
+via CONFIG_DECOMPRESS_*. One valid version of an initramfs buffer is
+thus a single .cpio.gz file.
The full format of the initramfs buffer is defined by the following
grammar, where::
@@ -25,12 +23,20 @@ grammar, where::
* is used to indicate "0 or more occurrences of"
(|) indicates alternatives
+ indicates concatenation
- GZIP() indicates the gzip(1) of the operand
+ GZIP() indicates gzip compression of the operand
+ BZIP2() indicates bzip2 compression of the operand
+ LZMA() indicates lzma compression of the operand
+ XZ() indicates xz compression of the operand
+ LZO() indicates lzo compression of the operand
+ LZ4() indicates lz4 compression of the operand
+ ZSTD() indicates zstd compression of the operand
ALGN(n) means padding with null bytes to an n-byte boundary
- initramfs := ("\0" | cpio_archive | cpio_gzip_archive)*
+ initramfs := ("\0" | cpio_archive | cpio_compressed_archive)*
- cpio_gzip_archive := GZIP(cpio_archive)
+ cpio_compressed_archive := (GZIP(cpio_archive) | BZIP2(cpio_archive)
+ | LZMA(cpio_archive) | XZ(cpio_archive) | LZO(cpio_archive)
+ | LZ4(cpio_archive) | ZSTD(cpio_archive))
cpio_archive := cpio_file* + (<nothing> | cpio_trailer)
@@ -75,9 +81,16 @@ c_chksum 8 bytes Checksum of data field if c_magic is 070702;
The c_mode field matches the contents of st_mode returned by stat(2)
on Linux, and encodes the file type and file permissions.
+c_mtime is ignored unless CONFIG_INITRAMFS_PRESERVE_MTIME=y is set.
+
The c_filesize should be zero for any file which is not a regular file
or symlink.
+c_namesize may account for more than one trailing '\0', as long as the
+value doesn't exceed PATH_MAX. This can be useful for ensuring that a
+subsequent file data segment is aligned, e.g. to a filesystem block
+boundary.
+
The c_chksum field contains a simple 32-bit unsigned sum of all the
bytes in the data field. cpio(1) refers to this as "crc", which is
clearly incorrect (a cyclic redundancy check is a different and
diff --git a/Documentation/driver-api/early-userspace/early_userspace_support.rst b/Documentation/driver-api/early-userspace/early_userspace_support.rst
index 61bdeac1bae5..60d1e1bc9413 100644
--- a/Documentation/driver-api/early-userspace/early_userspace_support.rst
+++ b/Documentation/driver-api/early-userspace/early_userspace_support.rst
@@ -73,7 +73,7 @@ usr/gen_initramfs.sh. This means that CONFIG_INITRAMFS_SOURCE
can really be interpreted as any legal argument to
gen_initramfs.sh. If a directory is specified as an argument then
the contents are scanned, uid/gid translation is performed, and
-usr/gen_init_cpio file directives are output. If a directory is
+usr/gen_init_cpio file directives are output. If a file is
specified as an argument to usr/gen_initramfs.sh then the
contents of the file are simply copied to the output. All of the output
directives from directory scanning and file contents copying are
diff --git a/Documentation/driver-api/early-userspace/index.rst b/Documentation/driver-api/early-userspace/index.rst
index 149c1822f06d..ff459471258f 100644
--- a/Documentation/driver-api/early-userspace/index.rst
+++ b/Documentation/driver-api/early-userspace/index.rst
@@ -9,10 +9,3 @@ Early Userspace
early_userspace_support
buffer-format
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/eisa.rst b/Documentation/driver-api/eisa.rst
index b33ebe1ec9ed..3563e5f7e98d 100644
--- a/Documentation/driver-api/eisa.rst
+++ b/Documentation/driver-api/eisa.rst
@@ -8,9 +8,9 @@ This document groups random notes about porting EISA drivers to the
new EISA/sysfs API.
Starting from version 2.5.59, the EISA bus is almost given the same
-status as other much more mainstream busses such as PCI or USB. This
+status as other much more mainstream buses such as PCI or USB. This
has been possible through sysfs, which defines a nice enough set of
-abstractions to manage busses, devices and drivers.
+abstractions to manage buses, devices and drivers.
Although the new API is quite simple to use, converting existing
drivers to the new infrastructure is not an easy task (mostly because
@@ -205,7 +205,7 @@ Random notes
Converting an EISA driver to the new API mostly involves *deleting*
code (since probing is now in the core EISA code). Unfortunately, most
drivers share their probing routine between ISA, and EISA. Special
-care must be taken when ripping out the EISA code, so other busses
+care must be taken when ripping out the EISA code, so other buses
won't suffer from these surgical strikes...
You *must not* expect any EISA device to be detected when returning
diff --git a/Documentation/driver-api/firmware/efi/index.rst b/Documentation/driver-api/firmware/efi/index.rst
index 4fe8abba9fc6..5a6b6229592c 100644
--- a/Documentation/driver-api/firmware/efi/index.rst
+++ b/Documentation/driver-api/firmware/efi/index.rst
@@ -1,11 +1,16 @@
.. SPDX-License-Identifier: GPL-2.0
-============
-UEFI Support
-============
+====================================================
+Unified Extensible Firmware Interface (UEFI) Support
+====================================================
UEFI stub library functions
===========================
.. kernel-doc:: drivers/firmware/efi/libstub/mem.c
:internal:
+
+UEFI Common Platform Error Record (CPER) functions
+==================================================
+
+.. kernel-doc:: drivers/firmware/efi/cper.c
diff --git a/Documentation/driver-api/firmware/firmware-usage-guidelines.rst b/Documentation/driver-api/firmware/firmware-usage-guidelines.rst
index fdcfce42c6d2..336e912281c3 100644
--- a/Documentation/driver-api/firmware/firmware-usage-guidelines.rst
+++ b/Documentation/driver-api/firmware/firmware-usage-guidelines.rst
@@ -42,3 +42,8 @@ then of course these rules will not apply strictly.)
deprecating old major versions, then this should only be done as a
last option, and be stated clearly in all communications.
+* Firmware files that affect the User API (UAPI) shall not introduce
+ changes that break existing userspace programs. Updates to such firmware
+ must ensure backward compatibility with existing userspace applications.
+ This includes maintaining consistent interfaces and behaviors that
+ userspace programs rely on.
diff --git a/Documentation/driver-api/firmware/index.rst b/Documentation/driver-api/firmware/index.rst
index 9d2c19dc8e36..86a3dd4bc3f8 100644
--- a/Documentation/driver-api/firmware/index.rst
+++ b/Documentation/driver-api/firmware/index.rst
@@ -10,10 +10,3 @@ Linux Firmware API
request_firmware
fw_upload
other_interfaces
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/generic-counter.rst b/Documentation/driver-api/generic-counter.rst
index 71ccc30e586b..e826f16ea43d 100644
--- a/Documentation/driver-api/generic-counter.rst
+++ b/Documentation/driver-api/generic-counter.rst
@@ -467,7 +467,7 @@ Counter sysfs
Translates counter data to the standard Counter sysfs interface format
and vice versa.
-Please refer to the ``Documentation/ABI/testing/sysfs-bus-counter`` file
+Please refer to the Documentation/ABI/testing/sysfs-bus-counter file
for a detailed breakdown of the available Generic Counter interface
sysfs attributes.
@@ -483,7 +483,7 @@ Sysfs Interface
Several sysfs attributes are generated by the Generic Counter interface,
and reside under the ``/sys/bus/counter/devices/counterX`` directory,
where ``X`` is to the respective counter device id. Please see
-``Documentation/ABI/testing/sysfs-bus-counter`` for detailed information
+Documentation/ABI/testing/sysfs-bus-counter for detailed information
on each Generic Counter interface sysfs attribute.
Through these sysfs attributes, programs and scripts may interact with
diff --git a/Documentation/driver-api/generic_pt.rst b/Documentation/driver-api/generic_pt.rst
new file mode 100644
index 000000000000..fd29d1b525e5
--- /dev/null
+++ b/Documentation/driver-api/generic_pt.rst
@@ -0,0 +1,137 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+Generic Radix Page Table
+========================
+
+.. kernel-doc:: include/linux/generic_pt/common.h
+ :doc: Generic Radix Page Table
+
+.. kernel-doc:: drivers/iommu/generic_pt/pt_defs.h
+ :doc: Generic Page Table Language
+
+Usage
+=====
+
+Generic PT is structured as a multi-compilation system. Since each format
+provides an API using a common set of names there can be only one format active
+within a compilation unit. This design avoids function pointers around the low
+level API.
+
+Instead the function pointers can end up at the higher level API (i.e.
+map/unmap, etc.) and the per-format code can be directly inlined into the
+per-format compilation unit. For something like IOMMU each format will be
+compiled into a per-format IOMMU operations kernel module.
+
+For this to work the .c file for each compilation unit will include both the
+format headers and the generic code for the implementation. For instance in an
+implementation compilation unit the headers would normally be included as
+follows:
+
+generic_pt/fmt/iommu_amdv1.c::
+
+ #include <linux/generic_pt/common.h>
+ #include "defs_amdv1.h"
+ #include "../pt_defs.h"
+ #include "amdv1.h"
+ #include "../pt_common.h"
+ #include "../pt_iter.h"
+ #include "../iommu_pt.h" /* The IOMMU implementation */
+
+iommu_pt.h includes definitions that will generate the operations functions for
+map/unmap/etc. using the definitions provided by AMDv1. The resulting module
+will have exported symbols named like pt_iommu_amdv1_init().
+
+Refer to drivers/iommu/generic_pt/fmt/iommu_template.h for an example of how the
+IOMMU implementation uses multi-compilation to generate per-format ops structs
+pointers.
+
+The format code is written so that the common names arise from #defines to
+distinct format specific names. This is intended to aid debuggability by
+avoiding symbol clashes across all the different formats.
+
+Exported symbols and other global names are mangled using a per-format string
+via the NS() helper macro.
+
+The format uses struct pt_common as the top-level struct for the table,
+and each format will have its own struct pt_xxx which embeds it to store
+format-specific information.
+
+The implementation will further wrap struct pt_common in its own top-level
+struct, such as struct pt_iommu_amdv1.
+
+Format functions at the struct pt_common level
+----------------------------------------------
+
+.. kernel-doc:: include/linux/generic_pt/common.h
+ :identifiers:
+.. kernel-doc:: drivers/iommu/generic_pt/pt_common.h
+
+Iteration Helpers
+-----------------
+
+.. kernel-doc:: drivers/iommu/generic_pt/pt_iter.h
+
+Writing a Format
+----------------
+
+It is best to start from a simple format that is similar to the target. x86_64
+is usually a good reference for something simple, and AMDv1 is something fairly
+complete.
+
+The required inline functions need to be implemented in the format header.
+These should all follow the standard pattern of::
+
+ static inline pt_oaddr_t amdv1pt_entry_oa(const struct pt_state *pts)
+ {
+ [..]
+ }
+ #define pt_entry_oa amdv1pt_entry_oa
+
+where a uniquely named per-format inline function provides the implementation
+and a define maps it to the generic name. This is intended to make debug symbols
+work better. inline functions should always be used as the prototypes in
+pt_common.h will cause the compiler to validate the function signature to
+prevent errors.
+
+Review pt_fmt_defaults.h to understand some of the optional inlines.
+
+Once the format compiles then it should be run through the generic page table
+kunit test in kunit_generic_pt.h using kunit. For example::
+
+ $ tools/testing/kunit/kunit.py run --build_dir build_kunit_x86_64 --arch x86_64 --kunitconfig ./drivers/iommu/generic_pt/.kunitconfig amdv1_fmt_test.*
+ [...]
+ [11:15:08] Testing complete. Ran 9 tests: passed: 9
+ [11:15:09] Elapsed time: 3.137s total, 0.001s configuring, 2.368s building, 0.311s running
+
+The generic tests are intended to prove out the format functions and give
+clearer failures to speed up finding the problems. Once those pass then the
+entire kunit suite should be run.
+
+IOMMU Invalidation Features
+---------------------------
+
+Invalidation is how the page table algorithms synchronize with a HW cache of the
+page table memory, typically called the TLB (or IOTLB for IOMMU cases).
+
+The TLB can store present PTEs, non-present PTEs and table pointers, depending
+on its design. Every HW has its own approach on how to describe what has changed
+to have changed items removed from the TLB.
+
+PT_FEAT_FLUSH_RANGE
+~~~~~~~~~~~~~~~~~~~
+
+PT_FEAT_FLUSH_RANGE is the easiest scheme to understand. It tries to generate a
+single range invalidation for each operation, over-invalidating if there are
+gaps of VA that don't need invalidation. This trades off impacted VA for number
+of invalidation operations. It does not keep track of what is being invalidated;
+however, if pages have to be freed then page table pointers have to be cleaned
+from the walk cache. The range can start/end at any page boundary.
+
+PT_FEAT_FLUSH_RANGE_NO_GAPS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+PT_FEAT_FLUSH_RANGE_NO_GAPS is similar to PT_FEAT_FLUSH_RANGE; however, it tries
+to minimize the amount of impacted VA by issuing extra flush operations. This is
+useful if the cost of processing VA is very high, for instance because a
+hypervisor is processing the page table with a shadowing algorithm.
diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst
index 4fd1cbd8296e..4ac1e12cf872 100644
--- a/Documentation/driver-api/gpio/board.rst
+++ b/Documentation/driver-api/gpio/board.rst
@@ -94,6 +94,84 @@ with the help of _DSD (Device Specific Data), introduced in ACPI 5.1::
For more information about the ACPI GPIO bindings see
Documentation/firmware-guide/acpi/gpio-properties.rst.
+Software Nodes
+--------------
+
+Software nodes allow board-specific code to construct an in-memory,
+device-tree-like structure using struct software_node and struct
+property_entry. This structure can then be associated with a platform device,
+allowing drivers to use the standard device properties API to query
+configuration, just as they would on an ACPI or device tree system.
+
+Software-node-backed GPIOs are described using the ``PROPERTY_ENTRY_GPIO()``
+macro, which ties a software node representing the GPIO controller with
+consumer device. It allows consumers to use regular gpiolib APIs, such as
+gpiod_get(), gpiod_get_optional().
+
+The software node representing a GPIO controller must be attached to the
+GPIO controller device - either as the primary or the secondary firmware node.
+
+For example, here is how to describe a single GPIO-connected LED. This is an
+alternative to using platform_data on legacy systems.
+
+.. code-block:: c
+
+ #include <linux/property.h>
+ #include <linux/gpio/machine.h>
+ #include <linux/gpio/property.h>
+
+ /*
+ * 1. Define a node for the GPIO controller.
+ */
+ static const struct software_node gpio_controller_node = {
+ .name = "gpio-foo",
+ };
+
+ /* 2. Define the properties for the LED device. */
+ static const struct property_entry led_device_props[] = {
+ PROPERTY_ENTRY_STRING("label", "myboard:green:status"),
+ PROPERTY_ENTRY_STRING("linux,default-trigger", "heartbeat"),
+ PROPERTY_ENTRY_GPIO("gpios", &gpio_controller_node, 42, GPIO_ACTIVE_HIGH),
+ { }
+ };
+
+ /* 3. Define the software node for the LED device. */
+ static const struct software_node led_device_swnode = {
+ .name = "status-led",
+ .properties = led_device_props,
+ };
+
+ /*
+ * 4. Register the software nodes and the platform device.
+ */
+ const struct software_node *swnodes[] = {
+ &gpio_controller_node,
+ &led_device_swnode,
+ NULL
+ };
+ software_node_register_node_group(swnodes);
+
+ /*
+ * 5. Attach the GPIO controller's software node to the device and
+ * register it.
+ */
+ static void gpio_foo_register(void)
+ {
+ struct platform_device_info pdev_info = {
+ .name = "gpio-foo",
+ .id = PLATFORM_DEVID_NONE,
+ .swnode = &gpio_controller_node
+ };
+
+ platform_device_register_full(&pdev_info);
+ }
+
+ // Then register a platform_device for "leds-gpio" and associate
+ // it with &led_device_swnode via .fwnode.
+
+For a complete guide on converting board files to use software nodes, see
+Documentation/driver-api/gpio/legacy-boards.rst.
+
Platform Data
-------------
Finally, GPIOs can be bound to devices and functions using platform data. Board
@@ -174,22 +252,6 @@ mapping and is thus transparent to GPIO consumers.
A set of functions such as gpiod_set_value() is available to work with
the new descriptor-oriented interface.
-Boards using platform data can also hog GPIO lines by defining GPIO hog tables.
-
-.. code-block:: c
-
- struct gpiod_hog gpio_hog_table[] = {
- GPIO_HOG("gpio.0", 10, "foo", GPIO_ACTIVE_LOW, GPIOD_OUT_HIGH),
- { }
- };
-
-And the table can be added to the board code as follows::
-
- gpiod_add_hogs(gpio_hog_table);
-
-The line will be hogged as soon as the gpiochip is created or - in case the
-chip was created earlier - when the hog table is registered.
-
Arrays of pins
--------------
In addition to requesting pins belonging to a function one by one, a device may
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index ae433261e11a..a4f160b95089 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -87,6 +87,33 @@ atomic context on realtime kernels (inside hard IRQ handlers and similar
contexts). Normally this should not be required.
+GPIO level semantics
+--------------------
+
+The gpip_chip .get/set[_multiple]() line values are clamped to the boolean
+space [0, 1], low level or high level.
+
+Low and high values are defined as physical low on the line in/out to the
+connector such as a physical pad, pin or rail.
+
+The GPIO library has internal logic to handle lines that are active low, such
+as indicated by overstrike or #name in a schematic, and the driver should not
+try to second-guess the logic value of a line.
+
+The way GPIO values are handled by the consumers is that the library present
+the *logical* value to the consumer. A line is *asserted* if its *logical*
+value is 1, and *de-asserted* if its logical value is 0. If inversion is
+required, this is handled by gpiolib and configured using hardware descriptions
+such as device tree or ACPI that can clearly indicate if a line is active
+high or low.
+
+Since electronics commonly insert inverters as driving stages or protection
+buffers in front of a GPIO line it is necessary that this semantic is part
+of the hardware description, so that consumers such as kernel drivers need
+not worry about this, and can for example assert a RESET line tied to a GPIO
+pin by setting it to logic 1 even if it is physically active low.
+
+
GPIO electrical configuration
-----------------------------
@@ -750,7 +777,7 @@ compliance:
- Test your driver with the appropriate in-kernel real-time test cases for both
level and edge IRQs
-* [1] http://www.spinics.net/lists/linux-omap/msg120425.html
+* [1] https://lore.kernel.org/r/1437496011-11486-1-git-send-email-bigeasy@linutronix.de/
* [2] https://lore.kernel.org/r/1443209283-20781-2-git-send-email-grygorii.strashko@ti.com
* [3] https://lore.kernel.org/r/1443209283-20781-3-git-send-email-grygorii.strashko@ti.com
diff --git a/Documentation/driver-api/gpio/index.rst b/Documentation/driver-api/gpio/index.rst
index 34b57cee3391..bee58f709b9a 100644
--- a/Documentation/driver-api/gpio/index.rst
+++ b/Documentation/driver-api/gpio/index.rst
@@ -12,8 +12,10 @@ Contents:
driver
consumer
board
+ legacy-boards
drivers-on-gpio
bt8xxgpio
+ pca953x
Core
====
@@ -27,7 +29,7 @@ Core
ACPI support
============
-.. kernel-doc:: drivers/gpio/gpiolib-acpi.c
+.. kernel-doc:: drivers/gpio/gpiolib-acpi-core.c
:export:
Device tree support
diff --git a/Documentation/driver-api/gpio/legacy-boards.rst b/Documentation/driver-api/gpio/legacy-boards.rst
new file mode 100644
index 000000000000..a9d33bcbb176
--- /dev/null
+++ b/Documentation/driver-api/gpio/legacy-boards.rst
@@ -0,0 +1,316 @@
+Supporting Legacy Boards
+========================
+
+Many drivers in the kernel, such as ``leds-gpio`` and ``gpio-keys``, are
+migrating away from using board-specific ``platform_data`` to a unified device
+properties interface. This interface allows drivers to be simpler and more
+generic, as they can query properties in a standardized way.
+
+On modern systems, these properties are provided via device tree. However, some
+older platforms have not been converted to device tree and instead rely on
+board files to describe their hardware configuration. To bridge this gap and
+allow these legacy boards to work with modern, generic drivers, the kernel
+provides a mechanism called **software nodes**.
+
+This document provides a guide on how to convert a legacy board file from using
+``platform_data`` and ``gpiod_lookup_table`` to the modern software node
+approach for describing GPIO-connected devices.
+
+The Core Idea: Software Nodes
+-----------------------------
+
+Software nodes allow board-specific code to construct an in-memory,
+device-tree-like structure using struct software_node and struct
+property_entry. This structure can then be associated with a platform device,
+allowing drivers to use the standard device properties API (e.g.,
+device_property_read_u32(), device_property_read_string()) to query
+configuration, just as they would on an ACPI or device tree system.
+
+The gpiolib code has support for handling software nodes, so that if GPIO is
+described properly, as detailed in the section below, then regular gpiolib APIs,
+such as gpiod_get(), gpiod_get_optional(), and others will work.
+
+Requirements for GPIO Properties
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When using software nodes to describe GPIO connections, the following
+requirements must be met for the GPIO core to correctly resolve the reference:
+
+1. **The GPIO controller's software node must be registered and attached to
+ the controller's ``struct device`` either as its primary or secondary
+ firmware node.** The gpiolib core uses the address of the firmware node to
+ find the corresponding ``struct gpio_chip`` at runtime.
+
+2. **The GPIO property must be a reference.** The ``PROPERTY_ENTRY_GPIO()``
+ macro handles this as it is an alias for ``PROPERTY_ENTRY_REF()``.
+
+3. **The reference must have exactly two arguments:**
+
+ - The first argument is the GPIO offset within the controller.
+ - The second argument is the flags for the GPIO line (e.g.,
+ GPIO_ACTIVE_HIGH, GPIO_ACTIVE_LOW).
+
+The ``PROPERTY_ENTRY_GPIO()`` macro is the preferred way of defining GPIO
+properties in software nodes.
+
+Conversion Example
+------------------
+
+Let's walk through an example of converting a board file that defines a GPIO-
+connected LED and a button.
+
+Before: Using Platform Data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A typical legacy board file might look like this:
+
+.. code-block:: c
+
+ #include <linux/platform_device.h>
+ #include <linux/leds.h>
+ #include <linux/gpio_keys.h>
+ #include <linux/gpio/machine.h>
+
+ #define MYBOARD_GPIO_CONTROLLER "gpio-foo"
+
+ /* LED setup */
+ static const struct gpio_led myboard_leds[] = {
+ {
+ .name = "myboard:green:status",
+ .default_trigger = "heartbeat",
+ },
+ };
+
+ static const struct gpio_led_platform_data myboard_leds_pdata = {
+ .num_leds = ARRAY_SIZE(myboard_leds),
+ .leds = myboard_leds,
+ };
+
+ static struct gpiod_lookup_table myboard_leds_gpios = {
+ .dev_id = "leds-gpio",
+ .table = {
+ GPIO_LOOKUP_IDX(MYBOARD_GPIO_CONTROLLER, 42, NULL, 0, GPIO_ACTIVE_HIGH),
+ { },
+ },
+ };
+
+ /* Button setup */
+ static struct gpio_keys_button myboard_buttons[] = {
+ {
+ .code = KEY_WPS_BUTTON,
+ .desc = "WPS Button",
+ .active_low = 1,
+ },
+ };
+
+ static const struct gpio_keys_platform_data myboard_buttons_pdata = {
+ .buttons = myboard_buttons,
+ .nbuttons = ARRAY_SIZE(myboard_buttons),
+ };
+
+ static struct gpiod_lookup_table myboard_buttons_gpios = {
+ .dev_id = "gpio-keys",
+ .table = {
+ GPIO_LOOKUP_IDX(MYBOARD_GPIO_CONTROLLER, 15, NULL, 0, GPIO_ACTIVE_LOW),
+ { },
+ },
+ };
+
+ /* Device registration */
+ static int __init myboard_init(void)
+ {
+ struct platform_device_info pdev_info = {
+ .name = MYBOARD_GPIO_CONTROLLER,
+ .id = PLATFORM_DEVID_NONE,
+ .swnode = &gpio_controller_node
+ };
+
+ gpiod_add_lookup_table(&myboard_leds_gpios);
+ gpiod_add_lookup_table(&myboard_buttons_gpios);
+
+ platform_device_register_full(&pdev_info);
+ platform_device_register_data(NULL, "leds-gpio", -1,
+ &myboard_leds_pdata, sizeof(myboard_leds_pdata));
+ platform_device_register_data(NULL, "gpio-keys", -1,
+ &myboard_buttons_pdata,
+ sizeof(myboard_buttons_pdata));
+
+ return 0;
+ }
+
+After: Using Software Nodes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Here is how the same configuration can be expressed using software nodes.
+
+Step 1: Define the GPIO Controller Node
+***************************************
+
+First, define a software node that represents the GPIO controller that the
+LEDs and buttons are connected to. The ``name`` of this node is optional.
+
+.. code-block:: c
+
+ #include <linux/property.h>
+ #include <linux/gpio/property.h>
+
+ #define MYBOARD_GPIO_CONTROLLER "gpio-foo"
+
+ static const struct software_node myboard_gpio_controller_node = {
+ .name = MYBOARD_GPIO_CONTROLLER,
+ };
+
+Step 2: Define Consumer Device Nodes and Properties
+***************************************************
+
+Next, define the software nodes for the consumer devices (the LEDs and buttons).
+This involves creating a parent node for each device type and child nodes for
+each individual LED or button.
+
+.. code-block:: c
+
+ /* LED setup */
+ static const struct software_node myboard_leds_node = {
+ .name = "myboard-leds",
+ };
+
+ static const struct property_entry myboard_status_led_props[] = {
+ PROPERTY_ENTRY_STRING("label", "myboard:green:status"),
+ PROPERTY_ENTRY_STRING("linux,default-trigger", "heartbeat"),
+ PROPERTY_ENTRY_GPIO("gpios", &myboard_gpio_controller_node, 42, GPIO_ACTIVE_HIGH),
+ { }
+ };
+
+ static const struct software_node myboard_status_led_swnode = {
+ .name = "status-led",
+ .parent = &myboard_leds_node,
+ .properties = myboard_status_led_props,
+ };
+
+ /* Button setup */
+ static const struct software_node myboard_keys_node = {
+ .name = "myboard-keys",
+ };
+
+ static const struct property_entry myboard_wps_button_props[] = {
+ PROPERTY_ENTRY_STRING("label", "WPS Button"),
+ PROPERTY_ENTRY_U32("linux,code", KEY_WPS_BUTTON),
+ PROPERTY_ENTRY_GPIO("gpios", &myboard_gpio_controller_node, 15, GPIO_ACTIVE_LOW),
+ { }
+ };
+
+ static const struct software_node myboard_wps_button_swnode = {
+ .name = "wps-button",
+ .parent = &myboard_keys_node,
+ .properties = myboard_wps_button_props,
+ };
+
+
+
+Step 3: Group and Register the Nodes
+************************************
+
+For maintainability, it is often beneficial to group all software nodes into a
+single array and register them with one call.
+
+.. code-block:: c
+
+ static const struct software_node * const myboard_swnodes[] = {
+ &myboard_gpio_controller_node,
+ &myboard_leds_node,
+ &myboard_status_led_swnode,
+ &myboard_keys_node,
+ &myboard_wps_button_swnode,
+ NULL
+ };
+
+ static int __init myboard_init(void)
+ {
+ int error;
+
+ error = software_node_register_node_group(myboard_swnodes);
+ if (error) {
+ pr_err("Failed to register software nodes: %d\n", error);
+ return error;
+ }
+
+ // ... platform device registration follows
+ }
+
+.. note::
+ When splitting registration of nodes by devices that they represent, it is
+ essential that the software node representing the GPIO controller itself
+ is registered first, before any of the nodes that reference it.
+
+Step 4: Register Platform Devices with Software Nodes
+*****************************************************
+
+Finally, register the platform devices and associate them with their respective
+software nodes using the ``fwnode`` field in struct platform_device_info.
+
+.. code-block:: c
+
+ static struct platform_device *leds_pdev;
+ static struct platform_device *keys_pdev;
+
+ static int __init myboard_init(void)
+ {
+ struct platform_device_info pdev_info;
+ int error;
+
+ error = software_node_register_node_group(myboard_swnodes);
+ if (error)
+ return error;
+
+ memset(&pdev_info, 0, sizeof(pdev_info));
+ pdev_info.name = MYBOARD_GPIO_CONTROLLER;
+ pdev_info.id = PLATFORM_DEVID_NONE;
+ pdev_info.swnode = &myboard_gpio_controller_node;
+ gpio_pdev = platform_device_register_full(&pdev_info);
+ if (IS_ERR(gpio_pdev)) {
+ error = PTR_ERR(gpio_pdev);
+ goto err_unregister_nodes;
+ }
+
+ memset(&pdev_info, 0, sizeof(pdev_info));
+ pdev_info.name = "leds-gpio";
+ pdev_info.id = PLATFORM_DEVID_NONE;
+ pdev_info.fwnode = software_node_fwnode(&myboard_leds_node);
+ leds_pdev = platform_device_register_full(&pdev_info);
+ if (IS_ERR(leds_pdev)) {
+ error = PTR_ERR(leds_pdev);
+ platform_device_unregister(gpio_pdev);
+ goto err_unregister_nodes;
+ }
+
+ memset(&pdev_info, 0, sizeof(pdev_info));
+ pdev_info.name = "gpio-keys";
+ pdev_info.id = PLATFORM_DEVID_NONE;
+ pdev_info.fwnode = software_node_fwnode(&myboard_keys_node);
+ keys_pdev = platform_device_register_full(&pdev_info);
+ if (IS_ERR(keys_pdev)) {
+ error = PTR_ERR(keys_pdev);
+ platform_device_unregister(gpio_pdev);
+ platform_device_unregister(leds_pdev);
+ goto err_unregister_nodes;
+ }
+
+ return 0;
+
+ err_unregister_nodes:
+ software_node_unregister_node_group(myboard_swnodes);
+ return error;
+ }
+
+ static void __exit myboard_exit(void)
+ {
+ platform_device_unregister(keys_pdev);
+ platform_device_unregister(leds_pdev);
+ platform_device_unregister(gpio_pdev);
+ software_node_unregister_node_group(myboard_swnodes);
+ }
+
+With these changes, the generic ``leds-gpio`` and ``gpio-keys`` drivers will
+be able to probe successfully and get their configuration from the properties
+defined in the software nodes, removing the need for board-specific platform
+data.
diff --git a/Documentation/driver-api/gpio/pca953x.rst b/Documentation/driver-api/gpio/pca953x.rst
new file mode 100644
index 000000000000..fa4a57aa82a7
--- /dev/null
+++ b/Documentation/driver-api/gpio/pca953x.rst
@@ -0,0 +1,639 @@
+============================================
+PCA953x I²C GPIO expander compatibility list
+============================================
+
+:Author: Levente Révész <levente.revesz@eilabs.com>
+
+I went through all the datasheets and created this note listing
+chip functions and register layouts.
+
+Overview of chips
+=================
+
+Chips with the basic 4 registers
+--------------------------------
+
+These chips have 4 register banks: input, output, invert and direction.
+Each of these banks contains (lines/8) registers, one for each GPIO port.
+
+Banks offset is always a power of 2:
+
+- 4 lines -> bank offset is 1
+- 8 lines -> bank offset is 1
+- 16 lines -> bank offset is 2
+- 24 lines -> bank offset is 4
+- 32 lines -> bank offset is 4
+- 40 lines -> bank offset is 8
+
+For example, register layout of GPIO expander with 24 lines:
+
++------+-----------------+--------+
+| addr | function | bank |
++======+=================+========+
+| 00 | input port0 | |
++------+-----------------+ |
+| 01 | input port1 | bank 0 |
++------+-----------------+ |
+| 02 | input port2 | |
++------+-----------------+--------+
+| 03 | n/a | |
++------+-----------------+--------+
+| 04 | output port0 | |
++------+-----------------+ |
+| 05 | output port1 | bank 1 |
++------+-----------------+ |
+| 06 | output port2 | |
++------+-----------------+--------+
+| 07 | n/a | |
++------+-----------------+--------+
+| 08 | invert port0 | |
++------+-----------------+ |
+| 09 | invert port1 | bank 2 |
++------+-----------------+ |
+| 0A | invert port2 | |
++------+-----------------+--------+
+| 0B | n/a | |
++------+-----------------+--------+
+| 0C | direction port0 | |
++------+-----------------+ |
+| 0D | direction port1 | bank 3 |
++------+-----------------+ |
+| 0E | direction port2 | |
++------+-----------------+--------+
+| 0F | n/a | |
++------+-----------------+--------+
+
+.. note::
+ This is followed by all supported chips, except by pcal6534.
+
+The table below shows the offsets for each of the compatible chips:
+
+========== ===== ========= ===== ====== ====== =========
+compatible lines interrupt input output invert direction
+========== ===== ========= ===== ====== ====== =========
+pca9536 4 no 00 01 02 03
+pca9537 4 yes 00 01 02 03
+pca6408 8 yes 00 01 02 03
+tca6408 8 yes 00 01 02 03
+pca9534 8 yes 00 01 02 03
+pca9538 8 yes 00 01 02 03
+pca9554 8 yes 00 01 02 03
+tca9554 8 yes 00 01 02 03
+pca9556 8 no 00 01 02 03
+pca9557 8 no 00 01 02 03
+pca6107 8 yes 00 01 02 03
+pca6416 16 yes 00 02 04 06
+tca6416 16 yes 00 02 04 06
+pca9535 16 yes 00 02 04 06
+pca9539 16 yes 00 02 04 06
+tca9539 16 yes 00 02 04 06
+pca9555 16 yes 00 02 04 06
+max7318 16 yes 00 02 04 06
+tca6424 24 yes 00 04 08 0C
+========== ===== ========= ===== ====== ====== =========
+
+Chips with additional timeout_en register
+-----------------------------------------
+
+These Maxim chips have a bus timeout function which can be enabled in
+the timeout_en register. This is present in only two chips. Defaults to
+timeout disabled.
+
+========== ===== ========= ===== ====== ====== ========= ==========
+compatible lines interrupt input output invert direction timeout_en
+========== ===== ========= ===== ====== ====== ========= ==========
+max7310 8 no 00 01 02 03 04
+max7312 16 yes 00 02 04 06 08
+========== ===== ========= ===== ====== ====== ========= ==========
+
+Chips with additional int_mask register
+---------------------------------------
+
+These chips have an interrupt mask register in addition to the 4 basic
+registers. The interrupt masks default to all interrupts disabled. To
+use interrupts with these chips, the driver has to set the int_mask
+register.
+
+========== ===== ========= ===== ====== ====== ========= ========
+compatible lines interrupt input output invert direction int_mask
+========== ===== ========= ===== ====== ====== ========= ========
+pca9505 40 yes 00 08 10 18 20
+pca9506 40 yes 00 08 10 18 20
+========== ===== ========= ===== ====== ====== ========= ========
+
+Chips with additional int_mask and out_conf registers
+-----------------------------------------------------
+
+This chip has an interrupt mask register, and an output port
+configuration register, which can select between push-pull and
+open-drain modes. Each bit controls two lines. Both of these registers
+are present in PCAL chips as well, albeit the out_conf works
+differently.
+
+========== ===== ========= ===== ====== ====== ========= ======== ========
+compatible lines interrupt input output invert direction int_mask out_conf
+========== ===== ========= ===== ====== ====== ========= ======== ========
+pca9698 40 yes 00 08 10 18 20 28
+========== ===== ========= ===== ====== ====== ========= ======== ========
+
+pca9698 also has a "master output" register for setting all outputs per
+port to the same value simultaneously, and a chip specific mode register
+for various additional chip settings.
+
+========== ============= ====
+compatible master_output mode
+========== ============= ====
+pca9698 29 2A
+========== ============= ====
+
+Chips with LED blink and intensity control
+------------------------------------------
+
+These Maxim chips have no invert register.
+
+They have two sets of output registers (output0 and output1). An internal
+timer alternates the effective output between the values set in these
+registers, if blink mode is enabled in the blink register. The
+master_intensity register and the intensity registers together define
+the PWM intensity value for each pair of outputs.
+
+These chips can be used as simple GPIO expanders if the driver handles the
+input, output0 and direction registers.
+
+========== ===== ========= ===== ======= ========= ======= ================ ===== =========
+compatible lines interrupt input output0 direction output1 master_intensity blink intensity
+========== ===== ========= ===== ======= ========= ======= ================ ===== =========
+max7315 8 yes 00 01 03 09 0E 0F 10
+max7313 16 yes 00 02 06 0A 0E 0F 10
+========== ===== ========= ===== ======= ========= ======= ================ ===== =========
+
+Basic PCAL chips
+----------------
+
+========== ===== ========= ===== ====== ====== =========
+compatible lines interrupt input output invert direction
+========== ===== ========= ===== ====== ====== =========
+pcal6408 8 yes 00 01 02 03
+pcal9554b 8 yes 00 01 02 03
+pcal6416 16 yes 00 02 04 06
+pcal9535 16 yes 00 02 04 06
+pcal9555a 16 yes 00 02 04 06
+tcal6408 8 yes 00 01 02 03
+tcal6416 16 yes 00 02 04 06
+========== ===== ========= ===== ====== ====== =========
+
+These chips have several additional features:
+
+ 1. output drive strength setting (out_strength)
+ 2. input latch (in_latch)
+ 3. pull-up/pull-down (pull_in, pull_sel)
+ 4. push-pull/open-drain outputs (out_conf)
+ 5. interrupt mask and interrupt status (int_mask, int_status)
+
+========== ============ ======== ======= ======== ======== ========== ========
+compatible out_strength in_latch pull_en pull_sel int_mask int_status out_conf
+========== ============ ======== ======= ======== ======== ========== ========
+pcal6408 40 42 43 44 45 46 4F
+pcal9554b 40 42 43 44 45 46 4F
+pcal6416 40 44 46 48 4A 4C 4F
+pcal9535 40 44 46 48 4A 4C 4F
+pcal9555a 40 44 46 48 4A 4C 4F
+tcal6408 40 42 43 44 45 46 4F
+tcal6416 40 44 46 48 4A 4C 4F
+========== ============ ======== ======= ======== ======== ========== ========
+
+Currently the driver has support for the input latch, pull-up/pull-down
+and uses int_mask and int_status for interrupts.
+
+PCAL chips with extended interrupt and output configuration functions
+---------------------------------------------------------------------
+
+========== ===== ========= ===== ====== ====== =========
+compatible lines interrupt input output invert direction
+========== ===== ========= ===== ====== ====== =========
+pcal6524 24 yes 00 04 08 0C
+pcal6534 34 yes 00 05 0A 0F
+========== ===== ========= ===== ====== ====== =========
+
+These chips have the full PCAL register set, plus the following functions:
+
+ 1. interrupt event selection: level, rising, falling, any edge
+ 2. clear interrupt status per line
+ 3. read input without clearing interrupt status
+ 4. individual output config (push-pull/open-drain) per output line
+ 5. debounce inputs
+
+========== ============ ======== ======= ======== ======== ========== ========
+compatible out_strength in_latch pull_en pull_sel int_mask int_status out_conf
+========== ============ ======== ======= ======== ======== ========== ========
+pcal6524 40 48 4C 50 54 58 5C
+pcal6534 30 3A 3F 44 49 4E 53
+========== ============ ======== ======= ======== ======== ========== ========
+
+========== ======== ========= ============ ============== ======== ==============
+compatible int_edge int_clear input_status indiv_out_conf debounce debounce_count
+========== ======== ========= ============ ============== ======== ==============
+pcal6524 60 68 6C 70 74 76
+pcal6534 54 5E 63 68 6D 6F
+========== ======== ========= ============ ============== ======== ==============
+
+As can be seen in the table above, pcal6534 does not follow the usual
+bank spacing rule. Its banks are closely packed instead.
+
+PCA957X chips with a completely different register layout
+---------------------------------------------------------
+
+These chips have the basic 4 registers, but at unusual addresses.
+
+Additionally, they have:
+
+ 1. pull-up/pull-down (pull_sel)
+ 2. a global pull enable, defaults to disabled (config)
+ 3. interrupt mask, interrupt status (int_mask, int_status)
+
+========== ===== ========= ===== ====== ====== ======== ========= ====== ======== ==========
+compatible lines interrupt input invert config pull_sel direction output int_mask int_status
+========== ===== ========= ===== ====== ====== ======== ========= ====== ======== ==========
+pca9574 8 yes 00 01 02 03 04 05 06 07
+pca9575 16 yes 00 02 04 06 08 0A 0C 0E
+========== ===== ========= ===== ====== ====== ======== ========= ====== ======== ==========
+
+Currently the driver supports none of the advanced features.
+
+XRA1202
+-------
+
+Basic 4 registers, plus advanced features:
+
+ 1. interrupt mask, defaults to interrupts disabled
+ 2. interrupt status
+ 3. interrupt event selection, level, rising, falling, any edge
+ (int_mask, rising_mask, falling_mask)
+ 4. pull-up (no pull-down)
+ 5. tri-state
+ 6. debounce
+
+========== ===== ========= ===== ====== ====== ========= =========
+compatible lines interrupt input output invert direction pullup_en
+========== ===== ========= ===== ====== ====== ========= =========
+xra1202 8 yes 00 01 02 03 04
+========== ===== ========= ===== ====== ====== ========= =========
+
+========== ======== ======== ========== =========== ============ ========
+compatible int_mask tristate int_status rising_mask falling_mask debounce
+========== ======== ======== ========== =========== ============ ========
+xra1202 05 06 07 08 09 0A
+========== ======== ======== ========== =========== ============ ========
+
+Overview of functions
+=====================
+
+This section lists chip functions that are supported by the driver
+already, or are at least common in multiple chips.
+
+Input, Output, Invert, Direction
+--------------------------------
+
+The basic 4 GPIO functions are present in all but one chip category, i.e.
+`Chips with LED blink and intensity control`_ are missing the invert
+register.
+
+3 different layouts are used for these registers:
+
+ 1. banks 0, 1, 2, 3 with bank offsets of 2^n
+ - all other chips
+
+ 2. banks 0, 1, 2, 3 with closely packed banks
+ - pcal6534
+
+ 3. banks 0, 5, 1, 4 with bank offsets of 2^n
+ - pca9574
+ - pca9575
+
+Interrupts
+----------
+
+Only an interrupt mask register
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The same layout is used for all of these:
+
+ 1. bank 5 with bank offsets of 2^n
+ - pca9505
+ - pca9506
+ - pca9698
+
+Interrupt mask and interrupt status registers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+These work the same way in all of the chips: mask and status have
+one bit per line, 1 in the mask means interrupt enabled.
+
+Layouts:
+
+ 1. base offset 0x40, bank 5 and bank 6, bank offsets of 2^n
+ - pcal6408
+ - pcal6416
+ - pcal9535
+ - pcal9554b
+ - pcal9555a
+ - pcal6524
+ - tcal6408
+ - tcal6416
+
+ 2. base offset 0x30, bank 5 and 6, closely packed banks
+ - pcal6534
+
+ 3. bank 6 and 7, bank offsets of 2^n
+ - pca9574
+ - pca9575
+
+ 4. bank 5 and 7, bank offsets of 2^n
+ - xra1202
+
+Interrupt on specific edges
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+`PCAL chips with extended interrupt and output configuration functions`_
+have an int_edge register. This contains 2 bits per line, one of 4 events
+can be selected for each line:
+
+ 0: level, 1: rising edge, 2: falling edge, 3: any edge
+
+Layouts:
+
+ 1. base offset 0x40, bank 7, bank offsets of 2^n
+
+ - pcal6524
+
+ 2. base offset 0x30, bank 7 + offset 0x01, closely packed banks
+ (out_conf is 1 byte, not (lines/8) bytes, hence the 0x01 offset)
+
+ - pcal6534
+
+`XRA1202`_ chips have a different mechanism for the same thing: they have
+a rising mask and a falling mask, with one bit per line.
+
+Layout:
+
+ 1. bank 5, bank offsets of 2^n
+
+Input latch
+-----------
+
+Only `Basic PCAL chips`_ and
+`PCAL chips with extended interrupt and output configuration functions`_
+have this function. When the latch is enabled, the interrupt is not cleared
+until the input port is read. When the latch is disabled, the interrupt
+is cleared even if the input register is not read, if the input pin returns
+to the logic value it had before generating the interrupt. Defaults to latch
+disabled.
+
+Currently the driver enables the latch for each line with interrupt
+enabled.
+
+An interrupt status register records which pins triggered an interrupt.
+However, the status register and the input port register must be read
+separately; there is no atomic mechanism to read both simultaneously, so races
+are possible. Refer to the chapter `Interrupt source detection`_ to understand
+the implications of this and how the driver still makes use of the latching
+feature.
+
+ 1. base offset 0x40, bank 2, bank offsets of 2^n
+ - pcal6408
+ - pcal6416
+ - pcal9535
+ - pcal9554b
+ - pcal9555a
+ - pcal6524
+ - tcal6408
+ - tcal6416
+
+ 2. base offset 0x30, bank 2, closely packed banks
+ - pcal6534
+
+Pull-up and pull-down
+---------------------
+
+`Basic PCAL chips`_ and
+`PCAL chips with extended interrupt and output configuration functions`_
+use the same mechanism: their pull_en register enables the pull-up or pull-down
+function, and their pull_sel register chooses the direction. They all use one
+bit per line.
+
+ 0: pull-down, 1: pull-up
+
+Layouts:
+
+ 1. base offset 0x40, bank 3 (en) and 4 (sel), bank offsets of 2^n
+ - pcal6408
+ - pcal6416
+ - pcal9535
+ - pcal9554b
+ - pcal9555a
+ - pcal6524
+
+ 2. base offset 0x30, bank 3 (en) and 4 (sel), closely packed banks
+ - pcal6534
+
+`PCA957X chips with a completely different register layout`_ have a pull_sel
+register with one bit per line, and a global pull_en bit in their config
+register.
+
+Layout:
+
+ 1. bank 2 (config), bank 3 (sel), bank offsets of 2^n
+ - pca9574
+ - pca9575
+
+`XRA1202`_ chips can only pull-up. They have a pullup_en register.
+
+Layout:
+
+ 1. bank 4, bank offsets of 2^n
+ - xra1202
+
+Push-pull and open-drain
+------------------------
+
+`Chips with additional int_mask and out_conf registers`_ have this function,
+but only for select IO ports. Register has 1 bit per 2 lines. In pca9698,
+only port0 and port1 have this function.
+
+ 0: open-drain, 1: push-pull
+
+Layout:
+
+ 1. base offset 5*bankoffset
+ - pca9698
+
+`Basic PCAL chips`_ have 1 bit per port in one single out_conf register.
+Only whole ports can be configured.
+
+ 0: push-pull, 1: open-drain
+
+Layout:
+
+ 1. base offset 0x4F
+ - pcal6408
+ - pcal6416
+ - pcal9535
+ - pcal9554b
+ - pcal9555a
+ - tcal6408
+ - tcal6416
+
+`PCAL chips with extended interrupt and output configuration functions`_
+can set this for each line individually. They have the same per-port out_conf
+register as `Basic PCAL chips`_, but they also have an indiv_out_conf register
+with one bit per line, which inverts the effect of the port-wise setting.
+
+ 0: push-pull, 1: open-drain
+
+Layouts:
+
+ 1. base offset 0x40 + 7*bankoffset (out_conf),
+ base offset 0x60, bank 4 (indiv_out_conf) with bank offset of 2^n
+
+ - pcal6524
+
+ 2. base offset 0x30 + 7*banksize (out_conf),
+ base offset 0x54, bank 4 (indiv_out_conf), closely packed banks
+
+ - pcal6534
+
+This function is currently not supported by the driver.
+
+Output drive strength
+---------------------
+
+Only PCAL chips have this function. 2 bits per line.
+
+==== ==============
+bits drive strength
+==== ==============
+ 00 0.25x
+ 01 0.50x
+ 10 0.75x
+ 11 1.00x
+==== ==============
+
+ 1. base offset 0x40, bank 0 and 1, bank offsets of 2^n
+ - pcal6408
+ - pcal6416
+ - pcal9535
+ - pcal9554b
+ - pcal9555a
+ - pcal6524
+ - tcal6408
+ - tcal6416
+
+ 2. base offset 0x30, bank 0 and 1, closely packed banks
+ - pcal6534
+
+Currently not supported by the driver.
+
+Interrupt source detection
+==========================
+
+When triggered by the GPIO expander's interrupt, the driver determines which
+IRQs are pending by reading the input port register.
+
+To be able to filter on specific interrupt events for all compatible devices,
+the driver keeps track of the previous input state of the lines, and emits an
+IRQ only for the correct edge or level. This system works irrespective of the
+number of enabled interrupts. Events will not be missed even if they occur
+between the GPIO expander's interrupt and the actual I2C read. Edges could of
+course be missed if the related signal level changes back to the value
+previously saved by the driver before the I2C read. PCAL variants offer input
+latching for that reason.
+
+PCAL input latching
+-------------------
+
+The PCAL variants have an input latch and the driver enables this for all
+interrupt-enabled lines. The interrupt is then only cleared when the input port
+is read out. These variants provide an interrupt status register that records
+which pins triggered an interrupt, but the status and input registers cannot be
+read atomically. If another interrupt occurs on a different line after the
+status register has been read but before the input port register is sampled,
+that event will not be reflected in the earlier status snapshot, so relying
+solely on the interrupt status register is insufficient.
+
+Thus, the PCAL variants also have to use the existing level-change logic.
+
+For short pulses, the first edge is captured when the input register is read,
+but if the signal returns to its previous level before this read, the second
+edge is not observed. As a result, successive pulses can produce identical
+input values at read time and no level change is detected, causing interrupts
+to be missed. Below timing diagram shows this situation where the top signal is
+the input pin level and the bottom signal indicates the latched value::
+
+ ─────┐ ┌──*───────────────┐ ┌──*─────────────────┐ ┌──*───
+ │ │ . │ │ . │ │ .
+ │ │ │ │ │ │ │ │ │
+ └──*──┘ │ └──*──┘ │ └──*──┘ │
+ Input │ │ │ │ │ │
+ ▼ │ ▼ │ ▼ │
+ IRQ │ IRQ │ IRQ │
+ . . .
+ ─────┐ .┌──────────────┐ .┌────────────────┐ .┌──
+ │ │ │ │ │ │
+ │ │ │ │ │ │
+ └────────*┘ └────────*┘ └────────*┘
+ Latched │ │ │
+ ▼ ▼ ▼
+ READ 0 READ 0 READ 0
+ NO CHANGE NO CHANGE
+
+To deal with this, events indicated by the interrupt status register are merged
+with events detected through the existing level-change logic. As a result:
+
+- short pulses, whose second edges are invisible, are detected via the
+ interrupt status register, and
+- interrupts that occur between the status and input reads are still
+ caught by the generic level-change logic.
+
+Note that this is still best-effort: the status and input registers are read
+separately, and short pulses on other lines may occur in between those reads.
+Such pulses can still be latched as an interrupt without leaving an observable
+level change at read time, and may not be attributable to a specific edge. This
+does not reduce detection compared to the generic path, but reflects inherent
+atomicity limitations.
+
+Datasheets
+==========
+
+- MAX7310: https://datasheets.maximintegrated.com/en/ds/MAX7310.pdf
+- MAX7312: https://datasheets.maximintegrated.com/en/ds/MAX7312.pdf
+- MAX7313: https://datasheets.maximintegrated.com/en/ds/MAX7313.pdf
+- MAX7315: https://datasheets.maximintegrated.com/en/ds/MAX7315.pdf
+- MAX7318: https://datasheets.maximintegrated.com/en/ds/MAX7318.pdf
+- PCA6107: https://pdf1.alldatasheet.com/datasheet-pdf/view/161780/TI/PCA6107.html
+- PCA6408A: https://www.nxp.com/docs/en/data-sheet/PCA6408A.pdf
+- PCA6416A: https://www.nxp.com/docs/en/data-sheet/PCA6416A.pdf
+- PCA9505: https://www.nxp.com/docs/en/data-sheet/PCA9505_9506.pdf
+- PCA9505: https://www.nxp.com/docs/en/data-sheet/PCA9505_9506.pdf
+- PCA9534: https://www.nxp.com/docs/en/data-sheet/PCA9534.pdf
+- PCA9535: https://www.nxp.com/docs/en/data-sheet/PCA9535_PCA9535C.pdf
+- PCA9536: https://www.nxp.com/docs/en/data-sheet/PCA9536.pdf
+- PCA9537: https://www.nxp.com/docs/en/data-sheet/PCA9537.pdf
+- PCA9538: https://www.nxp.com/docs/en/data-sheet/PCA9538.pdf
+- PCA9539: https://www.nxp.com/docs/en/data-sheet/PCA9539_PCA9539R.pdf
+- PCA9554: https://www.nxp.com/docs/en/data-sheet/PCA9554_9554A.pdf
+- PCA9555: https://www.nxp.com/docs/en/data-sheet/PCA9555.pdf
+- PCA9556: https://www.nxp.com/docs/en/data-sheet/PCA9556.pdf
+- PCA9557: https://www.nxp.com/docs/en/data-sheet/PCA9557.pdf
+- PCA9574: https://www.nxp.com/docs/en/data-sheet/PCA9574.pdf
+- PCA9575: https://www.nxp.com/docs/en/data-sheet/PCA9575.pdf
+- PCA9698: https://www.nxp.com/docs/en/data-sheet/PCA9698.pdf
+- PCAL6408A: https://www.nxp.com/docs/en/data-sheet/PCAL6408A.pdf
+- PCAL6416A: https://www.nxp.com/docs/en/data-sheet/PCAL6416A.pdf
+- PCAL6524: https://www.nxp.com/docs/en/data-sheet/PCAL6524.pdf
+- PCAL6534: https://www.nxp.com/docs/en/data-sheet/PCAL6534.pdf
+- PCAL9535A: https://www.nxp.com/docs/en/data-sheet/PCAL9535A.pdf
+- PCAL9554B: https://www.nxp.com/docs/en/data-sheet/PCAL9554B_PCAL9554C.pdf
+- PCAL9555A: https://www.nxp.com/docs/en/data-sheet/PCAL9555A.pdf
+- TCA6408A: https://www.ti.com/lit/gpn/tca6408a
+- TCA6416: https://www.ti.com/lit/gpn/tca6416
+- TCA6424: https://www.ti.com/lit/gpn/tca6424
+- TCA9539: https://www.ti.com/lit/gpn/tca9539
+- TCA9554: https://www.ti.com/lit/gpn/tca9554
+- XRA1202: https://assets.maxlinear.com/web/documents/xra1202_1202p_101_042213.pdf
diff --git a/Documentation/driver-api/hw-recoverable-errors.rst b/Documentation/driver-api/hw-recoverable-errors.rst
new file mode 100644
index 000000000000..fc526c3454bd
--- /dev/null
+++ b/Documentation/driver-api/hw-recoverable-errors.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================================
+Recoverable Hardware Error Tracking in vmcoreinfo
+=================================================
+
+Overview
+--------
+
+This feature provides a generic infrastructure within the Linux kernel to track
+and log recoverable hardware errors. These are hardware recoverable errors
+visible that might not cause immediate panics but may influence health, mainly
+because new code path will be executed in the kernel.
+
+By recording counts and timestamps of recoverable errors into the vmcoreinfo
+crash dump notes, this infrastructure aids post-mortem crash analysis tools in
+correlating hardware events with kernel failures. This enables faster triage
+and better understanding of root causes, especially in large-scale cloud
+environments where hardware issues are common.
+
+Benefits
+--------
+
+- Facilitates correlation of hardware recoverable errors with kernel panics or
+ unusual code paths that lead to system crashes.
+- Provides operators and cloud providers quick insights, improving reliability
+ and reducing troubleshooting time.
+- Complements existing full hardware diagnostics without replacing them.
+
+Data Exposure and Consumption
+-----------------------------
+
+- The tracked error data consists of per-error-type counts and timestamps of
+ last occurrence.
+- This data is stored in the `hwerror_data` array, categorized by error source
+ types like CPU, memory, PCI, CXL, and others.
+- It is exposed via vmcoreinfo crash dump notes and can be read using tools
+ like `crash`, `drgn`, or other kernel crash analysis utilities.
+- There is no other way to read these data other than from crash dumps.
+- These errors are divided by area, which includes CPU, Memory, PCI, CXL and
+ others.
+
+Typical usage example (in drgn REPL):
+
+.. code-block:: python
+
+ >>> prog['hwerror_data']
+ (struct hwerror_info[HWERR_RECOV_MAX]){
+ {
+ .count = (int)844,
+ .timestamp = (time64_t)1752852018,
+ },
+ ...
+ }
+
+Enabling
+--------
+
+- This feature is enabled when CONFIG_VMCORE_INFO is set.
+
diff --git a/Documentation/driver-api/i3c/protocol.rst b/Documentation/driver-api/i3c/protocol.rst
index 23a0b93c62b1..fe338f8085db 100644
--- a/Documentation/driver-api/i3c/protocol.rst
+++ b/Documentation/driver-api/i3c/protocol.rst
@@ -165,8 +165,8 @@ The first thing attached to an HDR command is the HDR mode. There are currently
for more details):
* HDR-DDR: Double Data Rate mode
-* HDR-TSP: Ternary Symbol Pure. Only usable on busses with no I2C devices
-* HDR-TSL: Ternary Symbol Legacy. Usable on busses with I2C devices
+* HDR-TSP: Ternary Symbol Pure. Only usable on buses with no I2C devices
+* HDR-TSL: Ternary Symbol Legacy. Usable on buses with I2C devices
When sending an HDR command, the whole bus has to enter HDR mode, which is done
using a broadcast CCC command.
diff --git a/Documentation/driver-api/iio/core.rst b/Documentation/driver-api/iio/core.rst
index dfe438dc91a7..42b580fb2989 100644
--- a/Documentation/driver-api/iio/core.rst
+++ b/Documentation/driver-api/iio/core.rst
@@ -60,7 +60,7 @@ directory. Common attributes are:
* :file:`sampling_frequency_available`, available discrete set of sampling
frequency values for device.
* Available standard attributes for IIO devices are described in the
- :file:`Documentation/ABI/testing/sysfs-bus-iio` file in the Linux kernel
+ :file:Documentation/ABI/testing/sysfs-bus-iio file in the Linux kernel
sources.
IIO device channels
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 16e2c4ec3c01..eaf7161ff957 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -81,6 +81,7 @@ Subsystem-specific APIs
acpi/index
backlight/lp855x-driver.rst
clk
+ coco/index
console
crypto/index
dmaengine/index
@@ -92,9 +93,11 @@ Subsystem-specific APIs
frame-buffer
aperture
generic-counter
+ generic_pt
gpio/index
hsi
hte/index
+ hw-recoverable-errors
i2c
iio/index
infiniband
@@ -146,10 +149,3 @@ Subsystem-specific APIs
wmi
xilinx/index
zorro
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/infiniband.rst b/Documentation/driver-api/infiniband.rst
index 30e142ccbee9..10d8be9e74fe 100644
--- a/Documentation/driver-api/infiniband.rst
+++ b/Documentation/driver-api/infiniband.rst
@@ -77,14 +77,14 @@ iSCSI Extensions for RDMA (iSER)
:internal:
.. kernel-doc:: drivers/infiniband/ulp/iser/iscsi_iser.c
- :functions: iscsi_iser_pdu_alloc iser_initialize_task_headers \
- iscsi_iser_task_init iscsi_iser_mtask_xmit iscsi_iser_task_xmit \
- iscsi_iser_cleanup_task iscsi_iser_check_protection \
- iscsi_iser_conn_create iscsi_iser_conn_bind \
- iscsi_iser_conn_start iscsi_iser_conn_stop \
- iscsi_iser_session_destroy iscsi_iser_session_create \
- iscsi_iser_set_param iscsi_iser_ep_connect iscsi_iser_ep_poll \
- iscsi_iser_ep_disconnect
+ :functions: iscsi_iser_pdu_alloc iser_initialize_task_headers
+ iscsi_iser_task_init iscsi_iser_mtask_xmit iscsi_iser_task_xmit
+ iscsi_iser_cleanup_task iscsi_iser_check_protection
+ iscsi_iser_conn_create iscsi_iser_conn_bind
+ iscsi_iser_conn_start iscsi_iser_conn_stop
+ iscsi_iser_session_destroy iscsi_iser_session_create
+ iscsi_iser_set_param iscsi_iser_ep_connect iscsi_iser_ep_poll
+ iscsi_iser_ep_disconnect
.. kernel-doc:: drivers/infiniband/ulp/iser/iser_initiator.c
:internal:
diff --git a/Documentation/driver-api/interconnect.rst b/Documentation/driver-api/interconnect.rst
index a92d0f277a1f..cebb77b49d8d 100644
--- a/Documentation/driver-api/interconnect.rst
+++ b/Documentation/driver-api/interconnect.rst
@@ -84,13 +84,25 @@ be registered with the interconnect provider core.
.. kernel-doc:: include/linux/interconnect-provider.h
+.. kernel-doc:: drivers/interconnect/core.c
+ :functions: icc_provider_init icc_provider_register icc_provider_deregister
+ icc_node_create icc_node_create_dyn icc_node_destroy
+ icc_node_add icc_node_del icc_nodes_remove icc_node_set_name
+ icc_link_create icc_link_nodes
+
Interconnect consumers
----------------------
Interconnect consumers are the clients which use the interconnect APIs to
get paths between endpoints and set their bandwidth/latency/QoS requirements
-for these interconnect paths. These interfaces are not currently
-documented.
+for these interconnect paths.
+
+.. kernel-doc:: drivers/interconnect/core.c
+ :functions: devm_of_icc_get of_icc_get_by_index of_icc_get icc_get
+ icc_put icc_enable icc_disable icc_set_bw icc_set_tag
+ icc_get_name
+
+.. kernel-doc:: drivers/interconnect/bulk.c
Interconnect debugfs interfaces
-------------------------------
diff --git a/Documentation/driver-api/ipmi.rst b/Documentation/driver-api/ipmi.rst
index dfa021eacd63..f52ab2df2569 100644
--- a/Documentation/driver-api/ipmi.rst
+++ b/Documentation/driver-api/ipmi.rst
@@ -45,7 +45,7 @@ manual), choose the 'IPMI SI handler' option. A driver also exists
for direct I2C access to the IPMI management controller. Some boards
support this, but it is unknown if it will work on every board. For
this, choose 'IPMI SMBus handler', but be ready to try to do some
-figuring to see if it will work on your system if the SMBIOS/APCI
+figuring to see if it will work on your system if the SMBIOS/ACPI
information is wrong or not present. It is fairly safe to have both
these enabled and let the drivers auto-detect what is present.
@@ -63,7 +63,7 @@ situation, you need to read the section below named 'The SI Driver' or
IPMI defines a standard watchdog timer. You can enable this with the
'IPMI Watchdog Timer' config option. If you compile the driver into
the kernel, then via a kernel command-line option you can have the
-watchdog timer start as soon as it initializes. It also have a lot
+watchdog timer start as soon as it initializes. It also has a lot
of other options, see the 'Watchdog' section below for more details.
Note that you can also have the watchdog continue to run if it is
closed (by default it is disabled on close). Go into the 'Watchdog
@@ -280,10 +280,8 @@ Creating the User
To use the message handler, you must first create a user using
ipmi_create_user. The interface number specifies which SMI you want
to connect to, and you must supply callback functions to be called
-when data comes in. The callback function can run at interrupt level,
-so be careful using the callbacks. This also allows to you pass in a
-piece of data, the handler_data, that will be passed back to you on
-all calls.
+when data comes in. This also allows to you pass in a piece of data,
+the handler_data, that will be passed back to you on all calls.
Once you are done, call ipmi_destroy_user() to get rid of the user.
@@ -303,8 +301,7 @@ use it for anything you like.
Responses come back in the function pointed to by the ipmi_recv_hndl
field of the "handler" that you passed in to ipmi_create_user().
-Remember again, these may be running at interrupt level. Remember to
-look at the receive type, too.
+Remember to look at the receive type, too.
From userland, you fill out an ipmi_req_t structure and use the
IPMICTL_SEND_COMMAND ioctl. For incoming stuff, you can use select()
@@ -317,13 +314,13 @@ This gives the receiver a place to actually put the message.
If the message cannot fit into the data you provide, you will get an
EMSGSIZE error and the driver will leave the data in the receive
-queue. If you want to get it and have it truncate the message, us
+queue. If you want to get it and have it truncate the message, use
the IPMICTL_RECEIVE_MSG_TRUNC ioctl.
When you send a command (which is defined by the lowest-order bit of
the netfn per the IPMI spec) on the IPMB bus, the driver will
automatically assign the sequence number to the command and save the
-command. If the response is not receive in the IPMI-specified 5
+command. If the response is not received in the IPMI-specified 5
seconds, it will generate a response automatically saying the command
timed out. If an unsolicited response comes in (if it was after 5
seconds, for instance), that response will be ignored.
@@ -367,7 +364,7 @@ channel bitmasks do not overlap.
To respond to a received command, set the response bit in the returned
netfn, use the address from the received message, and use the same
-msgid that you got in the receive message.
+msgid that you got in the received message.
From userland, equivalent IOCTLs are provided to do these functions.
@@ -440,7 +437,7 @@ register would be 0xca6. This defaults to 1.
The regsizes parameter gives the size of a register, in bytes. The
data used by IPMI is 8-bits wide, but it may be inside a larger
-register. This parameter allows the read and write type to specified.
+register. This parameter allows the read and write type to be specified.
It may be 1, 2, 4, or 8. The default is 1.
Since the register size may be larger than 32 bits, the IPMI data may not
@@ -481,8 +478,8 @@ If your IPMI interface does not support interrupts and is a KCS or
SMIC interface, the IPMI driver will start a kernel thread for the
interface to help speed things up. This is a low-priority kernel
thread that constantly polls the IPMI driver while an IPMI operation
-is in progress. The force_kipmid module parameter will all the user to
-force this thread on or off. If you force it off and don't have
+is in progress. The force_kipmid module parameter will allow the user
+to force this thread on or off. If you force it off and don't have
interrupts, the driver will run VERY slowly. Don't blame me,
these interfaces suck.
@@ -583,7 +580,7 @@ kernel command line as::
These are the same options as on the module command line.
The I2C driver does not support non-blocking access or polling, so
-this driver cannod to IPMI panic events, extend the watchdog at panic
+this driver cannot do IPMI panic events, extend the watchdog at panic
time, or other panic-related IPMI functions without special kernel
patches and driver modifications. You can get those at the openipmi
web page.
@@ -610,7 +607,7 @@ Parameters are::
ipmi_ipmb.retry_time_ms=<Time between retries on IPMB>
ipmi_ipmb.max_retries=<Number of times to retry a message>
-Loading the module will not result in the driver automatcially
+Loading the module will not result in the driver automatically
starting unless there is device tree information setting it up. If
you want to instantiate one of these by hand, do::
@@ -620,12 +617,12 @@ Note that the address you give here is the I2C address, not the IPMI
address. So if you want your MC address to be 0x60, you put 0x30
here. See the I2C driver info for more details.
-Command bridging to other IPMB busses through this interface does not
+Command bridging to other IPMB buses through this interface does not
work. The receive message queue is not implemented, by design. There
is only one receive message queue on a BMC, and that is meant for the
host drivers, not something on the IPMB bus.
-A BMC may have multiple IPMB busses, which bus your device sits on
+A BMC may have multiple IPMB buses, which bus your device sits on
depends on how the system is wired. You can fetch the channels with
"ipmitool channel info <n>" where <n> is the channel, with the
channels being 0-7 and try the IPMB channels.
diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst
index 5da27a749246..28b8437f6e4f 100644
--- a/Documentation/driver-api/libata.rst
+++ b/Documentation/driver-api/libata.rst
@@ -283,18 +283,25 @@ interrupts, start DMA engine, etc.
``->error_handler()`` is a driver's hook into probe, hotplug, and recovery
and other exceptional conditions. The primary responsibility of an
-implementation is to call :c:func:`ata_do_eh` or :c:func:`ata_bmdma_drive_eh`
-with a set of EH hooks as arguments:
+implementation is to call :c:func:`ata_std_error_handler`.
-'prereset' hook (may be NULL) is called during an EH reset, before any
-other actions are taken.
+:c:func:`ata_std_error_handler` will perform a standard error handling sequence
+to resurrect failed devices, detach lost devices and add new devices (if any).
+This function will call the various reset operations for a port, as needed.
+These operations are as follows.
-'postreset' hook (may be NULL) is called after the EH reset is
-performed. Based on existing conditions, severity of the problem, and
-hardware capabilities,
+* The 'prereset' operation (which may be NULL) is called during an EH reset,
+ before any other action is taken.
-Either 'softreset' (may be NULL) or 'hardreset' (may be NULL) will be
-called to perform the low-level EH reset.
+* The 'postreset' hook (which may be NULL) is called after the EH reset is
+ performed. Based on existing conditions, severity of the problem, and hardware
+ capabilities,
+
+* Either the 'softreset' operation or the 'hardreset' operation will be called
+ to perform the low-level EH reset. If both operations are defined,
+ 'hardreset' is preferred and used. If both are not defined, no low-level reset
+ is performed and EH assumes that an ATA class device is connected through the
+ link.
::
diff --git a/Documentation/driver-api/mailbox.rst b/Documentation/driver-api/mailbox.rst
index 0ed95009cc30..463dd032b96c 100644
--- a/Documentation/driver-api/mailbox.rst
+++ b/Documentation/driver-api/mailbox.rst
@@ -27,7 +27,7 @@ Controller Driver (See include/linux/mailbox_controller.h)
Allocate mbox_controller and the array of mbox_chan.
-Populate mbox_chan_ops, except peek_data() all are mandatory.
+Populate mbox_chan_ops, except flush() and peek_data() all are mandatory.
The controller driver might know a message has been consumed
by the remote by getting an IRQ or polling some hardware flag
or it can never know (the client knows by way of the protocol).
diff --git a/Documentation/driver-api/media/camera-sensor.rst b/Documentation/driver-api/media/camera-sensor.rst
index c290833165e6..94bd1dae82d5 100644
--- a/Documentation/driver-api/media/camera-sensor.rst
+++ b/Documentation/driver-api/media/camera-sensor.rst
@@ -29,21 +29,31 @@ used in the system. Using another frequency may cause harmful effects
elsewhere. Therefore only the pre-determined frequencies are configurable by the
user.
+The external clock frequency shall be retrieved by obtaining the external clock
+using the ``devm_v4l2_sensor_clk_get()`` helper function, and then getting its
+frequency with ``clk_get_rate()``. Usage of the helper function guarantees
+correct behaviour regardless of whether the sensor is integrated in a DT-based
+or ACPI-based system.
+
ACPI
~~~~
-Read the ``clock-frequency`` _DSD property to denote the frequency. The driver
-can rely on this frequency being used.
+ACPI-based systems typically don't register the sensor external clock with the
+kernel, but specify the external clock frequency in the ``clock-frequency``
+_DSD property. The ``devm_v4l2_sensor_clk_get()`` helper creates and returns a
+fixed clock set at that rate.
Devicetree
~~~~~~~~~~
-The preferred way to achieve this is using ``assigned-clocks``,
-``assigned-clock-parents`` and ``assigned-clock-rates`` properties. See the
-`clock device tree bindings
+Devicetree-based systems declare the sensor external clock in the device tree
+and reference it from the sensor node. The preferred way to select the external
+clock frequency is to use the ``assigned-clocks``, ``assigned-clock-parents``
+and ``assigned-clock-rates`` properties in the sensor node to set the clock
+rate. See the `clock device tree bindings
<https://github.com/devicetree-org/dt-schema/blob/main/dtschema/schemas/clock/clock.yaml>`_
-for more information. The driver then gets the frequency using
-``clk_get_rate()``.
+for more information. The ``devm_v4l2_sensor_clk_get()`` helper retrieves and
+returns that clock.
This approach has the drawback that there's no guarantee that the frequency
hasn't been modified directly or indirectly by another driver, or supported by
diff --git a/Documentation/driver-api/media/drivers/zoran.rst b/Documentation/driver-api/media/drivers/zoran.rst
index b205e10c3154..2538473c3233 100644
--- a/Documentation/driver-api/media/drivers/zoran.rst
+++ b/Documentation/driver-api/media/drivers/zoran.rst
@@ -222,7 +222,7 @@ The CCIR - I uses the PAL colorsystem, and is used in Great Britain, Hong Kong,
Ireland, Nigeria, South Africa.
The CCIR - N uses the PAL colorsystem and PAL frame size but the NTSC framerate,
-and is used in Argentinia, Uruguay, an a few others
+and is used in Argentina, Uruguay, and a few others
We do not talk about how the audio is broadcast !
diff --git a/Documentation/driver-api/media/index.rst b/Documentation/driver-api/media/index.rst
index d5593182a3f9..08fc2cfc07a3 100644
--- a/Documentation/driver-api/media/index.rst
+++ b/Documentation/driver-api/media/index.rst
@@ -26,6 +26,7 @@ Documentation/userspace-api/media/index.rst
:numbered:
maintainer-entry-profile
+ media-committers
v4l2-core
dtv-core
diff --git a/Documentation/driver-api/media/maintainer-entry-profile.rst b/Documentation/driver-api/media/maintainer-entry-profile.rst
index ffc712a5f632..c5c00c66d85c 100644
--- a/Documentation/driver-api/media/maintainer-entry-profile.rst
+++ b/Documentation/driver-api/media/maintainer-entry-profile.rst
@@ -1,45 +1,328 @@
+.. SPDX-License-Identifier: GPL-2.0
+
Media Subsystem Profile
=======================
Overview
--------
-The media subsystem covers support for a variety of devices: stream
-capture, analog and digital TV streams, cameras, remote controllers, HDMI CEC
-and media pipeline control.
+The Linux Media Community (aka: the LinuxTV Community) is formed by
+developers working on Linux Kernel Media Subsystem, together with users
+who also play an important role in testing the code.
+
+The Media Subsystem has code to support a wide variety of media-related
+devices: stream capture, analog and digital TV streams, cameras,
+video codecs, video processing (resizers, etc.), radio, remote controllers,
+HDMI CEC and media pipeline control.
-It covers, mainly, the contents of those directories:
+The Media Subsystem consists of the following directories in the kernel
+tree:
- drivers/media
- drivers/staging/media
+ - include/media
+ - Documentation/devicetree/bindings/media/\ [1]_
- Documentation/admin-guide/media
- Documentation/driver-api/media
- Documentation/userspace-api/media
- - Documentation/devicetree/bindings/media/\ [1]_
- - include/media
.. [1] Device tree bindings are maintained by the
OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS maintainers
(see the MAINTAINERS file). So, changes there must be reviewed
- by them before being merged via the media subsystem's development
+ by them before being merged into the media subsystem's development
tree.
Both media userspace and Kernel APIs are documented and the documentation
must be kept in sync with the API changes. It means that all patches that
add new features to the subsystem must also bring changes to the
-corresponding API files.
+corresponding API documentation.
+
+Media Maintainers
+-----------------
+
+Media Maintainers are not just people capable of writing code, but they
+are developers who have demonstrated their ability to collaborate with
+the team, get the most knowledgeable people to review code, contribute
+high-quality code, and follow through to fix issues (in code or tests).
+
+Due to the size and wide scope of the media subsystem, multiple layers of
+maintainers are required, each with their own areas of expertise:
+
+- **Media Driver Maintainer**:
+ Responsible for one or more drivers within the Media Subsystem. They
+ are listed in the MAINTAINERS file as maintainer for those drivers. Media
+ Driver Maintainers review patches for those drivers, provide feedback if
+ patches do not follow the subsystem rules, or are not using the
+ media kernel or userspace APIs correctly, or if they have poor code
+ quality.
+
+ If you are the patch author, you work with other Media
+ Maintainers to ensure your patches are reviewed.
+
+ Some Media Driver Maintainers have additional responsibilities. They have
+ been granted Patchwork access and keep
+ `Patchwork <https://patchwork.linuxtv.org/project/linux-media/list/>`_
+ up to date, decide when patches are ready for merging, and create Pull
+ Requests for the Media Subsystem Maintainers to merge.
+
+- **Media Core Maintainer**:
+ Media Driver Maintainers with Patchwork access who are also responsible for
+ one or more media core frameworks.
+
+ Core framework changes are done via consensus between the relevant Media
+ Core Maintainers. Media Maintainers may include core framework changes in
+ their Pull Requests if they are signed off by the relevant Media Core
+ Maintainers.
+
+- **Media Subsystem Maintainers**:
+ Media Core Maintainers who are also responsible for the subsystem as a
+ whole, with access to the entire subsystem. Responsible for merging Pull
+ Requests from other Media Maintainers.
+
+ Userspace API/ABI changes are made via consensus among Media Subsystem
+ Maintainers\ [2]_. Media Maintainers may include API/ABI changes in
+ their Pull Requests if they are signed off by all Media Subsystem
+ Maintainers.
+
+All Media Maintainers shall agree with the Kernel development process as
+described in Documentation/process/index.rst and with the Kernel development
+rules in the Kernel documentation, including its code of conduct.
+
+Media Maintainers are often reachable via the #linux-media IRC channel at OFTC.
+
+.. [2] Everything that would break backward compatibility with existing
+ non-kernel code are API/ABI changes. This includes ioctl and sysfs
+ interfaces, v4l2 controls, and their behaviors.
+
+Patchwork Access
+----------------
+
+All Media Maintainers who have been granted Patchwork access shall ensure that
+`Patchwork <https://patchwork.linuxtv.org/project/linux-media/list/>`_
+will reflect the current status, e.g. patches shall be delegated to the Media
+Maintainer who is handling them and the patch status shall be updated according
+to these rules:
+
+- ``Under Review``: Used if the patch requires a second opinion
+ or when it is part of a Pull Request;
+- ``Superseded``: There is a newer version of the patch posted to the
+ mailing list.
+- ``Duplicated``: There was another patch doing the same thing from someone
+ else that was accepted.
+- ``Not Applicable``: Use for patch series that are not merged at media.git
+ tree (e.g. drm, dmabuf, upstream merge, etc.) but were cross-posted to the
+ linux-media mailing list.
+- ``Accepted``: Once a patch is merged in the multi-committer tree. Only Media
+ Maintainers with commit rights are allowed to set this state.
+
+If Media Maintainers decide not to accept a patch, they should reply to the
+patch authors by e‑mail, explaining why it is not accepted, and
+update `Patchwork <https://patchwork.linuxtv.org/project/linux-media/list/>`_
+accordingly with one of the following statuses:
+
+- ``Changes Requested``: if a new revision was requested;
+- ``Rejected``: if the proposed change is not acceptable at all.
+
+.. Note::
+
+ Patchwork supports a couple of clients to help semi-automate
+ status updates via its REST interface:
+
+ https://patchwork.readthedocs.io/en/latest/usage/clients/
+
+For patches that fall within their area of responsibility a Media Maintainer
+also decides when those patches are ready for merging, and create Pull Requests
+for the Media Subsystem Maintainers to merge.
+
+The most important aspect of becoming a Media Maintainer with Patchwork access
+is that you have demonstrated an ability to give good code reviews. We value
+your ability to deliver thorough, constructive code reviews.
+
+As such, potential maintainers must earn enough credibility and trust from the
+Linux Media Community. To do that, developers shall be familiar with the open
+source model and have been active in the Linux Kernel community for some time,
+and, in particular, in the media subsystem.
+
+In addition to actually making the code changes, you are basically
+demonstrating your:
+
+- commitment to the project;
+- ability to collaborate with the team and communicate well;
+- understanding of how upstream and the Linux Media Community work
+ (policies, processes for testing, code review, ...)
+- reasonable knowledge about:
+
+ - the Kernel development process:
+ Documentation/process/index.rst
+
+ - the Media development profile:
+ Documentation/driver-api/media/maintainer-entry-profile.rst
+
+- understanding of the projects' code base and coding style;
+- ability to provide feedback to the patch authors;
+- ability to judge when a patch might be ready for review and to submit;
+- ability to write good code (last but certainly not least).
+
+Media Driver Maintainers that desire to get Patchwork access are encouraged
+to participate at the yearly Linux Media Summit, typically co-located with
+a Linux-related conference. These summits are announced on the linux-media
+mailing list.
+
+If you are doing such tasks and have become a valued developer, an
+existing Media Maintainer can nominate you to the Media Subsystem Maintainers.
+
+The ultimate responsibility for accepting a nominated maintainer is up to
+the subsystem's maintainers. The nominated maintainer must have earned a trust
+relationship with all Media Subsystem Maintainers, as, by being granted
+Patchwork access, you will take over part of their maintenance tasks.
+
+Media Committers
+----------------
+
+Experienced and trusted Media Maintainers may be granted commit rights
+which allow them to directly push patches to the media development tree instead
+of posting a Pull Request for the Media Subsystem Maintainers. This helps
+offloading some of the work of the Media Subsystem Maintainers.
+
+More details about Media Committers' roles and responsibilities can be
+found here: :ref:`Media Committers`.
+
+Media development sites
+-----------------------
+
+The `LinuxTV <https://linuxtv.org/>`_ web site hosts news about the subsystem,
+together with:
+
+- `Wiki pages <https://www.linuxtv.org/wiki/index.php/Main_Page>`_;
+- `Patchwork <https://patchwork.linuxtv.org/project/linux-media/list/>`_;
+- `Linux Media documentation <https://linuxtv.org/docs.php>`_;
+- and more.
+
+The main development trees used by the media subsystem are at:
+
+- Stable tree:
+ - https://git.linuxtv.org/media.git/
+
+- Media committers tree:
+ - https://gitlab.freedesktop.org/linux-media/media-committers.git
+
+ Please note that it can be rebased, although only as a last resort.
+
+- Media development trees, including apps and CI:
+
+ - https://git.linuxtv.org/
+ - https://gitlab.freedesktop.org/linux-media/
+
+
+.. _Media development workflow:
+
+Media development workflow
+++++++++++++++++++++++++++
+
+All changes for the media subsystem shall be sent first as e-mails to the
+media mailing list, following the process documented at
+Documentation/process/index.rst.
+
+It means that patches shall be submitted as plain text only via e-mail to
+linux-media@vger.kernel.org (aka: LMML). While subscription is not mandatory,
+you can find details about how to subscribe to it and to see its archives at:
+
+ https://subspace.kernel.org/vger.kernel.org.html
+
+Emails with HTML will be automatically rejected by the mail server.
+
+It could be wise to also copy the relevant Media Maintainer(s). You should use
+``scripts/get_maintainers.pl`` to identify whom else needs to be copied.
+Please always copy driver's authors and maintainers.
+
+To minimize the chance of merge conflicts for your patch series, and make it
+easier to backport patches to stable Kernels, we recommend that you use the
+following baseline for your patch series:
-Due to the size and wide scope of the media subsystem, media's
-maintainership model is to have sub-maintainers that have a broad
-knowledge of a specific aspect of the subsystem. It is the sub-maintainers'
-task to review the patches, providing feedback to users if the patches are
-following the subsystem rules and are properly using the media kernel and
-userspace APIs.
+1. Features for the next mainline release:
-Patches for the media subsystem must be sent to the media mailing list
-at linux-media@vger.kernel.org as plain text only e-mail. Emails with
-HTML will be automatically rejected by the mail server. It could be wise
-to also copy the sub-maintainer(s).
+ - baseline shall be the ``media-committers.git next`` branch;
+
+2. Bug fixes for the next mainline release:
+
+ - baseline shall be the ``media-committers.git next`` branch. If the
+ changes depend on a fix from the ``media-committers.git fixes``
+ branch, then you can use that as baseline.
+
+3. Bug fixes for the current mainline release (-rcX):
+
+ - baseline shall be the latest mainline -rcX release or the
+ ``media-committers.git fixes`` branch if changes depend on a mainline
+ fix that is not yet merged;
+
+.. Note::
+
+ See https://www.kernel.org/category/releases.html for an overview
+ about Kernel release types.
+
+Patches with fixes shall have:
+
+- a ``Fixes:`` tag pointing to the first commit that introduced the bug;
+- when applicable, a ``Cc: stable@vger.kernel.org``.
+
+Patches that were fixing bugs publicly reported by someone at the
+linux-media@vger.kernel.org mailing list shall have:
+
+- a ``Reported-by:`` tag immediately followed by a ``Closes:`` tag.
+
+Patches that change API shall update documentation accordingly at the
+same patch series.
+
+See Documentation/process/index.rst for more details about e-mail submission.
+
+Once a patch is submitted, it may follow either one of the following
+workflows:
+
+a. Media Maintainers' workflow: Media Maintainers post the Pull Requests,
+ which are handled by the Media Subsystem Maintainers::
+
+ +-------+ +------------+ +------+ +-------+ +---------------------+
+ |e-mail |-->|picked up by|-->|code |-->|pull |-->|Subsystem Maintainers|
+ |to LMML| |Patchwork | |review| |request| |merge in |
+ | | | | | | | | |media-committers.git |
+ +-------+ +------------+ +------+ +-------+ +---------------------+
+
+ For this workflow, Pull Requests are generated by Media Maintainers with
+ Patchwork access. If you do not have Patchwork access, then please don't
+ submit Pull Requests, as they will not be processed.
+
+b. Media Committers' workflow: patches are handled by Media Maintainers with
+ commit rights::
+
+ +-------+ +------------+ +------+ +--------------------------+
+ |e-mail |-->|picked up by|-->|code |-->|Media Committers merge in |
+ |to LMML| |Patchwork | |review| |media-committers.git |
+ +-------+ +------------+ +------+ +--------------------------+
+
+When patches are picked up by
+`Patchwork <https://patchwork.linuxtv.org/project/linux-media/list/>`_
+and when merged at media-committers, Media CI bots will check for errors and
+may provide e-mail feedback about patch problems. When this happens, the patch
+submitter must fix them or explain why the errors are false positives.
+
+Patches will only be moved to the next stage in these two workflows if they
+pass on Media CI or if there are false-positives in the Media CI reports.
+
+For both workflows, all patches shall be properly reviewed at
+linux-media@vger.kernel.org (LMML) before being merged in
+``media-committers.git``. Media patches will be reviewed in a timely manner
+by the maintainers and reviewers as listed in the MAINTAINERS file.
+
+Media Maintainers shall request reviews from other Media Maintainers and
+developers where applicable, i.e. because those developers have more
+knowledge about some areas that are changed by a patch.
+
+There shall be no open issues or unresolved or conflicting feedback
+from anyone. Clear them up first. Defer to the Media Subsystem
+Maintainers if needed.
+
+Failures during e-mail submission
++++++++++++++++++++++++++++++++++
Media's workflow is heavily based on Patchwork, meaning that, once a patch
is submitted, the e-mail will first be accepted by the mailing list
@@ -47,51 +330,107 @@ server, and, after a while, it should appear at:
- https://patchwork.linuxtv.org/project/linux-media/list/
-If it doesn't automatically appear there after a few minutes, then
+If it doesn't automatically appear there after some time [3]_, then
probably something went wrong on your submission. Please check if the
-email is in plain text\ [2]_ only and if your emailer is not mangling
+email is in plain text\ [4]_ only and if your emailer is not mangling
whitespaces before complaining or submitting them again.
-You can check if the mailing list server accepted your patch, by looking at:
+To troubleshoot problems, you should first check if the mailing list
+server has accepted your patch, by looking at:
- https://lore.kernel.org/linux-media/
-.. [2] If your email contains HTML, the mailing list server will simply
+If the patch is there and not at
+`Patchwork <https://patchwork.linuxtv.org/project/linux-media/list/>`_,
+it is likely that your e-mailer mangled the patch. Patchwork internally
+has logic that checks if the received e-mail contains a valid patch.
+Any whitespace and new line breakages mangling the patch won't be recognized by
+`Patchwork <https://patchwork.linuxtv.org/project/linux-media/list/>`_,
+and such a patch will be rejected.
+
+.. [3] It usually takes a few minutes for the patch to arrive, but
+ the e-mail server may be busy, so it may take a longer time
+ for a patch to be picked by
+ `Patchwork <https://patchwork.linuxtv.org/project/linux-media/list/>`_.
+
+.. [4] If your email contains HTML, the mailing list server will simply
drop it, without any further notice.
+.. _media-developers-gpg:
+
+Authentication for pull and merge requests
+++++++++++++++++++++++++++++++++++++++++++
+
+The authenticity of developers submitting Pull Requests and merge requests
+shall be validated by using the Linux Kernel Web of Trust, with PGP signing
+at some moment. See: :ref:`kernel_org_trust_repository`.
-Media maintainers
-+++++++++++++++++
+With the Pull Request workflow, Pull Requests shall use PGP-signed tags.
-At the media subsystem, we have a group of senior developers that
-are responsible for doing the code reviews at the drivers (also known as
-sub-maintainers), and another senior developer responsible for the
-subsystem as a whole. For core changes, whenever possible, multiple
-media maintainers do the review.
+With the committers' workflow, this is ensured at the time merge request
+rights will be granted to the gitlab instance used by the media-committers.git
+tree, after receiving the e-mail documented in
+:ref:`media-committer-agreement`.
+
+For more details about PGP signing, please read
+Documentation/process/maintainer-pgp-guide.rst.
+
+Maintaining media maintainer status
+-----------------------------------
+
+See :ref:`Maintain Media Status`.
+
+List of Media Maintainers
+-------------------------
-The media maintainers that work on specific areas of the subsystem are:
+The Media Maintainers listed here all have patchwork access and can
+make Pull Requests or have commit rights.
-- Remote Controllers (infrared):
- Sean Young <sean@mess.org>
+The Media Subsystem Maintainers are:
+ - Mauro Carvalho Chehab <mchehab@kernel.org>
+ - Hans Verkuil <hverkuil@kernel.org>
-- HDMI CEC:
- Hans Verkuil <hverkuil@xs4all.nl>
+The Media Core Maintainers are:
+ - Sakari Ailus <sakari.ailus@linux.intel.com>
-- Media controller drivers:
- Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ - Media controller drivers
+ - Core media controller framework
+ - ISP
+ - sensor drivers
+ - v4l2-async and v4l2-fwnode core frameworks
+ - v4l2-flash-led-class core framework
-- ISP, v4l2-async, v4l2-fwnode, v4l2-flash-led-class and Sensor drivers:
- Sakari Ailus <sakari.ailus@linux.intel.com>
+ - Mauro Carvalho Chehab <mchehab@kernel.org>
-- V4L2 drivers and core V4L2 frameworks:
- Hans Verkuil <hverkuil@xs4all.nl>
+ - DVB
-The subsystem maintainer is:
- Mauro Carvalho Chehab <mchehab@kernel.org>
+ - Laurent Pinchart <laurent.pinchart@ideasonboard.com>
-Media maintainers may delegate a patch to other media maintainers as needed.
-On such case, checkpatch's ``delegate`` field indicates who's currently
-responsible for reviewing a patch.
+ - Media controller drivers
+ - Core media controller framework
+ - ISP
+
+ - Hans Verkuil <hverkuil@kernel.org>
+
+ - V4L2 drivers
+ - V4L2 and videobuf2 core frameworks
+ - HDMI CEC drivers
+ - HDMI CEC core framework
+
+ - Sean Young <sean@mess.org>
+
+ - Remote Controller (infrared) drivers
+ - Remote Controller (infrared) core framework
+
+The Media Driver Maintainers responsible for specific areas are:
+ - Nicolas Dufresne <nicolas.dufresne@collabora.com>
+
+ - Codec drivers
+ - M2M driver not otherwise delegated
+
+ - Bryan O'Donoghue <bryan.odonoghue@linaro.org>
+
+ - Qualcomm drivers
Submit Checklist Addendum
-------------------------
@@ -106,18 +445,15 @@ that should be used in order to check if the drivers are properly
implementing the media APIs:
==================== =======================================================
-Type Tool
+Type Utility
==================== =======================================================
-V4L2 drivers\ [3]_ ``v4l2-compliance``
+V4L2 drivers\ [5]_ ``v4l2-compliance``
V4L2 virtual drivers ``contrib/test/test-media``
CEC drivers ``cec-compliance``
==================== =======================================================
-.. [3] The ``v4l2-compliance`` also covers the media controller usage inside
- V4L2 drivers.
-
-Other compilance tools are under development to check other parts of the
-subsystem.
+.. [5] The ``v4l2-compliance`` utility also covers the media controller usage
+ inside V4L2 drivers.
Those tests need to pass before the patches go upstream.
@@ -134,6 +470,8 @@ Where the check script is::
Be sure to not introduce new warnings on your patches without a
very good reason.
+Please see `Media development workflow`_ for e-mail submission rules.
+
Style Cleanup Patches
+++++++++++++++++++++
@@ -173,34 +511,35 @@ least, simply wrapping the lines.
In particular, we accept lines with more than 80 columns:
- on strings, as they shouldn't be broken due to line length limits;
- - when a function or variable name need to have a big identifier name,
- which keeps hard to honor the 80 columns limit;
+ - when a function or variable name needs to have a long identifier name,
+ which makes hard to honor the 80 columns limit;
- on arithmetic expressions, when breaking lines makes them harder to
read;
- - when they avoid a line to end with an open parenthesis or an open
+ - when they avoid a line ending with an open parenthesis or an open
bracket.
Key Cycle Dates
---------------
-New submissions can be sent at any time, but if they intend to hit the
+New submissions can be sent at any time, but if they are intended to hit the
next merge window they should be sent before -rc5, and ideally stabilized
in the linux-media branch by -rc6.
Review Cadence
--------------
-Provided that your patch is at https://patchwork.linuxtv.org, it should
-be sooner or later handled, so you don't need to re-submit a patch.
+Provided that your patch has landed in
+`Patchwork <https://patchwork.linuxtv.org/project/linux-media/list/>`_, it
+should be sooner or later handled, so you don't need to re-submit a patch.
-Except for bug fixes, we don't usually add new patches to the development
-tree between -rc6 and the next -rc1.
+Except for important bug fixes, we don't usually add new patches to the
+development tree between -rc6 and the next -rc1.
Please notice that the media subsystem is a high traffic one, so it
could take a while for us to be able to review your patches. Feel free
to ping if you don't get a feedback in a couple of weeks or to ask
-other developers to publicly add Reviewed-by and, more importantly,
+other developers to publicly add ``Reviewed-by:`` and, more importantly,
``Tested-by:`` tags.
Please note that we expect a detailed description for ``Tested-by:``,
-identifying what boards were used at the test and what it was tested.
+identifying what boards were used during the test and what it was tested.
diff --git a/Documentation/driver-api/media/media-committers.rst b/Documentation/driver-api/media/media-committers.rst
new file mode 100644
index 000000000000..a905856f6a61
--- /dev/null
+++ b/Documentation/driver-api/media/media-committers.rst
@@ -0,0 +1,203 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _Media Committers:
+
+Media Committers
+================
+
+Who is a Media Committer?
+-------------------------
+
+A Media Committer is a Media Maintainer with patchwork access who has been
+granted commit access to push patches from other developers and their own
+patches to the
+`media-committers <https://gitlab.freedesktop.org/linux-media/media-committers>`_
+tree.
+
+These commit rights are granted with expectation of responsibility:
+committers are people who care about the Linux Kernel as a whole and
+about the Linux media subsystem and want to advance its development. It
+is also based on a trust relationship among other committers, maintainers
+and the Linux Media community.
+
+As Media Committer you have the following additional responsibilities:
+
+1. Patches you authored must have a ``Signed-off-by``, ``Reviewed-by``
+ or ``Acked-by`` from another Media Maintainer;
+2. If a patch introduces a regression, then that must be corrected as soon
+ as possible. Typically the patch is either reverted, or an additional
+ patch is committed to fix the regression;
+3. If patches are fixing bugs against already released Kernels, including
+ the reverts mentioned above, the Media Committer shall add the needed
+ tags. Please see :ref:`Media development workflow` for more details.
+4. All Media Committers are responsible for maintaining
+ `Patchwork <https://patchwork.linuxtv.org/project/linux-media/list/>`_,
+ updating the state of the patches they review or merge.
+
+
+Becoming a Media Committer
+--------------------------
+
+Existing Media Committers can nominate a Media Maintainer to be granted
+commit rights. The Media Maintainer must have patchwork access,
+have been reviewing patches from third parties for some time, and has
+demonstrated a good understanding of the maintainer's duties and processes.
+
+The ultimate responsibility for accepting a nominated committer is up to
+the Media Subsystem Maintainers. The nominated committer must have earned a
+trust relationship with all Media Subsystem Maintainers, as, by granting you
+commit rights, part of their responsibilities are handed over to you.
+
+Due to that, to become a Media Committer, a consensus between all Media
+Subsystem Maintainers is required.
+
+.. Note::
+
+ In order to preserve/protect the developers that could have their commit
+ rights granted, denied or removed as well as the subsystem maintainers who
+ have the task to accept or deny commit rights, all communication related to
+ changing commit rights should happen in private as much as possible.
+
+.. _media-committer-agreement:
+
+Media Committer's agreement
+---------------------------
+
+Once a nominated committer is accepted by all Media Subsystem Maintainers,
+they will ask if the developer is interested in the nomination and discuss
+what area(s) of the media subsystem the committer will be responsible for.
+Those areas will typically be the same as the areas that the nominated
+committer is already maintaining.
+
+When the developer accepts being a committer, the new committer shall
+explicitly accept the Kernel development policies described under its
+Documentation/, and in particular to the rules in this document, by writing
+an e-mail to media-committers@linuxtv.org, with a declaration of intent
+following the model below::
+
+ I, John Doe, would like to change my status to: Committer
+
+ As Media Maintainer I accept commit rights for the following areas of
+ the media subsystem:
+
+ ...
+
+ For the purpose of committing patches to the media-committers tree,
+ I'll be using my user https://gitlab.freedesktop.org/users/<username>.
+
+Followed by a formal declaration of agreement with the Kernel development
+rules::
+
+ I agree to follow the Kernel development rules described at:
+
+ https://www.kernel.org/doc/html/latest/driver-api/media/media-committers.rst
+
+ and to the Linux Kernel development process rules.
+
+ I agree to abide by the Code of Conduct as documented in:
+ https://www.kernel.org/doc/html/latest/process/code-of-conduct.rst
+
+ I am aware that I can, at any point of time, retire. In that case, I will
+ send an e-mail to notify the Media Subsystem Maintainers for them to revoke
+ my commit rights.
+
+ I am aware that the Kernel development rules change over time.
+ By doing a new push to media-committers tree, I understand that I agree
+ to follow the rules in effect at the time of the commit.
+
+That e-mail shall be signed via the Kernel Web of trust with a PGP key cross
+signed by other Kernel and media developers. As described at
+:ref:`media-developers-gpg`, the PGP signature, together with the gitlab user
+security are fundamental components that ensure the authenticity of the merge
+requests that will happen at the media-committers.git tree.
+
+In case the kernel development process changes, by merging new commits to the
+`media-committers tree <https://gitlab.freedesktop.org/linux-media/media-committers>`_,
+the Media Committer implicitly declares their agreement with the latest
+version of the documented process including the contents of this file.
+
+If a Media Committer decides to retire, it is the committer's duty to
+notify the Media Subsystem Maintainers about that decision.
+
+.. note::
+
+ 1. Changes to the kernel media development process shall be announced in
+ the media-committers mailing list with a reasonable review period. All
+ committers are automatically subscribed to that mailing list;
+ 2. Due to the distributed nature of the Kernel development, it is
+ possible that kernel development process changes may end being
+ reviewed/merged at the Linux Docs and/or at the Linux Kernel mailing
+ lists, especially for the contents under Documentation/process and for
+ trivial typo fixes.
+
+Media Core Committers
+---------------------
+
+A Media Core Committer is a Media Core Maintainer with commit rights.
+
+As described in Documentation/driver-api/media/maintainer-entry-profile.rst,
+a Media Core Maintainer maintains media core frameworks as well, besides
+just drivers, and so is allowed to change core files and the media subsystem's
+Kernel API. The extent of the core committer's grants will be detailed by the
+Media Subsystem Maintainers when they nominate a Media Core Committer.
+
+Existing Media Committers may become Media Core Committers and vice versa.
+Such decisions will be taken in consensus among the Media Subsystem
+Maintainers.
+
+Media committers rules
+----------------------
+
+Media committers shall do their best efforts to avoid merging patches that
+would break any existing drivers. If it breaks, fixup or revert patches
+shall be merged as soon as possible, aiming to be merged at the same Kernel
+cycle the bug is reported.
+
+Media committers shall behave accordingly to the rights granted by
+the Media Subsystem Maintainers, especially with regards of the scope of changes
+they may apply directly at the media-committers tree. That scope can
+change over time on a mutual agreement between Media Committers and
+Media Subsystem Maintainers.
+
+The Media Committer workflow is described at :ref:`Media development workflow`.
+
+.. _Maintain Media Status:
+
+Maintaining Media Maintainer or Committer status
+------------------------------------------------
+
+A community of maintainers working together to move the Linux Kernel
+forward is essential to creating successful projects that are rewarding
+to work on. If there are problems or disagreements within the community,
+they can usually be solved through healthy discussion and debate.
+
+In the unhappy event that a Media Maintainer or Committer continues to
+disregard good citizenship (or actively disrupts the project), we may need
+to revoke that person's status. In such cases, if someone suggests the
+revocation with a good reason, then after discussing this among the Media
+Maintainers, the final decision is taken by the Media Subsystem Maintainers.
+
+As the decision to become a Media Maintainer or Committer comes from a
+consensus between Media Subsystem Maintainers, a single Media Subsystem
+Maintainer not trusting the Media Maintainer or Committer anymore is enough
+to revoke their maintenance, Patchwork grants and/or commit rights.
+
+Having commit rights revoked doesn't prevent Media Maintainers to keep
+contributing to the subsystem either via the pull request or via email workflow
+as documented at the :ref:`Media development workflow`.
+
+If a maintainer is inactive for more than a couple of Kernel cycles,
+maintainers will try to reach you via e-mail. If not possible, they may
+revoke their maintainer/patchwork and committer rights and update MAINTAINERS
+file entries accordingly. If you wish to resume contributing as maintainer
+later on, then contact the Media Subsystem Maintainers to ask if your
+maintenance, Patchwork grants and commit rights can be restored.
+
+References
+----------
+
+Much of this was inspired by/copied from the committer policies of:
+
+- `Chromium <https://chromium.googlesource.com/chromium/src/+/main/docs/contributing.md>`_;
+- `WebKit <https://webkit.org/commit-and-review-policy/>`_;
+- `Mozilla <https://www.mozilla.org/hacking/committer/>`_.
diff --git a/Documentation/driver-api/media/tx-rx.rst b/Documentation/driver-api/media/tx-rx.rst
index c71003f74b1c..22e1b13ecde9 100644
--- a/Documentation/driver-api/media/tx-rx.rst
+++ b/Documentation/driver-api/media/tx-rx.rst
@@ -12,7 +12,7 @@ CSI-2 receiver in an SoC.
Bus types
---------
-The following busses are the most common. This section discusses these two only.
+The following buses are the most common. This section discusses these two only.
MIPI CSI-2
^^^^^^^^^^
@@ -36,7 +36,7 @@ Transmitter drivers
Transmitter drivers generally need to provide the receiver drivers with the
configuration of the transmitter. What is required depends on the type of the
-bus. These are common for both busses.
+bus. These are common for both buses.
Media bus pixel code
^^^^^^^^^^^^^^^^^^^^
@@ -49,6 +49,13 @@ Link frequency
The :ref:`V4L2_CID_LINK_FREQ <v4l2-cid-link-freq>` control is used to tell the
receiver the frequency of the bus (i.e. it is not the same as the symbol rate).
+Drivers that do not have user-configurable link frequency should report it
+through the ``.get_mbus_config()`` subdev pad operation, in the ``link_freq``
+field of struct v4l2_mbus_config, instead of through controls.
+
+Receiver drivers should use :c:func:`v4l2_get_link_freq` helper to obtain the
+link frequency from the transmitter sub-device.
+
``.enable_streams()`` and ``.disable_streams()`` callbacks
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -58,6 +65,15 @@ to control the transmitter driver's streaming state. These callbacks may not be
called directly, but by using ``v4l2_subdev_enable_streams()`` and
``v4l2_subdev_disable_streams()``.
+Stopping the transmitter
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+A transmitter stops sending the stream of images as a result of
+calling the ``.disable_streams()`` callback. Some transmitters may stop the
+stream at a frame boundary whereas others stop immediately,
+effectively leaving the current frame unfinished. The receiver driver
+should not make assumptions either way, but function properly in both
+cases.
CSI-2 transmitter drivers
-------------------------
@@ -126,13 +142,3 @@ device, so this should be only done when it is needed.
Receiver drivers that do not need explicit LP-11 or LP-111 state setup are
waived from calling the two callbacks.
-
-Stopping the transmitter
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-A transmitter stops sending the stream of images as a result of
-calling the ``.disable_streams()`` callback. Some transmitters may stop the
-stream at a frame boundary whereas others stop immediately,
-effectively leaving the current frame unfinished. The receiver driver
-should not make assumptions either way, but function properly in both
-cases.
diff --git a/Documentation/driver-api/media/v4l2-controls.rst b/Documentation/driver-api/media/v4l2-controls.rst
index b2e91804829b..fc04907589ab 100644
--- a/Documentation/driver-api/media/v4l2-controls.rst
+++ b/Documentation/driver-api/media/v4l2-controls.rst
@@ -110,6 +110,7 @@ For sub-device drivers:
v4l2_ctrl_handler_free(&foo->ctrl_handler);
+:c:func:`v4l2_ctrl_handler_free` does not touch the handler's ``error`` field.
2) Add controls:
@@ -191,12 +192,8 @@ These functions are typically called right after the
V4L2_CID_TEST_PATTERN, ARRAY_SIZE(test_pattern) - 1, 0,
0, test_pattern);
...
- if (foo->ctrl_handler.error) {
- int err = foo->ctrl_handler.error;
-
- v4l2_ctrl_handler_free(&foo->ctrl_handler);
- return err;
- }
+ if (foo->ctrl_handler.error)
+ return v4l2_ctrl_handler_free(&foo->ctrl_handler);
The :c:func:`v4l2_ctrl_new_std` function returns the v4l2_ctrl pointer to
the new control, but if you do not need to access the pointer outside the
diff --git a/Documentation/driver-api/media/v4l2-core.rst b/Documentation/driver-api/media/v4l2-core.rst
index ad987c34ad2a..a5f5102c64cc 100644
--- a/Documentation/driver-api/media/v4l2-core.rst
+++ b/Documentation/driver-api/media/v4l2-core.rst
@@ -27,3 +27,4 @@ Video4Linux devices
v4l2-common
v4l2-tveeprom
v4l2-jpeg
+ v4l2-isp
diff --git a/Documentation/driver-api/media/v4l2-dev.rst b/Documentation/driver-api/media/v4l2-dev.rst
index d5cb19b21a9f..dd239ad42051 100644
--- a/Documentation/driver-api/media/v4l2-dev.rst
+++ b/Documentation/driver-api/media/v4l2-dev.rst
@@ -157,10 +157,10 @@ changing the e.g. exposure of the webcam.
Of course, you can always do all the locking yourself by leaving both lock
pointers at ``NULL``.
-In the case of :ref:`videobuf2 <vb2_framework>` you will need to implement the
-``wait_prepare()`` and ``wait_finish()`` callbacks to unlock/lock if applicable.
-If you use the ``queue->lock`` pointer, then you can use the helper functions
-:c:func:`vb2_ops_wait_prepare` and :c:func:`vb2_ops_wait_finish`.
+In the case of :ref:`videobuf2 <vb2_framework>` you must set the ``queue->lock``
+pointer to the lock you use to serialize the queuing ioctls. This ensures that
+that lock is released while waiting in ``VIDIOC_DQBUF`` for a buffer to arrive,
+and it is retaken afterwards.
The implementation of a hotplug disconnect should also take the lock from
:c:type:`video_device` before calling v4l2_device_disconnect. If you are also
diff --git a/Documentation/driver-api/media/v4l2-fh.rst b/Documentation/driver-api/media/v4l2-fh.rst
index 3eeaa8da0c9e..a934caa483a4 100644
--- a/Documentation/driver-api/media/v4l2-fh.rst
+++ b/Documentation/driver-api/media/v4l2-fh.rst
@@ -1,33 +1,27 @@
.. SPDX-License-Identifier: GPL-2.0
-V4L2 File handlers
-------------------
+V4L2 File handles
+-----------------
-struct v4l2_fh provides a way to easily keep file handle specific
-data that is used by the V4L2 framework.
+struct v4l2_fh provides a way to easily keep file handle specific data that is
+used by the V4L2 framework. Its usage is mandatory in all drivers.
-.. attention::
- New drivers must use struct v4l2_fh
- since it is also used to implement priority handling
- (:ref:`VIDIOC_G_PRIORITY`).
+struct v4l2_fh is allocated in the driver's ``open()`` file operation handler.
+It is typically embedded in a larger driver-specific structure. The
+:c:type:`v4l2_fh` must be initialized with a call to :c:func:`v4l2_fh_init`,
+and added to the video device with :c:func:`v4l2_fh_add`. This associates the
+:c:type:`v4l2_fh` with the :c:type:`file` by setting ``file->private_data`` to
+point to the :c:type:`v4l2_fh`.
-The users of :c:type:`v4l2_fh` (in the V4L2 framework, not the driver) know
-whether a driver uses :c:type:`v4l2_fh` as its ``file->private_data`` pointer
-by testing the ``V4L2_FL_USES_V4L2_FH`` bit in :c:type:`video_device`->flags.
-This bit is set whenever :c:func:`v4l2_fh_init` is called.
+Similarly, the struct v4l2_fh is freed in the driver's ``release()`` file
+operation handler. It must be removed from the video device with
+:c:func:`v4l2_fh_del` and cleaned up with :c:func:`v4l2_fh_exit` before being
+freed.
-struct v4l2_fh is allocated as a part of the driver's own file handle
-structure and ``file->private_data`` is set to it in the driver's ``open()``
-function by the driver.
-
-In many cases the struct v4l2_fh will be embedded in a larger
-structure. In that case you should call:
-
-#) :c:func:`v4l2_fh_init` and :c:func:`v4l2_fh_add` in ``open()``
-#) :c:func:`v4l2_fh_del` and :c:func:`v4l2_fh_exit` in ``release()``
-
-Drivers can extract their own file handle structure by using the container_of
-macro.
+Drivers must not access ``file->private_data`` directly. They can retrieve the
+:c:type:`v4l2_fh` associated with a :c:type:`file` by calling
+:c:func:`file_to_v4l2_fh`. Drivers can extract their own file handle structure
+by using the container_of macro.
Example:
@@ -56,18 +50,17 @@ Example:
...
- file->private_data = &my_fh->fh;
- v4l2_fh_add(&my_fh->fh);
+ v4l2_fh_add(&my_fh->fh, file);
return 0;
}
int my_release(struct file *file)
{
- struct v4l2_fh *fh = file->private_data;
+ struct v4l2_fh *fh = file_to_v4l2_fh(file);
struct my_fh *my_fh = container_of(fh, struct my_fh, fh);
...
- v4l2_fh_del(&my_fh->fh);
+ v4l2_fh_del(&my_fh->fh, file);
v4l2_fh_exit(&my_fh->fh);
kfree(my_fh);
return 0;
@@ -78,19 +71,17 @@ Below is a short description of the :c:type:`v4l2_fh` functions used:
:c:func:`v4l2_fh_init <v4l2_fh_init>`
(:c:type:`fh <v4l2_fh>`, :c:type:`vdev <video_device>`)
-
- Initialise the file handle. This **MUST** be performed in the driver's
:c:type:`v4l2_file_operations`->open() handler.
-
:c:func:`v4l2_fh_add <v4l2_fh_add>`
-(:c:type:`fh <v4l2_fh>`)
+(:c:type:`fh <v4l2_fh>`, struct file \*filp)
- Add a :c:type:`v4l2_fh` to :c:type:`video_device` file handle list.
Must be called once the file handle is completely initialized.
:c:func:`v4l2_fh_del <v4l2_fh_del>`
-(:c:type:`fh <v4l2_fh>`)
+(:c:type:`fh <v4l2_fh>`, struct file \*filp)
- Unassociate the file handle from :c:type:`video_device`. The file handle
exit function may now be called.
@@ -101,6 +92,10 @@ Below is a short description of the :c:type:`v4l2_fh` functions used:
- Uninitialise the file handle. After uninitialisation the :c:type:`v4l2_fh`
memory can be freed.
+:c:func:`file_to_v4l2_fh <file_to_v4l2_fh>`
+(struct file \*filp)
+
+- Retrieve the :c:type:`v4l2_fh` instance associated with a :c:type:`file`.
If struct v4l2_fh is not embedded, then you can use these helper functions:
diff --git a/Documentation/driver-api/media/v4l2-isp.rst b/Documentation/driver-api/media/v4l2-isp.rst
new file mode 100644
index 000000000000..618ae614ff79
--- /dev/null
+++ b/Documentation/driver-api/media/v4l2-isp.rst
@@ -0,0 +1,49 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+V4L2 generic ISP parameters and statistics support
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Design rationale
+================
+
+ISP configuration parameters and statistics are processed and collected by
+drivers and exchanged with userspace through data types that usually
+reflect the ISP peripheral registers layout.
+
+Each ISP driver defines its own metadata output format for parameters and
+a metadata capture format for statistics. The buffer layout is realized by a
+set of C structures that reflects the registers layout. The number and types
+of C structures is fixed by the format definition and becomes part of the Linux
+kernel uAPI/uABI interface.
+
+Because of the hard requirement of backward compatibility when extending the
+user API/ABI interface, modifying an ISP driver capture or output metadata
+format after it has been accepted by mainline is very hard if not impossible.
+
+It generally happens, in fact, that after the first accepted revision of an ISP
+driver the buffers layout need to be modified, either to support new hardware
+blocks, to fix bugs or to support different revisions of the hardware.
+
+Each of these situations would require defining a new metadata format, making it
+really hard to maintain and extend drivers and requiring userspace to use
+the correct format depending on the kernel revision in use.
+
+V4L2 ISP configuration parameters
+=================================
+
+For these reasons, Video4Linux2 defines generic types for ISP configuration
+parameters and statistics. Drivers are still expected to define their own
+formats for their metadata output and capture nodes, but the buffers layout can
+be defined using the extensible and versioned types defined by
+include/uapi/linux/media/v4l2-isp.h.
+
+Drivers are expected to provide the definitions of their supported ISP blocks
+and the expected maximum size of a buffer.
+
+For driver developers a set of helper functions to assist them with validation
+of the buffer received from userspace is available in
+drivers/media/v4l2-core/v4l2-isp.c
+
+V4L2 ISP support driver documentation
+=====================================
+.. kernel-doc:: include/media/v4l2-isp.h
diff --git a/Documentation/driver-api/memory-devices/index.rst b/Documentation/driver-api/memory-devices/index.rst
index 28101458cda5..3b6308113611 100644
--- a/Documentation/driver-api/memory-devices/index.rst
+++ b/Documentation/driver-api/memory-devices/index.rst
@@ -9,10 +9,3 @@ Memory Controller drivers
ti-emif
ti-gpmc
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/mtdnand.rst b/Documentation/driver-api/mtdnand.rst
index ce77e024c4f1..adf03983f1ba 100644
--- a/Documentation/driver-api/mtdnand.rst
+++ b/Documentation/driver-api/mtdnand.rst
@@ -996,11 +996,11 @@ The following people have contributed to the NAND driver:
2. David Woodhouse\ dwmw2@infradead.org
-3. Thomas Gleixner\ tglx@linutronix.de
+3. Thomas Gleixner\ tglx@kernel.org
A lot of users have provided bugfixes, improvements and helping hands
for testing. Thanks a lot.
The following people have contributed to this document:
-1. Thomas Gleixner\ tglx@linutronix.de
+1. Thomas Gleixner\ tglx@kernel.org
diff --git a/Documentation/driver-api/ntb.rst b/Documentation/driver-api/ntb.rst
index e991d92b8b1d..a49c41383779 100644
--- a/Documentation/driver-api/ntb.rst
+++ b/Documentation/driver-api/ntb.rst
@@ -35,7 +35,7 @@ anyone who has written a pci driver.
NTB Typical client driver implementation
----------------------------------------
-Primary purpose of NTB is to share some peace of memory between at least two
+Primary purpose of NTB is to share some piece of memory between at least two
systems. So the NTB device features like Scratchpad/Message registers are
mainly used to perform the proper memory window initialization. Typically
there are two types of memory window interfaces supported by the NTB API:
diff --git a/Documentation/driver-api/nvdimm/btt.rst b/Documentation/driver-api/nvdimm/btt.rst
index 107395c042ae..2d8269f834bd 100644
--- a/Documentation/driver-api/nvdimm/btt.rst
+++ b/Documentation/driver-api/nvdimm/btt.rst
@@ -83,7 +83,7 @@ flags, and the remaining form the internal block number.
======== =============================================================
Bit Description
======== =============================================================
-31 - 30 Error and Zero flags - Used in the following way::
+31 - 30 Error and Zero flags - Used in the following way:
== == ====================================================
31 30 Description
diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst
index ca16b5acbf30..959ba1cc0263 100644
--- a/Documentation/driver-api/nvdimm/nvdimm.rst
+++ b/Documentation/driver-api/nvdimm/nvdimm.rst
@@ -230,7 +230,7 @@ LIBNVDIMM/LIBNDCTL: Bus
A bus has a 1:1 relationship with an NFIT. The current expectation for
ACPI based systems is that there is only ever one platform-global NFIT.
That said, it is trivial to register multiple NFITs, the specification
-does not preclude it. The infrastructure supports multiple busses and
+does not preclude it. The infrastructure supports multiple buses and
we use this capability to test multiple NFIT configurations in the unit
test.
@@ -535,12 +535,12 @@ internally with a static identifier::
char devname[50];
snprintf(devname, sizeof(devname), "namespace%d.%d",
- ndctl_region_get_id(region), paramaters->id);
+ ndctl_region_get_id(region), parameters->id);
ndctl_namespace_set_alt_name(ndns, devname);
/* 'uuid' must be set prior to setting size! */
- ndctl_namespace_set_uuid(ndns, paramaters->uuid);
- ndctl_namespace_set_size(ndns, paramaters->size);
+ ndctl_namespace_set_uuid(ndns, parameters->uuid);
+ ndctl_namespace_set_size(ndns, parameters->size);
/* unlike pmem namespaces, blk namespaces have a sector size */
if (parameters->lbasize)
ndctl_namespace_set_sector_size(ndns, parameters->lbasize);
diff --git a/Documentation/driver-api/nvmem.rst b/Documentation/driver-api/nvmem.rst
index 5d9500d21ecc..790e2dc652c0 100644
--- a/Documentation/driver-api/nvmem.rst
+++ b/Documentation/driver-api/nvmem.rst
@@ -59,10 +59,10 @@ For example, a simple nvram case::
devm_nvmem_register(&config);
}
-Users of board files can define and register nvmem cells using the
-nvmem_cell_table struct::
+Device drivers can define and register an nvmem cell using the nvmem_cell_info
+struct::
- static struct nvmem_cell_info foo_nvmem_cells[] = {
+ static const struct nvmem_cell_info foo_nvmem_cell = {
{
.name = "macaddr",
.offset = 0x7f00,
@@ -70,13 +70,7 @@ nvmem_cell_table struct::
}
};
- static struct nvmem_cell_table foo_nvmem_cell_table = {
- .nvmem_name = "i2c-eeprom",
- .cells = foo_nvmem_cells,
- .ncells = ARRAY_SIZE(foo_nvmem_cells),
- };
-
- nvmem_add_cell_table(&foo_nvmem_cell_table);
+ int nvmem_add_one_cell(nvmem, &foo_nvmem_cell);
Additionally it is possible to create nvmem cell lookup entries and register
them with the nvmem framework from machine code as shown in the example below::
diff --git a/Documentation/driver-api/parport-lowlevel.rst b/Documentation/driver-api/parport-lowlevel.rst
index 0633d70ffda7..a907e279f509 100644
--- a/Documentation/driver-api/parport-lowlevel.rst
+++ b/Documentation/driver-api/parport-lowlevel.rst
@@ -7,6 +7,7 @@ PARPORT interface documentation
Described here are the following functions:
Global functions::
+
parport_register_driver
parport_unregister_driver
parport_enumerate
@@ -34,6 +35,7 @@ Global functions::
Port functions (can be overridden by low-level drivers):
SPP::
+
port->ops->read_data
port->ops->write_data
port->ops->read_status
@@ -46,17 +48,20 @@ Port functions (can be overridden by low-level drivers):
port->ops->data_reverse
EPP::
+
port->ops->epp_write_data
port->ops->epp_read_data
port->ops->epp_write_addr
port->ops->epp_read_addr
ECP::
+
port->ops->ecp_write_data
port->ops->ecp_read_data
port->ops->ecp_write_addr
Other::
+
port->ops->nibble_read_data
port->ops->byte_read_data
port->ops->compat_write_data
diff --git a/Documentation/driver-api/pci/index.rst b/Documentation/driver-api/pci/index.rst
index a38e475cdbe3..1abfbecf6ce6 100644
--- a/Documentation/driver-api/pci/index.rst
+++ b/Documentation/driver-api/pci/index.rst
@@ -10,10 +10,4 @@ The Linux PCI driver implementer's API guide
pci
p2pdma
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
+ tsm
diff --git a/Documentation/driver-api/pci/p2pdma.rst b/Documentation/driver-api/pci/p2pdma.rst
index d0b241628cf1..d3f406cca694 100644
--- a/Documentation/driver-api/pci/p2pdma.rst
+++ b/Documentation/driver-api/pci/p2pdma.rst
@@ -9,22 +9,48 @@ between two devices on the bus. This type of transaction is henceforth
called Peer-to-Peer (or P2P). However, there are a number of issues that
make P2P transactions tricky to do in a perfectly safe way.
-One of the biggest issues is that PCI doesn't require forwarding
-transactions between hierarchy domains, and in PCIe, each Root Port
-defines a separate hierarchy domain. To make things worse, there is no
-simple way to determine if a given Root Complex supports this or not.
-(See PCIe r4.0, sec 1.3.1). Therefore, as of this writing, the kernel
-only supports doing P2P when the endpoints involved are all behind the
-same PCI bridge, as such devices are all in the same PCI hierarchy
-domain, and the spec guarantees that all transactions within the
-hierarchy will be routable, but it does not require routing
-between hierarchies.
-
-The second issue is that to make use of existing interfaces in Linux,
-memory that is used for P2P transactions needs to be backed by struct
-pages. However, PCI BARs are not typically cache coherent so there are
-a few corner case gotchas with these pages so developers need to
-be careful about what they do with them.
+For PCIe the routing of Transaction Layer Packets (TLPs) is well-defined up
+until they reach a host bridge or root port. If the path includes PCIe switches
+then based on the ACS settings the transaction can route entirely within
+the PCIe hierarchy and never reach the root port. The kernel will evaluate
+the PCIe topology and always permit P2P in these well-defined cases.
+
+However, if the P2P transaction reaches the host bridge then it might have to
+hairpin back out the same root port, be routed inside the CPU SOC to another
+PCIe root port, or routed internally to the SOC.
+
+The PCIe specification doesn't define the forwarding of transactions between
+hierarchy domains and kernel defaults to blocking such routing. There is an
+allow list to allow detecting known-good HW, in which case P2P between any
+two PCIe devices will be permitted.
+
+Since P2P inherently is doing transactions between two devices it requires two
+drivers to be co-operating inside the kernel. The providing driver has to convey
+its MMIO to the consuming driver. To meet the driver model lifecycle rules the
+MMIO must have all DMA mapping removed, all CPU accesses prevented, all page
+table mappings undone before the providing driver completes remove().
+
+This requires the providing and consuming driver to actively work together to
+guarantee that the consuming driver has stopped using the MMIO during a removal
+cycle. This is done by either a synchronous invalidation shutdown or waiting
+for all usage refcounts to reach zero.
+
+At the lowest level the P2P subsystem offers a naked struct p2p_provider that
+delegates lifecycle management to the providing driver. It is expected that
+drivers using this option will wrap their MMIO memory in DMABUF and use DMABUF
+to provide an invalidation shutdown. These MMIO addresses have no struct page, and
+if used with mmap() must create special PTEs. As such there are very few
+kernel uAPIs that can accept pointers to them; in particular they cannot be used
+with read()/write(), including O_DIRECT.
+
+Building on this, the subsystem offers a layer to wrap the MMIO in a ZONE_DEVICE
+pgmap of MEMORY_DEVICE_PCI_P2PDMA to create struct pages. The lifecycle of
+pgmap ensures that when the pgmap is destroyed all other drivers have stopped
+using the MMIO. This option works with O_DIRECT flows, in some cases, if the
+underlying subsystem supports handling MEMORY_DEVICE_PCI_P2PDMA through
+FOLL_PCI_P2PDMA. The use of FOLL_LONGTERM is prevented. As this relies on pgmap
+it also relies on architecture support along with alignment and minimum size
+limitations.
Driver Writer's Guide
@@ -114,14 +140,39 @@ allocating scatter-gather lists with P2P memory.
Struct Page Caveats
-------------------
-Driver writers should be very careful about not passing these special
-struct pages to code that isn't prepared for it. At this time, the kernel
-interfaces do not have any checks for ensuring this. This obviously
-precludes passing these pages to userspace.
+While the MEMORY_DEVICE_PCI_P2PDMA pages can be installed in VMAs,
+pin_user_pages() and related will not return them unless FOLL_PCI_P2PDMA is set.
-P2P memory is also technically IO memory but should never have any side
-effects behind it. Thus, the order of loads and stores should not be important
-and ioreadX(), iowriteX() and friends should not be necessary.
+The MEMORY_DEVICE_PCI_P2PDMA pages require care to support in the kernel. The
+KVA is still MMIO and must still be accessed through the normal
+readX()/writeX()/etc helpers. Direct CPU access (e.g. memcpy) is forbidden, just
+like any other MMIO mapping. While this will actually work on some
+architectures, others will experience corruption or just crash in the kernel.
+Supporting FOLL_PCI_P2PDMA in a subsystem requires scrubbing it to ensure no CPU
+access happens.
+
+
+Usage With DMABUF
+=================
+
+DMABUF provides an alternative to the above struct page-based
+client/provider/orchestrator system and should be used when struct page
+doesn't exist. In this mode the exporting driver will wrap
+some of its MMIO in a DMABUF and give the DMABUF FD to userspace.
+
+Userspace can then pass the FD to an importing driver which will ask the
+exporting driver to map it to the importer.
+
+In this case the initiator and target pci_devices are known and the P2P subsystem
+is used to determine the mapping type. The phys_addr_t-based DMA API is used to
+establish the dma_addr_t.
+
+Lifecycle is controlled by DMABUF move_notify(). When the exporting driver wants
+to remove() it must deliver an invalidation shutdown to all DMABUF importing
+drivers through move_notify() and synchronously DMA unmap all the MMIO.
+
+No importing driver can continue to have a DMA map to the MMIO after the
+exporting driver has destroyed its p2p_provider.
P2P DMA Support Library
diff --git a/Documentation/driver-api/pci/pci.rst b/Documentation/driver-api/pci/pci.rst
index 59d86e827198..99a1bbaaec5d 100644
--- a/Documentation/driver-api/pci/pci.rst
+++ b/Documentation/driver-api/pci/pci.rst
@@ -37,6 +37,9 @@ PCI Support Library
.. kernel-doc:: drivers/pci/slot.c
:export:
+.. kernel-doc:: drivers/pci/rebar.c
+ :export:
+
.. kernel-doc:: drivers/pci/rom.c
:export:
diff --git a/Documentation/driver-api/pci/tsm.rst b/Documentation/driver-api/pci/tsm.rst
new file mode 100644
index 000000000000..232b92bec93f
--- /dev/null
+++ b/Documentation/driver-api/pci/tsm.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+========================================================
+PCI Trusted Execution Environment Security Manager (TSM)
+========================================================
+
+Subsystem Interfaces
+====================
+
+.. kernel-doc:: include/linux/pci-ide.h
+ :internal:
+
+.. kernel-doc:: drivers/pci/ide.c
+ :export:
+
+.. kernel-doc:: include/linux/pci-tsm.h
+ :internal:
+
+.. kernel-doc:: drivers/pci/tsm.c
+ :export:
diff --git a/Documentation/driver-api/phy/index.rst b/Documentation/driver-api/phy/index.rst
index 69ba1216de72..579cfe3b7b82 100644
--- a/Documentation/driver-api/phy/index.rst
+++ b/Documentation/driver-api/phy/index.rst
@@ -8,11 +8,3 @@ Generic PHY Framework
phy
samsung-usb2
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
-
diff --git a/Documentation/driver-api/phy/phy.rst b/Documentation/driver-api/phy/phy.rst
index 81785c084f3e..0865c2e94eec 100644
--- a/Documentation/driver-api/phy/phy.rst
+++ b/Documentation/driver-api/phy/phy.rst
@@ -19,7 +19,7 @@ PHY. Other peripherals that use PHY include Wireless LAN, Ethernet,
SATA etc.
The intention of creating this framework is to bring the PHY drivers spread
-all over the Linux kernel to drivers/phy to increase code re-use and for
+all over the Linux kernel to drivers/phy to increase code reuse and for
better code maintainability.
This framework will be of use only to devices that use external PHY (PHY
@@ -198,8 +198,7 @@ pm_runtime_get_sync of PHY provider device because of parent-child relationship.
It should also be noted that phy_power_on and phy_power_off performs
phy_pm_runtime_get_sync and phy_pm_runtime_put respectively.
There are exported APIs like phy_pm_runtime_get, phy_pm_runtime_get_sync,
-phy_pm_runtime_put, phy_pm_runtime_put_sync, phy_pm_runtime_allow and
-phy_pm_runtime_forbid for performing PM operations.
+phy_pm_runtime_put and phy_pm_runtime_put_sync for performing PM operations.
PHY Mappings
============
diff --git a/Documentation/driver-api/pin-control.rst b/Documentation/driver-api/pin-control.rst
index 27ea1236307e..1f585ecca63c 100644
--- a/Documentation/driver-api/pin-control.rst
+++ b/Documentation/driver-api/pin-control.rst
@@ -863,7 +863,7 @@ has to be handled by the ``<linux/gpio/consumer.h>`` interface. Instead view thi
a certain pin config setting. Look in e.g. ``<linux/pinctrl/pinconf-generic.h>``
and you find this in the documentation:
- PIN_CONFIG_OUTPUT:
+ PIN_CONFIG_LEVEL:
this will configure the pin in output, use argument
1 to indicate high level, argument 0 to indicate low level.
@@ -897,7 +897,7 @@ And your machine configuration may look like this:
};
static unsigned long uart_sleep_mode[] = {
- PIN_CONF_PACKED(PIN_CONFIG_OUTPUT, 0),
+ PIN_CONF_PACKED(PIN_CONFIG_LEVEL, 0),
};
static struct pinctrl_map pinmap[] __initdata = {
@@ -1162,8 +1162,55 @@ pinmux core.
Pin control requests from drivers
=================================
-When a device driver is about to probe the device core will automatically
-attempt to issue ``pinctrl_get_select_default()`` on these devices.
+When a device driver is about to probe, the device core attaches the
+standard states if they are defined in the device tree by calling
+``pinctrl_bind_pins()`` on these devices.
+Possible standard state names are: "default", "init", "sleep" and "idle".
+
+- if ``default`` is defined in the device tree, it is selected before
+ device probe.
+
+- if ``init`` and ``default`` are defined in the device tree, the "init"
+ state is selected before the driver probe and the "default" state is
+ selected after the driver probe.
+
+- the ``sleep`` and ``idle`` states are for power management and can only
+ be selected with the PM API bellow.
+
+PM interfaces
+=================
+PM runtime suspend/resume might need to execute the same init sequence as
+during probe. Since the predefined states are already attached to the
+device, the driver can activate these states explicitly with the
+following helper functions:
+
+- ``pinctrl_pm_select_default_state()``
+- ``pinctrl_pm_select_init_state()``
+- ``pinctrl_pm_select_sleep_state()``
+- ``pinctrl_pm_select_idle_state()``
+
+For example, if resuming the device depend on certain pinmux states
+
+.. code-block:: c
+
+ foo_suspend()
+ {
+ /* suspend device */
+ ...
+
+ pinctrl_pm_select_sleep_state(dev);
+ }
+
+ foo_resume()
+ {
+ pinctrl_pm_select_init_state(dev);
+
+ /* resuming device */
+ ...
+
+ pinctrl_pm_select_default_state(dev);
+ }
+
This way driver writers do not need to add any of the boilerplate code
of the type found below. However when doing fine-grained state selection
and not using the "default" state, you may have to do some device driver
@@ -1185,6 +1232,12 @@ operation and going to sleep, moving from the ``PINCTRL_STATE_DEFAULT`` to
``PINCTRL_STATE_SLEEP`` at runtime, re-biasing or even re-muxing pins to save
current in sleep mode.
+Another case is when the pinctrl needs to switch to a certain mode during
+probe and then revert to the default state at the end of probe. For example
+a PINMUX may need to be configured as a GPIO during probe. In this case, use
+``PINCTRL_STATE_INIT`` to switch state before probe, then move to
+``PINCTRL_STATE_DEFAULT`` at the end of probe for normal operation.
+
A driver may request a certain control state to be activated, usually just the
default state like this:
@@ -1202,22 +1255,24 @@ default state like this:
{
/* Allocate a state holder named "foo" etc */
struct foo_state *foo = ...;
+ int ret;
foo->p = devm_pinctrl_get(&device);
if (IS_ERR(foo->p)) {
- /* FIXME: clean up "foo" here */
- return PTR_ERR(foo->p);
+ ret = PTR_ERR(foo->p);
+ foo->p = NULL;
+ return ret;
}
foo->s = pinctrl_lookup_state(foo->p, PINCTRL_STATE_DEFAULT);
if (IS_ERR(foo->s)) {
- /* FIXME: clean up "foo" here */
+ devm_pinctrl_put(foo->p);
return PTR_ERR(foo->s);
}
ret = pinctrl_select_state(foo->p, foo->s);
if (ret < 0) {
- /* FIXME: clean up "foo" here */
+ devm_pinctrl_put(foo->p);
return ret;
}
}
diff --git a/Documentation/driver-api/pldmfw/index.rst b/Documentation/driver-api/pldmfw/index.rst
index fd871b83f34f..e59beca374c1 100644
--- a/Documentation/driver-api/pldmfw/index.rst
+++ b/Documentation/driver-api/pldmfw/index.rst
@@ -14,7 +14,6 @@ the PLDM for Firmware Update standard
file-format
driver-ops
-==================================
Overview of the ``pldmfw`` library
==================================
diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
index d448cb57df86..36d5c9c9fd11 100644
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@@ -255,7 +255,7 @@ get registered: a child can never be registered, probed or resumed before
its parent; and can't be removed or suspended after that parent.
The policy is that the device hierarchy should match hardware bus topology.
-[Or at least the control bus, for devices which use multiple busses.]
+[Or at least the control bus, for devices which use multiple buses.]
In particular, this means that a device registration may fail if the parent of
the device is suspending (i.e. has been chosen by the PM core as the next
device to suspend) or has already suspended, as well as after all of the other
@@ -358,7 +358,7 @@ the phases are: ``prepare``, ``suspend``, ``suspend_late``, ``suspend_noirq``.
is probed against the device in question by passing them to the
:c:func:`dev_pm_set_driver_flags` helper function.] If the first of
these flags is set, the PM core will not apply the direct-complete
- procedure described above to the given device and, consequenty, to any
+ procedure described above to the given device and, consequently, to any
of its ancestors. The second flag, when set, informs the middle layer
code (bus types, device types, PM domains, classes) that it should take
the return value of the ``->prepare`` callback provided by the driver
@@ -493,7 +493,7 @@ states, like S3).
Drivers must also be prepared to notice that the device has been removed
while the system was powered down, whenever that's physically possible.
-PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of busses
+PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of buses
where common Linux platforms will see such removal. Details of how drivers
will notice and handle such removals are currently bus-specific, and often
involve a separate thread.
diff --git a/Documentation/driver-api/pm/index.rst b/Documentation/driver-api/pm/index.rst
index c2a9ef8d115c..4d6c32e32a72 100644
--- a/Documentation/driver-api/pm/index.rst
+++ b/Documentation/driver-api/pm/index.rst
@@ -10,10 +10,3 @@ CPU and Device Power Management
devices
notifiers
types
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/pps.rst b/Documentation/driver-api/pps.rst
index 71ad04c82d6c..598729f9cd27 100644
--- a/Documentation/driver-api/pps.rst
+++ b/Documentation/driver-api/pps.rst
@@ -206,8 +206,7 @@ To do so the class pps-gen has been added. PPS generators can be
registered in the kernel by defining a struct pps_gen_source_info as
follows::
- static struct pps_gen_source_info pps_gen_dummy_info = {
- .name = "dummy",
+ static const struct pps_gen_source_info pps_gen_dummy_info = {
.use_system_clock = true,
.get_time = pps_gen_dummy_get_time,
.enable = pps_gen_dummy_enable,
@@ -286,3 +285,27 @@ delay between assert and clear edge as small as possible to reduce system
latencies. But if it is too small slave won't be able to capture clear edge
transition. The default of 30us should be good enough in most situations.
The delay can be selected using 'delay' pps_gen_parport module parameter.
+
+
+Intel Timed I/O PPS signal generator
+------------------------------------
+
+Intel Timed I/O is a high precision device, present on 2019 and newer Intel
+CPUs, that can generate PPS signals.
+
+Timed I/O and system time are both driven by same hardware clock. The signal
+is generated with a precision of ~20 nanoseconds. The generated PPS signal
+is used to synchronize an external device with system clock. For example,
+it can be used to share your clock with a device that receives PPS signal,
+generated by Timed I/O device. There are dedicated Timed I/O pins to deliver
+the PPS signal to an external device.
+
+Usage of Intel Timed I/O as PPS generator:
+
+Start generating PPS signal::
+
+ $echo 1 > /sys/class/pps-gen/pps-genx/enable
+
+Stop generating PPS signal::
+
+ $echo 0 > /sys/class/pps-gen/pps-genx/enable
diff --git a/Documentation/driver-api/pwm.rst b/Documentation/driver-api/pwm.rst
index b41b1c56477f..0d27a40f5818 100644
--- a/Documentation/driver-api/pwm.rst
+++ b/Documentation/driver-api/pwm.rst
@@ -173,10 +173,15 @@ Locking
-------
The PWM core list manipulations are protected by a mutex, so pwm_get()
-and pwm_put() may not be called from an atomic context. Currently the
-PWM core does not enforce any locking to pwm_enable(), pwm_disable() and
-pwm_config(), so the calling context is currently driver specific. This
-is an issue derived from the former barebone API and should be fixed soon.
+and pwm_put() may not be called from an atomic context.
+Most functions in the PWM consumer API might sleep and so must not be called
+from atomic context. The notable exception is pwm_apply_atomic() which has the
+same semantics as pwm_apply_might_sleep() but can be called from atomic context.
+(The price for that is that it doesn't work for all PWM devices, use
+pwm_might_sleep() to check if a given PWM supports atomic operation.
+
+Locking in the PWM core ensures that callbacks related to a single chip are
+serialized.
Helpers
-------
diff --git a/Documentation/driver-api/reset.rst b/Documentation/driver-api/reset.rst
index 84e03d7039cc..7a6571849664 100644
--- a/Documentation/driver-api/reset.rst
+++ b/Documentation/driver-api/reset.rst
@@ -198,7 +198,6 @@ query the reset line status using reset_control_status().
reset_control_rearm
reset_control_put
of_reset_control_get_count
- of_reset_control_array_get
devm_reset_control_array_get
reset_control_get_count
@@ -218,4 +217,3 @@ devm_reset_controller_register().
reset_controller_register
reset_controller_unregister
devm_reset_controller_register
- reset_controller_add_lookup
diff --git a/Documentation/driver-api/scsi.rst b/Documentation/driver-api/scsi.rst
index bf2be96cc2d6..8bbdfb018c53 100644
--- a/Documentation/driver-api/scsi.rst
+++ b/Documentation/driver-api/scsi.rst
@@ -18,7 +18,7 @@ optical drives, test equipment, and medical devices) to a host computer.
Although the old parallel (fast/wide/ultra) SCSI bus has largely fallen
out of use, the SCSI command set is more widely used than ever to
-communicate with devices over a number of different busses.
+communicate with devices over a number of different buses.
The `SCSI protocol <https://www.t10.org/scsi-3.htm>`__ is a big-endian
peer-to-peer packet based protocol. SCSI commands are 6, 10, 12, or 16
@@ -286,7 +286,7 @@ Parallel SCSI (SPI) transport class
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The file drivers/scsi/scsi_transport_spi.c defines transport
-attributes for traditional (fast/wide/ultra) SCSI busses.
+attributes for traditional (fast/wide/ultra) SCSI buses.
.. kernel-doc:: drivers/scsi/scsi_transport_spi.c
:export:
diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst
index 84b43061c11b..c1db6a1a67c4 100644
--- a/Documentation/driver-api/serial/driver.rst
+++ b/Documentation/driver-api/serial/driver.rst
@@ -24,9 +24,8 @@ console support.
Console Support
---------------
-The serial core provides a few helper functions. This includes identifying
-the correct port structure (via uart_get_console()) and decoding command line
-arguments (uart_parse_options()).
+The serial core provides a few helper functions. This includes
+decoding command line arguments (uart_parse_options()).
There is also a helper function (uart_console_write()) which performs a
character by character write, translating newlines to CRLF sequences.
@@ -76,7 +75,7 @@ Other functions
uart_add_one_port uart_remove_one_port uart_console_write
uart_parse_earlycon uart_parse_options uart_set_options
uart_get_lsr_info uart_handle_dcd_change uart_handle_cts_change
- uart_try_toggle_sysrq uart_get_console
+ uart_try_toggle_sysrq
.. kernel-doc:: include/linux/serial_core.h
:identifiers: uart_port_tx_limited uart_port_tx
@@ -101,6 +100,6 @@ Modem control lines via GPIO
Some helpers are provided in order to set/get modem control lines via GPIO.
.. kernel-doc:: drivers/tty/serial/serial_mctrl_gpio.c
- :identifiers: mctrl_gpio_init mctrl_gpio_free mctrl_gpio_to_gpiod
+ :identifiers: mctrl_gpio_init mctrl_gpio_to_gpiod
mctrl_gpio_set mctrl_gpio_get mctrl_gpio_enable_ms
- mctrl_gpio_disable_ms
+ mctrl_gpio_disable_ms_sync mctrl_gpio_disable_ms_no_sync
diff --git a/Documentation/driver-api/serial/index.rst b/Documentation/driver-api/serial/index.rst
index 03a55b987a1d..610744df5e8d 100644
--- a/Documentation/driver-api/serial/index.rst
+++ b/Documentation/driver-api/serial/index.rst
@@ -18,10 +18,3 @@ Serial drivers
serial-iso7816
serial-rs485
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/soundwire/bra.rst b/Documentation/driver-api/soundwire/bra.rst
new file mode 100644
index 000000000000..c08ab2591496
--- /dev/null
+++ b/Documentation/driver-api/soundwire/bra.rst
@@ -0,0 +1,336 @@
+==========================
+Bulk Register Access (BRA)
+==========================
+
+Conventions
+-----------
+
+Capitalized words used in this documentation are intentional and refer
+to concepts of the SoundWire 1.x specification.
+
+Introduction
+------------
+
+The SoundWire 1.x specification provides a mechanism to speed-up
+command/control transfers by reclaiming parts of the audio
+bandwidth. The Bulk Register Access (BRA) protocol is a standard
+solution based on the Bulk Payload Transport (BPT) definitions.
+
+The regular control channel uses Column 0 and can only send/retrieve
+one byte per frame with write/read commands. With a typical 48kHz
+frame rate, only 48kB/s can be transferred.
+
+The optional Bulk Register Access capability can transmit up to 12
+Mbits/s and reduce transfer times by several orders of magnitude, but
+has multiple design constraints:
+
+ (1) Each frame can only support a read or a write transfer, with a
+ 10-byte overhead per frame (header and footer response).
+
+ (2) The read/writes SHALL be from/to contiguous register addresses
+ in the same frame. A fragmented register space decreases the
+ efficiency of the protocol by requiring multiple BRA transfers
+ scheduled in different frames.
+
+ (3) The targeted Peripheral device SHALL support the optional Data
+ Port 0, and likewise the Manager SHALL expose audio-like Ports
+ to insert BRA packets in the audio payload using the concepts of
+ Sample Interval, HSTART, HSTOP, etc.
+
+ (4) The BRA transport efficiency depends on the available
+ bandwidth. If there are no on-going audio transfers, the entire
+ frame minus Column 0 can be reclaimed for BRA. The frame shape
+ also impacts efficiency: since Column0 cannot be used for
+ BTP/BRA, the frame should rely on a large number of columns and
+ minimize the number of rows. The bus clock should be as high as
+ possible.
+
+ (5) The number of bits transferred per frame SHALL be a multiple of
+ 8 bits. Padding bits SHALL be inserted if necessary at the end
+ of the data.
+
+ (6) The regular read/write commands can be issued in parallel with
+ BRA transfers. This is convenient to e.g. deal with alerts, jack
+ detection or change the volume during firmware download, but
+ accessing the same address with two independent protocols has to
+ be avoided to avoid undefined behavior.
+
+ (7) Some implementations may not be capable of handling the
+ bandwidth of the BRA protocol, e.g. in the case of a slow I2C
+ bus behind the SoundWire IP. In this case, the transfers may
+ need to be spaced in time or flow-controlled.
+
+ (8) Each BRA packet SHALL be marked as 'Active' when valid data is
+ to be transmitted. This allows for software to allocate a BRA
+ stream but not transmit/discard data while processing the
+ results or preparing the next batch of data, or allowing the
+ peripheral to deal with the previous transfer. In addition BRA
+ transfer can be started early on without data being ready.
+
+ (9) Up to 470 bytes may be transmitted per frame.
+
+ (10) The address is represented with 32 bits and does not rely on
+ the paging registers used for the regular command/control
+ protocol in Column 0.
+
+
+Error checking
+--------------
+
+Firmware download is one of the key usages of the Bulk Register Access
+protocol. To make sure the binary data integrity is not compromised by
+transmission or programming errors, each BRA packet provides:
+
+ (1) A CRC on the 7-byte header. This CRC helps the Peripheral Device
+ check if it is addressed and set the start address and number of
+ bytes. The Peripheral Device provides a response in Byte 7.
+
+ (2) A CRC on the data block (header excluded). This CRC is
+ transmitted as the last-but-one byte in the packet, prior to the
+ footer response.
+
+The header response can be one of:
+ (a) Ack
+ (b) Nak
+ (c) Not Ready
+
+The footer response can be one of:
+ (1) Ack
+ (2) Nak (CRC failure)
+ (3) Good (operation completed)
+ (4) Bad (operation failed)
+
+Example frame
+-------------
+
+The example below is not to scale and makes simplifying assumptions
+for clarity. The different chunks in the BRA packets are not required
+to start on a new SoundWire Row, and the scale of data may vary.
+
+ ::
+
+ +---+--------------------------------------------+
+ + | |
+ + | BRA HEADER |
+ + | |
+ + +--------------------------------------------+
+ + C | HEADER CRC |
+ + O +--------------------------------------------+
+ + M | HEADER RESPONSE |
+ + M +--------------------------------------------+
+ + A | |
+ + N | |
+ + D | DATA |
+ + | |
+ + | |
+ + | |
+ + +--------------------------------------------+
+ + | DATA CRC |
+ + +--------------------------------------------+
+ + | FOOTER RESPONSE |
+ +---+--------------------------------------------+
+
+
+Assuming the frame uses N columns, the configuration shown above can
+be programmed by setting the DP0 registers as:
+
+ - HSTART = 1
+ - HSTOP = N - 1
+ - Sampling Interval = N
+ - WordLength = N - 1
+
+Addressing restrictions
+-----------------------
+
+The Device Number specified in the Header follows the SoundWire
+definitions, and broadcast and group addressing are permitted. For now
+the Linux implementation only allows for a single BPT transfer to a
+single device at a time. This might be revisited at a later point as
+an optimization to send the same firmware to multiple devices, but
+this would only be beneficial for single-link solutions.
+
+In the case of multiple Peripheral devices attached to different
+Managers, the broadcast and group addressing is not supported by the
+SoundWire specification. Each device must be handled with separate BRA
+streams, possibly in parallel - the links are really independent.
+
+Unsupported features
+--------------------
+
+The Bulk Register Access specification provides a number of
+capabilities that are not supported in known implementations, such as:
+
+ (1) Transfers initiated by a Peripheral Device. The BRA Initiator is
+ always the Manager Device.
+
+ (2) Flow-control capabilities and retransmission based on the
+ 'NotReady' header response require extra buffering in the
+ SoundWire IP and are not implemented.
+
+Bi-directional handling
+-----------------------
+
+The BRA protocol can handle writes as well as reads, and in each
+packet the header and footer response are provided by the Peripheral
+Target device. On the Peripheral device, the BRA protocol is handled
+by a single DP0 data port, and at the low-level the bus ownership can
+will change for header/footer response as well as the data transmitted
+during a read.
+
+On the host side, most implementations rely on a Port-like concept,
+with two FIFOs consuming/generating data transfers in parallel
+(Host->Peripheral and Peripheral->Host). The amount of data
+consumed/produced by these FIFOs is not symmetrical, as a result
+hardware typically inserts markers to help software and hardware
+interpret raw data
+
+Each packet will typically have:
+
+ (1) a 'Start of Packet' indicator.
+
+ (2) an 'End of Packet' indicator.
+
+ (3) a packet identifier to correlate the data requested and
+ transmitted, and the error status for each frame
+
+Hardware implementations can check errors at the frame level, and
+retry a transfer in case of errors. However, as for the flow-control
+case, this requires extra buffering and intelligence in the
+hardware. The Linux support assumes that the entire transfer is
+cancelled if a single error is detected in one of the responses.
+
+Abstraction required
+~~~~~~~~~~~~~~~~~~~~
+
+There are no standard registers or mandatory implementation at the
+Manager level, so the low-level BPT/BRA details must be hidden in
+Manager-specific code. For example the Cadence IP format above is not
+known to the codec drivers.
+
+Likewise, codec drivers should not have to know the frame size. The
+computation of CRC and handling of responses is handled in helpers and
+Manager-specific code.
+
+The host BRA driver may also have restrictions on pages allocated for
+DMA, or other host-DSP communication protocols. The codec driver
+should not be aware of any of these restrictions, since it might be
+reused in combination with different implementations of Manager IPs.
+
+Concurrency between BRA and regular read/write
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The existing 'nread/nwrite' API already relies on a notion of start
+address and number of bytes, so it would be possible to extend this
+API with a 'hint' requesting BPT/BRA be used.
+
+However BRA transfers could be quite long, and the use of a single
+mutex for regular read/write and BRA is a show-stopper. Independent
+operation of the control/command and BRA transfers is a fundamental
+requirement, e.g. to change the volume level with the existing regmap
+interface while downloading firmware. The integration must however
+ensure that there are no concurrent access to the same address with
+the command/control protocol and the BRA protocol.
+
+In addition, the 'sdw_msg' structure hard-codes support for 16-bit
+addresses and paging registers which are irrelevant for BPT/BRA
+support based on native 32-bit addresses. A separate API with
+'sdw_bpt_msg' makes more sense.
+
+One possible strategy to speed-up all initialization tasks would be to
+start a BRA transfer for firmware download, then deal with all the
+"regular" read/writes in parallel with the command channel, and last
+to wait for the BRA transfers to complete. This would allow for a
+degree of overlap instead of a purely sequential solution. As such,
+the BRA API must support async transfers and expose a separate wait
+function.
+
+
+Peripheral/bus interface
+------------------------
+
+The bus interface for BPT/BRA is made of two functions:
+
+ - sdw_bpt_send_async(bpt_message)
+
+ This function sends the data using the Manager
+ implementation-defined capabilities (typically DMA or IPC
+ protocol).
+
+ Queueing is currently not supported, the caller
+ needs to wait for completion of the requested transfer.
+
+ - sdw_bpt_wait()
+
+ This function waits for the entire message provided by the
+ codec driver in the 'send_async' stage. Intermediate status for
+ smaller chunks will not be provided back to the codec driver,
+ only a return code will be provided.
+
+Regmap use
+~~~~~~~~~~
+
+Existing codec drivers rely on regmap to download firmware to
+Peripherals. regmap exposes an async interface similar to the
+send/wait API suggested above, so at a high-level it would seem
+natural to combine BRA and regmap. The regmap layer could check if BRA
+is available or not, and use a regular read-write command channel in
+the latter case.
+
+The regmap integration will be handled in a second step.
+
+BRA stream model
+----------------
+
+For regular audio transfers, the machine driver exposes a dailink
+connecting CPU DAI(s) and Codec DAI(s).
+
+This model is not required BRA support:
+
+ (1) The SoundWire DAIs are mainly wrappers for SoundWire Data
+ Ports, with possibly some analog or audio conversion
+ capabilities bolted behind the Data Port. In the context of
+ BRA, the DP0 is the destination. DP0 registers are standard and
+ can be programmed blindly without knowing what Peripheral is
+ connected to each link. In addition, if there are multiple
+ Peripherals on a link and some of them do not support DP0, the
+ write commands to program DP0 registers will generate harmless
+ COMMAND_IGNORED responses that will be wired-ORed with
+ responses from Peripherals which support DP0. In other words,
+ the DP0 programming can be done with broadcast commands, and
+ the information on the Target device can be added only in the
+ BRA Header.
+
+ (2) At the CPU level, the DAI concept is not useful for BRA; the
+ machine driver will not create a dailink relying on DP0. The
+ only concept that is needed is the notion of port.
+
+ (3) The stream concept relies on a set of master_rt and slave_rt
+ concepts. All of these entities represent ports and not DAIs.
+
+ (4) With the assumption that a single BRA stream is used per link,
+ that stream can connect master ports as well as all peripheral
+ DP0 ports.
+
+ (5) BRA transfers only make sense in the context of one
+ Manager/Link, so the BRA stream handling does not rely on the
+ concept of multi-link aggregation allowed by regular DAI links.
+
+Audio DMA support
+-----------------
+
+Some DMAs, such as HDaudio, require an audio format field to be
+set. This format is in turn used to define acceptable bursts. BPT/BRA
+support is not fully compatible with these definitions in that the
+format and bandwidth may vary between read and write commands.
+
+In addition, on Intel HDaudio Intel platforms the DMAs need to be
+programmed with a PCM format matching the bandwidth of the BPT/BRA
+transfer. The format is based on 192kHz 32-bit samples, and the number
+of channels varies to adjust the bandwidth. The notion of channel is
+completely notional since the data is not typical audio
+PCM. Programming such channels helps reserve enough bandwidth and adjust
+FIFO sizes to avoid xruns.
+
+Alignment requirements are currently not enforced at the core level
+but at the platform-level, e.g. for Intel the data sizes must be
+equal to or larger than 16 bytes.
diff --git a/Documentation/driver-api/soundwire/bra_cadence.rst b/Documentation/driver-api/soundwire/bra_cadence.rst
new file mode 100644
index 000000000000..4560752e8f47
--- /dev/null
+++ b/Documentation/driver-api/soundwire/bra_cadence.rst
@@ -0,0 +1,66 @@
+Cadence IP BRA support
+----------------------
+
+Format requirements
+~~~~~~~~~~~~~~~~~~~
+
+The Cadence IP relies on PDI0 for TX and PDI1 for RX. The data needs
+to be formatted with the following conventions:
+
+ (1) all Data is stored in bits 15..0 of the 32-bit PDI FIFOs.
+
+ (2) the start of packet is BIT(31).
+
+ (3) the end of packet is BIT(30).
+
+ (4) A packet ID is stored in bits 19..16. This packet ID is
+ determined by software and is typically a rolling counter.
+
+ (5) Padding shall be inserted as needed so that the Header CRC,
+ Header response, Footer CRC, Footer response are always in
+ Byte0. Padding is inserted by software for writes, and on reads
+ software shall discard the padding added by the hardware.
+
+Example format
+~~~~~~~~~~~~~~
+
+The following table represents the sequence provided to PDI0 for a
+write command followed by a read command.
+
+::
+
+ +---+---+--------+---------------+---------------+
+ + 1 | 0 | ID = 0 | WR HDR[1] | WR HDR[0] |
+ + | | | WR HDR[3] | WR HDR[2] |
+ + | | | WR HDR[5] | WR HDR[4] |
+ + | | | pad | WR HDR CRC |
+ + | | | WR Data[1] | WR Data[0] |
+ + | | | WR Data[3] | WR Data[2] |
+ + | | | WR Data[n-2] | WR Data[n-3] |
+ + | | | pad | WR Data[n-1] |
+ + 0 | 1 | | pad | WR Data CRC |
+ +---+---+--------+---------------+---------------+
+ + 1 | 0 | ID = 1 | RD HDR[1] | RD HDR[0] |
+ + | | | RD HDR[3] | RD HDR[2] |
+ + | | | RD HDR[5] | RD HDR[4] |
+ + 0 | 1 | | pad | RD HDR CRC |
+ +---+---+--------+---------------+---------------+
+
+
+The table below represents the data received on PDI1 for the same
+write command followed by a read command.
+
+::
+
+ +---+---+--------+---------------+---------------+
+ + 1 | 0 | ID = 0 | pad | WR Hdr Rsp |
+ + 0 | 1 | | pad | WR Ftr Rsp |
+ +---+---+--------+---------------+---------------+
+ + 1 | 0 | ID = 0 | pad | Rd Hdr Rsp |
+ + | | | RD Data[1] | RD Data[0] |
+ + | | | RD Data[3] | RD Data[2] |
+ + | | | RD HDR[n-2] | RD Data[n-3] |
+ + | | | pad | RD Data[n-1] |
+ + | | | pad | RD Data CRC |
+ + 0 | 1 | | pad | RD Ftr Rsp |
+ +---+---+--------+---------------+---------------+
diff --git a/Documentation/driver-api/soundwire/index.rst b/Documentation/driver-api/soundwire/index.rst
index 234911a0db99..f7abf4a95be7 100644
--- a/Documentation/driver-api/soundwire/index.rst
+++ b/Documentation/driver-api/soundwire/index.rst
@@ -9,10 +9,5 @@ SoundWire Documentation
stream
error_handling
locking
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
+ bra
+ bra_cadence
diff --git a/Documentation/driver-api/soundwire/stream.rst b/Documentation/driver-api/soundwire/stream.rst
index 2a794484f62c..d66201299633 100644
--- a/Documentation/driver-api/soundwire/stream.rst
+++ b/Documentation/driver-api/soundwire/stream.rst
@@ -291,7 +291,7 @@ per stream. From ASoC DPCM framework, this stream state maybe linked to
.. code-block:: c
- int sdw_alloc_stream(char * stream_name);
+ int sdw_alloc_stream(char * stream_name, enum sdw_stream_type type);
The SoundWire core provides a sdw_startup_stream() helper function,
typically called during a dailink .startup() callback, which performs
diff --git a/Documentation/driver-api/soundwire/summary.rst b/Documentation/driver-api/soundwire/summary.rst
index 01dcb954f6d7..df78053743b5 100644
--- a/Documentation/driver-api/soundwire/summary.rst
+++ b/Documentation/driver-api/soundwire/summary.rst
@@ -184,14 +184,6 @@ function that provides capabilities information. Bus needs to know a set of
Slave capabilities to program Slave registers and to control the Bus
reconfigurations.
-Future enhancements to be done
-==============================
-
- (1) Bulk Register Access (BRA) transfers.
-
-
- (2) Multiple data lane support.
-
Links
=====
diff --git a/Documentation/driver-api/spi.rst b/Documentation/driver-api/spi.rst
index f28887045049..74eca6735042 100644
--- a/Documentation/driver-api/spi.rst
+++ b/Documentation/driver-api/spi.rst
@@ -13,7 +13,7 @@ additional chipselect line is usually active-low (nCS); four signals are
normally used for each peripheral, plus sometimes an interrupt.
The SPI bus facilities listed here provide a generalized interface to
-declare SPI busses and devices, manage them according to the standard
+declare SPI buses and devices, manage them according to the standard
Linux driver model, and perform input/output operations. At this time,
only "master" side interfaces are supported, where Linux talks to SPI
peripherals and does not implement such a peripheral itself. (Interfaces
diff --git a/Documentation/driver-api/surface_aggregator/clients/index.rst b/Documentation/driver-api/surface_aggregator/clients/index.rst
index 30160513afa5..c32313b8f3b7 100644
--- a/Documentation/driver-api/surface_aggregator/clients/index.rst
+++ b/Documentation/driver-api/surface_aggregator/clients/index.rst
@@ -14,10 +14,3 @@ on how to write client drivers.
cdev
dtx
san
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/surface_aggregator/index.rst b/Documentation/driver-api/surface_aggregator/index.rst
index 6f3e1094904d..f0128fe59a32 100644
--- a/Documentation/driver-api/surface_aggregator/index.rst
+++ b/Documentation/driver-api/surface_aggregator/index.rst
@@ -12,10 +12,3 @@ Surface System Aggregator Module (SSAM)
clients/index
ssh
internal
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/tee.rst b/Documentation/driver-api/tee.rst
index 5eaeb8103988..4d58ac0712c1 100644
--- a/Documentation/driver-api/tee.rst
+++ b/Documentation/driver-api/tee.rst
@@ -43,24 +43,12 @@ snippet would look like::
MODULE_DEVICE_TABLE(tee, client_id_table);
static struct tee_client_driver client_driver = {
+ .probe = client_probe,
+ .remove = client_remove,
.id_table = client_id_table,
.driver = {
.name = DRIVER_NAME,
- .bus = &tee_bus_type,
- .probe = client_probe,
- .remove = client_remove,
},
};
- static int __init client_init(void)
- {
- return driver_register(&client_driver.driver);
- }
-
- static void __exit client_exit(void)
- {
- driver_unregister(&client_driver.driver);
- }
-
- module_init(client_init);
- module_exit(client_exit);
+ module_tee_client_driver(client_driver);
diff --git a/Documentation/driver-api/thermal/exynos_thermal_emulation.rst b/Documentation/driver-api/thermal/exynos_thermal_emulation.rst
index c21d10838bc5..c679502f01c7 100644
--- a/Documentation/driver-api/thermal/exynos_thermal_emulation.rst
+++ b/Documentation/driver-api/thermal/exynos_thermal_emulation.rst
@@ -28,13 +28,13 @@ changed into it.
delay of changing temperature. However, this node only uses same delay
of real sensing time, 938us.)
-Exynos emulation mode requires synchronous of value changing and
-enabling. It means when you want to update the any value of delay or
-next temperature, then you have to enable emulation mode at the same
-time. (Or you have to keep the mode enabling.) If you don't, it fails to
-change the value to updated one and just use last succeessful value
-repeatedly. That's why this node gives users the right to change
-termerpature only. Just one interface makes it more simply to use.
+Exynos emulation mode requires that value changes and enabling are performed
+synchronously. This means that when you want to update any value, such as the
+delay or the next temperature, you must enable emulation mode at the same
+time (or keep the mode enabled). If you do not, the value will fail to update
+and the last successful value will continue to be used. For this reason,
+this node only allows users to change the temperature. Providing a single
+interface makes it simpler to use.
Disabling emulation mode only requires writing value 0 to sysfs node.
diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
index 8fb8c5b2d685..4adfa1eb74db 100644
--- a/Documentation/driver-api/thermal/intel_dptf.rst
+++ b/Documentation/driver-api/thermal/intel_dptf.rst
@@ -191,6 +191,36 @@ ABI.
User space can specify any one of the available workload type using
this interface.
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/ptc_0_control`
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/ptc_1_control`
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/ptc_2_control`
+
+All these controls needs admin privilege to update.
+
+``enable`` (RW)
+ 1 for enable, 0 for disable. Shows the current enable status of
+ platform temperature control feature. User space can enable/disable
+ hardware controls.
+
+``temperature_target`` (RW)
+ Update a new temperature target in milli degree celsius for hardware to
+ use for the temperature control.
+
+``thermal_tolerance`` (RW)
+ This attribute ranges from 0 to 7, where 0 represents
+ the most aggressive control to avoid any temperature overshoots, and
+ 7 represents a more graceful approach, favoring performance even at
+ the expense of temperature overshoots.
+ Note: This level may not scale linearly. For example, a value of 3 does
+ not necessarily imply a 50% improvement in performance compared to a
+ value of 0.
+
+Given that this is platform temperature control, it is expected that a
+single user-level manager owns and manages the controls. If multiple
+user-level software applications attempt to write different targets, it
+can lead to unexpected behavior.
+
+
DPTF Processor thermal RFIM interface
--------------------------------------------
@@ -345,6 +375,9 @@ based on the processor generation.
``workload_hint_enable`` (RW)
Enable firmware to send workload type hints to user space.
+``workload_slow_hint_enable`` (RW)
+ Enable firmware to send slow workload type hints to user space.
+
``notification_delay_ms`` (RW)
Minimum delay in milliseconds before firmware will notify OS. This is
for the rate control of notifications. This delay is between changing
@@ -379,3 +412,26 @@ based on the processor generation.
Limit 1 from being exhausted.
4 – Unknown: Can't classify.
+
+ On processors starting from Panther Lake additional hints are provided.
+ The hardware analyzes workload residencies over an extended period to
+ determine whether the workload classification tends toward idle/battery
+ life states or sustained/performance states. Based on this long-term
+ analysis, it classifies:
+
+ Power Classification: If the workload exhibits more idle or battery life
+ residencies, it is classified as "power".
+
+ Performance Classification: If the workload exhibits more sustained or
+ performance residencies, it is classified as "performance".
+
+ This approach enables applications to ignore short-term workload
+ fluctuations and instead respond to longer-term power vs. performance
+ trends.
+
+ Residency thresholds for this classification are CPU generation-specific.
+ Classification is reported via bit 4 of the workload_type_index:
+
+ Bit 4 = 1: Power classification
+
+ Bit 4 = 0: Performance classification
diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst
index c803b89b7248..f73de211bdce 100644
--- a/Documentation/driver-api/thermal/sysfs-api.rst
+++ b/Documentation/driver-api/thermal/sysfs-api.rst
@@ -413,18 +413,21 @@ This function serves as an arbitrator to set the state of a cooling
device. It sets the cooling device to the deepest cooling state if
possible.
-5. thermal_emergency_poweroff
-=============================
+5. Critical Events
+==================
+
+On an event of critical trip temperature crossing, the thermal framework
+will trigger a hardware protection power-off (shutdown) or reboot,
+depending on configuration.
-On an event of critical trip temperature crossing the thermal framework
-shuts down the system by calling hw_protection_shutdown(). The
-hw_protection_shutdown() first attempts to perform an orderly shutdown
-but accepts a delay after which it proceeds doing a forced power-off
-or as last resort an emergency_restart.
+At first, the kernel will attempt an orderly power-off or reboot, but
+accepts a delay after which it proceeds to do a forced power-off or
+reboot, respectively. If this fails, ``emergency_restart()`` is invoked
+as last resort.
The delay should be carefully profiled so as to give adequate time for
-orderly poweroff.
+orderly power-off or reboot.
-If the delay is set to 0 emergency poweroff will not be supported. So a
-carefully profiled non-zero positive value is a must for emergency
-poweroff to be triggered.
+If the delay is set to 0, the emergency action will not be supported. So a
+carefully profiled non-zero positive value is a must for the emergency
+action to be triggered.
diff --git a/Documentation/driver-api/tty/tty_driver.rst b/Documentation/driver-api/tty/tty_driver.rst
index cc529f863406..7138222a70f2 100644
--- a/Documentation/driver-api/tty/tty_driver.rst
+++ b/Documentation/driver-api/tty/tty_driver.rst
@@ -25,6 +25,8 @@ freed.
For reference, both allocation and deallocation functions are explained here in
detail:
+.. kernel-doc:: include/linux/tty_driver.h
+ :identifiers: tty_alloc_driver
.. kernel-doc:: drivers/tty/tty_io.c
:identifiers: __tty_alloc_driver tty_driver_kref_put
@@ -35,7 +37,7 @@ Here comes the documentation of flags accepted by tty_alloc_driver() (or
__tty_alloc_driver()):
.. kernel-doc:: include/linux/tty_driver.h
- :doc: TTY Driver Flags
+ :identifiers: tty_driver_flag
----
diff --git a/Documentation/driver-api/tty/tty_ldisc.rst b/Documentation/driver-api/tty/tty_ldisc.rst
index 5144751be804..d034e117c232 100644
--- a/Documentation/driver-api/tty/tty_ldisc.rst
+++ b/Documentation/driver-api/tty/tty_ldisc.rst
@@ -18,7 +18,7 @@ Registration
Line disciplines are registered with tty_register_ldisc() passing the ldisc
structure. At the point of registration the discipline must be ready to use and
it is possible it will get used before the call returns success. If the call
-returns an error then it won’t get called. Do not re-use ldisc numbers as they
+returns an error then it won’t get called. Do not reuse ldisc numbers as they
are part of the userspace ABI and writing over an existing ldisc will cause
demons to eat your computer. You must not re-register over the top of the line
discipline even with the same data or your computer again will be eaten by
diff --git a/Documentation/driver-api/tty/tty_port.rst b/Documentation/driver-api/tty/tty_port.rst
index 5cb90e954fcf..504a353f2682 100644
--- a/Documentation/driver-api/tty/tty_port.rst
+++ b/Documentation/driver-api/tty/tty_port.rst
@@ -42,9 +42,10 @@ TTY Refcounting
TTY Helpers
-----------
+.. kernel-doc:: include/linux/tty_port.h
+ :identifiers: tty_port_tty_hangup tty_port_tty_vhangup
.. kernel-doc:: drivers/tty/tty_port.c
- :identifiers: tty_port_tty_hangup tty_port_tty_wakeup
-
+ :identifiers: tty_port_tty_wakeup
Modem Signals
-------------
diff --git a/Documentation/driver-api/tty/tty_struct.rst b/Documentation/driver-api/tty/tty_struct.rst
index c72f5a4293b2..29caf1c1ca5f 100644
--- a/Documentation/driver-api/tty/tty_struct.rst
+++ b/Documentation/driver-api/tty/tty_struct.rst
@@ -72,7 +72,7 @@ TTY Struct Flags
================
.. kernel-doc:: include/linux/tty.h
- :doc: TTY Struct Flags
+ :identifiers: tty_struct_flags
TTY Struct Reference
====================
diff --git a/Documentation/driver-api/usb/anchors.rst b/Documentation/driver-api/usb/anchors.rst
index 4b248e691bd6..5a93d171e76c 100644
--- a/Documentation/driver-api/usb/anchors.rst
+++ b/Documentation/driver-api/usb/anchors.rst
@@ -45,17 +45,6 @@ This function kills all URBs associated with an anchor. The URBs
are called in the reverse temporal order they were submitted.
This way no data can be reordered.
-:c:func:`usb_unlink_anchored_urbs`
-----------------------------------
-
-
-This function unlinks all URBs associated with an anchor. The URBs
-are processed in the reverse temporal order they were submitted.
-This is similar to :c:func:`usb_kill_anchored_urbs`, but it will not sleep.
-Therefore no guarantee is made that the URBs have been unlinked when
-the call returns. They may be unlinked later but will be unlinked in
-finite time.
-
:c:func:`usb_scuttle_anchored_urbs`
-----------------------------------
diff --git a/Documentation/driver-api/usb/gadget.rst b/Documentation/driver-api/usb/gadget.rst
index 09396edd6131..6f0c67885392 100644
--- a/Documentation/driver-api/usb/gadget.rst
+++ b/Documentation/driver-api/usb/gadget.rst
@@ -459,7 +459,7 @@ Linux-USB host side driver stack, or as a peripheral, using this
``gadget`` framework. To do that, the system software relies on small
additions to those programming interfaces, and on a new internal
component (here called an "OTG Controller") affecting which driver stack
-connects to the OTG port. In each role, the system can re-use the
+connects to the OTG port. In each role, the system can reuse the
existing pool of hardware-neutral drivers, layered on top of the
controller driver interfaces (:c:type:`usb_bus` or :c:type:`usb_gadget`).
Such drivers need at most minor changes, and most of the calls added to
diff --git a/Documentation/driver-api/usb/hotplug.rst b/Documentation/driver-api/usb/hotplug.rst
index c1e13107c50e..12260f704a01 100644
--- a/Documentation/driver-api/usb/hotplug.rst
+++ b/Documentation/driver-api/usb/hotplug.rst
@@ -5,7 +5,7 @@ Linux Hotplugging
=================
-In hotpluggable busses like USB (and Cardbus PCI), end-users plug devices
+In hotpluggable buses like USB (and Cardbus PCI), end-users plug devices
into the bus with power on. In most cases, users expect the devices to become
immediately usable. That means the system must do many things, including:
diff --git a/Documentation/driver-api/usb/index.rst b/Documentation/driver-api/usb/index.rst
index cfa8797ea614..a32819963b99 100644
--- a/Documentation/driver-api/usb/index.rst
+++ b/Documentation/driver-api/usb/index.rst
@@ -3,6 +3,7 @@ Linux USB API
=============
.. toctree::
+ :maxdepth: 1
usb
gadget
@@ -21,10 +22,3 @@ Linux USB API
typec
typec_bus
usb3-debug-port
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`
diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst
index 89f9c37bb979..7f2f41e80c1c 100644
--- a/Documentation/driver-api/usb/usb.rst
+++ b/Documentation/driver-api/usb/usb.rst
@@ -13,7 +13,7 @@ structure, with the host as the root (the system's master), hubs as
interior nodes, and peripherals as leaves (and slaves). Modern PCs
support several such trees of USB devices, usually
a few USB 3.0 (5 GBit/s) or USB 3.1 (10 GBit/s) and some legacy
-USB 2.0 (480 MBit/s) busses just in case.
+USB 2.0 (480 MBit/s) buses just in case.
That master/slave asymmetry was designed-in for a number of reasons, one
being ease of use. It is not physically possible to mistake upstream and
@@ -42,7 +42,7 @@ two. One is intended for *general-purpose* drivers (exposed through
driver frameworks), and the other is for drivers that are *part of the
core*. Such core drivers include the *hub* driver (which manages trees
of USB devices) and several different kinds of *host controller
-drivers*, which control individual busses.
+drivers*, which control individual buses.
The device model seen by USB drivers is relatively complex.
@@ -161,6 +161,7 @@ rely on 64bit DMA to eliminate another kind of bounce buffer.
.. kernel-doc:: drivers/usb/core/urb.c
:export:
+.. c:namespace:: usb_core
.. kernel-doc:: drivers/usb/core/message.c
:export:
diff --git a/Documentation/driver-api/usb/writing_musb_glue_layer.rst b/Documentation/driver-api/usb/writing_musb_glue_layer.rst
index e755c8551bba..b748b9fb1965 100644
--- a/Documentation/driver-api/usb/writing_musb_glue_layer.rst
+++ b/Documentation/driver-api/usb/writing_musb_glue_layer.rst
@@ -613,7 +613,7 @@ endpoints configuration from the hardware, so we use line 12 instruction
to bypass reading the configuration from silicon, and rely on a
hard-coded table that describes the endpoints configuration instead::
- static struct musb_fifo_cfg jz4740_musb_fifo_cfg[] = {
+ static const struct musb_fifo_cfg jz4740_musb_fifo_cfg[] = {
{ .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, },
{ .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, },
{ .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 64, },
@@ -709,7 +709,7 @@ Resources
USB Home Page: https://www.usb.org
-linux-usb Mailing List Archives: https://marc.info/?l=linux-usb
+linux-usb Mailing List Archives: https://lore.kernel.org/linux-usb
USB On-the-Go Basics:
https://www.maximintegrated.com/app-notes/index.mvp/id/1822
diff --git a/Documentation/driver-api/vme.rst b/Documentation/driver-api/vme.rst
index c0b475369de0..7111999abc14 100644
--- a/Documentation/driver-api/vme.rst
+++ b/Documentation/driver-api/vme.rst
@@ -107,7 +107,7 @@ The function :c:func:`vme_master_read` can be used to read from and
In addition to simple reads and writes, :c:func:`vme_master_rmw` is provided to
do a read-modify-write transaction. Parts of a VME window can also be mapped
-into user space memory using :c:func:`vme_master_mmap`.
+into user space memory using :c:func:`vme_master_mmap_prepare`.
Slave windows
diff --git a/Documentation/driver-api/wmi.rst b/Documentation/driver-api/wmi.rst
index 4e8dbdb1fc67..b847bcdcbb09 100644
--- a/Documentation/driver-api/wmi.rst
+++ b/Documentation/driver-api/wmi.rst
@@ -16,5 +16,8 @@ which will be bound to compatible WMI devices by the driver core.
.. kernel-doc:: include/linux/wmi.h
:internal:
-.. kernel-doc:: drivers/platform/x86/wmi.c
+.. kernel-doc:: drivers/platform/wmi/string.c
+ :export:
+
+.. kernel-doc:: drivers/platform/wmi/core.c
:export:
diff --git a/Documentation/driver-api/xilinx/index.rst b/Documentation/driver-api/xilinx/index.rst
index 13f7589ed442..c95bda55da6f 100644
--- a/Documentation/driver-api/xilinx/index.rst
+++ b/Documentation/driver-api/xilinx/index.rst
@@ -7,10 +7,3 @@ Xilinx FPGA
:maxdepth: 1
eemi
-
-.. only:: subproject and html
-
- Indices
- =======
-
- * :ref:`genindex`