diff options
| author | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2026-03-04 13:14:08 +0530 |
|---|---|---|
| committer | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2026-03-05 19:38:55 -0500 |
| commit | c36218dc49f5e9ef9e3074670fdae7ac3a7e794f (patch) | |
| tree | 18aae01ce22efb3d3abf70cb0be551c82480b297 /include | |
| parent | 724b4a12116fdb60130b89778611482bf8de0a6a (diff) | |
| download | lwn-c36218dc49f5e9ef9e3074670fdae7ac3a7e794f.tar.gz lwn-c36218dc49f5e9ef9e3074670fdae7ac3a7e794f.zip | |
drm/ras: Introduce the DRM RAS infrastructure over generic netlink
Introduces the DRM RAS infrastructure over generic netlink.
The new interface allows drivers to expose RAS nodes and their
associated error counters to userspace in a structured and extensible
way. Each drm_ras node can register its own set of error counters, which
are then discoverable and queryable through netlink operations. This
lays the groundwork for reporting and managing hardware error states
in a unified manner across different DRM drivers.
Currently it only supports error-counter nodes, but it can be
extended later.
The registration is also not tied to any DRM node, so it can be
used by accel devices as well.
It uses the new and mandatory YAML description format stored in
Documentation/netlink/specs/. This forces a single generic netlink
family namespace for the entire DRM subsystem: "drm-ras".
However, multiple endpoints are supported within this single family.
Any modification to this API needs to be applied to
Documentation/netlink/specs/drm_ras.yaml before regenerating the
code:
$ tools/net/ynl/pyynl/ynl_gen_c.py --spec \
Documentation/netlink/specs/drm_ras.yaml --mode uapi --header \
-o include/uapi/drm/drm_ras.h
$ tools/net/ynl/pyynl/ynl_gen_c.py --spec \
Documentation/netlink/specs/drm_ras.yaml --mode kernel \
--header -o drivers/gpu/drm/drm_ras_nl.h
$ tools/net/ynl/pyynl/ynl_gen_c.py --spec \
Documentation/netlink/specs/drm_ras.yaml \
--mode kernel --source -o drivers/gpu/drm/drm_ras_nl.c
Cc: Zack McKevitt <zachary.mckevitt@oss.qualcomm.com>
Cc: Lijo Lazar <lijo.lazar@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: netdev@vger.kernel.org
Co-developed-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Zack McKevitt <zachary.mckevitt@oss.qualcomm.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patch.msgid.link/20260304074412.464435-8-riana.tauro@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'include')
| -rw-r--r-- | include/drm/drm_ras.h | 75 | ||||
| -rw-r--r-- | include/drm/drm_ras_genl_family.h | 17 | ||||
| -rw-r--r-- | include/uapi/drm/drm_ras.h | 49 |
3 files changed, 141 insertions, 0 deletions
diff --git a/include/drm/drm_ras.h b/include/drm/drm_ras.h new file mode 100644 index 000000000000..5d50209e51db --- /dev/null +++ b/include/drm/drm_ras.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef __DRM_RAS_H__ +#define __DRM_RAS_H__ + +#include <uapi/drm/drm_ras.h> + +/** + * struct drm_ras_node - A DRM RAS Node + */ +struct drm_ras_node { + /** @id: Unique identifier for the node. Dynamically assigned. */ + u32 id; + /** + * @device_name: Human-readable name of the device. Given by the driver. + */ + const char *device_name; + /** @node_name: Human-readable name of the node. Given by the driver. */ + const char *node_name; + /** @type: Type of the node (enum drm_ras_node_type). */ + enum drm_ras_node_type type; + + /* Error-Counter Related Callback and Variables */ + + /** @error_counter_range: Range of valid Error IDs for this node. */ + struct { + /** @first: First valid Error ID. */ + u32 first; + /** @last: Last valid Error ID. Mandatory entry. */ + u32 last; + } error_counter_range; + + /** + * @query_error_counter: + * + * This callback is used by drm-ras to query a specific error counter. + * Used for input check and to iterate all error counters in a node. + * + * Driver should expect query_error_counter() to be called with + * error_id from `error_counter_range.first` to + * `error_counter_range.last`. + * + * The @query_error_counter is a mandatory callback for + * error_counter_node. + * + * Returns: 0 on success, + * -ENOENT when error_id is not supported as an indication that + * drm_ras should silently skip this entry. Used for + * supporting non-contiguous error ranges. + * Driver is responsible for maintaining the list of + * supported error IDs in the range of first to last. + * Other negative values on errors that should terminate the + * netlink query. 
+ */ + int (*query_error_counter)(struct drm_ras_node *node, u32 error_id, + const char **name, u32 *val); + + /** @priv: Driver private data */ + void *priv; +}; + +struct drm_device; + +#if IS_ENABLED(CONFIG_DRM_RAS) +int drm_ras_node_register(struct drm_ras_node *node); +void drm_ras_node_unregister(struct drm_ras_node *node); +#else +static inline int drm_ras_node_register(struct drm_ras_node *node) { return 0; } +static inline void drm_ras_node_unregister(struct drm_ras_node *node) { } +#endif + +#endif diff --git a/include/drm/drm_ras_genl_family.h b/include/drm/drm_ras_genl_family.h new file mode 100644 index 000000000000..910fb3943a75 --- /dev/null +++ b/include/drm/drm_ras_genl_family.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef __DRM_RAS_GENL_FAMILY_H__ +#define __DRM_RAS_GENL_FAMILY_H__ + +#if IS_ENABLED(CONFIG_DRM_RAS) +int drm_ras_genl_family_register(void); +void drm_ras_genl_family_unregister(void); +#else +static inline int drm_ras_genl_family_register(void) { return 0; } +static inline void drm_ras_genl_family_unregister(void) { } +#endif + +#endif diff --git a/include/uapi/drm/drm_ras.h b/include/uapi/drm/drm_ras.h new file mode 100644 index 000000000000..5f40fa5b869d --- /dev/null +++ b/include/uapi/drm/drm_ras.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/drm_ras.yaml */ +/* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ + +#ifndef _UAPI_LINUX_DRM_RAS_H +#define _UAPI_LINUX_DRM_RAS_H + +#define DRM_RAS_FAMILY_NAME "drm-ras" +#define DRM_RAS_FAMILY_VERSION 1 + +/* + * Type of the node. Currently, only error-counter nodes are supported, which + * expose reliability counters for a hardware/software component. 
+ */ +enum drm_ras_node_type { + DRM_RAS_NODE_TYPE_ERROR_COUNTER = 1, +}; + +enum { + DRM_RAS_A_NODE_ATTRS_NODE_ID = 1, + DRM_RAS_A_NODE_ATTRS_DEVICE_NAME, + DRM_RAS_A_NODE_ATTRS_NODE_NAME, + DRM_RAS_A_NODE_ATTRS_NODE_TYPE, + + __DRM_RAS_A_NODE_ATTRS_MAX, + DRM_RAS_A_NODE_ATTRS_MAX = (__DRM_RAS_A_NODE_ATTRS_MAX - 1) +}; + +enum { + DRM_RAS_A_ERROR_COUNTER_ATTRS_NODE_ID = 1, + DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_ID, + DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_NAME, + DRM_RAS_A_ERROR_COUNTER_ATTRS_ERROR_VALUE, + + __DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX, + DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX = (__DRM_RAS_A_ERROR_COUNTER_ATTRS_MAX - 1) +}; + +enum { + DRM_RAS_CMD_LIST_NODES = 1, + DRM_RAS_CMD_GET_ERROR_COUNTER, + + __DRM_RAS_CMD_MAX, + DRM_RAS_CMD_MAX = (__DRM_RAS_CMD_MAX - 1) +}; + +#endif /* _UAPI_LINUX_DRM_RAS_H */ |
