summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/cxl/core/Makefile3
-rw-r--r--drivers/cxl/core/pci.c112
-rw-r--r--drivers/cxl/core/trace.c5
-rw-r--r--drivers/cxl/core/trace.h109
-rw-r--r--drivers/cxl/cxl.h2
-rw-r--r--drivers/cxl/cxlpci.h3
-rw-r--r--drivers/cxl/pci.c111
7 files changed, 234 insertions, 111 deletions
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 79c7257f4107..ca4ae31d8f57 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -3,6 +3,8 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o
obj-$(CONFIG_CXL_SUSPEND) += suspend.o
ccflags-y += -I$(srctree)/drivers/cxl
+CFLAGS_trace.o = -DTRACE_INCLUDE_PATH=. -I$(src)
+
cxl_core-y := port.o
cxl_core-y += pmem.o
cxl_core-y += regs.o
@@ -10,4 +12,5 @@ cxl_core-y += memdev.o
cxl_core-y += mbox.o
cxl_core-y += pci.o
cxl_core-y += hdm.o
+cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 57764e9cd19d..1d1492440287 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -9,6 +9,7 @@
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
+#include "trace.h"
/**
* DOC: cxl core pci
@@ -622,3 +623,114 @@ void read_cdat_data(struct cxl_port *port)
}
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
+
+void cxl_cor_error_detected(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ void __iomem *addr;
+ u32 status;
+
+ if (!cxlds->regs.ras)
+ return;
+
+ addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+ status = readl(addr);
+ if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
+ writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+ trace_cxl_aer_correctable_error(dev, status);
+ }
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
+
+/* CXL spec rev3.0 8.2.4.16.1 */
+static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
+{
+ void __iomem *addr;
+ u32 *log_addr;
+ int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
+
+ addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
+ log_addr = log;
+
+ for (i = 0; i < log_u32_size; i++) {
+ *log_addr = readl(addr);
+ log_addr++;
+ addr += sizeof(u32);
+ }
+}
+
+/*
+ * Log the state of the RAS status registers and prepare them to log the
+ * next error status. Return 1 if reset needed.
+ */
+static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
+{
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ u32 hl[CXL_HEADERLOG_SIZE_U32];
+ void __iomem *addr;
+ u32 status;
+ u32 fe;
+
+ if (!cxlds->regs.ras)
+ return false;
+
+ addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+ status = readl(addr);
+ if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
+ return false;
+
+ /* If multiple errors, log header points to first error from ctrl reg */
+ if (hweight32(status) > 1) {
+ addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
+ } else {
+ fe = status;
+ }
+
+ header_log_copy(cxlds, hl);
+ trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
+ writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+
+ return true;
+}
+
+pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ bool ue;
+
+ /*
+ * A frozen channel indicates an impending reset which is fatal to
+ * CXL.mem operation, and will likely crash the system. On the off
+ * chance the situation is recoverable dump the status of the RAS
+ * capability registers and bounce the active state of the memdev.
+ */
+ ue = cxl_report_and_clear(cxlds);
+
+ switch (state) {
+ case pci_channel_io_normal:
+ if (ue) {
+ device_release_driver(dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ dev_warn(&pdev->dev,
+ "%s: frozen state error detected, disable CXL.mem\n",
+ dev_name(dev));
+ device_release_driver(dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ dev_warn(&pdev->dev,
+ "failure state error detected, request disconnect\n");
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL);
diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c
new file mode 100644
index 000000000000..29ae7ce81dc5
--- /dev/null
+++ b/drivers/cxl/core/trace.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
new file mode 100644
index 000000000000..20ca2fe2ca8e
--- /dev/null
+++ b/drivers/cxl/core/trace.h
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cxl
+
+#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _CXL_EVENTS_H
+
+#include <cxl.h>
+#include <linux/tracepoint.h>
+
+#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0)
+#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1)
+#define CXL_RAS_UC_CACHE_BE_PARITY BIT(2)
+#define CXL_RAS_UC_CACHE_DATA_ECC BIT(3)
+#define CXL_RAS_UC_MEM_DATA_PARITY BIT(4)
+#define CXL_RAS_UC_MEM_ADDR_PARITY BIT(5)
+#define CXL_RAS_UC_MEM_BE_PARITY BIT(6)
+#define CXL_RAS_UC_MEM_DATA_ECC BIT(7)
+#define CXL_RAS_UC_REINIT_THRESH BIT(8)
+#define CXL_RAS_UC_RSVD_ENCODE BIT(9)
+#define CXL_RAS_UC_POISON BIT(10)
+#define CXL_RAS_UC_RECV_OVERFLOW BIT(11)
+#define CXL_RAS_UC_INTERNAL_ERR BIT(14)
+#define CXL_RAS_UC_IDE_TX_ERR BIT(15)
+#define CXL_RAS_UC_IDE_RX_ERR BIT(16)
+
+#define show_uc_errs(status) __print_flags(status, " | ", \
+ { CXL_RAS_UC_CACHE_DATA_PARITY, "Cache Data Parity Error" }, \
+ { CXL_RAS_UC_CACHE_ADDR_PARITY, "Cache Address Parity Error" }, \
+ { CXL_RAS_UC_CACHE_BE_PARITY, "Cache Byte Enable Parity Error" }, \
+ { CXL_RAS_UC_CACHE_DATA_ECC, "Cache Data ECC Error" }, \
+ { CXL_RAS_UC_MEM_DATA_PARITY, "Memory Data Parity Error" }, \
+ { CXL_RAS_UC_MEM_ADDR_PARITY, "Memory Address Parity Error" }, \
+ { CXL_RAS_UC_MEM_BE_PARITY, "Memory Byte Enable Parity Error" }, \
+ { CXL_RAS_UC_MEM_DATA_ECC, "Memory Data ECC Error" }, \
+ { CXL_RAS_UC_REINIT_THRESH, "REINIT Threshold Hit" }, \
+ { CXL_RAS_UC_RSVD_ENCODE, "Received Unrecognized Encoding" }, \
+ { CXL_RAS_UC_POISON, "Received Poison From Peer" }, \
+ { CXL_RAS_UC_RECV_OVERFLOW, "Receiver Overflow" }, \
+ { CXL_RAS_UC_INTERNAL_ERR, "Component Specific Error" }, \
+ { CXL_RAS_UC_IDE_TX_ERR, "IDE Tx Error" }, \
+ { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \
+)
+
+TRACE_EVENT(cxl_aer_uncorrectable_error,
+ TP_PROTO(const struct device *dev, u32 status, u32 fe, u32 *hl),
+ TP_ARGS(dev, status, fe, hl),
+ TP_STRUCT__entry(
+ __string(dev_name, dev_name(dev))
+ __field(u32, status)
+ __field(u32, first_error)
+ __array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
+ ),
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name(dev));
+ __entry->status = status;
+ __entry->first_error = fe;
+ /*
+ * Embed the 512B headerlog data for user app retrieval and
+ * parsing, but no need to print this in the trace buffer.
+ */
+ memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
+ ),
+ TP_printk("%s: status: '%s' first_error: '%s'",
+ __get_str(dev_name),
+ show_uc_errs(__entry->status),
+ show_uc_errs(__entry->first_error)
+ )
+);
+
+#define CXL_RAS_CE_CACHE_DATA_ECC BIT(0)
+#define CXL_RAS_CE_MEM_DATA_ECC BIT(1)
+#define CXL_RAS_CE_CRC_THRESH BIT(2)
+#define CLX_RAS_CE_RETRY_THRESH BIT(3)
+#define CXL_RAS_CE_CACHE_POISON BIT(4)
+#define CXL_RAS_CE_MEM_POISON BIT(5)
+#define CXL_RAS_CE_PHYS_LAYER_ERR BIT(6)
+
+#define show_ce_errs(status) __print_flags(status, " | ", \
+ { CXL_RAS_CE_CACHE_DATA_ECC, "Cache Data ECC Error" }, \
+ { CXL_RAS_CE_MEM_DATA_ECC, "Memory Data ECC Error" }, \
+ { CXL_RAS_CE_CRC_THRESH, "CRC Threshold Hit" }, \
+ { CLX_RAS_CE_RETRY_THRESH, "Retry Threshold" }, \
+ { CXL_RAS_CE_CACHE_POISON, "Received Cache Poison From Peer" }, \
+ { CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" }, \
+ { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \
+)
+
+TRACE_EVENT(cxl_aer_correctable_error,
+ TP_PROTO(const struct device *dev, u32 status),
+ TP_ARGS(dev, status),
+ TP_STRUCT__entry(
+ __string(dev_name, dev_name(dev))
+ __field(u32, status)
+ ),
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name(dev));
+ __entry->status = status;
+ ),
+ TP_printk("%s: status: '%s'",
+ __get_str(dev_name), show_ce_errs(__entry->status)
+ )
+);
+
+#endif /* _CXL_EVENTS_H */
+
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 1b1cf459ac77..aa3af3bb73b2 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -140,6 +140,8 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
#define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0)
#define CXL_RAS_HEADER_LOG_OFFSET 0x18
#define CXL_RAS_CAPABILITY_LENGTH 0x58
+#define CXL_HEADERLOG_SIZE SZ_512
+#define CXL_HEADERLOG_SIZE_U32 SZ_512 / sizeof(u32)
/* CXL 2.0 8.2.8.1 Device Capabilities Array Register */
#define CXLDEV_CAP_ARRAY_OFFSET 0x0
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 920909791bb9..77dbdb980b12 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -66,4 +66,7 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm);
void read_cdat_data(struct cxl_port *port);
+void cxl_cor_error_detected(struct pci_dev *pdev);
+pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state);
#endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 33083a522fd1..3a66aadb4df0 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -14,8 +14,6 @@
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
-#define CREATE_TRACE_POINTS
-#include <trace/events/cxl.h>
/**
* DOC: cxl pci
@@ -514,96 +512,6 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
-/* CXL spec rev3.0 8.2.4.16.1 */
-static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
-{
- void __iomem *addr;
- u32 *log_addr;
- int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
-
- addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
- log_addr = log;
-
- for (i = 0; i < log_u32_size; i++) {
- *log_addr = readl(addr);
- log_addr++;
- addr += sizeof(u32);
- }
-}
-
-/*
- * Log the state of the RAS status registers and prepare them to log the
- * next error status. Return 1 if reset needed.
- */
-static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
-{
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *dev = &cxlmd->dev;
- u32 hl[CXL_HEADERLOG_SIZE_U32];
- void __iomem *addr;
- u32 status;
- u32 fe;
-
- if (!cxlds->regs.ras)
- return false;
-
- addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
- status = readl(addr);
- if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
- return false;
-
- /* If multiple errors, log header points to first error from ctrl reg */
- if (hweight32(status) > 1) {
- addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
- fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
- } else {
- fe = status;
- }
-
- header_log_copy(cxlds, hl);
- trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
- writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
-
- return true;
-}
-
-static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
- pci_channel_state_t state)
-{
- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *dev = &cxlmd->dev;
- bool ue;
-
- /*
- * A frozen channel indicates an impending reset which is fatal to
- * CXL.mem operation, and will likely crash the system. On the off
- * chance the situation is recoverable dump the status of the RAS
- * capability registers and bounce the active state of the memdev.
- */
- ue = cxl_report_and_clear(cxlds);
-
- switch (state) {
- case pci_channel_io_normal:
- if (ue) {
- device_release_driver(dev);
- return PCI_ERS_RESULT_NEED_RESET;
- }
- return PCI_ERS_RESULT_CAN_RECOVER;
- case pci_channel_io_frozen:
- dev_warn(&pdev->dev,
- "%s: frozen state error detected, disable CXL.mem\n",
- dev_name(dev));
- device_release_driver(dev);
- return PCI_ERS_RESULT_NEED_RESET;
- case pci_channel_io_perm_failure:
- dev_warn(&pdev->dev,
- "failure state error detected, request disconnect\n");
- return PCI_ERS_RESULT_DISCONNECT;
- }
- return PCI_ERS_RESULT_NEED_RESET;
-}
-
static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
@@ -628,25 +536,6 @@ static void cxl_error_resume(struct pci_dev *pdev)
dev->driver ? "successful" : "failed");
}
-static void cxl_cor_error_detected(struct pci_dev *pdev)
-{
- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *dev = &cxlmd->dev;
- void __iomem *addr;
- u32 status;
-
- if (!cxlds->regs.ras)
- return;
-
- addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
- status = readl(addr);
- if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
- writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
- trace_cxl_aer_correctable_error(dev, status);
- }
-}
-
static const struct pci_error_handlers cxl_error_handlers = {
.error_detected = cxl_error_detected,
.slot_reset = cxl_slot_reset,