From 62f79f3d0eb9f4c224bcc3c7f6fa758515a0a7fa Mon Sep 17 00:00:00 2001 From: Eddie James Date: Wed, 21 Jul 2021 14:02:29 -0500 Subject: fsi: occ: Force sequence numbering per OCC Set and increment the sequence number during the submit operation. This prevents sequence number conflicts between different users of the interface. A sequence number conflict may result in a user getting an OCC response meant for a different command. Since the sequence number is now modified, the checksum must be calculated and set before submitting the command. Signed-off-by: Eddie James Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20210721190231.117185-2-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- drivers/fsi/fsi-occ.c | 54 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/drivers/fsi/fsi-occ.c b/drivers/fsi/fsi-occ.c index b223f0ef337b..ecf738411fe2 100644 --- a/drivers/fsi/fsi-occ.c +++ b/drivers/fsi/fsi-occ.c @@ -50,6 +50,7 @@ struct occ { struct device *sbefifo; char name[32]; int idx; + u8 sequence_number; enum versions version; struct miscdevice mdev; struct mutex occ_lock; @@ -141,8 +142,7 @@ static ssize_t occ_write(struct file *file, const char __user *buf, { struct occ_client *client = file->private_data; size_t rlen, data_length; - u16 checksum = 0; - ssize_t rc, i; + ssize_t rc; u8 *cmd; if (!client) @@ -156,9 +156,6 @@ static ssize_t occ_write(struct file *file, const char __user *buf, /* Construct the command */ cmd = client->buffer; - /* Sequence number (we could increment and compare with response) */ - cmd[0] = 1; - /* * Copy the user command (assume user data follows the occ command * format) @@ -178,14 +175,7 @@ static ssize_t occ_write(struct file *file, const char __user *buf, goto done; } - /* Calculate checksum */ - for (i = 0; i < data_length + 4; ++i) - checksum += cmd[i]; - - cmd[data_length + 4] = checksum >> 8; - cmd[data_length + 5] = checksum & 0xFF; - - /* Submit command */ + /* Submit command; 4 bytes before the data and 2 bytes after */ rlen = PAGE_SIZE; rc = fsi_occ_submit(client->occ->dev, cmd, data_length + 6, cmd, &rlen); @@ -314,11 +304,13 @@ free: return rc; } -static int occ_putsram(struct occ *occ, const void *data, ssize_t len) +static int occ_putsram(struct occ *occ, const void *data, ssize_t len, + u8 seq_no, u16 checksum) { size_t cmd_len, buf_len, resp_len, resp_data_len; u32 data_len = ((len + 7) / 8) * 8; /* must be multiples of 8 B */ __be32 *buf; + u8 *byte_buf; int idx = 0, rc; cmd_len = (occ->version == occ_p10) ? 6 : 5; @@ -358,6 +350,15 @@ static int occ_putsram(struct occ *occ, const void *data, ssize_t len) buf[4 + idx] = cpu_to_be32(data_len); memcpy(&buf[5 + idx], data, len); + byte_buf = (u8 *)&buf[5 + idx]; + /* + * Overwrite the first byte with our sequence number and the last two + * bytes with the checksum. + */ + byte_buf[0] = seq_no; + byte_buf[len - 2] = checksum >> 8; + byte_buf[len - 1] = checksum & 0xff; + rc = sbefifo_submit(occ->sbefifo, buf, cmd_len, buf, &resp_len); if (rc) goto free; @@ -467,9 +468,12 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, struct occ *occ = dev_get_drvdata(dev); struct occ_response *resp = response; u8 seq_no; + u16 checksum = 0; u16 resp_data_length; + const u8 *byte_request = (const u8 *)request; unsigned long start; int rc; + size_t i; if (!occ) return -ENODEV; @@ -479,11 +483,26 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, return -EINVAL; } + /* Checksum the request, ignoring first byte (sequence number). */ + for (i = 1; i < req_len - 2; ++i) + checksum += byte_request[i]; + mutex_lock(&occ->occ_lock); - /* Extract the seq_no from the command (first byte) */ - seq_no = *(const u8 *)request; - rc = occ_putsram(occ, request, req_len); + /* + * Get a sequence number and update the counter. Avoid a sequence + * number of 0 which would pass the response check below even if the + * OCC response is uninitialized. Any sequence number the user is + * trying to send is overwritten since this function is the only common + * interface to the OCC and therefore the only place we can guarantee + * unique sequence numbers. + */ + seq_no = occ->sequence_number++; + if (!occ->sequence_number) + occ->sequence_number = 1; + checksum += seq_no; + + rc = occ_putsram(occ, request, req_len, seq_no, checksum); if (rc) goto done; @@ -574,6 +593,7 @@ static int occ_probe(struct platform_device *pdev) occ->version = (uintptr_t)of_device_get_match_data(dev); occ->dev = dev; occ->sbefifo = dev->parent; + occ->sequence_number = 1; mutex_init(&occ->occ_lock); if (dev->of_node) { -- cgit v1.2.3 From 908dbf0242e21dd95c69a1b0935814cd1abfc134 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Wed, 21 Jul 2021 14:02:30 -0500 Subject: hwmon: (occ) Remove sequence numbering and checksum calculation Checksumming of the request and sequence numbering is now done in the OCC interface driver in order to keep unique sequence numbers. So remove those in the hwmon driver. Also, add the command length to the send_cmd function pointer, since the checksum must be placed in the last two bytes of the command. The submit interface must receive the exact size of the command - previously it could be rounded to the nearest 8 bytes with no consequence. Signed-off-by: Eddie James Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/20210721190231.117185-3-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- drivers/hwmon/occ/common.c | 30 ++++++++++++------------------ drivers/hwmon/occ/common.h | 3 +-- drivers/hwmon/occ/p8_i2c.c | 15 +++++++++------ drivers/hwmon/occ/p9_sbe.c | 4 ++-- 4 files changed, 24 insertions(+), 28 deletions(-) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index 0d68a78be980..fc298268c89e 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -132,22 +132,20 @@ struct extended_sensor { static int occ_poll(struct occ *occ) { int rc; - u16 checksum = occ->poll_cmd_data + occ->seq_no + 1; - u8 cmd[8]; + u8 cmd[7]; struct occ_poll_response_header *header; /* big endian */ - cmd[0] = occ->seq_no++; /* sequence number */ + cmd[0] = 0; /* sequence number */ cmd[1] = 0; /* cmd type */ cmd[2] = 0; /* data length msb */ cmd[3] = 1; /* data length lsb */ cmd[4] = occ->poll_cmd_data; /* data */ - cmd[5] = checksum >> 8; /* checksum msb */ - cmd[6] = checksum & 0xFF; /* checksum lsb */ - cmd[7] = 0; + cmd[5] = 0; /* checksum msb */ + cmd[6] = 0; /* checksum lsb */ /* mutex should already be locked if necessary */ - rc = occ->send_cmd(occ, cmd); + rc = occ->send_cmd(occ, cmd, sizeof(cmd)); if (rc) { occ->last_error = rc; if (occ->error_count++ > OCC_ERROR_COUNT_THRESHOLD) @@ -184,25 +182,23 @@ static int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap) { int rc; u8 cmd[8]; - u16 checksum = 0x24; __be16 user_power_cap_be = cpu_to_be16(user_power_cap); - cmd[0] = 0; - cmd[1] = 0x22; - cmd[2] = 0; - cmd[3] = 2; + cmd[0] = 0; /* sequence number */ + cmd[1] = 0x22; /* cmd type */ + cmd[2] = 0; /* data length msb */ + cmd[3] = 2; /* data length lsb */ memcpy(&cmd[4], &user_power_cap_be, 2); - checksum += cmd[4] + cmd[5]; - cmd[6] = checksum >> 8; - cmd[7] = checksum & 0xFF; + cmd[6] = 0; /* checksum msb */ + cmd[7] = 0; /* checksum lsb */ rc = mutex_lock_interruptible(&occ->lock); if (rc) return rc; - rc = occ->send_cmd(occ, cmd); + rc = occ->send_cmd(occ, cmd, sizeof(cmd)); mutex_unlock(&occ->lock); @@ -1151,8 +1147,6 @@ int occ_setup(struct occ *occ, const char *name) { int rc; - /* start with 1 to avoid false match with zero-initialized SRAM buffer */ - occ->seq_no = 1; mutex_init(&occ->lock); occ->groups[0] = &occ->group; diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h index e6df719770e8..5020117be740 100644 --- a/drivers/hwmon/occ/common.h +++ b/drivers/hwmon/occ/common.h @@ -95,9 +95,8 @@ struct occ { struct occ_sensors sensors; int powr_sample_time_us; /* average power sample time */ - u8 seq_no; u8 poll_cmd_data; /* to perform OCC poll command */ - int (*send_cmd)(struct occ *occ, u8 *cmd); + int (*send_cmd)(struct occ *occ, u8 *cmd, size_t len); unsigned long next_update; struct mutex lock; /* lock OCC access */ diff --git a/drivers/hwmon/occ/p8_i2c.c b/drivers/hwmon/occ/p8_i2c.c index 0cf8588be35a..9e61e1fb5142 100644 --- a/drivers/hwmon/occ/p8_i2c.c +++ b/drivers/hwmon/occ/p8_i2c.c @@ -97,18 +97,21 @@ static int p8_i2c_occ_putscom_u32(struct i2c_client *client, u32 address, } static int p8_i2c_occ_putscom_be(struct i2c_client *client, u32 address, - u8 *data) + u8 *data, size_t len) { - __be32 data0, data1; + __be32 data0 = 0, data1 = 0; - memcpy(&data0, data, 4); - memcpy(&data1, data + 4, 4); + memcpy(&data0, data, min_t(size_t, len, 4)); + if (len > 4) { + len -= 4; + memcpy(&data1, data + 4, min_t(size_t, len, 4)); + } return p8_i2c_occ_putscom_u32(client, address, be32_to_cpu(data0), be32_to_cpu(data1)); } -static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd) +static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) { int i, rc; unsigned long start; @@ -127,7 +130,7 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd) return rc; /* write command (expected to already be BE), we need bus-endian... */ - rc = p8_i2c_occ_putscom_be(client, OCB_DATA3, cmd); + rc = p8_i2c_occ_putscom_be(client, OCB_DATA3, cmd, len); if (rc) return rc; diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c index f6387cc0b754..9709f2b9c052 100644 --- a/drivers/hwmon/occ/p9_sbe.c +++ b/drivers/hwmon/occ/p9_sbe.c @@ -16,14 +16,14 @@ struct p9_sbe_occ { #define to_p9_sbe_occ(x) container_of((x), struct p9_sbe_occ, occ) -static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd) +static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) { struct occ_response *resp = &occ->resp; struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); size_t resp_len = sizeof(*resp); int rc; - rc = fsi_occ_submit(ctx->sbe, cmd, 8, resp, &resp_len); + rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); if (rc < 0) return rc; -- cgit v1.2.3 From 008d3825a805557464c5e75f9eb806a3aa2f5e6d Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 19 Oct 2021 15:53:04 -0500 Subject: fsi: occ: Use a large buffer for responses Allocate a large buffer for each OCC to handle response data. This removes memory allocation during an operation, and also allows for the maximum amount of SBE FFDC. Previously for the putsram and attn commands, only 32 words would have been available, and for getsram, only up to the size of the transfer. SBE FFDC might be up to 8Kb. The SBE interface expects data to be specified in units of words (4 bytes), defined as OCC_MAX_RESP_WORDS. This change allows the full FFDC capture to be implemented, where before it was not available. Signed-off-by: Eddie James Link: https://lore.kernel.org/r/20211019205307.36946-2-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- drivers/fsi/fsi-occ.c | 110 +++++++++++++++++++----------------------------- include/linux/fsi-occ.h | 2 + 2 files changed, 46 insertions(+), 66 deletions(-) diff --git a/drivers/fsi/fsi-occ.c b/drivers/fsi/fsi-occ.c index ecf738411fe2..b4302776026d 100644 --- a/drivers/fsi/fsi-occ.c +++ b/drivers/fsi/fsi-occ.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -33,13 +34,6 @@ #define OCC_P10_SRAM_MODE 0x58 /* Normal mode, OCB channel 2 */ -/* - * Assume we don't have much FFDC, if we do we'll overflow and - * fail the command. This needs to be big enough for simple - * commands as well. - */ -#define OCC_SBE_STATUS_WORDS 32 - #define OCC_TIMEOUT_MS 1000 #define OCC_CMD_IN_PRG_WAIT_MS 50 @@ -51,6 +45,7 @@ struct occ { char name[32]; int idx; u8 sequence_number; + void *buffer; enum versions version; struct miscdevice mdev; struct mutex occ_lock; @@ -241,8 +236,10 @@ static int occ_verify_checksum(struct occ *occ, struct occ_response *resp, static int occ_getsram(struct occ *occ, u32 offset, void *data, ssize_t len) { u32 data_len = ((len + 7) / 8) * 8; /* must be multiples of 8 B */ - size_t cmd_len, resp_len, resp_data_len; - __be32 *resp, cmd[6]; + size_t cmd_len, resp_data_len; + size_t resp_len = OCC_MAX_RESP_WORDS; + __be32 *resp = occ->buffer; + __be32 cmd[6]; int idx = 0, rc; /* @@ -269,19 +266,19 @@ static int occ_getsram(struct occ *occ, u32 offset, void *data, ssize_t len) cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_OCC_SRAM); cmd[4 + idx] = cpu_to_be32(data_len); - resp_len = (data_len >> 2) + OCC_SBE_STATUS_WORDS; - resp = kzalloc(resp_len << 2, GFP_KERNEL); - if (!resp) - return -ENOMEM; - rc = sbefifo_submit(occ->sbefifo, cmd, cmd_len, resp, &resp_len); if (rc) - goto free; + return rc; rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_GET_OCC_SRAM, resp, resp_len, &resp_len); - if (rc) - goto free; + if (rc > 0) { + dev_err(occ->dev, "SRAM read returned failure status: %08x\n", + rc); + return -EBADMSG; + } else if (rc) { + return rc; + } resp_data_len = be32_to_cpu(resp[resp_len - 1]); if (resp_data_len != data_len) { @@ -292,39 +289,21 @@ static int occ_getsram(struct occ *occ, u32 offset, void *data, ssize_t len) memcpy(data, resp, len); } -free: - /* Convert positive SBEI status */ - if (rc > 0) { - dev_err(occ->dev, "SRAM read returned failure status: %08x\n", - rc); - rc = -EBADMSG; - } - - kfree(resp); return rc; } static int occ_putsram(struct occ *occ, const void *data, ssize_t len, u8 seq_no, u16 checksum) { - size_t cmd_len, buf_len, resp_len, resp_data_len; u32 data_len = ((len + 7) / 8) * 8; /* must be multiples of 8 B */ - __be32 *buf; + size_t cmd_len, resp_data_len; + size_t resp_len = OCC_MAX_RESP_WORDS; + __be32 *buf = occ->buffer; u8 *byte_buf; int idx = 0, rc; cmd_len = (occ->version == occ_p10) ? 6 : 5; - - /* - * We use the same buffer for command and response, make - * sure it's big enough - */ - resp_len = OCC_SBE_STATUS_WORDS; cmd_len += data_len >> 2; - buf_len = max(cmd_len, resp_len); - buf = kzalloc(buf_len << 2, GFP_KERNEL); - if (!buf) - return -ENOMEM; /* * Magic sequence to do SBE putsram command. SBE will transfer @@ -361,12 +340,17 @@ static int occ_putsram(struct occ *occ, const void *data, ssize_t len, rc = sbefifo_submit(occ->sbefifo, buf, cmd_len, buf, &resp_len); if (rc) - goto free; + return rc; rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_PUT_OCC_SRAM, buf, resp_len, &resp_len); - if (rc) - goto free; + if (rc > 0) { + dev_err(occ->dev, "SRAM write returned failure status: %08x\n", + rc); + return -EBADMSG; + } else if (rc) { + return rc; + } if (resp_len != 1) { dev_err(occ->dev, "SRAM write response length invalid: %zd\n", @@ -382,27 +366,16 @@ static int occ_putsram(struct occ *occ, const void *data, ssize_t len, } } -free: - /* Convert positive SBEI status */ - if (rc > 0) { - dev_err(occ->dev, "SRAM write returned failure status: %08x\n", - rc); - rc = -EBADMSG; - } - - kfree(buf); return rc; } static int occ_trigger_attn(struct occ *occ) { - __be32 buf[OCC_SBE_STATUS_WORDS]; - size_t cmd_len, resp_len, resp_data_len; + __be32 *buf = occ->buffer; + size_t cmd_len, resp_data_len; + size_t resp_len = OCC_MAX_RESP_WORDS; int idx = 0, rc; - BUILD_BUG_ON(OCC_SBE_STATUS_WORDS < 8); - resp_len = OCC_SBE_STATUS_WORDS; - switch (occ->version) { default: case occ_p9: @@ -427,12 +400,17 @@ static int occ_trigger_attn(struct occ *occ) rc = sbefifo_submit(occ->sbefifo, buf, cmd_len, buf, &resp_len); if (rc) - goto error; + return rc; rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_PUT_OCC_SRAM, buf, resp_len, &resp_len); - if (rc) - goto error; + if (rc > 0) { + dev_err(occ->dev, "SRAM attn returned failure status: %08x\n", + rc); + return -EBADMSG; + } else if (rc) { + return rc; + } if (resp_len != 1) { dev_err(occ->dev, "SRAM attn response length invalid: %zd\n", @@ -448,14 +426,6 @@ static int occ_trigger_attn(struct occ *occ) } } - error: - /* Convert positive SBEI status */ - if (rc > 0) { - dev_err(occ->dev, "SRAM attn returned failure status: %08x\n", - rc); - rc = -EBADMSG; - } - return rc; } @@ -590,6 +560,11 @@ static int occ_probe(struct platform_device *pdev) if (!occ) return -ENOMEM; + /* SBE words are always four bytes */ + occ->buffer = kvmalloc(OCC_MAX_RESP_WORDS * 4, GFP_KERNEL); + if (!occ->buffer) + return -ENOMEM; + occ->version = (uintptr_t)of_device_get_match_data(dev); occ->dev = dev; occ->sbefifo = dev->parent; @@ -625,6 +600,7 @@ static int occ_probe(struct platform_device *pdev) if (rc) { dev_err(dev, "failed to register miscdevice: %d\n", rc); ida_simple_remove(&occ_ida, occ->idx); + kvfree(occ->buffer); return rc; } @@ -640,6 +616,8 @@ static int occ_remove(struct platform_device *pdev) { struct occ *occ = platform_get_drvdata(pdev); + kvfree(occ->buffer); + misc_deregister(&occ->mdev); device_for_each_child(&pdev->dev, NULL, occ_unregister_child); diff --git a/include/linux/fsi-occ.h b/include/linux/fsi-occ.h index d4cdc2aa6e33..7ee3dbd7f4b3 100644 --- a/include/linux/fsi-occ.h +++ b/include/linux/fsi-occ.h @@ -19,6 +19,8 @@ struct device; #define OCC_RESP_CRIT_OCB 0xE3 #define OCC_RESP_CRIT_HW 0xE4 +#define OCC_MAX_RESP_WORDS 2048 + int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, void *response, size_t *resp_len); -- cgit v1.2.3 From 8ec3cc9fb51dc973ea6886cc6ba3dcea4eee98ec Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 19 Oct 2021 15:53:05 -0500 Subject: fsi: occ: Store the SBEFIFO FFDC in the user response buffer If the SBEFIFO response indicates an error, store the response in the user buffer and return an error. Previously, the user had no way of obtaining the SBEFIFO FFDC. The user's buffer now contains data in the event of a failure. No change in the event of a successful transfer. Signed-off-by: Eddie James Link: https://lore.kernel.org/r/20211019205307.36946-3-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- drivers/fsi/fsi-occ.c | 66 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 18 deletions(-) diff --git a/drivers/fsi/fsi-occ.c b/drivers/fsi/fsi-occ.c index b4302776026d..7eaab1be0aa4 100644 --- a/drivers/fsi/fsi-occ.c +++ b/drivers/fsi/fsi-occ.c @@ -46,6 +46,9 @@ struct occ { int idx; u8 sequence_number; void *buffer; + void *client_buffer; + size_t client_buffer_size; + size_t client_response_size; enum versions version; struct miscdevice mdev; struct mutex occ_lock; @@ -208,6 +211,22 @@ static const struct file_operations occ_fops = { .release = occ_release, }; +static void occ_save_ffdc(struct occ *occ, __be32 *resp, size_t parsed_len, + size_t resp_len) +{ + if (resp_len > parsed_len) { + size_t dh = resp_len - parsed_len; + size_t ffdc_len = (dh - 1) * 4; /* SBE words are four bytes */ + __be32 *ffdc = &resp[parsed_len]; + + if (ffdc_len > occ->client_buffer_size) + ffdc_len = occ->client_buffer_size; + + memcpy(occ->client_buffer, ffdc, ffdc_len); + occ->client_response_size = ffdc_len; + } +} + static int occ_verify_checksum(struct occ *occ, struct occ_response *resp, u16 data_length) { @@ -236,7 +255,7 @@ static int occ_verify_checksum(struct occ *occ, struct occ_response *resp, static int occ_getsram(struct occ *occ, u32 offset, void *data, ssize_t len) { u32 data_len = ((len + 7) / 8) * 8; /* must be multiples of 8 B */ - size_t cmd_len, resp_data_len; + size_t cmd_len, parsed_len, resp_data_len; size_t resp_len = OCC_MAX_RESP_WORDS; __be32 *resp = occ->buffer; __be32 cmd[6]; @@ -271,16 +290,17 @@ static int occ_getsram(struct occ *occ, u32 offset, void *data, ssize_t len) return rc; rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_GET_OCC_SRAM, - resp, resp_len, &resp_len); + resp, resp_len, &parsed_len); if (rc > 0) { dev_err(occ->dev, "SRAM read returned failure status: %08x\n", rc); - return -EBADMSG; + occ_save_ffdc(occ, resp, parsed_len, resp_len); + return -ECOMM; } else if (rc) { return rc; } - resp_data_len = be32_to_cpu(resp[resp_len - 1]); + resp_data_len = be32_to_cpu(resp[parsed_len - 1]); if (resp_data_len != data_len) { dev_err(occ->dev, "SRAM read expected %d bytes got %zd\n", data_len, resp_data_len); @@ -296,7 +316,7 @@ static int occ_putsram(struct occ *occ, const void *data, ssize_t len, u8 seq_no, u16 checksum) { u32 data_len = ((len + 7) / 8) * 8; /* must be multiples of 8 B */ - size_t cmd_len, resp_data_len; + size_t cmd_len, parsed_len, resp_data_len; size_t resp_len = OCC_MAX_RESP_WORDS; __be32 *buf = occ->buffer; u8 *byte_buf; @@ -343,18 +363,19 @@ static int occ_putsram(struct occ *occ, const void *data, ssize_t len, return rc; rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_PUT_OCC_SRAM, - buf, resp_len, &resp_len); + buf, resp_len, &parsed_len); if (rc > 0) { dev_err(occ->dev, "SRAM write returned failure status: %08x\n", rc); - return -EBADMSG; + occ_save_ffdc(occ, buf, parsed_len, resp_len); + return -ECOMM; } else if (rc) { return rc; } - if (resp_len != 1) { + if (parsed_len != 1) { dev_err(occ->dev, "SRAM write response length invalid: %zd\n", - resp_len); + parsed_len); rc = -EBADMSG; } else { resp_data_len = be32_to_cpu(buf[0]); @@ -372,7 +393,7 @@ static int occ_putsram(struct occ *occ, const void *data, ssize_t len, static int occ_trigger_attn(struct occ *occ) { __be32 *buf = occ->buffer; - size_t cmd_len, resp_data_len; + size_t cmd_len, parsed_len, resp_data_len; size_t resp_len = OCC_MAX_RESP_WORDS; int idx = 0, rc; @@ -403,18 +424,19 @@ static int occ_trigger_attn(struct occ *occ) return rc; rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_PUT_OCC_SRAM, - buf, resp_len, &resp_len); + buf, resp_len, &parsed_len); if (rc > 0) { dev_err(occ->dev, "SRAM attn returned failure status: %08x\n", rc); - return -EBADMSG; + occ_save_ffdc(occ, buf, parsed_len, resp_len); + return -ECOMM; } else if (rc) { return rc; } - if (resp_len != 1) { + if (parsed_len != 1) { dev_err(occ->dev, "SRAM attn response length invalid: %zd\n", - resp_len); + parsed_len); rc = -EBADMSG; } else { resp_data_len = be32_to_cpu(buf[0]); @@ -437,6 +459,7 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, msecs_to_jiffies(OCC_CMD_IN_PRG_WAIT_MS); struct occ *occ = dev_get_drvdata(dev); struct occ_response *resp = response; + size_t user_resp_len = *resp_len; u8 seq_no; u16 checksum = 0; u16 resp_data_length; @@ -445,11 +468,13 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, int rc; size_t i; + *resp_len = 0; + if (!occ) return -ENODEV; - if (*resp_len < 7) { - dev_dbg(dev, "Bad resplen %zd\n", *resp_len); + if (user_resp_len < 7) { + dev_dbg(dev, "Bad resplen %zd\n", user_resp_len); return -EINVAL; } @@ -459,6 +484,10 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, mutex_lock(&occ->occ_lock); + occ->client_buffer = response; + occ->client_buffer_size = user_resp_len; + occ->client_response_size = 0; + /* * Get a sequence number and update the counter. Avoid a sequence * number of 0 which would pass the response check below even if the @@ -509,7 +538,7 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, resp_data_length = get_unaligned_be16(&resp->data_length); /* Message size is data length + 5 bytes header + 2 bytes checksum */ - if ((resp_data_length + 7) > *resp_len) { + if ((resp_data_length + 7) > user_resp_len) { rc = -EMSGSIZE; goto done; } @@ -525,10 +554,11 @@ int fsi_occ_submit(struct device *dev, const void *request, size_t req_len, goto done; } - *resp_len = resp_data_length + 7; + occ->client_response_size = resp_data_length + 7; rc = occ_verify_checksum(occ, resp, resp_data_length); done: + *resp_len = occ->client_response_size; mutex_unlock(&occ->occ_lock); return rc; -- cgit v1.2.3 From 4cf400e120b303c25fd378fb4286fa682e4e0a33 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 19 Oct 2021 15:53:06 -0500 Subject: docs: ABI: testing: Document the OCC hwmon FFDC binary interface Add documentation for the new binary sysfs that will dump the SBEFIFO FFDC. Signed-off-by: Eddie James Link: https://lore.kernel.org/r/20211019205307.36946-4-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- .../ABI/testing/sysfs-bus-platform-devices-occ-hwmon | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-platform-devices-occ-hwmon diff --git a/Documentation/ABI/testing/sysfs-bus-platform-devices-occ-hwmon b/Documentation/ABI/testing/sysfs-bus-platform-devices-occ-hwmon new file mode 100644 index 000000000000..b24d7ab0278f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-occ-hwmon @@ -0,0 +1,13 @@ +What: /sys/bus/platform/devices/occ-hwmon.X/ffdc +KernelVersion: 5.15 +Contact: eajames@linux.ibm.com +Description: + Contains the First Failure Data Capture from the SBEFIFO + hardware, if there is any from a previous transfer. Otherwise, + the file is empty. The data is cleared when it's been + completely read by a user. As the name suggests, only the data + from the first error is saved, until it's cleared upon read. The OCC hwmon driver, running on + a Baseboard Management Controller (BMC), communicates with + POWER9 and up processors over the Self-Boot Engine (SBE) FIFO. + In many error conditions, the SBEFIFO will return error data + indicating the type of error and system state, etc. -- cgit v1.2.3 From 5027a34a575e79ac225f5f3e710491e4c372c44a Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 19 Oct 2021 15:53:07 -0500 Subject: hwmon: (occ) Provide the SBEFIFO FFDC in binary sysfs Save any FFDC provided by the OCC driver, and provide it to userspace through a binary sysfs entry. Notify userspace pollers when there is an error too. Signed-off-by: Eddie James Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211019205307.36946-5-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- drivers/hwmon/occ/p9_sbe.c | 86 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c index 9709f2b9c052..e50243580269 100644 --- a/drivers/hwmon/occ/p9_sbe.c +++ b/drivers/hwmon/occ/p9_sbe.c @@ -4,18 +4,79 @@ #include #include #include +#include #include +#include #include +#include +#include #include "common.h" struct p9_sbe_occ { struct occ occ; + bool sbe_error; + void *ffdc; + size_t ffdc_len; + size_t ffdc_size; + struct mutex sbe_error_lock; /* lock access to ffdc data */ struct device *sbe; }; #define to_p9_sbe_occ(x) container_of((x), struct p9_sbe_occ, occ) +static ssize_t ffdc_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *battr, char *buf, loff_t pos, + size_t count) +{ + ssize_t rc = 0; + struct occ *occ = dev_get_drvdata(kobj_to_dev(kobj)); + struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); + + mutex_lock(&ctx->sbe_error_lock); + if (ctx->sbe_error) { + rc = memory_read_from_buffer(buf, count, &pos, ctx->ffdc, + ctx->ffdc_len); + if (pos >= ctx->ffdc_len) + ctx->sbe_error = false; + } + mutex_unlock(&ctx->sbe_error_lock); + + return rc; +} +static BIN_ATTR_RO(ffdc, OCC_MAX_RESP_WORDS * 4); + +static bool p9_sbe_occ_save_ffdc(struct p9_sbe_occ *ctx, const void *resp, + size_t resp_len) +{ + bool notify = false; + + mutex_lock(&ctx->sbe_error_lock); + if (!ctx->sbe_error) { + if (resp_len > ctx->ffdc_size) { + if (ctx->ffdc) + kvfree(ctx->ffdc); + ctx->ffdc = kvmalloc(resp_len, GFP_KERNEL); + if (!ctx->ffdc) { + ctx->ffdc_len = 0; + ctx->ffdc_size = 0; + goto done; + } + + ctx->ffdc_size = resp_len; + } + + notify = true; + ctx->sbe_error = true; + ctx->ffdc_len = resp_len; + memcpy(ctx->ffdc, resp, resp_len); + } + +done: + mutex_unlock(&ctx->sbe_error_lock); + return notify; +} + static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) { struct occ_response *resp = &occ->resp; @@ -24,8 +85,15 @@ static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len) int rc; rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); - if (rc < 0) + if (rc < 0) { + if (resp_len) { + if (p9_sbe_occ_save_ffdc(ctx, resp, resp_len)) + sysfs_notify(&occ->bus_dev->kobj, NULL, + bin_attr_ffdc.attr.name); + } + return rc; + } switch (resp->return_status) { case OCC_RESP_CMD_IN_PRG: @@ -65,6 +133,8 @@ static int p9_sbe_occ_probe(struct platform_device *pdev) if (!ctx) return -ENOMEM; + mutex_init(&ctx->sbe_error_lock); + ctx->sbe = pdev->dev.parent; occ = &ctx->occ; occ->bus_dev = &pdev->dev; @@ -78,6 +148,15 @@ static int p9_sbe_occ_probe(struct platform_device *pdev) if (rc == -ESHUTDOWN) rc = -ENODEV; /* Host is shutdown, don't spew errors */ + if (!rc) { + rc = device_create_bin_file(occ->bus_dev, &bin_attr_ffdc); + if (rc) { + dev_warn(occ->bus_dev, + "failed to create SBE error ffdc file\n"); + rc = 0; + } + } + return rc; } @@ -86,9 +165,14 @@ static int p9_sbe_occ_remove(struct platform_device *pdev) struct occ *occ = platform_get_drvdata(pdev); struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); + device_remove_bin_file(occ->bus_dev, &bin_attr_ffdc); + ctx->sbe = NULL; occ_shutdown(occ); + if (ctx->ffdc) + kvfree(ctx->ffdc); + return 0; } -- cgit v1.2.3 From 9a93de620e0a113b5f18916f58e1c80aad2f612b Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 19 Oct 2021 16:17:48 -0500 Subject: docs: ABI: testing: Document the SBEFIFO timeout interface Add documentation for the new sysfs entry that indicates whether or not the SBE has timed out. Signed-off-by: Eddie James Link: https://lore.kernel.org/r/20211019211749.38059-2-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- Documentation/ABI/testing/sysfs-bus-fsi-devices-sbefifo | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-fsi-devices-sbefifo diff --git a/Documentation/ABI/testing/sysfs-bus-fsi-devices-sbefifo b/Documentation/ABI/testing/sysfs-bus-fsi-devices-sbefifo new file mode 100644 index 000000000000..531fe9d6b40a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-fsi-devices-sbefifo @@ -0,0 +1,10 @@ +What: /sys/bus/fsi/devices/XX.XX.00:06/sbefifoX/timeout +KernelVersion: 5.15 +Contact: eajames@linux.ibm.com +Description: + Indicates whether or not this SBE device has experienced a + timeout; i.e. the SBE did not respond within the time allotted + by the driver. A value of 1 indicates that a timeout has + ocurred and no transfers have completed since the timeout. A + value of 0 indicates that no timeout has ocurred, or if one + has, more recent transfers have completed successful. -- cgit v1.2.3 From 826280348ec68cefeb7c3cc3689f6cafcd31c832 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 19 Oct 2021 16:17:49 -0500 Subject: fsi: sbefifo: Add sysfs file indicating a timeout error The SBEFIFO timeout error requires special handling in userspace to do recovery operations. Add a sysfs file to indicate a timeout error, and notify pollers when a timeout occurs. This will be used by the openpower-occ-control application. Signed-off-by: Eddie James Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20211019211749.38059-3-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- drivers/fsi/fsi-sbefifo.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c index 84cb965bfed5..b414ab4431ef 100644 --- a/drivers/fsi/fsi-sbefifo.c +++ b/drivers/fsi/fsi-sbefifo.c @@ -124,6 +124,7 @@ struct sbefifo { bool broken; bool dead; bool async_ffdc; + bool timed_out; }; struct sbefifo_user { @@ -136,6 +137,14 @@ struct sbefifo_user { static DEFINE_MUTEX(sbefifo_ffdc_mutex); +static ssize_t timeout_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sbefifo *sbefifo = container_of(dev, struct sbefifo, dev); + + return sysfs_emit(buf, "%d\n", sbefifo->timed_out ? 1 : 0); +} +static DEVICE_ATTR_RO(timeout); static void __sbefifo_dump_ffdc(struct device *dev, const __be32 *ffdc, size_t ffdc_sz, bool internal) @@ -462,11 +471,14 @@ static int sbefifo_wait(struct sbefifo *sbefifo, bool up, break; } if (!ready) { + sysfs_notify(&sbefifo->dev.kobj, NULL, dev_attr_timeout.attr.name); + sbefifo->timed_out = true; dev_err(dev, "%s FIFO Timeout ! status=%08x\n", up ? "UP" : "DOWN", sts); return -ETIMEDOUT; } dev_vdbg(dev, "End of wait status: %08x\n", sts); + sbefifo->timed_out = false; *status = sts; return 0; @@ -993,6 +1005,8 @@ static int sbefifo_probe(struct device *dev) child_name); } + device_create_file(&sbefifo->dev, &dev_attr_timeout); + return 0; err_free_minor: fsi_free_minor(sbefifo->dev.devt); @@ -1018,6 +1032,8 @@ static int sbefifo_remove(struct device *dev) dev_dbg(dev, "Removing sbefifo device...\n"); + device_remove_file(&sbefifo->dev, &dev_attr_timeout); + mutex_lock(&sbefifo->lock); sbefifo->dead = true; mutex_unlock(&sbefifo->lock); -- cgit v1.2.3 From 7cc2f34e1f4da07c791737cc6b3d965b31815ea0 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 3 Aug 2021 16:30:16 -0500 Subject: fsi: sbefifo: Use interruptible mutex locking Some SBE operations have extremely large responses and can require several minutes to process the response. During this time, the device lock must be held. If another process attempts an operation, it will wait for the mutex for longer than the kernel hung task watchdog allows. Therefore, use the interruptible function to lock the mutex. Signed-off-by: Eddie James Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20210803213016.44739-1-eajames@linux.ibm.com Signed-off-by: Joel Stanley --- drivers/fsi/fsi-sbefifo.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c index b414ab4431ef..52328adef643 100644 --- a/drivers/fsi/fsi-sbefifo.c +++ b/drivers/fsi/fsi-sbefifo.c @@ -752,7 +752,9 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len, iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes); /* Perform the command */ - mutex_lock(&sbefifo->lock); + rc = mutex_lock_interruptible(&sbefifo->lock); + if (rc) + return rc; rc = __sbefifo_submit(sbefifo, command, cmd_len, &resp_iter); mutex_unlock(&sbefifo->lock); @@ -832,7 +834,9 @@ static ssize_t sbefifo_user_read(struct file *file, char __user *buf, iov_iter_init(&resp_iter, WRITE, &resp_iov, 1, len); /* Perform the command */ - mutex_lock(&sbefifo->lock); + rc = mutex_lock_interruptible(&sbefifo->lock); + if (rc) + goto bail; rc = __sbefifo_submit(sbefifo, user->pending_cmd, cmd_len, &resp_iter); mutex_unlock(&sbefifo->lock); if (rc < 0) @@ -887,7 +891,9 @@ static ssize_t sbefifo_user_write(struct file *file, const char __user *buf, user->pending_len = 0; /* Trigger reset request */ - mutex_lock(&sbefifo->lock); + rc = mutex_lock_interruptible(&sbefifo->lock); + if (rc) + goto bail; rc = sbefifo_request_reset(user->sbefifo); mutex_unlock(&sbefifo->lock); if (rc == 0) -- cgit v1.2.3