summary | refs | log | tree | commit | diff
path: root/drivers/nvme/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- drivers/nvme/host/Kconfig | 10
-rw-r--r-- drivers/nvme/host/Makefile | 1
-rw-r--r-- drivers/nvme/host/core.c | 42
-rw-r--r-- drivers/nvme/host/fc.c | 49
-rw-r--r-- drivers/nvme/host/hwmon.c | 259
-rw-r--r-- drivers/nvme/host/multipath.c | 15
-rw-r--r-- drivers/nvme/host/nvme.h | 33
-rw-r--r-- drivers/nvme/host/pci.c | 9
-rw-r--r-- drivers/nvme/host/rdma.c | 24
-rw-r--r-- drivers/nvme/host/tcp.c | 4
10 files changed, 387 insertions, 59 deletions
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 2b36f052bfb9..c6439638a419 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -23,6 +23,16 @@ config NVME_MULTIPATH
/dev/nvmeXnY device will show up for each NVMe namespaces,
even if it is accessible through multiple controllers.
+config NVME_HWMON
+ bool "NVMe hardware monitoring"
+ depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
+ help
+ This provides support for NVMe hardware monitoring. If enabled,
+ a hardware monitoring device will be created for each NVMe drive
+ in the system.
+
+ If unsure, say N.
+
config NVME_FABRICS
tristate
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 8a4b671c5f0c..fc7b26be692d 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
+nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
nvme-y += pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fa7ba09dca77..8e8527408db3 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -283,6 +283,8 @@ void nvme_complete_rq(struct request *req)
trace_nvme_complete_rq(req);
+ nvme_cleanup_cmd(req);
+
if (nvme_req(req)->ctrl->kas)
nvme_req(req)->ctrl->comp_seen = true;
@@ -313,7 +315,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
if (blk_mq_request_completed(req))
return true;
- nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR;
+ nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
blk_mq_complete_request(req);
return true;
}
@@ -611,8 +613,14 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_dsm_range *range;
struct bio *bio;
- range = kmalloc_array(segments, sizeof(*range),
- GFP_ATOMIC | __GFP_NOWARN);
+ /*
+ * Some devices do not consider the DSM 'Number of Ranges' field when
+ * determining how much data to DMA. Always allocate memory for maximum
+ * number of segments to prevent device reading beyond end of buffer.
+ */
+ static const size_t alloc_size = sizeof(*range) * NVME_DSM_MAX_RANGES;
+
+ range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN);
if (!range) {
/*
* If we fail allocation our range, fallback to the controller
@@ -626,7 +634,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
}
__rq_for_each_bio(bio, req) {
- u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
+ u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
if (n < segments) {
@@ -652,7 +660,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
req->special_vec.bv_page = virt_to_page(range);
req->special_vec.bv_offset = offset_in_page(range);
- req->special_vec.bv_len = sizeof(*range) * segments;
+ req->special_vec.bv_len = alloc_size;
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
return BLK_STS_OK;
@@ -667,7 +675,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes;
cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->write_zeroes.slba =
- cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->write_zeroes.length =
cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
cmnd->write_zeroes.control = 0;
@@ -691,7 +699,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
- cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
@@ -1647,7 +1655,7 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
static void nvme_set_chunk_size(struct nvme_ns *ns)
{
- u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+ u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob);
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
}
@@ -1684,8 +1692,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
{
- u32 max_sectors;
- unsigned short bs = 1 << ns->lba_shift;
+ u64 max_blocks;
if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
(ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
@@ -1701,11 +1708,12 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
* nvme_init_identify() if available.
*/
if (ns->ctrl->max_hw_sectors == UINT_MAX)
- max_sectors = ((u32)(USHRT_MAX + 1) * bs) >> 9;
+ max_blocks = (u64)USHRT_MAX + 1;
else
- max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9;
+ max_blocks = ns->ctrl->max_hw_sectors + 1;
- blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors);
+ blk_queue_max_write_zeroes_sectors(disk->queue,
+ nvme_lba_to_sect(ns, max_blocks));
}
static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
@@ -1748,7 +1756,7 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
static void nvme_update_disk_info(struct gendisk *disk,
struct nvme_ns *ns, struct nvme_id_ns *id)
{
- sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
+ sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
unsigned short bs = 1 << ns->lba_shift;
u32 atomic_bs, phys_bs, io_opt;
@@ -2796,6 +2804,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->oncs = le16_to_cpu(id->oncs);
ctrl->mtfa = le16_to_cpu(id->mtfa);
ctrl->oaes = le32_to_cpu(id->oaes);
+ ctrl->wctemp = le16_to_cpu(id->wctemp);
+ ctrl->cctemp = le16_to_cpu(id->cctemp);
+
atomic_set(&ctrl->abort_limit, id->acl + 1);
ctrl->vwc = id->vwc;
if (id->mdts)
@@ -2895,6 +2906,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
if (ret < 0)
return ret;
+ if (!ctrl->identified)
+ nvme_hwmon_init(ctrl);
+
ctrl->identified = true;
return 0;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 265f89e11d8b..679a721ae229 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1224,7 +1224,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
lsreq->rqstlen = sizeof(*assoc_rqst);
lsreq->rspaddr = assoc_acc;
lsreq->rsplen = sizeof(*assoc_acc);
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
if (ret)
@@ -1264,7 +1264,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
if (fcret) {
ret = -EBADF;
dev_err(ctrl->dev,
- "q %d connect failed: %s\n",
+ "q %d Create Association LS failed: %s\n",
queue->qnum, validation_errors[fcret]);
} else {
ctrl->association_id =
@@ -1332,7 +1332,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
lsreq->rqstlen = sizeof(*conn_rqst);
lsreq->rspaddr = conn_acc;
lsreq->rsplen = sizeof(*conn_acc);
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
if (ret)
@@ -1363,7 +1363,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
if (fcret) {
ret = -EBADF;
dev_err(ctrl->dev,
- "q %d connect failed: %s\n",
+ "q %d Create I/O Connection LS failed: %s\n",
queue->qnum, validation_errors[fcret]);
} else {
queue->connection_id =
@@ -1376,7 +1376,7 @@ out_free_buffer:
out_no_memory:
if (ret)
dev_err(ctrl->dev,
- "queue %d connect command failed (%d).\n",
+ "queue %d connect I/O queue failed (%d).\n",
queue->qnum, ret);
return ret;
}
@@ -1413,8 +1413,8 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
static void
nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
{
- struct fcnvme_ls_disconnect_rqst *discon_rqst;
- struct fcnvme_ls_disconnect_acc *discon_acc;
+ struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
+ struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
struct nvmefc_ls_req_op *lsop;
struct nvmefc_ls_req *lsreq;
int ret;
@@ -1430,11 +1430,11 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
lsreq = &lsop->ls_req;
lsreq->private = (void *)&lsop[1];
- discon_rqst = (struct fcnvme_ls_disconnect_rqst *)
+ discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)
(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
- discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1];
+ discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
- discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
+ discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC;
discon_rqst->desc_list_len = cpu_to_be32(
sizeof(struct fcnvme_lsdesc_assoc_id) +
sizeof(struct fcnvme_lsdesc_disconn_cmd));
@@ -1451,22 +1451,17 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
discon_rqst->discon_cmd.desc_len =
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_disconn_cmd));
- discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION;
- discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id);
lsreq->rqstaddr = discon_rqst;
lsreq->rqstlen = sizeof(*discon_rqst);
lsreq->rspaddr = discon_acc;
lsreq->rsplen = sizeof(*discon_acc);
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
nvme_fc_disconnect_assoc_done);
if (ret)
kfree(lsop);
-
- /* only meaningful part to terminating the association */
- ctrl->association_id = 0;
}
@@ -1662,7 +1657,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
(freq->rcv_rsplen / 4) ||
be32_to_cpu(op->rsp_iu.xfrd_len) !=
freq->transferred_length ||
- op->rsp_iu.status_code ||
+ op->rsp_iu.ersp_result ||
sqe->common.command_id != cqe->command_id)) {
status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
dev_info(ctrl->ctrl.device,
@@ -1672,7 +1667,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len),
be32_to_cpu(op->rsp_iu.xfrd_len),
freq->transferred_length,
- op->rsp_iu.status_code,
+ op->rsp_iu.ersp_result,
sqe->common.command_id,
cqe->command_id);
goto done;
@@ -1731,9 +1726,14 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
op->rq = rq;
op->rqno = rqno;
- cmdiu->scsi_id = NVME_CMD_SCSI_ID;
+ cmdiu->format_id = NVME_CMD_FORMAT_ID;
cmdiu->fc_id = NVME_CMD_FC_ID;
cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
+ if (queue->qnum)
+ cmdiu->rsv_cat = fccmnd_set_cat_css(0,
+ (NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT));
+ else
+ cmdiu->rsv_cat = fccmnd_set_cat_admin(0);
op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
&op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
@@ -2173,8 +2173,6 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
rq_dma_dir(rq));
- nvme_cleanup_cmd(rq);
-
sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
freq->sg_cnt = 0;
@@ -2305,6 +2303,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
if (!(op->flags & FCOP_FLAGS_AEN))
nvme_fc_unmap_data(ctrl, op->rq, op);
+ nvme_cleanup_cmd(op->rq);
nvme_fc_ctrl_put(ctrl);
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
@@ -2695,7 +2694,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
/* warn if maxcmd is lower than queue_size */
dev_warn(ctrl->ctrl.device,
"queue_size %zu > ctrl maxcmd %u, reducing "
- "to queue_size\n",
+ "to maxcmd\n",
opts->queue_size, ctrl->ctrl.maxcmd);
opts->queue_size = ctrl->ctrl.maxcmd;
}
@@ -2703,7 +2702,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
/* warn if sqsize is lower than queue_size */
dev_warn(ctrl->ctrl.device,
- "queue_size %zu > ctrl sqsize %u, clamping down\n",
+ "queue_size %zu > ctrl sqsize %u, reducing "
+ "to sqsize\n",
opts->queue_size, ctrl->ctrl.sqsize + 1);
opts->queue_size = ctrl->ctrl.sqsize + 1;
}
@@ -2739,6 +2739,7 @@ out_term_aen_ops:
out_disconnect_admin_queue:
/* send a Disconnect(association) LS to fc-nvme target */
nvme_fc_xmt_disconnect_assoc(ctrl);
+ ctrl->association_id = 0;
out_delete_hw_queue:
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
out_free_queue:
@@ -2830,6 +2831,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
if (ctrl->association_id)
nvme_fc_xmt_disconnect_assoc(ctrl);
+ ctrl->association_id = 0;
+
if (ctrl->ctrl.tagset) {
nvme_fc_delete_hw_io_queues(ctrl);
nvme_fc_free_io_queues(ctrl);
diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
new file mode 100644
index 000000000000..a5af21f5d370
--- /dev/null
+++ b/drivers/nvme/host/hwmon.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVM Express hardware monitoring support
+ * Copyright (c) 2019, Guenter Roeck
+ */
+
+#include <linux/hwmon.h>
+#include <asm/unaligned.h>
+
+#include "nvme.h"
+
+/* These macros should be moved to linux/temperature.h */
+#define MILLICELSIUS_TO_KELVIN(t) DIV_ROUND_CLOSEST((t) + 273150, 1000)
+#define KELVIN_TO_MILLICELSIUS(t) ((t) * 1000L - 273150)
+
+struct nvme_hwmon_data {
+ struct nvme_ctrl *ctrl;
+ struct nvme_smart_log log;
+ struct mutex read_lock;
+};
+
+static int nvme_get_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
+ long *temp)
+{
+ unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT;
+ u32 status;
+ int ret;
+
+ if (under)
+ threshold |= NVME_TEMP_THRESH_TYPE_UNDER;
+
+ ret = nvme_get_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0,
+ &status);
+ if (ret > 0)
+ return -EIO;
+ if (ret < 0)
+ return ret;
+ *temp = KELVIN_TO_MILLICELSIUS(status & NVME_TEMP_THRESH_MASK);
+
+ return 0;
+}
+
+static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
+ long temp)
+{
+ unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT;
+ int ret;
+
+ temp = MILLICELSIUS_TO_KELVIN(temp);
+ threshold |= clamp_val(temp, 0, NVME_TEMP_THRESH_MASK);
+
+ if (under)
+ threshold |= NVME_TEMP_THRESH_TYPE_UNDER;
+
+ ret = nvme_set_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0,
+ NULL);
+ if (ret > 0)
+ return -EIO;
+
+ return ret;
+}
+
+static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
+{
+ int ret;
+
+ ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
+ &data->log, sizeof(data->log), 0);
+
+ return ret <= 0 ? ret : -EIO;
+}
+
+static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct nvme_hwmon_data *data = dev_get_drvdata(dev);
+ struct nvme_smart_log *log = &data->log;
+ int temp;
+ int err;
+
+ /*
+ * First handle attributes which don't require us to read
+ * the smart log.
+ */
+ switch (attr) {
+ case hwmon_temp_max:
+ return nvme_get_temp_thresh(data->ctrl, channel, false, val);
+ case hwmon_temp_min:
+ return nvme_get_temp_thresh(data->ctrl, channel, true, val);
+ case hwmon_temp_crit:
+ *val = KELVIN_TO_MILLICELSIUS(data->ctrl->cctemp);
+ return 0;
+ default:
+ break;
+ }
+
+ mutex_lock(&data->read_lock);
+ err = nvme_hwmon_get_smart_log(data);
+ if (err)
+ goto unlock;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ if (!channel)
+ temp = get_unaligned_le16(log->temperature);
+ else
+ temp = le16_to_cpu(log->temp_sensor[channel - 1]);
+ *val = KELVIN_TO_MILLICELSIUS(temp);
+ break;
+ case hwmon_temp_alarm:
+ *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+unlock:
+ mutex_unlock(&data->read_lock);
+ return err;
+}
+
+static int nvme_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long val)
+{
+ struct nvme_hwmon_data *data = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_temp_max:
+ return nvme_set_temp_thresh(data->ctrl, channel, false, val);
+ case hwmon_temp_min:
+ return nvme_set_temp_thresh(data->ctrl, channel, true, val);
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const char * const nvme_hwmon_sensor_names[] = {
+ "Composite",
+ "Sensor 1",
+ "Sensor 2",
+ "Sensor 3",
+ "Sensor 4",
+ "Sensor 5",
+ "Sensor 6",
+ "Sensor 7",
+ "Sensor 8",
+};
+
+static int nvme_hwmon_read_string(struct device *dev,
+ enum hwmon_sensor_types type, u32 attr,
+ int channel, const char **str)
+{
+ *str = nvme_hwmon_sensor_names[channel];
+ return 0;
+}
+
+static umode_t nvme_hwmon_is_visible(const void *_data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ const struct nvme_hwmon_data *data = _data;
+
+ switch (attr) {
+ case hwmon_temp_crit:
+ if (!channel && data->ctrl->cctemp)
+ return 0444;
+ break;
+ case hwmon_temp_max:
+ case hwmon_temp_min:
+ if ((!channel && data->ctrl->wctemp) ||
+ (channel && data->log.temp_sensor[channel - 1])) {
+ if (data->ctrl->quirks &
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
+ return 0444;
+ return 0644;
+ }
+ break;
+ case hwmon_temp_alarm:
+ if (!channel)
+ return 0444;
+ break;
+ case hwmon_temp_input:
+ case hwmon_temp_label:
+ if (!channel || data->log.temp_sensor[channel - 1])
+ return 0444;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct hwmon_channel_info *nvme_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_CRIT | HWMON_T_LABEL | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL),
+ NULL
+};
+
+static const struct hwmon_ops nvme_hwmon_ops = {
+ .is_visible = nvme_hwmon_is_visible,
+ .read = nvme_hwmon_read,
+ .read_string = nvme_hwmon_read_string,
+ .write = nvme_hwmon_write,
+};
+
+static const struct hwmon_chip_info nvme_hwmon_chip_info = {
+ .ops = &nvme_hwmon_ops,
+ .info = nvme_hwmon_info,
+};
+
+void nvme_hwmon_init(struct nvme_ctrl *ctrl)
+{
+ struct device *dev = ctrl->dev;
+ struct nvme_hwmon_data *data;
+ struct device *hwmon;
+ int err;
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return;
+
+ data->ctrl = ctrl;
+ mutex_init(&data->read_lock);
+
+ err = nvme_hwmon_get_smart_log(data);
+ if (err) {
+ dev_warn(dev, "Failed to read smart log (error %d)\n", err);
+ devm_kfree(dev, data);
+ return;
+ }
+
+ hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data,
+ &nvme_hwmon_chip_info,
+ NULL);
+ if (IS_ERR(hwmon)) {
+ dev_warn(dev, "Failed to instantiate hwmon device\n");
+ devm_kfree(dev, data);
+ }
+}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index fc99a40c1ec4..797c18337d96 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -95,6 +95,7 @@ void nvme_failover_req(struct request *req)
}
break;
case NVME_SC_HOST_PATH_ERROR:
+ case NVME_SC_HOST_ABORTED_CMD:
/*
* Temporary transport disruption in talking to the controller.
* Try to send on a new path.
@@ -158,9 +159,11 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
struct nvme_ns *ns;
mutex_lock(&ctrl->scan_lock);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
if (nvme_mpath_clear_current_path(ns))
kblockd_schedule_work(&ns->head->requeue_work);
+ up_read(&ctrl->namespaces_rwsem);
mutex_unlock(&ctrl->scan_lock);
}
@@ -444,8 +447,14 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
struct nvme_ana_group_desc *desc = base + offset;
- u32 nr_nsids = le32_to_cpu(desc->nnsids);
- size_t nsid_buf_size = nr_nsids * sizeof(__le32);
+ u32 nr_nsids;
+ size_t nsid_buf_size;
+
+ if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
+ return -EINVAL;
+
+ nr_nsids = le32_to_cpu(desc->nnsids);
+ nsid_buf_size = nr_nsids * sizeof(__le32);
if (WARN_ON_ONCE(desc->grpid == 0))
return -EINVAL;
@@ -465,8 +474,6 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
return error;
offset += nsid_buf_size;
- if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
- return -EINVAL;
}
return 0;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 22e8401352c2..3b9cbe0668fa 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -115,6 +115,11 @@ enum nvme_quirks {
* Prevent tag overlap between queues
*/
NVME_QUIRK_SHARED_TAGS = (1 << 13),
+
+ /*
+ * Don't change the value of the temperature threshold feature
+ */
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE = (1 << 14),
};
/*
@@ -231,6 +236,8 @@ struct nvme_ctrl {
u16 kas;
u8 npss;
u8 apsta;
+ u16 wctemp;
+ u16 cctemp;
u32 oaes;
u32 aen_result;
u32 ctratt;
@@ -419,9 +426,20 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
}
-static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
+/*
+ * Convert a 512B sector number to a device logical block number.
+ */
+static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector)
+{
+ return sector >> (ns->lba_shift - SECTOR_SHIFT);
+}
+
+/*
+ * Convert a device logical block number to a 512B sector number.
+ */
+static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
{
- return (sector >> (ns->lba_shift - 9));
+ return lba << (ns->lba_shift - SECTOR_SHIFT);
}
static inline void nvme_end_request(struct request *req, __le16 status,
@@ -446,6 +464,11 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
put_device(ctrl->device);
}
+static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
+{
+ return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH;
+}
+
void nvme_complete_rq(struct request *req);
bool nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
@@ -652,4 +675,10 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
return dev_to_disk(dev)->private_data;
}
+#ifdef CONFIG_NVME_HWMON
+void nvme_hwmon_init(struct nvme_ctrl *ctrl);
+#else
+static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { }
+#endif
+
#endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 869f462e6b6e..dcaad5831cee 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -925,7 +925,6 @@ static void nvme_pci_complete_rq(struct request *req)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_dev *dev = iod->nvmeq->dev;
- nvme_cleanup_cmd(req);
if (blk_integrity_rq(req))
dma_unmap_page(dev->dev, iod->meta_dma,
rq_integrity_vec(req)->bv_len, rq_data_dir(req));
@@ -968,8 +967,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvmeq->qid == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
+ if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) {
nvme_complete_async_event(&nvmeq->dev->ctrl,
cqe->status, &cqe->result);
return;
@@ -2982,7 +2980,7 @@ static int nvme_suspend(struct device *dev)
/*
* Clearing npss forces a controller reset on resume. The
- * correct value will be resdicovered then.
+ * correct value will be rediscovered then.
*/
ret = nvme_disable_prepare_reset(ndev, true);
ctrl->npss = 0;
@@ -3082,7 +3080,8 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
- NVME_QUIRK_MEDIUM_PRIO_SQ },
+ NVME_QUIRK_MEDIUM_PRIO_SQ |
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE },
{ PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f19a28b4e997..dce59459ed41 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1160,8 +1160,6 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
}
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
-
- nvme_cleanup_cmd(rq);
sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
}
@@ -1501,8 +1499,8 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvme_rdma_queue_idx(queue) == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH))
+ if (unlikely(nvme_is_aen_req(nvme_rdma_queue_idx(queue),
+ cqe->command_id)))
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
&cqe->result);
else
@@ -1768,7 +1766,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
if (unlikely(err < 0)) {
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", err);
- nvme_cleanup_cmd(rq);
goto err;
}
@@ -1779,18 +1776,19 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr ? &req->reg_wr.wr : NULL);
- if (unlikely(err)) {
- nvme_rdma_unmap_data(queue, rq);
- goto err;
- }
+ if (unlikely(err))
+ goto err_unmap;
return BLK_STS_OK;
+err_unmap:
+ nvme_rdma_unmap_data(queue, rq);
err:
if (err == -ENOMEM || err == -EAGAIN)
ret = BLK_STS_RESOURCE;
else
ret = BLK_STS_IOERR;
+ nvme_cleanup_cmd(rq);
unmap_qe:
ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
DMA_TO_DEVICE);
@@ -2133,8 +2131,16 @@ err_unreg_client:
static void __exit nvme_rdma_cleanup_module(void)
{
+ struct nvme_rdma_ctrl *ctrl;
+
nvmf_unregister_transport(&nvme_rdma_transport);
ib_unregister_client(&nvme_rdma_ib_client);
+
+ mutex_lock(&nvme_rdma_ctrl_mutex);
+ list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list)
+ nvme_delete_ctrl(&ctrl->ctrl);
+ mutex_unlock(&nvme_rdma_ctrl_mutex);
+ flush_workqueue(nvme_delete_wq);
}
module_init(nvme_rdma_init_module);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 7544be84ab35..6d43b23a0fc8 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -491,8 +491,8 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvme_tcp_queue_id(queue) == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH))
+ if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue),
+ cqe->command_id)))
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
&cqe->result);
else