Diffstat (limited to 'drivers/nvme/host')
 drivers/nvme/host/Kconfig     |  10
 drivers/nvme/host/Makefile    |   1
 drivers/nvme/host/core.c      |  42
 drivers/nvme/host/fc.c        |  49
 drivers/nvme/host/hwmon.c     | 259
 drivers/nvme/host/multipath.c |  15
 drivers/nvme/host/nvme.h      |  33
 drivers/nvme/host/pci.c       |   9
 drivers/nvme/host/rdma.c      |  24
 drivers/nvme/host/tcp.c       |   4
10 files changed, 387 insertions, 59 deletions
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 2b36f052bfb9..c6439638a419 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -23,6 +23,16 @@ config NVME_MULTIPATH /dev/nvmeXnY device will show up for each NVMe namespaces, even if it is accessible through multiple controllers. +config NVME_HWMON + bool "NVMe hardware monitoring" + depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON) + help + This provides support for NVMe hardware monitoring. If enabled, + a hardware monitoring device will be created for each NVMe drive + in the system. + + If unsure, say N. + config NVME_FABRICS tristate diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 8a4b671c5f0c..fc7b26be692d 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_NVM) += lightnvm.o nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o +nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o nvme-y += pci.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fa7ba09dca77..8e8527408db3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -283,6 +283,8 @@ void nvme_complete_rq(struct request *req) trace_nvme_complete_rq(req); + nvme_cleanup_cmd(req); + if (nvme_req(req)->ctrl->kas) nvme_req(req)->ctrl->comp_seen = true; @@ -313,7 +315,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) if (blk_mq_request_completed(req)) return true; - nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR; + nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(req); return true; } @@ -611,8 +613,14 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_dsm_range *range; struct bio *bio; - range = kmalloc_array(segments, sizeof(*range), - GFP_ATOMIC | __GFP_NOWARN); + /* + * Some devices do not consider the DSM 'Number of Ranges' field when + * determining how much data to DMA. Always allocate memory for maximum + * number of segments to prevent device reading beyond end of buffer. 
+ */ + static const size_t alloc_size = sizeof(*range) * NVME_DSM_MAX_RANGES; + + range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN); if (!range) { /* * If we fail allocation our range, fallback to the controller @@ -626,7 +634,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, } __rq_for_each_bio(bio, req) { - u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); + u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; if (n < segments) { @@ -652,7 +660,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, req->special_vec.bv_page = virt_to_page(range); req->special_vec.bv_offset = offset_in_page(range); - req->special_vec.bv_len = sizeof(*range) * segments; + req->special_vec.bv_len = alloc_size; req->rq_flags |= RQF_SPECIAL_PAYLOAD; return BLK_STS_OK; @@ -667,7 +675,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes; cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id); cmnd->write_zeroes.slba = - cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->write_zeroes.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); cmnd->write_zeroes.control = 0; @@ -691,7 +699,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); - cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams) @@ -1647,7 +1655,7 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) static void nvme_set_chunk_size(struct nvme_ns *ns) { - u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9)); + u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); } @@ -1684,8 +1692,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) { - u32 max_sectors; - unsigned short bs = 1 << ns->lba_shift; + u64 max_blocks; if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) @@ -1701,11 +1708,12 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) * nvme_init_identify() if available. 
*/ if (ns->ctrl->max_hw_sectors == UINT_MAX) - max_sectors = ((u32)(USHRT_MAX + 1) * bs) >> 9; + max_blocks = (u64)USHRT_MAX + 1; else - max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9; + max_blocks = ns->ctrl->max_hw_sectors + 1; - blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors); + blk_queue_max_write_zeroes_sectors(disk->queue, + nvme_lba_to_sect(ns, max_blocks)); } static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, @@ -1748,7 +1756,7 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { - sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9); + sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); unsigned short bs = 1 << ns->lba_shift; u32 atomic_bs, phys_bs, io_opt; @@ -2796,6 +2804,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->oncs = le16_to_cpu(id->oncs); ctrl->mtfa = le16_to_cpu(id->mtfa); ctrl->oaes = le32_to_cpu(id->oaes); + ctrl->wctemp = le16_to_cpu(id->wctemp); + ctrl->cctemp = le16_to_cpu(id->cctemp); + atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; if (id->mdts) @@ -2895,6 +2906,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; + if (!ctrl->identified) + nvme_hwmon_init(ctrl); + ctrl->identified = true; return 0; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 265f89e11d8b..679a721ae229 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1224,7 +1224,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, lsreq->rqstlen = sizeof(*assoc_rqst); lsreq->rspaddr = assoc_acc; lsreq->rsplen = sizeof(*assoc_acc); - lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) @@ -1264,7 +1264,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, if (fcret) { ret = -EBADF; dev_err(ctrl->dev, - "q %d connect failed: %s\n", + "q %d Create Association LS failed: %s\n", queue->qnum, validation_errors[fcret]); } else { ctrl->association_id = @@ -1332,7 +1332,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, lsreq->rqstlen = sizeof(*conn_rqst); lsreq->rspaddr = conn_acc; lsreq->rsplen = sizeof(*conn_acc); - lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) @@ -1363,7 +1363,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (fcret) { ret = -EBADF; dev_err(ctrl->dev, - "q %d connect failed: %s\n", + "q %d Create I/O Connection LS failed: %s\n", queue->qnum, validation_errors[fcret]); } else { queue->connection_id = @@ -1376,7 +1376,7 @@ out_free_buffer: out_no_memory: if (ret) dev_err(ctrl->dev, - "queue %d connect command failed (%d).\n", + "queue %d connect I/O queue failed (%d).\n", queue->qnum, ret); return ret; } @@ -1413,8 +1413,8 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) static void nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) { - struct fcnvme_ls_disconnect_rqst *discon_rqst; - struct fcnvme_ls_disconnect_acc *discon_acc; + struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst; + struct fcnvme_ls_disconnect_assoc_acc *discon_acc; struct nvmefc_ls_req_op *lsop; struct nvmefc_ls_req *lsreq; int ret; @@ -1430,11 +1430,11 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) lsreq = &lsop->ls_req; 
lsreq->private = (void *)&lsop[1]; - discon_rqst = (struct fcnvme_ls_disconnect_rqst *) + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *) (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); - discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1]; + discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; - discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; + discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; discon_rqst->desc_list_len = cpu_to_be32( sizeof(struct fcnvme_lsdesc_assoc_id) + sizeof(struct fcnvme_lsdesc_disconn_cmd)); @@ -1451,22 +1451,17 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) discon_rqst->discon_cmd.desc_len = fcnvme_lsdesc_len( sizeof(struct fcnvme_lsdesc_disconn_cmd)); - discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION; - discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id); lsreq->rqstaddr = discon_rqst; lsreq->rqstlen = sizeof(*discon_rqst); lsreq->rspaddr = discon_acc; lsreq->rsplen = sizeof(*discon_acc); - lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, nvme_fc_disconnect_assoc_done); if (ret) kfree(lsop); - - /* only meaningful part to terminating the association */ - ctrl->association_id = 0; } @@ -1662,7 +1657,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) (freq->rcv_rsplen / 4) || be32_to_cpu(op->rsp_iu.xfrd_len) != freq->transferred_length || - op->rsp_iu.status_code || + op->rsp_iu.ersp_result || sqe->common.command_id != cqe->command_id)) { status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); dev_info(ctrl->ctrl.device, @@ -1672,7 +1667,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len), be32_to_cpu(op->rsp_iu.xfrd_len), freq->transferred_length, - op->rsp_iu.status_code, + op->rsp_iu.ersp_result, sqe->common.command_id, cqe->command_id); goto done; @@ -1731,9 +1726,14 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, op->rq = rq; op->rqno = rqno; - cmdiu->scsi_id = NVME_CMD_SCSI_ID; + cmdiu->format_id = NVME_CMD_FORMAT_ID; cmdiu->fc_id = NVME_CMD_FC_ID; cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); + if (queue->qnum) + cmdiu->rsv_cat = fccmnd_set_cat_css(0, + (NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT)); + else + cmdiu->rsv_cat = fccmnd_set_cat_admin(0); op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev, &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE); @@ -2173,8 +2173,6 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, rq_dma_dir(rq)); - nvme_cleanup_cmd(rq); - sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE); freq->sg_cnt = 0; @@ -2305,6 +2303,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (!(op->flags & FCOP_FLAGS_AEN)) nvme_fc_unmap_data(ctrl, op->rq, op); + nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE && @@ -2695,7 +2694,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) /* warn if maxcmd is lower than queue_size */ dev_warn(ctrl->ctrl.device, "queue_size %zu > ctrl maxcmd %u, reducing " - "to queue_size\n", + "to maxcmd\n", opts->queue_size, ctrl->ctrl.maxcmd); opts->queue_size = ctrl->ctrl.maxcmd; } @@ -2703,7 +2702,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (opts->queue_size > ctrl->ctrl.sqsize + 1) { /* warn if sqsize is lower than queue_size */ dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl sqsize %u, 
clamping down\n", + "queue_size %zu > ctrl sqsize %u, reducing " + "to sqsize\n", opts->queue_size, ctrl->ctrl.sqsize + 1); opts->queue_size = ctrl->ctrl.sqsize + 1; } @@ -2739,6 +2739,7 @@ out_term_aen_ops: out_disconnect_admin_queue: /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); + ctrl->association_id = 0; out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); out_free_queue: @@ -2830,6 +2831,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) if (ctrl->association_id) nvme_fc_xmt_disconnect_assoc(ctrl); + ctrl->association_id = 0; + if (ctrl->ctrl.tagset) { nvme_fc_delete_hw_io_queues(ctrl); nvme_fc_free_io_queues(ctrl); diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c new file mode 100644 index 000000000000..a5af21f5d370 --- /dev/null +++ b/drivers/nvme/host/hwmon.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVM Express hardware monitoring support + * Copyright (c) 2019, Guenter Roeck + */ + +#include <linux/hwmon.h> +#include <asm/unaligned.h> + +#include "nvme.h" + +/* These macros should be moved to linux/temperature.h */ +#define MILLICELSIUS_TO_KELVIN(t) DIV_ROUND_CLOSEST((t) + 273150, 1000) +#define KELVIN_TO_MILLICELSIUS(t) ((t) * 1000L - 273150) + +struct nvme_hwmon_data { + struct nvme_ctrl *ctrl; + struct nvme_smart_log log; + struct mutex read_lock; +}; + +static int nvme_get_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, + long *temp) +{ + unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT; + u32 status; + int ret; + + if (under) + threshold |= NVME_TEMP_THRESH_TYPE_UNDER; + + ret = nvme_get_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0, + &status); + if (ret > 0) + return -EIO; + if (ret < 0) + return ret; + *temp = KELVIN_TO_MILLICELSIUS(status & NVME_TEMP_THRESH_MASK); + + return 0; +} + +static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, + long temp) +{ + unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT; + int ret; + + temp = MILLICELSIUS_TO_KELVIN(temp); + threshold |= clamp_val(temp, 0, NVME_TEMP_THRESH_MASK); + + if (under) + threshold |= NVME_TEMP_THRESH_TYPE_UNDER; + + ret = nvme_set_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0, + NULL); + if (ret > 0) + return -EIO; + + return ret; +} + +static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data) +{ + int ret; + + ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0, + &data->log, sizeof(data->log), 0); + + return ret <= 0 ? ret : -EIO; +} + +static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + struct nvme_hwmon_data *data = dev_get_drvdata(dev); + struct nvme_smart_log *log = &data->log; + int temp; + int err; + + /* + * First handle attributes which don't require us to read + * the smart log. 
+ */ + switch (attr) { + case hwmon_temp_max: + return nvme_get_temp_thresh(data->ctrl, channel, false, val); + case hwmon_temp_min: + return nvme_get_temp_thresh(data->ctrl, channel, true, val); + case hwmon_temp_crit: + *val = KELVIN_TO_MILLICELSIUS(data->ctrl->cctemp); + return 0; + default: + break; + } + + mutex_lock(&data->read_lock); + err = nvme_hwmon_get_smart_log(data); + if (err) + goto unlock; + + switch (attr) { + case hwmon_temp_input: + if (!channel) + temp = get_unaligned_le16(log->temperature); + else + temp = le16_to_cpu(log->temp_sensor[channel - 1]); + *val = KELVIN_TO_MILLICELSIUS(temp); + break; + case hwmon_temp_alarm: + *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE); + break; + default: + err = -EOPNOTSUPP; + break; + } +unlock: + mutex_unlock(&data->read_lock); + return err; +} + +static int nvme_hwmon_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + struct nvme_hwmon_data *data = dev_get_drvdata(dev); + + switch (attr) { + case hwmon_temp_max: + return nvme_set_temp_thresh(data->ctrl, channel, false, val); + case hwmon_temp_min: + return nvme_set_temp_thresh(data->ctrl, channel, true, val); + default: + break; + } + + return -EOPNOTSUPP; +} + +static const char * const nvme_hwmon_sensor_names[] = { + "Composite", + "Sensor 1", + "Sensor 2", + "Sensor 3", + "Sensor 4", + "Sensor 5", + "Sensor 6", + "Sensor 7", + "Sensor 8", +}; + +static int nvme_hwmon_read_string(struct device *dev, + enum hwmon_sensor_types type, u32 attr, + int channel, const char **str) +{ + *str = nvme_hwmon_sensor_names[channel]; + return 0; +} + +static umode_t nvme_hwmon_is_visible(const void *_data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + const struct nvme_hwmon_data *data = _data; + + switch (attr) { + case hwmon_temp_crit: + if (!channel && data->ctrl->cctemp) + return 0444; + break; + case hwmon_temp_max: + case hwmon_temp_min: + if ((!channel && data->ctrl->wctemp) || + (channel && data->log.temp_sensor[channel - 1])) { + if (data->ctrl->quirks & + NVME_QUIRK_NO_TEMP_THRESH_CHANGE) + return 0444; + return 0644; + } + break; + case hwmon_temp_alarm: + if (!channel) + return 0444; + break; + case hwmon_temp_input: + case hwmon_temp_label: + if (!channel || data->log.temp_sensor[channel - 1]) + return 0444; + break; + default: + break; + } + return 0; +} + +static const struct hwmon_channel_info *nvme_hwmon_info[] = { + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_CRIT | HWMON_T_LABEL | HWMON_T_ALARM, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_LABEL), + NULL +}; + +static const struct hwmon_ops nvme_hwmon_ops = { + .is_visible = nvme_hwmon_is_visible, + .read = nvme_hwmon_read, + .read_string = nvme_hwmon_read_string, + .write = nvme_hwmon_write, +}; + +static const struct hwmon_chip_info nvme_hwmon_chip_info = { + .ops = &nvme_hwmon_ops, + .info = nvme_hwmon_info, +}; + +void nvme_hwmon_init(struct nvme_ctrl *ctrl) +{ + struct device *dev = ctrl->dev; + struct nvme_hwmon_data 
*data; + struct device *hwmon; + int err; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return; + + data->ctrl = ctrl; + mutex_init(&data->read_lock); + + err = nvme_hwmon_get_smart_log(data); + if (err) { + dev_warn(dev, "Failed to read smart log (error %d)\n", err); + devm_kfree(dev, data); + return; + } + + hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data, + &nvme_hwmon_chip_info, + NULL); + if (IS_ERR(hwmon)) { + dev_warn(dev, "Failed to instantiate hwmon device\n"); + devm_kfree(dev, data); + } +} diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index fc99a40c1ec4..797c18337d96 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -95,6 +95,7 @@ void nvme_failover_req(struct request *req) } break; case NVME_SC_HOST_PATH_ERROR: + case NVME_SC_HOST_ABORTED_CMD: /* * Temporary transport disruption in talking to the controller. * Try to send on a new path. @@ -158,9 +159,11 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) struct nvme_ns *ns; mutex_lock(&ctrl->scan_lock); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) if (nvme_mpath_clear_current_path(ns)) kblockd_schedule_work(&ns->head->requeue_work); + up_read(&ctrl->namespaces_rwsem); mutex_unlock(&ctrl->scan_lock); } @@ -444,8 +447,14 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) { struct nvme_ana_group_desc *desc = base + offset; - u32 nr_nsids = le32_to_cpu(desc->nnsids); - size_t nsid_buf_size = nr_nsids * sizeof(__le32); + u32 nr_nsids; + size_t nsid_buf_size; + + if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc))) + return -EINVAL; + + nr_nsids = le32_to_cpu(desc->nnsids); + nsid_buf_size = nr_nsids * sizeof(__le32); if (WARN_ON_ONCE(desc->grpid == 0)) return -EINVAL; @@ -465,8 +474,6 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, return error; offset += nsid_buf_size; - if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc))) - return -EINVAL; } return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 22e8401352c2..3b9cbe0668fa 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -115,6 +115,11 @@ enum nvme_quirks { * Prevent tag overlap between queues */ NVME_QUIRK_SHARED_TAGS = (1 << 13), + + /* + * Don't change the value of the temperature threshold feature + */ + NVME_QUIRK_NO_TEMP_THRESH_CHANGE = (1 << 14), }; /* @@ -231,6 +236,8 @@ struct nvme_ctrl { u16 kas; u8 npss; u8 apsta; + u16 wctemp; + u16 cctemp; u32 oaes; u32 aen_result; u32 ctratt; @@ -419,9 +426,20 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); } -static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) +/* + * Convert a 512B sector number to a device logical block number. + */ +static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector) +{ + return sector >> (ns->lba_shift - SECTOR_SHIFT); +} + +/* + * Convert a device logical block number to a 512B sector number. 
+ */ +static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba) { - return (sector >> (ns->lba_shift - 9)); + return lba << (ns->lba_shift - SECTOR_SHIFT); } static inline void nvme_end_request(struct request *req, __le16 status, @@ -446,6 +464,11 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl) put_device(ctrl->device); } +static inline bool nvme_is_aen_req(u16 qid, __u16 command_id) +{ + return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH; +} + void nvme_complete_rq(struct request *req); bool nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, @@ -652,4 +675,10 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) return dev_to_disk(dev)->private_data; } +#ifdef CONFIG_NVME_HWMON +void nvme_hwmon_init(struct nvme_ctrl *ctrl); +#else +static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { } +#endif + #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 869f462e6b6e..dcaad5831cee 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -925,7 +925,6 @@ static void nvme_pci_complete_rq(struct request *req) struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_dev *dev = iod->nvmeq->dev; - nvme_cleanup_cmd(req); if (blk_integrity_rq(req)) dma_unmap_page(dev->dev, iod->meta_dma, rq_integrity_vec(req)->bv_len, rq_data_dir(req)); @@ -968,8 +967,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvmeq->qid == 0 && - cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) { + if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) { nvme_complete_async_event(&nvmeq->dev->ctrl, cqe->status, &cqe->result); return; @@ -2982,7 +2980,7 @@ static int nvme_suspend(struct device *dev) /* * Clearing npss forces a controller reset on resume. The - * correct value will be resdicovered then. + * correct value will be rediscovered then. */ ret = nvme_disable_prepare_reset(ndev, true); ctrl->npss = 0; @@ -3082,7 +3080,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | - NVME_QUIRK_MEDIUM_PRIO_SQ }, + NVME_QUIRK_MEDIUM_PRIO_SQ | + NVME_QUIRK_NO_TEMP_THRESH_CHANGE }, { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f19a28b4e997..dce59459ed41 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1160,8 +1160,6 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, } ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq)); - - nvme_cleanup_cmd(rq); sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE); } @@ -1501,8 +1499,8 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. 
*/ - if (unlikely(nvme_rdma_queue_idx(queue) == 0 && - cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) + if (unlikely(nvme_is_aen_req(nvme_rdma_queue_idx(queue), + cqe->command_id))) nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); else @@ -1768,7 +1766,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(err < 0)) { dev_err(queue->ctrl->ctrl.device, "Failed to map data (%d)\n", err); - nvme_cleanup_cmd(rq); goto err; } @@ -1779,18 +1776,19 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr ? &req->reg_wr.wr : NULL); - if (unlikely(err)) { - nvme_rdma_unmap_data(queue, rq); - goto err; - } + if (unlikely(err)) + goto err_unmap; return BLK_STS_OK; +err_unmap: + nvme_rdma_unmap_data(queue, rq); err: if (err == -ENOMEM || err == -EAGAIN) ret = BLK_STS_RESOURCE; else ret = BLK_STS_IOERR; + nvme_cleanup_cmd(rq); unmap_qe: ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command), DMA_TO_DEVICE); @@ -2133,8 +2131,16 @@ err_unreg_client: static void __exit nvme_rdma_cleanup_module(void) { + struct nvme_rdma_ctrl *ctrl; + nvmf_unregister_transport(&nvme_rdma_transport); ib_unregister_client(&nvme_rdma_ib_client); + + mutex_lock(&nvme_rdma_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) + nvme_delete_ctrl(&ctrl->ctrl); + mutex_unlock(&nvme_rdma_ctrl_mutex); + flush_workqueue(nvme_delete_wq); } module_init(nvme_rdma_init_module); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 7544be84ab35..6d43b23a0fc8 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -491,8 +491,8 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvme_tcp_queue_id(queue) == 0 && - cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) + if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue), + cqe->command_id))) nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); else |
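
Two standalone sketches of the arithmetic introduced above follow; they are illustrations only, not part of the patch.

The hwmon support added in hwmon.c translates between NVMe temperatures (reported in Kelvin by the SMART log and the Temperature Threshold feature) and the millidegree-Celsius values the hwmon sysfs interface expects. A minimal userspace sketch of that conversion, using the same 273.15 C offset and rounding as the macros in hwmon.c (the kernel version uses DIV_ROUND_CLOSEST; this simplified form assumes non-negative millicelsius input, and the sample value is made up):

#include <stdio.h>

/* Same arithmetic as the macros added in hwmon.c */
#define MILLICELSIUS_TO_KELVIN(t) (((t) + 273150 + 500) / 1000)
#define KELVIN_TO_MILLICELSIUS(t) ((t) * 1000L - 273150)

int main(void)
{
	long composite_kelvin = 323;	/* e.g. SMART composite temperature: 323 K */
	long mc = KELVIN_TO_MILLICELSIUS(composite_kelvin);

	printf("%ld K -> %ld millicelsius\n", composite_kelvin, mc);	/* 49850 */
	printf("%ld millicelsius -> %ld K\n", mc, MILLICELSIUS_TO_KELVIN(mc));	/* 323 */
	return 0;
}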
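
The nvme_sect_to_lba()/nvme_lba_to_sect() helpers added to nvme.h centralize the open-coded "(lba_shift - 9)" shifts between 512-byte block-layer sectors and device logical blocks. A rough standalone illustration, assuming a namespace formatted with 4096-byte logical blocks (helper names and values here are hypothetical stand-ins, not the kernel functions):

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9	/* Linux block layer sectors are always 512 bytes */

/* Convert a 512B sector number to a device logical block number. */
static uint64_t sect_to_lba(unsigned int lba_shift, uint64_t sector)
{
	return sector >> (lba_shift - SECTOR_SHIFT);
}

/* Convert a device logical block number to a 512B sector number. */
static uint64_t lba_to_sect(unsigned int lba_shift, uint64_t lba)
{
	return lba << (lba_shift - SECTOR_SHIFT);
}

int main(void)
{
	unsigned int lba_shift = 12;	/* 4096-byte logical blocks */

	/* 512B sector 4096 sits in logical block 512 on a 4K-formatted namespace. */
	printf("sector 4096 -> LBA %llu\n",
	       (unsigned long long)sect_to_lba(lba_shift, 4096));
	printf("LBA 512 -> sector %llu\n",
	       (unsigned long long)lba_to_sect(lba_shift, 512));
	return 0;
}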