From 288dab8a35a0bde426a09870943c8d3ee3a50dab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Jun 2016 16:00:36 +0200 Subject: block: add a separate operation type for secure erase Instead of overloading the discard support with the REQ_SECURE flag. Use the opportunity to rename the queue flag as well, and remove the dead checks for this flag in the RAID 1 and RAID 10 drivers that don't claim support for secure erase. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 5 ++--- include/linux/blkdev.h | 23 ++++------------------- 2 files changed, 6 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 562ab8301217..efba1f2ace2e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -163,7 +163,6 @@ enum rq_flag_bits { __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ - __REQ_SECURE, /* secure discard (used with REQ_OP_DISCARD) */ __REQ_NOIDLE, /* don't anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ @@ -212,7 +211,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \ - REQ_PREFLUSH | REQ_FUA | REQ_SECURE | REQ_INTEGRITY | REQ_NOMERGE) + REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE) #define REQ_CLONE_MASK REQ_COMMON_MASK /* This mask is used for both bio and request merge checking */ @@ -239,7 +238,6 @@ enum rq_flag_bits { #define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ) #define REQ_IO_STAT (1ULL << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE) -#define REQ_SECURE (1ULL << __REQ_SECURE) #define REQ_PM (1ULL << __REQ_PM) #define REQ_HASHED (1ULL << __REQ_HASHED) #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) @@ -248,6 +246,7 @@ enum req_op { REQ_OP_READ, REQ_OP_WRITE, REQ_OP_DISCARD, /* request to discard sectors */ + REQ_OP_SECURE_ERASE, /* request to securely erase sectors */ REQ_OP_WRITE_SAME, /* write same block many times */ REQ_OP_FLUSH, /* request for cache flush */ }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0c9f8793c87e..53fee6123893 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -497,7 +497,7 @@ struct request_queue { #define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ #define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ -#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ +#define QUEUE_FLAG_SECERASE 17 /* supports secure erase */ #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ @@ -593,8 +593,8 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) -#define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ - test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) +#define blk_queue_secure_erase(q) \ + (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) #define blk_noretry_request(rq) \ ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ @@ -675,21 +675,6 @@ static inline bool rq_mergeable(struct request *rq) return true; } -static inline bool blk_check_merge_flags(unsigned int flags1, unsigned int op1, - unsigned int flags2, unsigned int op2) -{ - if ((op1 == REQ_OP_DISCARD) != (op2 == REQ_OP_DISCARD)) - return false; - - if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE)) - return false; - - if ((op1 == REQ_OP_WRITE_SAME) != (op2 == REQ_OP_WRITE_SAME)) - return false; - - return true; -} - static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) { if (bio_data(a) == bio_data(b)) @@ -1158,7 +1143,7 @@ extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, int op_flags, + sector_t nr_sects, gfp_t gfp_mask, int flags, struct bio **biop); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); -- cgit v1.2.3 From 14e974a84e831bf9a44495c7256a6846e7f77630 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Jun 2016 23:20:43 +0200 Subject: nvme.h: add RTD3R, RTD3E and OAES fields These have been added in NVMe 1.2 and we'll need at least oaes for the NVMe target driver. Reviewed-by: Sagi Grimberg Reviewed-by: Jay Freyensee Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7d51b2904cb7..ff5ebc378cc3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -107,7 +107,10 @@ struct nvme_id_ctrl { __u8 mdts; __le16 cntlid; __le32 ver; - __u8 rsvd84[172]; + __le32 rtd3r; + __le32 rtd3e; + __le32 oaes; + __u8 rsvd96[160]; __le16 oacs; __u8 acl; __u8 aerl; -- cgit v1.2.3 From 725b358836ed038d7d8eafef86330b3a0b3f9c2f Mon Sep 17 00:00:00 2001 From: Armen Baloyan Date: Mon, 6 Jun 2016 23:20:44 +0200 Subject: nvme.h: Add get_log_page command strucure Add get_log_page command structure and a corresponding entry in nvme_command union Signed-off-by: Armen Baloyan Reviewed-by: Jay Freyensee Reviewed--by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index ff5ebc378cc3..9925b85246d8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -519,6 +519,24 @@ struct nvme_format_cmd { __u32 rsvd11[5]; }; +struct nvme_get_log_page_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __u8 lid; + __u8 rsvd10; + __le16 numdl; + __le16 numdu; + __u16 rsvd11; + __le32 lpol; + __le32 lpou; + __u32 rsvd14[2]; +}; + struct nvme_command { union { struct nvme_common_command common; @@ -532,6 +550,7 @@ struct nvme_command { struct nvme_format_cmd format; struct nvme_dsm_cmd dsm; struct nvme_abort_cmd abort; + struct nvme_get_log_page_command get_log_page; }; }; -- cgit v1.2.3 From 69cd27e2511616f9b402d1bad4c49f91aa7411a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Jun 2016 23:20:45 +0200 Subject: nvme.h: add NVM command set SQE/CQE size defines Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9925b85246d8..9807d98ca93a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -50,6 +50,13 @@ enum { #define NVME_CMB_CQS(cmbsz) ((cmbsz) & 0x2) #define NVME_CMB_SQS(cmbsz) ((cmbsz) & 0x1) +/* + * Submission and Completion Queue Entry Sizes for the NVM command set. + * (In bytes and specified as a power of two (2^n)). + */ +#define NVME_NVM_IOSQES 6 +#define NVME_NVM_IOCQES 4 + enum { NVME_CC_ENABLE = 1 << 0, NVME_CC_CSS_NVM = 0 << 4, @@ -61,8 +68,8 @@ enum { NVME_CC_SHN_NORMAL = 1 << 14, NVME_CC_SHN_ABRUPT = 2 << 14, NVME_CC_SHN_MASK = 3 << 14, - NVME_CC_IOSQES = 6 << 16, - NVME_CC_IOCQES = 4 << 20, + NVME_CC_IOSQES = NVME_NVM_IOSQES << 16, + NVME_CC_IOCQES = NVME_NVM_IOCQES << 20, NVME_CSTS_RDY = 1 << 0, NVME_CSTS_CFS = 1 << 1, NVME_CSTS_NSSRO = 1 << 4, -- cgit v1.2.3 From 79f370eac63796a8933b210bca02f006ba32d22e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Jun 2016 23:20:46 +0200 Subject: nvme.h: add AER constants Reviewed-by: Jay Freyensee Reviewed-by: Sagi Grimberg Reviewed-by: Ming Lin Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9807d98ca93a..a9b8c7bcaf85 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -284,6 +284,12 @@ struct nvme_reservation_status { } regctl_ds[]; }; +enum nvme_async_event_type { + NVME_AER_TYPE_ERROR = 0, + NVME_AER_TYPE_SMART = 1, + NVME_AER_TYPE_NOTICE = 2, +}; + /* I/O commands */ enum nvme_opcode { -- cgit v1.2.3 From 3972be23bd2d2bcfaa44595a260a371cd9218872 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 6 Jun 2016 23:20:47 +0200 Subject: nvme.h: add constants for PSDT and FUSE values Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a9b8c7bcaf85..2b82f05fe4ec 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -306,6 +306,29 @@ enum nvme_opcode { nvme_cmd_resv_release = 0x15, }; +/* + * Lowest two bits of our flags field (FUSE field in the spec): + * + * @NVME_CMD_FUSE_FIRST: Fused Operation, first command + * @NVME_CMD_FUSE_SECOND: Fused Operation, second command + * + * Highest two bits in our flags field (PSDT field in the spec): + * + * @NVME_CMD_PSDT_SGL_METABUF: Use SGLS for this transfer, + * If used, MPTR contains addr of single physical buffer (byte aligned). + * @NVME_CMD_PSDT_SGL_METASEG: Use SGLS for this transfer, + * If used, MPTR contains an address of an SGL segment containing + * exactly 1 SGL descriptor (qword aligned). + */ +enum { + NVME_CMD_FUSE_FIRST = (1 << 0), + NVME_CMD_FUSE_SECOND = (1 << 1), + + NVME_CMD_SGL_METABUF = (1 << 6), + NVME_CMD_SGL_METASEG = (1 << 7), + NVME_CMD_SGL_ALL = NVME_CMD_SGL_METABUF | NVME_CMD_SGL_METASEG, +}; + struct nvme_common_command { __u8 opcode; __u8 flags; -- cgit v1.2.3 From 7a5abb4b48570c3552e33ff4c72ae1e8dac3ba15 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Jun 2016 23:20:49 +0200 Subject: nvme: factor out a add nvme_is_write helper Centralize the check if a given NVMe command reads or writes data. Reviewed-by: Sagi Grimberg Reviewed-by: Jay Freyensee Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 5 ++--- include/linux/nvme.h | 5 +++++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7ded308fd67b..45fd6cca4a27 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -193,10 +193,9 @@ EXPORT_SYMBOL_GPL(nvme_requeue_req); struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags) { - bool write = cmd->common.opcode & 1; struct request *req; - req = blk_mq_alloc_request(q, write, flags); + req = blk_mq_alloc_request(q, nvme_is_write(cmd), flags); if (IS_ERR(req)) return req; @@ -361,7 +360,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u32 *result, unsigned timeout) { - bool write = cmd->common.opcode & 1; + bool write = nvme_is_write(cmd); struct nvme_completion cqe; struct nvme_ns *ns = q->queuedata; struct gendisk *disk = ns ? ns->disk : NULL; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2b82f05fe4ec..dc815cc6718d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -590,6 +590,11 @@ struct nvme_command { }; }; +static inline bool nvme_is_write(struct nvme_command *cmd) +{ + return cmd->common.opcode & 1; +} + enum { NVME_SC_SUCCESS = 0x0, NVME_SC_INVALID_OPCODE = 0x1, -- cgit v1.2.3 From a5ca66c419410b4a26ab47b120d5424bd1d33700 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 14 Jun 2016 00:26:14 +0200 Subject: drbd: Introduce new disk config option rs-discard-granularity As long as the value is 0 the feature is disabled. With setting it to a positive value, DRBD limits and aligns its resync requests to the rs-discard-granularity setting. If the sync source detects all zeros in such a block, the resync target discards the range on disk. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 32 +++++++++++++++++++++++++++++--- include/linux/drbd_genl.h | 6 +++--- include/linux/drbd_limits.h | 6 ++++++ 3 files changed, 38 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fad03e4feef6..99339dfe4389 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1348,12 +1348,38 @@ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b) a->disk_drain != b->disk_drain; } -static void sanitize_disk_conf(struct disk_conf *disk_conf, struct drbd_backing_dev *nbc) +static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf, + struct drbd_backing_dev *nbc) { + struct request_queue * const q = nbc->backing_bdev->bd_disk->queue; + if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; if (disk_conf->al_extents > drbd_al_extents_max(nbc)) disk_conf->al_extents = drbd_al_extents_max(nbc); + + if (!blk_queue_discard(q) || !q->limits.discard_zeroes_data) { + disk_conf->rs_discard_granularity = 0; /* disable feature */ + drbd_info(device, "rs_discard_granularity feature disabled\n"); + } + + if (disk_conf->rs_discard_granularity) { + int orig_value = disk_conf->rs_discard_granularity; + int remainder; + + if (q->limits.discard_granularity > disk_conf->rs_discard_granularity) + disk_conf->rs_discard_granularity = q->limits.discard_granularity; + + remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity; + disk_conf->rs_discard_granularity += remainder; + + if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9) + disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9; + + if (disk_conf->rs_discard_granularity != orig_value) + drbd_info(device, "rs_discard_granularity changed to %d\n", + disk_conf->rs_discard_granularity); + } } int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) @@ -1403,7 +1429,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (!expect(new_disk_conf->resync_rate >= 1)) new_disk_conf->resync_rate = 1; - sanitize_disk_conf(new_disk_conf, device->ldev); + sanitize_disk_conf(device, new_disk_conf, device->ldev); if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; @@ -1698,7 +1724,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto fail; - sanitize_disk_conf(new_disk_conf, nbc); + sanitize_disk_conf(device, new_disk_conf, nbc); if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { drbd_err(device, "max capacity %llu smaller than disk size %llu\n", diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 2d0e5ad5de9d..ab649d8e9ed2 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -123,14 +123,14 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) __u32_field_def(14, DRBD_GENLA_F_MANDATORY, c_max_rate, DRBD_C_MAX_RATE_DEF) __u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) + __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) + __u32_field_def(25, 0 /* OPTIONAL */, rs_discard_granularity, DRBD_RS_DISCARD_GRANULARITY_DEF) __flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) - __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) - __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) - /* 9: __u32_field_def(22, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */ __flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF) ) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 8ac8c5d9a3ad..2e4aad8d8ae8 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -230,4 +230,10 @@ #define DRBD_SOCKET_CHECK_TIMEO_MAX DRBD_PING_TIMEO_MAX #define DRBD_SOCKET_CHECK_TIMEO_DEF 0 #define DRBD_SOCKET_CHECK_TIMEO_SCALE '1' + +#define DRBD_RS_DISCARD_GRANULARITY_MIN 0 +#define DRBD_RS_DISCARD_GRANULARITY_MAX (1<<20) /* 1MiByte */ +#define DRBD_RS_DISCARD_GRANULARITY_DEF 0 /* disabled by default */ +#define DRBD_RS_DISCARD_GRANULARITY_SCALE '1' /* bytes */ + #endif -- cgit v1.2.3 From dd4f699da674010c58d7a2534215b4ca1ff13b13 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 14 Jun 2016 00:26:20 +0200 Subject: drbd: when receiving P_TRIM, zero-out partial unaligned chunks We can avoid spurious data divergence caused by partially-ignored discards on certain backends with discard_zeroes_data=0, if we translate partial unaligned discard requests into explicit zero-out. The relevant use case is LVM/DM thin. If on different nodes, DRBD is backed by devices with differing discard characteristics, discards may lead to data divergence (old data or garbage left over on one backend, zeroes due to unmapped areas on the other backend). Online verify would now potentially report tons of spurious differences. While probably harmless for most use cases (fstrim on a file system), DRBD cannot have that, it would violate our promise to upper layers that our data instances on the nodes are identical. To be correct and play safe (make sure data is identical on both copies), we would have to disable discard support, if our local backend (on a Primary) does not support "discard_zeroes_data=true". We'd also have to translate discards to explicit zero-out on the receiving (typically: Secondary) side, unless the receiving side supports "discard_zeroes_data=true". Which both would allocate those blocks, instead of unmapping them, in contrast with expectations. LVM/DM thin does set discard_zeroes_data=0, because it silently ignores discards to partial chunks. We can work around this by checking the alignment first. For unaligned (wrt. alignment and granularity) or too small discards, we zero-out the initial (and/or) trailing unaligned partial chunks, but discard all the aligned full chunks. At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1". Arguably it should behave this way internally, by default, and we'll try to make that happen. But our workaround is still valid for already deployed setups, and for other devices that may behave this way. Setting discard-zeroes-if-aligned=yes will allow DRBD to use discards, and to announce discard_zeroes_data=true, even on backends that announce discard_zeroes_data=false. Setting discard-zeroes-if-aligned=no will cause DRBD to always fall-back to zero-out on the receiving side, and to not even announce discard capabilities on the Primary, if the respective backend announces discard_zeroes_data=false. We used to ignore the discard_zeroes_data setting completely. To not break established and expected behaviour, and suddenly cause fstrim on thin-provisioned LVs to run out-of-space, instead of freeing up space, the default value is "yes". Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_nl.c | 15 ++-- drivers/block/drbd/drbd_receiver.c | 140 ++++++++++++++++++++++++++++++------- include/linux/drbd_genl.h | 1 + include/linux/drbd_limits.h | 6 ++ 5 files changed, 134 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9e338ecca7dd..f49ff861bfd8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1488,7 +1488,7 @@ enum determine_dev_size { extern enum determine_dev_size drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local); extern void resync_after_online_grow(struct drbd_device *); -extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev); +extern void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev); extern enum drbd_state_rv drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3643f9ca5a38..8d757d6f21e7 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1161,13 +1161,17 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi unsigned int max_hw_sectors = max_bio_size >> 9; unsigned int max_segments = 0; struct request_queue *b = NULL; + struct disk_conf *dc; + bool discard_zeroes_if_aligned = true; if (bdev) { b = bdev->backing_bdev->bd_disk->queue; max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); rcu_read_lock(); - max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; + dc = rcu_dereference(device->ldev->disk_conf); + max_segments = dc->max_bio_bvecs; + discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned; rcu_read_unlock(); blk_set_stacking_limits(&q->limits); @@ -1185,7 +1189,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS); - if (blk_queue_discard(b) && + if (blk_queue_discard(b) && (b->limits.discard_zeroes_data || discard_zeroes_if_aligned) && (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) { /* We don't care, stacking below should fix it for the local device. * Whether or not it is a suitable granularity on the remote device @@ -1216,7 +1220,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi } } -void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev) +void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev) { unsigned int now, new, local, peer; @@ -1488,6 +1492,9 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) if (write_ordering_changed(old_disk_conf, new_disk_conf)) drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH); + if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned) + drbd_reconsider_queue_parameters(device, device->ldev); + drbd_md_sync(device); if (device->state.conn >= C_CONNECTED) { @@ -1866,7 +1873,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) device->read_cnt = 0; device->writ_cnt = 0; - drbd_reconsider_max_bio_size(device, device->ldev); + drbd_reconsider_queue_parameters(device, device->ldev); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 15b2a0dab0e5..cb80fb406361 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1443,6 +1443,108 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); } +/* + * We *may* ignore the discard-zeroes-data setting, if so configured. + * + * Assumption is that it "discard_zeroes_data=0" is only because the backend + * may ignore partial unaligned discards. + * + * LVM/DM thin as of at least + * LVM version: 2.02.115(2)-RHEL7 (2015-01-28) + * Library version: 1.02.93-RHEL7 (2015-01-28) + * Driver version: 4.29.0 + * still behaves this way. + * + * For unaligned (wrt. alignment and granularity) or too small discards, + * we zero-out the initial (and/or) trailing unaligned partial chunks, + * but discard all the aligned full chunks. + * + * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1". + */ +int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard) +{ + struct block_device *bdev = device->ldev->backing_bdev; + struct request_queue *q = bdev_get_queue(bdev); + sector_t tmp, nr; + unsigned int max_discard_sectors, granularity; + int alignment; + int err = 0; + + if (!discard) + goto zero_out; + + /* Zero-sector (unknown) and one-sector granularities are the same. */ + granularity = max(q->limits.discard_granularity >> 9, 1U); + alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; + + max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22)); + max_discard_sectors -= max_discard_sectors % granularity; + if (unlikely(!max_discard_sectors)) + goto zero_out; + + if (nr_sectors < granularity) + goto zero_out; + + tmp = start; + if (sector_div(tmp, granularity) != alignment) { + if (nr_sectors < 2*granularity) + goto zero_out; + /* start + gran - (start + gran - align) % gran */ + tmp = start + granularity - alignment; + tmp = start + granularity - sector_div(tmp, granularity); + + nr = tmp - start; + err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0); + nr_sectors -= nr; + start = tmp; + } + while (nr_sectors >= granularity) { + nr = min_t(sector_t, nr_sectors, max_discard_sectors); + err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); + nr_sectors -= nr; + start += nr; + } + zero_out: + if (nr_sectors) { + err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0); + } + return err != 0; +} + +static bool can_do_reliable_discards(struct drbd_device *device) +{ + struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); + struct disk_conf *dc; + bool can_do; + + if (!blk_queue_discard(q)) + return false; + + if (q->limits.discard_zeroes_data) + return true; + + rcu_read_lock(); + dc = rcu_dereference(device->ldev->disk_conf); + can_do = dc->discard_zeroes_if_aligned; + rcu_read_unlock(); + return can_do; +} + +void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req) +{ + /* If the backend cannot discard, or does not guarantee + * read-back zeroes in discarded ranges, we fall back to + * zero-out. Unless configuration specifically requested + * otherwise. */ + if (!can_do_reliable_discards(device)) + peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; + + if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector, + peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT))) + peer_req->flags |= EE_WAS_ERROR; + drbd_endio_write_sec_final(peer_req); +} + /** * drbd_submit_peer_request() * @device: DRBD device. @@ -1474,7 +1576,13 @@ int drbd_submit_peer_request(struct drbd_device *device, unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; int err = -ENOMEM; - if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) { + /* TRIM/DISCARD: for now, always use the helper function + * blkdev_issue_zeroout(..., discard=true). + * It's synchronous, but it does the right thing wrt. bio splitting. + * Correctness first, performance later. Next step is to code an + * asynchronous variant of the same. + */ + if (peer_req->flags & EE_IS_TRIM) { /* wait for all pending IO completions, before we start * zeroing things out. */ conn_wait_active_ee_empty(peer_req->peer_device->connection); @@ -1491,19 +1599,10 @@ int drbd_submit_peer_request(struct drbd_device *device, spin_unlock_irq(&device->resource->req_lock); } - if (blkdev_issue_zeroout(device->ldev->backing_bdev, - sector, data_size >> 9, GFP_NOIO, false)) - peer_req->flags |= EE_WAS_ERROR; - drbd_endio_write_sec_final(peer_req); + drbd_issue_peer_discard(device, peer_req); return 0; } - /* Discards don't have any payload. - * But the scsi layer still expects a bio_vec it can use internally, - * see sd_setup_discard_cmnd() and blk_add_request_payload(). */ - if (peer_req->flags & EE_IS_TRIM) - nr_pages = 1; - /* In most cases, we will only need one bio. But in case the lower * level restrictions happen to be different at this offset on this * side than those of the sending peer, we may need to submit the @@ -1529,11 +1628,6 @@ next_bio: bios = bio; ++n_bios; - if (op == REQ_OP_DISCARD) { - bio->bi_iter.bi_size = data_size; - goto submit; - } - page_chain_for_each(page) { unsigned len = min_t(unsigned, data_size, PAGE_SIZE); if (!bio_add_page(bio, page, len, 0)) { @@ -1555,7 +1649,6 @@ next_bio: --nr_pages; } D_ASSERT(device, data_size == 0); -submit: D_ASSERT(device, page == NULL); atomic_set(&peer_req->pending_bios, n_bios); @@ -2424,10 +2517,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * op = wire_flags_to_bio_op(dp_flags); op_flags = wire_flags_to_bio_flags(dp_flags); if (pi->cmd == P_TRIM) { - struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); peer_req->flags |= EE_IS_TRIM; - if (!blk_queue_discard(q)) - peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; D_ASSERT(peer_device, peer_req->i.size > 0); D_ASSERT(peer_device, op == REQ_OP_DISCARD); D_ASSERT(peer_device, peer_req->pages == NULL); @@ -2498,7 +2588,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * * and we wait for all pending requests, respectively wait for * active_ee to become empty in drbd_submit_peer_request(); * better not add ourselves here. */ - if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) + if ((peer_req->flags & EE_IS_TRIM) == 0) list_add_tail(&peer_req->w.list, &device->active_ee); spin_unlock_irq(&device->resource->req_lock); @@ -3916,14 +4006,14 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info } device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); - /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). + /* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size(). In case we cleared the QUEUE_FLAG_DISCARD from our queue in - drbd_reconsider_max_bio_size(), we can be sure that after + drbd_reconsider_queue_parameters(), we can be sure that after drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */ ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(device)) { - drbd_reconsider_max_bio_size(device, device->ldev); + drbd_reconsider_queue_parameters(device, device->ldev); dd = drbd_determine_dev_size(device, ddsf, NULL); put_ldev(device); if (dd == DS_ERROR) @@ -3943,7 +4033,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info * However, if he sends a zero current size, * take his (user-capped or) backing disk size anyways. */ - drbd_reconsider_max_bio_size(device, NULL); + drbd_reconsider_queue_parameters(device, NULL); drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); } diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index ab649d8e9ed2..c934d3a96b5e 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -132,6 +132,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) __flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF) + __flg_field_def(24, 0 /* OPTIONAL */, discard_zeroes_if_aligned, DRBD_DISCARD_ZEROES_IF_ALIGNED) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 2e4aad8d8ae8..a351c40087cd 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -210,6 +210,12 @@ #define DRBD_MD_FLUSHES_DEF 1 #define DRBD_TCP_CORK_DEF 1 #define DRBD_AL_UPDATES_DEF 1 +/* We used to ignore the discard_zeroes_data setting. + * To not change established (and expected) behaviour, + * by default assume that, for discard_zeroes_data=0, + * we can make that an effective discard_zeroes_data=1, + * if we only explicitly zero-out unaligned partial chunks. */ +#define DRBD_DISCARD_ZEROES_IF_ALIGNED 1 #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 -- cgit v1.2.3 From 505675f96cf0f169647a18c3dda1f373eca957b1 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 14 Jun 2016 00:26:23 +0200 Subject: drbd: allow larger max_discard_sectors Make sure we have at least 67 (> AL_UPDATES_PER_TRANSACTION) al-extents available, and allow up to half of that to be discarded in one bio. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_int.h | 8 ++++---- include/linux/drbd_limits.h | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index d524973f94b3..265b2b6ebdec 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -258,7 +258,7 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); - D_ASSERT(device, (unsigned)(last - first) <= 1); + D_ASSERT(device, first <= last); D_ASSERT(device, atomic_read(&device->local_cnt) > 0); /* FIXME figure out a fast path for bios crossing AL extent boundaries */ diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0b5a658882b2..9c68ec54a247 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1347,10 +1347,10 @@ struct bm_extent { #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */ #define DRBD_MAX_BIO_SIZE_P95 (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */ -/* For now, don't allow more than one activity log extent worth of data - * to be discarded in one go. We may need to rework drbd_al_begin_io() - * to allow for even larger discard ranges */ -#define DRBD_MAX_DISCARD_SIZE AL_EXTENT_SIZE +/* For now, don't allow more than half of what we can "activate" in one + * activity log transaction to be discarded in one go. We may need to rework + * drbd_al_begin_io() to allow for even larger discard ranges */ +#define DRBD_MAX_DISCARD_SIZE (AL_UPDATES_PER_TRANSACTION/2*AL_EXTENT_SIZE) #define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9) extern int drbd_bm_init(struct drbd_device *device); diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index a351c40087cd..ddac68422a96 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -126,8 +126,7 @@ #define DRBD_RESYNC_RATE_DEF 250 #define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */ - /* less than 7 would hit performance unnecessarily. */ -#define DRBD_AL_EXTENTS_MIN 7 +#define DRBD_AL_EXTENTS_MIN 67 /* we use u16 as "slot number", (u16)~0 is "FREE". * If you use >= 292 kB on-disk ring buffer, * this is the maximum you can use: */ -- cgit v1.2.3 From 7e5fec31685a5c69b81e9005eaed44318880d881 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 14 Jun 2016 00:26:35 +0200 Subject: drbd: code cleanups without semantic changes This contains various cosmetic fixes ranging from simple typos to const-ifying, and using booleans properly. Original commit messages from Fabian's patch set: drbd: debugfs: constify drbd_version_fops drbd: use seq_put instead of seq_print where possible drbd: include linux/uaccess.h instead of asm/uaccess.h drbd: use const char * const for drbd strings drbd: kerneldoc warning fix in w_e_end_data_req() drbd: use unsigned for one bit fields drbd: use bool for peer is_ states drbd: fix typo drbd: use | for bitmask combination drbd: use true/false for bool drbd: fix drbd_bm_init() comments drbd: introduce peer state union drbd: fix maybe_pull_ahead() locking comments drbd: use bool for growing drbd: remove redundant declarations drbd: replace if/BUG by BUG_ON Signed-off-by: Fabian Frederick Signed-off-by: Roland Kammerer Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_bitmap.c | 6 +++--- drivers/block/drbd/drbd_debugfs.c | 2 +- drivers/block/drbd/drbd_int.h | 4 +--- drivers/block/drbd/drbd_interval.h | 14 +++++++------- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 14 ++++++++------ drivers/block/drbd/drbd_proc.c | 30 +++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 8 ++++---- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/drbd/drbd_state.c | 4 +--- drivers/block/drbd/drbd_state.h | 2 +- drivers/block/drbd/drbd_strings.c | 8 ++++---- drivers/block/drbd/drbd_worker.c | 9 ++++----- include/linux/drbd.h | 8 ++++++++ 14 files changed, 59 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e5d89f623b90..095625beaf98 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -427,8 +427,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) } /* - * called on driver init only. TODO call when a device is created. - * allocates the drbd_bitmap, and stores it in device->bitmap. + * allocates the drbd_bitmap and stores it in device->bitmap. */ int drbd_bm_init(struct drbd_device *device) { @@ -633,7 +632,8 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi unsigned long bits, words, owords, obits; unsigned long want, have, onpages; /* number of pages */ struct page **npages, **opages = NULL; - int err = 0, growing; + int err = 0; + bool growing; if (!expect(b)) return -ENOMEM; diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index 8a9081236f92..be91a8d7c22a 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -903,7 +903,7 @@ static int drbd_version_open(struct inode *inode, struct file *file) return single_open(file, drbd_version_show, NULL); } -static struct file_operations drbd_version_fops = { +static const struct file_operations drbd_version_fops = { .owner = THIS_MODULE, .open = drbd_version_open, .llseek = seq_lseek, diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 995aa8d25660..2c9194dc2ec2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1499,7 +1499,7 @@ extern enum drbd_state_rv drbd_set_role(struct drbd_device *device, int force); extern bool conn_try_outdate_peer(struct drbd_connection *connection); extern void conn_try_outdate_peer_async(struct drbd_connection *connection); -extern int conn_khelper(struct drbd_connection *connection, char *cmd); +extern enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd); extern int drbd_khelper(struct drbd_device *device, char *cmd); /* drbd_worker.c */ @@ -1648,8 +1648,6 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; extern const struct file_operations drbd_proc_fops; -extern const char *drbd_conn_str(enum drbd_conns s); -extern const char *drbd_role_str(enum drbd_role s); /* drbd_actlog.c */ extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i); diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index f210543f05f4..23c5a94428d2 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -6,13 +6,13 @@ struct drbd_interval { struct rb_node rb; - sector_t sector; /* start sector of the interval */ - unsigned int size; /* size in bytes */ - sector_t end; /* highest interval end in subtree */ - int local:1 /* local or remote request? */; - int waiting:1; /* someone is waiting for this to complete */ - int completed:1; /* this has been completed already; - * ignore for conflict detection */ + sector_t sector; /* start sector of the interval */ + unsigned int size; /* size in bytes */ + sector_t end; /* highest interval end in subtree */ + unsigned int local:1 /* local or remote request? */; + unsigned int waiting:1; /* someone is waiting for completion */ + unsigned int completed:1; /* this has been completed already; + * ignore for conflict detection */ }; static inline void drbd_clear_interval(struct drbd_interval *i) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f4ea8d6c50d3..0501ae0c517b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9a45c80239ba..f35db29cac76 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -387,7 +387,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd) return ret; } -int conn_khelper(struct drbd_connection *connection, char *cmd) +enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd) { char *envp[] = { "HOME=/", "TERM=linux", @@ -503,17 +503,17 @@ bool conn_try_outdate_peer(struct drbd_connection *connection) r = conn_khelper(connection, "fence-peer"); switch ((r>>8) & 0xff) { - case 3: /* peer is inconsistent */ + case P_INCONSISTENT: /* peer is inconsistent */ ex_to_string = "peer is inconsistent or worse"; mask.pdsk = D_MASK; val.pdsk = D_INCONSISTENT; break; - case 4: /* peer got outdated, or was already outdated */ + case P_OUTDATED: /* peer got outdated, or was already outdated */ ex_to_string = "peer was fenced"; mask.pdsk = D_MASK; val.pdsk = D_OUTDATED; break; - case 5: /* peer was down */ + case P_DOWN: /* peer was down */ if (conn_highest_disk(connection) == D_UP_TO_DATE) { /* we will(have) create(d) a new UUID anyways... */ ex_to_string = "peer is unreachable, assumed to be dead"; @@ -523,7 +523,7 @@ bool conn_try_outdate_peer(struct drbd_connection *connection) ex_to_string = "peer unreachable, doing nothing since disk != UpToDate"; } break; - case 6: /* Peer is primary, voluntarily outdate myself. + case P_PRIMARY: /* Peer is primary, voluntarily outdate myself. * This is useful when an unconnected R_SECONDARY is asked to * become R_PRIMARY, but finds the other peer being active. */ ex_to_string = "peer is active"; @@ -531,7 +531,9 @@ bool conn_try_outdate_peer(struct drbd_connection *connection) mask.disk = D_MASK; val.disk = D_OUTDATED; break; - case 7: + case P_FENCING: + /* THINK: do we need to handle this + * like case 4, or more like case 5? */ if (fp != FP_STONITH) drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n"); ex_to_string = "peer was stonithed"; diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 6537b25db9c1..be2b93fd2c11 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -25,7 +25,7 @@ #include -#include +#include #include #include #include @@ -122,18 +122,18 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se x = res/50; y = 20-x; - seq_printf(seq, "\t["); + seq_puts(seq, "\t["); for (i = 1; i < x; i++) - seq_printf(seq, "="); - seq_printf(seq, ">"); + seq_putc(seq, '='); + seq_putc(seq, '>'); for (i = 0; i < y; i++) seq_printf(seq, "."); - seq_printf(seq, "] "); + seq_puts(seq, "] "); if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T) - seq_printf(seq, "verified:"); + seq_puts(seq, "verified:"); else - seq_printf(seq, "sync'ed:"); + seq_puts(seq, "sync'ed:"); seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); /* if more than a few GB, display in MB */ @@ -146,7 +146,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se (unsigned long) Bit2KB(rs_left), (unsigned long) Bit2KB(rs_total)); - seq_printf(seq, "\n\t"); + seq_puts(seq, "\n\t"); /* see drivers/md/md.c * We do not want to overflow, so the order of operands and @@ -175,9 +175,9 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se rt / 3600, (rt % 3600) / 60, rt % 60); dbdt = Bit2KB(db/dt); - seq_printf(seq, " speed: "); + seq_puts(seq, " speed: "); seq_printf_with_thousands_grouping(seq, dbdt); - seq_printf(seq, " ("); + seq_puts(seq, " ("); /* ------------------------- ~3s average ------------------------ */ if (proc_details >= 1) { /* this is what drbd_rs_should_slow_down() uses */ @@ -188,7 +188,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se db = device->rs_mark_left[i] - rs_left; dbdt = Bit2KB(db/dt); seq_printf_with_thousands_grouping(seq, dbdt); - seq_printf(seq, " -- "); + seq_puts(seq, " -- "); } /* --------------------- long term average ---------------------- */ @@ -200,11 +200,11 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se db = rs_total - rs_left; dbdt = Bit2KB(db/dt); seq_printf_with_thousands_grouping(seq, dbdt); - seq_printf(seq, ")"); + seq_putc(seq, ')'); if (state.conn == C_SYNC_TARGET || state.conn == C_VERIFY_S) { - seq_printf(seq, " want: "); + seq_puts(seq, " want: "); seq_printf_with_thousands_grouping(seq, device->c_sync_rate); } seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); @@ -231,7 +231,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se (unsigned long long)bm_bits * BM_SECT_PER_BIT); if (stop_sector != 0 && stop_sector != ULLONG_MAX) seq_printf(seq, " stop sector: %llu", stop_sector); - seq_printf(seq, "\n"); + seq_putc(seq, '\n'); } } @@ -276,7 +276,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) rcu_read_lock(); idr_for_each_entry(&drbd_devices, device, i) { if (prev_i != i - 1) - seq_printf(seq, "\n"); + seq_putc(seq, '\n'); prev_i = i; state = device->state; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 577a1872da48..0d74602feca0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -25,7 +25,7 @@ #include -#include +#include #include #include @@ -2289,13 +2289,13 @@ static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req) { struct drbd_peer_request *rs_req; - bool rv = 0; + bool rv = false; spin_lock_irq(&device->resource->req_lock); list_for_each_entry(rs_req, &device->sync_ee, w.list) { if (overlaps(peer_req->i.sector, peer_req->i.size, rs_req->i.sector, rs_req->i.size)) { - rv = 1; + rv = true; break; } } @@ -2678,7 +2678,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * } out_interrupted: - drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP); + drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP); put_ldev(device); drbd_free_peer_req(device, peer_req); return err; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index a8994875d52b..787536a0ee7c 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1000,7 +1000,7 @@ static void complete_conflicting_writes(struct drbd_request *req) finish_wait(&device->misc_wait, &wait); } -/* called within req_lock and rcu_read_lock() */ +/* called within req_lock */ static void maybe_pull_ahead(struct drbd_device *device) { struct drbd_connection *connection = first_peer_device(device)->connection; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index a1b5e6c91106..aca68a581b06 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -2196,9 +2196,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union ns.disk = os.disk; rv = _drbd_set_state(device, ns, flags, NULL); - if (rv < SS_SUCCESS) - BUG(); - + BUG_ON(rv < SS_SUCCESS); ns.i = device->state.i; ns_max.role = max_role(ns.role, ns_max.role); ns_max.peer = max_role(ns.peer, ns_max.peer); diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index bd989536f888..6c9d5d4a8a75 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -140,7 +140,7 @@ extern void drbd_resume_al(struct drbd_device *device); extern bool conn_all_vols_unconf(struct drbd_connection *connection); /** - * drbd_request_state() - Reqest a state change + * drbd_request_state() - Request a state change * @device: DRBD device. * @mask: mask of state bits to change. * @val: value of new state bits. diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 80b0f63c7075..0eeab14776e9 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -26,7 +26,7 @@ #include #include "drbd_strings.h" -static const char *drbd_conn_s_names[] = { +static const char * const drbd_conn_s_names[] = { [C_STANDALONE] = "StandAlone", [C_DISCONNECTING] = "Disconnecting", [C_UNCONNECTED] = "Unconnected", @@ -53,13 +53,13 @@ static const char *drbd_conn_s_names[] = { [C_BEHIND] = "Behind", }; -static const char *drbd_role_s_names[] = { +static const char * const drbd_role_s_names[] = { [R_PRIMARY] = "Primary", [R_SECONDARY] = "Secondary", [R_UNKNOWN] = "Unknown" }; -static const char *drbd_disk_s_names[] = { +static const char * const drbd_disk_s_names[] = { [D_DISKLESS] = "Diskless", [D_ATTACHING] = "Attaching", [D_FAILED] = "Failed", @@ -71,7 +71,7 @@ static const char *drbd_disk_s_names[] = { [D_UP_TO_DATE] = "UpToDate", }; -static const char *drbd_state_sw_errors[] = { +static const char * const drbd_state_sw_errors[] = { [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config", [-SS_NO_UP_TO_DATE_DISK] = "Need access to UpToDate data", [-SS_NO_LOCAL_DISK] = "Can not resync without local disk", diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 364fed1e2100..b3fa5575bc0e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -173,8 +173,8 @@ void drbd_peer_request_endio(struct bio *bio) { struct drbd_peer_request *peer_req = bio->bi_private; struct drbd_device *device = peer_req->peer_device->device; - int is_write = bio_data_dir(bio) == WRITE; - int is_discard = !!(bio_op(bio) == REQ_OP_DISCARD); + bool is_write = bio_data_dir(bio) == WRITE; + bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD); if (bio->bi_error && __ratelimit(&drbd_ratelimit_state)) drbd_warn(device, "%s: error=%d s=%llus\n", @@ -1039,7 +1039,6 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_ /** * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST - * @device: DRBD device. * @w: work object. * @cancel: The connection will be closed anyways */ @@ -1700,7 +1699,7 @@ static bool use_checksum_based_resync(struct drbd_connection *connection, struct rcu_read_unlock(); return connection->agreed_pro_version >= 89 && /* supported? */ connection->csums_tfm && /* configured? */ - (csums_after_crash_only == 0 /* use for each resync? */ + (csums_after_crash_only == false /* use for each resync? */ || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ } @@ -1835,7 +1834,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) device->bm_resync_fo = 0; device->use_csums = use_checksum_based_resync(connection, device); } else { - device->use_csums = 0; + device->use_csums = false; } /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid diff --git a/include/linux/drbd.h b/include/linux/drbd.h index d6b3c9943a2c..2b26156365ec 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -370,6 +370,14 @@ enum drbd_notification_type { NOTIFY_FLAGS = NOTIFY_CONTINUES, }; +enum drbd_peer_state { + P_INCONSISTENT = 3, + P_OUTDATED = 4, + P_DOWN = 5, + P_PRIMARY = 6, + P_FENCING = 7, +}; + #define UUID_JUST_CREATED ((__u64)4) enum write_ordering_e { -- cgit v1.2.3 From 1b57e66384e2d21150301e68078526fac5680a16 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 14 Jun 2016 00:26:39 +0200 Subject: drbd: correctly handle failed crypto_alloc_hash crypto_alloc_hash returns an ERR_PTR(), not NULL. Also reset peer_integrity_tfm to NULL, to not call crypto_free_hash() on an errno in the cleanup path. Reported-by: Insu Yun Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 3 ++- include/linux/drbd.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0d74602feca0..df45713dfbe8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3681,7 +3681,8 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in */ peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC); - if (!peer_integrity_tfm) { + if (IS_ERR(peer_integrity_tfm)) { + peer_integrity_tfm = NULL; drbd_err(connection, "peer data-integrity-alg %s not supported\n", integrity_alg); goto disconnect; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 2b26156365ec..002611c85318 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -51,7 +51,7 @@ #endif extern const char *drbd_buildtag(void); -#define REL_VERSION "8.4.6" +#define REL_VERSION "8.4.7" #define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 -- cgit v1.2.3 From e63a46bef01ff3064f44dba145833284fb6adeec Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 15 Jun 2016 18:17:27 -0700 Subject: block: introduce device_add_disk() In preparation for removing the ->driverfs_dev member of a gendisk, add an api that takes the parent device as a parameter to add_disk(). For now this maintains the status quo of WARN()ing on failure, but not return a error code. Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Signed-off-by: Dan Williams --- block/genhd.c | 17 +++++++++++------ include/linux/genhd.h | 8 +++++++- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/block/genhd.c b/block/genhd.c index 9f42526b4d62..fb2d9aeb75a5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -506,7 +506,7 @@ static int exact_lock(dev_t devt, void *data) return 0; } -static void register_disk(struct gendisk *disk) +static void register_disk(struct device *parent, struct gendisk *disk) { struct device *ddev = disk_to_dev(disk); struct block_device *bdev; @@ -514,7 +514,7 @@ static void register_disk(struct gendisk *disk) struct hd_struct *part; int err; - ddev->parent = disk->driverfs_dev; + ddev->parent = parent; dev_set_name(ddev, "%s", disk->disk_name); @@ -573,7 +573,8 @@ exit: } /** - * add_disk - add partitioning information to kernel list + * device_add_disk - add partitioning information to kernel list + * @parent: parent device for the disk * @disk: per-device partitioning information * * This function registers the partitioning information in @disk @@ -581,7 +582,7 @@ exit: * * FIXME: error handling */ -void add_disk(struct gendisk *disk) +void device_add_disk(struct device *parent, struct gendisk *disk) { struct backing_dev_info *bdi; dev_t devt; @@ -617,7 +618,11 @@ void add_disk(struct gendisk *disk) blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); - register_disk(disk); + + /* temporary while we convert usages to use disk_to_dev(disk)->parent */ + disk->driverfs_dev = parent; + + register_disk(parent, disk); blk_register_queue(disk); /* @@ -633,7 +638,7 @@ void add_disk(struct gendisk *disk) disk_add_events(disk); blk_integrity_add(disk); } -EXPORT_SYMBOL(add_disk); +EXPORT_SYMBOL(device_add_disk); void del_gendisk(struct gendisk *disk) { diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 359a8e4bd44d..df1dabbfd388 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -414,7 +414,13 @@ static inline void free_part_info(struct hd_struct *part) extern void part_round_stats(int cpu, struct hd_struct *part); /* block/genhd.c */ -extern void add_disk(struct gendisk *disk); +extern void device_add_disk(struct device *parent, struct gendisk *disk); +static inline void add_disk(struct gendisk *disk) +{ + /* temporary while we convert callers to device_add_disk */ + device_add_disk(disk->driverfs_dev, disk); +} + extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); -- cgit v1.2.3 From 52c44d93c26f5a76068c0a8cc83bb8f56f38043d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 15 Jun 2016 19:43:07 -0700 Subject: block: remove ->driverfs_dev Now that all drivers that specify a ->driverfs_dev have been converted to device_add_disk(), the pointer can be removed from struct gendisk. Cc: Jens Axboe Cc: Ross Zwisler Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- block/genhd.c | 9 ++------- drivers/nvdimm/bus.c | 2 +- include/linux/genhd.h | 4 +--- 3 files changed, 4 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/block/genhd.c b/block/genhd.c index fb2d9aeb75a5..a18d35390f1d 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -618,10 +618,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk) blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); - - /* temporary while we convert usages to use disk_to_dev(disk)->parent */ - disk->driverfs_dev = parent; - register_disk(parent, disk); blk_register_queue(disk); @@ -804,10 +800,9 @@ void __init printk_all_partitions(void) , disk_name(disk, part->partno, name_buf), part->info ? part->info->uuid : ""); if (is_part0) { - if (disk->driverfs_dev != NULL && - disk->driverfs_dev->driver != NULL) + if (dev->parent && dev->parent->driver) printk(" driver: %s\n", - disk->driverfs_dev->driver->name); + dev->parent->driver->name); else printk(" (driver?)\n"); } else diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index f085f8bceae8..5e4e5c772ea5 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -312,7 +312,7 @@ EXPORT_SYMBOL(__nd_driver_register); int nvdimm_revalidate_disk(struct gendisk *disk) { - struct device *dev = disk->driverfs_dev; + struct device *dev = disk_to_dev(disk)->parent; struct nd_region *nd_region = to_nd_region(dev->parent); const char *pol = nd_region->ro ? "only" : "write"; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index df1dabbfd388..1dbf52f9c24b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -205,7 +205,6 @@ struct gendisk { void *private_data; int flags; - struct device *driverfs_dev; // FIXME: remove struct kobject *slave_dir; struct timer_rand_state *random; @@ -417,8 +416,7 @@ extern void part_round_stats(int cpu, struct hd_struct *part); extern void device_add_disk(struct device *parent, struct gendisk *disk); static inline void add_disk(struct gendisk *disk) { - /* temporary while we convert callers to device_add_disk */ - device_add_disk(disk->driverfs_dev, disk); + device_add_disk(NULL, disk); } extern void del_gendisk(struct gendisk *gp); -- cgit v1.2.3 From 1f5bd336b9150560458b03460cbcfcfbcf8995b1 Mon Sep 17 00:00:00 2001 From: Ming Lin Date: Mon, 13 Jun 2016 16:45:21 +0200 Subject: blk-mq: add blk_mq_alloc_request_hctx For some protocols like NVMe over Fabrics we need to be able to send initialization commands to a specific queue. Based on an earlier patch from Christoph Hellwig . Signed-off-by: Ming Lin [hch: disallow sleeping allocation, req_op fixes] Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 39 +++++++++++++++++++++++++++++++++++++++ include/linux/blk-mq.h | 2 ++ 2 files changed, 41 insertions(+) (limited to 'include') diff --git a/block/blk-mq.c b/block/blk-mq.c index 13f460368759..7aa60c4f56fd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -267,6 +267,45 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, } EXPORT_SYMBOL(blk_mq_alloc_request); +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, + unsigned int flags, unsigned int hctx_idx) +{ + struct blk_mq_hw_ctx *hctx; + struct blk_mq_ctx *ctx; + struct request *rq; + struct blk_mq_alloc_data alloc_data; + int ret; + + /* + * If the tag allocator sleeps we could get an allocation for a + * different hardware context. No need to complicate the low level + * allocator for this for the rare use case of a command tied to + * a specific queue. + */ + if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT))) + return ERR_PTR(-EINVAL); + + if (hctx_idx >= q->nr_hw_queues) + return ERR_PTR(-EIO); + + ret = blk_queue_enter(q, true); + if (ret) + return ERR_PTR(ret); + + hctx = q->queue_hw_ctx[hctx_idx]; + ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); + + blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); + rq = __blk_mq_alloc_request(&alloc_data, rw, 0); + if (!rq) { + blk_queue_exit(q); + return ERR_PTR(-EWOULDBLOCK); + } + + return rq; +} +EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); + static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct request *rq) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2498fdf3a503..cbfd8ca5f13e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -196,6 +196,8 @@ enum { struct request *blk_mq_alloc_request(struct request_queue *q, int rw, unsigned int flags); +struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op, + unsigned int flags, unsigned int hctx_idx); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags); -- cgit v1.2.3 From eb793e2c9286cca415423edff4942e4ba28e3cd4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Jun 2016 16:45:25 +0200 Subject: nvme.h: add NVMe over Fabrics definitions The NVMe over Fabrics specification defines a protocol interface and related extensions to NVMe that enable operation over network protocols. The NVMe over Fabrics specification has an NVMe Transport binding for each NVMe Transport. This patch adds the fabrics related definitions: - fabric specific command set and error codes - transport addressing and binding definitions - fabrics sgl extensions - controller identification fabrics enhancements - discovery log page definition Signed-off-by: Armen Baloyan Signed-off-by: James Smart Signed-off-by: Jay Freyensee Signed-off-by: Ming Lin Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 4 +- drivers/nvme/host/pci.c | 4 +- include/linux/nvme.h | 337 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 322 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f5676c69842a..3a1e82e5c7f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -524,7 +524,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_get_features; c.features.nsid = cpu_to_le32(nsid); - c.features.prp1 = cpu_to_le64(dma_addr); + c.features.dptr.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0, @@ -543,7 +543,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_set_features; - c.features.prp1 = cpu_to_le64(dma_addr); + c.features.dptr.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index de222ed030ab..79a4f56c06cd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -520,8 +520,8 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, goto out_unmap; } - cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - cmnd->rw.prp2 = cpu_to_le64(iod->first_dma); + cmnd->rw.dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma); if (blk_integrity_rq(req)) cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); return BLK_MQ_RQ_QUEUE_OK; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index dc815cc6718d..75250303bcb3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -16,6 +16,78 @@ #define _LINUX_NVME_H #include +#include + +/* NQN names in commands fields specified one size */ +#define NVMF_NQN_FIELD_LEN 256 + +/* However the max length of a qualified name is another size */ +#define NVMF_NQN_SIZE 223 + +#define NVMF_TRSVCID_SIZE 32 +#define NVMF_TRADDR_SIZE 256 +#define NVMF_TSAS_SIZE 256 + +#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" + +#define NVME_RDMA_IP_PORT 4420 + +enum nvme_subsys_type { + NVME_NQN_DISC = 1, /* Discovery type target subsystem */ + NVME_NQN_NVME = 2, /* NVME type target subsystem */ +}; + +/* Address Family codes for Discovery Log Page entry ADRFAM field */ +enum { + NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */ + NVMF_ADDR_FAMILY_IP4 = 1, /* IP4 */ + NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */ + NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */ + NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */ +}; + +/* Transport Type codes for Discovery Log Page entry TRTYPE field */ +enum { + NVMF_TRTYPE_RDMA = 1, /* RDMA */ + NVMF_TRTYPE_FC = 2, /* Fibre Channel */ + NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */ + NVMF_TRTYPE_MAX, +}; + +/* Transport Requirements codes for Discovery Log Page entry TREQ field */ +enum { + NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ + NVMF_TREQ_REQUIRED = 1, /* Required */ + NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ +}; + +/* RDMA QP Service Type codes for Discovery Log Page entry TSAS + * RDMA_QPTYPE field + */ +enum { + NVMF_RDMA_QPTYPE_CONNECTED = 0, /* Reliable Connected */ + NVMF_RDMA_QPTYPE_DATAGRAM = 1, /* Reliable Datagram */ +}; + +/* RDMA QP Service Type codes for Discovery Log Page entry TSAS + * RDMA_QPTYPE field + */ +enum { + NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 0, /* No Provider Specified */ + NVMF_RDMA_PRTYPE_IB = 1, /* InfiniBand */ + NVMF_RDMA_PRTYPE_ROCE = 2, /* InfiniBand RoCE */ + NVMF_RDMA_PRTYPE_ROCEV2 = 3, /* InfiniBand RoCEV2 */ + NVMF_RDMA_PRTYPE_IWARP = 4, /* IWARP */ +}; + +/* RDMA Connection Management Service Type codes for Discovery Log Page + * entry TSAS RDMA_CMS field + */ +enum { + NVMF_RDMA_CMS_RDMA_CM = 0, /* Sockets based enpoint addressing */ +}; + +#define NVMF_AQ_DEPTH 32 enum { NVME_REG_CAP = 0x0000, /* Controller Capabilities */ @@ -117,7 +189,8 @@ struct nvme_id_ctrl { __le32 rtd3r; __le32 rtd3e; __le32 oaes; - __u8 rsvd96[160]; + __le32 ctratt; + __u8 rsvd100[156]; __le16 oacs; __u8 acl; __u8 aerl; @@ -132,7 +205,7 @@ struct nvme_id_ctrl { __u8 rsvd270[242]; __u8 sqes; __u8 cqes; - __u8 rsvd514[2]; + __le16 maxcmd; __le32 nn; __le16 oncs; __le16 fuses; @@ -145,7 +218,15 @@ struct nvme_id_ctrl { __le16 acwu; __u8 rsvd534[2]; __le32 sgls; - __u8 rsvd540[1508]; + __u8 rsvd540[228]; + char subnqn[256]; + __u8 rsvd1024[768]; + __le32 ioccsz; + __le32 iorcsz; + __le16 icdoff; + __u8 ctrattr; + __u8 msdbd; + __u8 rsvd1804[244]; struct nvme_id_power_state psd[32]; __u8 vs[1024]; }; @@ -306,6 +387,61 @@ enum nvme_opcode { nvme_cmd_resv_release = 0x15, }; +/* + * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier + * + * @NVME_SGL_FMT_ADDRESS: absolute address of the data block + * @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block + * @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation + * request subtype + */ +enum { + NVME_SGL_FMT_ADDRESS = 0x00, + NVME_SGL_FMT_OFFSET = 0x01, + NVME_SGL_FMT_INVALIDATE = 0x0f, +}; + +/* + * Descriptor type - upper 4 bits of nvme_(keyed_)sgl_desc identifier + * + * For struct nvme_sgl_desc: + * @NVME_SGL_FMT_DATA_DESC: data block descriptor + * @NVME_SGL_FMT_SEG_DESC: sgl segment descriptor + * @NVME_SGL_FMT_LAST_SEG_DESC: last sgl segment descriptor + * + * For struct nvme_keyed_sgl_desc: + * @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor + */ +enum { + NVME_SGL_FMT_DATA_DESC = 0x00, + NVME_SGL_FMT_SEG_DESC = 0x02, + NVME_SGL_FMT_LAST_SEG_DESC = 0x03, + NVME_KEY_SGL_FMT_DATA_DESC = 0x04, +}; + +struct nvme_sgl_desc { + __le64 addr; + __le32 length; + __u8 rsvd[3]; + __u8 type; +}; + +struct nvme_keyed_sgl_desc { + __le64 addr; + __u8 length[3]; + __u8 key[4]; + __u8 type; +}; + +union nvme_data_ptr { + struct { + __le64 prp1; + __le64 prp2; + }; + struct nvme_sgl_desc sgl; + struct nvme_keyed_sgl_desc ksgl; +}; + /* * Lowest two bits of our flags field (FUSE field in the spec): * @@ -336,8 +472,7 @@ struct nvme_common_command { __le32 nsid; __le32 cdw2[2]; __le64 metadata; - __le64 prp1; - __le64 prp2; + union nvme_data_ptr dptr; __le32 cdw10[6]; }; @@ -348,8 +483,7 @@ struct nvme_rw_command { __le32 nsid; __u64 rsvd2; __le64 metadata; - __le64 prp1; - __le64 prp2; + union nvme_data_ptr dptr; __le64 slba; __le16 length; __le16 control; @@ -389,8 +523,7 @@ struct nvme_dsm_cmd { __u16 command_id; __le32 nsid; __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; + union nvme_data_ptr dptr; __le32 nr; __le32 attributes; __u32 rsvd12[4]; @@ -454,6 +587,7 @@ enum { NVME_LOG_ERROR = 0x01, NVME_LOG_SMART = 0x02, NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), NVME_FWACT_REPL_ACTV = (1 << 3), @@ -466,8 +600,7 @@ struct nvme_identify { __u16 command_id; __le32 nsid; __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; + union nvme_data_ptr dptr; __le32 cns; __u32 rsvd11[5]; }; @@ -478,8 +611,7 @@ struct nvme_features { __u16 command_id; __le32 nsid; __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; + union nvme_data_ptr dptr; __le32 fid; __le32 dword11; __u32 rsvd12[4]; @@ -538,8 +670,7 @@ struct nvme_download_firmware { __u8 flags; __u16 command_id; __u32 rsvd1[5]; - __le64 prp1; - __le64 prp2; + union nvme_data_ptr dptr; __le32 numd; __le32 offset; __u32 rsvd12[4]; @@ -561,8 +692,7 @@ struct nvme_get_log_page_command { __u16 command_id; __le32 nsid; __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; + union nvme_data_ptr dptr; __u8 lid; __u8 rsvd10; __le16 numdl; @@ -573,6 +703,126 @@ struct nvme_get_log_page_command { __u32 rsvd14[2]; }; +/* + * Fabrics subcommands. + */ +enum nvmf_fabrics_opcode { + nvme_fabrics_command = 0x7f, +}; + +enum nvmf_capsule_command { + nvme_fabrics_type_property_set = 0x00, + nvme_fabrics_type_connect = 0x01, + nvme_fabrics_type_property_get = 0x04, +}; + +struct nvmf_common_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[35]; + __u8 ts[24]; +}; + +/* + * The legal cntlid range a NVMe Target will provide. + * Note that cntlid of value 0 is considered illegal in the fabrics world. + * Devices based on earlier specs did not have the subsystem concept; + * therefore, those devices had their cntlid value set to 0 as a result. + */ +#define NVME_CNTLID_MIN 1 +#define NVME_CNTLID_MAX 0xffef +#define NVME_CNTLID_DYNAMIC 0xffff + +#define MAX_DISC_LOGS 255 + +/* Discovery log page entry */ +struct nvmf_disc_rsp_page_entry { + __u8 trtype; + __u8 adrfam; + __u8 nqntype; + __u8 treq; + __le16 portid; + __le16 cntlid; + __le16 asqsz; + __u8 resv8[22]; + char trsvcid[NVMF_TRSVCID_SIZE]; + __u8 resv64[192]; + char subnqn[NVMF_NQN_FIELD_LEN]; + char traddr[NVMF_TRADDR_SIZE]; + union tsas { + char common[NVMF_TSAS_SIZE]; + struct rdma { + __u8 qptype; + __u8 prtype; + __u8 cms; + __u8 resv3[5]; + __u16 pkey; + __u8 resv10[246]; + } rdma; + } tsas; +}; + +/* Discovery log page header */ +struct nvmf_disc_rsp_page_hdr { + __le64 genctr; + __le64 numrec; + __le16 recfmt; + __u8 resv14[1006]; + struct nvmf_disc_rsp_page_entry entries[0]; +}; + +struct nvmf_connect_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[19]; + union nvme_data_ptr dptr; + __le16 recfmt; + __le16 qid; + __le16 sqsize; + __u8 cattr; + __u8 resv3; + __le32 kato; + __u8 resv4[12]; +}; + +struct nvmf_connect_data { + uuid_le hostid; + __le16 cntlid; + char resv4[238]; + char subsysnqn[NVMF_NQN_FIELD_LEN]; + char hostnqn[NVMF_NQN_FIELD_LEN]; + char resv5[256]; +}; + +struct nvmf_property_set_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[35]; + __u8 attrib; + __u8 resv3[3]; + __le32 offset; + __le64 value; + __u8 resv4[8]; +}; + +struct nvmf_property_get_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[35]; + __u8 attrib; + __u8 resv3[3]; + __le32 offset; + __u8 resv4[16]; +}; + struct nvme_command { union { struct nvme_common_command common; @@ -587,15 +837,29 @@ struct nvme_command { struct nvme_dsm_cmd dsm; struct nvme_abort_cmd abort; struct nvme_get_log_page_command get_log_page; + struct nvmf_common_command fabrics; + struct nvmf_connect_command connect; + struct nvmf_property_set_command prop_set; + struct nvmf_property_get_command prop_get; }; }; static inline bool nvme_is_write(struct nvme_command *cmd) { + /* + * What a mess... + * + * Why can't we simply have a Fabrics In and Fabrics out command? + */ + if (unlikely(cmd->common.opcode == nvme_fabrics_command)) + return cmd->fabrics.opcode & 1; return cmd->common.opcode & 1; } enum { + /* + * Generic Command Status: + */ NVME_SC_SUCCESS = 0x0, NVME_SC_INVALID_OPCODE = 0x1, NVME_SC_INVALID_FIELD = 0x2, @@ -614,10 +878,18 @@ enum { NVME_SC_SGL_INVALID_DATA = 0xf, NVME_SC_SGL_INVALID_METADATA = 0x10, NVME_SC_SGL_INVALID_TYPE = 0x11, + + NVME_SC_SGL_INVALID_OFFSET = 0x16, + NVME_SC_SGL_INVALID_SUBTYPE = 0x17, + NVME_SC_LBA_RANGE = 0x80, NVME_SC_CAP_EXCEEDED = 0x81, NVME_SC_NS_NOT_READY = 0x82, NVME_SC_RESERVATION_CONFLICT = 0x83, + + /* + * Command Specific Status: + */ NVME_SC_CQ_INVALID = 0x100, NVME_SC_QID_INVALID = 0x101, NVME_SC_QUEUE_SIZE = 0x102, @@ -635,9 +907,29 @@ enum { NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, NVME_SC_FEATURE_NOT_PER_NS = 0x10f, NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, + + /* + * I/O Command Set Specific - NVM commands: + */ NVME_SC_BAD_ATTRIBUTES = 0x180, NVME_SC_INVALID_PI = 0x181, NVME_SC_READ_ONLY = 0x182, + + /* + * I/O Command Set Specific - Fabrics commands: + */ + NVME_SC_CONNECT_FORMAT = 0x180, + NVME_SC_CONNECT_CTRL_BUSY = 0x181, + NVME_SC_CONNECT_INVALID_PARAM = 0x182, + NVME_SC_CONNECT_RESTART_DISC = 0x183, + NVME_SC_CONNECT_INVALID_HOST = 0x184, + + NVME_SC_DISCOVERY_RESTART = 0x190, + NVME_SC_AUTH_REQUIRED = 0x191, + + /* + * Media and Data Integrity Errors: + */ NVME_SC_WRITE_FAULT = 0x280, NVME_SC_READ_ERROR = 0x281, NVME_SC_GUARD_CHECK = 0x282, @@ -645,12 +937,19 @@ enum { NVME_SC_REFTAG_CHECK = 0x284, NVME_SC_COMPARE_FAILED = 0x285, NVME_SC_ACCESS_DENIED = 0x286, + NVME_SC_DNR = 0x4000, }; struct nvme_completion { - __le32 result; /* Used by admin commands to return data */ - __u32 rsvd; + /* + * Used by Admin and Fabrics commands to return data: + */ + union { + __le16 result16; + __le32 result; + __le64 result64; + }; __le16 sq_head; /* how much of this queue may be reclaimed */ __le16 sq_id; /* submission queue that generated this entry */ __u16 command_id; /* of the command which completed */ -- cgit v1.2.3 From 7b89eae29eec4dd9259acd82ed57bec8d9e430ca Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Jun 2016 16:45:27 +0200 Subject: nvme.h: Add keep-alive opcode and identify controller attribute KAS: keep-alive support and granularity of kato in units of 100 ms nvme_admin_keep_alive opcode: 0x18 Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 75250303bcb3..d8b37bab2887 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -202,7 +202,9 @@ struct nvme_id_ctrl { __u8 apsta; __le16 wctemp; __le16 cctemp; - __u8 rsvd270[242]; + __u8 rsvd270[50]; + __le16 kas; + __u8 rsvd322[190]; __u8 sqes; __u8 cqes; __le16 maxcmd; @@ -556,6 +558,7 @@ enum nvme_admin_opcode { nvme_admin_async_event = 0x0c, nvme_admin_activate_fw = 0x10, nvme_admin_download_fw = 0x11, + nvme_admin_keep_alive = 0x18, nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, nvme_admin_security_recv = 0x82, @@ -580,6 +583,7 @@ enum { NVME_FEAT_WRITE_ATOMIC = 0x0a, NVME_FEAT_ASYNC_EVENT = 0x0b, NVME_FEAT_AUTO_PST = 0x0c, + NVME_FEAT_KATO = 0x0f, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, -- cgit v1.2.3 From 529435e8188674fa1071235f50e6fbbbe020c93f Mon Sep 17 00:00:00 2001 From: Javier González Date: Thu, 7 Jul 2016 09:54:08 +0200 Subject: lightnvm: add media manager mark_blk helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose media manager mark_blk() to targets, as done for the rest of the media manager callback functions. Signed-off-by: Javier González Updated description Signed-off-by: Matias Bjørling Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 6 ++++++ include/linux/lightnvm.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 160c1a6838e1..13993c905601 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -210,6 +210,12 @@ void nvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk) } EXPORT_SYMBOL(nvm_put_blk); +void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type) +{ + return dev->mt->mark_blk(dev, ppa, type); +} +EXPORT_SYMBOL(nvm_mark_blk); + int nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) { return dev->mt->submit_io(dev, rqd); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index ef2c7d2e76c4..9c56148d6f63 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -532,6 +532,8 @@ extern int nvm_register(struct request_queue *, char *, struct nvm_dev_ops *); extern void nvm_unregister(char *); +void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type); + extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *); extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); -- cgit v1.2.3 From 5389a1dfb39786df08d4f6a482bd2734b1b50e33 Mon Sep 17 00:00:00 2001 From: Javier González Date: Thu, 7 Jul 2016 09:54:09 +0200 Subject: lightnvm: initialize ppa_addr in dev_to_generic_addr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ->reserved bit is not initialized when allocated on stack. This may lead targets to misinterpret the PPA as cached. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 9c56148d6f63..cee4c8de34d5 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -385,6 +385,7 @@ static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, { struct ppa_addr l; + l.ppa = 0; /* * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc. */ -- cgit v1.2.3 From 077d2389994197277f4f7662b13d11b2f67646a7 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Thu, 7 Jul 2016 09:54:14 +0200 Subject: lightnvm: remove open/close statistics for gennvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The responsibility of the media manager is not to keep track of open/closed blocks. This is better maintained within a target, that already manages this information on writes. Remove the statistics and merge the states NVM_BLK_ST_OPEN and NVM_BLK_ST_CLOSED. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/gennvm.c | 31 ++++++------------------------- drivers/lightnvm/rrpc.c | 5 ----- include/linux/lightnvm.h | 13 ++----------- 3 files changed, 8 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index ec9fb6876e38..1a3e1649346b 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -122,9 +122,6 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn) lun->vlun.lun_id = i % dev->luns_per_chnl; lun->vlun.chnl_id = i / dev->luns_per_chnl; lun->vlun.nr_free_blocks = dev->blks_per_lun; - lun->vlun.nr_open_blocks = 0; - lun->vlun.nr_closed_blocks = 0; - lun->vlun.nr_bad_blocks = 0; } return 0; } @@ -149,7 +146,6 @@ static int gennvm_block_bb(struct gen_nvm *gn, struct ppa_addr ppa, blk = &lun->vlun.blocks[i]; list_move_tail(&blk->list, &lun->bb_list); - lun->vlun.nr_bad_blocks++; lun->vlun.nr_free_blocks--; } @@ -200,9 +196,8 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) * block state. The block is assumed to be open. */ list_move_tail(&blk->list, &lun->used_list); - blk->state = NVM_BLK_ST_OPEN; + blk->state = NVM_BLK_ST_TGT; lun->vlun.nr_free_blocks--; - lun->vlun.nr_open_blocks++; } } @@ -346,11 +341,10 @@ static struct nvm_block *gennvm_get_blk_unlocked(struct nvm_dev *dev, goto out; blk = list_first_entry(&lun->free_list, struct nvm_block, list); - list_move_tail(&blk->list, &lun->used_list); - blk->state = NVM_BLK_ST_OPEN; + list_move_tail(&blk->list, &lun->used_list); + blk->state = NVM_BLK_ST_TGT; lun->vlun.nr_free_blocks--; - lun->vlun.nr_open_blocks++; out: return blk; @@ -374,27 +368,18 @@ static void gennvm_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk) assert_spin_locked(&vlun->lock); - if (blk->state & NVM_BLK_ST_OPEN) { - list_move_tail(&blk->list, &lun->free_list); - lun->vlun.nr_open_blocks--; - lun->vlun.nr_free_blocks++; - blk->state = NVM_BLK_ST_FREE; - } else if (blk->state & NVM_BLK_ST_CLOSED) { + if (blk->state & NVM_BLK_ST_TGT) { list_move_tail(&blk->list, &lun->free_list); - lun->vlun.nr_closed_blocks--; lun->vlun.nr_free_blocks++; blk->state = NVM_BLK_ST_FREE; } else if (blk->state & NVM_BLK_ST_BAD) { list_move_tail(&blk->list, &lun->bb_list); - lun->vlun.nr_bad_blocks++; blk->state = NVM_BLK_ST_BAD; } else { WARN_ON_ONCE(1); pr_err("gennvm: erroneous block type (%lu -> %u)\n", blk->id, blk->state); list_move_tail(&blk->list, &lun->bb_list); - lun->vlun.nr_bad_blocks++; - blk->state = NVM_BLK_ST_BAD; } } @@ -516,12 +501,8 @@ static void gennvm_lun_info_print(struct nvm_dev *dev) gennvm_for_each_lun(gn, lun, i) { spin_lock(&lun->vlun.lock); - pr_info("%s: lun%8u\t%u\t%u\t%u\t%u\n", - dev->name, i, - lun->vlun.nr_free_blocks, - lun->vlun.nr_open_blocks, - lun->vlun.nr_closed_blocks, - lun->vlun.nr_bad_blocks); + pr_info("%s: lun%8u\t%u\n", dev->name, i, + lun->vlun.nr_free_blocks); spin_unlock(&lun->vlun.lock); } diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 90c7cb4e9c43..c3a6f34d3b7c 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -512,17 +512,12 @@ static void rrpc_gc_queue(struct work_struct *work) struct rrpc_block *rblk = gcb->rblk; struct rrpc_lun *rlun = rblk->rlun; struct nvm_lun *lun = rblk->parent->lun; - struct nvm_block *blk = rblk->parent; spin_lock(&rlun->lock); list_add_tail(&rblk->prio, &rlun->prio_list); spin_unlock(&rlun->lock); spin_lock(&lun->lock); - lun->nr_open_blocks--; - lun->nr_closed_blocks++; - blk->state &= ~NVM_BLK_ST_OPEN; - blk->state |= NVM_BLK_ST_CLOSED; list_move_tail(&rblk->list, &rlun->closed_list); spin_unlock(&lun->lock); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index cee4c8de34d5..8b51d5767331 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -269,24 +269,15 @@ struct nvm_lun { int lun_id; int chnl_id; - /* It is up to the target to mark blocks as closed. If the target does - * not do it, all blocks are marked as open, and nr_open_blocks - * represents the number of blocks in use - */ - unsigned int nr_open_blocks; /* Number of used, writable blocks */ - unsigned int nr_closed_blocks; /* Number of used, read-only blocks */ - unsigned int nr_free_blocks; /* Number of unused blocks */ - unsigned int nr_bad_blocks; /* Number of bad blocks */ - spinlock_t lock; + unsigned int nr_free_blocks; /* Number of unused blocks */ struct nvm_block *blocks; }; enum { NVM_BLK_ST_FREE = 0x1, /* Free block */ - NVM_BLK_ST_OPEN = 0x2, /* Open block - read-write */ - NVM_BLK_ST_CLOSED = 0x4, /* Closed block - read-only */ + NVM_BLK_ST_TGT = 0x2, /* Block in use by target */ NVM_BLK_ST_BAD = 0x8, /* Bad block */ }; -- cgit v1.2.3 From b76eb20bb0c6f4f9c2e1ad493f73c52817b8aaff Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Thu, 7 Jul 2016 09:54:16 +0200 Subject: lightnvm: move target mgmt into media mgr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To enable persistent block management to easily control creation and removal of targets, we move target management into the media manager. The LightNVM core continues to maintain which target types are registered, while the media manager now keeps track of its initialized targets. Two new callbacks for the media manager are introduced. create_tgt and remove_tgt. Note that remove_tgt returns 0 on successfully removing a target, and returns 1 if the target was not found. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 198 +++++++++------------------------------------- drivers/lightnvm/gennvm.c | 154 ++++++++++++++++++++++++++++++++++++ drivers/lightnvm/gennvm.h | 3 + include/linux/lightnvm.h | 10 +++ 4 files changed, 206 insertions(+), 159 deletions(-) (limited to 'include') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 0da196f6891b..8afb04cc4d5a 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -18,8 +18,6 @@ * */ -#include -#include #include #include #include @@ -28,42 +26,37 @@ #include #include #include -#include static LIST_HEAD(nvm_tgt_types); static LIST_HEAD(nvm_mgrs); static LIST_HEAD(nvm_devices); -static LIST_HEAD(nvm_targets); static DECLARE_RWSEM(nvm_lock); -static struct nvm_target *nvm_find_target(const char *name) +struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) { - struct nvm_target *tgt; + struct nvm_tgt_type *tmp, *tt = NULL; - list_for_each_entry(tgt, &nvm_targets, list) - if (!strcmp(name, tgt->disk->disk_name)) - return tgt; + if (lock) + down_write(&nvm_lock); - return NULL; -} - -static struct nvm_tgt_type *nvm_find_target_type(const char *name) -{ - struct nvm_tgt_type *tt; - - list_for_each_entry(tt, &nvm_tgt_types, list) - if (!strcmp(name, tt->name)) - return tt; + list_for_each_entry(tmp, &nvm_tgt_types, list) + if (!strcmp(name, tmp->name)) { + tt = tmp; + break; + } - return NULL; + if (lock) + up_write(&nvm_lock); + return tt; } +EXPORT_SYMBOL(nvm_find_target_type); int nvm_register_tgt_type(struct nvm_tgt_type *tt) { int ret = 0; down_write(&nvm_lock); - if (nvm_find_target_type(tt->name)) + if (nvm_find_target_type(tt->name, 0)) ret = -EEXIST; else list_add(&tt->list, &nvm_tgt_types); @@ -605,42 +598,11 @@ err_fmtype: return ret; } -static void nvm_remove_target(struct nvm_target *t) -{ - struct nvm_tgt_type *tt = t->type; - struct gendisk *tdisk = t->disk; - struct request_queue *q = tdisk->queue; - - lockdep_assert_held(&nvm_lock); - - del_gendisk(tdisk); - blk_cleanup_queue(q); - - if (tt->exit) - tt->exit(tdisk->private_data); - - put_disk(tdisk); - - list_del(&t->list); - kfree(t); -} - static void nvm_free_mgr(struct nvm_dev *dev) { - struct nvm_target *tgt, *tmp; - if (!dev->mt) return; - down_write(&nvm_lock); - list_for_each_entry_safe(tgt, tmp, &nvm_targets, list) { - if (tgt->dev != dev) - continue; - - nvm_remove_target(tgt); - } - up_write(&nvm_lock); - dev->mt->unregister_mgr(dev); dev->mt = NULL; } @@ -787,91 +749,6 @@ void nvm_unregister(char *disk_name) } EXPORT_SYMBOL(nvm_unregister); -static const struct block_device_operations nvm_fops = { - .owner = THIS_MODULE, -}; - -static int nvm_create_target(struct nvm_dev *dev, - struct nvm_ioctl_create *create) -{ - struct nvm_ioctl_create_simple *s = &create->conf.s; - struct request_queue *tqueue; - struct gendisk *tdisk; - struct nvm_tgt_type *tt; - struct nvm_target *t; - void *targetdata; - - if (!dev->mt) { - pr_info("nvm: device has no media manager registered.\n"); - return -ENODEV; - } - - down_write(&nvm_lock); - tt = nvm_find_target_type(create->tgttype); - if (!tt) { - pr_err("nvm: target type %s not found\n", create->tgttype); - up_write(&nvm_lock); - return -EINVAL; - } - - t = nvm_find_target(create->tgtname); - if (t) { - pr_err("nvm: target name already exists.\n"); - up_write(&nvm_lock); - return -EINVAL; - } - up_write(&nvm_lock); - - t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); - if (!t) - return -ENOMEM; - - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); - if (!tqueue) - goto err_t; - blk_queue_make_request(tqueue, tt->make_rq); - - tdisk = alloc_disk(0); - if (!tdisk) - goto err_queue; - - sprintf(tdisk->disk_name, "%s", create->tgtname); - tdisk->flags = GENHD_FL_EXT_DEVT; - tdisk->major = 0; - tdisk->first_minor = 0; - tdisk->fops = &nvm_fops; - tdisk->queue = tqueue; - - targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end); - if (IS_ERR(targetdata)) - goto err_init; - - tdisk->private_data = targetdata; - tqueue->queuedata = targetdata; - - blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); - - set_capacity(tdisk, tt->capacity(targetdata)); - add_disk(tdisk); - - t->type = tt; - t->disk = tdisk; - t->dev = dev; - - down_write(&nvm_lock); - list_add_tail(&t->list, &nvm_targets); - up_write(&nvm_lock); - - return 0; -err_init: - put_disk(tdisk); -err_queue: - blk_cleanup_queue(tqueue); -err_t: - kfree(t); - return -ENOMEM; -} - static int __nvm_configure_create(struct nvm_ioctl_create *create) { struct nvm_dev *dev; @@ -880,11 +757,17 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create) down_write(&nvm_lock); dev = nvm_find_nvm_dev(create->dev); up_write(&nvm_lock); + if (!dev) { pr_err("nvm: device not found\n"); return -EINVAL; } + if (!dev->mt) { + pr_info("nvm: device has no media manager registered.\n"); + return -ENODEV; + } + if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { pr_err("nvm: config type not valid\n"); return -EINVAL; @@ -897,25 +780,7 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create) return -EINVAL; } - return nvm_create_target(dev, create); -} - -static int __nvm_configure_remove(struct nvm_ioctl_remove *remove) -{ - struct nvm_target *t; - - down_write(&nvm_lock); - t = nvm_find_target(remove->tgtname); - if (!t) { - pr_err("nvm: target \"%s\" doesn't exist.\n", remove->tgtname); - up_write(&nvm_lock); - return -EINVAL; - } - - nvm_remove_target(t); - up_write(&nvm_lock); - - return 0; + return dev->mt->create_tgt(dev, create); } #ifdef CONFIG_NVM_DEBUG @@ -950,8 +815,9 @@ static int nvm_configure_show(const char *val) static int nvm_configure_remove(const char *val) { struct nvm_ioctl_remove remove; + struct nvm_dev *dev; char opcode; - int ret; + int ret = 0; ret = sscanf(val, "%c %256s", &opcode, remove.tgtname); if (ret != 2) { @@ -961,7 +827,13 @@ static int nvm_configure_remove(const char *val) remove.flags = 0; - return __nvm_configure_remove(&remove); + list_for_each_entry(dev, &nvm_devices, devices) { + ret = dev->mt->remove_tgt(dev, &remove); + if (!ret) + break; + } + + return ret; } static int nvm_configure_create(const char *val) @@ -1158,6 +1030,8 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg) static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) { struct nvm_ioctl_remove remove; + struct nvm_dev *dev; + int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1172,7 +1046,13 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) return -EINVAL; } - return __nvm_configure_remove(&remove); + list_for_each_entry(dev, &nvm_devices, devices) { + ret = dev->mt->remove_tgt(dev, &remove); + if (!ret) + break; + } + + return ret; } static void nvm_setup_nvm_sb_info(struct nvm_sb_info *info) diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index 3d2762f2a6fe..41760b24bea7 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -20,6 +20,144 @@ #include "gennvm.h" +static struct nvm_target *gen_find_target(struct gen_dev *gn, const char *name) +{ + struct nvm_target *tgt; + + list_for_each_entry(tgt, &gn->targets, list) + if (!strcmp(name, tgt->disk->disk_name)) + return tgt; + + return NULL; +} + +static const struct block_device_operations gen_fops = { + .owner = THIS_MODULE, +}; + +static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) +{ + struct gen_dev *gn = dev->mp; + struct nvm_ioctl_create_simple *s = &create->conf.s; + struct request_queue *tqueue; + struct gendisk *tdisk; + struct nvm_tgt_type *tt; + struct nvm_target *t; + void *targetdata; + + tt = nvm_find_target_type(create->tgttype, 1); + if (!tt) { + pr_err("nvm: target type %s not found\n", create->tgttype); + return -EINVAL; + } + + mutex_lock(&gn->lock); + t = gen_find_target(gn, create->tgtname); + if (t) { + pr_err("nvm: target name already exists.\n"); + mutex_unlock(&gn->lock); + return -EINVAL; + } + mutex_unlock(&gn->lock); + + t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); + if (!t) + return -ENOMEM; + + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); + if (!tqueue) + goto err_t; + blk_queue_make_request(tqueue, tt->make_rq); + + tdisk = alloc_disk(0); + if (!tdisk) + goto err_queue; + + sprintf(tdisk->disk_name, "%s", create->tgtname); + tdisk->flags = GENHD_FL_EXT_DEVT; + tdisk->major = 0; + tdisk->first_minor = 0; + tdisk->fops = &gen_fops; + tdisk->queue = tqueue; + + targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end); + if (IS_ERR(targetdata)) + goto err_init; + + tdisk->private_data = targetdata; + tqueue->queuedata = targetdata; + + blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); + + set_capacity(tdisk, tt->capacity(targetdata)); + add_disk(tdisk); + + t->type = tt; + t->disk = tdisk; + t->dev = dev; + + mutex_lock(&gn->lock); + list_add_tail(&t->list, &gn->targets); + mutex_unlock(&gn->lock); + + return 0; +err_init: + put_disk(tdisk); +err_queue: + blk_cleanup_queue(tqueue); +err_t: + kfree(t); + return -ENOMEM; +} + +static void __gen_remove_target(struct nvm_target *t) +{ + struct nvm_tgt_type *tt = t->type; + struct gendisk *tdisk = t->disk; + struct request_queue *q = tdisk->queue; + + del_gendisk(tdisk); + blk_cleanup_queue(q); + + if (tt->exit) + tt->exit(tdisk->private_data); + + put_disk(tdisk); + + list_del(&t->list); + kfree(t); +} + +/** + * gen_remove_tgt - Removes a target from the media manager + * @dev: device + * @remove: ioctl structure with target name to remove. + * + * Returns: + * 0: on success + * 1: on not found + * <0: on error + */ +static int gen_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) +{ + struct gen_dev *gn = dev->mp; + struct nvm_target *t; + + if (!gn) + return 1; + + mutex_lock(&gn->lock); + t = gen_find_target(gn, remove->tgtname); + if (!t) { + mutex_unlock(&gn->lock); + return 1; + } + __gen_remove_target(t); + mutex_unlock(&gn->lock); + + return 0; +} + static int gen_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len) { struct gen_dev *gn = dev->mp; @@ -295,6 +433,8 @@ static int gen_register(struct nvm_dev *dev) gn->dev = dev; gn->nr_luns = dev->nr_luns; INIT_LIST_HEAD(&gn->area_list); + mutex_init(&gn->lock); + INIT_LIST_HEAD(&gn->targets); dev->mp = gn; ret = gen_luns_init(dev, gn); @@ -318,6 +458,17 @@ err: static void gen_unregister(struct nvm_dev *dev) { + struct gen_dev *gn = dev->mp; + struct nvm_target *t, *tmp; + + mutex_lock(&gn->lock); + list_for_each_entry_safe(t, tmp, &gn->targets, list) { + if (t->dev != dev) + continue; + __gen_remove_target(t); + } + mutex_unlock(&gn->lock); + gen_free(dev); module_put(THIS_MODULE); } @@ -515,6 +666,9 @@ static struct nvmm_type gen = { .register_mgr = gen_register, .unregister_mgr = gen_unregister, + .create_tgt = gen_create_tgt, + .remove_tgt = gen_remove_tgt, + .get_blk_unlocked = gen_get_blk_unlocked, .put_blk_unlocked = gen_put_blk_unlocked, diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h index bf0621963c7c..8ecfa817d21d 100644 --- a/drivers/lightnvm/gennvm.h +++ b/drivers/lightnvm/gennvm.h @@ -40,6 +40,9 @@ struct gen_dev { int nr_luns; struct gen_lun *luns; struct list_head area_list; + + struct mutex lock; + struct list_head targets; }; struct gen_area { diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 8b51d5767331..d619f6dde7df 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -1,7 +1,9 @@ #ifndef NVM_H #define NVM_H +#include #include +#include enum { NVM_IO_OK = 0, @@ -447,6 +449,8 @@ struct nvm_tgt_type { struct list_head list; }; +extern struct nvm_tgt_type *nvm_find_target_type(const char *, int); + extern int nvm_register_tgt_type(struct nvm_tgt_type *); extern void nvm_unregister_tgt_type(struct nvm_tgt_type *); @@ -455,6 +459,9 @@ extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); typedef int (nvmm_register_fn)(struct nvm_dev *); typedef void (nvmm_unregister_fn)(struct nvm_dev *); + +typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *); +typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *); typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *, struct nvm_lun *, unsigned long); typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *); @@ -480,6 +487,9 @@ struct nvmm_type { nvmm_register_fn *register_mgr; nvmm_unregister_fn *unregister_mgr; + nvmm_create_tgt_fn *create_tgt; + nvmm_remove_tgt_fn *remove_tgt; + /* Block administration callbacks */ nvmm_get_blk_fn *get_blk_unlocked; nvmm_put_blk_fn *put_blk_unlocked; -- cgit v1.2.3 From 41285fad511a2c3746bee7ccb3b2f21b70305c14 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Thu, 7 Jul 2016 09:54:19 +0200 Subject: lightnvm: remove _unlocked variant of [get/put]_blk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The [get/put]_blk API enables targets to get ownership of blocks at runtime. This information is currently not recorded on disk, and the information is therefore lost on power failure. To restore the metadata, the [get/put]_blk must persist its metadata. In that case, we need to control the outer lock, so that we can disable them while updating the on-disk metadata. Fortunately, the _unlocked versions can be removed, which allows us to move the lock into the [get/put]_blk functions. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 14 -------------- drivers/lightnvm/gennvm.c | 32 ++++---------------------------- drivers/lightnvm/rrpc.c | 14 ++------------ include/linux/lightnvm.h | 6 ------ 4 files changed, 6 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 04469e0c84ce..ddc809803084 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -176,20 +176,6 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name) return NULL; } -struct nvm_block *nvm_get_blk_unlocked(struct nvm_dev *dev, struct nvm_lun *lun, - unsigned long flags) -{ - return dev->mt->get_blk_unlocked(dev, lun, flags); -} -EXPORT_SYMBOL(nvm_get_blk_unlocked); - -/* Assumes that all valid pages have already been moved on release to bm */ -void nvm_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk) -{ - return dev->mt->put_blk_unlocked(dev, blk); -} -EXPORT_SYMBOL(nvm_put_blk_unlocked); - struct nvm_block *nvm_get_blk(struct nvm_dev *dev, struct nvm_lun *lun, unsigned long flags) { diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index 41760b24bea7..c65fb675e374 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -473,15 +473,14 @@ static void gen_unregister(struct nvm_dev *dev) module_put(THIS_MODULE); } -static struct nvm_block *gen_get_blk_unlocked(struct nvm_dev *dev, +static struct nvm_block *gen_get_blk(struct nvm_dev *dev, struct nvm_lun *vlun, unsigned long flags) { struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun); struct nvm_block *blk = NULL; int is_gc = flags & NVM_IOTYPE_GC; - assert_spin_locked(&vlun->lock); - + spin_lock(&vlun->lock); if (list_empty(&lun->free_list)) { pr_err_ratelimited("gen: lun %u have no free pages available", lun->vlun.id); @@ -496,29 +495,17 @@ static struct nvm_block *gen_get_blk_unlocked(struct nvm_dev *dev, list_move_tail(&blk->list, &lun->used_list); blk->state = NVM_BLK_ST_TGT; lun->vlun.nr_free_blocks--; - out: - return blk; -} - -static struct nvm_block *gen_get_blk(struct nvm_dev *dev, - struct nvm_lun *vlun, unsigned long flags) -{ - struct nvm_block *blk; - - spin_lock(&vlun->lock); - blk = gen_get_blk_unlocked(dev, vlun, flags); spin_unlock(&vlun->lock); return blk; } -static void gen_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk) +static void gen_put_blk(struct nvm_dev *dev, struct nvm_block *blk) { struct nvm_lun *vlun = blk->lun; struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun); - assert_spin_locked(&vlun->lock); - + spin_lock(&vlun->lock); if (blk->state & NVM_BLK_ST_TGT) { list_move_tail(&blk->list, &lun->free_list); lun->vlun.nr_free_blocks++; @@ -532,14 +519,6 @@ static void gen_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk) blk->id, blk->state); list_move_tail(&blk->list, &lun->bb_list); } -} - -static void gen_put_blk(struct nvm_dev *dev, struct nvm_block *blk) -{ - struct nvm_lun *vlun = blk->lun; - - spin_lock(&vlun->lock); - gen_put_blk_unlocked(dev, blk); spin_unlock(&vlun->lock); } @@ -669,9 +648,6 @@ static struct nvmm_type gen = { .create_tgt = gen_create_tgt, .remove_tgt = gen_remove_tgt, - .get_blk_unlocked = gen_get_blk_unlocked, - .put_blk_unlocked = gen_put_blk_unlocked, - .get_blk = gen_get_blk, .put_blk = gen_put_blk, diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 10ed22b9875d..fa8d5be2987c 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -192,21 +192,16 @@ static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *rblk) static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun, unsigned long flags) { - struct nvm_lun *lun = rlun->parent; struct nvm_block *blk; struct rrpc_block *rblk; - spin_lock(&lun->lock); - blk = nvm_get_blk_unlocked(rrpc->dev, rlun->parent, flags); + blk = nvm_get_blk(rrpc->dev, rlun->parent, flags); if (!blk) { pr_err("nvm: rrpc: cannot get new block from media manager\n"); - spin_unlock(&lun->lock); return NULL; } rblk = rrpc_get_rblk(rlun, blk->id); - spin_unlock(&lun->lock); - blk->priv = rblk; bitmap_zero(rblk->invalid_pages, rrpc->dev->sec_per_blk); rblk->next_page = 0; @@ -218,12 +213,7 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun, static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk) { - struct rrpc_lun *rlun = rblk->rlun; - struct nvm_lun *lun = rlun->parent; - - spin_lock(&lun->lock); - nvm_put_blk_unlocked(rrpc->dev, rblk->parent); - spin_unlock(&lun->lock); + nvm_put_blk(rrpc->dev, rblk->parent); } static void rrpc_put_blks(struct rrpc *rrpc) diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index d619f6dde7df..e9836cfb6fde 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -491,8 +491,6 @@ struct nvmm_type { nvmm_remove_tgt_fn *remove_tgt; /* Block administration callbacks */ - nvmm_get_blk_fn *get_blk_unlocked; - nvmm_put_blk_fn *put_blk_unlocked; nvmm_get_blk_fn *get_blk; nvmm_put_blk_fn *put_blk; nvmm_open_blk_fn *open_blk; @@ -522,10 +520,6 @@ struct nvmm_type { extern int nvm_register_mgr(struct nvmm_type *); extern void nvm_unregister_mgr(struct nvmm_type *); -extern struct nvm_block *nvm_get_blk_unlocked(struct nvm_dev *, - struct nvm_lun *, unsigned long); -extern void nvm_put_blk_unlocked(struct nvm_dev *, struct nvm_block *); - extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *, unsigned long); extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *); -- cgit v1.2.3 From 8680f165eaa56cc9812d7d9b4903f449df0f6a45 Mon Sep 17 00:00:00 2001 From: Matias Bjørling Date: Thu, 7 Jul 2016 09:54:22 +0200 Subject: lightnvm: make ppa_list const in nvm_set_rqd_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The passed by reference ppa list in nvm_set_rqd_list() is updated when multiple planes are available. In that case, each PPA plane is incremented when the device side PPA list is created. This prevents the caller to rely on the PPA list to be unmodified after a call. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 8 +++++--- include/linux/lightnvm.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index ddc809803084..00b64f78887d 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -237,9 +237,10 @@ void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd) EXPORT_SYMBOL(nvm_generic_to_addr_mode); int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, - struct ppa_addr *ppas, int nr_ppas, int vblk) + const struct ppa_addr *ppas, int nr_ppas, int vblk) { int i, plane_cnt, pl_idx; + struct ppa_addr ppa; if ((!vblk || dev->plane_mode == NVM_PLANE_SINGLE) && nr_ppas == 1) { rqd->nr_ppas = nr_ppas; @@ -264,8 +265,9 @@ int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, for (i = 0; i < nr_ppas; i++) { for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { - ppas[i].g.pl = pl_idx; - rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppas[i]; + ppa = ppas[i]; + ppa.g.pl = pl_idx; + rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa; } } } diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e9836cfb6fde..ba78b8306674 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -534,7 +534,7 @@ extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *); extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, - struct ppa_addr *, int, int); + const struct ppa_addr *, int, int); extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int); extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *); -- cgit v1.2.3 From 486cf9899e311838b6ab95d19ff87c4da44d6508 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 6 Jul 2016 21:55:48 +0900 Subject: blk-mq: Introduce blk_mq_reinit_tagset The new nvme-rdma driver will need to reinitialize all the tags as part of the error recovery procedure (realloc the tag memory region). Add a helper in blk-mq for it that can iterate over all requests in a tagset to make this easier. Signed-off-by: Sagi Grimberg Tested-by: Ming Lin Reviewed-by: Stephen Bates Signed-off-by: Christoph Hellwig Reviewed-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 26 ++++++++++++++++++++++++++ include/linux/blk-mq.h | 3 +++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 56a0c37a3d06..729bac3a673b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -485,6 +485,32 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, } EXPORT_SYMBOL(blk_mq_tagset_busy_iter); +int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) +{ + int i, j, ret = 0; + + if (!set->ops->reinit_request) + goto out; + + for (i = 0; i < set->nr_hw_queues; i++) { + struct blk_mq_tags *tags = set->tags[i]; + + for (j = 0; j < tags->nr_tags; j++) { + if (!tags->rqs[j]) + continue; + + ret = set->ops->reinit_request(set->driver_data, + tags->rqs[j]); + if (ret) + goto out; + } + } + +out: + return ret; +} +EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset); + void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index cbfd8ca5f13e..e43bbffb5b7a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -96,6 +96,7 @@ typedef int (init_request_fn)(void *, struct request *, unsigned int, unsigned int, unsigned int); typedef void (exit_request_fn)(void *, struct request *, unsigned int, unsigned int); +typedef int (reinit_request_fn)(void *, struct request *); typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); @@ -145,6 +146,7 @@ struct blk_mq_ops { */ init_request_fn *init_request; exit_request_fn *exit_request; + reinit_request_fn *reinit_request; }; enum { @@ -245,6 +247,7 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); +int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); -- cgit v1.2.3 From 931a6de4d734de6142142c82555e3f017adcb9f0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 6 Jul 2016 21:55:50 +0900 Subject: nvme-rdma.h: Add includes for nvme rdma_cm negotiation NVMe over Fabrics RDMA transport defines a connection establishment protocol over the RDMA connection manager. This header will be used by both the host and target drivers to negotiate the connection establishment parameters. Signed-off-by: Jay Freyensee Signed-off-by: Ming Lin Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Jens Axboe --- include/linux/nvme-rdma.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 include/linux/nvme-rdma.h (limited to 'include') diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h new file mode 100644 index 000000000000..bf240a3cbf99 --- /dev/null +++ b/include/linux/nvme-rdma.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _LINUX_NVME_RDMA_H +#define _LINUX_NVME_RDMA_H + +enum nvme_rdma_cm_fmt { + NVME_RDMA_CM_FMT_1_0 = 0x0, +}; + +enum nvme_rdma_cm_status { + NVME_RDMA_CM_INVALID_LEN = 0x01, + NVME_RDMA_CM_INVALID_RECFMT = 0x02, + NVME_RDMA_CM_INVALID_QID = 0x03, + NVME_RDMA_CM_INVALID_HSQSIZE = 0x04, + NVME_RDMA_CM_INVALID_HRQSIZE = 0x05, + NVME_RDMA_CM_NO_RSC = 0x06, + NVME_RDMA_CM_INVALID_IRD = 0x07, + NVME_RDMA_CM_INVALID_ORD = 0x08, +}; + +/** + * struct nvme_rdma_cm_req - rdma connect request + * + * @recfmt: format of the RDMA Private Data + * @qid: queue Identifier for the Admin or I/O Queue + * @hrqsize: host receive queue size to be created + * @hsqsize: host send queue size to be created + */ +struct nvme_rdma_cm_req { + __le16 recfmt; + __le16 qid; + __le16 hrqsize; + __le16 hsqsize; + u8 rsvd[24]; +}; + +/** + * struct nvme_rdma_cm_rep - rdma connect reply + * + * @recfmt: format of the RDMA Private Data + * @crqsize: controller receive queue size + */ +struct nvme_rdma_cm_rep { + __le16 recfmt; + __le16 crqsize; + u8 rsvd[28]; +}; + +/** + * struct nvme_rdma_cm_rej - rdma connect reject + * + * @recfmt: format of the RDMA Private Data + * @fsts: error status for the associated connect request + */ +struct nvme_rdma_cm_rej { + __le16 recfmt; + __le16 sts; +}; + +#endif /* _LINUX_NVME_RDMA_H */ -- cgit v1.2.3 From e950fdf71c9b4a6b63b58fed78956a96cc907402 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:23:33 +0200 Subject: block: introduce BLKDEV_DISCARD_ZERO to fix zeroout Currently blkdev_issue_zeroout cascades down from discards (if the driver guarantees that discards zero data), to WRITE SAME and then to a loop writing zeroes. Unfortunately we ignore run-time EOPNOTSUPP errors in the block layer blkdev_issue_discard helper to work around DM volumes that may have mixed discard support underneath. This patch intoroduces a new BLKDEV_DISCARD_ZERO flag to blkdev_issue_discard that indicates we are called for zeroing operation. This allows both to ignore the EOPNOTSUPP hack and actually consolidating the discard_zeroes_data check into the function. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-lib.c | 17 +++++++++++------ include/linux/blkdev.h | 4 +++- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/block/blk-lib.c b/block/blk-lib.c index 78626c2fde33..45b35b15496f 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -36,12 +36,17 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, return -ENXIO; if (flags & BLKDEV_DISCARD_SECURE) { + if (flags & BLKDEV_DISCARD_ZERO) + return -EOPNOTSUPP; if (!blk_queue_secure_erase(q)) return -EOPNOTSUPP; op = REQ_OP_SECURE_ERASE; } else { if (!blk_queue_discard(q)) return -EOPNOTSUPP; + if ((flags & BLKDEV_DISCARD_ZERO) && + !q->limits.discard_zeroes_data) + return -EOPNOTSUPP; op = REQ_OP_DISCARD; } @@ -116,7 +121,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, &bio); if (!ret && bio) { ret = submit_bio_wait(bio); - if (ret == -EOPNOTSUPP) + if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO)) ret = 0; } blk_finish_plug(&plug); @@ -241,11 +246,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, bool discard) { - struct request_queue *q = bdev_get_queue(bdev); - - if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data && - blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0) - return 0; + if (discard) { + if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, + BLKDEV_DISCARD_ZERO)) + return 0; + } if (bdev_write_same(bdev) && blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53fee6123893..156455cb07ad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1137,7 +1137,9 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, return bqt->tag_index[tag]; } -#define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ + +#define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ +#define BLKDEV_DISCARD_ZERO (1 << 1) /* must reliably zero data */ extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, -- cgit v1.2.3 From 70246286e94c335b5bea0cbc68a17a96dd620281 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:28:41 +0200 Subject: block: get rid of bio_rw and READA These two are confusing leftover of the old world order, combining values of the REQ_OP_ and REQ_ namespaces. For callers that don't special case we mostly just replace bi_rw with bio_data_dir or op_is_write, except for the few cases where a switch over the REQ_OP_ values makes more sense. Any check for READA is replaced with an explicit check for REQ_RAHEAD. Also remove the READA alias for REQ_RAHEAD. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Mike Christie Signed-off-by: Jens Axboe --- drivers/block/brd.c | 4 +--- drivers/block/drbd/drbd_req.c | 34 ++++++++++++++++------------- drivers/block/drbd/drbd_worker.c | 30 +++++++++++++++---------- drivers/block/null_blk.c | 2 +- drivers/block/umem.c | 6 ++--- drivers/lightnvm/rrpc.c | 4 ++-- drivers/md/dm-raid1.c | 8 +++---- drivers/md/dm-snap.c | 13 ++++++----- drivers/md/dm-zero.c | 15 ++++++++----- drivers/md/dm.c | 2 +- drivers/md/raid1.c | 2 +- drivers/md/raid5.c | 2 +- drivers/nvme/host/lightnvm.c | 2 +- drivers/staging/lustre/lustre/llite/lloop.c | 13 ++++++----- fs/buffer.c | 12 +++++----- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/gc.c | 3 ++- fs/f2fs/node.c | 2 +- fs/gfs2/bmap.c | 3 ++- fs/gfs2/dir.c | 2 +- fs/gfs2/meta_io.c | 2 +- fs/reiserfs/stree.c | 2 +- fs/udf/dir.c | 2 +- fs/udf/directory.c | 2 +- include/linux/fs.h | 18 +-------------- include/trace/events/f2fs.h | 4 ++-- 26 files changed, 95 insertions(+), 96 deletions(-) (limited to 'include') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index f5b0d6f4e09f..41a6c4c9da9f 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -347,9 +347,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) goto out; } - rw = bio_rw(bio); - if (rw == READA) - rw = READ; + rw = bio_data_dir(bio); bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 787536a0ee7c..66b8e4bb74d8 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -219,7 +219,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) { const unsigned s = req->rq_state; struct drbd_device *device = req->device; - int rw; int error, ok; /* we must not complete the master bio, while it is @@ -243,8 +242,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) return; } - rw = bio_rw(req->master_bio); - /* * figure out whether to report success or failure. * @@ -268,7 +265,7 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) * epoch number. If they match, increase the current_tle_nr, * and reset the transfer log epoch write_cnt. */ - if (rw == WRITE && + if (op_is_write(bio_op(req->master_bio)) && req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr)) start_new_tl_epoch(first_peer_device(device)->connection); @@ -285,11 +282,14 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) * because no path was available, in which case * it was not even added to the transfer_log. * - * READA may fail, and will not be retried. + * read-ahead may fail, and will not be retried. * * WRITE should have used all available paths already. */ - if (!ok && rw == READ && !list_empty(&req->tl_requests)) + if (!ok && + bio_op(req->master_bio) == REQ_OP_READ && + !(req->master_bio->bi_rw & REQ_RAHEAD) && + !list_empty(&req->tl_requests)) req->rq_state |= RQ_POSTPONED; if (!(req->rq_state & RQ_POSTPONED)) { @@ -645,7 +645,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, __drbd_chk_io_error(device, DRBD_READ_ERROR); /* fall through. */ case READ_AHEAD_COMPLETED_WITH_ERROR: - /* it is legal to fail READA, no __drbd_chk_io_error in that case. */ + /* it is legal to fail read-ahead, no __drbd_chk_io_error in that case. */ mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); break; @@ -657,7 +657,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case QUEUE_FOR_NET_READ: - /* READ or READA, and + /* READ, and * no local disk, * or target area marked as invalid, * or just got an io-error. */ @@ -1172,7 +1172,14 @@ drbd_submit_req_private_bio(struct drbd_request *req) { struct drbd_device *device = req->device; struct bio *bio = req->private_bio; - const int rw = bio_rw(bio); + unsigned int type; + + if (bio_op(bio) != REQ_OP_READ) + type = DRBD_FAULT_DT_WR; + else if (bio->bi_rw & REQ_RAHEAD) + type = DRBD_FAULT_DT_RA; + else + type = DRBD_FAULT_DT_RD; bio->bi_bdev = device->ldev->backing_bdev; @@ -1182,10 +1189,7 @@ drbd_submit_req_private_bio(struct drbd_request *req) * stable storage, and this is a WRITE, we may not even submit * this bio. */ if (get_ldev(device)) { - if (drbd_insert_fault(device, - rw == WRITE ? DRBD_FAULT_DT_WR - : rw == READ ? DRBD_FAULT_DT_RD - : DRBD_FAULT_DT_RA)) + if (drbd_insert_fault(device, type)) bio_io_error(bio); else if (bio_op(bio) == REQ_OP_DISCARD) drbd_process_discard_req(req); @@ -1278,7 +1282,7 @@ static bool may_do_writes(struct drbd_device *device) static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) { struct drbd_resource *resource = device->resource; - const int rw = bio_rw(req->master_bio); + const int rw = bio_data_dir(req->master_bio); struct bio_and_error m = { NULL, }; bool no_remote = false; bool submit_private_bio = false; @@ -1308,7 +1312,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request goto out; } - /* We fail READ/READA early, if we can not serve it. + /* We fail READ early, if we can not serve it. * We must do this before req is registered on any lists. * Otherwise, drbd_req_complete() will queue failed READ for retry. */ if (rw != WRITE) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index b3fa5575bc0e..35dbb3dca47e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -248,18 +248,26 @@ void drbd_request_endio(struct bio *bio) /* to avoid recursion in __req_mod */ if (unlikely(bio->bi_error)) { - if (bio_op(bio) == REQ_OP_DISCARD) - what = (bio->bi_error == -EOPNOTSUPP) - ? DISCARD_COMPLETED_NOTSUPP - : DISCARD_COMPLETED_WITH_ERROR; - else - what = (bio_data_dir(bio) == WRITE) - ? WRITE_COMPLETED_WITH_ERROR - : (bio_rw(bio) == READ) - ? READ_COMPLETED_WITH_ERROR - : READ_AHEAD_COMPLETED_WITH_ERROR; - } else + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + if (bio->bi_error == -EOPNOTSUPP) + what = DISCARD_COMPLETED_NOTSUPP; + else + what = DISCARD_COMPLETED_WITH_ERROR; + break; + case REQ_OP_READ: + if (bio->bi_rw & REQ_RAHEAD) + what = READ_AHEAD_COMPLETED_WITH_ERROR; + else + what = READ_COMPLETED_WITH_ERROR; + break; + default: + what = WRITE_COMPLETED_WITH_ERROR; + break; + } + } else { what = COMPLETED_OK; + } bio_put(req->private_bio); req->private_bio = ERR_PTR(bio->bi_error); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index cab97593ba54..75a7f88d6717 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -448,7 +448,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) struct request *rq; struct bio *bio = rqd->bio; - rq = blk_mq_alloc_request(q, bio_rw(bio), 0); + rq = blk_mq_alloc_request(q, bio_data_dir(bio), 0); if (IS_ERR(rq)) return -ENOMEM; diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 4b3ba74e9d22..d0a3e6d4515f 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -344,7 +344,6 @@ static int add_bio(struct cardinfo *card) int offset; struct bio *bio; struct bio_vec vec; - int rw; bio = card->currentbio; if (!bio && card->bio) { @@ -359,7 +358,6 @@ static int add_bio(struct cardinfo *card) if (!bio) return 0; - rw = bio_rw(bio); if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE) return 0; @@ -369,7 +367,7 @@ static int add_bio(struct cardinfo *card) vec.bv_page, vec.bv_offset, vec.bv_len, - (rw == READ) ? + bio_op(bio) == REQ_OP_READ ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); p = &card->mm_pages[card->Ready]; @@ -398,7 +396,7 @@ static int add_bio(struct cardinfo *card) DMASCR_CHAIN_EN | DMASCR_SEM_EN | pci_cmds); - if (rw == WRITE) + if (bio_op(bio) == REQ_OP_WRITE) desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ); desc->sem_control_bits = desc->control_bits; diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index fa1ab0421489..37fcaadbf80c 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -853,14 +853,14 @@ static int rrpc_setup_rq(struct rrpc *rrpc, struct bio *bio, return NVM_IO_ERR; } - if (bio_rw(bio) == WRITE) + if (bio_op(bio) == REQ_OP_WRITE) return rrpc_write_ppalist_rq(rrpc, bio, rqd, flags, npages); return rrpc_read_ppalist_rq(rrpc, bio, rqd, flags, npages); } - if (bio_rw(bio) == WRITE) + if (bio_op(bio) == REQ_OP_WRITE) return rrpc_write_rq(rrpc, bio, rqd, flags); return rrpc_read_rq(rrpc, bio, rqd, flags); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 9f5f460c0e92..dac55b254a09 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -528,7 +528,7 @@ static void read_callback(unsigned long error, void *context) DMWARN_LIMIT("Read failure on mirror device %s. " "Trying alternative device.", m->dev->name); - queue_bio(m->ms, bio, bio_rw(bio)); + queue_bio(m->ms, bio, bio_data_dir(bio)); return; } @@ -1193,7 +1193,7 @@ static void mirror_dtr(struct dm_target *ti) */ static int mirror_map(struct dm_target *ti, struct bio *bio) { - int r, rw = bio_rw(bio); + int r, rw = bio_data_dir(bio); struct mirror *m; struct mirror_set *ms = ti->private; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); @@ -1217,7 +1217,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) * If region is not in-sync queue the bio. */ if (!r || (r == -EWOULDBLOCK)) { - if (rw == READA) + if (bio->bi_rw & REQ_RAHEAD) return -EWOULDBLOCK; queue_bio(ms, bio, rw); @@ -1242,7 +1242,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) { - int rw = bio_rw(bio); + int rw = bio_data_dir(bio); struct mirror_set *ms = (struct mirror_set *) ti->private; struct mirror *m = NULL; struct dm_bio_details *bd = NULL; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 69ab1ff5f5c9..cc2f14b42ba4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1696,7 +1696,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) * to copy an exception */ down_write(&s->lock); - if (!s->valid || (unlikely(s->snapshot_overflowed) && bio_rw(bio) == WRITE)) { + if (!s->valid || (unlikely(s->snapshot_overflowed) && + bio_data_dir(bio) == WRITE)) { r = -EIO; goto out_unlock; } @@ -1713,7 +1714,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) * flags so we should only get this if we are * writeable. */ - if (bio_rw(bio) == WRITE) { + if (bio_data_dir(bio) == WRITE) { pe = __lookup_pending_exception(s, chunk); if (!pe) { up_write(&s->lock); @@ -1819,7 +1820,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) e = dm_lookup_exception(&s->complete, chunk); if (e) { /* Queue writes overlapping with chunks being merged */ - if (bio_rw(bio) == WRITE && + if (bio_data_dir(bio) == WRITE && chunk >= s->first_merging_chunk && chunk < (s->first_merging_chunk + s->num_merging_chunks)) { @@ -1831,7 +1832,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) remap_exception(s, e, bio, chunk); - if (bio_rw(bio) == WRITE) + if (bio_data_dir(bio) == WRITE) track_chunk(s, bio, chunk); goto out_unlock; } @@ -1839,7 +1840,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) redirect_to_origin: bio->bi_bdev = s->origin->bdev; - if (bio_rw(bio) == WRITE) { + if (bio_data_dir(bio) == WRITE) { up_write(&s->lock); return do_origin(s->origin, bio); } @@ -2288,7 +2289,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio) if (unlikely(bio->bi_rw & REQ_PREFLUSH)) return DM_MAPIO_REMAPPED; - if (bio_rw(bio) != WRITE) + if (bio_data_dir(bio) != WRITE) return DM_MAPIO_REMAPPED; available_sectors = o->split_boundary - diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index 766bc93006e6..618b8752dcf1 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -35,16 +35,19 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) */ static int zero_map(struct dm_target *ti, struct bio *bio) { - switch(bio_rw(bio)) { - case READ: + switch (bio_op(bio)) { + case REQ_OP_READ: + if (bio->bi_rw & REQ_RAHEAD) { + /* readahead of null bytes only wastes buffer cache */ + return -EIO; + } zero_fill_bio(bio); break; - case READA: - /* readahead of null bytes only wastes buffer cache */ - return -EIO; - case WRITE: + case REQ_OP_WRITE: /* writes get silently dropped */ break; + default: + return -EIO; } bio_endio(bio); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index aba7ed9abb3a..812fd5984eea 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1833,7 +1833,7 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { dm_put_live_table(md, srcu_idx); - if (bio_rw(bio) != READA) + if (!(bio->bi_rw & REQ_RAHEAD)) queue_io(md, bio); else bio_io_error(bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 41d9c31da3b3..4e6da4497553 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1105,7 +1105,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) bitmap = mddev->bitmap; /* - * make_request() can abort the operation when READA is being + * make_request() can abort the operation when read-ahead is being * used and no empty request is available. * */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7aacf5b55e15..6953d78297b0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5233,7 +5233,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi) (unsigned long long)logical_sector); sh = raid5_get_active_stripe(conf, new_sector, previous, - (bi->bi_rw&RWA_MASK), 0); + (bi->bi_rw & REQ_RAHEAD), 0); if (sh) { if (unlikely(previous)) { /* expansion might have moved on while waiting for a diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 97fe6109c98f..63f483daf930 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -500,7 +500,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) struct bio *bio = rqd->bio; struct nvme_nvm_command *cmd; - rq = blk_mq_alloc_request(q, bio_rw(bio), 0); + rq = blk_mq_alloc_request(q, bio_data_dir(bio), 0); if (IS_ERR(rq)) return -ENOMEM; diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c index b677930a8a3b..56ae8ac32708 100644 --- a/drivers/staging/lustre/lustre/llite/lloop.c +++ b/drivers/staging/lustre/lustre/llite/lloop.c @@ -336,7 +336,6 @@ static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req) static blk_qc_t loop_make_request(struct request_queue *q, struct bio *old_bio) { struct lloop_device *lo = q->queuedata; - int rw = bio_rw(old_bio); int inactive; blk_queue_split(q, &old_bio, q->bio_split); @@ -354,13 +353,15 @@ static blk_qc_t loop_make_request(struct request_queue *q, struct bio *old_bio) if (inactive) goto err; - if (rw == WRITE) { + switch (bio_op(old_bio)) { + case REQ_OP_WRITE: if (lo->lo_flags & LO_FLAGS_READ_ONLY) goto err; - } else if (rw == READA) { - rw = READ; - } else if (rw != READ) { - CERROR("lloop: unknown command (%x)\n", rw); + break; + case REQ_OP_READ: + break; + default: + CERROR("lloop: unknown command (%x)\n", bio_op(old_bio)); goto err; } loop_add_bio(lo, old_bio); diff --git a/fs/buffer.c b/fs/buffer.c index 373aacb4f4c1..c24201e78492 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -153,7 +153,7 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - /* This happens, due to failed READA attempts. */ + /* This happens, due to failed read-ahead attempts. */ clear_buffer_uptodate(bh); } unlock_buffer(bh); @@ -1395,7 +1395,7 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = __getblk(bdev, block, size); if (likely(bh)) { - ll_rw_block(REQ_OP_READ, READA, 1, &bh); + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); brelse(bh); } } @@ -3052,14 +3052,14 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) * @op: whether to %READ or %WRITE - * @op_flags: rq_flag_bits or %READA (readahead) + * @op_flags: rq_flag_bits * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and - * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %READA option is described in the documentation for generic_make_request() - * which ll_rw_block() calls. + * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE. + * @op_flags contains flags modifying the detailed I/O behavior, most notably + * %REQ_RAHEAD. * * This function drops any buffer that it cannot get a lock on (with the * BH_Lock state bit), any buffer that appears to be clean when doing a write diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b6d600e91f39..124b4a3017b5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -159,7 +159,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .sbi = sbi, .type = META, .op = REQ_OP_READ, - .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA, + .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, }; struct blk_plug plug; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3649d86bb431..f06ed73adf99 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -733,7 +733,8 @@ next_step: start_bidx = start_bidx_of_node(nofs, inode); data_page = get_read_data_page(inode, - start_bidx + ofs_in_node, READA, true); + start_bidx + ofs_in_node, REQ_RAHEAD, + true); if (IS_ERR(data_page)) { iput(inode); continue; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e53403987f6d..d1867698e601 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1119,7 +1119,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) if (!apage) return; - err = read_node_page(apage, READA); + err = read_node_page(apage, REQ_RAHEAD); f2fs_put_page(apage, err ? 1 : 0); } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index fd6389cf0f14..6e2bec1cd289 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -285,7 +285,8 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, if (trylock_buffer(rabh)) { if (!buffer_uptodate(rabh)) { rabh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, READA | REQ_META, rabh); + submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, + rabh); continue; } unlock_buffer(rabh); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 0fbb42679cef..f077cf5796ee 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1513,7 +1513,7 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, continue; } bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, READA | REQ_META, bh); + submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, bh); continue; } brelse(bh); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 052c1132e5b6..950b8be68e41 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -459,7 +459,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) bh = gfs2_getbuf(gl, dblock, CREATE); if (!buffer_uptodate(bh) && !buffer_locked(bh)) - ll_rw_block(REQ_OP_READ, READA | REQ_META, 1, &bh); + ll_rw_block(REQ_OP_READ, REQ_RAHEAD | REQ_META, 1, &bh); brelse(bh); dblock++; extlen--; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 64b29b592d86..4032d1e87c8f 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -551,7 +551,7 @@ static int search_by_key_reada(struct super_block *s, if (!buffer_uptodate(bh[j])) { if (depth == -1) depth = reiserfs_write_unlock_nested(s); - ll_rw_block(REQ_OP_READ, READA, 1, bh + j); + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j); } brelse(bh[j]); } diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 80c8a21daed9..aaec13c95253 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -113,7 +113,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) brelse(tmp); } if (num) { - ll_rw_block(REQ_OP_READ, READA, num, bha); + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha); for (i = 0; i < num; i++) brelse(bha[i]); } diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 71f3e0b5b8ab..988d5352bdb8 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -87,7 +87,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, brelse(tmp); } if (num) { - ll_rw_block(REQ_OP_READ, READA, num, bha); + ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha); for (i = 0; i < num; i++) brelse(bha[i]); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 183024525d40..dc488662ce0b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -178,9 +178,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, * READ_SYNC A synchronous read. Device is not plugged, caller can * immediately wait on this read without caring about * unplugging. - * READA Used for read-ahead operations. Lower priority, and the - * block layer could (in theory) choose to ignore this - * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO @@ -195,11 +192,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, * */ #define RW_MASK REQ_OP_WRITE -#define RWA_MASK REQ_RAHEAD #define READ REQ_OP_READ -#define WRITE RW_MASK -#define READA RWA_MASK +#define WRITE REQ_OP_WRITE #define READ_SYNC REQ_SYNC #define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) @@ -2470,17 +2465,6 @@ static inline bool op_is_write(unsigned int op) return op == REQ_OP_READ ? false : true; } -/* - * return READ, READA, or WRITE - */ -static inline int bio_rw(struct bio *bio) -{ - if (op_is_write(bio_op(bio))) - return WRITE; - - return bio->bi_rw & RWA_MASK; -} - /* * return data direction, READ or WRITE */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 878963a1f058..ff95fd02116f 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -55,7 +55,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { IPU, "IN-PLACE" }, \ { OPU, "OUT-OF-PLACE" }) -#define F2FS_BIO_FLAG_MASK(t) (t & (READA | WRITE_FLUSH_FUA)) +#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | WRITE_FLUSH_FUA)) #define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) #define show_bio_type(op, op_flags) show_bio_op(op), \ @@ -68,7 +68,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD); #define show_bio_op_flags(flags) \ __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \ - { READA, "READAHEAD" }, \ + { REQ_RAHEAD, "READAHEAD" }, \ { READ_SYNC, "READ_SYNC" }, \ { WRITE_SYNC, "WRITE_SYNC" }, \ { WRITE_FLUSH, "WRITE_FLUSH" }, \ -- cgit v1.2.3 From ed996a52c868b62c4e5bf529cb4ccb44bcfa2f8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:28:42 +0200 Subject: block: simplify and cleanup bvec pool handling Instead of a flag and an index just make sure an index of 0 means no need to free the bvec array. Also move the constants related to the bvec pools together and use a consistent naming scheme for them. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Mike Christie Signed-off-by: Jens Axboe --- block/bio-integrity.c | 9 ++++----- block/bio.c | 32 ++++++++++++++++++-------------- drivers/md/bcache/io.c | 1 - include/linux/bio.h | 2 -- include/linux/blk_types.h | 22 ++++++++++++++-------- 5 files changed, 36 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 711e4d8de6fa..11633fca27d3 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -53,7 +53,6 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, { struct bio_integrity_payload *bip; struct bio_set *bs = bio->bi_pool; - unsigned long idx = BIO_POOL_NONE; unsigned inline_vecs; if (!bs || !bs->bio_integrity_pool) { @@ -71,17 +70,19 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, memset(bip, 0, sizeof(*bip)); if (nr_vecs > inline_vecs) { + unsigned long idx = 0; + bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx, bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; bip->bip_max_vcnt = bvec_nr_vecs(idx); + bip->bip_slab = idx; } else { bip->bip_vec = bip->bip_inline_vecs; bip->bip_max_vcnt = inline_vecs; } - bip->bip_slab = idx; bip->bip_bio = bio; bio->bi_integrity = bip; bio->bi_rw |= REQ_INTEGRITY; @@ -110,9 +111,7 @@ void bio_integrity_free(struct bio *bio) bip->bip_vec->bv_offset); if (bs && bs->bio_integrity_pool) { - if (bip->bip_slab != BIO_POOL_NONE) - bvec_free(bs->bvec_integrity_pool, bip->bip_vec, - bip->bip_slab); + bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab); mempool_free(bip, bs->bio_integrity_pool); } else { diff --git a/block/bio.c b/block/bio.c index 848cd351513b..882b50ae3ad6 100644 --- a/block/bio.c +++ b/block/bio.c @@ -43,7 +43,7 @@ * unsigned short */ #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } -static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { +static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = { BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), }; #undef BV @@ -160,11 +160,15 @@ unsigned int bvec_nr_vecs(unsigned short idx) void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) { - BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); + if (!idx) + return; + idx--; + + BIO_BUG_ON(idx >= BVEC_POOL_NR); - if (idx == BIOVEC_MAX_IDX) + if (idx == BVEC_POOL_MAX) { mempool_free(bv, pool); - else { + } else { struct biovec_slab *bvs = bvec_slabs + idx; kmem_cache_free(bvs->slab, bv); @@ -206,7 +210,7 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, * idx now points to the pool we want to allocate from. only the * 1-vec entry pool is mempool backed. */ - if (*idx == BIOVEC_MAX_IDX) { + if (*idx == BVEC_POOL_MAX) { fallback: bvl = mempool_alloc(pool, gfp_mask); } else { @@ -226,11 +230,12 @@ fallback: */ bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) { - *idx = BIOVEC_MAX_IDX; + *idx = BVEC_POOL_MAX; goto fallback; } } + (*idx)++; return bvl; } @@ -250,8 +255,7 @@ static void bio_free(struct bio *bio) __bio_free(bio); if (bs) { - if (bio_flagged(bio, BIO_OWNS_VEC)) - bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); + bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio)); /* * If we have front padding, adjust the bio pointer before freeing @@ -420,7 +424,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) gfp_t saved_gfp = gfp_mask; unsigned front_pad; unsigned inline_vecs; - unsigned long idx = BIO_POOL_NONE; struct bio_vec *bvl = NULL; struct bio *bio; void *p; @@ -480,6 +483,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bio_init(bio); if (nr_iovecs > inline_vecs) { + unsigned long idx = 0; + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); if (!bvl && gfp_mask != saved_gfp) { punt_bios_to_rescuer(bs); @@ -490,13 +495,12 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (unlikely(!bvl)) goto err_free; - bio_set_flag(bio, BIO_OWNS_VEC); + bio->bi_flags |= idx << BVEC_POOL_OFFSET; } else if (nr_iovecs) { bvl = bio->bi_inline_vecs; } bio->bi_pool = bs; - bio->bi_flags |= idx << BIO_POOL_OFFSET; bio->bi_max_vecs = nr_iovecs; bio->bi_io_vec = bvl; return bio; @@ -568,7 +572,7 @@ EXPORT_SYMBOL(bio_phys_segments); */ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) { - BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE); + BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio)); /* * most users will be overriding ->bi_bdev with a new target, @@ -1832,7 +1836,7 @@ EXPORT_SYMBOL_GPL(bio_trim); */ mempool_t *biovec_create_pool(int pool_entries) { - struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; + struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX; return mempool_create_slab_pool(pool_entries, bp->slab); } @@ -2009,7 +2013,7 @@ static void __init biovec_init_slabs(void) { int i; - for (i = 0; i < BIOVEC_NR_POOLS; i++) { + for (i = 0; i < BVEC_POOL_NR; i++) { int size; struct biovec_slab *bvs = bvec_slabs + i; diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index fd885cc2afad..e97b0acf7b8d 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -25,7 +25,6 @@ struct bio *bch_bbio_alloc(struct cache_set *c) struct bio *bio = &b->bio; bio_init(bio); - bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; bio->bi_max_vecs = bucket_pages(c); bio->bi_io_vec = bio->bi_inline_vecs; diff --git a/include/linux/bio.h b/include/linux/bio.h index 0bbb2e332410..141cfa95a185 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -715,8 +715,6 @@ static inline void bio_inc_remaining(struct bio *bio) * and the bvec_slabs[]. */ #define BIO_POOL_SIZE 2 -#define BIOVEC_NR_POOLS 6 -#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1) struct bio_set { struct kmem_cache *bio_slab; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index efba1f2ace2e..b76463ea3587 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -134,19 +134,25 @@ struct bio { /* * Flags starting here get preserved by bio_reset() - this includes - * BIO_POOL_IDX() + * BVEC_POOL_IDX() */ #define BIO_RESET_BITS 13 -#define BIO_OWNS_VEC 13 /* bio_free() should free bvec */ /* - * top 4 bits of bio flags indicate the pool this bio came from + * We support 6 different bvec pools, the last one is magic in that it + * is backed by a mempool. */ -#define BIO_POOL_BITS (4) -#define BIO_POOL_NONE ((1UL << BIO_POOL_BITS) - 1) -#define BIO_POOL_OFFSET (32 - BIO_POOL_BITS) -#define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) -#define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) +#define BVEC_POOL_NR 6 +#define BVEC_POOL_MAX (BVEC_POOL_NR - 1) + +/* + * Top 4 bits of bio flags indicate the pool the bvecs came from. We add + * 1 to the actual index so that 0 indicates that there are no bvecs to be + * freed. + */ +#define BVEC_POOL_BITS (4) +#define BVEC_POOL_OFFSET (32 - BVEC_POOL_BITS) +#define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) #endif /* CONFIG_BLOCK */ -- cgit v1.2.3 From c0acf12a50c2b6cbaf72bdfe2b1b514c369a83c5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:28:43 +0200 Subject: block: shrink bio size again The recent ops split grew the bio by adding the new ioprio field. Shrink it again by using a 16-bit field for the bi_flags value and filling the holes near the beginning of the structure. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Mike Christie Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index b76463ea3587..c3525c5564b2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -46,11 +46,11 @@ struct bvec_iter { struct bio { struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; - unsigned int bi_flags; /* status, command, etc */ int bi_error; unsigned int bi_rw; /* bottom bits req flags, * top bits REQ_OP */ + unsigned short bi_flags; /* status, command, etc */ unsigned short bi_ioprio; struct bvec_iter bi_iter; @@ -136,7 +136,7 @@ struct bio { * Flags starting here get preserved by bio_reset() - this includes * BVEC_POOL_IDX() */ -#define BIO_RESET_BITS 13 +#define BIO_RESET_BITS 10 /* * We support 6 different bvec pools, the last one is magic in that it @@ -151,7 +151,7 @@ struct bio { * freed. */ #define BVEC_POOL_BITS (4) -#define BVEC_POOL_OFFSET (32 - BVEC_POOL_BITS) +#define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS) #define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) #endif /* CONFIG_BLOCK */ -- cgit v1.2.3 From 98d61d5b1a65a9df7cb3d9605f5d37d3dbbb4b5e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:31:51 +0200 Subject: block: simplify and export blk_rq_append_bio The target SCSI passthrough backend is much better served with the low-level blk_rq_append_bio construct then the helpers built on top of it, so export it. Also use the opportunity to remove the pointless request_queue argument and make the code flow a little more readable. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-map.c | 25 +++++++++++++++---------- block/blk.h | 2 -- include/linux/blkdev.h | 1 + 4 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 4d8794925fe9..a2230186c36d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1363,7 +1363,7 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio, int ret; blk_queue_bounce(q, &bounce_bio); - ret = blk_rq_append_bio(q, rq, bounce_bio); + ret = blk_rq_append_bio(rq, bounce_bio); if (unlikely(ret)) { blk_put_request(rq); return ERR_PTR(ret); diff --git a/block/blk-map.c b/block/blk-map.c index 61733a660c3a..b8657fa8dc9a 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -9,21 +9,26 @@ #include "blk.h" -int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio) +/* + * Append a bio to a passthrough request. Only works can be merged into + * the request based on the driver constraints. + */ +int blk_rq_append_bio(struct request *rq, struct bio *bio) { - if (!rq->bio) - blk_rq_bio_prep(q, rq, bio); - else if (!ll_back_merge_fn(q, rq, bio)) - return -EINVAL; - else { + if (!rq->bio) { + blk_rq_bio_prep(rq->q, rq, bio); + } else { + if (!ll_back_merge_fn(rq->q, rq, bio)) + return -EINVAL; + rq->biotail->bi_next = bio; rq->biotail = bio; - rq->__data_len += bio->bi_iter.bi_size; } + return 0; } +EXPORT_SYMBOL(blk_rq_append_bio); static int __blk_rq_unmap_user(struct bio *bio) { @@ -71,7 +76,7 @@ static int __blk_rq_map_user_iov(struct request *rq, */ bio_get(bio); - ret = blk_rq_append_bio(q, rq, bio); + ret = blk_rq_append_bio(rq, bio); if (ret) { bio_endio(bio); __blk_rq_unmap_user(orig_bio); @@ -229,7 +234,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (do_copy) rq->cmd_flags |= REQ_COPY_USER; - ret = blk_rq_append_bio(q, rq, bio); + ret = blk_rq_append_bio(rq, bio); if (unlikely(ret)) { /* request is too big */ bio_put(bio); diff --git a/block/blk.h b/block/blk.h index 70e4aee9cdcb..c37492f5edaa 100644 --- a/block/blk.h +++ b/block/blk.h @@ -64,8 +64,6 @@ void blk_exit_rl(struct request_list *rl); void init_request_from_bio(struct request *req, struct bio *bio); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); -int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio); void blk_queue_bypass_start(struct request_queue *q); void blk_queue_bypass_end(struct request_queue *q); void blk_dequeue_request(struct request *rq); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 156455cb07ad..706c8bf61c84 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -802,6 +802,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, extern void blk_rq_unprep_clone(struct request *rq); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); +extern int blk_rq_append_bio(struct request *rq, struct bio *bio); extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_queue_split(struct request_queue *, struct bio **, struct bio_set *); -- cgit v1.2.3 From 4613c5f1df92f3cb5a8f89c7dfefc37402c16bd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jul 2016 11:31:53 +0200 Subject: scsi/osd: open code blk_make_request I wish the OSD code could simply use blk_rq_map_* helpers like everyone else, but the complex nature of deciding if we have DATA IN and/or DATA OUT buffers might make this impossible (at least for a mere human like me). But using blk_rq_append_bio at least allows sharing the setup code between request with or without dat a buffers, and given that this is the last user of blk_make_request it allows getting rid of that somewhat awkward interface. Signed-off-by: Christoph Hellwig Acked-by: Boaz Harrosh Signed-off-by: Jens Axboe --- block/blk-core.c | 57 ---------------------------------------- drivers/scsi/osd/osd_initiator.c | 25 +++++++++++------- include/linux/blkdev.h | 2 -- 3 files changed, 16 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index a2230186c36d..91b339f4d2ad 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1317,63 +1317,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) } EXPORT_SYMBOL(blk_get_request); -/** - * blk_make_request - given a bio, allocate a corresponding struct request. - * @q: target request queue - * @bio: The bio describing the memory mappings that will be submitted for IO. - * It may be a chained-bio properly constructed by block/bio layer. - * @gfp_mask: gfp flags to be used for memory allocation - * - * blk_make_request is the parallel of generic_make_request for BLOCK_PC - * type commands. Where the struct request needs to be farther initialized by - * the caller. It is passed a &struct bio, which describes the memory info of - * the I/O transfer. - * - * The caller of blk_make_request must make sure that bi_io_vec - * are set to describe the memory buffers. That bio_data_dir() will return - * the needed direction of the request. (And all bio's in the passed bio-chain - * are properly set accordingly) - * - * If called under none-sleepable conditions, mapped bio buffers must not - * need bouncing, by calling the appropriate masked or flagged allocator, - * suitable for the target device. Otherwise the call to blk_queue_bounce will - * BUG. - * - * WARNING: When allocating/cloning a bio-chain, careful consideration should be - * given to how you allocate bios. In particular, you cannot use - * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise - * you risk waiting for IO completion of a bio that hasn't been submitted yet, - * thus resulting in a deadlock. Alternatively bios should be allocated using - * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock. - * If possible a big IO should be split into smaller parts when allocation - * fails. Partial allocation should not be an error, or you risk a live-lock. - */ -struct request *blk_make_request(struct request_queue *q, struct bio *bio, - gfp_t gfp_mask) -{ - struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); - - if (IS_ERR(rq)) - return rq; - - blk_rq_set_block_pc(rq); - - for_each_bio(bio) { - struct bio *bounce_bio = bio; - int ret; - - blk_queue_bounce(q, &bounce_bio); - ret = blk_rq_append_bio(rq, bounce_bio); - if (unlikely(ret)) { - blk_put_request(rq); - return ERR_PTR(ret); - } - } - - return rq; -} -EXPORT_SYMBOL(blk_make_request); - /** * blk_rq_set_block_pc - initialize a request to type BLOCK_PC * @rq: request to be initialized diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index daa4dc17f172..2f2a9910e30e 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -1558,18 +1558,25 @@ static int _osd_req_finalize_data_integrity(struct osd_request *or, static struct request *_make_request(struct request_queue *q, bool has_write, struct _osd_io_info *oii, gfp_t flags) { - if (oii->bio) - return blk_make_request(q, oii->bio, flags); - else { - struct request *req; - - req = blk_get_request(q, has_write ? WRITE : READ, flags); - if (IS_ERR(req)) - return req; + struct request *req; + struct bio *bio = oii->bio; + int ret; - blk_rq_set_block_pc(req); + req = blk_get_request(q, has_write ? WRITE : READ, flags); + if (IS_ERR(req)) return req; + blk_rq_set_block_pc(req); + + for_each_bio(bio) { + struct bio *bounce_bio = bio; + + blk_queue_bounce(req->q, &bounce_bio); + ret = blk_rq_append_bio(req, bounce_bio); + if (ret) + return ERR_PTR(ret); } + + return req; } static int _init_blk_request(struct osd_request *or, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 706c8bf61c84..0cdea75a6614 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -788,8 +788,6 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); -extern struct request *blk_make_request(struct request_queue *, struct bio *, - gfp_t); extern void blk_rq_set_block_pc(struct request *); extern void blk_requeue_request(struct request_queue *, struct request *); extern void blk_add_request_payload(struct request *rq, struct page *page, -- cgit v1.2.3