diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-13 10:19:16 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-13 10:19:16 -0800 |
commit | 36869cb93d36269f34800b3384ba7991060a69cf (patch) | |
tree | 1ff266dcb3386bb1403494aa89647a96fd2396cd /drivers/block | |
parent | 9439b3710df688d853eb6cb4851256f2c92b1797 (diff) | |
parent | 7cd54aa8438947602cf68eda1db327822b9b8e6b (diff) | |
download | lwn-36869cb93d36269f34800b3384ba7991060a69cf.tar.gz lwn-36869cb93d36269f34800b3384ba7991060a69cf.zip |
Merge branch 'for-4.10/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
"This is the main block pull request for this series. Contrary to the previous
release, I've kept the core and driver changes in the same branch. We
always ended up having dependencies between the two for obvious
reasons, so it makes more sense to keep them together. That said, I'll
probably try and keep more topical branches going forward, especially
for cycles that end up being as busy as this one.
The major parts of this pull request are:
- Improved support for O_DIRECT on block devices, with a small
private implementation instead of using the pig that is
fs/direct-io.c. From Christoph.
- Request completion tracking in a scalable fashion. This is utilized
by two components in this pull, the new hybrid polling and the
writeback queue throttling code.
- Improved support for polling with O_DIRECT, adding a hybrid mode
that combines pure polling with an initial sleep. From me.
- Support for automatic throttling of writeback queues on the block
side. This uses feedback from the device completion latencies to
scale the queue on the block side up or down. From me.
- Support for SMR drives in the block layer and for SD. From Hannes
and Shaun.
- Multi-connection support for nbd. From Josef.
- Cleanup of request and bio flags, so we have a clear split between
which are bio (or rq) private, and which ones are shared. From
Christoph.
- A set of patches from Bart, that improve how we handle queue
stopping and starting in blk-mq.
- Support for WRITE_ZEROES from Chaitanya.
- Lightnvm updates from Javier/Matias.
- Support for FC for the nvme-over-fabrics code. From James Smart.
- A bunch of fixes from a whole slew of people, too many to name
here"
* 'for-4.10/block' of git://git.kernel.dk/linux-block: (182 commits)
blk-stat: fix a few cases of missing batch flushing
blk-flush: run the queue when inserting blk-mq flush
elevator: make the rqhash helpers exported
blk-mq: abstract out blk_mq_dispatch_rq_list() helper
blk-mq: add blk_mq_start_stopped_hw_queue()
block: improve handling of the magic discard payload
blk-wbt: don't throttle discard or write zeroes
nbd: use dev_err_ratelimited in io path
nbd: reset the setup task for NBD_CLEAR_SOCK
nvme-fabrics: Add FC LLDD loopback driver to test FC-NVME
nvme-fabrics: Add target support for FC transport
nvme-fabrics: Add host support for FC transport
nvme-fabrics: Add FC transport LLDD api definitions
nvme-fabrics: Add FC transport FC-NVME definitions
nvme-fabrics: Add FC transport error codes to nvme.h
Add type 0x28 NVME type code to scsi fc headers
nvme-fabrics: patch target code in prep for FC transport support
nvme-fabrics: set sqe.command_id in core not transports
parser: add u64 number parser
nvme-rdma: align to generic ib_event logging helper
...
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 5 | ||||
-rw-r--r-- | drivers/block/brd.c | 39 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 16 | ||||
-rw-r--r-- | drivers/block/floppy.c | 10 | ||||
-rw-r--r-- | drivers/block/loop.c | 2 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 18 | ||||
-rw-r--r-- | drivers/block/nbd.c | 443 | ||||
-rw-r--r-- | drivers/block/null_blk.c | 1 | ||||
-rw-r--r-- | drivers/block/pktcdvd.c | 49 | ||||
-rw-r--r-- | drivers/block/skd_main.c | 238 | ||||
-rw-r--r-- | drivers/block/umem.c | 2 | ||||
-rw-r--r-- | drivers/block/xen-blkback/blkback.c | 10 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 3 |
14 files changed, 390 insertions, 448 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 39dd30b6ef86..223ff2fcae7e 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -384,9 +384,12 @@ config BLK_DEV_RAM_DAX allocated from highmem (only a problem for highmem systems). config CDROM_PKTCDVD - tristate "Packet writing on CD/DVD media" + tristate "Packet writing on CD/DVD media (DEPRECATED)" depends on !UML help + Note: This driver is deprecated and will be removed from the + kernel in the near future! + If you have a CDROM/DVD drive that supports packet writing, say Y to include support. It should work with any MMC/Mt Fuji compliant ATAPI or SCSI drive, which is just about any newer diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 0c76d4016eeb..ad793f35632c 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -395,44 +395,9 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector, #define brd_direct_access NULL #endif -static int brd_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - int error; - struct brd_device *brd = bdev->bd_disk->private_data; - - if (cmd != BLKFLSBUF) - return -ENOTTY; - - /* - * ram device BLKFLSBUF has special semantics, we want to actually - * release and destroy the ramdisk data. - */ - mutex_lock(&brd_mutex); - mutex_lock(&bdev->bd_mutex); - error = -EBUSY; - if (bdev->bd_openers <= 1) { - /* - * Kill the cache first, so it isn't written back to the - * device. - * - * Another thread might instantiate more buffercache here, - * but there is not much we can do to close that race. 
- */ - kill_bdev(bdev); - brd_free_pages(brd); - error = 0; - } - mutex_unlock(&bdev->bd_mutex); - mutex_unlock(&brd_mutex); - - return error; -} - static const struct block_device_operations brd_fops = { .owner = THIS_MODULE, .rw_page = brd_rw_page, - .ioctl = brd_ioctl, .direct_access = brd_direct_access, }; @@ -443,8 +408,8 @@ static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT; module_param(rd_nr, int, S_IRUGO); MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); -int rd_size = CONFIG_BLK_DEV_RAM_SIZE; -module_param(rd_size, int, S_IRUGO); +unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE; +module_param(rd_size, ulong, S_IRUGO); MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); static int max_part = 1; diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 2d3d50ab74bf..8d7bcfa49c12 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -148,7 +148,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, if ((op == REQ_OP_WRITE) && !test_bit(MD_NO_FUA, &device->flags)) op_flags |= REQ_FUA | REQ_PREFLUSH; - op_flags |= REQ_SYNC | REQ_NOIDLE; + op_flags |= REQ_SYNC; bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 942384f34e22..c7728dd77230 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1266,7 +1266,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont bio->bi_bdev = device->ldev->backing_bdev; bio->bi_private = octx; bio->bi_end_io = one_flush_endio; - bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH); + bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH; device->flush_jif = jiffies; set_bit(FLUSH_PENDING, &device->flags); @@ -1648,20 +1648,8 @@ next_bio: page_chain_for_each(page) { unsigned len = min_t(unsigned, data_size, PAGE_SIZE); - if (!bio_add_page(bio, page, len, 0)) { - /* A single page must always be 
possible! - * But in case it fails anyways, - * we deal with it, and complain (below). */ - if (bio->bi_vcnt == 0) { - drbd_err(device, - "bio_add_page failed for len=%u, " - "bi_vcnt=0 (bi_sector=%llu)\n", - len, (uint64_t)bio->bi_iter.bi_sector); - err = -ENOSPC; - goto fail; - } + if (!bio_add_page(bio, page, len, 0)) goto next_bio; - } data_size -= len; sector += len >> 9; --nr_pages; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index e3d8e4ced4a2..a391a3cfb3fe 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3806,14 +3806,10 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) cbdata.drive = drive; - bio_init(&bio); - bio.bi_io_vec = &bio_vec; - bio_vec.bv_page = page; - bio_vec.bv_len = size; - bio_vec.bv_offset = 0; - bio.bi_vcnt = 1; - bio.bi_iter.bi_size = size; + bio_init(&bio, &bio_vec, 1); bio.bi_bdev = bdev; + bio_add_page(&bio, page, size, 0); + bio.bi_iter.bi_sector = 0; bio.bi_flags |= (1 << BIO_QUIET); bio.bi_private = &cbdata; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fa1b7a90ba11..4af818766797 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1646,7 +1646,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); if (lo->lo_state != Lo_bound) - return -EIO; + return BLK_MQ_RQ_QUEUE_ERROR; switch (req_op(cmd->rq)) { case REQ_OP_FLUSH: diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3cfd879267b2..f96ab717534c 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2035,18 +2035,14 @@ static int exec_drive_taskfile(struct driver_data *dd, taskout = req_task->out_size; taskin = req_task->in_size; /* 130560 = 512 * 0xFF*/ - if (taskin > 130560 || taskout > 130560) { - err = -EINVAL; - goto abort; - } + if (taskin > 130560 || taskout > 130560) + return -EINVAL; if (taskout) { outbuf = memdup_user(buf + outtotal, taskout); - if (IS_ERR(outbuf)) { - err = 
PTR_ERR(outbuf); - outbuf = NULL; - goto abort; - } + if (IS_ERR(outbuf)) + return PTR_ERR(outbuf); + outbuf_dma = pci_map_single(dd->pdev, outbuf, taskout, @@ -3937,8 +3933,10 @@ static int mtip_block_initialize(struct driver_data *dd) /* Generate the disk name, implemented same as in sd.c */ do { - if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL)) + if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL)) { + rv = -ENOMEM; goto ida_get_error; + } spin_lock(&rssd_index_lock); rv = ida_get_new(&rssd_index_ida, &index); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7a1048755914..99c84468f154 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -41,26 +41,34 @@ #include <linux/nbd.h> +struct nbd_sock { + struct socket *sock; + struct mutex tx_lock; +}; + #define NBD_TIMEDOUT 0 #define NBD_DISCONNECT_REQUESTED 1 +#define NBD_DISCONNECTED 2 +#define NBD_RUNNING 3 struct nbd_device { u32 flags; unsigned long runtime_flags; - struct socket * sock; /* If == NULL, device is not ready, yet */ + struct nbd_sock **socks; int magic; struct blk_mq_tag_set tag_set; - struct mutex tx_lock; + struct mutex config_lock; struct gendisk *disk; - int blksize; + int num_connections; + atomic_t recv_threads; + wait_queue_head_t recv_wq; + loff_t blksize; loff_t bytesize; - /* protects initialization and shutdown of the socket */ - spinlock_t sock_lock; struct task_struct *task_recv; - struct task_struct *task_send; + struct task_struct *task_setup; #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *dbg_dir; @@ -69,7 +77,7 @@ struct nbd_device { struct nbd_cmd { struct nbd_device *nbd; - struct list_head list; + struct completion send_complete; }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -126,7 +134,7 @@ static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev) } static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, - int blocksize, int nr_blocks) + loff_t blocksize, loff_t nr_blocks) { int ret; @@ -135,7 +143,7 @@ static int nbd_size_set(struct 
nbd_device *nbd, struct block_device *bdev, return ret; nbd->blksize = blocksize; - nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks; + nbd->bytesize = blocksize * nr_blocks; nbd_size_update(nbd, bdev); @@ -159,22 +167,20 @@ static void nbd_end_request(struct nbd_cmd *cmd) */ static void sock_shutdown(struct nbd_device *nbd) { - struct socket *sock; - - spin_lock(&nbd->sock_lock); + int i; - if (!nbd->sock) { - spin_unlock(&nbd->sock_lock); + if (nbd->num_connections == 0) + return; + if (test_and_set_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) return; - } - - sock = nbd->sock; - dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n"); - nbd->sock = NULL; - spin_unlock(&nbd->sock_lock); - kernel_sock_shutdown(sock, SHUT_RDWR); - sockfd_put(sock); + for (i = 0; i < nbd->num_connections; i++) { + struct nbd_sock *nsock = nbd->socks[i]; + mutex_lock(&nsock->tx_lock); + kernel_sock_shutdown(nsock->sock, SHUT_RDWR); + mutex_unlock(&nsock->tx_lock); + } + dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n"); } static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, @@ -182,42 +188,38 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); struct nbd_device *nbd = cmd->nbd; - struct socket *sock = NULL; - - spin_lock(&nbd->sock_lock); + dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); - - if (nbd->sock) { - sock = nbd->sock; - get_file(sock->file); - } - - spin_unlock(&nbd->sock_lock); - if (sock) { - kernel_sock_shutdown(sock, SHUT_RDWR); - sockfd_put(sock); - } - req->errors++; - dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); + + /* + * If our disconnect packet times out then we're already holding the + * config_lock and could deadlock here, so just set an error and return, + * we'll handle shutting everything down later. 
+ */ + if (req->cmd_type == REQ_TYPE_DRV_PRIV) + return BLK_EH_HANDLED; + mutex_lock(&nbd->config_lock); + sock_shutdown(nbd); + mutex_unlock(&nbd->config_lock); return BLK_EH_HANDLED; } /* * Send or receive packet. */ -static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, - int msg_flags) +static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, + int size, int msg_flags) { - struct socket *sock = nbd->sock; + struct socket *sock = nbd->socks[index]->sock; int result; struct msghdr msg; struct kvec iov; unsigned long pflags = current->flags; if (unlikely(!sock)) { - dev_err(disk_to_dev(nbd->disk), + dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted %s on closed socket in sock_xmit\n", (send ? "send" : "recv")); return -EINVAL; @@ -254,29 +256,29 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, return result; } -static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec, - int flags) +static inline int sock_send_bvec(struct nbd_device *nbd, int index, + struct bio_vec *bvec, int flags) { int result; void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset, + result = sock_xmit(nbd, index, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags); kunmap(bvec->bv_page); return result; } /* always call with the tx_lock held */ -static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd) +static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); int result, flags; struct nbd_request request; unsigned long size = blk_rq_bytes(req); + struct bio *bio; u32 type; + u32 tag = blk_mq_unique_tag(req); - if (req->cmd_type == REQ_TYPE_DRV_PRIV) - type = NBD_CMD_DISC; - else if (req_op(req) == REQ_OP_DISCARD) + if (req_op(req) == REQ_OP_DISCARD) type = NBD_CMD_TRIM; else if (req_op(req) == REQ_OP_FLUSH) type = NBD_CMD_FLUSH; @@ -288,73 +290,89 @@ static int nbd_send_cmd(struct 
nbd_device *nbd, struct nbd_cmd *cmd) memset(&request, 0, sizeof(request)); request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(type); - if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) { + if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); request.len = htonl(size); } - memcpy(request.handle, &req->tag, sizeof(req->tag)); + memcpy(request.handle, &tag, sizeof(tag)); dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", cmd, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); - result = sock_xmit(nbd, 1, &request, sizeof(request), + result = sock_xmit(nbd, index, 1, &request, sizeof(request), (type == NBD_CMD_WRITE) ? MSG_MORE : 0); if (result <= 0) { - dev_err(disk_to_dev(nbd->disk), + dev_err_ratelimited(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); return -EIO; } - if (type == NBD_CMD_WRITE) { - struct req_iterator iter; + if (type != NBD_CMD_WRITE) + return 0; + + flags = 0; + bio = req->bio; + while (bio) { + struct bio *next = bio->bi_next; + struct bvec_iter iter; struct bio_vec bvec; - /* - * we are really probing at internals to determine - * whether to set MSG_MORE or not... - */ - rq_for_each_segment(bvec, req, iter) { - flags = 0; - if (!rq_iter_last(bvec, iter)) + + bio_for_each_segment(bvec, bio, iter) { + bool is_last = !next && bio_iter_last(bvec, iter); + + if (is_last) flags = MSG_MORE; dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", cmd, bvec.bv_len); - result = sock_send_bvec(nbd, &bvec, flags); + result = sock_send_bvec(nbd, index, &bvec, flags); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", result); return -EIO; } + /* + * The completion might already have come in, + * so break for the last one instead of letting + * the iterator do it. This prevents use-after-free + * of the bio. 
+ */ + if (is_last) + break; } + bio = next; } return 0; } -static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec) +static inline int sock_recv_bvec(struct nbd_device *nbd, int index, + struct bio_vec *bvec) { int result; void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len, - MSG_WAITALL); + result = sock_xmit(nbd, index, 0, kaddr + bvec->bv_offset, + bvec->bv_len, MSG_WAITALL); kunmap(bvec->bv_page); return result; } /* NULL returned = something went wrong, inform userspace */ -static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) +static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) { int result; struct nbd_reply reply; struct nbd_cmd *cmd; struct request *req = NULL; u16 hwq; - int tag; + u32 tag; reply.magic = 0; - result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL); + result = sock_xmit(nbd, index, 0, &reply, sizeof(reply), MSG_WAITALL); if (result <= 0) { - dev_err(disk_to_dev(nbd->disk), - "Receive control failed (result %d)\n", result); + if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && + !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) + dev_err(disk_to_dev(nbd->disk), + "Receive control failed (result %d)\n", result); return ERR_PTR(result); } @@ -364,7 +382,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) return ERR_PTR(-EPROTO); } - memcpy(&tag, reply.handle, sizeof(int)); + memcpy(&tag, reply.handle, sizeof(u32)); hwq = blk_mq_unique_tag_to_hwq(tag); if (hwq < nbd->tag_set.nr_hw_queues) @@ -376,7 +394,6 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) return ERR_PTR(-ENOENT); } cmd = blk_mq_rq_to_pdu(req); - if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); @@ -390,7 +407,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) struct bio_vec bvec; rq_for_each_segment(bvec, req, iter) { - result = sock_recv_bvec(nbd, 
&bvec); + result = sock_recv_bvec(nbd, index, &bvec); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); @@ -400,6 +417,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", cmd, bvec.bv_len); } + } else { + /* See the comment in nbd_queue_rq. */ + wait_for_completion(&cmd->send_complete); } return cmd; } @@ -418,25 +438,24 @@ static struct device_attribute pid_attr = { .show = pid_show, }; -static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) +struct recv_thread_args { + struct work_struct work; + struct nbd_device *nbd; + int index; +}; + +static void recv_work(struct work_struct *work) { + struct recv_thread_args *args = container_of(work, + struct recv_thread_args, + work); + struct nbd_device *nbd = args->nbd; struct nbd_cmd *cmd; - int ret; + int ret = 0; BUG_ON(nbd->magic != NBD_MAGIC); - - sk_set_memalloc(nbd->sock->sk); - - ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); - if (ret) { - dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - return ret; - } - - nbd_size_update(nbd, bdev); - while (1) { - cmd = nbd_read_stat(nbd); + cmd = nbd_read_stat(nbd, args->index); if (IS_ERR(cmd)) { ret = PTR_ERR(cmd); break; @@ -445,10 +464,14 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) nbd_end_request(cmd); } - nbd_size_clear(nbd, bdev); - - device_remove_file(disk_to_dev(nbd->disk), &pid_attr); - return ret; + /* + * We got an error, shut everybody down if this wasn't the result of a + * disconnect request. 
+ */ + if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) + sock_shutdown(nbd); + atomic_dec(&nbd->recv_threads); + wake_up(&nbd->recv_wq); } static void nbd_clear_req(struct request *req, void *data, bool reserved) @@ -466,51 +489,60 @@ static void nbd_clear_que(struct nbd_device *nbd) { BUG_ON(nbd->magic != NBD_MAGIC); - /* - * Because we have set nbd->sock to NULL under the tx_lock, all - * modifications to the list must have completed by now. - */ - BUG_ON(nbd->sock); - blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL); dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); } -static void nbd_handle_cmd(struct nbd_cmd *cmd) +static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); struct nbd_device *nbd = cmd->nbd; + struct nbd_sock *nsock; + + if (index >= nbd->num_connections) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Attempted send on invalid socket\n"); + goto error_out; + } + + if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Attempted send on closed socket\n"); + goto error_out; + } - if (req->cmd_type != REQ_TYPE_FS) + if (req->cmd_type != REQ_TYPE_FS && + req->cmd_type != REQ_TYPE_DRV_PRIV) goto error_out; - if (rq_data_dir(req) == WRITE && + if (req->cmd_type == REQ_TYPE_FS && + rq_data_dir(req) == WRITE && (nbd->flags & NBD_FLAG_READ_ONLY)) { - dev_err(disk_to_dev(nbd->disk), - "Write on read-only\n"); + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Write on read-only\n"); goto error_out; } req->errors = 0; - mutex_lock(&nbd->tx_lock); - nbd->task_send = current; - if (unlikely(!nbd->sock)) { - mutex_unlock(&nbd->tx_lock); - dev_err(disk_to_dev(nbd->disk), - "Attempted send on closed socket\n"); + nsock = nbd->socks[index]; + mutex_lock(&nsock->tx_lock); + if (unlikely(!nsock->sock)) { + mutex_unlock(&nsock->tx_lock); + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Attempted send on closed socket\n"); goto 
error_out; } - if (nbd_send_cmd(nbd, cmd) != 0) { - dev_err(disk_to_dev(nbd->disk), "Request send failed\n"); + if (nbd_send_cmd(nbd, cmd, index) != 0) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Request send failed\n"); req->errors++; nbd_end_request(cmd); } - nbd->task_send = NULL; - mutex_unlock(&nbd->tx_lock); + mutex_unlock(&nsock->tx_lock); return; @@ -524,39 +556,70 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + /* + * Since we look at the bio's to send the request over the network we + * need to make sure the completion work doesn't mark this request done + * before we are done doing our send. This keeps us from dereferencing + * freed data if we have particularly fast completions (ie we get the + * completion before we exit sock_xmit on the last bvec) or in the case + * that the server is misbehaving (or there was an error) before we're + * done sending everything over the wire. + */ + init_completion(&cmd->send_complete); blk_mq_start_request(bd->rq); - nbd_handle_cmd(cmd); + nbd_handle_cmd(cmd, hctx->queue_num); + complete(&cmd->send_complete); + return BLK_MQ_RQ_QUEUE_OK; } -static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock) +static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock) { - int ret = 0; - - spin_lock_irq(&nbd->sock_lock); + struct nbd_sock **socks; + struct nbd_sock *nsock; - if (nbd->sock) { - ret = -EBUSY; - goto out; + if (!nbd->task_setup) + nbd->task_setup = current; + if (nbd->task_setup != current) { + dev_err(disk_to_dev(nbd->disk), + "Device being setup by another task"); + return -EINVAL; } - nbd->sock = sock; + socks = krealloc(nbd->socks, (nbd->num_connections + 1) * + sizeof(struct nbd_sock *), GFP_KERNEL); + if (!socks) + return -ENOMEM; + nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); + if (!nsock) + return -ENOMEM; -out: - spin_unlock_irq(&nbd->sock_lock); + nbd->socks = socks; + + mutex_init(&nsock->tx_lock); + 
nsock->sock = sock; + socks[nbd->num_connections++] = nsock; - return ret; + return 0; } /* Reset all properties of an NBD device */ static void nbd_reset(struct nbd_device *nbd) { + int i; + + for (i = 0; i < nbd->num_connections; i++) + kfree(nbd->socks[i]); + kfree(nbd->socks); + nbd->socks = NULL; nbd->runtime_flags = 0; nbd->blksize = 1024; nbd->bytesize = 0; set_capacity(nbd->disk, 0); nbd->flags = 0; nbd->tag_set.timeout = 0; + nbd->num_connections = 0; + nbd->task_setup = NULL; queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); } @@ -582,48 +645,68 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) blk_queue_write_cache(nbd->disk->queue, false, false); } +static void send_disconnects(struct nbd_device *nbd) +{ + struct nbd_request request = {}; + int i, ret; + + request.magic = htonl(NBD_REQUEST_MAGIC); + request.type = htonl(NBD_CMD_DISC); + + for (i = 0; i < nbd->num_connections; i++) { + ret = sock_xmit(nbd, i, 1, &request, sizeof(request), 0); + if (ret <= 0) + dev_err(disk_to_dev(nbd->disk), + "Send disconnect failed %d\n", ret); + } +} + static int nbd_dev_dbg_init(struct nbd_device *nbd); static void nbd_dev_dbg_close(struct nbd_device *nbd); -/* Must be called with tx_lock held */ - +/* Must be called with config_lock held */ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, unsigned int cmd, unsigned long arg) { switch (cmd) { case NBD_DISCONNECT: { - struct request *sreq; - dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); - if (!nbd->sock) + if (!nbd->socks) return -EINVAL; - sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0); - if (IS_ERR(sreq)) - return -ENOMEM; - - mutex_unlock(&nbd->tx_lock); + mutex_unlock(&nbd->config_lock); fsync_bdev(bdev); - mutex_lock(&nbd->tx_lock); - sreq->cmd_type = REQ_TYPE_DRV_PRIV; + mutex_lock(&nbd->config_lock); /* Check again after getting mutex back. 
*/ - if (!nbd->sock) { - blk_mq_free_request(sreq); + if (!nbd->socks) return -EINVAL; - } - set_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags); - - nbd_send_cmd(nbd, blk_mq_rq_to_pdu(sreq)); - blk_mq_free_request(sreq); + if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED, + &nbd->runtime_flags)) + send_disconnects(nbd); return 0; } - + case NBD_CLEAR_SOCK: sock_shutdown(nbd); nbd_clear_que(nbd); kill_bdev(bdev); + nbd_bdev_reset(bdev); + /* + * We want to give the run thread a chance to wait for everybody + * to clean up and then do it's own cleanup. + */ + if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) { + int i; + + for (i = 0; i < nbd->num_connections; i++) + kfree(nbd->socks[i]); + kfree(nbd->socks); + nbd->socks = NULL; + nbd->num_connections = 0; + nbd->task_setup = NULL; + } return 0; case NBD_SET_SOCK: { @@ -633,7 +716,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (!sock) return err; - err = nbd_set_socket(nbd, sock); + err = nbd_add_socket(nbd, sock); if (!err && max_part) bdev->bd_invalidated = 1; @@ -648,7 +731,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_SIZE: return nbd_size_set(nbd, bdev, nbd->blksize, - arg / nbd->blksize); + div_s64(arg, nbd->blksize)); case NBD_SET_SIZE_BLOCKS: return nbd_size_set(nbd, bdev, nbd->blksize, arg); @@ -662,26 +745,61 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return 0; case NBD_DO_IT: { - int error; + struct recv_thread_args *args; + int num_connections = nbd->num_connections; + int error = 0, i; if (nbd->task_recv) return -EBUSY; - if (!nbd->sock) + if (!nbd->socks) return -EINVAL; + if (num_connections > 1 && + !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) { + dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); + error = -EINVAL; + goto out_err; + } - /* We have to claim the device under the lock */ + set_bit(NBD_RUNNING, &nbd->runtime_flags); + 
blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections); + args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL); + if (!args) { + error = -ENOMEM; + goto out_err; + } nbd->task_recv = current; - mutex_unlock(&nbd->tx_lock); + mutex_unlock(&nbd->config_lock); nbd_parse_flags(nbd, bdev); + error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); + if (error) { + dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); + goto out_recv; + } + + nbd_size_update(nbd, bdev); + nbd_dev_dbg_init(nbd); - error = nbd_thread_recv(nbd, bdev); + for (i = 0; i < num_connections; i++) { + sk_set_memalloc(nbd->socks[i]->sock->sk); + atomic_inc(&nbd->recv_threads); + INIT_WORK(&args[i].work, recv_work); + args[i].nbd = nbd; + args[i].index = i; + queue_work(system_long_wq, &args[i].work); + } + wait_event_interruptible(nbd->recv_wq, + atomic_read(&nbd->recv_threads) == 0); + for (i = 0; i < num_connections; i++) + flush_work(&args[i].work); nbd_dev_dbg_close(nbd); - - mutex_lock(&nbd->tx_lock); + nbd_size_clear(nbd, bdev); + device_remove_file(disk_to_dev(nbd->disk), &pid_attr); +out_recv: + mutex_lock(&nbd->config_lock); nbd->task_recv = NULL; - +out_err: sock_shutdown(nbd); nbd_clear_que(nbd); kill_bdev(bdev); @@ -694,7 +812,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, error = -ETIMEDOUT; nbd_reset(nbd); - return error; } @@ -726,9 +843,9 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode, BUG_ON(nbd->magic != NBD_MAGIC); - mutex_lock(&nbd->tx_lock); + mutex_lock(&nbd->config_lock); error = __nbd_ioctl(bdev, nbd, cmd, arg); - mutex_unlock(&nbd->tx_lock); + mutex_unlock(&nbd->config_lock); return error; } @@ -748,8 +865,6 @@ static int nbd_dbg_tasks_show(struct seq_file *s, void *unused) if (nbd->task_recv) seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv)); - if (nbd->task_send) - seq_printf(s, "send: %d\n", task_pid_nr(nbd->task_send)); return 0; } @@ -817,7 +932,7 @@ static int 
nbd_dev_dbg_init(struct nbd_device *nbd) debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize); debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout); - debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize); + debugfs_create_u64("blocksize", 0444, dir, &nbd->blksize); debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops); return 0; @@ -873,9 +988,7 @@ static int nbd_init_request(void *data, struct request *rq, unsigned int numa_node) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd->nbd = data; - INIT_LIST_HEAD(&cmd->list); return 0; } @@ -985,13 +1098,13 @@ static int __init nbd_init(void) for (i = 0; i < nbds_max; i++) { struct gendisk *disk = nbd_dev[i].disk; nbd_dev[i].magic = NBD_MAGIC; - spin_lock_init(&nbd_dev[i].sock_lock); - mutex_init(&nbd_dev[i].tx_lock); + mutex_init(&nbd_dev[i].config_lock); disk->major = NBD_MAJOR; disk->first_minor = i << part_shift; disk->fops = &nbd_fops; disk->private_data = &nbd_dev[i]; sprintf(disk->disk_name, "nbd%d", i); + init_waitqueue_head(&nbd_dev[i].recv_wq); nbd_reset(&nbd_dev[i]); add_disk(disk); } diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index ba6f4a2e73db..4943ee22716e 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -577,6 +577,7 @@ static void null_nvm_unregister(struct nullb *nullb) #else static int null_nvm_register(struct nullb *nullb) { + pr_err("null_blk: CONFIG_NVM needs to be enabled for LightNVM\n"); return -EINVAL; } static void null_nvm_unregister(struct nullb *nullb) {} diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 90fa4ac149db..95c98de92971 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -721,7 +721,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->timeout = 60*HZ; if (cgc->quiet) - rq->cmd_flags |= REQ_QUIET; + rq->rq_flags |= RQF_QUIET; blk_execute_rq(rq->q, 
pd->bdev->bd_disk, rq, 0); if (rq->errors) @@ -944,39 +944,6 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que } } -/* - * Copy all data for this packet to pkt->pages[], so that - * a) The number of required segments for the write bio is minimized, which - * is necessary for some scsi controllers. - * b) The data can be used as cache to avoid read requests if we receive a - * new write request for the same zone. - */ -static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec) -{ - int f, p, offs; - - /* Copy all data to pkt->pages[] */ - p = 0; - offs = 0; - for (f = 0; f < pkt->frames; f++) { - if (bvec[f].bv_page != pkt->pages[p]) { - void *vfrom = kmap_atomic(bvec[f].bv_page) + bvec[f].bv_offset; - void *vto = page_address(pkt->pages[p]) + offs; - memcpy(vto, vfrom, CD_FRAMESIZE); - kunmap_atomic(vfrom); - bvec[f].bv_page = pkt->pages[p]; - bvec[f].bv_offset = offs; - } else { - BUG_ON(bvec[f].bv_offset != offs); - } - offs += CD_FRAMESIZE; - if (offs >= PAGE_SIZE) { - offs = 0; - p++; - } - } -} - static void pkt_end_io_read(struct bio *bio) { struct packet_data *pkt = bio->bi_private; @@ -1298,7 +1265,6 @@ try_next_bio: static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) { int f; - struct bio_vec *bvec = pkt->w_bio->bi_io_vec; bio_reset(pkt->w_bio); pkt->w_bio->bi_iter.bi_sector = pkt->sector; @@ -1308,9 +1274,10 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) /* XXX: locking? 
*/ for (f = 0; f < pkt->frames; f++) { - bvec[f].bv_page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE]; - bvec[f].bv_offset = (f * CD_FRAMESIZE) % PAGE_SIZE; - if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) + struct page *page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE]; + unsigned offset = (f * CD_FRAMESIZE) % PAGE_SIZE; + + if (!bio_add_page(pkt->w_bio, page, CD_FRAMESIZE, offset)) BUG(); } pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt); @@ -1327,12 +1294,10 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) pkt_dbg(2, pd, "Writing %d frames for zone %llx\n", pkt->write_size, (unsigned long long)pkt->sector); - if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) { - pkt_make_local_copy(pkt, bvec); + if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) pkt->cache_valid = 1; - } else { + else pkt->cache_valid = 0; - } /* Start the write request */ atomic_set(&pkt->io_wait, 1); diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 3822eae102db..abf805e332e2 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -36,7 +36,6 @@ #include <linux/scatterlist.h> #include <linux/version.h> #include <linux/err.h> -#include <linux/scatterlist.h> #include <linux/aer.h> #include <linux/ctype.h> #include <linux/wait.h> @@ -270,8 +269,6 @@ struct skd_device { resource_size_t mem_phys[SKD_MAX_BARS]; u32 mem_size[SKD_MAX_BARS]; - skd_irq_type_t irq_type; - u32 msix_count; struct skd_msix_entry *msix_entries; struct pci_dev *pdev; @@ -2138,12 +2135,8 @@ static void skd_send_fitmsg(struct skd_device *skdev, u8 *bp = (u8 *)skmsg->msg_buf; int i; for (i = 0; i < skmsg->length; i += 8) { - pr_debug("%s:%s:%d msg[%2d] %02x %02x %02x %02x " - "%02x %02x %02x %02x\n", - skdev->name, __func__, __LINE__, - i, bp[i + 0], bp[i + 1], bp[i + 2], - bp[i + 3], bp[i + 4], bp[i + 5], - bp[i + 6], bp[i + 7]); + pr_debug("%s:%s:%d msg[%2d] 
%8ph\n", + skdev->name, __func__, __LINE__, i, &bp[i]); if (i == 0) i = 64 - 8; } @@ -2164,7 +2157,6 @@ static void skd_send_fitmsg(struct skd_device *skdev, qcmd |= FIT_QCMD_MSGSIZE_64; SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND); - } static void skd_send_special_fitmsg(struct skd_device *skdev, @@ -2177,11 +2169,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev, int i; for (i = 0; i < SKD_N_SPECIAL_FITMSG_BYTES; i += 8) { - pr_debug("%s:%s:%d spcl[%2d] %02x %02x %02x %02x " - "%02x %02x %02x %02x\n", - skdev->name, __func__, __LINE__, i, - bp[i + 0], bp[i + 1], bp[i + 2], bp[i + 3], - bp[i + 4], bp[i + 5], bp[i + 6], bp[i + 7]); + pr_debug("%s:%s:%d spcl[%2d] %8ph\n", + skdev->name, __func__, __LINE__, i, &bp[i]); if (i == 0) i = 64 - 8; } @@ -2955,8 +2944,8 @@ static void skd_completion_worker(struct work_struct *work) static void skd_isr_msg_from_dev(struct skd_device *skdev); -irqreturn_t -static skd_isr(int irq, void *ptr) +static irqreturn_t +skd_isr(int irq, void *ptr) { struct skd_device *skdev; u32 intstat; @@ -3821,10 +3810,6 @@ static irqreturn_t skd_qfull_isr(int irq, void *skd_host_data) */ struct skd_msix_entry { - int have_irq; - u32 vector; - u32 entry; - struct skd_device *rsp; char isr_name[30]; }; @@ -3853,193 +3838,121 @@ static struct skd_init_msix_entry msix_entries[SKD_MAX_MSIX_COUNT] = { { "(Queue Full 3)", skd_qfull_isr }, }; -static void skd_release_msix(struct skd_device *skdev) -{ - struct skd_msix_entry *qentry; - int i; - - if (skdev->msix_entries) { - for (i = 0; i < skdev->msix_count; i++) { - qentry = &skdev->msix_entries[i]; - skdev = qentry->rsp; - - if (qentry->have_irq) - devm_free_irq(&skdev->pdev->dev, - qentry->vector, qentry->rsp); - } - - kfree(skdev->msix_entries); - } - - if (skdev->msix_count) - pci_disable_msix(skdev->pdev); - - skdev->msix_count = 0; - skdev->msix_entries = NULL; -} - static int skd_acquire_msix(struct skd_device *skdev) { int i, rc; struct pci_dev *pdev = skdev->pdev; - struct msix_entry 
*entries; - struct skd_msix_entry *qentry; - - entries = kzalloc(sizeof(struct msix_entry) * SKD_MAX_MSIX_COUNT, - GFP_KERNEL); - if (!entries) - return -ENOMEM; - - for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) - entries[i].entry = i; - rc = pci_enable_msix_exact(pdev, entries, SKD_MAX_MSIX_COUNT); - if (rc) { + rc = pci_alloc_irq_vectors(pdev, SKD_MAX_MSIX_COUNT, SKD_MAX_MSIX_COUNT, + PCI_IRQ_MSIX); + if (rc < 0) { pr_err("(%s): failed to enable MSI-X %d\n", skd_name(skdev), rc); - goto msix_out; + goto out; } - skdev->msix_count = SKD_MAX_MSIX_COUNT; - skdev->msix_entries = kzalloc(sizeof(struct skd_msix_entry) * - skdev->msix_count, GFP_KERNEL); + skdev->msix_entries = kcalloc(SKD_MAX_MSIX_COUNT, + sizeof(struct skd_msix_entry), GFP_KERNEL); if (!skdev->msix_entries) { rc = -ENOMEM; pr_err("(%s): msix table allocation error\n", skd_name(skdev)); - goto msix_out; - } - - for (i = 0; i < skdev->msix_count; i++) { - qentry = &skdev->msix_entries[i]; - qentry->vector = entries[i].vector; - qentry->entry = entries[i].entry; - qentry->rsp = NULL; - qentry->have_irq = 0; - pr_debug("%s:%s:%d %s: <%s> msix (%d) vec %d, entry %x\n", - skdev->name, __func__, __LINE__, - pci_name(pdev), skdev->name, - i, qentry->vector, qentry->entry); + goto out; } /* Enable MSI-X vectors for the base queue */ - for (i = 0; i < skdev->msix_count; i++) { - qentry = &skdev->msix_entries[i]; + for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) { + struct skd_msix_entry *qentry = &skdev->msix_entries[i]; + snprintf(qentry->isr_name, sizeof(qentry->isr_name), "%s%d-msix %s", DRV_NAME, skdev->devno, msix_entries[i].name); - rc = devm_request_irq(&skdev->pdev->dev, qentry->vector, - msix_entries[i].handler, 0, - qentry->isr_name, skdev); + + rc = devm_request_irq(&skdev->pdev->dev, + pci_irq_vector(skdev->pdev, i), + msix_entries[i].handler, 0, + qentry->isr_name, skdev); if (rc) { pr_err("(%s): Unable to register(%d) MSI-X " "handler %d: %s\n", skd_name(skdev), rc, i, qentry->isr_name); goto msix_out; - } 
else { - qentry->have_irq = 1; - qentry->rsp = skdev; } } + pr_debug("%s:%s:%d %s: <%s> msix %d irq(s) enabled\n", skdev->name, __func__, __LINE__, - pci_name(pdev), skdev->name, skdev->msix_count); + pci_name(pdev), skdev->name, SKD_MAX_MSIX_COUNT); return 0; msix_out: - if (entries) - kfree(entries); - skd_release_msix(skdev); + while (--i >= 0) + devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), skdev); +out: + kfree(skdev->msix_entries); + skdev->msix_entries = NULL; return rc; } static int skd_acquire_irq(struct skd_device *skdev) { + struct pci_dev *pdev = skdev->pdev; + unsigned int irq_flag = PCI_IRQ_LEGACY; int rc; - struct pci_dev *pdev; - - pdev = skdev->pdev; - skdev->msix_count = 0; -RETRY_IRQ_TYPE: - switch (skdev->irq_type) { - case SKD_IRQ_MSIX: + if (skd_isr_type == SKD_IRQ_MSIX) { rc = skd_acquire_msix(skdev); if (!rc) - pr_info("(%s): MSI-X %d irqs enabled\n", - skd_name(skdev), skdev->msix_count); - else { - pr_err( - "(%s): failed to enable MSI-X, re-trying with MSI %d\n", - skd_name(skdev), rc); - skdev->irq_type = SKD_IRQ_MSI; - goto RETRY_IRQ_TYPE; - } - break; - case SKD_IRQ_MSI: - snprintf(skdev->isr_name, sizeof(skdev->isr_name), "%s%d-msi", - DRV_NAME, skdev->devno); - rc = pci_enable_msi_range(pdev, 1, 1); - if (rc > 0) { - rc = devm_request_irq(&pdev->dev, pdev->irq, skd_isr, 0, - skdev->isr_name, skdev); - if (rc) { - pci_disable_msi(pdev); - pr_err( - "(%s): failed to allocate the MSI interrupt %d\n", - skd_name(skdev), rc); - goto RETRY_IRQ_LEGACY; - } - pr_info("(%s): MSI irq %d enabled\n", - skd_name(skdev), pdev->irq); - } else { -RETRY_IRQ_LEGACY: - pr_err( - "(%s): failed to enable MSI, re-trying with LEGACY %d\n", - skd_name(skdev), rc); - skdev->irq_type = SKD_IRQ_LEGACY; - goto RETRY_IRQ_TYPE; - } - break; - case SKD_IRQ_LEGACY: - snprintf(skdev->isr_name, sizeof(skdev->isr_name), - "%s%d-legacy", DRV_NAME, skdev->devno); - rc = devm_request_irq(&pdev->dev, pdev->irq, skd_isr, - IRQF_SHARED, skdev->isr_name, skdev); - if 
(!rc) - pr_info("(%s): LEGACY irq %d enabled\n", - skd_name(skdev), pdev->irq); - else - pr_err("(%s): request LEGACY irq error %d\n", - skd_name(skdev), rc); - break; - default: - pr_info("(%s): irq_type %d invalid, re-set to %d\n", - skd_name(skdev), skdev->irq_type, SKD_IRQ_DEFAULT); - skdev->irq_type = SKD_IRQ_LEGACY; - goto RETRY_IRQ_TYPE; + return 0; + + pr_err("(%s): failed to enable MSI-X, re-trying with MSI %d\n", + skd_name(skdev), rc); } - return rc; + + snprintf(skdev->isr_name, sizeof(skdev->isr_name), "%s%d", DRV_NAME, + skdev->devno); + + if (skd_isr_type != SKD_IRQ_LEGACY) + irq_flag |= PCI_IRQ_MSI; + rc = pci_alloc_irq_vectors(pdev, 1, 1, irq_flag); + if (rc < 0) { + pr_err("(%s): failed to allocate the MSI interrupt %d\n", + skd_name(skdev), rc); + return rc; + } + + rc = devm_request_irq(&pdev->dev, pdev->irq, skd_isr, + pdev->msi_enabled ? 0 : IRQF_SHARED, + skdev->isr_name, skdev); + if (rc) { + pci_free_irq_vectors(pdev); + pr_err("(%s): failed to allocate interrupt %d\n", + skd_name(skdev), rc); + return rc; + } + + return 0; } static void skd_release_irq(struct skd_device *skdev) { - switch (skdev->irq_type) { - case SKD_IRQ_MSIX: - skd_release_msix(skdev); - break; - case SKD_IRQ_MSI: - devm_free_irq(&skdev->pdev->dev, skdev->pdev->irq, skdev); - pci_disable_msi(skdev->pdev); - break; - case SKD_IRQ_LEGACY: - devm_free_irq(&skdev->pdev->dev, skdev->pdev->irq, skdev); - break; - default: - pr_err("(%s): wrong irq type %d!", - skd_name(skdev), skdev->irq_type); - break; + struct pci_dev *pdev = skdev->pdev; + + if (skdev->msix_entries) { + int i; + + for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) { + devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), + skdev); + } + + kfree(skdev->msix_entries); + skdev->msix_entries = NULL; + } else { + devm_free_irq(&pdev->dev, pdev->irq, skdev); } + + pci_free_irq_vectors(pdev); } /* @@ -4402,7 +4315,6 @@ static struct skd_device *skd_construct(struct pci_dev *pdev) skdev->pdev = pdev; skdev->devno = 
skd_next_devno++; skdev->major = blk_major; - skdev->irq_type = skd_isr_type; sprintf(skdev->name, DRV_NAME "%d", skdev->devno); skdev->dev_max_queue_depth = 0; diff --git a/drivers/block/umem.c b/drivers/block/umem.c index be90e15854ed..46f4c719fed9 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -535,7 +535,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio) *card->biotail = bio; bio->bi_next = NULL; card->biotail = &bio->bi_next; - if (bio->bi_opf & REQ_SYNC || !mm_check_plugged(card)) + if (op_is_sync(bio->bi_opf) || !mm_check_plugged(card)) activate(card); spin_unlock_irq(&card->lock); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4a80ee752597..726c32e35db9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1253,14 +1253,14 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, case BLKIF_OP_WRITE: ring->st_wr_req++; operation = REQ_OP_WRITE; - operation_flags = WRITE_ODIRECT; + operation_flags = REQ_SYNC | REQ_IDLE; break; case BLKIF_OP_WRITE_BARRIER: drain = true; case BLKIF_OP_FLUSH_DISKCACHE: ring->st_f_req++; operation = REQ_OP_WRITE; - operation_flags = WRITE_FLUSH; + operation_flags = REQ_PREFLUSH; break; default: operation = 0; /* make gcc happy */ @@ -1272,7 +1272,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, nseg = req->operation == BLKIF_OP_INDIRECT ? req->u.indirect.nr_segments : req->u.rw.nr_segments; - if (unlikely(nseg == 0 && operation_flags != WRITE_FLUSH) || + if (unlikely(nseg == 0 && operation_flags != REQ_PREFLUSH) || unlikely((req->operation != BLKIF_OP_INDIRECT) && (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) || unlikely((req->operation == BLKIF_OP_INDIRECT) && @@ -1334,7 +1334,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, } /* Wait on all outstanding I/O's and once that has been completed - * issue the WRITE_FLUSH. + * issue the flush. 
*/ if (drain) xen_blk_drain_io(pending_req->ring); @@ -1380,7 +1380,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, /* This will be hit if the operation was a flush or discard. */ if (!bio) { - BUG_ON(operation_flags != WRITE_FLUSH); + BUG_ON(operation_flags != REQ_PREFLUSH); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9908597c5209..c000fdf048b2 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2043,8 +2043,9 @@ static int blkif_recover(struct blkfront_info *info) /* Requeue pending requests (flush or discard) */ list_del_init(&req->queuelist); BUG_ON(req->nr_phys_segments > segs); - blk_mq_requeue_request(req); + blk_mq_requeue_request(req, false); } + blk_mq_start_stopped_hw_queues(info->rq, true); blk_mq_kick_requeue_list(info->rq); while ((bio = bio_list_pop(&info->bio_list)) != NULL) { |