diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-17 12:08:18 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-17 12:08:18 -0800 |
commit | 16382e17c0ff583df2d5eed56ca7c771d637e9d1 (patch) | |
tree | 568c5a69b55d08cda20b8e1367df3135dbafeeb6 | |
parent | 93f30c73ecd0281cf3685ef0e4e384980a176176 (diff) | |
parent | cfe057f7db1ff026c8db75469a3f9ba9736e1975 (diff) | |
download | lwn-16382e17c0ff583df2d5eed56ca7c771d637e9d1.tar.gz lwn-16382e17c0ff583df2d5eed56ca7c771d637e9d1.zip |
Merge branch 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull iov_iter updates from Al Viro:
- bio_{map,copy}_user_iov() series; those are cleanups - fixes from the
same pile went into mainline (and stable) in late September.
- fs/iomap.c iov_iter-related fixes
- new primitive - iov_iter_for_each_range(), which applies a function
to kernel-mapped segments of an iov_iter.
Usable for kvec and bvec ones, the latter does kmap()/kunmap() around
the callback. _Not_ usable for iovec- or pipe-backed iov_iter; the
latter is not hard to fix if the need ever appears, the former is by
design.
Another related primitive will have to wait for the next cycle - it
passes page + offset + size instead of pointer + size, and that one
will be usable for everything _except_ kvec. Unfortunately, that one
didn't get exposure in -next yet, so...
- a bit more lustre iov_iter work, including a use case for
iov_iter_for_each_range() (checksum calculation)
- vhost/scsi leak fix in failure exit
- misc cleanups and detritectomy...
* 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (21 commits)
iomap_dio_actor(): fix iov_iter bugs
switch ksocknal_lib_recv_...() to use of iov_iter_for_each_range()
lustre: switch struct ksock_conn to iov_iter
vhost/scsi: switch to iov_iter_get_pages()
fix a page leak in vhost_scsi_iov_to_sgl() error recovery
new primitive: iov_iter_for_each_range()
lnet_return_rx_credits_locked: don't abuse list_entry
xen: don't open-code iov_iter_kvec()
orangefs: remove detritus from struct orangefs_kiocb_s
kill iov_shorten()
bio_alloc_map_data(): do bmd->iter setup right there
bio_copy_user_iov(): saner bio size calculation
bio_map_user_iov(): get rid of copying iov_iter
bio_copy_from_iter(): get rid of copying iov_iter
move more stuff down into bio_copy_user_iov()
blk_rq_map_user_iov(): move iov_iter_advance() down
bio_map_user_iov(): get rid of the iov_for_each()
bio_map_user_iov(): move alignment check into the main loop
don't rely upon subsequent bio_add_pc_page() calls failing
... and with iov_iter_get_pages_alloc() it becomes even simpler
...
-rw-r--r-- | block/bio.c | 192 | ||||
-rw-r--r-- | block/blk-map.c | 7 | ||||
-rw-r--r-- | drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c | 4 | ||||
-rw-r--r-- | drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h | 9 | ||||
-rw-r--r-- | drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c | 157 | ||||
-rw-r--r-- | drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c | 99 | ||||
-rw-r--r-- | drivers/staging/lustre/lnet/lnet/lib-move.c | 2 | ||||
-rw-r--r-- | drivers/vhost/scsi.c | 73 | ||||
-rw-r--r-- | drivers/xen/pvcalls-back.c | 16 | ||||
-rw-r--r-- | fs/iomap.c | 24 | ||||
-rw-r--r-- | fs/orangefs/orangefs-kernel.h | 6 | ||||
-rw-r--r-- | fs/read_write.c | 21 | ||||
-rw-r--r-- | include/linux/bio.h | 4 | ||||
-rw-r--r-- | include/linux/uio.h | 6 | ||||
-rw-r--r-- | lib/iov_iter.c | 22 |
15 files changed, 202 insertions, 440 deletions
diff --git a/block/bio.c b/block/bio.c index 459cc857f3d9..228229f3bb76 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1062,14 +1062,21 @@ struct bio_map_data { struct iovec iov[]; }; -static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, +static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, gfp_t gfp_mask) { - if (iov_count > UIO_MAXIOV) + struct bio_map_data *bmd; + if (data->nr_segs > UIO_MAXIOV) return NULL; - return kmalloc(sizeof(struct bio_map_data) + - sizeof(struct iovec) * iov_count, gfp_mask); + bmd = kmalloc(sizeof(struct bio_map_data) + + sizeof(struct iovec) * data->nr_segs, gfp_mask); + if (!bmd) + return NULL; + memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); + bmd->iter = *data; + bmd->iter.iov = bmd->iov; + return bmd; } /** @@ -1080,7 +1087,7 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, * Copy all pages from iov_iter to bio. * Returns 0 on success, or error on failure. */ -static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter) +static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) { int i; struct bio_vec *bvec; @@ -1091,9 +1098,9 @@ static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter) ret = copy_page_from_iter(bvec->bv_page, bvec->bv_offset, bvec->bv_len, - &iter); + iter); - if (!iov_iter_count(&iter)) + if (!iov_iter_count(iter)) break; if (ret < bvec->bv_len) @@ -1187,40 +1194,18 @@ int bio_uncopy_user(struct bio *bio) */ struct bio *bio_copy_user_iov(struct request_queue *q, struct rq_map_data *map_data, - const struct iov_iter *iter, + struct iov_iter *iter, gfp_t gfp_mask) { struct bio_map_data *bmd; struct page *page; struct bio *bio; - int i, ret; - int nr_pages = 0; + int i = 0, ret; + int nr_pages; unsigned int len = iter->count; unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; - for (i = 0; i < iter->nr_segs; i++) { - unsigned long uaddr; - unsigned long end; - unsigned long start; - - uaddr = (unsigned long) iter->iov[i].iov_base; - end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1) - >> PAGE_SHIFT; - start = uaddr >> PAGE_SHIFT; - - /* - * Overflow, abort - */ - if (end < start) - return ERR_PTR(-EINVAL); - - nr_pages += end - start; - } - - if (offset) - nr_pages++; - - bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask); + bmd = bio_alloc_map_data(iter, gfp_mask); if (!bmd) return ERR_PTR(-ENOMEM); @@ -1230,9 +1215,10 @@ struct bio *bio_copy_user_iov(struct request_queue *q, * shortlived one. */ bmd->is_our_pages = map_data ? 0 : 1; - memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs); - bmd->iter = *iter; - bmd->iter.iov = bmd->iov; + + nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); + if (nr_pages > BIO_MAX_PAGES) + nr_pages = BIO_MAX_PAGES; ret = -ENOMEM; bio = bio_kmalloc(gfp_mask, nr_pages); @@ -1281,17 +1267,24 @@ struct bio *bio_copy_user_iov(struct request_queue *q, if (ret) goto cleanup; + if (map_data) + map_data->offset += bio->bi_iter.bi_size; + /* * success */ if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) || (map_data && map_data->from_user)) { - ret = bio_copy_from_iter(bio, *iter); + ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; + } else { + iov_iter_advance(iter, bio->bi_iter.bi_size); } bio->bi_private = bmd; + if (map_data && map_data->null_mapped) + bio_set_flag(bio, BIO_NULL_MAPPED); return bio; cleanup: if (!map_data) @@ -1312,111 +1305,74 @@ out_bmd: * device. Returns an error pointer in case of error. */ struct bio *bio_map_user_iov(struct request_queue *q, - const struct iov_iter *iter, + struct iov_iter *iter, gfp_t gfp_mask) { int j; - int nr_pages = 0; - struct page **pages; struct bio *bio; - int cur_page = 0; - int ret, offset; - struct iov_iter i; - struct iovec iov; + int ret; struct bio_vec *bvec; - iov_for_each(iov, i, *iter) { - unsigned long uaddr = (unsigned long) iov.iov_base; - unsigned long len = iov.iov_len; - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - - /* - * Overflow, abort - */ - if (end < start) - return ERR_PTR(-EINVAL); - - nr_pages += end - start; - /* - * buffer must be aligned to at least logical block size for now - */ - if (uaddr & queue_dma_alignment(q)) - return ERR_PTR(-EINVAL); - } - - if (!nr_pages) + if (!iov_iter_count(iter)) return ERR_PTR(-EINVAL); - bio = bio_kmalloc(gfp_mask, nr_pages); + bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); if (!bio) return ERR_PTR(-ENOMEM); - ret = -ENOMEM; - pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); - if (!pages) - goto out; + while (iov_iter_count(iter)) { + struct page **pages; + ssize_t bytes; + size_t offs, added = 0; + int npages; - iov_for_each(iov, i, *iter) { - unsigned long uaddr = (unsigned long) iov.iov_base; - unsigned long len = iov.iov_len; - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - const int local_nr_pages = end - start; - const int page_limit = cur_page + local_nr_pages; - - ret = get_user_pages_fast(uaddr, local_nr_pages, - (iter->type & WRITE) != WRITE, - &pages[cur_page]); - if (unlikely(ret < local_nr_pages)) { - for (j = cur_page; j < page_limit; j++) { - if (!pages[j]) - break; - put_page(pages[j]); - } - ret = -EFAULT; + bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); + if (unlikely(bytes <= 0)) { + ret = bytes ? bytes : -EFAULT; goto out_unmap; } - offset = offset_in_page(uaddr); - for (j = cur_page; j < page_limit; j++) { - unsigned int bytes = PAGE_SIZE - offset; - unsigned short prev_bi_vcnt = bio->bi_vcnt; + npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); - if (len <= 0) - break; - - if (bytes > len) - bytes = len; - - /* - * sorry... - */ - if (bio_add_pc_page(q, bio, pages[j], bytes, offset) < - bytes) - break; + if (unlikely(offs & queue_dma_alignment(q))) { + ret = -EINVAL; + j = 0; + } else { + for (j = 0; j < npages; j++) { + struct page *page = pages[j]; + unsigned int n = PAGE_SIZE - offs; + unsigned short prev_bi_vcnt = bio->bi_vcnt; - /* - * check if vector was merged with previous - * drop page reference if needed - */ - if (bio->bi_vcnt == prev_bi_vcnt) - put_page(pages[j]); + if (n > bytes) + n = bytes; - len -= bytes; - offset = 0; - } + if (!bio_add_pc_page(q, bio, page, n, offs)) + break; - cur_page = j; + /* + * check if vector was merged with previous + * drop page reference if needed + */ + if (bio->bi_vcnt == prev_bi_vcnt) + put_page(page); + + added += n; + bytes -= n; + offs = 0; + } + iov_iter_advance(iter, added); + } /* * release the pages we didn't map into the bio, if any */ - while (j < page_limit) + while (j < npages) put_page(pages[j++]); + kvfree(pages); + /* couldn't stuff something into bio? */ + if (bytes) + break; } - kfree(pages); - bio_set_flag(bio, BIO_USER_MAPPED); /* @@ -1432,8 +1388,6 @@ struct bio *bio_map_user_iov(struct request_queue *q, bio_for_each_segment_all(bvec, bio, j) { put_page(bvec->bv_page); } - out: - kfree(pages); bio_put(bio); return ERR_PTR(ret); } diff --git a/block/blk-map.c b/block/blk-map.c index d5251edcc0dd..b21f8e86f120 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -67,13 +67,6 @@ static int __blk_rq_map_user_iov(struct request *rq, bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= req_op(rq); - if (map_data && map_data->null_mapped) - bio_set_flag(bio, BIO_NULL_MAPPED); - - iov_iter_advance(iter, bio->bi_iter.bi_size); - if (map_data) - map_data->offset += bio->bi_iter.bi_size; - orig_bio = bio; /* diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index f8ea523863ba..986c2a40d978 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -1683,10 +1683,10 @@ ksocknal_destroy_conn(struct ksock_conn *conn) case SOCKNAL_RX_LNET_PAYLOAD: last_rcv = conn->ksnc_rx_deadline - cfs_time_seconds(*ksocknal_tunables.ksnd_timeout); - CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %d, left: %d, last alive is %ld secs ago\n", + CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n", libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, &conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, + iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left, cfs_duration_sec(cfs_time_sub(cfs_time_current(), last_rcv))); lnet_finalize(conn->ksnc_peer->ksnp_ni, diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h index 35a7b396def4..d50ebdf863fa 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h @@ -358,11 +358,7 @@ struct ksock_conn { __u8 ksnc_rx_scheduled; /* being progressed */ __u8 ksnc_rx_state; /* what is being read */ int ksnc_rx_nob_left; /* # bytes to next hdr/body */ - int ksnc_rx_nob_wanted;/* bytes actually wanted */ - int ksnc_rx_niov; /* # iovec frags */ - struct kvec *ksnc_rx_iov; /* the iovec frags */ - int ksnc_rx_nkiov; /* # page frags */ - struct bio_vec *ksnc_rx_kiov; /* the page frags */ + struct iov_iter ksnc_rx_to; /* copy destination */ union ksock_rxiovspace ksnc_rx_iov_space; /* space for frag descriptors */ __u32 ksnc_rx_csum; /* partial checksum for incoming * data @@ -701,8 +697,7 @@ int ksocknal_lib_setup_sock(struct socket *so); int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx); int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx); void ksocknal_lib_eager_ack(struct ksock_conn *conn); -int ksocknal_lib_recv_iov(struct ksock_conn *conn); -int ksocknal_lib_recv_kiov(struct ksock_conn *conn); +int ksocknal_lib_recv(struct ksock_conn *conn); int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, int *rxmem, int *nagle); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c index a5f2ecb966fa..27c56d5ae4e5 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c @@ -250,66 +250,16 @@ ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx) } static int -ksocknal_recv_iov(struct ksock_conn *conn) +ksocknal_recv_iter(struct ksock_conn *conn) { - struct kvec *iov = conn->ksnc_rx_iov; int nob; int rc; - LASSERT(conn->ksnc_rx_niov > 0); - - /* - * Never touch conn->ksnc_rx_iov or change connection - * status inside ksocknal_lib_recv_iov - */ - rc = ksocknal_lib_recv_iov(conn); - - if (rc <= 0) - return rc; - - /* received something... */ - nob = rc; - - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_rx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with setting rx_started */ - conn->ksnc_rx_started = 1; - - conn->ksnc_rx_nob_wanted -= nob; - conn->ksnc_rx_nob_left -= nob; - - do { - LASSERT(conn->ksnc_rx_niov > 0); - - if (nob < (int)iov->iov_len) { - iov->iov_len -= nob; - iov->iov_base += nob; - return -EAGAIN; - } - - nob -= iov->iov_len; - conn->ksnc_rx_iov = ++iov; - conn->ksnc_rx_niov--; - } while (nob); - - return rc; -} - -static int -ksocknal_recv_kiov(struct ksock_conn *conn) -{ - struct bio_vec *kiov = conn->ksnc_rx_kiov; - int nob; - int rc; - - LASSERT(conn->ksnc_rx_nkiov > 0); - /* - * Never touch conn->ksnc_rx_kiov or change connection - * status inside ksocknal_lib_recv_iov + * Never touch conn->ksnc_rx_to or change connection + * status inside ksocknal_lib_recv */ - rc = ksocknal_lib_recv_kiov(conn); + rc = ksocknal_lib_recv(conn); if (rc <= 0) return rc; @@ -323,22 +273,11 @@ ksocknal_recv_kiov(struct ksock_conn *conn) mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; - conn->ksnc_rx_nob_wanted -= nob; conn->ksnc_rx_nob_left -= nob; - do { - LASSERT(conn->ksnc_rx_nkiov > 0); - - if (nob < (int)kiov->bv_len) { - kiov->bv_offset += nob; - kiov->bv_len -= nob; - return -EAGAIN; - } - - nob -= kiov->bv_len; - conn->ksnc_rx_kiov = ++kiov; - conn->ksnc_rx_nkiov--; - } while (nob); + iov_iter_advance(&conn->ksnc_rx_to, nob); + if (iov_iter_count(&conn->ksnc_rx_to)) + return -EAGAIN; return 1; } @@ -348,7 +287,7 @@ ksocknal_receive(struct ksock_conn *conn) { /* * Return 1 on success, 0 on EOF, < 0 on error. - * Caller checks ksnc_rx_nob_wanted to determine + * Caller checks ksnc_rx_to to determine * progress/completion. */ int rc; @@ -365,11 +304,7 @@ ksocknal_receive(struct ksock_conn *conn) } for (;;) { - if (conn->ksnc_rx_niov) - rc = ksocknal_recv_iov(conn); - else - rc = ksocknal_recv_kiov(conn); - + rc = ksocknal_recv_iter(conn); if (rc <= 0) { /* error/EOF or partial receive */ if (rc == -EAGAIN) { @@ -383,7 +318,7 @@ ksocknal_receive(struct ksock_conn *conn) /* Completed a fragment */ - if (!conn->ksnc_rx_nob_wanted) { + if (!iov_iter_count(&conn->ksnc_rx_to)) { rc = 1; break; } @@ -1051,6 +986,7 @@ int ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) { static char ksocknal_slop_buffer[4096]; + struct kvec *kvec = (struct kvec *)&conn->ksnc_rx_iov_space; int nob; unsigned int niov; @@ -1071,32 +1007,26 @@ ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) case KSOCK_PROTO_V2: case KSOCK_PROTO_V3: conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER; - conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg; - - conn->ksnc_rx_nob_wanted = offsetof(struct ksock_msg, ksm_u); + kvec->iov_base = &conn->ksnc_msg; + kvec->iov_len = offsetof(struct ksock_msg, ksm_u); conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u); - conn->ksnc_rx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u); + iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, + 1, offsetof(struct ksock_msg, ksm_u)); break; case KSOCK_PROTO_V1: /* Receiving bare struct lnet_hdr */ conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - conn->ksnc_rx_nob_wanted = sizeof(struct lnet_hdr); + kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; + kvec->iov_len = sizeof(struct lnet_hdr); conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr); - - conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; - conn->ksnc_rx_iov[0].iov_len = sizeof(struct lnet_hdr); + iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, + 1, sizeof(struct lnet_hdr)); break; default: LBUG(); } - conn->ksnc_rx_niov = 1; - - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; conn->ksnc_rx_csum = ~0; return 1; } @@ -1107,15 +1037,14 @@ ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) */ conn->ksnc_rx_state = SOCKNAL_RX_SLOP; conn->ksnc_rx_nob_left = nob_to_skip; - conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; skipped = 0; niov = 0; do { nob = min_t(int, nob_to_skip, sizeof(ksocknal_slop_buffer)); - conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer; - conn->ksnc_rx_iov[niov].iov_len = nob; + kvec[niov].iov_base = ksocknal_slop_buffer; + kvec[niov].iov_len = nob; niov++; skipped += nob; nob_to_skip -= nob; @@ -1123,16 +1052,14 @@ ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) } while (nob_to_skip && /* mustn't overflow conn's rx iov */ niov < sizeof(conn->ksnc_rx_iov_space) / sizeof(struct iovec)); - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_nob_wanted = skipped; + iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, niov, skipped); return 0; } static int ksocknal_process_receive(struct ksock_conn *conn) { + struct kvec *kvec = (struct kvec *)&conn->ksnc_rx_iov_space; struct lnet_hdr *lhdr; struct lnet_process_id *id; int rc; @@ -1146,7 +1073,7 @@ ksocknal_process_receive(struct ksock_conn *conn) conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER || conn->ksnc_rx_state == SOCKNAL_RX_SLOP); again: - if (conn->ksnc_rx_nob_wanted) { + if (iov_iter_count(&conn->ksnc_rx_to)) { rc = ksocknal_receive(conn); if (rc <= 0) { @@ -1171,7 +1098,7 @@ ksocknal_process_receive(struct ksock_conn *conn) return (!rc ? -ESHUTDOWN : rc); } - if (conn->ksnc_rx_nob_wanted) { + if (iov_iter_count(&conn->ksnc_rx_to)) { /* short read */ return -EAGAIN; } @@ -1234,16 +1161,13 @@ ksocknal_process_receive(struct ksock_conn *conn) } conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - conn->ksnc_rx_nob_wanted = sizeof(struct ksock_lnet_msg); conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg); - conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; - conn->ksnc_rx_iov[0].iov_len = sizeof(struct ksock_lnet_msg); + kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; + kvec->iov_len = sizeof(struct ksock_lnet_msg); - conn->ksnc_rx_niov = 1; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; + iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, + 1, sizeof(struct ksock_lnet_msg)); goto again; /* read lnet header now */ @@ -1345,26 +1269,9 @@ ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg, LASSERT(to->nr_segs <= LNET_MAX_IOV); conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_wanted = iov_iter_count(to); conn->ksnc_rx_nob_left = rlen; - if (to->type & ITER_KVEC) { - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov; - conn->ksnc_rx_niov = - lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov, - to->nr_segs, to->kvec, - to->iov_offset, iov_iter_count(to)); - } else { - conn->ksnc_rx_niov = 0; - conn->ksnc_rx_iov = NULL; - conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov; - conn->ksnc_rx_nkiov = - lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov, - to->nr_segs, to->bvec, - to->iov_offset, iov_iter_count(to)); - } + conn->ksnc_rx_to = *to; LASSERT(conn->ksnc_rx_scheduled); @@ -2329,12 +2236,12 @@ ksocknal_find_timed_out_conn(struct ksock_peer *peer) conn->ksnc_rx_deadline)) { /* Timed out incomplete incoming message */ ksocknal_conn_addref(conn); - CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %d left %d\n", + CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %zd left %d\n", libcfs_id2str(peer->ksnp_id), &conn->ksnc_ipaddr, conn->ksnc_port, conn->ksnc_rx_state, - conn->ksnc_rx_nob_wanted, + iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left); return conn; } diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index 970140f09258..cb28dd2baf2f 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -162,94 +162,39 @@ ksocknal_lib_eager_ack(struct ksock_conn *conn) sizeof(opt)); } -int -ksocknal_lib_recv_iov(struct ksock_conn *conn) +static int lustre_csum(struct kvec *v, void *context) { - unsigned int niov = conn->ksnc_rx_niov; - struct kvec *iov = conn->ksnc_rx_iov; - struct msghdr msg = { - .msg_flags = 0 - }; - int nob; - int i; - int rc; - int fragnob; - int sum; - __u32 saved_csum; - - LASSERT(niov > 0); - - for (nob = i = 0; i < niov; i++) - nob += iov[i].iov_len; - - LASSERT(nob <= conn->ksnc_rx_nob_wanted); - - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob); - rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); - - saved_csum = 0; - if (conn->ksnc_proto == &ksocknal_protocol_v2x) { - saved_csum = conn->ksnc_msg.ksm_csum; - conn->ksnc_msg.ksm_csum = 0; - } - - if (saved_csum) { - /* accumulate checksum */ - for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { - LASSERT(i < niov); - - fragnob = iov[i].iov_len; - if (fragnob > sum) - fragnob = sum; - - conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum, - iov[i].iov_base, - fragnob); - } - conn->ksnc_msg.ksm_csum = saved_csum; - } - - return rc; + struct ksock_conn *conn = context; + conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum, + v->iov_base, v->iov_len); + return 0; } int -ksocknal_lib_recv_kiov(struct ksock_conn *conn) +ksocknal_lib_recv(struct ksock_conn *conn) { - unsigned int niov = conn->ksnc_rx_nkiov; - struct bio_vec *kiov = conn->ksnc_rx_kiov; - struct msghdr msg = { - .msg_flags = 0 - }; - int nob; - int i; + struct msghdr msg = { .msg_iter = conn->ksnc_rx_to }; + __u32 saved_csum; int rc; - void *base; - int sum; - int fragnob; - for (nob = i = 0; i < niov; i++) - nob += kiov[i].bv_len; - - LASSERT(nob <= conn->ksnc_rx_nob_wanted); - - iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob); rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); + if (rc <= 0) + return rc; - if (conn->ksnc_msg.ksm_csum) { - for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { - LASSERT(i < niov); - - base = kmap(kiov[i].bv_page) + kiov[i].bv_offset; - fragnob = kiov[i].bv_len; - if (fragnob > sum) - fragnob = sum; + saved_csum = conn->ksnc_msg.ksm_csum; + if (!saved_csum) + return rc; - conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum, - base, fragnob); + /* header is included only in V2 - V3 checksums only the bulk data */ + if (!(conn->ksnc_rx_to.type & ITER_BVEC) && + conn->ksnc_proto != &ksocknal_protocol_v2x) + return rc; + + /* accumulate checksum */ + conn->ksnc_msg.ksm_csum = 0; + iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn); + conn->ksnc_msg.ksm_csum = saved_csum; - kunmap(kiov[i].bv_page); - } - } return rc; } diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index 27848cd69564..68d16ffec980 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -890,7 +890,7 @@ lnet_return_rx_credits_locked(struct lnet_msg *msg) */ LASSERT(msg->msg_kiov); - rb = list_entry(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]); + rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]); rbp = rb->rb_pool; msg->msg_kiov = NULL; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 1bc9be829fb4..71517b3c5558 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -210,12 +210,6 @@ static struct workqueue_struct *vhost_scsi_workqueue; static DEFINE_MUTEX(vhost_scsi_mutex); static LIST_HEAD(vhost_scsi_list); -static int iov_num_pages(void __user *iov_base, size_t iov_len) -{ - return (PAGE_ALIGN((unsigned long)iov_base + iov_len) - - ((unsigned long)iov_base & PAGE_MASK)) >> PAGE_SHIFT; -} - static void vhost_scsi_done_inflight(struct kref *kref) { struct vhost_scsi_inflight *inflight; @@ -618,48 +612,31 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, */ static int vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, - void __user *ptr, - size_t len, + struct iov_iter *iter, struct scatterlist *sgl, bool write) { - unsigned int npages = 0, offset, nbytes; - unsigned int pages_nr = iov_num_pages(ptr, len); - struct scatterlist *sg = sgl; struct page **pages = cmd->tvc_upages; - int ret, i; - - if (pages_nr > VHOST_SCSI_PREALLOC_UPAGES) { - pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than" - " preallocated VHOST_SCSI_PREALLOC_UPAGES: %u\n", - pages_nr, VHOST_SCSI_PREALLOC_UPAGES); - return -ENOBUFS; - } + struct scatterlist *sg = sgl; + ssize_t bytes; + size_t offset; + unsigned int npages = 0; - ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages); + bytes = iov_iter_get_pages(iter, pages, LONG_MAX, + VHOST_SCSI_PREALLOC_UPAGES, &offset); /* No pages were pinned */ - if (ret < 0) - goto out; - /* Less pages pinned than wanted */ - if (ret != pages_nr) { - for (i = 0; i < ret; i++) - put_page(pages[i]); - ret = -EFAULT; - goto out; - } + if (bytes <= 0) + return bytes < 0 ? bytes : -EFAULT; - while (len > 0) { - offset = (uintptr_t)ptr & ~PAGE_MASK; - nbytes = min_t(unsigned int, PAGE_SIZE - offset, len); - sg_set_page(sg, pages[npages], nbytes, offset); - ptr += nbytes; - len -= nbytes; - sg++; - npages++; - } + iov_iter_advance(iter, bytes); -out: - return ret; + while (bytes) { + unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes); + sg_set_page(sg++, pages[npages++], n, offset); + bytes -= n; + offset = 0; + } + return npages; } static int @@ -687,24 +664,20 @@ vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write, struct iov_iter *iter, struct scatterlist *sg, int sg_count) { - size_t off = iter->iov_offset; - int i, ret; - - for (i = 0; i < iter->nr_segs; i++) { - void __user *base = iter->iov[i].iov_base + off; - size_t len = iter->iov[i].iov_len - off; + struct scatterlist *p = sg; + int ret; - ret = vhost_scsi_map_to_sgl(cmd, base, len, sg, write); + while (iov_iter_count(iter)) { + ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write); if (ret < 0) { - for (i = 0; i < sg_count; i++) { - struct page *page = sg_page(&sg[i]); + while (p < sg) { + struct page *page = sg_page(p++); if (page) put_page(page); } return ret; } sg += ret; - off = 0; } return 0; } diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 02cd33c58204..c7822d8078b9 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -134,20 +134,16 @@ static void pvcalls_conn_back_read(void *opaque) masked_cons = pvcalls_mask(cons, array_size); memset(&msg, 0, sizeof(msg)); - msg.msg_iter.type = ITER_KVEC|WRITE; - msg.msg_iter.count = wanted; if (masked_prod < masked_cons) { vec[0].iov_base = data->in + masked_prod; vec[0].iov_len = wanted; - msg.msg_iter.kvec = vec; - msg.msg_iter.nr_segs = 1; + iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted); } else { vec[0].iov_base = data->in + masked_prod; vec[0].iov_len = array_size - masked_prod; vec[1].iov_base = data->in; vec[1].iov_len = wanted - vec[0].iov_len; - msg.msg_iter.kvec = vec; - msg.msg_iter.nr_segs = 2; + iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted); } atomic_set(&map->read, 0); @@ -196,20 +192,16 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) memset(&msg, 0, sizeof(msg)); msg.msg_flags |= MSG_DONTWAIT; - msg.msg_iter.type = ITER_KVEC|READ; - msg.msg_iter.count = size; if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) { vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); vec[0].iov_len = size; - msg.msg_iter.kvec = vec; - msg.msg_iter.nr_segs = 1; + iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size); } else { vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); vec[0].iov_len = array_size - pvcalls_mask(cons, array_size); vec[1].iov_base = data->out; vec[1].iov_len = size - vec[0].iov_len; - msg.msg_iter.kvec = vec; - msg.msg_iter.nr_segs = 2; + iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size); } atomic_set(&map->write, 0); diff --git a/fs/iomap.c b/fs/iomap.c index b9f74803e56c..47d29ccffaef 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -856,6 +856,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, struct bio *bio; bool need_zeroout = false; int nr_pages, ret; + size_t copied = 0; if ((pos | length | align) & ((1 << blkbits) - 1)) return -EINVAL; @@ -867,7 +868,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, /*FALLTHRU*/ case IOMAP_UNWRITTEN: if (!(dio->flags & IOMAP_DIO_WRITE)) { - iov_iter_zero(length, dio->submit.iter); + length = iov_iter_zero(length, dio->submit.iter); dio->size += length; return length; } @@ -904,8 +905,11 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, } do { - if (dio->error) + size_t n; + if (dio->error) { + iov_iter_revert(dio->submit.iter, copied); return 0; + } bio = bio_alloc(GFP_KERNEL, nr_pages); bio_set_dev(bio, iomap->bdev); @@ -918,20 +922,24 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, ret = bio_iov_iter_get_pages(bio, &iter); if (unlikely(ret)) { bio_put(bio); - return ret; + return copied ? copied : ret; } + n = bio->bi_iter.bi_size; if (dio->flags & IOMAP_DIO_WRITE) { bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE); - task_io_account_write(bio->bi_iter.bi_size); + task_io_account_write(n); } else { bio_set_op_attrs(bio, REQ_OP_READ, 0); if (dio->flags & IOMAP_DIO_DIRTY) bio_set_pages_dirty(bio); } - dio->size += bio->bi_iter.bi_size; - pos += bio->bi_iter.bi_size; + iov_iter_advance(dio->submit.iter, n); + + dio->size += n; + pos += n; + copied += n; nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES); @@ -947,9 +955,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, if (pad) iomap_dio_zero(dio, iomap, pos, fs_block_size - pad); } - - iov_iter_advance(dio->submit.iter, length); - return length; + return copied; } ssize_t diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 004af348fb80..f44d5eb74fcc 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -275,12 +275,6 @@ struct orangefs_kiocb_s { /* orangefs kernel operation type */ struct orangefs_kernel_op_s *op; - /* The user space buffers from/to which I/O is being staged */ - struct iovec *iov; - - /* number of elements in the iovector */ - unsigned long nr_segs; - /* set to indicate the type of the operation */ int rw; diff --git a/fs/read_write.c b/fs/read_write.c index 0046d72efe94..f8547b82dfb3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -635,27 +635,6 @@ SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, return ret; } -/* - * Reduce an iovec's length in-place. Return the resulting number of segments - */ -unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) -{ - unsigned long seg = 0; - size_t len = 0; - - while (seg < nr_segs) { - seg++; - if (len + iov->iov_len >= to) { - iov->iov_len = to - len; - break; - } - len += iov->iov_len; - iov++; - } - return seg; -} -EXPORT_SYMBOL(iov_shorten); - static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, loff_t *ppos, int type, rwf_t flags) { diff --git a/include/linux/bio.h b/include/linux/bio.h index d4eec19a6d3c..82f0c8fd7be8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -450,7 +450,7 @@ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, - const struct iov_iter *, gfp_t); + struct iov_iter *, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -482,7 +482,7 @@ extern void bio_free_pages(struct bio *bio); extern struct bio *bio_copy_user_iov(struct request_queue *, struct rq_map_data *, - const struct iov_iter *, + struct iov_iter *, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); diff --git a/include/linux/uio.h b/include/linux/uio.h index 8a642cda641c..e67e12adb136 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -80,8 +80,6 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) ((iov = iov_iter_iovec(&(iter))), 1); \ iov_iter_advance(&(iter), (iov).iov_len)) -unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); - size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); @@ -246,4 +244,8 @@ int compat_import_iovec(int type, const struct compat_iovec __user * uvector, int import_single_range(int type, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i); +int iov_iter_for_each_range(struct iov_iter *i, size_t bytes, + int (*f)(struct kvec *vec, void *context), + void *context); + #endif diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1c1c06ddc20a..970212670b6a 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1446,3 +1446,25 @@ int import_single_range(int rw, void __user *buf, size_t len, return 0; } EXPORT_SYMBOL(import_single_range); + +int iov_iter_for_each_range(struct iov_iter *i, size_t bytes, + int (*f)(struct kvec *vec, void *context), + void *context) +{ + struct kvec w; + int err = -EINVAL; + if (!bytes) + return 0; + + iterate_all_kinds(i, bytes, v, -EINVAL, ({ + w.iov_base = kmap(v.bv_page) + v.bv_offset; + w.iov_len = v.bv_len; + err = f(&w, context); + kunmap(v.bv_page); + err;}), ({ + w = v; + err = f(&w, context);}) + ) + return err; +} +EXPORT_SYMBOL(iov_iter_for_each_range); |