summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2022-05-22 14:59:25 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2022-08-08 22:37:15 -0400
commitfcb14cb1bdacec5b4374fe161e83fb8208164a85 (patch)
tree06b308b8f6b67c65e0c1e90d2e97e61ae66f232a /lib
parentfa9db655d0e112c108fe838809608caf759bdf5e (diff)
downloadlwn-fcb14cb1bdacec5b4374fe161e83fb8208164a85.tar.gz
lwn-fcb14cb1bdacec5b4374fe161e83fb8208164a85.zip
new iov_iter flavour - ITER_UBUF
Equivalent of single-segment iovec. Initialized by iov_iter_ubuf(), checked for by iter_is_ubuf(), otherwise behaves like ITER_IOVEC ones. We are going to expose the things like ->write_iter() et.al. to those in subsequent commits. New predicate (user_backed_iter()) that is true for ITER_IOVEC and ITER_UBUF; places like direct-IO handling should use that for checking that pages we modify after getting them from iov_iter_get_pages() would need to be dirtied. DO NOT assume that replacing iter_is_iovec() with user_backed_iter() will solve all problems - there's code that uses iter_is_iovec() to decide how to poke around in iov_iter guts and for that the predicate replacement obviously won't suffice. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'lib')
-rw-r--r--lib/iov_iter.c87
1 files changed, 69 insertions, 18 deletions
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 0e0be334dbee..b3493d20536e 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -16,6 +16,16 @@
#define PIPE_PARANOIA /* for now */
+/* covers ubuf and kbuf alike */
+#define iterate_buf(i, n, base, len, off, __p, STEP) { \
+ size_t __maybe_unused off = 0; \
+ len = n; \
+ base = __p + i->iov_offset; \
+ len -= (STEP); \
+ i->iov_offset += len; \
+ n = len; \
+}
+
/* covers iovec and kvec alike */
#define iterate_iovec(i, n, base, len, off, __p, STEP) { \
size_t off = 0; \
@@ -110,7 +120,12 @@ __out: \
if (unlikely(i->count < n)) \
n = i->count; \
if (likely(n)) { \
- if (likely(iter_is_iovec(i))) { \
+ if (likely(iter_is_ubuf(i))) { \
+ void __user *base; \
+ size_t len; \
+ iterate_buf(i, n, base, len, off, \
+ i->ubuf, (I)) \
+ } else if (likely(iter_is_iovec(i))) { \
const struct iovec *iov = i->iov; \
void __user *base; \
size_t len; \
@@ -275,7 +290,11 @@ out:
*/
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
{
- if (iter_is_iovec(i)) {
+ if (iter_is_ubuf(i)) {
+ size_t n = min(size, iov_iter_count(i));
+ n -= fault_in_readable(i->ubuf + i->iov_offset, n);
+ return size - n;
+ } else if (iter_is_iovec(i)) {
size_t count = min(size, iov_iter_count(i));
const struct iovec *p;
size_t skip;
@@ -314,7 +333,11 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable);
*/
size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
{
- if (iter_is_iovec(i)) {
+ if (iter_is_ubuf(i)) {
+ size_t n = min(size, iov_iter_count(i));
+ n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n);
+ return size - n;
+ } else if (iter_is_iovec(i)) {
size_t count = min(size, iov_iter_count(i));
const struct iovec *p;
size_t skip;
@@ -345,6 +368,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
*i = (struct iov_iter) {
.iter_type = ITER_IOVEC,
.nofault = false,
+ .user_backed = true,
.data_source = direction,
.iov = iov,
.nr_segs = nr_segs,
@@ -494,7 +518,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
if (unlikely(iov_iter_is_pipe(i)))
return copy_pipe_to_iter(addr, bytes, i);
- if (iter_is_iovec(i))
+ if (user_backed_iter(i))
might_fault();
iterate_and_advance(i, bytes, base, len, off,
copyout(base, addr + off, len),
@@ -583,7 +607,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
if (unlikely(iov_iter_is_pipe(i)))
return copy_mc_pipe_to_iter(addr, bytes, i);
- if (iter_is_iovec(i))
+ if (user_backed_iter(i))
might_fault();
__iterate_and_advance(i, bytes, base, len, off,
copyout_mc(base, addr + off, len),
@@ -601,7 +625,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
WARN_ON(1);
return 0;
}
- if (iter_is_iovec(i))
+ if (user_backed_iter(i))
might_fault();
iterate_and_advance(i, bytes, base, len, off,
copyin(addr + off, base, len),
@@ -894,16 +918,16 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
{
if (unlikely(i->count < size))
size = i->count;
- if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
+ if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) {
+ i->iov_offset += size;
+ i->count -= size;
+ } else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
/* iovec and kvec have identical layouts */
iov_iter_iovec_advance(i, size);
} else if (iov_iter_is_bvec(i)) {
iov_iter_bvec_advance(i, size);
} else if (iov_iter_is_pipe(i)) {
pipe_advance(i, size);
- } else if (unlikely(iov_iter_is_xarray(i))) {
- i->iov_offset += size;
- i->count -= size;
} else if (iov_iter_is_discard(i)) {
i->count -= size;
}
@@ -950,7 +974,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
return;
}
unroll -= i->iov_offset;
- if (iov_iter_is_xarray(i)) {
+ if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) {
BUG(); /* We should never go beyond the start of the specified
* range since we might then be straying into pages that
* aren't pinned.
@@ -1158,6 +1182,14 @@ static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
unsigned len_mask)
{
+ if (likely(iter_is_ubuf(i))) {
+ if (i->count & len_mask)
+ return false;
+ if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask)
+ return false;
+ return true;
+ }
+
if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
return iov_iter_aligned_iovec(i, addr_mask, len_mask);
@@ -1233,6 +1265,13 @@ static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
+ if (likely(iter_is_ubuf(i))) {
+ size_t size = i->count;
+ if (size)
+ return ((unsigned long)i->ubuf + i->iov_offset) | size;
+ return 0;
+ }
+
/* iovec and kvec have identical layouts */
if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
return iov_iter_alignment_iovec(i);
@@ -1263,6 +1302,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
size_t size = i->count;
unsigned k;
+ if (iter_is_ubuf(i))
+ return 0;
+
if (WARN_ON(!iter_is_iovec(i)))
return ~0U;
@@ -1385,12 +1427,15 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
}
-/* must be done on non-empty ITER_IOVEC one */
+/* must be done on non-empty ITER_UBUF or ITER_IOVEC one */
static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size)
{
size_t skip;
long k;
+ if (iter_is_ubuf(i))
+ return (unsigned long)i->ubuf + i->iov_offset;
+
for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
size_t len = i->iov[k].iov_len - skip;
@@ -1432,7 +1477,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
if (maxsize > MAX_RW_COUNT)
maxsize = MAX_RW_COUNT;
- if (likely(iter_is_iovec(i))) {
+ if (likely(user_backed_iter(i))) {
unsigned int gup_flags = 0;
unsigned long addr;
@@ -1559,7 +1604,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
if (maxsize > MAX_RW_COUNT)
maxsize = MAX_RW_COUNT;
- if (likely(iter_is_iovec(i))) {
+ if (likely(user_backed_iter(i))) {
unsigned int gup_flags = 0;
unsigned long addr;
@@ -1715,6 +1760,11 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
if (unlikely(!i->count))
return 0;
+ if (likely(iter_is_ubuf(i))) {
+ unsigned offs = offset_in_page(i->ubuf + i->iov_offset);
+ int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE);
+ return min(npages, maxpages);
+ }
/* iovec and kvec have identical layouts */
if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
return iov_npages(i, maxpages);
@@ -1749,17 +1799,16 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
WARN_ON(1);
return NULL;
}
- if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
- return NULL;
if (iov_iter_is_bvec(new))
return new->bvec = kmemdup(new->bvec,
new->nr_segs * sizeof(struct bio_vec),
flags);
- else
+ else if (iov_iter_is_kvec(new) || iter_is_iovec(new))
/* iovec and kvec have identical layout */
return new->iov = kmemdup(new->iov,
new->nr_segs * sizeof(struct iovec),
flags);
+ return NULL;
}
EXPORT_SYMBOL(dup_iter);
@@ -1953,10 +2002,12 @@ EXPORT_SYMBOL(import_single_range);
void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
{
if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
- !iov_iter_is_kvec(i))
+ !iov_iter_is_kvec(i) && !iter_is_ubuf(i))
return;
i->iov_offset = state->iov_offset;
i->count = state->count;
+ if (iter_is_ubuf(i))
+ return;
/*
* For the *vec iters, nr_segs + iov is constant - if we increment
* the vec, then we also decrement the nr_segs count. Hence we don't