summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYu Zhao <yuzhao@google.com>2022-12-30 14:52:52 -0700
committerAndrew Morton <akpm@linux-foundation.org>2023-01-18 17:12:57 -0800
commit17e810229cb3068b692fa078bd9b3a6527e0866a (patch)
tree34468471b710e3329acb2143665b6887f76e0a78
parent8788f6781486769d9598dcaedc3fe0eb12fc3e59 (diff)
downloadlwn-17e810229cb3068b692fa078bd9b3a6527e0866a.tar.gz
lwn-17e810229cb3068b692fa078bd9b3a6527e0866a.zip
mm: support POSIX_FADV_NOREUSE
This patch adds POSIX_FADV_NOREUSE to vma_has_recency() so that the LRU algorithm can ignore access to mapped files marked by this flag. The advantages of POSIX_FADV_NOREUSE are: 1. Unlike MADV_SEQUENTIAL and MADV_RANDOM, it does not alter the default readahead behavior. 2. Unlike MADV_SEQUENTIAL and MADV_RANDOM, it does not split VMAs and therefore does not take mmap_lock. 3. Unlike MADV_COLD, setting it has a negligible cost, regardless of how many pages it affects. Its limitations are: 1. Like POSIX_FADV_RANDOM and POSIX_FADV_SEQUENTIAL, it currently does not support range. IOW, its scope is the entire file. 2. It currently does not ignore access through file descriptors. Specifically, for the active/inactive LRU, given a file page shared by two users and one of them having set POSIX_FADV_NOREUSE on the file, this page will be activated upon the second user accessing it. This corner case can be covered by checking POSIX_FADV_NOREUSE before calling folio_mark_accessed() on the read path. But it is considered not worth the effort. There have been a few attempts to support POSIX_FADV_NOREUSE, e.g., [1]. This time the goal is to fill a niche: a few desktop applications, e.g., large file transferring and video encoding/decoding, want fast file streaming with mmap() rather than direct IO. Among those applications, an SVT-AV1 regression was reported when running with MGLRU [2]. The following test can reproduce that regression. kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo) kb=$((kb - 8*1024*1024)) modprobe brd rd_nr=1 rd_size=$kb dd if=/dev/zero of=/dev/ram0 bs=1M mkfs.ext4 /dev/ram0 mount /dev/ram0 /mnt/ swapoff -a fallocate -l 8G /mnt/swapfile mkswap /mnt/swapfile swapon /mnt/swapfile wget http://ultravideo.cs.tut.fi/video/Bosphorus_3840x2160_120fps_420_8bit_YUV_Y4M.7z 7z e -o/mnt/ Bosphorus_3840x2160_120fps_420_8bit_YUV_Y4M.7z SvtAv1EncApp --preset 12 -w 3840 -h 2160 \ -i /mnt/Bosphorus_3840x2160.y4m For MGLRU, the following change showed a [9-11]% increase in FPS, which makes it on par with the active/inactive LRU. patch Source/App/EncApp/EbAppMain.c <<EOF 31a32 > #include <fcntl.h> 35d35 < #include <fcntl.h> /* _O_BINARY */ 117a118 > posix_fadvise(config->mmap.fd, 0, 0, POSIX_FADV_NOREUSE); EOF [1] https://lore.kernel.org/r/1308923350-7932-1-git-send-email-andrea@betterlinux.com/ [2] https://openbenchmarking.org/result/2209259-PTS-MGLRU8GB57 Link: https://lkml.kernel.org/r/20221230215252.2628425-2-yuzhao@google.com Signed-off-by: Yu Zhao <yuzhao@google.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Andrea Righi <andrea.righi@canonical.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michael Larabel <Michael@MichaelLarabel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/mm_inline.h3
-rw-r--r--mm/fadvise.c5
3 files changed, 9 insertions, 1 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c1769a2c5d70..d353c262d669 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File supports DIRECT IO */
#define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000)
+#define FMODE_NOREUSE ((__force fmode_t)0x800000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 4abebf2615a3..26dcbda07e92 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -599,6 +599,9 @@ static inline bool vma_has_recency(struct vm_area_struct *vma)
if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
return false;
+ if (vma->vm_file && (vma->vm_file->f_mode & FMODE_NOREUSE))
+ return false;
+
return true;
}
diff --git a/mm/fadvise.c b/mm/fadvise.c
index bf04fec87f35..fb7c5f43fd2a 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -80,7 +80,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
case POSIX_FADV_NORMAL:
file->f_ra.ra_pages = bdi->ra_pages;
spin_lock(&file->f_lock);
- file->f_mode &= ~FMODE_RANDOM;
+ file->f_mode &= ~(FMODE_RANDOM | FMODE_NOREUSE);
spin_unlock(&file->f_lock);
break;
case POSIX_FADV_RANDOM:
@@ -107,6 +107,9 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
force_page_cache_readahead(mapping, file, start_index, nrpages);
break;
case POSIX_FADV_NOREUSE:
+ spin_lock(&file->f_lock);
+ file->f_mode |= FMODE_NOREUSE;
+ spin_unlock(&file->f_lock);
break;
case POSIX_FADV_DONTNEED:
__filemap_fdatawrite_range(mapping, offset, endbyte,