summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
author: Mike Kravetz <mike.kravetz@oracle.com> 2017-09-06 16:24:16 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-09-06 17:27:29 -0700
commit: 749df87bd7bee5a79cef073f5d032ddb2b211de8 (patch)
tree: 6f5582d907bc71a22c922c9c4ee91055d72b112f /mm
parent: ab1b597ee0e4208a1db227bb7b2c9512c8234b48 (diff)
download: lwn-749df87bd7bee5a79cef073f5d032ddb2b211de8.tar.gz
lwn-749df87bd7bee5a79cef073f5d032ddb2b211de8.zip
mm/shmem: add hugetlbfs support to memfd_create()
This patch came out of discussions in this e-mail thread: http://lkml.kernel.org/r/1499357846-7481-1-git-send-email-mike.kravetz%40oracle.com The Oracle JVM team is developing a new garbage collection model. This new model requires multiple mappings of the same anonymous memory. One straightforward way to accomplish this is with memfd_create. They can use the returned fd to create multiple mappings of the same memory. The JVM today has an option to use (static hugetlb) huge pages. If this option is specified, they would like to use the same garbage collection model requiring multiple mappings to the same memory. Using hugetlbfs, it is possible to explicitly mount a filesystem and specify file paths in order to get an fd that can be used for multiple mappings. However, this introduces additional system admin work and coordination. Ideally they would like to get a hugetlbfs fd without requiring explicit mounting of a filesystem. Today, mmap and shmget can make use of hugetlbfs without explicitly mounting a filesystem. The patch adds this functionality to memfd_create. Add a new flag MFD_HUGETLB to memfd_create() that specifies that the file to be created resides in the hugetlbfs filesystem. This is the generic hugetlbfs filesystem not associated with any specific mount point. As with other system calls that request hugetlbfs-backed pages, there is the ability to encode huge page size in the flag arguments. hugetlbfs does not support sealing operations; therefore, specifying MFD_ALLOW_SEALING with MFD_HUGETLB will result in EINVAL. Of course, the memfd_create man page would need updating if this type of functionality moves forward. Link: http://lkml.kernel.org/r/1502149672-7759-2-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Acked-by: Michal Hocko <mhocko@suse.com> Cc: Hugh Dickins <hughd@google.com> Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/shmem.c37
1 files changed, 31 insertions, 6 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 64bdc91187f7..47179bbe9ee7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -34,6 +34,7 @@
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/khugepaged.h>
+#include <linux/hugetlb.h>
#include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
@@ -3652,7 +3653,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
-#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
+#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB)
SYSCALL_DEFINE2(memfd_create,
const char __user *, uname,
@@ -3664,8 +3665,18 @@ SYSCALL_DEFINE2(memfd_create,
char *name;
long len;
- if (flags & ~(unsigned int)MFD_ALL_FLAGS)
- return -EINVAL;
+ if (!(flags & MFD_HUGETLB)) {
+ if (flags & ~(unsigned int)MFD_ALL_FLAGS)
+ return -EINVAL;
+ } else {
+ /* Sealing not supported in hugetlbfs (MFD_HUGETLB) */
+ if (flags & MFD_ALLOW_SEALING)
+ return -EINVAL;
+ /* Allow huge page size encoding in flags. */
+ if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
+ (MFD_HUGE_MASK << MFD_HUGE_SHIFT)))
+ return -EINVAL;
+ }
/* length includes terminating zero */
len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
@@ -3696,16 +3707,30 @@ SYSCALL_DEFINE2(memfd_create,
goto err_name;
}
- file = shmem_file_setup(name, 0, VM_NORESERVE);
+ if (flags & MFD_HUGETLB) {
+ struct user_struct *user = NULL;
+
+ file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
+ HUGETLB_ANONHUGE_INODE,
+ (flags >> MFD_HUGE_SHIFT) &
+ MFD_HUGE_MASK);
+ } else
+ file = shmem_file_setup(name, 0, VM_NORESERVE);
if (IS_ERR(file)) {
error = PTR_ERR(file);
goto err_fd;
}
- info = SHMEM_I(file_inode(file));
file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
file->f_flags |= O_RDWR | O_LARGEFILE;
- if (flags & MFD_ALLOW_SEALING)
+
+ if (flags & MFD_ALLOW_SEALING) {
+ /*
+ * flags check at beginning of function ensures
+ * this is not a hugetlbfs (MFD_HUGETLB) file.
+ */
+ info = SHMEM_I(file_inode(file));
info->seals &= ~F_SEAL_SEAL;
+ }
fd_install(fd, file);
kfree(name);