Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig        |  17
-rw-r--r--  mm/compaction.c   |  12
-rw-r--r--  mm/filemap.c      |  43
-rw-r--r--  mm/hugetlb.c      |  14
-rw-r--r--  mm/khugepaged.c   |  10
-rw-r--r--  mm/memblock.c     | 151
-rw-r--r--  mm/migrate.c      |   2
-rw-r--r--  mm/mm_init.c      |  69
-rw-r--r--  mm/shmem.c        |  21
-rw-r--r--  mm/slab.h         |  14
-rw-r--r--  mm/slab_common.c  | 111
-rw-r--r--  mm/slub.c         | 209
-rw-r--r--  mm/truncate.c     |   3
-rw-r--r--  mm/util.c         |  28
14 files changed, 494 insertions(+), 210 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 60796402850e..b72e7d040f78 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -279,6 +279,23 @@ config SLAB_FREELIST_HARDENED
sacrifices to harden the kernel slab allocator against common
freelist exploit methods.
+config SLAB_BUCKETS
+ bool "Support allocation from separate kmalloc buckets"
+ depends on !SLUB_TINY
+ default SLAB_FREELIST_HARDENED
+ help
+ Kernel heap attacks frequently depend on being able to create
+ specifically-sized allocations with user-controlled contents
+ that will be allocated into the same kmalloc bucket as a
+ target object. To avoid sharing these allocation buckets,
+ provide an explicitly separated set of buckets to be used for
+ user-controlled allocations. This may very slightly increase
+ memory fragmentation, though in practice it's only a handful
+ of extra pages since the bulk of user-controlled allocations
+ are relatively long-lived.
+
+ If unsure, say Y.
+
config SLUB_STATS
default n
bool "Enable performance statistics"
diff --git a/mm/compaction.c b/mm/compaction.c
index 739b1bf3d637..6cb901b63482 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1179,22 +1179,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (((mode & ISOLATE_ASYNC_MIGRATE) && is_dirty) ||
(mapping && is_unevictable)) {
bool migrate_dirty = true;
- bool is_unmovable;
+ bool is_inaccessible;
/*
* Only folios without mappings or that have
* a ->migrate_folio callback are possible to migrate
* without blocking.
*
- * Folios from unmovable mappings are not migratable.
+ * Folios from inaccessible mappings are not migratable.
*
* However, we can be racing with truncation, which can
* free the mapping that we need to check. Truncation
* holds the folio lock until after the folio is removed
* from the page so holding it ourselves is sufficient.
*
- * To avoid locking the folio just to check unmovable,
- * assume every unmovable folio is also unevictable,
+ * To avoid locking the folio just to check inaccessible,
+ * assume every inaccessible folio is also unevictable,
* which is a cheaper test. If our assumption goes
* wrong, it's not a correctness bug, just potentially
* wasted cycles.
@@ -1207,9 +1207,9 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
migrate_dirty = !mapping ||
mapping->a_ops->migrate_folio;
}
- is_unmovable = mapping && mapping_unmovable(mapping);
+ is_inaccessible = mapping && mapping_inaccessible(mapping);
folio_unlock(folio);
- if (!migrate_dirty || is_unmovable)
+ if (!migrate_dirty || is_inaccessible)
goto isolate_fail_put;
}
diff --git a/mm/filemap.c b/mm/filemap.c
index b53e17c8f4c1..d62150418b91 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3231,7 +3231,8 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
return 0;
- ptep = pte_offset_map(vmf->pmd, vmf->address);
+ ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address,
+ &vmf->ptl);
if (unlikely(!ptep))
return VM_FAULT_NOPAGE;
@@ -3981,21 +3982,24 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
loff_t pos = iocb->ki_pos;
struct address_space *mapping = file->f_mapping;
const struct address_space_operations *a_ops = mapping->a_ops;
+ size_t chunk = mapping_max_folio_size(mapping);
long status = 0;
ssize_t written = 0;
do {
struct page *page;
- unsigned long offset; /* Offset into pagecache page */
- unsigned long bytes; /* Bytes to write to page */
+ struct folio *folio;
+ size_t offset; /* Offset into folio */
+ size_t bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
void *fsdata = NULL;
- offset = (pos & (PAGE_SIZE - 1));
- bytes = min_t(unsigned long, PAGE_SIZE - offset,
- iov_iter_count(i));
+ bytes = iov_iter_count(i);
+retry:
+ offset = pos & (chunk - 1);
+ bytes = min(chunk - offset, bytes);
+ balance_dirty_pages_ratelimited(mapping);
-again:
/*
* Bring in the user page that we will copy from _first_.
* Otherwise there's a nasty deadlock on copying from the
@@ -4017,11 +4021,16 @@ again:
if (unlikely(status < 0))
break;
+ folio = page_folio(page);
+ offset = offset_in_folio(folio, pos);
+ if (bytes > folio_size(folio) - offset)
+ bytes = folio_size(folio) - offset;
+
if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
- copied = copy_page_from_iter_atomic(page, offset, bytes, i);
- flush_dcache_page(page);
+ copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
+ flush_dcache_folio(folio);
status = a_ops->write_end(file, mapping, pos, bytes, copied,
page, fsdata);
@@ -4039,14 +4048,16 @@ again:
* halfway through, might be a race with munmap,
* might be severe memory pressure.
*/
- if (copied)
+ if (chunk > PAGE_SIZE)
+ chunk /= 2;
+ if (copied) {
bytes = copied;
- goto again;
+ goto retry;
+ }
+ } else {
+ pos += status;
+ written += status;
}
- pos += status;
- written += status;
-
- balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(i));
if (!written)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 11f25e00a293..0858a1827207 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1730,13 +1730,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
}
/*
- * Move PageHWPoison flag from head page to the raw error pages,
- * which makes any healthy subpages reusable.
- */
- if (unlikely(folio_test_hwpoison(folio)))
- folio_clear_hugetlb_hwpoison(folio);
-
- /*
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb flag under the hugetlb lock.
*/
@@ -1746,6 +1739,13 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
spin_unlock_irq(&hugetlb_lock);
}
+ /*
+ * Move PageHWPoison flag from head page to the raw error pages,
+ * which makes any healthy subpages reusable.
+ */
+ if (unlikely(folio_test_hwpoison(folio)))
+ folio_clear_hugetlb_hwpoison(folio);
+
folio_ref_unfreeze(folio, 1);
/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a5ec03ef8722..cdd1d8655a76 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2017,9 +2017,9 @@ out_unlock:
if (!is_shmem) {
filemap_nr_thps_inc(mapping);
/*
- * Paired with smp_mb() in do_dentry_open() to ensure
- * i_writecount is up to date and the update to nr_thps is
- * visible. Ensures the page cache will be truncated if the
+ * Paired with the fence in do_dentry_open() -> get_write_access()
+ * to ensure i_writecount is up to date and the update to nr_thps
+ * is visible. Ensures the page cache will be truncated if the
* file is opened writable.
*/
smp_mb();
@@ -2207,8 +2207,8 @@ rollback:
if (!is_shmem && result == SCAN_COPY_MC) {
filemap_nr_thps_dec(mapping);
/*
- * Paired with smp_mb() in do_dentry_open() to
- * ensure the update to nr_thps is visible.
+ * Paired with the fence in do_dentry_open() -> get_write_access()
+ * to ensure the update to nr_thps is visible.
*/
smp_mb();
}
diff --git a/mm/memblock.c b/mm/memblock.c
index e81fb68f7f88..3b9dc2d89b8a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -114,12 +114,10 @@ static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS
struct memblock memblock __initdata_memblock = {
.memory.regions = memblock_memory_init_regions,
- .memory.cnt = 1, /* empty dummy entry */
.memory.max = INIT_MEMBLOCK_MEMORY_REGIONS,
.memory.name = "memory",
.reserved.regions = memblock_reserved_init_regions,
- .reserved.cnt = 1, /* empty dummy entry */
.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS,
.reserved.name = "reserved",
@@ -130,7 +128,6 @@ struct memblock memblock __initdata_memblock = {
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem = {
.regions = memblock_physmem_init_regions,
- .cnt = 1, /* empty dummy entry */
.max = INIT_PHYSMEM_REGIONS,
.name = "physmem",
};
@@ -197,8 +194,8 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
for (i = 0; i < type->cnt; i++)
if (memblock_addrs_overlap(base, size, type->regions[i].base,
type->regions[i].size))
- break;
- return i < type->cnt;
+ return true;
+ return false;
}
/**
@@ -356,7 +353,6 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
/* Special case for empty arrays */
if (type->cnt == 0) {
WARN_ON(type->total_size != 0);
- type->cnt = 1;
type->regions[0].base = 0;
type->regions[0].size = 0;
type->regions[0].flags = 0;
@@ -600,12 +596,13 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
/* special case for empty array */
if (type->regions[0].size == 0) {
- WARN_ON(type->cnt != 1 || type->total_size);
+ WARN_ON(type->cnt != 0 || type->total_size);
type->regions[0].base = base;
type->regions[0].size = size;
type->regions[0].flags = flags;
memblock_set_region_node(&type->regions[0], nid);
type->total_size = size;
+ type->cnt = 1;
return 0;
}
@@ -780,7 +777,8 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt
* Walk @type and ensure that regions don't cross the boundaries defined by
* [@base, @base + @size). Crossing regions are split at the boundaries,
* which may create at most two more regions. The index of the first
- * region inside the range is returned in *@start_rgn and end in *@end_rgn.
+ * region inside the range is returned in *@start_rgn and the index of the
+ * first region after the range is returned in *@end_rgn.
*
* Return:
* 0 on success, -errno on failure.
@@ -1441,6 +1439,17 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
+ /*
+ * Detect any accidental use of these APIs after slab is ready, as at
+ * this moment memblock may be deinitialized already and its
+ * internal data may be destroyed (after execution of memblock_free_all)
+ */
+ if (WARN_ON_ONCE(slab_is_available())) {
+ void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);
+
+ return vaddr ? virt_to_phys(vaddr) : 0;
+ }
+
if (!align) {
/* Can't use WARNs this early in boot on powerpc */
dump_stack();
@@ -1566,13 +1575,6 @@ static void * __init memblock_alloc_internal(
{
phys_addr_t alloc;
- /*
- * Detect any accidental use of these APIs after slab is ready, as at
- * this moment memblock may be deinitialized already and its
- * internal data may be destroyed (after execution of memblock_free_all)
- */
- if (WARN_ON_ONCE(slab_is_available()))
- return kzalloc_node(size, GFP_NOWAIT, nid);
if (max_addr > memblock.current_limit)
max_addr = memblock.current_limit;
@@ -2031,7 +2033,7 @@ static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
* downwards.
*/
pg = PAGE_ALIGN(__pa(start_pg));
- pgend = __pa(end_pg) & PAGE_MASK;
+ pgend = PAGE_ALIGN_DOWN(__pa(end_pg));
/*
* If there are free pages between these, free the section of the
@@ -2234,6 +2236,123 @@ void __init memblock_free_all(void)
totalram_pages_add(pages);
}
+/* Keep a table to reserve named memory */
+#define RESERVE_MEM_MAX_ENTRIES 8
+#define RESERVE_MEM_NAME_SIZE 16
+struct reserve_mem_table {
+ char name[RESERVE_MEM_NAME_SIZE];
+ phys_addr_t start;
+ phys_addr_t size;
+};
+static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
+static int reserved_mem_count;
+
+/* Add wildcard region with a lookup name */
+static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
+ const char *name)
+{
+ struct reserve_mem_table *map;
+
+ map = &reserved_mem_table[reserved_mem_count++];
+ map->start = start;
+ map->size = size;
+ strscpy(map->name, name);
+}
+
+/**
+ * reserve_mem_find_by_name - Find reserved memory region with a given name
+ * @name: The name that is attached to a reserved memory region
+ * @start: If found, holds the start address
+ * @size: If found, holds the size of the region.
+ *
+ * @start and @size are only updated if @name is found.
+ *
+ * Returns: 1 if found or 0 if not found.
+ */
+int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
+{
+ struct reserve_mem_table *map;
+ int i;
+
+ for (i = 0; i < reserved_mem_count; i++) {
+ map = &reserved_mem_table[i];
+ if (!map->size)
+ continue;
+ if (strcmp(name, map->name) == 0) {
+ *start = map->start;
+ *size = map->size;
+ return 1;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
+
+/*
+ * Parse reserve_mem=nn:align:name
+ */
+static int __init reserve_mem(char *p)
+{
+ phys_addr_t start, size, align, tmp;
+ char *name;
+ char *oldp;
+ int len;
+
+ if (!p)
+ return -EINVAL;
+
+ /* Check if there's room for more reserved memory */
+ if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
+ return -EBUSY;
+
+ oldp = p;
+ size = memparse(p, &p);
+ if (!size || p == oldp)
+ return -EINVAL;
+
+ if (*p != ':')
+ return -EINVAL;
+
+ align = memparse(p+1, &p);
+ if (*p != ':')
+ return -EINVAL;
+
+ /*
+ * memblock_phys_alloc() doesn't accept a zero-sized align,
+ * but it is OK for this command-line option to pass one.
+ */
+ if (align < SMP_CACHE_BYTES)
+ align = SMP_CACHE_BYTES;
+
+ name = p + 1;
+ len = strlen(name);
+
+ /* name needs to have length but not too big */
+ if (!len || len >= RESERVE_MEM_NAME_SIZE)
+ return -EINVAL;
+
+ /* Make sure that name has text */
+ for (p = name; *p; p++) {
+ if (!isspace(*p))
+ break;
+ }
+ if (!*p)
+ return -EINVAL;
+
+ /* Make sure the name is not already used */
+ if (reserve_mem_find_by_name(name, &start, &tmp))
+ return -EBUSY;
+
+ start = memblock_phys_alloc(size, align);
+ if (!start)
+ return -ENOMEM;
+
+ reserved_mem_add(start, size, name);
+
+ return 1;
+}
+__setup("reserve_mem=", reserve_mem);
+
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
static const char * const flagname[] = {
[ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
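
To illustrate the reserve_mem= plumbing added above, a hedged example (the region name and sizes are arbitrary, not taken from this patch): booting with

	reserve_mem=2M:4096:oops_buffer

reserves 2M of memory aligned to 4096 bytes under the name "oops_buffer", and a driver could later look it up along these lines:

	phys_addr_t start, size;

	if (!reserve_mem_find_by_name("oops_buffer", &start, &size))
		return -ENODEV;
	/* start/size now describe the named reserved physical range */

Note that a name must be shorter than RESERVE_MEM_NAME_SIZE (16 bytes) and at most RESERVE_MEM_MAX_ENTRIES (8) regions can be registered.
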
diff --git a/mm/migrate.c b/mm/migrate.c
index bdbb5bb04c91..e7296c0fb5d5 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -967,7 +967,7 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
if (!mapping)
rc = migrate_folio(mapping, dst, src, mode);
- else if (mapping_unmovable(mapping))
+ else if (mapping_inaccessible(mapping))
rc = -EOPNOTSUPP;
else if (mapping->a_ops->migrate_folio)
/*
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 1095dcf25ced..75c3bd42799b 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -363,7 +363,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
nid = memblock_get_region_node(r);
- usable_startpfn = PFN_DOWN(r->base);
+ usable_startpfn = memblock_region_memory_base_pfn(r);
zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
min(usable_startpfn, zone_movable_pfn[nid]) :
usable_startpfn;
@@ -676,6 +676,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
if (early_page_ext_enabled())
return false;
+
+ /* Always populate low zones for address-constrained allocations */
+ if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
+ return false;
+
+ if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
+ return true;
+
/*
* prev_end_pfn static that contains the end of previous zone
* No need to protect because called very early in boot before smp_init.
@@ -685,12 +693,6 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
nr_initialised = 0;
}
- /* Always populate low zones for address-constrained allocations */
- if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
- return false;
-
- if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
- return true;
/*
* We start only with one section of pages, more pages are added as
* needed until the rest of deferred pages are initialized.
@@ -758,9 +760,6 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
init_reserved_page(start_pfn, nid);
- /* Avoid false-positive PageTail() */
- INIT_LIST_HEAD(&page->lru);
-
/*
* no need for atomic set_bit because the struct
* page is not visible yet so nobody should
@@ -1979,24 +1978,29 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
}
/*
- * This function is meant to pre-load the iterator for the zone init.
- * Specifically it walks through the ranges until we are caught up to the
- * first_init_pfn value and exits there. If we never encounter the value we
- * return false indicating there are no valid ranges left.
+ * This function is meant to pre-load the iterator for the zone init from
+ * a given point.
+ * Specifically it walks through the ranges starting with the initial index
+ * passed to it until we are caught up to the first_init_pfn value and
+ * exits there. If we never encounter the value, we return false, indicating
+ * there are no valid ranges left.
*/
static bool __init
deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
unsigned long *spfn, unsigned long *epfn,
unsigned long first_init_pfn)
{
- u64 j;
+ u64 j = *i;
+
+ if (j == 0)
+ __next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
/*
* Start out by walking through the ranges in this zone that have
* already been initialized. We don't need to do anything with them
* so we just need to flush them out of the system.
*/
- for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) {
+ for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
if (*epfn <= first_init_pfn)
continue;
if (*spfn < first_init_pfn)
@@ -2068,7 +2072,7 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
{
unsigned long spfn, epfn;
struct zone *zone = arg;
- u64 i;
+ u64 i = 0;
deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
@@ -2097,8 +2101,8 @@ static int __init deferred_init_memmap(void *data)
unsigned long first_init_pfn, flags;
unsigned long start = jiffies;
struct zone *zone;
- int zid, max_threads;
- u64 i;
+ int max_threads;
+ u64 i = 0;
/* Bind memory initialisation thread to a local node if possible */
if (!cpumask_empty(cpumask))
@@ -2124,27 +2128,18 @@ static int __init deferred_init_memmap(void *data)
*/
pgdat_resize_unlock(pgdat, &flags);
- /* Only the highest zone is deferred so find it */
- for (zid = 0; zid < MAX_NR_ZONES; zid++) {
- zone = pgdat->node_zones + zid;
- if (first_init_pfn < zone_end_pfn(zone))
- break;
- }
-
- /* If the zone is empty somebody else may have cleared out the zone */
- if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
- first_init_pfn))
- goto zone_empty;
+ /* Only the highest zone is deferred */
+ zone = pgdat->node_zones + pgdat->nr_zones - 1;
max_threads = deferred_page_init_max_threads(cpumask);
- while (spfn < epfn) {
- unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
+ while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
+ first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
struct padata_mt_job job = {
.thread_fn = deferred_init_memmap_chunk,
.fn_arg = zone,
.start = spfn,
- .size = epfn_align - spfn,
+ .size = first_init_pfn - spfn,
.align = PAGES_PER_SECTION,
.min_chunk = PAGES_PER_SECTION,
.max_threads = max_threads,
@@ -2152,12 +2147,10 @@ static int __init deferred_init_memmap(void *data)
};
padata_do_multithreaded(&job);
- deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
- epfn_align);
}
-zone_empty:
+
/* Sanity check that the next zone really is unpopulated */
- WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+ WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone));
pr_info("node %d deferred pages initialised in %ums\n",
pgdat->node_id, jiffies_to_msecs(jiffies - start));
@@ -2184,7 +2177,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
unsigned long spfn, epfn, flags;
unsigned long nr_pages = 0;
- u64 i;
+ u64 i = 0;
/* Only the last zone may have deferred pages */
if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
diff --git a/mm/shmem.c b/mm/shmem.c
index f24dfbd387ba..2faa9daaf54b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3325,10 +3325,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
struct folio *folio;
/*
- * Good, the fallocate(2) manpage permits EINTR: we may have
- * been interrupted because we are using up too much memory.
+ * Check for a fatal signal so that we abort early in OOM
+ * situations. We don't want to abort in case of non-fatal
+ * signals, as a large fallocate can take noticeable time and
+ * e.g. periodic timers may result in fallocate constantly
+ * restarting.
*/
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
error = -EINTR;
else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
error = -ENOMEM;
@@ -4062,14 +4065,14 @@ static const struct constant_table shmem_param_enums_huge[] = {
};
const struct fs_parameter_spec shmem_fs_parameters[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge),
fsparam_u32oct("mode", Opt_mode),
fsparam_string("mpol", Opt_mpol),
fsparam_string("nr_blocks", Opt_nr_blocks),
fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("size", Opt_size),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
fsparam_flag ("inode32", Opt_inode32),
fsparam_flag ("inode64", Opt_inode64),
fsparam_flag ("noswap", Opt_noswap),
@@ -4129,9 +4132,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
ctx->mode = result.uint_32 & 07777;
break;
case Opt_uid:
- kuid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(kuid))
- goto bad_value;
+ kuid = result.uid;
/*
* The requested uid must be representable in the
@@ -4143,9 +4144,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
ctx->uid = kuid;
break;
case Opt_gid:
- kgid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(kgid))
- goto bad_value;
+ kgid = result.gid;
/*
* The requested gid must be representable in the
diff --git a/mm/slab.h b/mm/slab.h
index 3586e6183224..dcdb56b8e7f5 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -97,8 +97,10 @@ struct slab {
SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */
SLAB_MATCH(_refcount, __page_refcount);
-#ifdef CONFIG_SLAB_OBJ_EXT
+#ifdef CONFIG_MEMCG
SLAB_MATCH(memcg_data, obj_exts);
+#elif defined(CONFIG_SLAB_OBJ_EXT)
+SLAB_MATCH(_unused_slab_obj_exts, obj_exts);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
@@ -166,7 +168,7 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)
*/
static inline bool slab_test_pfmemalloc(const struct slab *slab)
{
- return folio_test_active((struct folio *)slab_folio(slab));
+ return folio_test_active(slab_folio(slab));
}
static inline void slab_set_pfmemalloc(struct slab *slab)
@@ -211,7 +213,7 @@ static inline struct slab *virt_to_slab(const void *addr)
static inline int slab_order(const struct slab *slab)
{
- return folio_order((struct folio *)slab_folio(slab));
+ return folio_order(slab_folio(slab));
}
static inline size_t slab_size(const struct slab *slab)
@@ -403,16 +405,18 @@ static inline unsigned int size_index_elem(unsigned int bytes)
* KMALLOC_MAX_CACHE_SIZE and the caller must check that.
*/
static inline struct kmem_cache *
-kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
+kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller)
{
unsigned int index;
+ if (!b)
+ b = &kmalloc_caches[kmalloc_type(flags, caller)];
if (size <= 192)
index = kmalloc_size_index[size_index_elem(size)];
else
index = fls(size - 1);
- return kmalloc_caches[kmalloc_type(flags, caller)][index];
+ return (*b)[index];
}
gfp_t kmalloc_fix_flags(gfp_t flags);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 60268bb258fc..40b582a014b8 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -392,6 +392,98 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
}
EXPORT_SYMBOL(kmem_cache_create);
+static struct kmem_cache *kmem_buckets_cache __ro_after_init;
+
+/**
+ * kmem_buckets_create - Create a set of caches that handle dynamically sized
+ * allocations via kmem_buckets_alloc()
+ * @name: A prefix string which is used in /proc/slabinfo to identify this
+ * cache. The individual caches will have their sizes as the suffix.
+ * @flags: SLAB flags (see kmem_cache_create() for details).
+ * @useroffset: Starting offset within an allocation that may be copied
+ * to/from userspace.
+ * @usersize: How many bytes, starting at @useroffset, may be copied
+ * to/from userspace.
+ * @ctor: A constructor for the objects, run when new allocations are made.
+ *
+ * Cannot be called within an interrupt, but can be interrupted.
+ *
+ * Return: a pointer to the cache on success, NULL on failure. When
+ * CONFIG_SLAB_BUCKETS is not enabled, ZERO_SIZE_PTR is returned, and
+ * subsequent calls to kmem_buckets_alloc() will fall back to kmalloc().
+ * (i.e. callers only need to check for NULL on failure.)
+ */
+kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
+ unsigned int useroffset,
+ unsigned int usersize,
+ void (*ctor)(void *))
+{
+ kmem_buckets *b;
+ int idx;
+
+ /*
+ * When the separate buckets API is not built in, just return
+ * a non-NULL value for the kmem_buckets pointer, which will be
+ * unused when performing allocations.
+ */
+ if (!IS_ENABLED(CONFIG_SLAB_BUCKETS))
+ return ZERO_SIZE_PTR;
+
+ if (WARN_ON(!kmem_buckets_cache))
+ return NULL;
+
+ b = kmem_cache_alloc(kmem_buckets_cache, GFP_KERNEL|__GFP_ZERO);
+ if (WARN_ON(!b))
+ return NULL;
+
+ flags |= SLAB_NO_MERGE;
+
+ for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
+ char *short_size, *cache_name;
+ unsigned int cache_useroffset, cache_usersize;
+ unsigned int size;
+
+ if (!kmalloc_caches[KMALLOC_NORMAL][idx])
+ continue;
+
+ size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size;
+ if (!size)
+ continue;
+
+ short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-');
+ if (WARN_ON(!short_size))
+ goto fail;
+
+ cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1);
+ if (WARN_ON(!cache_name))
+ goto fail;
+
+ if (useroffset >= size) {
+ cache_useroffset = 0;
+ cache_usersize = 0;
+ } else {
+ cache_useroffset = useroffset;
+ cache_usersize = min(size - cache_useroffset, usersize);
+ }
+ (*b)[idx] = kmem_cache_create_usercopy(cache_name, size,
+ 0, flags, cache_useroffset,
+ cache_usersize, ctor);
+ kfree(cache_name);
+ if (WARN_ON(!(*b)[idx]))
+ goto fail;
+ }
+
+ return b;
+
+fail:
+ for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++)
+ kmem_cache_destroy((*b)[idx]);
+ kfree(b);
+
+ return NULL;
+}
+EXPORT_SYMBOL(kmem_buckets_create);
+
#ifdef SLAB_SUPPORTS_SYSFS
/*
* For a given kmem_cache, kmem_cache_destroy() should only be called
@@ -617,11 +709,12 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
s->size = s->object_size = size;
/*
- * For power of two sizes, guarantee natural alignment for kmalloc
- * caches, regardless of SL*B debugging options.
+ * kmalloc caches guarantee alignment of at least the largest
+ * power-of-two divisor of the size. For power-of-two sizes,
+ * it is the size itself.
*/
- if (is_power_of_2(size))
- align = max(align, size);
+ if (flags & SLAB_KMALLOC)
+ align = max(align, 1U << (ffs(size) - 1));
s->align = calculate_alignment(flags, align, size);
#ifdef CONFIG_HARDENED_USERCOPY
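
As a quick sanity check of the new alignment rule above (sizes chosen purely for illustration): for a kmalloc-96 cache, ffs(96) == 6, so 1U << (ffs(96) - 1) == 32, which is the largest power-of-two divisor of 96; for a power-of-two size such as 64, ffs(64) == 7 gives 1U << 6 == 64, so the old natural-alignment guarantee for power-of-two kmalloc caches is preserved.
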
@@ -653,8 +746,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
return s;
}
-struct kmem_cache *
-kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
+kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES] __ro_after_init =
{ /* initialization for https://llvm.org/pr42570 */ };
EXPORT_SYMBOL(kmalloc_caches);
@@ -703,7 +795,7 @@ size_t kmalloc_size_roundup(size_t size)
* The flags don't matter since size_index is common to all.
* Neither does the caller for just getting ->object_size.
*/
- return kmalloc_slab(size, GFP_KERNEL, 0)->object_size;
+ return kmalloc_slab(size, NULL, GFP_KERNEL, 0)->object_size;
}
/* Above the smaller buckets, size is a multiple of page size. */
@@ -932,6 +1024,11 @@ void __init create_kmalloc_caches(void)
/* Kmalloc array is now usable */
slab_state = UP;
+
+ if (IS_ENABLED(CONFIG_SLAB_BUCKETS))
+ kmem_buckets_cache = kmem_cache_create("kmalloc_buckets",
+ sizeof(kmem_buckets),
+ 0, SLAB_NO_MERGE, NULL);
}
/**
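
For reference, a minimal sketch of how a subsystem might use the bucket API introduced in this file; the cache name, initcall, and allocation helper are illustrative and not part of this patch, while the create/alloc signatures mirror kmem_buckets_create() above and the memdup_user() conversion in mm/util.c below:

	static kmem_buckets *msg_buckets __ro_after_init;

	static int __init msg_buckets_init(void)
	{
		/* One dedicated, never-merged set of kmalloc-style caches */
		msg_buckets = kmem_buckets_create("msg", 0, 0, INT_MAX, NULL);
		return 0;
	}
	subsys_initcall(msg_buckets_init);

	static void *msg_alloc(size_t len)
	{
		/* Falls back to the normal kmalloc buckets if CONFIG_SLAB_BUCKETS=n */
		return kmem_buckets_alloc(msg_buckets, len, GFP_KERNEL);
	}

Objects allocated this way are freed with kfree() as usual; only the backing caches differ.
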
diff --git a/mm/slub.c b/mm/slub.c
index b5aaaa3ca756..3520acaf9afa 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -788,8 +788,24 @@ static bool slab_add_kunit_errors(void)
kunit_put_resource(resource);
return true;
}
+
+static bool slab_in_kunit_test(void)
+{
+ struct kunit_resource *resource;
+
+ if (!kunit_get_current_test())
+ return false;
+
+ resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
+ if (!resource)
+ return false;
+
+ kunit_put_resource(resource);
+ return true;
+}
#else
static inline bool slab_add_kunit_errors(void) { return false; }
+static inline bool slab_in_kunit_test(void) { return false; }
#endif
static inline unsigned int size_from_object(struct kmem_cache *s)
@@ -964,11 +980,9 @@ void print_tracking(struct kmem_cache *s, void *object)
static void print_slab_info(const struct slab *slab)
{
- struct folio *folio = (struct folio *)slab_folio(slab);
-
pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
slab, slab->objects, slab->inuse, slab->freelist,
- folio_flags(folio, 0));
+ &slab->__page_flags);
}
/*
@@ -1208,8 +1222,6 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
fault, end - 1, fault - addr,
fault[0], value);
- print_trailer(s, slab, object);
- add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
skip_bug_print:
restore_bytes(s, what, value, fault, end);
@@ -1232,8 +1244,8 @@ skip_bug_print:
* Padding is extended by another word if Redzoning is enabled and
* object_size == inuse.
*
- * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
- * 0xcc (RED_ACTIVE) for objects in use.
+ * We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with
+ * 0xcc (SLUB_RED_ACTIVE) for objects in use.
*
* object + s->inuse
* Meta data starts here.
@@ -1319,15 +1331,16 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
u8 *p = object;
u8 *endobject = object + s->object_size;
unsigned int orig_size, kasan_meta_size;
+ int ret = 1;
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, slab, object, "Left Redzone",
object - s->red_left_pad, val, s->red_left_pad))
- return 0;
+ ret = 0;
if (!check_bytes_and_report(s, slab, object, "Right Redzone",
endobject, val, s->inuse - s->object_size))
- return 0;
+ ret = 0;
if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
orig_size = get_orig_size(s, object);
@@ -1336,14 +1349,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
!check_bytes_and_report(s, slab, object,
"kmalloc Redzone", p + orig_size,
val, s->object_size - orig_size)) {
- return 0;
+ ret = 0;
}
}
} else {
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
- check_bytes_and_report(s, slab, p, "Alignment padding",
+ if (!check_bytes_and_report(s, slab, p, "Alignment padding",
endobject, POISON_INUSE,
- s->inuse - s->object_size);
+ s->inuse - s->object_size))
+ ret = 0;
}
}
@@ -1359,27 +1373,25 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
!check_bytes_and_report(s, slab, p, "Poison",
p + kasan_meta_size, POISON_FREE,
s->object_size - kasan_meta_size - 1))
- return 0;
+ ret = 0;
if (kasan_meta_size < s->object_size &&
!check_bytes_and_report(s, slab, p, "End Poison",
p + s->object_size - 1, POISON_END, 1))
- return 0;
+ ret = 0;
}
/*
* check_pad_bytes cleans up on its own.
*/
- check_pad_bytes(s, slab, p);
+ if (!check_pad_bytes(s, slab, p))
+ ret = 0;
}
- if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
- /*
- * Object and freepointer overlap. Cannot check
- * freepointer while object is allocated.
- */
- return 1;
-
- /* Check free pointer validity */
- if (!check_valid_pointer(s, slab, get_freepointer(s, p))) {
+ /*
+ * Cannot check freepointer while object is allocated if
+ * object and freepointer overlap.
+ */
+ if ((freeptr_outside_object(s) || val != SLUB_RED_ACTIVE) &&
+ !check_valid_pointer(s, slab, get_freepointer(s, p))) {
object_err(s, slab, p, "Freepointer corrupt");
/*
* No choice but to zap it and thus lose the remainder
@@ -1387,9 +1399,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
* another error because the object count is now wrong.
*/
set_freepointer(s, p, NULL);
- return 0;
+ ret = 0;
}
- return 1;
+
+ if (!ret && !slab_in_kunit_test()) {
+ print_trailer(s, slab, object);
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+ }
+
+ return ret;
}
static int check_slab(struct kmem_cache *s, struct slab *slab)
@@ -2026,6 +2044,27 @@ static inline bool need_slab_obj_ext(void)
return false;
}
+#else /* CONFIG_SLAB_OBJ_EXT */
+
+static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab)
+{
+ return 0;
+}
+
+static inline void free_slab_obj_exts(struct slab *slab)
+{
+}
+
+static inline bool need_slab_obj_ext(void)
+{
+ return false;
+}
+
+#endif /* CONFIG_SLAB_OBJ_EXT */
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+
static inline struct slabobj_ext *
prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p)
{
@@ -2051,10 +2090,26 @@ prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p)
}
static inline void
+alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
+{
+ if (need_slab_obj_ext()) {
+ struct slabobj_ext *obj_exts;
+
+ obj_exts = prepare_slab_obj_exts_hook(s, flags, object);
+ /*
+ * Currently obj_exts is used only for allocation profiling.
+ * If other users appear then mem_alloc_profiling_enabled()
+ * check should be added before alloc_tag_add().
+ */
+ if (likely(obj_exts))
+ alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size);
+ }
+}
+
+static inline void
alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
int objects)
{
-#ifdef CONFIG_MEM_ALLOC_PROFILING
struct slabobj_ext *obj_exts;
int i;
@@ -2070,30 +2125,13 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
alloc_tag_sub(&obj_exts[off].ref, s->size);
}
-#endif
}
-#else /* CONFIG_SLAB_OBJ_EXT */
-
-static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
- gfp_t gfp, bool new_slab)
-{
- return 0;
-}
+#else /* CONFIG_MEM_ALLOC_PROFILING */
-static inline void free_slab_obj_exts(struct slab *slab)
-{
-}
-
-static inline bool need_slab_obj_ext(void)
+static inline void
+alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
{
- return false;
-}
-
-static inline struct slabobj_ext *
-prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p)
-{
- return NULL;
}
static inline void
@@ -2102,7 +2140,8 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
{
}
-#endif /* CONFIG_SLAB_OBJ_EXT */
+#endif /* CONFIG_MEM_ALLOC_PROFILING */
+
#ifdef CONFIG_MEMCG
@@ -2550,7 +2589,7 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab)
*/
static inline bool slab_test_node_partial(const struct slab *slab)
{
- return folio_test_workingset((struct folio *)slab_folio(slab));
+ return folio_test_workingset(slab_folio(slab));
}
static inline void slab_set_node_partial(struct slab *slab)
@@ -3953,20 +3992,7 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, init_flags);
kmsan_slab_alloc(s, p[i], init_flags);
-#ifdef CONFIG_MEM_ALLOC_PROFILING
- if (need_slab_obj_ext()) {
- struct slabobj_ext *obj_exts;
-
- obj_exts = prepare_slab_obj_exts_hook(s, flags, p[i]);
- /*
- * Currently obj_exts is used only for allocation profiling.
- * If other users appear then mem_alloc_profiling_enabled()
- * check should be added before alloc_tag_add().
- */
- if (likely(obj_exts))
- alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size);
- }
-#endif
+ alloc_tagging_slab_alloc_hook(s, p[i], flags);
}
return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
@@ -4064,7 +4090,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
* directly to the page allocator. We use __GFP_COMP, because we will need to
* know the allocation order to free the pages properly in kfree.
*/
-static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
+static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct folio *folio;
void *ptr = NULL;
@@ -4089,35 +4115,35 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
return ptr;
}
-void *kmalloc_large_noprof(size_t size, gfp_t flags)
+void *__kmalloc_large_noprof(size_t size, gfp_t flags)
{
- void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
+ void *ret = ___kmalloc_large_node(size, flags, NUMA_NO_NODE);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, NUMA_NO_NODE);
return ret;
}
-EXPORT_SYMBOL(kmalloc_large_noprof);
+EXPORT_SYMBOL(__kmalloc_large_noprof);
-void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
+void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
{
- void *ret = __kmalloc_large_node(size, flags, node);
+ void *ret = ___kmalloc_large_node(size, flags, node);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, node);
return ret;
}
-EXPORT_SYMBOL(kmalloc_large_node_noprof);
+EXPORT_SYMBOL(__kmalloc_large_node_noprof);
static __always_inline
-void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
+void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node,
unsigned long caller)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
- ret = __kmalloc_large_node(size, flags, node);
+ ret = __kmalloc_large_node_noprof(size, flags, node);
trace_kmalloc(caller, ret, size,
PAGE_SIZE << get_order(size), flags, node);
return ret;
@@ -4126,34 +4152,34 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
if (unlikely(!size))
return ZERO_SIZE_PTR;
- s = kmalloc_slab(size, flags, caller);
+ s = kmalloc_slab(size, b, flags, caller);
ret = slab_alloc_node(s, NULL, flags, node, caller, size);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret;
}
-
-void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node)
+void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
{
- return __do_kmalloc_node(size, flags, node, _RET_IP_);
+ return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node_noprof);
void *__kmalloc_noprof(size_t size, gfp_t flags)
{
- return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
+ return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_noprof);
-void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags,
- int node, unsigned long caller)
+void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags,
+ int node, unsigned long caller)
{
- return __do_kmalloc_node(size, flags, node, caller);
+ return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller);
+
}
-EXPORT_SYMBOL(kmalloc_node_track_caller_noprof);
+EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof);
-void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
+void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE,
_RET_IP_, size);
@@ -4163,10 +4189,10 @@ void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
-EXPORT_SYMBOL(kmalloc_trace_noprof);
+EXPORT_SYMBOL(__kmalloc_cache_noprof);
-void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
- int node, size_t size)
+void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
+ int node, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
@@ -4175,7 +4201,7 @@ void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
-EXPORT_SYMBOL(kmalloc_node_trace_noprof);
+EXPORT_SYMBOL(__kmalloc_cache_node_noprof);
static noinline void free_to_partial_list(
struct kmem_cache *s, struct slab *slab,
@@ -5160,10 +5186,9 @@ static int calculate_sizes(struct kmem_cache *s)
*/
s->inuse = size;
- if (slub_debug_orig_size(s) ||
- (flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
- ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
- s->ctor) {
+ if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || s->ctor ||
+ ((flags & SLAB_RED_ZONE) &&
+ (s->object_size < sizeof(void *) || slub_debug_orig_size(s)))) {
/*
* Relocate free pointer after the object if it is not
* permitted to overwrite the first word of the object on
@@ -5171,7 +5196,9 @@ static int calculate_sizes(struct kmem_cache *s)
*
* This is the case if we do RCU, have a constructor or
* destructor, are poisoning the objects, or are
- * redzoning an object smaller than sizeof(void *).
+ * redzoning an object smaller than sizeof(void *) or are
+ * redzoning an object with slub_debug_orig_size() enabled,
+ * in which case the right redzone may be extended.
*
* The assumption that s->offset >= s->inuse means free
* pointer is outside of the object is used in the
diff --git a/mm/truncate.c b/mm/truncate.c
index dfb3d1f4d456..4d61fbdd4b2f 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -216,7 +216,8 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
* doing a complex calculation here, and then doing the zeroing
* anyway if the page split fails.
*/
- folio_zero_range(folio, offset, length);
+ if (!mapping_inaccessible(folio->mapping))
+ folio_zero_range(folio, offset, length);
if (folio_has_private(folio))
folio_invalidate(folio, offset, length);
diff --git a/mm/util.c b/mm/util.c
index 172213f27c31..bc488f0121a7 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -26,6 +26,8 @@
#include <linux/uaccess.h>
+#include <kunit/visibility.h>
+
#include "internal.h"
#include "swap.h"
@@ -198,6 +200,16 @@ char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
}
EXPORT_SYMBOL(kmemdup_nul);
+static kmem_buckets *user_buckets __ro_after_init;
+
+static int __init init_user_buckets(void)
+{
+ user_buckets = kmem_buckets_create("memdup_user", 0, 0, INT_MAX, NULL);
+
+ return 0;
+}
+subsys_initcall(init_user_buckets);
+
/**
* memdup_user - duplicate memory region from user space
*
@@ -211,7 +223,7 @@ void *memdup_user(const void __user *src, size_t len)
{
void *p;
- p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
+ p = kmem_buckets_alloc_track_caller(user_buckets, len, GFP_USER | __GFP_NOWARN);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -237,7 +249,7 @@ void *vmemdup_user(const void __user *src, size_t len)
{
void *p;
- p = kvmalloc(len, GFP_USER);
+ p = kmem_buckets_valloc(user_buckets, len, GFP_USER);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -482,6 +494,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
clear_bit(MMF_TOPDOWN, &mm->flags);
}
#endif
+#ifdef CONFIG_MMU
+EXPORT_SYMBOL_IF_KUNIT(arch_pick_mmap_layout);
+#endif
/**
* __account_locked_vm - account locked pages to an mm's locked_vm
@@ -594,9 +609,10 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
EXPORT_SYMBOL(vm_mmap);
/**
- * kvmalloc_node - attempt to allocate physically contiguous memory, but upon
+ * __kvmalloc_node - attempt to allocate physically contiguous memory, but upon
* failure, fall back to non-contiguous (vmalloc) allocation.
* @size: size of the request.
+ * @b: which set of kmalloc buckets to allocate from.
* @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
* @node: numa node to allocate from
*
@@ -609,7 +625,7 @@ EXPORT_SYMBOL(vm_mmap);
*
* Return: pointer to the allocated memory or %NULL in case of failure
*/
-void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
+void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
{
gfp_t kmalloc_flags = flags;
void *ret;
@@ -631,7 +647,7 @@ void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
kmalloc_flags &= ~__GFP_NOFAIL;
}
- ret = kmalloc_node_noprof(size, kmalloc_flags, node);
+ ret = __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, b), kmalloc_flags, node);
/*
* It doesn't really make sense to fallback to vmalloc for sub page
@@ -660,7 +676,7 @@ void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
node, __builtin_return_address(0));
}
-EXPORT_SYMBOL(kvmalloc_node_noprof);
+EXPORT_SYMBOL(__kvmalloc_node_noprof);
/**
* kvfree() - Free memory.