Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 444
1 file changed, 221 insertions, 223 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index ba19430dd4ea..2eff8b51a945 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -37,7 +37,6 @@ #include <linux/oom.h> #include <linux/swapfile.h> #include <linux/export.h> -#include <linux/swap_slots.h> #include <linux/sort.h> #include <linux/completion.h> #include <linux/suspend.h> @@ -116,6 +115,18 @@ static atomic_t proc_poll_event = ATOMIC_INIT(0); atomic_t nr_rotate_swap = ATOMIC_INIT(0); +struct percpu_swap_cluster { + struct swap_info_struct *si[SWAP_NR_ORDERS]; + unsigned long offset[SWAP_NR_ORDERS]; + local_lock_t lock; +}; + +static DEFINE_PER_CPU(struct percpu_swap_cluster, percpu_swap_cluster) = { + .si = { NULL }, + .offset = { SWAP_ENTRY_INVALID }, + .lock = INIT_LOCAL_LOCK(), +}; + static struct swap_info_struct *swap_type_to_swap_info(int type) { if (type >= MAX_SWAPFILES) @@ -158,10 +169,8 @@ static long swap_usage_in_pages(struct swap_info_struct *si) #define TTRS_UNMAPPED 0x2 /* Reclaim the swap entry if swap is getting full */ #define TTRS_FULL 0x4 -/* Reclaim directly, bypass the slot cache and don't touch device lock */ -#define TTRS_DIRECT 0x8 -static bool swap_is_has_cache(struct swap_info_struct *si, +static bool swap_only_has_cache(struct swap_info_struct *si, unsigned long offset, int nr_pages) { unsigned char *map = si->swap_map + offset; @@ -210,6 +219,7 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si, int ret, nr_pages; bool need_reclaim; +again: folio = filemap_get_folio(address_space, swap_cache_index(entry)); if (IS_ERR(folio)) return 0; @@ -227,8 +237,16 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si, if (!folio_trylock(folio)) goto out; - /* offset could point to the middle of a large folio */ + /* + * Offset could point to the middle of a large folio, or folio + * may no longer point to the expected offset before it's locked. + */ entry = folio->swap; + if (offset < swp_offset(entry) || offset >= swp_offset(entry) + nr_pages) { + folio_unlock(folio); + folio_put(folio); + goto again; + } offset = swp_offset(entry); need_reclaim = ((flags & TTRS_ANYWAY) || @@ -243,28 +261,13 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si, * reference or pending writeback, and can't be allocated to others. */ ci = lock_cluster(si, offset); - need_reclaim = swap_is_has_cache(si, offset, nr_pages); + need_reclaim = swap_only_has_cache(si, offset, nr_pages); unlock_cluster(ci); if (!need_reclaim) goto out_unlock; - if (!(flags & TTRS_DIRECT)) { - /* Free through slot cache */ - delete_from_swap_cache(folio); - folio_set_dirty(folio); - ret = nr_pages; - goto out_unlock; - } - - xa_lock_irq(&address_space->i_pages); - __delete_from_swap_cache(folio, entry, NULL); - xa_unlock_irq(&address_space->i_pages); - folio_ref_sub(folio, nr_pages); + delete_from_swap_cache(folio); folio_set_dirty(folio); - - ci = lock_cluster(si, offset); - swap_entry_range_free(si, ci, entry, nr_pages); - unlock_cluster(ci); ret = nr_pages; out_unlock: folio_unlock(folio); @@ -479,15 +482,6 @@ static void move_cluster(struct swap_info_struct *si, static void swap_cluster_schedule_discard(struct swap_info_struct *si, struct swap_cluster_info *ci) { - unsigned int idx = cluster_index(si, ci); - /* - * If scan_swap_map_slots() can't find a free cluster, it will check - * si->swap_map directly. To make sure the discarding cluster isn't - * taken by scan_swap_map_slots(), mark the swap entries bad (occupied). 
- * It will be cleared after discard - */ - memset(si->swap_map + idx * SWAPFILE_CLUSTER, - SWAP_MAP_BAD, SWAPFILE_CLUSTER); VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE); move_cluster(si, ci, &si->discard_clusters, CLUSTER_FLAG_DISCARD); schedule_work(&si->discard_work); @@ -556,7 +550,7 @@ static bool swap_do_scheduled_discard(struct swap_info_struct *si) ci = list_first_entry(&si->discard_clusters, struct swap_cluster_info, list); /* * Delete the cluster from list to prepare for discard, but keep - * the CLUSTER_FLAG_DISCARD flag, there could be percpu_cluster + * the CLUSTER_FLAG_DISCARD flag, percpu_swap_cluster could be * pointing to it, or ran into by relocate_cluster. */ list_del(&ci->list); @@ -571,8 +565,6 @@ static bool swap_do_scheduled_discard(struct swap_info_struct *si) * return the cluster to allocation list. */ ci->flags = CLUSTER_FLAG_NONE; - memset(si->swap_map + idx * SWAPFILE_CLUSTER, - 0, SWAPFILE_CLUSTER); __free_cluster(si, ci); spin_unlock(&ci->lock); ret = true; @@ -653,7 +645,8 @@ static void relocate_cluster(struct swap_info_struct *si, return; if (!ci->count) { - free_cluster(si, ci); + if (ci->flags != CLUSTER_FLAG_FREE) + free_cluster(si, ci); } else if (ci->count != SWAPFILE_CLUSTER) { if (ci->flags != CLUSTER_FLAG_FRAG) move_cluster(si, ci, &si->frag_clusters[ci->order], @@ -698,7 +691,7 @@ static bool cluster_reclaim_range(struct swap_info_struct *si, offset++; break; case SWAP_HAS_CACHE: - nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY | TTRS_DIRECT); + nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); if (nr_reclaim > 0) offset += nr_reclaim; else @@ -729,6 +722,9 @@ static bool cluster_scan_range(struct swap_info_struct *si, unsigned long offset, end = start + nr_pages; unsigned char *map = si->swap_map; + if (cluster_is_empty(ci)) + return true; + for (offset = start; offset < end; offset++) { switch (READ_ONCE(map[offset])) { case 0: @@ -820,14 +816,15 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si, out: relocate_cluster(si, ci); unlock_cluster(ci); - if (si->flags & SWP_SOLIDSTATE) - __this_cpu_write(si->percpu_cluster->next[order], next); - else + if (si->flags & SWP_SOLIDSTATE) { + this_cpu_write(percpu_swap_cluster.offset[order], next); + this_cpu_write(percpu_swap_cluster.si[order], si); + } else { si->global_cluster->next[order] = next; + } return found; } -/* Return true if reclaimed a whole cluster */ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) { long to_scan = 1; @@ -848,7 +845,7 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) { spin_unlock(&ci->lock); nr_reclaim = __try_to_reclaim_swap(si, offset, - TTRS_ANYWAY | TTRS_DIRECT); + TTRS_ANYWAY); spin_lock(&ci->lock); if (nr_reclaim) { offset += abs(nr_reclaim); @@ -858,6 +855,10 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) offset++; } + /* in case no swap cache is reclaimed */ + if (ci->flags == CLUSTER_FLAG_NONE) + relocate_cluster(si, ci); + unlock_cluster(ci); if (to_scan <= 0) break; @@ -874,27 +875,29 @@ static void swap_reclaim_work(struct work_struct *work) } /* - * Try to get swap entries with specified order from current cpu's swap entry - * pool (a cluster). This might involve allocating a new cluster for current CPU - * too. + * Try to allocate swap entries with specified order and try set a new + * cluster for current CPU too. 
*/ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int order, unsigned char usage) { struct swap_cluster_info *ci; - unsigned int offset, found = 0; + unsigned int offset = SWAP_ENTRY_INVALID, found = SWAP_ENTRY_INVALID; - if (si->flags & SWP_SOLIDSTATE) { - /* Fast path using per CPU cluster */ - local_lock(&si->percpu_cluster->lock); - offset = __this_cpu_read(si->percpu_cluster->next[order]); - } else { + /* + * Swapfile is not block device so unable + * to allocate large entries. + */ + if (order && !(si->flags & SWP_BLKDEV)) + return 0; + + if (!(si->flags & SWP_SOLIDSTATE)) { /* Serialize HDD SWAP allocation for each device. */ spin_lock(&si->global_cluster_lock); offset = si->global_cluster->next[order]; - } + if (offset == SWAP_ENTRY_INVALID) + goto new_cluster; - if (offset) { ci = lock_cluster(si, offset); /* Cluster could have been used by another order */ if (cluster_is_usable(ci, order)) { @@ -985,9 +988,7 @@ new_cluster: } } done: - if (si->flags & SWP_SOLIDSTATE) - local_unlock(&si->percpu_cluster->lock); - else + if (!(si->flags & SWP_SOLIDSTATE)) spin_unlock(&si->global_cluster_lock); return found; } @@ -1101,7 +1102,7 @@ static void swap_usage_sub(struct swap_info_struct *si, unsigned int nr_entries) /* * If device is not full, and SWAP_USAGE_OFFLIST_BIT is set, - * remove it from the plist. + * add it to the plist. */ if (unlikely(val & SWAP_USAGE_OFFLIST_BIT)) add_to_avail_list(si, false); @@ -1155,61 +1156,6 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, swap_usage_sub(si, nr_entries); } -static int cluster_alloc_swap(struct swap_info_struct *si, - unsigned char usage, int nr, - swp_entry_t slots[], int order) -{ - int n_ret = 0; - - while (n_ret < nr) { - unsigned long offset = cluster_alloc_swap_entry(si, order, usage); - - if (!offset) - break; - slots[n_ret++] = swp_entry(si->type, offset); - } - - return n_ret; -} - -static int scan_swap_map_slots(struct swap_info_struct *si, - unsigned char usage, int nr, - swp_entry_t slots[], int order) -{ - unsigned int nr_pages = 1 << order; - - /* - * We try to cluster swap pages by allocating them sequentially - * in swap. Once we've allocated SWAPFILE_CLUSTER pages this - * way, however, we resort to first-free allocation, starting - * a new cluster. This prevents us from scattering swap pages - * all over the entire swap partition, so that we reduce - * overall disk seek times between swap pages. -- sct - * But we do now try to find an empty cluster. -Andrea - * And we let swap pages go all over an SSD partition. Hugh - */ - if (order > 0) { - /* - * Should not even be attempting large allocations when huge - * page swap is disabled. Warn and fail the allocation. - */ - if (!IS_ENABLED(CONFIG_THP_SWAP) || - nr_pages > SWAPFILE_CLUSTER) { - VM_WARN_ON_ONCE(1); - return 0; - } - - /* - * Swapfile is not block device so unable - * to allocate large entries. - */ - if (!(si->flags & SWP_BLKDEV)) - return 0; - } - - return cluster_alloc_swap(si, usage, nr, slots, order); -} - static bool get_swap_device_info(struct swap_info_struct *si) { if (!percpu_ref_tryget_live(&si->users)) @@ -1226,39 +1172,65 @@ static bool get_swap_device_info(struct swap_info_struct *si) return true; } -int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) +/* + * Fast path try to get swap entries with specified order from current + * CPU's swap entry pool (a cluster). 
+ */ +static bool swap_alloc_fast(swp_entry_t *entry, + int order) { - int order = swap_entry_order(entry_order); - unsigned long size = 1 << order; - struct swap_info_struct *si, *next; - long avail_pgs; - int n_ret = 0; - int node; + struct swap_cluster_info *ci; + struct swap_info_struct *si; + unsigned int offset, found = SWAP_ENTRY_INVALID; - spin_lock(&swap_avail_lock); + /* + * Once allocated, swap_info_struct will never be completely freed, + * so checking it's liveness by get_swap_device_info is enough. + */ + si = this_cpu_read(percpu_swap_cluster.si[order]); + offset = this_cpu_read(percpu_swap_cluster.offset[order]); + if (!si || !offset || !get_swap_device_info(si)) + return false; - avail_pgs = atomic_long_read(&nr_swap_pages) / size; - if (avail_pgs <= 0) { - spin_unlock(&swap_avail_lock); - goto noswap; + ci = lock_cluster(si, offset); + if (cluster_is_usable(ci, order)) { + if (cluster_is_empty(ci)) + offset = cluster_offset(si, ci); + found = alloc_swap_scan_cluster(si, ci, offset, order, SWAP_HAS_CACHE); + if (found) + *entry = swp_entry(si->type, found); + } else { + unlock_cluster(ci); } - n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs); + put_swap_device(si); + return !!found; +} - atomic_long_sub(n_goal * size, &nr_swap_pages); +/* Rotate the device and switch to a new cluster */ +static bool swap_alloc_slow(swp_entry_t *entry, + int order) +{ + int node; + unsigned long offset; + struct swap_info_struct *si, *next; -start_over: node = numa_node_id(); + spin_lock(&swap_avail_lock); +start_over: plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { - /* requeue si to after same-priority siblings */ + /* Rotate the device and switch to a new cluster */ plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); spin_unlock(&swap_avail_lock); if (get_swap_device_info(si)) { - n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, - n_goal, swp_entries, order); + offset = cluster_alloc_swap_entry(si, order, SWAP_HAS_CACHE); put_swap_device(si); - if (n_ret || size > 1) - goto check_out; + if (offset) { + *entry = swp_entry(si->type, offset); + return true; + } + if (order) + return false; } spin_lock(&swap_avail_lock); @@ -1276,15 +1248,68 @@ start_over: if (plist_node_empty(&next->avail_lists[node])) goto start_over; } - spin_unlock(&swap_avail_lock); + return false; +} + +/** + * folio_alloc_swap - allocate swap space for a folio + * @folio: folio we want to move to swap + * @gfp: gfp mask for shadow nodes + * + * Allocate swap space for the folio and add the folio to the + * swap cache. + * + * Context: Caller needs to hold the folio lock. + * Return: Whether the folio was added to the swap cache. + */ +int folio_alloc_swap(struct folio *folio, gfp_t gfp) +{ + unsigned int order = folio_order(folio); + unsigned int size = 1 << order; + swp_entry_t entry = {}; + + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio); + + /* + * Should not even be attempting large allocations when huge + * page swap is disabled. Warn and fail the allocation. 
+ */ + if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) { + VM_WARN_ON_ONCE(1); + return -EINVAL; + } + + local_lock(&percpu_swap_cluster.lock); + if (!swap_alloc_fast(&entry, order)) + swap_alloc_slow(&entry, order); + local_unlock(&percpu_swap_cluster.lock); -check_out: - if (n_ret < n_goal) - atomic_long_add((long)(n_goal - n_ret) * size, - &nr_swap_pages); -noswap: - return n_ret; + /* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */ + if (mem_cgroup_try_charge_swap(folio, entry)) + goto out_free; + + if (!entry.val) + return -ENOMEM; + + /* + * XArray node allocations from PF_MEMALLOC contexts could + * completely exhaust the page allocator. __GFP_NOMEMALLOC + * stops emergency reserves from being allocated. + * + * TODO: this could cause a theoretical memory reclaim + * deadlock in the swap out path. + */ + if (add_to_swap_cache(folio, entry, gfp | __GFP_NOMEMALLOC, NULL)) + goto out_free; + + atomic_long_sub(size, &nr_swap_pages); + return 0; + +out_free: + put_swap_folio(folio, entry); + return -ENOMEM; } static struct swap_info_struct *_swap_info_get(swp_entry_t entry) @@ -1569,7 +1594,7 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry) return; ci = lock_cluster(si, offset); - if (swap_is_has_cache(si, offset, size)) + if (swap_only_has_cache(si, offset, size)) swap_entry_range_free(si, ci, entry, size); else { for (int i = 0; i < size; i++, entry.val++) { @@ -1580,25 +1605,6 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry) unlock_cluster(ci); } -void swapcache_free_entries(swp_entry_t *entries, int n) -{ - int i; - struct swap_cluster_info *ci; - struct swap_info_struct *si = NULL; - - if (n <= 0) - return; - - for (i = 0; i < n; ++i) { - si = _swap_info_get(entries[i]); - if (si) { - ci = lock_cluster(si, swp_offset(entries[i])); - swap_entry_range_free(si, ci, entries[i], 1); - unlock_cluster(ci); - } - } -} - int __swap_count(swp_entry_t entry) { struct swap_info_struct *si = swp_swap_info(entry); @@ -1612,7 +1618,7 @@ int __swap_count(swp_entry_t entry) * This does not give an exact answer when swap count is continued, * but does include the high COUNT_CONTINUED flag to allow for that. 
*/ -int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) +bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry) { pgoff_t offset = swp_offset(entry); struct swap_cluster_info *ci; @@ -1621,7 +1627,7 @@ int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) ci = lock_cluster(si, offset); count = swap_count(si->swap_map[offset]); unlock_cluster(ci); - return count; + return !!count; } /* @@ -1707,7 +1713,7 @@ static bool folio_swapped(struct folio *folio) return false; if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio))) - return swap_swapcount(si, entry) != 0; + return swap_entry_swapped(si, entry); return swap_page_trans_huge_swapped(si, entry, folio_order(folio)); } @@ -1781,9 +1787,6 @@ void free_swap_and_cache_nr(swp_entry_t entry, int nr) bool any_only_cache = false; unsigned long offset; - if (non_swap_entry(entry)) - return; - si = get_swap_device(entry); if (!si) return; @@ -1842,6 +1845,7 @@ out: swp_entry_t get_swap_page_of_type(int type) { struct swap_info_struct *si = swap_type_to_swap_info(type); + unsigned long offset; swp_entry_t entry = {0}; if (!si) @@ -1849,8 +1853,13 @@ swp_entry_t get_swap_page_of_type(int type) /* This is called for allocating swap entry, not cache */ if (get_swap_device_info(si)) { - if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0)) - atomic_long_dec(&nr_swap_pages); + if (si->flags & SWP_WRITEOK) { + offset = cluster_alloc_swap_entry(si, 0, 1); + if (offset) { + entry = swp_entry(si->type, offset); + atomic_long_dec(&nr_swap_pages); + } + } put_swap_device(si); } fail: @@ -2611,21 +2620,6 @@ static void reinsert_swap_info(struct swap_info_struct *si) spin_unlock(&swap_lock); } -static bool __has_usable_swap(void) -{ - return !plist_head_empty(&swap_active_head); -} - -bool has_usable_swap(void) -{ - bool ret; - - spin_lock(&swap_lock); - ret = __has_usable_swap(); - spin_unlock(&swap_lock); - return ret; -} - /* * Called after clearing SWP_WRITEOK, ensures cluster_alloc_range * see the updated flags, so there will be no more allocations. @@ -2641,10 +2635,31 @@ static void wait_for_allocation(struct swap_info_struct *si) for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) { ci = lock_cluster(si, offset); unlock_cluster(ci); - offset += SWAPFILE_CLUSTER; } } +/* + * Called after swap device's reference count is dead, so + * neither scan nor allocation will use it. + */ +static void flush_percpu_swap_cluster(struct swap_info_struct *si) +{ + int cpu, i; + struct swap_info_struct **pcp_si; + + for_each_possible_cpu(cpu) { + pcp_si = per_cpu_ptr(percpu_swap_cluster.si, cpu); + /* + * Invalidate the percpu swap cluster cache, si->users + * is dead, so no new user will point to it, just flush + * any existing user. + */ + for (i = 0; i < SWAP_NR_ORDERS; i++) + cmpxchg(&pcp_si[i], si, NULL); + } +} + + SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; @@ -2717,8 +2732,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) wait_for_allocation(p); - disable_swap_slots_cache_lock(); - set_current_oom_origin(); err = try_to_unuse(p->type); clear_current_oom_origin(); @@ -2726,12 +2739,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) if (err) { /* re-insert swap space back into swap_list */ reinsert_swap_info(p); - reenable_swap_slots_cache_unlock(); goto out_dput; } - reenable_swap_slots_cache_unlock(); - /* * Wait for swap operations protected by get/put_swap_device() * to complete. 
Because of synchronize_rcu() here, all swap @@ -2746,6 +2756,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) flush_work(&p->discard_work); flush_work(&p->reclaim_work); + flush_percpu_swap_cluster(p); destroy_swap_extents(p); if (p->flags & SWP_CONTINUED) @@ -2773,8 +2784,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) arch_swap_invalidate_area(p->type); zswap_swapoff(p->type); mutex_unlock(&swapon_mutex); - free_percpu(p->percpu_cluster); - p->percpu_cluster = NULL; kfree(p->global_cluster); p->global_cluster = NULL; vfree(swap_map); @@ -3120,13 +3129,6 @@ static unsigned long read_swap_header(struct swap_info_struct *si, return maxpages; } -#define SWAP_CLUSTER_INFO_COLS \ - DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info)) -#define SWAP_CLUSTER_SPACE_COLS \ - DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER) -#define SWAP_CLUSTER_COLS \ - max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS) - static int setup_swap_map_and_extents(struct swap_info_struct *si, union swap_header *swap_header, unsigned char *swap_map, @@ -3166,14 +3168,21 @@ static int setup_swap_map_and_extents(struct swap_info_struct *si, return nr_extents; } +#define SWAP_CLUSTER_INFO_COLS \ + DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info)) +#define SWAP_CLUSTER_SPACE_COLS \ + DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER) +#define SWAP_CLUSTER_COLS \ + max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS) + static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, union swap_header *swap_header, unsigned long maxpages) { unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); struct swap_cluster_info *cluster_info; - unsigned long i, j, k, idx; - int cpu, err = -ENOMEM; + unsigned long i, j, idx; + int err = -ENOMEM; cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL); if (!cluster_info) @@ -3182,20 +3191,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, for (i = 0; i < nr_clusters; i++) spin_lock_init(&cluster_info[i].lock); - if (si->flags & SWP_SOLIDSTATE) { - si->percpu_cluster = alloc_percpu(struct percpu_cluster); - if (!si->percpu_cluster) - goto err_free; - - for_each_possible_cpu(cpu) { - struct percpu_cluster *cluster; - - cluster = per_cpu_ptr(si->percpu_cluster, cpu); - for (i = 0; i < SWAP_NR_ORDERS; i++) - cluster->next[i] = SWAP_ENTRY_INVALID; - local_lock_init(&cluster->lock); - } - } else { + if (!(si->flags & SWP_SOLIDSTATE)) { si->global_cluster = kmalloc(sizeof(*si->global_cluster), GFP_KERNEL); if (!si->global_cluster) @@ -3233,8 +3229,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, * Reduce false cache line sharing between cluster_info and * sharing same address space. */ - for (k = 0; k < SWAP_CLUSTER_COLS; k++) { - j = k % SWAP_CLUSTER_COLS; + for (j = 0; j < SWAP_CLUSTER_COLS; j++) { for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) { struct swap_cluster_info *ci; idx = i * SWAP_CLUSTER_COLS + j; @@ -3449,8 +3444,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) - prio = - (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; + prio = swap_flags & SWAP_FLAG_PRIO_MASK; enable_swap_info(si, prio, swap_map, cluster_info, zeromap); pr_info("Adding %uk swap on %s. 
Priority:%d extents:%d across:%lluk %s%s%s%s\n", @@ -3474,8 +3468,6 @@ free_swap_address_space: bad_swap_unlock_inode: inode_unlock(inode); bad_swap: - free_percpu(si->percpu_cluster); - si->percpu_cluster = NULL; kfree(si->global_cluster); si->global_cluster = NULL; inode = NULL; @@ -3499,8 +3491,6 @@ out: putname(name); if (inode) inode_unlock(inode); - if (!error) - enable_swap_slots_cache(); return error; } @@ -3527,7 +3517,6 @@ void si_swapinfo(struct sysinfo *val) * Returns error code in following case. * - success -> 0 * - swp_entry is invalid -> EINVAL - * - swp_entry is migration entry -> EINVAL * - swap-cache reference is requested but there is already one. -> EEXIST * - swap-cache reference is requested but the entry is not used. -> ENOENT * - swap-mapped reference requested but needs continued swap count. -> ENOMEM @@ -3542,6 +3531,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr) int err, i; si = swp_swap_info(entry); + if (WARN_ON_ONCE(!si)) { + pr_err("%s%08lx\n", Bad_file, entry.val); + return -EINVAL; + } offset = swp_offset(entry); VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER); @@ -3787,8 +3780,8 @@ outer: * into, carry if so, or else fail until a new continuation page is allocated; * when the original swap_map count is decremented from 0 with continuation, * borrow from the continuation and report whether it still holds more. - * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster - * lock. + * Called while __swap_duplicate() or caller of __swap_entry_free_locked() + * holds cluster lock. */ static bool swap_count_continued(struct swap_info_struct *si, pgoff_t offset, unsigned char count) @@ -3893,6 +3886,11 @@ static void free_swap_count_continuations(struct swap_info_struct *si) } #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) +static bool __has_usable_swap(void) +{ + return !plist_head_empty(&swap_active_head); +} + void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { struct swap_info_struct *si, *next; |
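
The heart of the series is visible in the first hunk: the per-device si->percpu_cluster and the old swap slots cache (linux/swap_slots.h) are replaced by one global per-CPU cache that remembers, for every allocation order, which device and which cluster offset were last used on this CPU. Below is the same structure as added by the patch; the comments are editorial annotations, not part of the patch itself.

struct percpu_swap_cluster {
	/* Swap device whose cluster was last used on this CPU, one per order */
	struct swap_info_struct *si[SWAP_NR_ORDERS];
	/* Next offset to try inside that device's cluster, one per order */
	unsigned long offset[SWAP_NR_ORDERS];
	/* CPU-local lock serializing the fast path and the slow path */
	local_lock_t lock;
};

static DEFINE_PER_CPU(struct percpu_swap_cluster, percpu_swap_cluster) = {
	.si = { NULL },
	.offset = { SWAP_ENTRY_INVALID },
	.lock = INIT_LOCAL_LOCK(),
};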
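swap_alloc_fast() is the consumer of that cache. The annotated copy below is taken from the hunk above; only the comments are new, and it is naturally not compilable outside mm/swapfile.c.

static bool swap_alloc_fast(swp_entry_t *entry, int order)
{
	struct swap_cluster_info *ci;
	struct swap_info_struct *si;
	unsigned int offset, found = SWAP_ENTRY_INVALID;

	/*
	 * Once allocated, a swap_info_struct is never completely freed, so
	 * the cached pointer can be read first and then validated with
	 * get_swap_device_info() (a percpu_ref tryget).
	 */
	si = this_cpu_read(percpu_swap_cluster.si[order]);
	offset = this_cpu_read(percpu_swap_cluster.offset[order]);
	if (!si || !offset || !get_swap_device_info(si))
		return false;

	ci = lock_cluster(si, offset);
	if (cluster_is_usable(ci, order)) {
		if (cluster_is_empty(ci))
			offset = cluster_offset(si, ci);
		/* Scan the cached cluster; updates the per-CPU cache on success. */
		found = alloc_swap_scan_cluster(si, ci, offset, order, SWAP_HAS_CACHE);
		if (found)
			*entry = swp_entry(si->type, found);
	} else {
		unlock_cluster(ci);
	}

	put_swap_device(si);
	return !!found;
}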
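With get_swap_pages() and scan_swap_map_slots() gone, allocation and swap-cache insertion are folded into the new folio_alloc_swap(). Here is a condensed sketch of its control flow; the memcg charge (including the MEMCG_SWAP_FAIL accounting) and the CONFIG_THP_SWAP sanity check shown in the full hunk above are omitted, so this is illustrative rather than a drop-in replacement.

int folio_alloc_swap(struct folio *folio, gfp_t gfp)
{
	unsigned int order = folio_order(folio);
	swp_entry_t entry = {};

	/*
	 * The local lock keeps the task on this CPU while the cached
	 * cluster is read and, on success, updated.
	 */
	local_lock(&percpu_swap_cluster.lock);
	if (!swap_alloc_fast(&entry, order))	/* cached per-CPU cluster */
		swap_alloc_slow(&entry, order);	/* rotate devices on the plist */
	local_unlock(&percpu_swap_cluster.lock);

	if (!entry.val)
		return -ENOMEM;

	/* Allocation and swap cache insertion are now a single step. */
	if (add_to_swap_cache(folio, entry, gfp | __GFP_NOMEMALLOC, NULL)) {
		put_swap_folio(folio, entry);
		return -ENOMEM;
	}

	atomic_long_sub(1 << order, &nr_swap_pages);
	return 0;
}

Note that nr_swap_pages is now debited only after the folio has actually entered the swap cache, whereas get_swap_pages() reserved the pages up front and credited back whatever it failed to allocate.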
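__try_to_reclaim_swap() also gains a retry loop: by the time the folio returned by the swap-cache lookup is locked, it may no longer cover the offset being reclaimed, so the offset is re-validated against folio->swap and the lookup restarted. The excerpt below is condensed from the hunk above (declarations, the flag checks, and the out/out_unlock paths are elided, so it is not standalone code).

again:
	folio = filemap_get_folio(address_space, swap_cache_index(entry));
	if (IS_ERR(folio))
		return 0;
	nr_pages = folio_nr_pages(folio);
	/* ... TTRS flag checks elided ... */
	if (!folio_trylock(folio))
		goto out;

	/*
	 * Offset could point to the middle of a large folio, or the folio
	 * may have been replaced and no longer cover the expected offset:
	 * drop the reference and look the entry up again.
	 */
	entry = folio->swap;
	if (offset < swp_offset(entry) ||
	    offset >= swp_offset(entry) + nr_pages) {
		folio_unlock(folio);
		folio_put(folio);
		goto again;
	}
	offset = swp_offset(entry);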
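Because the per-CPU cache holds bare swap_info_struct pointers, swapoff must scrub them before the device goes away; flush_percpu_swap_cluster() does this with a per-CPU cmpxchg() that clears an entry only if it still points at the dying device, leaving entries that already moved on to another device untouched. The userspace sketch below models that idea with C11 atomics; swap_device, pcp_cache, and flush_cached_device are made-up names for illustration, not kernel APIs.

/*
 * Userspace model of the invalidation pattern used by
 * flush_percpu_swap_cluster() above: clear a cached pointer only if it
 * still refers to the object being torn down.
 */
#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS 4

struct swap_device { int type; };

/* Stand-in for the kernel's per-CPU percpu_swap_cluster.si[] slots. */
static _Atomic(struct swap_device *) pcp_cache[NR_CPUS];

static void flush_cached_device(struct swap_device *dying)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		struct swap_device *expected = dying;

		/*
		 * Equivalent of cmpxchg(&pcp_si[i], si, NULL): only entries
		 * still pointing at the dying device are cleared.
		 */
		atomic_compare_exchange_strong(&pcp_cache[cpu], &expected, NULL);
	}
}

int main(void)
{
	struct swap_device a = { .type = 0 }, b = { .type = 1 };

	pcp_cache[0] = &a;	/* CPU 0 last used device a */
	pcp_cache[1] = &b;	/* CPU 1 last used device b */

	flush_cached_device(&a);

	/* CPU 0's entry is now NULL, CPU 1's still points at b. */
	printf("cpu0=%p cpu1=%p\n", (void *)pcp_cache[0], (void *)pcp_cache[1]);
	return 0;
}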