Diffstat (limited to 'mm/swapfile.c')
 mm/swapfile.c | 444 ++++++++++++++++++++++++----------------------
 1 file changed, 221 insertions(+), 223 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index ba19430dd4ea..2eff8b51a945 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -37,7 +37,6 @@
#include <linux/oom.h>
#include <linux/swapfile.h>
#include <linux/export.h>
-#include <linux/swap_slots.h>
#include <linux/sort.h>
#include <linux/completion.h>
#include <linux/suspend.h>
@@ -116,6 +115,18 @@ static atomic_t proc_poll_event = ATOMIC_INIT(0);
atomic_t nr_rotate_swap = ATOMIC_INIT(0);
+struct percpu_swap_cluster {
+ struct swap_info_struct *si[SWAP_NR_ORDERS];
+ unsigned long offset[SWAP_NR_ORDERS];
+ local_lock_t lock;
+};
+
+static DEFINE_PER_CPU(struct percpu_swap_cluster, percpu_swap_cluster) = {
+ .si = { NULL },
+ .offset = { SWAP_ENTRY_INVALID },
+ .lock = INIT_LOCAL_LOCK(),
+};
+
static struct swap_info_struct *swap_type_to_swap_info(int type)
{
if (type >= MAX_SWAPFILES)
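
The struct above replaces the old per-device percpu_cluster: each CPU now keeps one (device, offset) allocation hint per order, shared across all swap devices. A minimal single-threaded userspace model of the lookup side, with a plain array standing in for DEFINE_PER_CPU (swap_dev, percpu_cache, and cache_lookup are illustrative names, not kernel API):

    #include <stdbool.h>
    #include <stddef.h>

    #define SWAP_NR_ORDERS     10 /* assumed; the kernel derives it from the PMD order */
    #define SWAP_ENTRY_INVALID 0
    #define NR_CPUS            64

    struct swap_dev; /* stand-in for struct swap_info_struct */

    struct percpu_cache {
        struct swap_dev *si[SWAP_NR_ORDERS];  /* last device used, per order */
        unsigned long offset[SWAP_NR_ORDERS]; /* next candidate slot, per order */
        /* the kernel adds a local_lock_t here: it keeps the task on
         * this CPU while the hint is read and used */
    };

    static struct percpu_cache cache[NR_CPUS]; /* kernel: DEFINE_PER_CPU */

    /* Fetch this CPU's allocation hint for the given order, if any. */
    static bool cache_lookup(int cpu, int order,
                             struct swap_dev **si, unsigned long *off)
    {
        struct percpu_cache *c = &cache[cpu];

        *si = c->si[order];
        *off = c->offset[order];
        return *si != NULL && *off != SWAP_ENTRY_INVALID;
    }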
@@ -158,10 +169,8 @@ static long swap_usage_in_pages(struct swap_info_struct *si)
#define TTRS_UNMAPPED 0x2
/* Reclaim the swap entry if swap is getting full */
#define TTRS_FULL 0x4
-/* Reclaim directly, bypass the slot cache and don't touch device lock */
-#define TTRS_DIRECT 0x8
-static bool swap_is_has_cache(struct swap_info_struct *si,
+static bool swap_only_has_cache(struct swap_info_struct *si,
unsigned long offset, int nr_pages)
{
unsigned char *map = si->swap_map + offset;
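
Each byte of si->swap_map holds a usage count plus flag bits, and the renamed helper answers a single question: is every slot in the range held by nothing but the swap cache? A sketch of that check in plain C, assuming the kernel's SWAP_HAS_CACHE flag value (range_only_has_cache is an illustrative name):

    #include <stdbool.h>

    #define SWAP_HAS_CACHE 0x40 /* flag bit in a swap_map byte */

    /* True only if every slot in [off, off + nr) is cache-only: the
     * SWAP_HAS_CACHE bit set with a zero count, meaning no page table
     * entry still references the slot. */
    static bool range_only_has_cache(const unsigned char *swap_map,
                                     unsigned long off, int nr)
    {
        for (int i = 0; i < nr; i++)
            if (swap_map[off + i] != SWAP_HAS_CACHE)
                return false;
        return true;
    }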
@@ -210,6 +219,7 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
int ret, nr_pages;
bool need_reclaim;
+again:
folio = filemap_get_folio(address_space, swap_cache_index(entry));
if (IS_ERR(folio))
return 0;
@@ -227,8 +237,16 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
if (!folio_trylock(folio))
goto out;
- /* offset could point to the middle of a large folio */
+ /*
+ * Offset could point to the middle of a large folio, or the folio
+ * may no longer cover the expected offset by the time it is locked.
+ */
entry = folio->swap;
+ if (offset < swp_offset(entry) || offset >= swp_offset(entry) + nr_pages) {
+ folio_unlock(folio);
+ folio_put(folio);
+ goto again;
+ }
offset = swp_offset(entry);
need_reclaim = ((flags & TTRS_ANYWAY) ||
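
The new "goto again" is a lookup/lock/revalidate loop: the folio found in the swap cache can be split or replaced before the trylock succeeds, so the target offset must be rechecked against the folio's actual swap range once the lock is held. A compilable model of just the revalidation step (folio_stub is a stand-in type):

    #include <stdbool.h>

    struct folio_stub {
        unsigned long first; /* first swap offset the folio covers */
        unsigned long nr;    /* number of slots it covers */
    };

    /* After locking, confirm the target offset still lies inside the
     * folio's swap range; if not, the caller unlocks, drops its
     * reference, and retries the swap cache lookup. */
    static bool folio_covers(const struct folio_stub *f, unsigned long off)
    {
        return off >= f->first && off < f->first + f->nr;
    }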
@@ -243,28 +261,13 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
* reference or pending writeback, and can't be allocated to others.
*/
ci = lock_cluster(si, offset);
- need_reclaim = swap_is_has_cache(si, offset, nr_pages);
+ need_reclaim = swap_only_has_cache(si, offset, nr_pages);
unlock_cluster(ci);
if (!need_reclaim)
goto out_unlock;
- if (!(flags & TTRS_DIRECT)) {
- /* Free through slot cache */
- delete_from_swap_cache(folio);
- folio_set_dirty(folio);
- ret = nr_pages;
- goto out_unlock;
- }
-
- xa_lock_irq(&address_space->i_pages);
- __delete_from_swap_cache(folio, entry, NULL);
- xa_unlock_irq(&address_space->i_pages);
- folio_ref_sub(folio, nr_pages);
+ delete_from_swap_cache(folio);
folio_set_dirty(folio);
-
- ci = lock_cluster(si, offset);
- swap_entry_range_free(si, ci, entry, nr_pages);
- unlock_cluster(ci);
ret = nr_pages;
out_unlock:
folio_unlock(folio);
@@ -479,15 +482,6 @@ static void move_cluster(struct swap_info_struct *si,
static void swap_cluster_schedule_discard(struct swap_info_struct *si,
struct swap_cluster_info *ci)
{
- unsigned int idx = cluster_index(si, ci);
- /*
- * If scan_swap_map_slots() can't find a free cluster, it will check
- * si->swap_map directly. To make sure the discarding cluster isn't
- * taken by scan_swap_map_slots(), mark the swap entries bad (occupied).
- * It will be cleared after discard
- */
- memset(si->swap_map + idx * SWAPFILE_CLUSTER,
- SWAP_MAP_BAD, SWAPFILE_CLUSTER);
VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE);
move_cluster(si, ci, &si->discard_clusters, CLUSTER_FLAG_DISCARD);
schedule_work(&si->discard_work);
@@ -556,7 +550,7 @@ static bool swap_do_scheduled_discard(struct swap_info_struct *si)
ci = list_first_entry(&si->discard_clusters, struct swap_cluster_info, list);
/*
* Delete the cluster from list to prepare for discard, but keep
- * the CLUSTER_FLAG_DISCARD flag, there could be percpu_cluster
+ * the CLUSTER_FLAG_DISCARD flag: percpu_swap_cluster could be
* pointing to it, or it could be run into by relocate_cluster.
*/
list_del(&ci->list);
@@ -571,8 +565,6 @@ static bool swap_do_scheduled_discard(struct swap_info_struct *si)
* return the cluster to allocation list.
*/
ci->flags = CLUSTER_FLAG_NONE;
- memset(si->swap_map + idx * SWAPFILE_CLUSTER,
- 0, SWAPFILE_CLUSTER);
__free_cluster(si, ci);
spin_unlock(&ci->lock);
ret = true;
@@ -653,7 +645,8 @@ static void relocate_cluster(struct swap_info_struct *si,
return;
if (!ci->count) {
- free_cluster(si, ci);
+ if (ci->flags != CLUSTER_FLAG_FREE)
+ free_cluster(si, ci);
} else if (ci->count != SWAPFILE_CLUSTER) {
if (ci->flags != CLUSTER_FLAG_FRAG)
move_cluster(si, ci, &si->frag_clusters[ci->order],
@@ -698,7 +691,7 @@ static bool cluster_reclaim_range(struct swap_info_struct *si,
offset++;
break;
case SWAP_HAS_CACHE:
- nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY | TTRS_DIRECT);
+ nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY);
if (nr_reclaim > 0)
offset += nr_reclaim;
else
@@ -729,6 +722,9 @@ static bool cluster_scan_range(struct swap_info_struct *si,
unsigned long offset, end = start + nr_pages;
unsigned char *map = si->swap_map;
+ if (cluster_is_empty(ci))
+ return true;
+
for (offset = start; offset < end; offset++) {
switch (READ_ONCE(map[offset])) {
case 0:
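
The added check is a fast path: an empty cluster is usable by definition, so the per-byte scan can be skipped. A simplified model of the scan's acceptance rule; the real code additionally gates cache-only slots behind vm_swap_full() and reports whether reclaim is needed:

    #include <stdbool.h>

    #define SWAP_HAS_CACHE 0x40

    /* An empty cluster is usable outright; otherwise every slot must
     * be free (0) or cache-only (reclaimable). */
    static bool range_usable(const unsigned char *map, unsigned long start,
                             unsigned int nr, bool cluster_empty)
    {
        if (cluster_empty)
            return true; /* nothing allocated yet: skip the byte scan */

        for (unsigned long off = start; off < start + nr; off++)
            if (map[off] != 0 && map[off] != SWAP_HAS_CACHE)
                return false;
        return true;
    }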
@@ -820,14 +816,15 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
out:
relocate_cluster(si, ci);
unlock_cluster(ci);
- if (si->flags & SWP_SOLIDSTATE)
- __this_cpu_write(si->percpu_cluster->next[order], next);
- else
+ if (si->flags & SWP_SOLIDSTATE) {
+ this_cpu_write(percpu_swap_cluster.offset[order], next);
+ this_cpu_write(percpu_swap_cluster.si[order], si);
+ } else {
si->global_cluster->next[order] = next;
+ }
return found;
}
-/* Return true if reclaimed a whole cluster */
static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
{
long to_scan = 1;
@@ -848,7 +845,7 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) {
spin_unlock(&ci->lock);
nr_reclaim = __try_to_reclaim_swap(si, offset,
- TTRS_ANYWAY | TTRS_DIRECT);
+ TTRS_ANYWAY);
spin_lock(&ci->lock);
if (nr_reclaim) {
offset += abs(nr_reclaim);
@@ -858,6 +855,10 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
offset++;
}
+ /* in case no swap cache is reclaimed */
+ if (ci->flags == CLUSTER_FLAG_NONE)
+ relocate_cluster(si, ci);
+
unlock_cluster(ci);
if (to_scan <= 0)
break;
@@ -874,27 +875,29 @@ static void swap_reclaim_work(struct work_struct *work)
}
/*
- * Try to get swap entries with specified order from current cpu's swap entry
- * pool (a cluster). This might involve allocating a new cluster for current CPU
- * too.
+ * Try to allocate swap entries with the specified order, possibly
+ * setting up a new cluster for the current CPU as well.
*/
static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int order,
unsigned char usage)
{
struct swap_cluster_info *ci;
- unsigned int offset, found = 0;
+ unsigned int offset = SWAP_ENTRY_INVALID, found = SWAP_ENTRY_INVALID;
- if (si->flags & SWP_SOLIDSTATE) {
- /* Fast path using per CPU cluster */
- local_lock(&si->percpu_cluster->lock);
- offset = __this_cpu_read(si->percpu_cluster->next[order]);
- } else {
+ /*
+ * The swapfile is not a block device, so large
+ * entries cannot be allocated from it.
+ */
+ if (order && !(si->flags & SWP_BLKDEV))
+ return 0;
+
+ if (!(si->flags & SWP_SOLIDSTATE)) {
/* Serialize HDD SWAP allocation for each device. */
spin_lock(&si->global_cluster_lock);
offset = si->global_cluster->next[order];
- }
+ if (offset == SWAP_ENTRY_INVALID)
+ goto new_cluster;
- if (offset) {
ci = lock_cluster(si, offset);
/* Cluster could have been used by another order */
if (cluster_is_usable(ci, order)) {
@@ -985,9 +988,7 @@ new_cluster:
}
}
done:
- if (si->flags & SWP_SOLIDSTATE)
- local_unlock(&si->percpu_cluster->lock);
- else
+ if (!(si->flags & SWP_SOLIDSTATE))
spin_unlock(&si->global_cluster_lock);
return found;
}
@@ -1101,7 +1102,7 @@ static void swap_usage_sub(struct swap_info_struct *si, unsigned int nr_entries)
/*
* If device is not full, and SWAP_USAGE_OFFLIST_BIT is set,
- * remove it from the plist.
+ * add it to the plist.
*/
if (unlikely(val & SWAP_USAGE_OFFLIST_BIT))
add_to_avail_list(si, false);
@@ -1155,61 +1156,6 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
swap_usage_sub(si, nr_entries);
}
-static int cluster_alloc_swap(struct swap_info_struct *si,
- unsigned char usage, int nr,
- swp_entry_t slots[], int order)
-{
- int n_ret = 0;
-
- while (n_ret < nr) {
- unsigned long offset = cluster_alloc_swap_entry(si, order, usage);
-
- if (!offset)
- break;
- slots[n_ret++] = swp_entry(si->type, offset);
- }
-
- return n_ret;
-}
-
-static int scan_swap_map_slots(struct swap_info_struct *si,
- unsigned char usage, int nr,
- swp_entry_t slots[], int order)
-{
- unsigned int nr_pages = 1 << order;
-
- /*
- * We try to cluster swap pages by allocating them sequentially
- * in swap. Once we've allocated SWAPFILE_CLUSTER pages this
- * way, however, we resort to first-free allocation, starting
- * a new cluster. This prevents us from scattering swap pages
- * all over the entire swap partition, so that we reduce
- * overall disk seek times between swap pages. -- sct
- * But we do now try to find an empty cluster. -Andrea
- * And we let swap pages go all over an SSD partition. Hugh
- */
- if (order > 0) {
- /*
- * Should not even be attempting large allocations when huge
- * page swap is disabled. Warn and fail the allocation.
- */
- if (!IS_ENABLED(CONFIG_THP_SWAP) ||
- nr_pages > SWAPFILE_CLUSTER) {
- VM_WARN_ON_ONCE(1);
- return 0;
- }
-
- /*
- * Swapfile is not block device so unable
- * to allocate large entries.
- */
- if (!(si->flags & SWP_BLKDEV))
- return 0;
- }
-
- return cluster_alloc_swap(si, usage, nr, slots, order);
-}
-
static bool get_swap_device_info(struct swap_info_struct *si)
{
if (!percpu_ref_tryget_live(&si->users))
@@ -1226,39 +1172,65 @@ static bool get_swap_device_info(struct swap_info_struct *si)
return true;
}
-int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
+/*
+ * Fast path try to get swap entries with specified order from current
+ * CPU's swap entry pool (a cluster).
+ */
+static bool swap_alloc_fast(swp_entry_t *entry,
+ int order)
{
- int order = swap_entry_order(entry_order);
- unsigned long size = 1 << order;
- struct swap_info_struct *si, *next;
- long avail_pgs;
- int n_ret = 0;
- int node;
+ struct swap_cluster_info *ci;
+ struct swap_info_struct *si;
+ unsigned int offset, found = SWAP_ENTRY_INVALID;
- spin_lock(&swap_avail_lock);
+ /*
+ * Once allocated, swap_info_struct will never be completely freed,
+ * so checking its liveness via get_swap_device_info() is enough.
+ */
+ si = this_cpu_read(percpu_swap_cluster.si[order]);
+ offset = this_cpu_read(percpu_swap_cluster.offset[order]);
+ if (!si || !offset || !get_swap_device_info(si))
+ return false;
- avail_pgs = atomic_long_read(&nr_swap_pages) / size;
- if (avail_pgs <= 0) {
- spin_unlock(&swap_avail_lock);
- goto noswap;
+ ci = lock_cluster(si, offset);
+ if (cluster_is_usable(ci, order)) {
+ if (cluster_is_empty(ci))
+ offset = cluster_offset(si, ci);
+ found = alloc_swap_scan_cluster(si, ci, offset, order, SWAP_HAS_CACHE);
+ if (found)
+ *entry = swp_entry(si->type, found);
+ } else {
+ unlock_cluster(ci);
}
- n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs);
+ put_swap_device(si);
+ return !!found;
+}
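
The fast path boils down to: read this CPU's cached (si, offset), pin the device, and bail to the slow path on any miss. A compilable sketch of that control flow, with dev_tryget()/dev_put() standing in for get_swap_device_info()/put_swap_device() and an injected callback in place of alloc_swap_scan_cluster():

    #include <stddef.h>

    #define SWAP_ENTRY_INVALID 0

    struct swap_dev { int alive; }; /* stand-in for swap_info_struct */

    static int dev_tryget(struct swap_dev *si) { return si && si->alive; }
    static void dev_put(struct swap_dev *si) { (void)si; }

    /* Any miss (no cached device, no cached offset, or a device that
     * can no longer be pinned) returns the invalid entry and sends
     * the caller to the slow path. */
    static unsigned long fast_alloc(struct swap_dev *cached_si,
                                    unsigned long cached_off,
                                    unsigned long (*scan)(struct swap_dev *,
                                                          unsigned long))
    {
        unsigned long found;

        if (!cached_si || cached_off == SWAP_ENTRY_INVALID ||
            !dev_tryget(cached_si))
            return SWAP_ENTRY_INVALID;
        found = scan(cached_si, cached_off);
        dev_put(cached_si);
        return found;
    }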
- atomic_long_sub(n_goal * size, &nr_swap_pages);
+/* Rotate the device and switch to a new cluster */
+static bool swap_alloc_slow(swp_entry_t *entry,
+ int order)
+{
+ int node;
+ unsigned long offset;
+ struct swap_info_struct *si, *next;
-start_over:
node = numa_node_id();
+ spin_lock(&swap_avail_lock);
+start_over:
plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) {
- /* requeue si to after same-priority siblings */
+ /* Rotate the device and switch to a new cluster */
plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
spin_unlock(&swap_avail_lock);
if (get_swap_device_info(si)) {
- n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
- n_goal, swp_entries, order);
+ offset = cluster_alloc_swap_entry(si, order, SWAP_HAS_CACHE);
put_swap_device(si);
- if (n_ret || size > 1)
- goto check_out;
+ if (offset) {
+ *entry = swp_entry(si->type, offset);
+ return true;
+ }
+ if (order)
+ return false;
}
spin_lock(&swap_avail_lock);
@@ -1276,15 +1248,68 @@ start_over:
if (plist_node_empty(&next->avail_lists[node]))
goto start_over;
}
-
spin_unlock(&swap_avail_lock);
+ return false;
+}
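
The plist_requeue() call is what makes equal-priority devices round-robin: the device just tried is moved behind its same-priority siblings, so the next pass starts from a different one. A userspace model over a priority-sorted array (requeue_same_prio is an illustrative name):

    struct dev { int prio; int id; };

    /* Move devs[i] behind every later entry that shares its priority,
     * mirroring what plist_requeue() does inside swap_alloc_slow(). */
    static void requeue_same_prio(struct dev *devs, int n, int i)
    {
        struct dev chosen = devs[i];
        int j = i;

        while (j + 1 < n && devs[j + 1].prio == chosen.prio) {
            devs[j] = devs[j + 1];
            j++;
        }
        devs[j] = chosen;
    }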
+
+/**
+ * folio_alloc_swap - allocate swap space for a folio
+ * @folio: folio we want to move to swap
+ * @gfp: gfp mask for shadow nodes
+ *
+ * Allocate swap space for the folio and add the folio to the
+ * swap cache.
+ *
+ * Context: Caller needs to hold the folio lock.
+ * Return: Whether the folio was added to the swap cache.
+ */
+int folio_alloc_swap(struct folio *folio, gfp_t gfp)
+{
+ unsigned int order = folio_order(folio);
+ unsigned int size = 1 << order;
+ swp_entry_t entry = {};
+
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio);
+
+ /*
+ * Should not even be attempting large allocations when huge
+ * page swap is disabled. Warn and fail the allocation.
+ */
+ if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) {
+ VM_WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ local_lock(&percpu_swap_cluster.lock);
+ if (!swap_alloc_fast(&entry, order))
+ swap_alloc_slow(&entry, order);
+ local_unlock(&percpu_swap_cluster.lock);
-check_out:
- if (n_ret < n_goal)
- atomic_long_add((long)(n_goal - n_ret) * size,
- &nr_swap_pages);
-noswap:
- return n_ret;
+ /* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */
+ if (mem_cgroup_try_charge_swap(folio, entry))
+ goto out_free;
+
+ if (!entry.val)
+ return -ENOMEM;
+
+ /*
+ * XArray node allocations from PF_MEMALLOC contexts could
+ * completely exhaust the page allocator. __GFP_NOMEMALLOC
+ * stops emergency reserves from being allocated.
+ *
+ * TODO: this could cause a theoretical memory reclaim
+ * deadlock in the swap out path.
+ */
+ if (add_to_swap_cache(folio, entry, gfp | __GFP_NOMEMALLOC, NULL))
+ goto out_free;
+
+ atomic_long_sub(size, &nr_swap_pages);
+ return 0;
+
+out_free:
+ put_swap_folio(folio, entry);
+ return -ENOMEM;
}
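
One subtlety in folio_alloc_swap(): mem_cgroup_try_charge_swap() runs before the entry.val check, so a failed allocation is still reported to the memcg layer (it accounts MEMCG_SWAP_FAIL) before being turned into -ENOMEM, and any failure after a successful allocation must free the entry. A toy model of that ordering, with callbacks standing in for the kernel hooks:

    #include <stdbool.h>

    static int alloc_outcome(unsigned long entry, /* 0 means allocation failed */
                             bool (*charge_ok)(unsigned long),
                             bool (*cache_add_ok)(unsigned long),
                             void (*free_entry)(unsigned long))
    {
        if (!charge_ok(entry)) /* runs even for entry == 0, to count the failure */
            goto out_free;
        if (!entry)
            return -1;         /* the allocation itself failed */
        if (!cache_add_ok(entry))
            goto out_free;
        return 0;

    out_free:
        if (entry)
            free_entry(entry);
        return -1;
    }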
static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
@@ -1569,7 +1594,7 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
return;
ci = lock_cluster(si, offset);
- if (swap_is_has_cache(si, offset, size))
+ if (swap_only_has_cache(si, offset, size))
swap_entry_range_free(si, ci, entry, size);
else {
for (int i = 0; i < size; i++, entry.val++) {
@@ -1580,25 +1605,6 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
unlock_cluster(ci);
}
-void swapcache_free_entries(swp_entry_t *entries, int n)
-{
- int i;
- struct swap_cluster_info *ci;
- struct swap_info_struct *si = NULL;
-
- if (n <= 0)
- return;
-
- for (i = 0; i < n; ++i) {
- si = _swap_info_get(entries[i]);
- if (si) {
- ci = lock_cluster(si, swp_offset(entries[i]));
- swap_entry_range_free(si, ci, entries[i], 1);
- unlock_cluster(ci);
- }
- }
-}
-
int __swap_count(swp_entry_t entry)
{
struct swap_info_struct *si = swp_swap_info(entry);
@@ -1612,7 +1618,7 @@ int __swap_count(swp_entry_t entry)
* This does not give an exact answer when swap count is continued,
* but does include the high COUNT_CONTINUED flag to allow for that.
*/
-int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
+bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry)
{
pgoff_t offset = swp_offset(entry);
struct swap_cluster_info *ci;
@@ -1621,7 +1627,7 @@ int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
ci = lock_cluster(si, offset);
count = swap_count(si->swap_map[offset]);
unlock_cluster(ci);
- return count;
+ return !!count;
}
/*
@@ -1707,7 +1713,7 @@ static bool folio_swapped(struct folio *folio)
return false;
if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio)))
- return swap_swapcount(si, entry) != 0;
+ return swap_entry_swapped(si, entry);
return swap_page_trans_huge_swapped(si, entry, folio_order(folio));
}
@@ -1781,9 +1787,6 @@ void free_swap_and_cache_nr(swp_entry_t entry, int nr)
bool any_only_cache = false;
unsigned long offset;
- if (non_swap_entry(entry))
- return;
-
si = get_swap_device(entry);
if (!si)
return;
@@ -1842,6 +1845,7 @@ out:
swp_entry_t get_swap_page_of_type(int type)
{
struct swap_info_struct *si = swap_type_to_swap_info(type);
+ unsigned long offset;
swp_entry_t entry = {0};
if (!si)
@@ -1849,8 +1853,13 @@ swp_entry_t get_swap_page_of_type(int type)
/* This is called for allocating swap entry, not cache */
if (get_swap_device_info(si)) {
- if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0))
- atomic_long_dec(&nr_swap_pages);
+ if (si->flags & SWP_WRITEOK) {
+ offset = cluster_alloc_swap_entry(si, 0, 1);
+ if (offset) {
+ entry = swp_entry(si->type, offset);
+ atomic_long_dec(&nr_swap_pages);
+ }
+ }
put_swap_device(si);
}
fail:
@@ -2611,21 +2620,6 @@ static void reinsert_swap_info(struct swap_info_struct *si)
spin_unlock(&swap_lock);
}
-static bool __has_usable_swap(void)
-{
- return !plist_head_empty(&swap_active_head);
-}
-
-bool has_usable_swap(void)
-{
- bool ret;
-
- spin_lock(&swap_lock);
- ret = __has_usable_swap();
- spin_unlock(&swap_lock);
- return ret;
-}
-
/*
* Called after clearing SWP_WRITEOK, ensures cluster_alloc_range
* see the updated flags, so there will be no more allocations.
@@ -2641,10 +2635,31 @@ static void wait_for_allocation(struct swap_info_struct *si)
for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) {
ci = lock_cluster(si, offset);
unlock_cluster(ci);
- offset += SWAPFILE_CLUSTER;
}
}
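
With the stray extra increment gone, the loop visits every cluster once, and it works as a lock-cycling barrier: after a cluster's lock has been taken and dropped, no allocator that read the old flags can still be operating inside that cluster. The same idea in portable C with pthread mutexes:

    #include <pthread.h>

    /* Cycle each lock once: when lock i has been acquired and
     * released, any thread that held it earlier has finished its
     * critical section. Caller must have initialized the mutexes. */
    static void wait_for_holders(pthread_mutex_t *locks, int n)
    {
        for (int i = 0; i < n; i++) {
            pthread_mutex_lock(&locks[i]);
            pthread_mutex_unlock(&locks[i]);
        }
    }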
+/*
+ * Called after the swap device's reference count is dead, so
+ * neither scan nor allocation will use it.
+ */
+static void flush_percpu_swap_cluster(struct swap_info_struct *si)
+{
+ int cpu, i;
+ struct swap_info_struct **pcp_si;
+
+ for_each_possible_cpu(cpu) {
+ pcp_si = per_cpu_ptr(percpu_swap_cluster.si, cpu);
+ /*
+ * Invalidate the percpu swap cluster cache: si->users
+ * is dead, so no new user will point to it; just flush
+ * any existing users.
+ */
+ for (i = 0; i < SWAP_NR_ORDERS; i++)
+ cmpxchg(&pcp_si[i], si, NULL);
+ }
+}
+
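
The cmpxchg() is the key detail: a per-CPU slot is cleared only if it still points at the dying device, so a concurrent update that already cached another device is left untouched. Equivalent logic with C11 atomics (the flat NR_CPUS array is an assumption of this model):

    #include <stdatomic.h>
    #include <stddef.h>

    #define NR_CPUS        64
    #define SWAP_NR_ORDERS 10

    struct swap_dev;

    static _Atomic(struct swap_dev *) pcp_si[NR_CPUS][SWAP_NR_ORDERS];

    /* Swing every slot that still references the dead device to NULL;
     * slots already pointing elsewhere keep their value, just like
     * cmpxchg(&pcp_si[i], si, NULL) in the kernel version. */
    static void flush_dead_device(struct swap_dev *dead)
    {
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            for (int i = 0; i < SWAP_NR_ORDERS; i++) {
                struct swap_dev *expected = dead;

                atomic_compare_exchange_strong(&pcp_si[cpu][i],
                                               &expected, NULL);
            }
    }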
SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
struct swap_info_struct *p = NULL;
@@ -2717,8 +2732,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
wait_for_allocation(p);
- disable_swap_slots_cache_lock();
-
set_current_oom_origin();
err = try_to_unuse(p->type);
clear_current_oom_origin();
@@ -2726,12 +2739,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
if (err) {
/* re-insert swap space back into swap_list */
reinsert_swap_info(p);
- reenable_swap_slots_cache_unlock();
goto out_dput;
}
- reenable_swap_slots_cache_unlock();
-
/*
* Wait for swap operations protected by get/put_swap_device()
* to complete. Because of synchronize_rcu() here, all swap
@@ -2746,6 +2756,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
flush_work(&p->discard_work);
flush_work(&p->reclaim_work);
+ flush_percpu_swap_cluster(p);
destroy_swap_extents(p);
if (p->flags & SWP_CONTINUED)
@@ -2773,8 +2784,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
arch_swap_invalidate_area(p->type);
zswap_swapoff(p->type);
mutex_unlock(&swapon_mutex);
- free_percpu(p->percpu_cluster);
- p->percpu_cluster = NULL;
kfree(p->global_cluster);
p->global_cluster = NULL;
vfree(swap_map);
@@ -3120,13 +3129,6 @@ static unsigned long read_swap_header(struct swap_info_struct *si,
return maxpages;
}
-#define SWAP_CLUSTER_INFO_COLS \
- DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
-#define SWAP_CLUSTER_SPACE_COLS \
- DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
-#define SWAP_CLUSTER_COLS \
- max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)
-
static int setup_swap_map_and_extents(struct swap_info_struct *si,
union swap_header *swap_header,
unsigned char *swap_map,
@@ -3166,14 +3168,21 @@ static int setup_swap_map_and_extents(struct swap_info_struct *si,
return nr_extents;
}
+#define SWAP_CLUSTER_INFO_COLS \
+ DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
+#define SWAP_CLUSTER_SPACE_COLS \
+ DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
+#define SWAP_CLUSTER_COLS \
+ max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)
+
static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
union swap_header *swap_header,
unsigned long maxpages)
{
unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
struct swap_cluster_info *cluster_info;
- unsigned long i, j, k, idx;
- int cpu, err = -ENOMEM;
+ unsigned long i, j, idx;
+ int err = -ENOMEM;
cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL);
if (!cluster_info)
@@ -3182,20 +3191,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
for (i = 0; i < nr_clusters; i++)
spin_lock_init(&cluster_info[i].lock);
- if (si->flags & SWP_SOLIDSTATE) {
- si->percpu_cluster = alloc_percpu(struct percpu_cluster);
- if (!si->percpu_cluster)
- goto err_free;
-
- for_each_possible_cpu(cpu) {
- struct percpu_cluster *cluster;
-
- cluster = per_cpu_ptr(si->percpu_cluster, cpu);
- for (i = 0; i < SWAP_NR_ORDERS; i++)
- cluster->next[i] = SWAP_ENTRY_INVALID;
- local_lock_init(&cluster->lock);
- }
- } else {
+ if (!(si->flags & SWP_SOLIDSTATE)) {
si->global_cluster = kmalloc(sizeof(*si->global_cluster),
GFP_KERNEL);
if (!si->global_cluster)
@@ -3233,8 +3229,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
* Reduce false cache line sharing between cluster_info and
* sharing same address space.
*/
- for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
- j = k % SWAP_CLUSTER_COLS;
+ for (j = 0; j < SWAP_CLUSTER_COLS; j++) {
for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
struct swap_cluster_info *ci;
idx = i * SWAP_CLUSTER_COLS + j;
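
The rewritten loop drops the redundant k/j pair but keeps the column-major walk, which spreads clusters belonging to the same swap address space across different cache lines. This standalone program prints the visiting order for 10 clusters in 4 columns to show the interleave:

    #include <stdio.h>

    int main(void)
    {
        const unsigned long nr_clusters = 10, cols = 4;

        for (unsigned long j = 0; j < cols; j++)
            for (unsigned long i = 0; i < (nr_clusters + cols - 1) / cols; i++) {
                unsigned long idx = i * cols + j;

                if (idx < nr_clusters)
                    printf("%lu ", idx); /* prints: 0 4 8 1 5 9 2 6 3 7 */
            }
        printf("\n");
        return 0;
    }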
@@ -3449,8 +3444,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
mutex_lock(&swapon_mutex);
prio = -1;
if (swap_flags & SWAP_FLAG_PREFER)
- prio =
- (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
+ prio = swap_flags & SWAP_FLAG_PRIO_MASK;
enable_swap_info(si, prio, swap_map, cluster_info, zeromap);
pr_info("Adding %uk swap on %s. Priority:%d extents:%d across:%lluk %s%s%s%s\n",
@@ -3474,8 +3468,6 @@ free_swap_address_space:
bad_swap_unlock_inode:
inode_unlock(inode);
bad_swap:
- free_percpu(si->percpu_cluster);
- si->percpu_cluster = NULL;
kfree(si->global_cluster);
si->global_cluster = NULL;
inode = NULL;
@@ -3499,8 +3491,6 @@ out:
putname(name);
if (inode)
inode_unlock(inode);
- if (!error)
- enable_swap_slots_cache();
return error;
}
@@ -3527,7 +3517,6 @@ void si_swapinfo(struct sysinfo *val)
* Returns error code in following case.
* - success -> 0
* - swp_entry is invalid -> EINVAL
- * - swp_entry is migration entry -> EINVAL
* - swap-cache reference is requested but there is already one. -> EEXIST
* - swap-cache reference is requested but the entry is not used. -> ENOENT
* - swap-mapped reference requested but needs continued swap count. -> ENOMEM
@@ -3542,6 +3531,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
int err, i;
si = swp_swap_info(entry);
+ if (WARN_ON_ONCE(!si)) {
+ pr_err("%s%08lx\n", Bad_file, entry.val);
+ return -EINVAL;
+ }
offset = swp_offset(entry);
VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
@@ -3787,8 +3780,8 @@ outer:
* into, carry if so, or else fail until a new continuation page is allocated;
* when the original swap_map count is decremented from 0 with continuation,
* borrow from the continuation and report whether it still holds more.
- * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster
- * lock.
+ * Called while __swap_duplicate() or caller of __swap_entry_free_locked()
+ * holds cluster lock.
*/
static bool swap_count_continued(struct swap_info_struct *si,
pgoff_t offset, unsigned char count)
@@ -3893,6 +3886,11 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
}
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+static bool __has_usable_swap(void)
+{
+ return !plist_head_empty(&swap_active_head);
+}
+
void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
{
struct swap_info_struct *si, *next;