summaryrefslogtreecommitdiff
path: root/mm/swap_table.h
diff options
context:
space:
mode:
Diffstat (limited to 'mm/swap_table.h')
-rw-r--r--mm/swap_table.h138
1 files changed, 129 insertions, 9 deletions
diff --git a/mm/swap_table.h b/mm/swap_table.h
index ea244a57a5b7..8415ffbe2b9c 100644
--- a/mm/swap_table.h
+++ b/mm/swap_table.h
@@ -18,10 +18,69 @@ struct swap_table {
* (physical or virtual) device. The swap table in each cluster is a
* 1:1 map of the swap slots in this cluster.
*
- * Each swap table entry could be a pointer (folio), a XA_VALUE
- * (shadow), or NULL.
+ * Swap table entry type and bits layouts:
+ *
+ * NULL: |---------------- 0 ---------------| - Free slot
+ * Shadow: | SWAP_COUNT |---- SHADOW_VAL ---|1| - Swapped out slot
+ * PFN: | SWAP_COUNT |------ PFN -------|10| - Cached slot
+ * Pointer: |----------- Pointer ----------|100| - (Unused)
+ * Bad: |------------- 1 -------------|1000| - Bad slot
+ *
+ * SWAP_COUNT is `SWP_TB_COUNT_BITS` long, each entry is an atomic long.
+ *
+ * Usages:
+ *
+ * - NULL: Swap slot is unused, could be allocated.
+ *
+ * - Shadow: Swap slot is used and not cached (usually swapped out). It reuses
+ * the XA_VALUE format to be compatible with working set shadows. SHADOW_VAL
+ * part might be all 0 if the working set shadow info is absent. In such a case,
+ * we still want to keep the shadow format as a placeholder.
+ *
+ * Memcg ID is embedded in SHADOW_VAL.
+ *
+ * - PFN: Swap slot is in use, and cached. Memcg info is recorded on the page
+ * struct.
+ *
+ * - Pointer: Not used yet. `0b100` is reserved for potential pointer usage
+ * because only the lower three bits can be used as a marker for 8 bytes
+ * aligned pointers.
+ *
+ * - Bad: Swap slot is reserved, protects swap header or holes on swap devices.
*/
+#if defined(MAX_POSSIBLE_PHYSMEM_BITS)
+#define SWAP_CACHE_PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
+#elif defined(MAX_PHYSMEM_BITS)
+#define SWAP_CACHE_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
+#else
+#define SWAP_CACHE_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT)
+#endif
+
+/* NULL Entry, all 0 */
+#define SWP_TB_NULL 0UL
+
+/* Swapped out: shadow */
+#define SWP_TB_SHADOW_MARK 0b1UL
+
+/* Cached: PFN */
+#define SWP_TB_PFN_BITS (SWAP_CACHE_PFN_BITS + SWP_TB_PFN_MARK_BITS)
+#define SWP_TB_PFN_MARK 0b10UL
+#define SWP_TB_PFN_MARK_BITS 2
+#define SWP_TB_PFN_MARK_MASK (BIT(SWP_TB_PFN_MARK_BITS) - 1)
+
+/* SWAP_COUNT part for PFN or shadow, the width can be shrunk or extended */
+#define SWP_TB_COUNT_BITS min(4, BITS_PER_LONG - SWP_TB_PFN_BITS)
+#define SWP_TB_COUNT_MASK (~((~0UL) >> SWP_TB_COUNT_BITS))
+#define SWP_TB_COUNT_SHIFT (BITS_PER_LONG - SWP_TB_COUNT_BITS)
+#define SWP_TB_COUNT_MAX ((1 << SWP_TB_COUNT_BITS) - 1)
+
+/* Bad slot: ends with 0b1000 and the rest of the bits are all 1 */
+#define SWP_TB_BAD ((~0UL) << 3)
+
+/* Macro for shadow offset calculation */
+#define SWAP_COUNT_SHIFT SWP_TB_COUNT_BITS
+
/*
* Helpers for casting one type of info into a swap table entry.
*/
@@ -31,18 +90,47 @@ static inline unsigned long null_to_swp_tb(void)
return 0;
}
-static inline unsigned long folio_to_swp_tb(struct folio *folio)
+static inline unsigned long __count_to_swp_tb(unsigned char count)
{
+ /*
+ * At least three values are needed to distinguish free (0),
+ * used (count > 0 && count < SWP_TB_COUNT_MAX), and
+ * overflow (count == SWP_TB_COUNT_MAX).
+ */
+ BUILD_BUG_ON(SWP_TB_COUNT_MAX < 2 || SWP_TB_COUNT_BITS < 2);
+ VM_WARN_ON(count > SWP_TB_COUNT_MAX);
+ return ((unsigned long)count) << SWP_TB_COUNT_SHIFT;
+}
+
+static inline unsigned long pfn_to_swp_tb(unsigned long pfn, unsigned int count)
+{
+ unsigned long swp_tb;
+
BUILD_BUG_ON(sizeof(unsigned long) != sizeof(void *));
- return (unsigned long)folio;
+ BUILD_BUG_ON(SWAP_CACHE_PFN_BITS >
+ (BITS_PER_LONG - SWP_TB_PFN_MARK_BITS - SWP_TB_COUNT_BITS));
+
+ swp_tb = (pfn << SWP_TB_PFN_MARK_BITS) | SWP_TB_PFN_MARK;
+ VM_WARN_ON_ONCE(swp_tb & SWP_TB_COUNT_MASK);
+
+ return swp_tb | __count_to_swp_tb(count);
+}
+
+static inline unsigned long folio_to_swp_tb(struct folio *folio, unsigned int count)
+{
+ return pfn_to_swp_tb(folio_pfn(folio), count);
}
-static inline unsigned long shadow_swp_to_tb(void *shadow)
+static inline unsigned long shadow_to_swp_tb(void *shadow, unsigned int count)
{
BUILD_BUG_ON((BITS_PER_XA_VALUE + 1) !=
BITS_PER_BYTE * sizeof(unsigned long));
+ BUILD_BUG_ON((unsigned long)xa_mk_value(0) != SWP_TB_SHADOW_MARK);
+
VM_WARN_ON_ONCE(shadow && !xa_is_value(shadow));
- return (unsigned long)shadow;
+ VM_WARN_ON_ONCE(shadow && ((unsigned long)shadow & SWP_TB_COUNT_MASK));
+
+ return (unsigned long)shadow | __count_to_swp_tb(count) | SWP_TB_SHADOW_MARK;
}
/*
@@ -55,7 +143,7 @@ static inline bool swp_tb_is_null(unsigned long swp_tb)
static inline bool swp_tb_is_folio(unsigned long swp_tb)
{
- return !xa_is_value((void *)swp_tb) && !swp_tb_is_null(swp_tb);
+ return ((swp_tb & SWP_TB_PFN_MARK_MASK) == SWP_TB_PFN_MARK);
}
static inline bool swp_tb_is_shadow(unsigned long swp_tb)
@@ -63,19 +151,49 @@ static inline bool swp_tb_is_shadow(unsigned long swp_tb)
return xa_is_value((void *)swp_tb);
}
+static inline bool swp_tb_is_bad(unsigned long swp_tb)
+{
+ return swp_tb == SWP_TB_BAD;
+}
+
+static inline bool swp_tb_is_countable(unsigned long swp_tb)
+{
+ return (swp_tb_is_shadow(swp_tb) || swp_tb_is_folio(swp_tb) ||
+ swp_tb_is_null(swp_tb));
+}
+
/*
* Helpers for retrieving info from swap table.
*/
static inline struct folio *swp_tb_to_folio(unsigned long swp_tb)
{
VM_WARN_ON(!swp_tb_is_folio(swp_tb));
- return (void *)swp_tb;
+ return pfn_folio((swp_tb & ~SWP_TB_COUNT_MASK) >> SWP_TB_PFN_MARK_BITS);
}
static inline void *swp_tb_to_shadow(unsigned long swp_tb)
{
VM_WARN_ON(!swp_tb_is_shadow(swp_tb));
- return (void *)swp_tb;
+ /* No shift needed, xa_value is stored as it is in the lower bits. */
+ return (void *)(swp_tb & ~SWP_TB_COUNT_MASK);
+}
+
+static inline unsigned char __swp_tb_get_count(unsigned long swp_tb)
+{
+ VM_WARN_ON(!swp_tb_is_countable(swp_tb));
+ return ((swp_tb & SWP_TB_COUNT_MASK) >> SWP_TB_COUNT_SHIFT);
+}
+
+static inline int swp_tb_get_count(unsigned long swp_tb)
+{
+ if (swp_tb_is_countable(swp_tb))
+ return __swp_tb_get_count(swp_tb);
+ return -EINVAL;
+}
+
+static inline unsigned long __swp_tb_mk_count(unsigned long swp_tb, int count)
+{
+ return ((swp_tb & ~SWP_TB_COUNT_MASK) | __count_to_swp_tb(count));
}
/*
@@ -120,6 +238,8 @@ static inline unsigned long swap_table_get(struct swap_cluster_info *ci,
atomic_long_t *table;
unsigned long swp_tb;
+ VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
+
rcu_read_lock();
table = rcu_dereference(ci->table);
swp_tb = table ? atomic_long_read(&table[off]) : null_to_swp_tb();