mm: Use multi-index entries in the page cache

We currently store large folios as 2^N consecutive entries. While this consumes rather more memory than necessary, it also turns out to be buggy. A writeback operation which starts within a tail page of a dirty folio will not write back the folio as the xarray's dirty bit is only set on the head index. With multi-index entries, the dirty bit will be found no matter where in the folio the operation starts. This does end up simplifying the page cache slightly, although not as much as I had hoped. Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> Reviewed-by: William Kucharski <william.kucharski@oracle.com>
author: Matthew Wilcox (Oracle) <willy@infradead.org> 2020-06-27 22:19:08 -0400
committer: Matthew Wilcox (Oracle) <willy@infradead.org> 2022-01-08 00:28:41 -0500
commit: 6b24ca4a1a8d4ee3221d6d44ddbb99f542e4bda3 (patch)
tree: 19f30971709b643688cada0032b9b33806c633e1 /mm/huge_memory.c
parent: 25a8de7f8d970ffa7263bd9d32a08138cd949f17 (diff)
download: lwn-6b24ca4a1a8d4ee3221d6d44ddbb99f542e4bda3.tar.gz
lwn-6b24ca4a1a8d4ee3221d6d44ddbb99f542e4bda3.zip
1 files changed, 14 insertions, 4 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e5483347291c..f58524394dc1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2614,6 +2614,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct page *head = compound_head(page);
 	struct deferred_split *ds_queue = get_deferred_split_queue(head);
+	XA_STATE(xas, &head->mapping->i_pages, head->index);
 	struct anon_vma *anon_vma = NULL;
 	struct address_space *mapping = NULL;
 	int extra_pins, ret;
@@ -2652,6 +2653,13 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			goto out;
 		}
 
+		xas_split_alloc(&xas, head, compound_order(head),
+				mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK);
+		if (xas_error(&xas)) {
+			ret = xas_error(&xas);
+			goto out;
+		}
+
 		anon_vma = NULL;
 		i_mmap_lock_read(mapping);
 
@@ -2681,13 +2689,12 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	/* block interrupt reentry in xa_lock and spinlock */
 	local_irq_disable();
 	if (mapping) {
-		XA_STATE(xas, &mapping->i_pages, page_index(head));
-
 		/*
 		 * Check if the head page is present in page cache.
 		 * We assume all tail are present too, if head is there.
 		 */
-		xa_lock(&mapping->i_pages);
+		xas_lock(&xas);
+		xas_reset(&xas);
 		if (xas_load(&xas) != head)
 			goto fail;
 	}
@@ -2703,6 +2710,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		if (mapping) {
 			int nr = thp_nr_pages(head);
 
+			xas_split(&xas, head, thp_order(head));
 			if (PageSwapBacked(head)) {
 				__mod_lruvec_page_state(head, NR_SHMEM_THPS,
 							-nr);
@@ -2719,7 +2727,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		spin_unlock(&ds_queue->split_queue_lock);
 fail:
 		if (mapping)
-			xa_unlock(&mapping->i_pages);
+			xas_unlock(&xas);
 		local_irq_enable();
 		remap_page(head, thp_nr_pages(head));
 		ret = -EBUSY;
@@ -2733,6 +2741,8 @@ out_unlock:
 	if (mapping)
 		i_mmap_unlock_read(mapping);
 out:
+	/* Free any memory we didn't use */
+	xas_nomem(&xas, 0);
 	count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
 	return ret;
 }
author	Matthew Wilcox (Oracle) <willy@infradead.org>	2020-06-27 22:19:08 -0400
committer	Matthew Wilcox (Oracle) <willy@infradead.org>	2022-01-08 00:28:41 -0500
commit	6b24ca4a1a8d4ee3221d6d44ddbb99f542e4bda3 (patch)
tree	19f30971709b643688cada0032b9b33806c633e1 /mm/huge_memory.c
parent	25a8de7f8d970ffa7263bd9d32a08138cd949f17 (diff)
download	lwn-6b24ca4a1a8d4ee3221d6d44ddbb99f542e4bda3.tar.gz lwn-6b24ca4a1a8d4ee3221d6d44ddbb99f542e4bda3.zip