Diffstat (limited to 'fs/dax.c')
-rw-r--r-- | fs/dax.c | 480
1 file changed, 341 insertions(+), 139 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -71,6 +71,11 @@ static unsigned long dax_to_pfn(void *entry)
 	return xa_to_value(entry) >> DAX_SHIFT;
 }
 
+static struct folio *dax_to_folio(void *entry)
+{
+	return page_folio(pfn_to_page(dax_to_pfn(entry)));
+}
+
 static void *dax_make_entry(pfn_t pfn, unsigned long flags)
 {
 	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
@@ -206,7 +211,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry,
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
+static void *get_next_unlocked_entry(struct xa_state *xas, unsigned int order)
 {
 	void *entry;
 	struct wait_exceptional_entry_queue ewait;
@@ -236,6 +241,37 @@ static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
 }
 
 /*
+ * Wait for the given entry to become unlocked. Caller must hold the i_pages
+ * lock and call either put_unlocked_entry() if it did not lock the entry or
+ * dax_unlock_entry() if it did. Returns an unlocked entry if still present.
+ */
+static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry)
+{
+	struct wait_exceptional_entry_queue ewait;
+	wait_queue_head_t *wq;
+
+	init_wait(&ewait.wait);
+	ewait.wait.func = wake_exceptional_entry_func;
+
+	while (unlikely(dax_is_locked(entry))) {
+		wq = dax_entry_waitqueue(xas, entry, &ewait.key);
+		prepare_to_wait_exclusive(wq, &ewait.wait,
+					  TASK_UNINTERRUPTIBLE);
+		xas_pause(xas);
+		xas_unlock_irq(xas);
+		schedule();
+		finish_wait(wq, &ewait.wait);
+		xas_lock_irq(xas);
+		entry = xas_load(xas);
+	}
+
+	if (xa_is_internal(entry))
+		return NULL;
+
+	return entry;
+}
+
+/*
  * The only thing keeping the address space around is the i_pages lock
  * (it's cycled in clear_inode() after removing the entries from i_pages)
  * After we call xas_unlock_irq(), we cannot touch xas->xa.
@@ -250,7 +286,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
 	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
 
 	/*
-	 * Unlike get_unlocked_entry() there is no guarantee that this
+	 * Unlike get_next_unlocked_entry() there is no guarantee that this
	 * path ever successfully retrieves an unlocked entry before an
	 * inode dies. Perform a non-exclusive wait in case this path
	 * never successfully performs its own wake up.
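For readers unfamiliar with the locking scheme: wait_entry_unlocked_exclusive() is meant to be called from inside an xas_for_each() walk with the i_pages lock held, as the later hunks in this patch do. A minimal sketch of that caller-side contract, condensed from dax_delete_mapping_range() further down (not a complete function):

	/* sketch only: the caller has already done xas_lock_irq(&xas) */
	xas_for_each(&xas, entry, end_idx) {
		if (!xa_is_value(entry))
			continue;
		/* may drop and retake the lock; returns NULL if the entry vanished */
		entry = wait_entry_unlocked_exclusive(&xas, entry);
		if (!entry)
			continue;
		/* ... act on the present, unlocked entry ... */
		put_unlocked_entry(&xas, entry, WAKE_NEXT);
	}
	xas_unlock_irq(&xas);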
@@ -307,109 +343,156 @@ static unsigned long dax_entry_size(void *entry)
 		return PAGE_SIZE;
 }
 
-static unsigned long dax_end_pfn(void *entry)
+/*
+ * A DAX folio is considered shared if it has no mapping set and ->share (which
+ * shares the ->index field) is non-zero. Note this may return false even if the
+ * page is shared between multiple files but has not yet actually been mapped
+ * into multiple address spaces.
+ */
+static inline bool dax_folio_is_shared(struct folio *folio)
 {
-	return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
+	return !folio->mapping && folio->share;
 }
 
 /*
- * Iterate through all mapped pfns represented by an entry, i.e. skip
- * 'empty' and 'zero' entries.
+ * When it is called by dax_insert_entry(), the shared flag will indicate
+ * whether this entry is shared by multiple files. If the page has not
+ * previously been associated with any mappings the ->mapping and ->index
+ * fields will be set. If it has already been associated with a mapping
+ * the mapping will be cleared and the share count set. It's then up to
+ * reverse map users like memory_failure() to call back into the filesystem to
+ * recover ->mapping and ->index information. For example by implementing
+ * dax_holder_operations.
 */
-#define for_each_mapped_pfn(entry, pfn) \
-	for (pfn = dax_to_pfn(entry); \
-			pfn < dax_end_pfn(entry); pfn++)
-
-static inline bool dax_page_is_shared(struct page *page)
+static void dax_folio_make_shared(struct folio *folio)
 {
-	return page->mapping == PAGE_MAPPING_DAX_SHARED;
+	/*
+	 * folio is not currently shared so mark it as shared by clearing
+	 * folio->mapping.
+	 */
+	folio->mapping = NULL;
+
+	/*
+	 * folio has previously been mapped into one address space so set the
+	 * share count.
+	 */
+	folio->share = 1;
 }
 
-/*
- * Set the page->mapping with PAGE_MAPPING_DAX_SHARED flag, increase the
- * refcount.
- */
-static inline void dax_page_share_get(struct page *page)
+static inline unsigned long dax_folio_put(struct folio *folio)
 {
-	if (page->mapping != PAGE_MAPPING_DAX_SHARED) {
+	unsigned long ref;
+	int order, i;
+
+	if (!dax_folio_is_shared(folio))
+		ref = 0;
+	else
+		ref = --folio->share;
+
+	if (ref)
+		return ref;
+
+	folio->mapping = NULL;
+	order = folio_order(folio);
+	if (!order)
+		return 0;
+
+	for (i = 0; i < (1UL << order); i++) {
+		struct dev_pagemap *pgmap = page_pgmap(&folio->page);
+		struct page *page = folio_page(folio, i);
+		struct folio *new_folio = (struct folio *)page;
+
+		ClearPageHead(page);
+		clear_compound_head(page);
+
+		new_folio->mapping = NULL;
 		/*
-		 * Reset the index if the page was already mapped
-		 * regularly before.
+		 * Reset pgmap which was over-written by
+		 * prep_compound_page().
 		 */
-		if (page->mapping)
-			page->share = 1;
-		page->mapping = PAGE_MAPPING_DAX_SHARED;
+		new_folio->pgmap = pgmap;
+		new_folio->share = 0;
+		WARN_ON_ONCE(folio_ref_count(new_folio));
 	}
-	page->share++;
+
+	return ref;
 }
 
-static inline unsigned long dax_page_share_put(struct page *page)
+static void dax_folio_init(void *entry)
 {
-	return --page->share;
+	struct folio *folio = dax_to_folio(entry);
+	int order = dax_entry_order(entry);
+
+	/*
+	 * Folio should have been split back to order-0 pages in
+	 * dax_folio_put() when they were removed from their
+	 * final mapping.
+	 */
+	WARN_ON_ONCE(folio_order(folio));
+
+	if (order > 0) {
+		prep_compound_page(&folio->page, order);
+		if (order > 1)
+			INIT_LIST_HEAD(&folio->_deferred_list);
+		WARN_ON_ONCE(folio_ref_count(folio));
+	}
 }
 
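As a reading aid (not part of the patch): the new encoding overloads folio->index as folio->share once a folio belongs to more than one file. A hypothetical helper makes the rule explicit; dax_folio_share_count() below is illustrative only and does not exist in the kernel:

static inline unsigned long dax_folio_share_count(struct folio *folio)
{
	/* owned by exactly one file: ->mapping points at it, ->index is valid */
	if (folio->mapping)
		return 1;
	/* reflink-shared (or free): ->mapping is NULL, ->share counts the users */
	return folio->share;
}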
-/*
- * When it is called in dax_insert_entry(), the shared flag will indicate that
- * whether this entry is shared by multiple files. If so, set the page->mapping
- * PAGE_MAPPING_DAX_SHARED, and use page->share as refcount.
- */
 static void dax_associate_entry(void *entry, struct address_space *mapping,
-		struct vm_area_struct *vma, unsigned long address, bool shared)
+		struct vm_area_struct *vma,
+		unsigned long address, bool shared)
 {
-	unsigned long size = dax_entry_size(entry), pfn, index;
-	int i = 0;
+	unsigned long size = dax_entry_size(entry), index;
+	struct folio *folio = dax_to_folio(entry);
+
+	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
+		return;
 
 	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
 		return;
 
 	index = linear_page_index(vma, address & ~(size - 1));
-	for_each_mapped_pfn(entry, pfn) {
-		struct page *page = pfn_to_page(pfn);
+	if (shared && (folio->mapping || dax_folio_is_shared(folio))) {
+		if (folio->mapping)
+			dax_folio_make_shared(folio);
 
-		if (shared) {
-			dax_page_share_get(page);
-		} else {
-			WARN_ON_ONCE(page->mapping);
-			page->mapping = mapping;
-			page->index = index + i++;
-		}
+		WARN_ON_ONCE(!folio->share);
+		WARN_ON_ONCE(dax_entry_order(entry) != folio_order(folio));
+		folio->share++;
+	} else {
+		WARN_ON_ONCE(folio->mapping);
+		dax_folio_init(entry);
+		folio = dax_to_folio(entry);
+		folio->mapping = mapping;
+		folio->index = index;
 	}
 }
 
 static void dax_disassociate_entry(void *entry, struct address_space *mapping,
-		bool trunc)
+				   bool trunc)
 {
-	unsigned long pfn;
+	struct folio *folio = dax_to_folio(entry);
 
 	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
 		return;
 
-	for_each_mapped_pfn(entry, pfn) {
-		struct page *page = pfn_to_page(pfn);
-
-		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
-		if (dax_page_is_shared(page)) {
-			/* keep the shared flag if this page is still shared */
-			if (dax_page_share_put(page) > 0)
-				continue;
-		} else
-			WARN_ON_ONCE(page->mapping && page->mapping != mapping);
-		page->mapping = NULL;
-		page->index = 0;
-	}
+	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
+		return;
+
+	dax_folio_put(folio);
 }
 
 static struct page *dax_busy_page(void *entry)
 {
-	unsigned long pfn;
+	struct folio *folio = dax_to_folio(entry);
 
-	for_each_mapped_pfn(entry, pfn) {
-		struct page *page = pfn_to_page(pfn);
+	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
+		return NULL;
 
-		if (page_ref_count(page) > 1)
-			return page;
-	}
-	return NULL;
+	if (folio_ref_count(folio) - folio_mapcount(folio))
+		return &folio->page;
+	else
+		return NULL;
 }
 
 /**
@@ -580,7 +663,7 @@ static void *grab_mapping_entry(struct xa_state *xas,
 retry:
 	pmd_downgrade = false;
 	xas_lock_irq(xas);
-	entry = get_unlocked_entry(xas, order);
+	entry = get_next_unlocked_entry(xas, order);
 
 	if (entry) {
 		if (dax_is_conflict(entry))
@@ -690,7 +773,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
 	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
 		return NULL;
 
-	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+	if (!dax_mapping(mapping))
 		return NULL;
 
 	/* If end == LLONG_MAX, all pages from start to till end of file */
@@ -716,8 +799,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
 	xas_for_each(&xas, entry, end_idx) {
 		if (WARN_ON_ONCE(!xa_is_value(entry)))
 			continue;
-		if (unlikely(dax_is_locked(entry)))
-			entry = get_unlocked_entry(&xas, 0);
+		entry = wait_entry_unlocked_exclusive(&xas, entry);
 		if (entry)
 			page = dax_busy_page(entry);
 		put_unlocked_entry(&xas, entry, WAKE_NEXT);
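The new dax_busy_page() test is worth spelling out: with every PTE/PMD mapping now holding a folio reference, a folio is only "busy" when something other than the page tables (typically get_user_pages()/DMA) still holds a reference. A sketch of that predicate; the helper name is made up for illustration:

static inline bool dax_folio_has_extra_refs(struct folio *folio)
{
	/*
	 * Page-table mappings account for folio_mapcount() references;
	 * anything above that is a GUP/DMA pin that blocks layout changes.
	 */
	return folio_ref_count(folio) > folio_mapcount(folio);
}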
@@ -743,14 +825,14 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 EXPORT_SYMBOL_GPL(dax_layout_busy_page);
 
 static int __dax_invalidate_entry(struct address_space *mapping,
-					  pgoff_t index, bool trunc)
+				    pgoff_t index, bool trunc)
 {
 	XA_STATE(xas, &mapping->i_pages, index);
 	int ret = 0;
 	void *entry;
 
 	xas_lock_irq(&xas);
-	entry = get_unlocked_entry(&xas, 0);
+	entry = get_next_unlocked_entry(&xas, 0);
 	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
 		goto out;
 	if (!trunc &&
@@ -776,7 +858,9 @@ static int __dax_clear_dirty_range(struct address_space *mapping,
 
 	xas_lock_irq(&xas);
 	xas_for_each(&xas, entry, end) {
-		entry = get_unlocked_entry(&xas, 0);
+		entry = wait_entry_unlocked_exclusive(&xas, entry);
+		if (!entry)
+			continue;
 		xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
 		xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
 		put_unlocked_entry(&xas, entry, WAKE_NEXT);
@@ -813,6 +897,107 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	return ret;
 }
 
+void dax_delete_mapping_range(struct address_space *mapping,
+				loff_t start, loff_t end)
+{
+	void *entry;
+	pgoff_t start_idx = start >> PAGE_SHIFT;
+	pgoff_t end_idx;
+	XA_STATE(xas, &mapping->i_pages, start_idx);
+
+	/* If end == LLONG_MAX, all pages from start to till end of file */
+	if (end == LLONG_MAX)
+		end_idx = ULONG_MAX;
+	else
+		end_idx = end >> PAGE_SHIFT;
+
+	xas_lock_irq(&xas);
+	xas_for_each(&xas, entry, end_idx) {
+		if (!xa_is_value(entry))
+			continue;
+		entry = wait_entry_unlocked_exclusive(&xas, entry);
+		if (!entry)
+			continue;
+		dax_disassociate_entry(entry, mapping, true);
+		xas_store(&xas, NULL);
+		mapping->nrpages -= 1UL << dax_entry_order(entry);
+		put_unlocked_entry(&xas, entry, WAKE_ALL);
+	}
+	xas_unlock_irq(&xas);
+}
+EXPORT_SYMBOL_GPL(dax_delete_mapping_range);
+
+static int wait_page_idle(struct page *page,
+			  void (cb)(struct inode *),
+			  struct inode *inode)
+{
+	return ___wait_var_event(page, dax_page_is_idle(page),
+				TASK_INTERRUPTIBLE, 0, 0, cb(inode));
+}
+
+static void wait_page_idle_uninterruptible(struct page *page,
+					   struct inode *inode)
+{
+	___wait_var_event(page, dax_page_is_idle(page),
+			TASK_UNINTERRUPTIBLE, 0, 0, schedule());
+}
+
+/*
+ * Unmaps the inode and waits for any DMA to complete prior to deleting the
+ * DAX mapping entries for the range.
+ *
+ * For NOWAIT behavior, pass @cb as NULL to early-exit on first found
+ * busy page
+ */
+int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
+		void (cb)(struct inode *))
+{
+	struct page *page;
+	int error = 0;
+
+	if (!dax_mapping(inode->i_mapping))
+		return 0;
+
+	do {
+		page = dax_layout_busy_page_range(inode->i_mapping, start, end);
+		if (!page)
+			break;
+		if (!cb) {
+			error = -ERESTARTSYS;
+			break;
+		}
+
+		error = wait_page_idle(page, cb, inode);
+	} while (error == 0);
+
+	if (!page)
+		dax_delete_mapping_range(inode->i_mapping, start, end);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(dax_break_layout);
+
+void dax_break_layout_final(struct inode *inode)
+{
+	struct page *page;
+
+	if (!dax_mapping(inode->i_mapping))
+		return;
+
+	do {
+		page = dax_layout_busy_page_range(inode->i_mapping, 0,
+						LLONG_MAX);
+		if (!page)
+			break;
+
+		wait_page_idle_uninterruptible(page, inode);
+	} while (true);
+
+	if (!page)
+		dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX);
+}
+EXPORT_SYMBOL_GPL(dax_break_layout_final);
+
 /*
  * Invalidate DAX entry if it is clean.
 */
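To see how the new export is meant to be consumed: a filesystem breaks DAX layouts before a truncate-style operation, passing a callback that drops its locks, sleeps, and retakes them while a page is still busy. The sketch below is loosely modelled on how the in-tree users are converted elsewhere in this series; all example_fs_* names are placeholders, not real functions.

/* Placeholder callback: runs in place of schedule() while waiting for the
 * busy page to become idle, so fault and I/O paths can make progress. */
static void example_fs_wait_dax_page(struct inode *inode)
{
	filemap_invalidate_unlock(inode->i_mapping);
	schedule();
	filemap_invalidate_lock(inode->i_mapping);
}

static int example_fs_break_layouts_for_truncate(struct inode *inode,
						 loff_t newsize)
{
	/* unmap, wait for DMA, then drop the DAX entries beyond newsize */
	return dax_break_layout(inode, newsize, LLONG_MAX,
				example_fs_wait_dax_page);
}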
@@ -895,8 +1080,9 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
 		void *old;
 
 		dax_disassociate_entry(entry, mapping, false);
-		dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address,
-				shared);
+		dax_associate_entry(new_entry, mapping, vmf->vma,
+				vmf->address, shared);
+
 		/*
 		 * Only swap our new entry into the page cache if the current
 		 * entry is a zero page or an empty entry.  If a normal PTE or
@@ -940,7 +1126,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 	if (unlikely(dax_is_locked(entry))) {
 		void *old_entry = entry;
 
-		entry = get_unlocked_entry(xas, 0);
+		entry = get_next_unlocked_entry(xas, 0);
 
 		/* Entry got punched out / reallocated? */
 		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
@@ -1084,9 +1270,7 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
 		goto out;
 	if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
 		goto out;
-	/* For larger pages we need devmap */
-	if (length > 1 && !pfn_t_devmap(*pfnp))
-		goto out;
+
 	rc = 0;
 
 out_check_addr:
@@ -1193,7 +1377,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 	*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE);
 
-	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
+	ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), false);
 	trace_dax_load_hole(inode, vmf, ret);
 	return ret;
 }
@@ -1258,7 +1442,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 }
 #endif /* CONFIG_FS_DAX_PMD */
 
-static s64 dax_unshare_iter(struct iomap_iter *iter)
+static int dax_unshare_iter(struct iomap_iter *iter)
 {
 	struct iomap *iomap = &iter->iomap;
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
@@ -1266,11 +1450,11 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
 	u64 copy_len = iomap_length(iter);
 	u32 mod;
 	int id = 0;
-	s64 ret = 0;
+	s64 ret;
 	void *daddr = NULL, *saddr = NULL;
 
 	if (!iomap_want_unshare_iter(iter))
-		return iomap_length(iter);
+		return iomap_iter_advance_full(iter);
 
 	/*
 	 * Extend the file range to be aligned to fsblock/pagesize, because
@@ -1300,14 +1484,14 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
 	if (ret < 0)
 		goto out_unlock;
 
-	if (copy_mc_to_kernel(daddr, saddr, copy_len) == 0)
-		ret = iomap_length(iter);
-	else
+	if (copy_mc_to_kernel(daddr, saddr, copy_len) != 0)
 		ret = -EIO;
 
 out_unlock:
 	dax_read_unlock(id);
-	return dax_mem2blk_err(ret);
+	if (ret < 0)
+		return dax_mem2blk_err(ret);
+	return iomap_iter_advance_full(iter);
 }
 
@@ -1326,7 +1510,7 @@ int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 	iter.len = min(len, size - pos);
 
 	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.processed = dax_unshare_iter(&iter);
+		iter.status = dax_unshare_iter(&iter);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dax_file_unshare);
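Several hunks above and below switch the DAX iterators from the old convention of returning a byte count through iter.processed to the new one, where the helper advances the iterator itself and reports a status through iter.status. A minimal sketch of the new-style shape (illustrative; example_iter() is not a real function):

static int example_iter(struct iomap_iter *iter)
{
	u64 length = iomap_length(iter);

	/* ... operate on [iter->pos, iter->pos + length) ... */

	/* on success, consume what was processed and return 0 (or an error) */
	return iomap_iter_advance(iter, &length);
}

	/* the caller side stays the usual iomap loop, but assigns ->status: */
	while ((ret = iomap_iter(&iter, ops)) > 0)
		iter.status = example_iter(&iter);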
@@ -1354,17 +1538,16 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
 	return ret;
 }
 
-static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
+static int dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
 {
 	const struct iomap *iomap = &iter->iomap;
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
-	loff_t pos = iter->pos;
 	u64 length = iomap_length(iter);
-	s64 written = 0;
+	int ret;
 
 	/* already zeroed? we're done. */
 	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
-		return length;
+		return iomap_iter_advance(iter, &length);
 
 	/*
 	 * invalidate the pages whose sharing state is to be changed
@@ -1372,33 +1555,35 @@ static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
 	 * because of CoW.
 	 */
 	if (iomap->flags & IOMAP_F_SHARED)
 		invalidate_inode_pages2_range(iter->inode->i_mapping,
-				pos >> PAGE_SHIFT,
-				(pos + length - 1) >> PAGE_SHIFT);
+				iter->pos >> PAGE_SHIFT,
+				(iter->pos + length - 1) >> PAGE_SHIFT);
 
 	do {
+		loff_t pos = iter->pos;
 		unsigned offset = offset_in_page(pos);
-		unsigned size = min_t(u64, PAGE_SIZE - offset, length);
 		pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
-		long rc;
 		int id;
 
+		length = min_t(u64, PAGE_SIZE - offset, length);
+
 		id = dax_read_lock();
-		if (IS_ALIGNED(pos, PAGE_SIZE) && size == PAGE_SIZE)
-			rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
+		if (IS_ALIGNED(pos, PAGE_SIZE) && length == PAGE_SIZE)
+			ret = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
 		else
-			rc = dax_memzero(iter, pos, size);
+			ret = dax_memzero(iter, pos, length);
 		dax_read_unlock(id);
 
-		if (rc < 0)
-			return rc;
-		pos += size;
-		length -= size;
-		written += size;
+		if (ret < 0)
+			return ret;
+
+		ret = iomap_iter_advance(iter, &length);
+		if (ret)
+			return ret;
 	} while (length > 0);
 
 	if (did_zero)
 		*did_zero = true;
-	return written;
+	return ret;
 }
 
@@ -1413,7 +1598,7 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 	int ret;
 
 	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.processed = dax_zero_iter(&iter, did_zero);
+		iter.status = dax_zero_iter(&iter, did_zero);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dax_zero_range);
@@ -1431,8 +1616,7 @@ int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 }
 EXPORT_SYMBOL_GPL(dax_truncate_page);
 
-static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
-		struct iov_iter *iter)
+static int dax_iomap_iter(struct iomap_iter *iomi, struct iov_iter *iter)
 {
 	const struct iomap *iomap = &iomi->iomap;
 	const struct iomap *srcmap = iomap_iter_srcmap(iomi);
@@ -1451,8 +1635,10 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		if (pos >= end)
 			return 0;
 
-		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
-			return iov_iter_zero(min(length, end - pos), iter);
+		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN) {
+			done = iov_iter_zero(min(length, end - pos), iter);
+			return iomap_iter_advance(iomi, &done);
+		}
 	}
 
 	/*
@@ -1485,7 +1671,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 	}
 
 	id = dax_read_lock();
-	while (pos < end) {
+	while ((pos = iomi->pos) < end) {
 		unsigned offset = pos & (PAGE_SIZE - 1);
 		const size_t size = ALIGN(length + offset, PAGE_SIZE);
 		pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
@@ -1535,18 +1721,16 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 			xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
 					map_len, iter);
 
-		pos += xfer;
-		length -= xfer;
-		done += xfer;
-
-		if (xfer == 0)
+		length = xfer;
+		ret = iomap_iter_advance(iomi, &length);
+		if (!ret && xfer == 0)
 			ret = -EFAULT;
 		if (xfer < map_len)
 			break;
 	}
 	dax_read_unlock(id);
 
-	return done ? done : ret;
+	return ret;
 }
 
 /**
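For orientation, the zeroing entry points above are what filesystems call when truncating or punching within a block. A typical (illustrative) caller is sketched below; example_fs_iomap_ops stands in for the filesystem's real iomap ops and error handling is trimmed:

	/* zero the partial block left behind by a truncate */
	error = dax_truncate_page(inode, newsize, &did_zero,
				  &example_fs_iomap_ops);

	/* or zero an arbitrary sub-range, e.g. for hole punching */
	error = dax_zero_range(inode, pos, len, NULL, &example_fs_iomap_ops);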
@@ -1586,7 +1770,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		iomi.flags |= IOMAP_NOWAIT;
 
 	while ((ret = iomap_iter(&iomi, ops)) > 0)
-		iomi.processed = dax_iomap_iter(&iomi, iter);
+		iomi.status = dax_iomap_iter(&iomi, iter);
 
 	done = iomi.pos - iocb->ki_pos;
 	iocb->ki_pos = iomi.pos;
@@ -1664,7 +1848,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 	loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
 	bool write = iter->flags & IOMAP_WRITE;
 	unsigned long entry_flags = pmd ? DAX_PMD : 0;
-	int err = 0;
+	struct folio *folio;
+	int ret, err = 0;
 	pfn_t pfn;
 	void *kaddr;
 
@@ -1696,17 +1881,19 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 			return dax_fault_return(err);
 	}
 
+	folio = dax_to_folio(*entry);
 	if (dax_fault_is_synchronous(iter, vmf->vma))
 		return dax_fault_synchronous_pfnp(pfnp, pfn);
 
-	/* insert PMD pfn */
+	folio_ref_inc(folio);
 	if (pmd)
-		return vmf_insert_pfn_pmd(vmf, pfn, write);
+		ret = vmf_insert_folio_pmd(vmf, pfn_folio(pfn_t_to_pfn(pfn)),
+					write);
+	else
+		ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), write);
+	folio_put(folio);
 
-	/* insert PTE pfn */
-	if (write)
-		return vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
-	return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
+	return ret;
 }
 
 static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
@@ -1757,7 +1944,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
 	while ((error = iomap_iter(&iter, ops)) > 0) {
 		if (WARN_ON_ONCE(iomap_length(&iter) < PAGE_SIZE)) {
-			iter.processed = -EIO;	/* fs corruption? */
+			iter.status = -EIO;	/* fs corruption? */
 			continue;
 		}
 
@@ -1769,8 +1956,10 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 			ret |= VM_FAULT_MAJOR;
 		}
 
-		if (!(ret & VM_FAULT_ERROR))
-			iter.processed = PAGE_SIZE;
+		if (!(ret & VM_FAULT_ERROR)) {
+			u64 length = PAGE_SIZE;
+			iter.status = iomap_iter_advance(&iter, &length);
+		}
 	}
 
 	if (iomap_errp)
@@ -1883,8 +2072,10 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 			continue; /* actually breaks out of the loop */
 
 		ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
-		if (ret != VM_FAULT_FALLBACK)
-			iter.processed = PMD_SIZE;
+		if (ret != VM_FAULT_FALLBACK) {
+			u64 length = PMD_SIZE;
+			iter.status = iomap_iter_advance(&iter, &length);
+		}
 	}
 
 unlock_entry:
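For completeness, the fault entry points reworked here are driven from a filesystem's vm_operations. A rough sketch of such a handler, including the synchronous-fault path that pairs dax_iomap_fault() with dax_finish_sync_fault(); the example_fs_* names and the metadata-commit step are placeholders:

static vm_fault_t example_fs_huge_fault(struct vm_fault *vmf, unsigned int order)
{
	vm_fault_t ret;
	pfn_t pfn;

	ret = dax_iomap_fault(vmf, order, &pfn, NULL, &example_fs_iomap_ops);
	if (ret & VM_FAULT_NEEDDSYNC) {
		/*
		 * Persist any new block allocations before mapping the page
		 * writably (e.g. commit the journal for this range), then
		 * finish installing the writable entry.
		 */
		ret = dax_finish_sync_fault(vmf, order, pfn);
	}
	return ret;
}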
@@ -1945,11 +2136,12 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
+	struct folio *folio;
 	void *entry;
 	vm_fault_t ret;
 
 	xas_lock_irq(&xas);
-	entry = get_unlocked_entry(&xas, order);
+	entry = get_next_unlocked_entry(&xas, order);
 	/* Did we race with someone splitting entry or so? */
 	if (!entry || dax_is_conflict(entry) ||
 	    (order == 0 && !dax_is_pte_entry(entry))) {
@@ -1962,14 +2154,17 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
 	dax_lock_entry(&xas, entry);
 	xas_unlock_irq(&xas);
+	folio = pfn_folio(pfn_t_to_pfn(pfn));
+	folio_ref_inc(folio);
 	if (order == 0)
-		ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
+		ret = vmf_insert_page_mkwrite(vmf, &folio->page, true);
 #ifdef CONFIG_FS_DAX_PMD
 	else if (order == PMD_ORDER)
-		ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
+		ret = vmf_insert_folio_pmd(vmf, folio, FAULT_FLAG_WRITE);
 #endif
 	else
 		ret = VM_FAULT_FALLBACK;
+	folio_put(folio);
 	dax_unlock_entry(&xas, entry);
 	trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
 	return ret;
@@ -1999,12 +2194,13 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order,
 }
 EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
 
-static loff_t dax_range_compare_iter(struct iomap_iter *it_src,
+static int dax_range_compare_iter(struct iomap_iter *it_src,
 		struct iomap_iter *it_dest, u64 len, bool *same)
 {
 	const struct iomap *smap = &it_src->iomap;
 	const struct iomap *dmap = &it_dest->iomap;
 	loff_t pos1 = it_src->pos, pos2 = it_dest->pos;
+	u64 dest_len;
 	void *saddr, *daddr;
 	int id, ret;
 
@@ -2012,7 +2208,7 @@ static loff_t dax_range_compare_iter(struct iomap_iter *it_src,
 
 	if (smap->type == IOMAP_HOLE && dmap->type == IOMAP_HOLE) {
 		*same = true;
-		return len;
+		goto advance;
 	}
 
 	if (smap->type == IOMAP_HOLE || dmap->type == IOMAP_HOLE) {
@@ -2035,7 +2231,13 @@ static loff_t dax_range_compare_iter(struct iomap_iter *it_src,
 	if (!*same)
 		len = 0;
 	dax_read_unlock(id);
-	return len;
+
+advance:
+	dest_len = len;
+	ret = iomap_iter_advance(it_src, &len);
+	if (!ret)
+		ret = iomap_iter_advance(it_dest, &dest_len);
+	return ret;
 
 out_unlock:
 	dax_read_unlock(id);
@@ -2058,15 +2260,15 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		.len		= len,
 		.flags		= IOMAP_DAX,
 	};
-	int ret, compared = 0;
+	int ret, status;
 
 	while ((ret = iomap_iter(&src_iter, ops)) > 0 &&
 	       (ret = iomap_iter(&dst_iter, ops)) > 0) {
-		compared = dax_range_compare_iter(&src_iter, &dst_iter,
+		status = dax_range_compare_iter(&src_iter, &dst_iter,
 				min(src_iter.len, dst_iter.len), same);
-		if (compared < 0)
+		if (status < 0)
 			return ret;
-		src_iter.processed = dst_iter.processed = compared;
+		src_iter.status = dst_iter.status = status;
 	}
 	return ret;
 }
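Finally, dax_dedupe_file_range_compare() is the piece a filesystem's remap_file_range path calls to verify that two candidate ranges are byte-identical before deduplicating them. An illustrative caller, with error handling trimmed and example_fs_iomap_ops as a placeholder for the filesystem's iomap ops:

	bool same = false;
	int error;

	error = dax_dedupe_file_range_compare(src_inode, src_off,
					      dst_inode, dst_off, len,
					      &same, &example_fs_iomap_ops);
	if (error)
		return error;
	if (!same)
		return -EBADE;	/* contents differ; refuse the dedupe */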