diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-11 13:10:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-11 13:10:57 -0700 |
commit | 89fd915c402113528750353ad6de9ea68a787e5c (patch) | |
tree | 03ba8e8e6400e43ef518393259941eae39ee32d8 | |
parent | 66c9457df3926efff65529dab1a8c742df756790 (diff) | |
parent | 04c3c982fcc0151ed3574d7ae4f1e62278054d72 (diff) | |
download | lwn-89fd915c402113528750353ad6de9ea68a787e5c.tar.gz lwn-89fd915c402113528750353ad6de9ea68a787e5c.zip |
Merge tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm from Dan Williams:
"A rework of media error handling in the BTT driver and other updates.
It has appeared in a few -next releases and collected some late-
breaking build-error and warning fixups as a result.
Summary:
- Media error handling support in the Block Translation Table (BTT)
driver is reworked to address sleeping-while-atomic locking and
memory-allocation-context conflicts.
- The dax_device lookup overhead for xfs and ext4 is moved out of the
iomap hot-path to a mount-time lookup.
- A new 'ecc_unit_size' sysfs attribute is added to advertise the
read-modify-write boundary property of a persistent memory range.
- Preparatory fix-ups for arm and powerpc pmem support are included
along with other miscellaneous fixes"
* tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (26 commits)
libnvdimm, btt: fix format string warnings
libnvdimm, btt: clean up warning and error messages
ext4: fix null pointer dereference on sbi
libnvdimm, nfit: move the check on nd_reserved2 to the endpoint
dax: fix FS_DAX=n BLOCK=y compilation
libnvdimm: fix integer overflow static analysis warning
libnvdimm, nd_blk: remove mmio_flush_range()
libnvdimm, btt: rework error clearing
libnvdimm: fix potential deadlock while clearing errors
libnvdimm, btt: cache sector_size in arena_info
libnvdimm, btt: ensure that flags were also unchanged during a map_read
libnvdimm, btt: refactor map entry operations with macros
libnvdimm, btt: fix a missed NVDIMM_IO_ATOMIC case in the write path
libnvdimm, nfit: export an 'ecc_unit_size' sysfs attribute
ext4: perform dax_device lookup at mount
ext2: perform dax_device lookup at mount
xfs: perform dax_device lookup at mount
dax: introduce a fs_dax_get_by_bdev() helper
libnvdimm, btt: check memory allocation failure
libnvdimm, label: fix index block size calculation
...
33 files changed, 397 insertions, 170 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a3e6e6136a47..971feac13506 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -53,7 +53,6 @@ config X86 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 - select ARCH_HAS_MMIO_FLUSH select ARCH_HAS_PMEM_API if X86_64 # Causing hangs/crashes, see the commit that added this change for details. select ARCH_HAS_REFCOUNT if BROKEN diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 8b4140f6724f..cb9a1af109b4 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -7,6 +7,4 @@ void clflush_cache_range(void *addr, unsigned int size); -#define mmio_flush_range(addr, size) clflush_cache_range(addr, size) - #endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/drivers/acpi/nfit/Kconfig b/drivers/acpi/nfit/Kconfig index 6d3351452ea2..929ba4da0b30 100644 --- a/drivers/acpi/nfit/Kconfig +++ b/drivers/acpi/nfit/Kconfig @@ -2,7 +2,7 @@ config ACPI_NFIT tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" depends on PHYS_ADDR_T_64BIT depends on BLK_DEV - depends on ARCH_HAS_MMIO_FLUSH + depends on ARCH_HAS_PMEM_API select LIBNVDIMM help Infrastructure to probe ACPI 6 compliant platforms for diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 1893e416e7c0..9c2c49b6a240 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -228,6 +228,10 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, if (cmd == ND_CMD_CALL) { call_pkg = buf; func = call_pkg->nd_command; + + for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) + if (call_pkg->nd_reserved2[i]) + return -EINVAL; } if (nvdimm) { @@ -1674,8 +1678,19 @@ static ssize_t range_index_show(struct device *dev, } static DEVICE_ATTR_RO(range_index); +static ssize_t ecc_unit_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region); + + return sprintf(buf, "%d\n", nfit_spa->clear_err_unit); +} +static DEVICE_ATTR_RO(ecc_unit_size); + static struct attribute *acpi_nfit_region_attributes[] = { &dev_attr_range_index.attr, + &dev_attr_ecc_unit_size.attr, NULL, }; @@ -1804,6 +1819,7 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, spa->range_index, i); + struct acpi_nfit_control_region *dcr = nfit_mem->dcr; if (!memdev || !nfit_mem->dcr) { dev_err(dev, "%s: failed to find DCR\n", __func__); @@ -1811,13 +1827,13 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, } map->region_offset = memdev->region_offset; - map->serial_number = nfit_mem->dcr->serial_number; + map->serial_number = dcr->serial_number; map2->region_offset = memdev->region_offset; - map2->serial_number = nfit_mem->dcr->serial_number; - map2->vendor_id = nfit_mem->dcr->vendor_id; - map2->manufacturing_date = nfit_mem->dcr->manufacturing_date; - map2->manufacturing_location = nfit_mem->dcr->manufacturing_location; + map2->serial_number = dcr->serial_number; + map2->vendor_id = dcr->vendor_id; + map2->manufacturing_date = dcr->manufacturing_date; + map2->manufacturing_location = dcr->manufacturing_location; } /* v1.1 namespaces */ @@ -1835,6 +1851,28 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, cmp_map_compat, NULL); nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + /* record the result of the sort for the mapping position */ + for (i = 0; i < nr; i++) { + struct nfit_set_info_map2 *map2 = &info2->mapping[i]; + int j; + + for (j = 0; j < nr; j++) { + struct nd_mapping_desc *mapping = &ndr_desc->mapping[j]; + struct nvdimm *nvdimm = mapping->nvdimm; + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_nfit_control_region *dcr = nfit_mem->dcr; + + if (map2->serial_number == dcr->serial_number && + map2->vendor_id == dcr->vendor_id && + map2->manufacturing_date == dcr->manufacturing_date && + map2->manufacturing_location + == dcr->manufacturing_location) { + mapping->position = i; + break; + } + } + } + ndr_desc->nd_set = nd_set; devm_kfree(dev, info); devm_kfree(dev, info2); @@ -1930,7 +1968,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c); else { if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) - mmio_flush_range((void __force *) + arch_invalidate_pmem((void __force *) mmio->addr.aperture + offset, c); memcpy(iobuf + copied, mmio->addr.aperture + offset, c); diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 938eb4868f7f..3600ff786646 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -46,6 +46,8 @@ void dax_read_unlock(int id) EXPORT_SYMBOL_GPL(dax_read_unlock); #ifdef CONFIG_BLOCK +#include <linux/blkdev.h> + int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, pgoff_t *pgoff) { @@ -59,6 +61,16 @@ int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, } EXPORT_SYMBOL(bdev_dax_pgoff); +#if IS_ENABLED(CONFIG_FS_DAX) +struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) +{ + if (!blk_queue_dax(bdev->bd_queue)) + return NULL; + return fs_dax_get_by_host(bdev->bd_disk->disk_name); +} +EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); +#endif + /** * __bdev_dax_supported() - Check if the device supports dax for filesystem * @sb: The superblock of the device diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 60491641a8d6..d5612bd1cc81 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -31,6 +31,16 @@ enum log_ent_request { LOG_OLD_ENT }; +static struct device *to_dev(struct arena_info *arena) +{ + return &arena->nd_btt->dev; +} + +static u64 adjust_initial_offset(struct nd_btt *nd_btt, u64 offset) +{ + return offset + nd_btt->initial_offset; +} + static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, void *buf, size_t n, unsigned long flags) { @@ -38,7 +48,7 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, struct nd_namespace_common *ndns = nd_btt->ndns; /* arena offsets may be shifted from the base of the device */ - offset += arena->nd_btt->initial_offset; + offset = adjust_initial_offset(nd_btt, offset); return nvdimm_read_bytes(ndns, offset, buf, n, flags); } @@ -49,7 +59,7 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset, struct nd_namespace_common *ndns = nd_btt->ndns; /* arena offsets may be shifted from the base of the device */ - offset += arena->nd_btt->initial_offset; + offset = adjust_initial_offset(nd_btt, offset); return nvdimm_write_bytes(ndns, offset, buf, n, flags); } @@ -62,8 +72,10 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super) * We rely on that to make sure rw_bytes does error clearing * correctly, so make sure that is the case. */ - WARN_ON_ONCE(!IS_ALIGNED(arena->infooff, 512)); - WARN_ON_ONCE(!IS_ALIGNED(arena->info2off, 512)); + dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->infooff, 512), + "arena->infooff: %#llx is unaligned\n", arena->infooff); + dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512), + "arena->info2off: %#llx is unaligned\n", arena->info2off); ret = arena_write_bytes(arena, arena->info2off, super, sizeof(struct btt_sb), 0); @@ -76,7 +88,6 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super) static int btt_info_read(struct arena_info *arena, struct btt_sb *super) { - WARN_ON(!super); return arena_read_bytes(arena, arena->infooff, super, sizeof(struct btt_sb), 0); } @@ -92,7 +103,10 @@ static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping, { u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); - WARN_ON(lba >= arena->external_nlba); + if (unlikely(lba >= arena->external_nlba)) + dev_err_ratelimited(to_dev(arena), + "%s: lba %#x out of range (max: %#x)\n", + __func__, lba, arena->external_nlba); return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags); } @@ -106,7 +120,7 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping, * This 'mapping' is supposed to be just the LBA mapping, without * any flags set, so strip the flag bits. */ - mapping &= MAP_LBA_MASK; + mapping = ent_lba(mapping); ze = (z_flag << 1) + e_flag; switch (ze) { @@ -131,7 +145,8 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping, * construed as a valid 'normal' case, but we decide not to, * to avoid confusion */ - WARN_ONCE(1, "Invalid use of Z and E flags\n"); + dev_err_ratelimited(to_dev(arena), + "Invalid use of Z and E flags\n"); return -EIO; } @@ -147,7 +162,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, u32 raw_mapping, postmap, ze, z_flag, e_flag; u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); - WARN_ON(lba >= arena->external_nlba); + if (unlikely(lba >= arena->external_nlba)) + dev_err_ratelimited(to_dev(arena), + "%s: lba %#x out of range (max: %#x)\n", + __func__, lba, arena->external_nlba); ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags); if (ret) @@ -155,10 +173,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, raw_mapping = le32_to_cpu(in); - z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT; - e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT; + z_flag = ent_z_flag(raw_mapping); + e_flag = ent_e_flag(raw_mapping); ze = (z_flag << 1) + e_flag; - postmap = raw_mapping & MAP_LBA_MASK; + postmap = ent_lba(raw_mapping); /* Reuse the {z,e}_flag variables for *trim and *error */ z_flag = 0; @@ -195,7 +213,6 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, static int btt_log_read_pair(struct arena_info *arena, u32 lane, struct log_entry *ent) { - WARN_ON(!ent); return arena_read_bytes(arena, arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, 2 * LOG_ENT_SIZE, 0); @@ -299,11 +316,6 @@ static int btt_log_get_old(struct log_entry *ent) return old; } -static struct device *to_dev(struct arena_info *arena) -{ - return &arena->nd_btt->dev; -} - /* * This function copies the desired (old/new) log entry into ent if * it is not NULL. It returns the sub-slot number (0 or 1) @@ -381,7 +393,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; if (++(arena->freelist[lane].seq) == 4) arena->freelist[lane].seq = 1; - arena->freelist[lane].block = le32_to_cpu(ent->old_map); + if (ent_e_flag(ent->old_map)) + arena->freelist[lane].has_err = 1; + arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map)); return ret; } @@ -407,12 +421,14 @@ static int btt_map_init(struct arena_info *arena) * make sure rw_bytes does error clearing correctly, so make sure that * is the case. */ - WARN_ON_ONCE(!IS_ALIGNED(arena->mapoff, 512)); + dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->mapoff, 512), + "arena->mapoff: %#llx is unaligned\n", arena->mapoff); while (mapsize) { size_t size = min(mapsize, chunk_size); - WARN_ON_ONCE(size < 512); + dev_WARN_ONCE(to_dev(arena), size < 512, + "chunk size: %#zx is unaligned\n", size); ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf, size, 0); if (ret) @@ -449,12 +465,14 @@ static int btt_log_init(struct arena_info *arena) * make sure rw_bytes does error clearing correctly, so make sure that * is the case. */ - WARN_ON_ONCE(!IS_ALIGNED(arena->logoff, 512)); + dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->logoff, 512), + "arena->logoff: %#llx is unaligned\n", arena->logoff); while (logsize) { size_t size = min(logsize, chunk_size); - WARN_ON_ONCE(size < 512); + dev_WARN_ONCE(to_dev(arena), size < 512, + "chunk size: %#zx is unaligned\n", size); ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf, size, 0); if (ret) @@ -480,6 +498,40 @@ static int btt_log_init(struct arena_info *arena) return ret; } +static u64 to_namespace_offset(struct arena_info *arena, u64 lba) +{ + return arena->dataoff + ((u64)lba * arena->internal_lbasize); +} + +static int arena_clear_freelist_error(struct arena_info *arena, u32 lane) +{ + int ret = 0; + + if (arena->freelist[lane].has_err) { + void *zero_page = page_address(ZERO_PAGE(0)); + u32 lba = arena->freelist[lane].block; + u64 nsoff = to_namespace_offset(arena, lba); + unsigned long len = arena->sector_size; + + mutex_lock(&arena->err_lock); + + while (len) { + unsigned long chunk = min(len, PAGE_SIZE); + + ret = arena_write_bytes(arena, nsoff, zero_page, + chunk, 0); + if (ret) + break; + len -= chunk; + nsoff += chunk; + if (len == 0) + arena->freelist[lane].has_err = 0; + } + mutex_unlock(&arena->err_lock); + } + return ret; +} + static int btt_freelist_init(struct arena_info *arena) { int old, new, ret; @@ -505,6 +557,17 @@ static int btt_freelist_init(struct arena_info *arena) arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq)); arena->freelist[i].block = le32_to_cpu(log_new.old_map); + /* + * FIXME: if error clearing fails during init, we want to make + * the BTT read-only + */ + if (ent_e_flag(log_new.old_map)) { + ret = arena_clear_freelist_error(arena, i); + if (ret) + dev_err_ratelimited(to_dev(arena), + "Unable to clear known errors\n"); + } + /* This implies a newly created or untouched flog entry */ if (log_new.old_map == log_new.new_map) continue; @@ -525,7 +588,6 @@ static int btt_freelist_init(struct arena_info *arena) if (ret) return ret; } - } return 0; @@ -566,6 +628,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, if (!arena) return NULL; arena->nd_btt = btt->nd_btt; + arena->sector_size = btt->sector_size; if (!size) return arena; @@ -694,6 +757,7 @@ static int discover_arenas(struct btt *btt) arena->external_lba_start = cur_nlba; parse_arena_meta(arena, super, cur_off); + mutex_init(&arena->err_lock); ret = btt_freelist_init(arena); if (ret) goto out; @@ -904,11 +968,6 @@ static void unlock_map(struct arena_info *arena, u32 premap) spin_unlock(&arena->map_locks[idx].lock); } -static u64 to_namespace_offset(struct arena_info *arena, u64 lba) -{ - return arena->dataoff + ((u64)lba * arena->internal_lbasize); -} - static int btt_data_read(struct arena_info *arena, struct page *page, unsigned int off, u32 lba, u32 len) { @@ -1032,6 +1091,7 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, */ while (1) { u32 new_map; + int new_t, new_e; if (t_flag) { zero_fill_data(page, off, cur_len); @@ -1050,20 +1110,29 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, */ barrier(); - ret = btt_map_read(arena, premap, &new_map, &t_flag, - &e_flag, NVDIMM_IO_ATOMIC); + ret = btt_map_read(arena, premap, &new_map, &new_t, + &new_e, NVDIMM_IO_ATOMIC); if (ret) goto out_rtt; - if (postmap == new_map) + if ((postmap == new_map) && (t_flag == new_t) && + (e_flag == new_e)) break; postmap = new_map; + t_flag = new_t; + e_flag = new_e; } ret = btt_data_read(arena, page, off, postmap, cur_len); - if (ret) + if (ret) { + int rc; + + /* Media error - set the e_flag */ + rc = btt_map_write(arena, premap, postmap, 0, 1, + NVDIMM_IO_ATOMIC); goto out_rtt; + } if (bip) { ret = btt_rw_integrity(btt, bip, arena, postmap, READ); @@ -1088,6 +1157,21 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, return ret; } +/* + * Normally, arena_{read,write}_bytes will take care of the initial offset + * adjustment, but in the case of btt_is_badblock, where we query is_bad_pmem, + * we need the final, raw namespace offset here + */ +static bool btt_is_badblock(struct btt *btt, struct arena_info *arena, + u32 postmap) +{ + u64 nsoff = adjust_initial_offset(arena->nd_btt, + to_namespace_offset(arena, postmap)); + sector_t phys_sector = nsoff >> 9; + + return is_bad_pmem(btt->phys_bb, phys_sector, arena->internal_lbasize); +} + static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, sector_t sector, struct page *page, unsigned int off, unsigned int len) @@ -1100,7 +1184,9 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, while (len) { u32 cur_len; + int e_flag; + retry: lane = nd_region_acquire_lane(btt->nd_region); ret = lba_to_arena(btt, sector, &premap, &arena); @@ -1113,6 +1199,21 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, goto out_lane; } + if (btt_is_badblock(btt, arena, arena->freelist[lane].block)) + arena->freelist[lane].has_err = 1; + + if (mutex_is_locked(&arena->err_lock) + || arena->freelist[lane].has_err) { + nd_region_release_lane(btt->nd_region, lane); + + ret = arena_clear_freelist_error(arena, lane); + if (ret) + return ret; + + /* OK to acquire a different lane/free block */ + goto retry; + } + new_postmap = arena->freelist[lane].block; /* Wait if the new block is being read from */ @@ -1138,7 +1239,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, } lock_map(arena, premap); - ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL, + ret = btt_map_read(arena, premap, &old_postmap, NULL, &e_flag, NVDIMM_IO_ATOMIC); if (ret) goto out_map; @@ -1146,6 +1247,8 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, ret = -EIO; goto out_map; } + if (e_flag) + set_e_flag(old_postmap); log.lba = cpu_to_le32(premap); log.old_map = cpu_to_le32(old_postmap); @@ -1156,13 +1259,20 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, if (ret) goto out_map; - ret = btt_map_write(arena, premap, new_postmap, 0, 0, 0); + ret = btt_map_write(arena, premap, new_postmap, 0, 0, + NVDIMM_IO_ATOMIC); if (ret) goto out_map; unlock_map(arena, premap); nd_region_release_lane(btt->nd_region, lane); + if (e_flag) { + ret = arena_clear_freelist_error(arena, lane); + if (ret) + return ret; + } + len -= cur_len; off += cur_len; sector += btt->sector_size >> SECTOR_SHIFT; @@ -1211,11 +1321,13 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; - BUG_ON(len > PAGE_SIZE); - /* Make sure len is in multiples of sector size. */ - /* XXX is this right? */ - BUG_ON(len < btt->sector_size); - BUG_ON(len % btt->sector_size); + if (len > PAGE_SIZE || len < btt->sector_size || + len % btt->sector_size) { + dev_err_ratelimited(&btt->nd_btt->dev, + "unaligned bio segment (len: %d)\n", len); + bio->bi_status = BLK_STS_IOERR; + break; + } err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, op_is_write(bio_op(bio)), iter.bi_sector); @@ -1345,6 +1457,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, { int ret; struct btt *btt; + struct nd_namespace_io *nsio; struct device *dev = &nd_btt->dev; btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL); @@ -1358,6 +1471,8 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, INIT_LIST_HEAD(&btt->arena_list); mutex_init(&btt->init_lock); btt->nd_region = nd_region; + nsio = to_nd_namespace_io(&nd_btt->ndns->dev); + btt->phys_bb = &nsio->bb; ret = discover_arenas(btt); if (ret) { @@ -1431,6 +1546,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) } btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL); + if (!btt_sb) + return -ENOMEM; /* * If this returns < 0, that is ok as it just means there wasn't diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 888e862907a0..578c2057524d 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -15,6 +15,7 @@ #ifndef _LINUX_BTT_H #define _LINUX_BTT_H +#include <linux/badblocks.h> #include <linux/types.h> #define BTT_SIG_LEN 16 @@ -38,6 +39,11 @@ #define IB_FLAG_ERROR 0x00000001 #define IB_FLAG_ERROR_MASK 0x00000001 +#define ent_lba(ent) (ent & MAP_LBA_MASK) +#define ent_e_flag(ent) (!!(ent & MAP_ERR_MASK)) +#define ent_z_flag(ent) (!!(ent & MAP_TRIM_MASK)) +#define set_e_flag(ent) (ent |= MAP_ERR_MASK) + enum btt_init_state { INIT_UNCHECKED = 0, INIT_NOTFOUND, @@ -78,6 +84,7 @@ struct free_entry { u32 block; u8 sub; u8 seq; + u8 has_err; }; struct aligned_lock { @@ -104,6 +111,7 @@ struct aligned_lock { * handle incoming writes. * @version_major: Metadata layout version major. * @version_minor: Metadata layout version minor. + * @sector_size: The Linux sector size - 512 or 4096 * @nextoff: Offset in bytes to the start of the next arena. * @infooff: Offset in bytes to the info block of this arena. * @dataoff: Offset in bytes to the data area of this arena. @@ -131,6 +139,7 @@ struct arena_info { u32 nfree; u16 version_major; u16 version_minor; + u32 sector_size; /* Byte offsets to the different on-media structures */ u64 nextoff; u64 infooff; @@ -147,6 +156,7 @@ struct arena_info { struct dentry *debugfs_dir; /* Arena flags */ u32 flags; + struct mutex err_lock; }; /** @@ -181,6 +191,7 @@ struct btt { struct mutex init_lock; int init_state; int num_arenas; + struct badblocks *phys_bb; }; bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super); diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 3e359d282f8e..d58925295aa7 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -61,7 +61,7 @@ static ssize_t sector_size_show(struct device *dev, { struct nd_btt *nd_btt = to_nd_btt(dev); - return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf); + return nd_size_select_show(nd_btt->lbasize, btt_lbasize_supported, buf); } static ssize_t sector_size_store(struct device *dev, @@ -72,7 +72,7 @@ static ssize_t sector_size_store(struct device *dev, device_lock(dev); nvdimm_bus_lock(dev); - rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize, + rc = nd_size_select_store(dev, buf, &nd_btt->lbasize, btt_lbasize_supported); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? "" : "\n"); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 937fafa1886a..baf283986a7e 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -11,6 +11,7 @@ * General Public License for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/sched/mm.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> #include <linux/module.h> @@ -234,6 +235,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, struct nd_cmd_clear_error clear_err; struct nd_cmd_ars_cap ars_cap; u32 clear_err_unit, mask; + unsigned int noio_flag; int cmd_rc, rc; if (!nvdimm_bus) @@ -250,8 +252,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, memset(&ars_cap, 0, sizeof(ars_cap)); ars_cap.address = phys; ars_cap.length = len; + noio_flag = memalloc_noio_save(); rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap, sizeof(ars_cap), &cmd_rc); + memalloc_noio_restore(noio_flag); if (rc < 0) return rc; if (cmd_rc < 0) @@ -266,8 +270,10 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, memset(&clear_err, 0, sizeof(clear_err)); clear_err.address = phys; clear_err.length = len; + noio_flag = memalloc_noio_save(); rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err, sizeof(clear_err), &cmd_rc); + memalloc_noio_restore(noio_flag); if (rc < 0) return rc; if (cmd_rc < 0) @@ -905,19 +911,20 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, int read_only, unsigned int ioctl_cmd, unsigned long arg) { struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; - size_t buf_len = 0, in_len = 0, out_len = 0; static char out_env[ND_CMD_MAX_ENVELOPE]; static char in_env[ND_CMD_MAX_ENVELOPE]; const struct nd_cmd_desc *desc = NULL; unsigned int cmd = _IOC_NR(ioctl_cmd); - unsigned int func = cmd; - void __user *p = (void __user *) arg; struct device *dev = &nvdimm_bus->dev; - struct nd_cmd_pkg pkg; + void __user *p = (void __user *) arg; const char *cmd_name, *dimm_name; + u32 in_len = 0, out_len = 0; + unsigned int func = cmd; unsigned long cmd_mask; - void *buf; + struct nd_cmd_pkg pkg; int rc, i, cmd_rc; + u64 buf_len = 0; + void *buf; if (nvdimm) { desc = nd_cmd_dimm_desc(cmd); @@ -977,13 +984,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, if (cmd == ND_CMD_CALL) { func = pkg.nd_command; - dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n", + dev_dbg(dev, "%s:%s, idx: %llu, in: %u, out: %u, len %llu\n", __func__, dimm_name, pkg.nd_command, in_len, out_len, buf_len); - - for (i = 0; i < ARRAY_SIZE(pkg.nd_reserved2); i++) - if (pkg.nd_reserved2[i]) - return -EINVAL; } /* process an output envelope */ @@ -1007,9 +1010,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, out_len += out_size; } - buf_len = out_len + in_len; + buf_len = (u64) out_len + (u64) in_len; if (buf_len > ND_IOCTL_MAX_BUFLEN) { - dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, + dev_dbg(dev, "%s:%s cmd: %s buf_len: %llu > %d\n", __func__, dimm_name, cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN); return -EINVAL; diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 47770460f3d3..b2fc29b8279b 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -280,18 +280,11 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, } if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) { - /* - * FIXME: nsio_rw_bytes() may be called from atomic - * context in the btt case and the ACPI DSM path for - * clearing the error takes sleeping locks and allocates - * memory. An explicit error clearing path, and support - * for tracking badblocks in BTT metadata is needed to - * work around this collision. - */ if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512) && !(flags & NVDIMM_IO_ATOMIC)) { long cleared; + might_sleep(); cleared = nvdimm_clear_poison(&ndns->dev, nsio->res.start + offset, size); if (cleared < size) diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 75bc08c6838c..bb71f0cf8f5d 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -277,14 +277,14 @@ int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, return 0; } -ssize_t nd_sector_size_show(unsigned long current_lbasize, +ssize_t nd_size_select_show(unsigned long current_size, const unsigned long *supported, char *buf) { ssize_t len = 0; int i; for (i = 0; supported[i]; i++) - if (current_lbasize == supported[i]) + if (current_size == supported[i]) len += sprintf(buf + len, "[%ld] ", supported[i]); else len += sprintf(buf + len, "%ld ", supported[i]); @@ -292,8 +292,8 @@ ssize_t nd_sector_size_show(unsigned long current_lbasize, return len; } -ssize_t nd_sector_size_store(struct device *dev, const char *buf, - unsigned long *current_lbasize, const unsigned long *supported) +ssize_t nd_size_select_store(struct device *dev, const char *buf, + unsigned long *current_size, const unsigned long *supported) { unsigned long lbasize; int rc, i; @@ -310,7 +310,7 @@ ssize_t nd_sector_size_store(struct device *dev, const char *buf, break; if (supported[i]) { - *current_lbasize = lbasize; + *current_size = lbasize; return 0; } else { return -EINVAL; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 87796f840777..9c5f108910e3 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -45,12 +45,14 @@ unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd) return ndd->nslabel_size; } -size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) +int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd) { - u32 index_span; + return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1); +} - if (ndd->nsindex_size) - return ndd->nsindex_size; +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) +{ + u32 nslot, space, size; /* * The minimum index space is 512 bytes, with that amount of @@ -60,16 +62,16 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) * starts to waste space at larger config_sizes, but it's * unlikely we'll ever see anything but 128K. */ - index_span = ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1); - index_span /= NSINDEX_ALIGN * 2; - ndd->nsindex_size = index_span * NSINDEX_ALIGN; - - return ndd->nsindex_size; -} - -int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd) -{ - return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1); + nslot = nvdimm_num_label_slots(ndd); + space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd); + size = ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8), + NSINDEX_ALIGN) * 2; + if (size <= space) + return size / 2; + + dev_err(ndd->dev, "label area (%d) too small to host (%d byte) labels\n", + ndd->nsarea.config_size, sizeof_namespace_label(ndd)); + return 0; } static int __nd_label_validate(struct nvdimm_drvdata *ndd) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 5f1c6756e57c..1427a386a033 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1313,14 +1313,14 @@ static ssize_t sector_size_show(struct device *dev, if (is_namespace_blk(dev)) { struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); - return nd_sector_size_show(nsblk->lbasize, + return nd_size_select_show(nsblk->lbasize, blk_lbasize_supported, buf); } if (is_namespace_pmem(dev)) { struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); - return nd_sector_size_show(nspm->lbasize, + return nd_size_select_show(nspm->lbasize, pmem_lbasize_supported, buf); } return -ENXIO; @@ -1352,7 +1352,7 @@ static ssize_t sector_size_store(struct device *dev, if (to_ndns(dev)->claim) rc = -EBUSY; if (rc >= 0) - rc = nd_sector_size_store(dev, buf, lbasize, supported); + rc = nd_size_select_store(dev, buf, lbasize, supported); if (rc >= 0) rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__, diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index a87f793f2945..9c758a91372b 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -42,7 +42,7 @@ struct nd_poison { struct nvdimm_drvdata { struct device *dev; - int nsindex_size, nslabel_size; + int nslabel_size; struct nd_cmd_get_config_size nsarea; void *data; int ns_current, ns_next; @@ -134,6 +134,7 @@ struct nd_mapping { struct nvdimm *nvdimm; u64 start; u64 size; + int position; struct list_head labels; struct mutex lock; /* @@ -233,10 +234,10 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode); void nd_device_notify(struct device *dev, enum nvdimm_event event); int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, size_t len); -ssize_t nd_sector_size_show(unsigned long current_lbasize, +ssize_t nd_size_select_show(unsigned long current_size, const unsigned long *supported, char *buf); -ssize_t nd_sector_size_store(struct device *dev, const char *buf, - unsigned long *current_lbasize, const unsigned long *supported); +ssize_t nd_size_select_store(struct device *dev, const char *buf, + unsigned long *current_size, const unsigned long *supported); int __init nvdimm_init(void); int __init nd_region_init(void); int __init nd_label_init(void); @@ -285,6 +286,13 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) struct nd_pfn *to_nd_pfn(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_PFN) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE +#else +#define PFN_DEFAULT_ALIGNMENT PAGE_SIZE +#endif + int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 5fcb6f5b22a2..9576c444f0ab 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -111,24 +111,27 @@ static ssize_t align_show(struct device *dev, return sprintf(buf, "%ld\n", nd_pfn->align); } -static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf) +static const unsigned long *nd_pfn_supported_alignments(void) { - unsigned long val; - int rc; - - rc = kstrtoul(buf, 0, &val); - if (rc) - return rc; - - if (!is_power_of_2(val) || val < PAGE_SIZE || val > SZ_1G) - return -EINVAL; + /* + * This needs to be a non-static variable because the *_SIZE + * macros aren't always constants. + */ + const unsigned long supported_alignments[] = { + PAGE_SIZE, +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + HPAGE_PMD_SIZE, +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + HPAGE_PUD_SIZE, +#endif +#endif + 0, + }; + static unsigned long data[ARRAY_SIZE(supported_alignments)]; - if (nd_pfn->dev.driver) - return -EBUSY; - else - nd_pfn->align = val; + memcpy(data, supported_alignments, sizeof(data)); - return 0; + return data; } static ssize_t align_store(struct device *dev, @@ -139,7 +142,8 @@ static ssize_t align_store(struct device *dev, device_lock(dev); nvdimm_bus_lock(dev); - rc = __align_store(nd_pfn, buf); + rc = nd_size_select_store(dev, buf, &nd_pfn->align, + nd_pfn_supported_alignments()); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); @@ -260,6 +264,13 @@ static ssize_t size_show(struct device *dev, } static DEVICE_ATTR_RO(size); +static ssize_t supported_alignments_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return nd_size_select_show(0, nd_pfn_supported_alignments(), buf); +} +static DEVICE_ATTR_RO(supported_alignments); + static struct attribute *nd_pfn_attributes[] = { &dev_attr_mode.attr, &dev_attr_namespace.attr, @@ -267,6 +278,7 @@ static struct attribute *nd_pfn_attributes[] = { &dev_attr_align.attr, &dev_attr_resource.attr, &dev_attr_size.attr, + &dev_attr_supported_alignments.attr, NULL, }; @@ -290,7 +302,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, return NULL; nd_pfn->mode = PFN_MODE_NONE; - nd_pfn->align = HPAGE_SIZE; + nd_pfn->align = PFN_DEFAULT_ALIGNMENT; dev = &nd_pfn->dev; device_initialize(&nd_pfn->dev); if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { @@ -638,11 +650,12 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) / PAGE_SIZE); if (nd_pfn->mode == PFN_MODE_PMEM) { /* - * vmemmap_populate_hugepages() allocates the memmap array in - * HPAGE_SIZE chunks. + * The altmap should be padded out to the block size used + * when populating the vmemmap. This *should* be equal to + * PMD_SIZE for most architectures. */ offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve, - max(nd_pfn->align, HPAGE_SIZE)) - start; + max(nd_pfn->align, PMD_SIZE)) - start; } else if (nd_pfn->mode == PFN_MODE_RAM) offset = ALIGN(start + SZ_8K + dax_label_reserve, nd_pfn->align) - start; diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index 5434321cad67..c5917f040fa7 100644 --- a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h @@ -5,20 +5,6 @@ #include <linux/pfn_t.h> #include <linux/fs.h> -#ifdef CONFIG_ARCH_HAS_PMEM_API -#define ARCH_MEMREMAP_PMEM MEMREMAP_WB -void arch_wb_cache_pmem(void *addr, size_t size); -void arch_invalidate_pmem(void *addr, size_t size); -#else -#define ARCH_MEMREMAP_PMEM MEMREMAP_WT -static inline void arch_wb_cache_pmem(void *addr, size_t size) -{ -} -static inline void arch_invalidate_pmem(void *addr, size_t size) -{ -} -#endif - /* this definition is in it's own header for tools/testing/nvdimm to consume */ struct pmem_device { /* One contiguous memory region per device */ diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 5954cfbea3fc..829d760f651c 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -723,8 +723,9 @@ static ssize_t mappingN(struct device *dev, char *buf, int n) nd_mapping = &nd_region->mapping[n]; nvdimm = nd_mapping->nvdimm; - return sprintf(buf, "%s,%llu,%llu\n", dev_name(&nvdimm->dev), - nd_mapping->start, nd_mapping->size); + return sprintf(buf, "%s,%llu,%llu,%d\n", dev_name(&nvdimm->dev), + nd_mapping->start, nd_mapping->size, + nd_mapping->position); } #define REGION_MAPPING(idx) \ @@ -965,6 +966,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, nd_region->mapping[i].nvdimm = nvdimm; nd_region->mapping[i].start = mapping->start; nd_region->mapping[i].size = mapping->size; + nd_region->mapping[i].position = mapping->position; INIT_LIST_HEAD(&nd_region->mapping[i].labels); mutex_init(&nd_region->mapping[i].lock); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 23ebb92484c6..28de3edd4f4d 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -114,6 +114,7 @@ struct ext2_sb_info { */ spinlock_t s_lock; struct mb_cache *s_ea_block_cache; + struct dax_device *s_daxdev; }; static inline spinlock_t * diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 30163d007b2f..4dca6f348714 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -800,10 +800,10 @@ int ext2_get_block(struct inode *inode, sector_t iblock, static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned flags, struct iomap *iomap) { - struct block_device *bdev; unsigned int blkbits = inode->i_blkbits; unsigned long first_block = offset >> blkbits; unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits; + struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb); bool new = false, boundary = false; u32 bno; int ret; @@ -814,13 +814,9 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, return ret; iomap->flags = 0; - bdev = inode->i_sb->s_bdev; - iomap->bdev = bdev; + iomap->bdev = inode->i_sb->s_bdev; iomap->offset = (u64)first_block << blkbits; - if (blk_queue_dax(bdev->bd_queue)) - iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); - else - iomap->dax_dev = NULL; + iomap->dax_dev = sbi->s_daxdev; if (ret == 0) { iomap->type = IOMAP_HOLE; @@ -842,7 +838,6 @@ static int ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap) { - fs_put_dax(iomap->dax_dev); if (iomap->type == IOMAP_MAPPED && written < length && (flags & IOMAP_WRITE)) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7b1bc9059863..fc18edd81815 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -171,6 +171,7 @@ static void ext2_put_super (struct super_block * sb) brelse (sbi->s_sbh); sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); + fs_put_dax(sbi->s_daxdev); kfree(sbi); } @@ -813,6 +814,7 @@ static unsigned long descriptor_loc(struct super_block *sb, static int ext2_fill_super(struct super_block *sb, void *data, int silent) { + struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); struct buffer_head * bh; struct ext2_sb_info * sbi; struct ext2_super_block * es; @@ -842,6 +844,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) } sb->s_fs_info = sbi; sbi->s_sb_block = sb_block; + sbi->s_daxdev = dax_dev; spin_lock_init(&sbi->s_lock); @@ -1200,6 +1203,7 @@ failed_sbi: kfree(sbi->s_blockgroup_lock); kfree(sbi); failed: + fs_put_dax(dax_dev); return ret; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 84b9da192238..e2abe01c8c6b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1526,6 +1526,7 @@ struct ext4_sb_info { /* Barrier between changing inodes' journal flags and writepages ops. */ struct percpu_rw_semaphore s_journal_flag_rwsem; + struct dax_device *s_daxdev; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e963508ea35f..31db875bc7a1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3397,7 +3397,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait) static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned flags, struct iomap *iomap) { - struct block_device *bdev; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); unsigned int blkbits = inode->i_blkbits; unsigned long first_block = offset >> blkbits; unsigned long last_block = (offset + length - 1) >> blkbits; @@ -3466,12 +3466,8 @@ retry: } iomap->flags = 0; - bdev = inode->i_sb->s_bdev; - iomap->bdev = bdev; - if (blk_queue_dax(bdev->bd_queue)) - iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); - else - iomap->dax_dev = NULL; + iomap->bdev = inode->i_sb->s_bdev; + iomap->dax_dev = sbi->s_daxdev; iomap->offset = first_block << blkbits; if (ret == 0) { @@ -3504,7 +3500,6 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, int blkbits = inode->i_blkbits; bool truncate = false; - fs_put_dax(iomap->dax_dev); if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) return 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 93aece6891f2..71b9a667e1bc 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -951,6 +951,7 @@ static void ext4_put_super(struct super_block *sb) if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->s_blockgroup_lock); + fs_put_dax(sbi->s_daxdev); kfree(sbi); } @@ -3398,6 +3399,7 @@ static void ext4_set_resv_clusters(struct super_block *sb) static int ext4_fill_super(struct super_block *sb, void *data, int silent) { + struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); char *orig_data = kstrdup(data, GFP_KERNEL); struct buffer_head *bh; struct ext4_super_block *es = NULL; @@ -3423,6 +3425,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if ((data && !orig_data) || !sbi) goto out_free_base; + sbi->s_daxdev = dax_dev; sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); if (!sbi->s_blockgroup_lock) @@ -4399,6 +4402,7 @@ out_fail: out_free_base: kfree(sbi); kfree(orig_data); + fs_put_dax(dax_dev); return err ? err : ret; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index fffae1390d7f..29172609f2a3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -80,6 +80,19 @@ xfs_find_bdev_for_inode( return mp->m_ddev_targp->bt_bdev; } +struct dax_device * +xfs_find_daxdev_for_inode( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + if (XFS_IS_REALTIME_INODE(ip)) + return mp->m_rtdev_targp->bt_daxdev; + else + return mp->m_ddev_targp->bt_daxdev; +} + /* * We're now finished for good with this page. Update the page state via the * associated buffer_heads, paying attention to the start and end offsets that diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index cc174ec6c2fd..88c85ea63da0 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -59,5 +59,6 @@ int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); extern void xfs_count_page_state(struct page *, int *, int *); extern struct block_device *xfs_find_bdev_for_inode(struct inode *); +extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *); #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b1c9711e79a4..da14658da310 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1802,7 +1802,8 @@ xfs_setsize_buftarg_early( xfs_buftarg_t * xfs_alloc_buftarg( struct xfs_mount *mp, - struct block_device *bdev) + struct block_device *bdev, + struct dax_device *dax_dev) { xfs_buftarg_t *btp; @@ -1811,6 +1812,7 @@ xfs_alloc_buftarg( btp->bt_mount = mp; btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; + btp->bt_daxdev = dax_dev; if (xfs_setsize_buftarg_early(btp, bdev)) goto error; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 20721261dae5..bf71507ddb16 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -108,6 +108,7 @@ typedef unsigned int xfs_buf_flags_t; typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; + struct dax_device *bt_daxdev; struct xfs_mount *bt_mount; unsigned int bt_meta_sectorsize; size_t bt_meta_sectormask; @@ -385,7 +386,7 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) * Handling of buftargs. */ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, - struct block_device *); + struct block_device *, struct dax_device *); extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 79cb5b3d140c..a1909bc064e9 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -69,6 +69,7 @@ xfs_bmbt_to_iomap( iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff); iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount); iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); + iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip)); } xfs_extlen_t @@ -975,7 +976,6 @@ xfs_file_iomap_begin( int nimaps = 1, error = 0; bool shared = false, trimmed = false; unsigned lockmode; - struct block_device *bdev; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; @@ -1085,13 +1085,6 @@ xfs_file_iomap_begin( xfs_bmbt_to_iomap(ip, iomap, &imap); - /* optionally associate a dax device with the iomap bdev */ - bdev = iomap->bdev; - if (blk_queue_dax(bdev->bd_queue)) - iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); - else - iomap->dax_dev = NULL; - if (shared) iomap->flags |= IOMAP_F_SHARED; return 0; @@ -1169,7 +1162,6 @@ xfs_file_iomap_end( unsigned flags, struct iomap *iomap) { - fs_put_dax(iomap->dax_dev); if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, length, written, iomap); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c1c4c2ea1014..3008f31753df 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -714,17 +714,26 @@ STATIC void xfs_close_devices( struct xfs_mount *mp) { + struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev; + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { struct block_device *logdev = mp->m_logdev_targp->bt_bdev; + struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev; + xfs_free_buftarg(mp, mp->m_logdev_targp); xfs_blkdev_put(logdev); + fs_put_dax(dax_logdev); } if (mp->m_rtdev_targp) { struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; + struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev; + xfs_free_buftarg(mp, mp->m_rtdev_targp); xfs_blkdev_put(rtdev); + fs_put_dax(dax_rtdev); } xfs_free_buftarg(mp, mp->m_ddev_targp); + fs_put_dax(dax_ddev); } /* @@ -742,6 +751,8 @@ xfs_open_devices( struct xfs_mount *mp) { struct block_device *ddev = mp->m_super->s_bdev; + struct dax_device *dax_ddev = fs_dax_get_by_bdev(ddev); + struct dax_device *dax_logdev = NULL, *dax_rtdev = NULL; struct block_device *logdev = NULL, *rtdev = NULL; int error; @@ -752,6 +763,7 @@ xfs_open_devices( error = xfs_blkdev_get(mp, mp->m_logname, &logdev); if (error) goto out; + dax_logdev = fs_dax_get_by_bdev(logdev); } if (mp->m_rtname) { @@ -765,24 +777,25 @@ xfs_open_devices( error = -EINVAL; goto out_close_rtdev; } + dax_rtdev = fs_dax_get_by_bdev(rtdev); } /* * Setup xfs_mount buffer target pointers */ error = -ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev); + mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev); if (!mp->m_ddev_targp) goto out_close_rtdev; if (rtdev) { - mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev); + mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev); if (!mp->m_rtdev_targp) goto out_free_ddev_targ; } if (logdev && logdev != ddev) { - mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev); + mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev); if (!mp->m_logdev_targp) goto out_free_rtdev_targ; } else { @@ -798,10 +811,14 @@ xfs_open_devices( xfs_free_buftarg(mp, mp->m_ddev_targp); out_close_rtdev: xfs_blkdev_put(rtdev); + fs_put_dax(dax_rtdev); out_close_logdev: - if (logdev && logdev != ddev) + if (logdev && logdev != ddev) { xfs_blkdev_put(logdev); + fs_put_dax(dax_logdev); + } out: + fs_put_dax(dax_ddev); return error; } diff --git a/include/linux/dax.h b/include/linux/dax.h index eb0bff6f1eab..46cad1d0f129 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -57,6 +57,7 @@ static inline void fs_put_dax(struct dax_device *dax_dev) put_dax(dax_dev); } +struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); #else static inline int bdev_dax_supported(struct super_block *sb, int blocksize) { @@ -71,6 +72,11 @@ static inline struct dax_device *fs_dax_get_by_host(const char *host) static inline void fs_put_dax(struct dax_device *dax_dev) { } + +static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) +{ + return NULL; +} #endif int dax_read_lock(void); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index f3d3e6af8838..3eaad2fbf284 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -87,6 +87,7 @@ struct nd_mapping_desc { struct nvdimm *nvdimm; u64 start; u64 size; + int position; }; struct nd_region_desc { @@ -173,4 +174,19 @@ u64 nd_fletcher64(void *addr, size_t len, bool le); void nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); int nvdimm_has_cache(struct nd_region *nd_region); + +#ifdef CONFIG_ARCH_HAS_PMEM_API +#define ARCH_MEMREMAP_PMEM MEMREMAP_WB +void arch_wb_cache_pmem(void *addr, size_t size); +void arch_invalidate_pmem(void *addr, size_t size); +#else +#define ARCH_MEMREMAP_PMEM MEMREMAP_WT +static inline void arch_wb_cache_pmem(void *addr, size_t size) +{ +} +static inline void arch_invalidate_pmem(void *addr, size_t size) +{ +} +#endif + #endif /* __LIBNVDIMM_H__ */ diff --git a/lib/Kconfig b/lib/Kconfig index 40b114a11d7c..a85e6f76add5 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -559,9 +559,6 @@ config ARCH_HAS_PMEM_API config ARCH_HAS_UACCESS_FLUSHCACHE bool -config ARCH_HAS_MMIO_FLUSH - bool - config STACKDEPOT bool select STACKTRACE diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4c2fa98ef39d..d20791c3f499 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1546,8 +1546,8 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, else { memcpy(iobuf, mmio->addr.base + dpa, len); - /* give us some some coverage of the mmio_flush_range() API */ - mmio_flush_range(mmio->addr.base + dpa, len); + /* give us some some coverage of the arch_invalidate_pmem() API */ + arch_invalidate_pmem(mmio->addr.base + dpa, len); } nd_region_release_lane(nd_region, lane); |