diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-25 06:31:56 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-25 06:31:56 -0700 |
commit | 6078e07dcf5348075713124508d03786dc9ffa8b (patch) | |
tree | 0a93be6dc3de629352ea269a7b99018ab40b35e0 /drivers/nvdimm | |
parent | df132e4062afe06ddc9938802243d704906a884a (diff) | |
parent | 594861215c834e4b59a30d4b794f6372717bc197 (diff) | |
download | lwn-6078e07dcf5348075713124508d03786dc9ffa8b.tar.gz lwn-6078e07dcf5348075713124508d03786dc9ffa8b.zip |
Merge tag 'libnvdimm-for-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
- Improve the efficiency and performance of reading nvdimm-namespace
labels. Reduce the amount of label data read at driver load time by a
few orders of magnitude. Reduce heavyweight call-outs to
platform-firmware routines.
- Handle media errors located in the 'struct page' array stored on a
persistent memory namespace. Let the kernel clear these errors rather
than an awkward userspace workaround.
- Fix Address Range Scrub (ARS) completion tracking. Correct occasions
where the kernel indicates completion of ARS before submission.
- Fix asynchronous device registration reference counting.
- Add support for reporting an nvdimm dirty-shutdown-count via sysfs.
- Fix various small libnvdimm core and uapi issues.
* tag 'libnvdimm-for-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits)
acpi, nfit: Further restrict userspace ARS start requests
acpi, nfit: Fix Address Range Scrub completion tracking
UAPI: ndctl: Remove use of PAGE_SIZE
UAPI: ndctl: Fix g++-unsupported initialisation in headers
tools/testing/nvdimm: Populate dirty shutdown data
acpi, nfit: Collect shutdown status
acpi, nfit: Introduce nfit_mem flags
libnvdimm, label: Fix sparse warning
nvdimm: Use namespace index data to reduce number of label reads needed
nvdimm: Split label init out from the logic for getting config data
nvdimm: Remove empty if statement
nvdimm: Clarify comment in sizeof_namespace_index
nvdimm: Sanity check labeloff
libnvdimm, dimm: Maximize label transfer size
libnvdimm, pmem: Fix badblocks population for 'raw' namespaces
libnvdimm, namespace: Drop the repeat assignment for variable dev->parent
libnvdimm, region: Fail badblocks listing for inactive regions
libnvdimm, pfn: during init, clear errors in the metadata area
libnvdimm: Set device node in nd_device_register
libnvdimm: Hold reference on parent while scheduling async init
...
Diffstat (limited to 'drivers/nvdimm')
-rw-r--r-- | drivers/nvdimm/bus.c | 20 | ||||
-rw-r--r-- | drivers/nvdimm/dimm.c | 6 | ||||
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 60 | ||||
-rw-r--r-- | drivers/nvdimm/label.c | 144 | ||||
-rw-r--r-- | drivers/nvdimm/label.h | 4 | ||||
-rw-r--r-- | drivers/nvdimm/namespace_devs.c | 1 | ||||
-rw-r--r-- | drivers/nvdimm/nd-core.h | 1 | ||||
-rw-r--r-- | drivers/nvdimm/nd.h | 2 | ||||
-rw-r--r-- | drivers/nvdimm/pfn_devs.c | 61 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 4 | ||||
-rw-r--r-- | drivers/nvdimm/region_devs.c | 11 |
11 files changed, 251 insertions, 63 deletions
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 8aae6dcc839f..f1fb39921236 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -54,12 +54,6 @@ static int to_nd_device_type(struct device *dev) static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) { - /* - * Ensure that region devices always have their numa node set as - * early as possible. - */ - if (is_nd_region(dev)) - set_dev_node(dev, to_nd_region(dev)->numa_node); return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT, to_nd_device_type(dev)); } @@ -488,6 +482,8 @@ static void nd_async_device_register(void *d, async_cookie_t cookie) put_device(dev); } put_device(dev); + if (dev->parent) + put_device(dev->parent); } static void nd_async_device_unregister(void *d, async_cookie_t cookie) @@ -506,7 +502,19 @@ void __nd_device_register(struct device *dev) { if (!dev) return; + + /* + * Ensure that region devices always have their NUMA node set as + * early as possible. This way we are able to make certain that + * any memory associated with the creation and the creation + * itself of the region is associated with the correct node. + */ + if (is_nd_region(dev)) + set_dev_node(dev, to_nd_region(dev)->numa_node); + dev->bus = &nvdimm_bus_type; + if (dev->parent) + get_device(dev->parent); get_device(dev); async_schedule_domain(nd_async_device_register, dev, &nd_async_domain); diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index 6c8fb7590838..9899c97138a3 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -75,7 +75,7 @@ static int nvdimm_probe(struct device *dev) * DIMM capacity. We fail the dimm probe to prevent regions from * attempting to parse the label area. */ - rc = nvdimm_init_config_data(ndd); + rc = nd_label_data_init(ndd); if (rc == -EACCES) nvdimm_set_locked(dev); if (rc) @@ -84,10 +84,6 @@ static int nvdimm_probe(struct device *dev) dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size); nvdimm_bus_lock(dev); - ndd->ns_current = nd_label_validate(ndd); - ndd->ns_next = nd_label_next_nsindex(ndd->ns_current); - nd_label_copy(ndd, to_next_namespace_index(ndd), - to_current_namespace_index(ndd)); if (ndd->ns_current >= 0) { rc = nd_label_reserve_dpa(ndd); if (rc == 0) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 863cabc35215..6c3de2317390 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -85,56 +85,48 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) return cmd_rc; } -int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) +int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf, + size_t offset, size_t len) { struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; int rc = validate_dimm(ndd), cmd_rc = 0; struct nd_cmd_get_config_data_hdr *cmd; - struct nvdimm_bus_descriptor *nd_desc; - u32 max_cmd_size, config_size; - size_t offset; + size_t max_cmd_size, buf_offset; if (rc) return rc; - if (ndd->data) - return 0; - - if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0 - || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) { - dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n", - ndd->nsarea.max_xfer, ndd->nsarea.config_size); + if (offset + len > ndd->nsarea.config_size) return -ENXIO; - } - ndd->data = kvmalloc(ndd->nsarea.config_size, GFP_KERNEL); - if (!ndd->data) - return -ENOMEM; - - max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer); - cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); + max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer); + cmd = kvzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); if (!cmd) return -ENOMEM; - nd_desc = nvdimm_bus->nd_desc; - for (config_size = ndd->nsarea.config_size, offset = 0; - config_size; config_size -= cmd->in_length, - offset += cmd->in_length) { - cmd->in_length = min(config_size, max_cmd_size); - cmd->in_offset = offset; + for (buf_offset = 0; len; + len -= cmd->in_length, buf_offset += cmd->in_length) { + size_t cmd_size; + + cmd->in_offset = offset + buf_offset; + cmd->in_length = min(max_cmd_size, len); + + cmd_size = sizeof(*cmd) + cmd->in_length; + rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_GET_CONFIG_DATA, cmd, - cmd->in_length + sizeof(*cmd), &cmd_rc); + ND_CMD_GET_CONFIG_DATA, cmd, cmd_size, &cmd_rc); if (rc < 0) break; if (cmd_rc < 0) { rc = cmd_rc; break; } - memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); + + /* out_buf should be valid, copy it into our output buffer */ + memcpy(buf + buf_offset, cmd->out_buf, cmd->in_length); } - dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc); - kfree(cmd); + kvfree(cmd); return rc; } @@ -151,15 +143,11 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, if (rc) return rc; - if (!ndd->data) - return -ENXIO; - if (offset + len > ndd->nsarea.config_size) return -ENXIO; - max_cmd_size = min_t(u32, PAGE_SIZE, len); - max_cmd_size = min_t(u32, max_cmd_size, ndd->nsarea.max_xfer); - cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL); + max_cmd_size = min_t(u32, len, ndd->nsarea.max_xfer); + cmd = kvzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -183,7 +171,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, break; } } - kfree(cmd); + kvfree(cmd); return rc; } diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 1d28cd656536..750dbaa6ce82 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -75,7 +75,8 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) /* * Per UEFI 2.7, the minimum size of the Label Storage Area is large * enough to hold 2 index blocks and 2 labels. The minimum index - * block size is 256 bytes, and the minimum label size is 256 bytes. + * block size is 256 bytes. The label size is 128 for namespaces + * prior to version 1.2 and at minimum 256 for version 1.2 and later. */ nslot = nvdimm_num_label_slots(ndd); space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd); @@ -183,6 +184,13 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd) __le64_to_cpu(nsindex[i]->otheroff)); continue; } + if (__le64_to_cpu(nsindex[i]->labeloff) + != 2 * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "nsindex%d labeloff: %#llx invalid\n", + i, (unsigned long long) + __le64_to_cpu(nsindex[i]->labeloff)); + continue; + } size = __le64_to_cpu(nsindex[i]->mysize); if (size > sizeof_namespace_index(ndd) @@ -227,7 +235,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd) return -1; } -int nd_label_validate(struct nvdimm_drvdata *ndd) +static int nd_label_validate(struct nvdimm_drvdata *ndd) { /* * In order to probe for and validate namespace index blocks we @@ -250,12 +258,12 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) return -1; } -void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, - struct nd_namespace_index *src) +static void nd_label_copy(struct nvdimm_drvdata *ndd, + struct nd_namespace_index *dst, + struct nd_namespace_index *src) { - if (dst && src) - /* pass */; - else + /* just exit if either destination or source is NULL */ + if (!dst || !src) return; memcpy(dst, src, sizeof_namespace_index(ndd)); @@ -410,6 +418,128 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) return 0; } +int nd_label_data_init(struct nvdimm_drvdata *ndd) +{ + size_t config_size, read_size, max_xfer, offset; + struct nd_namespace_index *nsindex; + unsigned int i; + int rc = 0; + u32 nslot; + + if (ndd->data) + return 0; + + if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) { + dev_dbg(ndd->dev, "failed to init config data area: (%u:%u)\n", + ndd->nsarea.max_xfer, ndd->nsarea.config_size); + return -ENXIO; + } + + /* + * We need to determine the maximum index area as this is the section + * we must read and validate before we can start processing labels. + * + * If the area is too small to contain the two indexes and 2 labels + * then we abort. + * + * Start at a label size of 128 as this should result in the largest + * possible namespace index size. + */ + ndd->nslabel_size = 128; + read_size = sizeof_namespace_index(ndd) * 2; + if (!read_size) + return -ENXIO; + + /* Allocate config data */ + config_size = ndd->nsarea.config_size; + ndd->data = kvzalloc(config_size, GFP_KERNEL); + if (!ndd->data) + return -ENOMEM; + + /* + * We want to guarantee as few reads as possible while conserving + * memory. To do that we figure out how much unused space will be left + * in the last read, divide that by the total number of reads it is + * going to take given our maximum transfer size, and then reduce our + * maximum transfer size based on that result. + */ + max_xfer = min_t(size_t, ndd->nsarea.max_xfer, config_size); + if (read_size < max_xfer) { + /* trim waste */ + max_xfer -= ((max_xfer - 1) - (config_size - 1) % max_xfer) / + DIV_ROUND_UP(config_size, max_xfer); + /* make certain we read indexes in exactly 1 read */ + if (max_xfer < read_size) + max_xfer = read_size; + } + + /* Make our initial read size a multiple of max_xfer size */ + read_size = min(DIV_ROUND_UP(read_size, max_xfer) * max_xfer, + config_size); + + /* Read the index data */ + rc = nvdimm_get_config_data(ndd, ndd->data, 0, read_size); + if (rc) + goto out_err; + + /* Validate index data, if not valid assume all labels are invalid */ + ndd->ns_current = nd_label_validate(ndd); + if (ndd->ns_current < 0) + return 0; + + /* Record our index values */ + ndd->ns_next = nd_label_next_nsindex(ndd->ns_current); + + /* Copy "current" index on top of the "next" index */ + nsindex = to_current_namespace_index(ndd); + nd_label_copy(ndd, to_next_namespace_index(ndd), nsindex); + + /* Determine starting offset for label data */ + offset = __le64_to_cpu(nsindex->labeloff); + nslot = __le32_to_cpu(nsindex->nslot); + + /* Loop through the free list pulling in any active labels */ + for (i = 0; i < nslot; i++, offset += ndd->nslabel_size) { + size_t label_read_size; + + /* zero out the unused labels */ + if (test_bit_le(i, nsindex->free)) { + memset(ndd->data + offset, 0, ndd->nslabel_size); + continue; + } + + /* if we already read past here then just continue */ + if (offset + ndd->nslabel_size <= read_size) + continue; + + /* if we haven't read in a while reset our read_size offset */ + if (read_size < offset) + read_size = offset; + + /* determine how much more will be read after this next call. */ + label_read_size = offset + ndd->nslabel_size - read_size; + label_read_size = DIV_ROUND_UP(label_read_size, max_xfer) * + max_xfer; + + /* truncate last read if needed */ + if (read_size + label_read_size > config_size) + label_read_size = config_size - read_size; + + /* Read the label data */ + rc = nvdimm_get_config_data(ndd, ndd->data + read_size, + read_size, label_read_size); + if (rc) + goto out_err; + + /* push read_size to next read offset */ + read_size += label_read_size; + } + + dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc); +out_err: + return rc; +} + int nd_label_active_count(struct nvdimm_drvdata *ndd) { struct nd_namespace_index *nsindex; diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index 18bbe183b3a9..e9a2ad3c2150 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -138,9 +138,7 @@ static inline int nd_label_next_nsindex(int index) } struct nvdimm_drvdata; -int nd_label_validate(struct nvdimm_drvdata *ndd); -void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, - struct nd_namespace_index *src); +int nd_label_data_init(struct nvdimm_drvdata *ndd); size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd); int nd_label_active_count(struct nvdimm_drvdata *ndd); struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4a4266250c28..681af3a8fd62 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2099,7 +2099,6 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region) return NULL; } dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id); - dev->parent = &nd_region->dev; dev->groups = nd_namespace_attribute_groups; nd_namespace_pmem_set_resource(nd_region, nspm, 0); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index ac68072fb8cd..182258f64417 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -14,7 +14,6 @@ #define __ND_CORE_H__ #include <linux/libnvdimm.h> #include <linux/device.h> -#include <linux/libnvdimm.h> #include <linux/sizes.h> #include <linux/mutex.h> #include <linux/nd.h> diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 98317e7ce5b5..e79cc8e5c114 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -241,6 +241,8 @@ struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping); int nvdimm_check_config_data(struct device *dev); int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); +int nvdimm_get_config_data(struct nvdimm_drvdata *ndd, void *buf, + size_t offset, size_t len); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 3f7ad5bc443e..24c64090169e 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -361,6 +361,65 @@ struct device *nd_pfn_create(struct nd_region *nd_region) return dev; } +/* + * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap + * space associated with the namespace. If the memmap is set to DRAM, then + * this is a no-op. Since the memmap area is freshly initialized during + * probe, we have an opportunity to clear any badblocks in this area. + */ +static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) +{ + struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); + struct nd_namespace_common *ndns = nd_pfn->ndns; + void *zero_page = page_address(ZERO_PAGE(0)); + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + int num_bad, meta_num, rc, bb_present; + sector_t first_bad, meta_start; + struct nd_namespace_io *nsio; + + if (nd_pfn->mode != PFN_MODE_PMEM) + return 0; + + nsio = to_nd_namespace_io(&ndns->dev); + meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9; + meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start; + + do { + unsigned long zero_len; + u64 nsoff; + + bb_present = badblocks_check(&nd_region->bb, meta_start, + meta_num, &first_bad, &num_bad); + if (bb_present) { + dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %lx\n", + num_bad, first_bad); + nsoff = ALIGN_DOWN((nd_region->ndr_start + + (first_bad << 9)) - nsio->res.start, + PAGE_SIZE); + zero_len = ALIGN(num_bad << 9, PAGE_SIZE); + while (zero_len) { + unsigned long chunk = min(zero_len, PAGE_SIZE); + + rc = nvdimm_write_bytes(ndns, nsoff, zero_page, + chunk, 0); + if (rc) + break; + + zero_len -= chunk; + nsoff += chunk; + } + if (rc) { + dev_err(&nd_pfn->dev, + "error clearing %x badblocks at %lx\n", + num_bad, first_bad); + return rc; + } + } + } while (bb_present); + + return 0; +} + int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; @@ -477,7 +536,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -ENXIO; } - return 0; + return nd_pfn_clear_memmap_errors(nd_pfn); } EXPORT_SYMBOL(nd_pfn_validate); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index a75d10c23d80..0e39e3d1846f 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -421,9 +421,11 @@ static int pmem_attach_disk(struct device *dev, addr = devm_memremap_pages(dev, &pmem->pgmap); pmem->pfn_flags |= PFN_MAP; memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); - } else + } else { addr = devm_memremap(dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); + memcpy(&bb_res, &nsio->res, sizeof(bb_res)); + } /* * At release time the queue must be frozen before diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index fa37afcd43ff..174a418cb171 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -560,10 +560,17 @@ static ssize_t region_badblocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; - return badblocks_show(&nd_region->bb, buf, 0); -} + device_lock(dev); + if (dev->driver) + rc = badblocks_show(&nd_region->bb, buf, 0); + else + rc = -ENXIO; + device_unlock(dev); + return rc; +} static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL); static ssize_t resource_show(struct device *dev, |