summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2016-07-24 08:05:44 -0700
committerDan Williams <dan.j.williams@intel.com>2016-07-24 08:05:44 -0700
commit0606263f24f3d64960de742c55894190b5df903b (patch)
tree673894f227c1a761f601ad7bdc1ceef003bb85cb /drivers
parenta72255983f12f31f0c8d8275fb1a781546cfacb7 (diff)
parentd4c5725d57323e2348940fcc6416072671a9b432 (diff)
downloadlwn-0606263f24f3d64960de742c55894190b5df903b.tar.gz
lwn-0606263f24f3d64960de742c55894190b5df903b.zip
Merge branch 'for-4.8/libnvdimm' into libnvdimm-for-next
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/Kconfig27
-rw-r--r--drivers/acpi/Makefile2
-rw-r--r--drivers/acpi/nfit/Kconfig26
-rw-r--r--drivers/acpi/nfit/Makefile3
-rw-r--r--drivers/acpi/nfit/core.c (renamed from drivers/acpi/nfit.c)647
-rw-r--r--drivers/acpi/nfit/mce.c89
-rw-r--r--drivers/acpi/nfit/nfit.h (renamed from drivers/acpi/nfit.h)60
-rw-r--r--drivers/block/brd.c4
-rw-r--r--drivers/dax/dax.c6
-rw-r--r--drivers/dax/pmem.c14
-rw-r--r--drivers/nvdimm/Kconfig2
-rw-r--r--drivers/nvdimm/blk.c11
-rw-r--r--drivers/nvdimm/btt_devs.c3
-rw-r--r--drivers/nvdimm/bus.c212
-rw-r--r--drivers/nvdimm/claim.c7
-rw-r--r--drivers/nvdimm/core.c253
-rw-r--r--drivers/nvdimm/dimm_devs.c5
-rw-r--r--drivers/nvdimm/e820.c1
-rw-r--r--drivers/nvdimm/nd-core.h5
-rw-r--r--drivers/nvdimm/nd.h10
-rw-r--r--drivers/nvdimm/pmem.c85
-rw-r--r--drivers/nvdimm/pmem.h24
-rw-r--r--drivers/nvdimm/region.c19
-rw-r--r--drivers/nvdimm/region_devs.c154
-rw-r--r--drivers/s390/block/dcssblk.c6
25 files changed, 1108 insertions, 567 deletions
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index b7e2e776397d..415b148a8698 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -447,32 +447,7 @@ config ACPI_REDUCED_HARDWARE_ONLY
If you are unsure what to do, do not enable this option.
-config ACPI_NFIT
- tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
- depends on PHYS_ADDR_T_64BIT
- depends on BLK_DEV
- depends on ARCH_HAS_MMIO_FLUSH
- select LIBNVDIMM
- help
- Infrastructure to probe ACPI 6 compliant platforms for
- NVDIMMs (NFIT) and register a libnvdimm device tree. In
- addition to storage devices this also enables libnvdimm to pass
- ACPI._DSM messages for platform/dimm configuration.
-
- To compile this driver as a module, choose M here:
- the module will be called nfit.
-
-config ACPI_NFIT_DEBUG
- bool "NFIT DSM debug"
- depends on ACPI_NFIT
- depends on DYNAMIC_DEBUG
- default n
- help
- Enabling this option causes the nfit driver to dump the
- input and output buffers of _DSM operations on the ACPI0012
- device and its children. This can be very verbose, so leave
- it disabled unless you are debugging a hardware / firmware
- issue.
+source "drivers/acpi/nfit/Kconfig"
source "drivers/acpi/apei/Kconfig"
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 251ce85a66fb..64a575a6f7ef 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o
obj-$(CONFIG_ACPI_PROCESSOR) += processor.o
obj-$(CONFIG_ACPI) += container.o
obj-$(CONFIG_ACPI_THERMAL) += thermal.o
-obj-$(CONFIG_ACPI_NFIT) += nfit.o
+obj-$(CONFIG_ACPI_NFIT) += nfit/
obj-$(CONFIG_ACPI) += acpi_memhotplug.o
obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
obj-$(CONFIG_ACPI_BATTERY) += battery.o
diff --git a/drivers/acpi/nfit/Kconfig b/drivers/acpi/nfit/Kconfig
new file mode 100644
index 000000000000..dd0d53c52552
--- /dev/null
+++ b/drivers/acpi/nfit/Kconfig
@@ -0,0 +1,26 @@
+config ACPI_NFIT
+ tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
+ depends on PHYS_ADDR_T_64BIT
+ depends on BLK_DEV
+ depends on ARCH_HAS_MMIO_FLUSH
+ select LIBNVDIMM
+ help
+ Infrastructure to probe ACPI 6 compliant platforms for
+ NVDIMMs (NFIT) and register a libnvdimm device tree. In
+ addition to storage devices this also enables libnvdimm to pass
+ ACPI._DSM messages for platform/dimm configuration.
+
+ To compile this driver as a module, choose M here:
+ the module will be called nfit.
+
+config ACPI_NFIT_DEBUG
+ bool "NFIT DSM debug"
+ depends on ACPI_NFIT
+ depends on DYNAMIC_DEBUG
+ default n
+ help
+ Enabling this option causes the nfit driver to dump the
+ input and output buffers of _DSM operations on the ACPI0012
+ device and its children. This can be very verbose, so leave
+ it disabled unless you are debugging a hardware / firmware
+ issue.
diff --git a/drivers/acpi/nfit/Makefile b/drivers/acpi/nfit/Makefile
new file mode 100644
index 000000000000..a407e769f103
--- /dev/null
+++ b/drivers/acpi/nfit/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_ACPI_NFIT) := nfit.o
+nfit-y := core.o
+nfit-$(CONFIG_X86_MCE) += mce.o
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit/core.c
index 1f0e06065ae6..8c234dd9b8bc 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit/core.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/ndctl.h>
+#include <linux/sysfs.h>
#include <linux/delay.h>
#include <linux/list.h>
#include <linux/acpi.h>
@@ -50,6 +51,9 @@ module_param(disable_vendor_specific, bool, S_IRUGO);
MODULE_PARM_DESC(disable_vendor_specific,
"Limit commands to the publicly specified set\n");
+LIST_HEAD(acpi_descs);
+DEFINE_MUTEX(acpi_desc_lock);
+
static struct workqueue_struct *nfit_wq;
struct nfit_table_prev {
@@ -360,7 +364,7 @@ static const char *spa_type_name(u16 type)
return to_name[type];
}
-static int nfit_spa_type(struct acpi_nfit_system_address *spa)
+int nfit_spa_type(struct acpi_nfit_system_address *spa)
{
int i;
@@ -374,22 +378,25 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_system_address *spa)
{
- size_t length = min_t(size_t, sizeof(*spa), spa->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_spa *nfit_spa;
+ if (spa->header.length != sizeof(*spa))
+ return false;
+
list_for_each_entry(nfit_spa, &prev->spas, list) {
- if (memcmp(nfit_spa->spa, spa, length) == 0) {
+ if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
list_move_tail(&nfit_spa->list, &acpi_desc->spas);
return true;
}
}
- nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), GFP_KERNEL);
+ nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
+ GFP_KERNEL);
if (!nfit_spa)
return false;
INIT_LIST_HEAD(&nfit_spa->list);
- nfit_spa->spa = spa;
+ memcpy(nfit_spa->spa, spa, sizeof(*spa));
list_add_tail(&nfit_spa->list, &acpi_desc->spas);
dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
spa->range_index,
@@ -401,21 +408,24 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_memory_map *memdev)
{
- size_t length = min_t(size_t, sizeof(*memdev), memdev->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_memdev *nfit_memdev;
+ if (memdev->header.length != sizeof(*memdev))
+ return false;
+
list_for_each_entry(nfit_memdev, &prev->memdevs, list)
- if (memcmp(nfit_memdev->memdev, memdev, length) == 0) {
+ if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
return true;
}
- nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev), GFP_KERNEL);
+ nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev) + sizeof(*memdev),
+ GFP_KERNEL);
if (!nfit_memdev)
return false;
INIT_LIST_HEAD(&nfit_memdev->list);
- nfit_memdev->memdev = memdev;
+ memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
__func__, memdev->device_handle, memdev->range_index,
@@ -423,25 +433,42 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
return true;
}
+/*
+ * An implementation may provide a truncated control region if no block windows
+ * are defined.
+ */
+static size_t sizeof_dcr(struct acpi_nfit_control_region *dcr)
+{
+ if (dcr->header.length < offsetof(struct acpi_nfit_control_region,
+ window_size))
+ return 0;
+ if (dcr->windows)
+ return sizeof(*dcr);
+ return offsetof(struct acpi_nfit_control_region, window_size);
+}
+
static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_control_region *dcr)
{
- size_t length = min_t(size_t, sizeof(*dcr), dcr->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_dcr *nfit_dcr;
+ if (!sizeof_dcr(dcr))
+ return false;
+
list_for_each_entry(nfit_dcr, &prev->dcrs, list)
- if (memcmp(nfit_dcr->dcr, dcr, length) == 0) {
+ if (memcmp(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)) == 0) {
list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
return true;
}
- nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), GFP_KERNEL);
+ nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr),
+ GFP_KERNEL);
if (!nfit_dcr)
return false;
INIT_LIST_HEAD(&nfit_dcr->list);
- nfit_dcr->dcr = dcr;
+ memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
dcr->region_index, dcr->windows);
@@ -452,71 +479,102 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_data_region *bdw)
{
- size_t length = min_t(size_t, sizeof(*bdw), bdw->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_bdw *nfit_bdw;
+ if (bdw->header.length != sizeof(*bdw))
+ return false;
list_for_each_entry(nfit_bdw, &prev->bdws, list)
- if (memcmp(nfit_bdw->bdw, bdw, length) == 0) {
+ if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
return true;
}
- nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), GFP_KERNEL);
+ nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw) + sizeof(*bdw),
+ GFP_KERNEL);
if (!nfit_bdw)
return false;
INIT_LIST_HEAD(&nfit_bdw->list);
- nfit_bdw->bdw = bdw;
+ memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
bdw->region_index, bdw->windows);
return true;
}
+static size_t sizeof_idt(struct acpi_nfit_interleave *idt)
+{
+ if (idt->header.length < sizeof(*idt))
+ return 0;
+ return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1);
+}
+
static bool add_idt(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_interleave *idt)
{
- size_t length = min_t(size_t, sizeof(*idt), idt->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_idt *nfit_idt;
- list_for_each_entry(nfit_idt, &prev->idts, list)
- if (memcmp(nfit_idt->idt, idt, length) == 0) {
+ if (!sizeof_idt(idt))
+ return false;
+
+ list_for_each_entry(nfit_idt, &prev->idts, list) {
+ if (sizeof_idt(nfit_idt->idt) != sizeof_idt(idt))
+ continue;
+
+ if (memcmp(nfit_idt->idt, idt, sizeof_idt(idt)) == 0) {
list_move_tail(&nfit_idt->list, &acpi_desc->idts);
return true;
}
+ }
- nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), GFP_KERNEL);
+ nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt) + sizeof_idt(idt),
+ GFP_KERNEL);
if (!nfit_idt)
return false;
INIT_LIST_HEAD(&nfit_idt->list);
- nfit_idt->idt = idt;
+ memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
list_add_tail(&nfit_idt->list, &acpi_desc->idts);
dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
idt->interleave_index, idt->line_count);
return true;
}
+static size_t sizeof_flush(struct acpi_nfit_flush_address *flush)
+{
+ if (flush->header.length < sizeof(*flush))
+ return 0;
+ return sizeof(*flush) + sizeof(u64) * (flush->hint_count - 1);
+}
+
static bool add_flush(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_flush_address *flush)
{
- size_t length = min_t(size_t, sizeof(*flush), flush->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_flush *nfit_flush;
- list_for_each_entry(nfit_flush, &prev->flushes, list)
- if (memcmp(nfit_flush->flush, flush, length) == 0) {
+ if (!sizeof_flush(flush))
+ return false;
+
+ list_for_each_entry(nfit_flush, &prev->flushes, list) {
+ if (sizeof_flush(nfit_flush->flush) != sizeof_flush(flush))
+ continue;
+
+ if (memcmp(nfit_flush->flush, flush,
+ sizeof_flush(flush)) == 0) {
list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
return true;
}
+ }
- nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), GFP_KERNEL);
+ nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush)
+ + sizeof_flush(flush), GFP_KERNEL);
if (!nfit_flush)
return false;
INIT_LIST_HEAD(&nfit_flush->list);
- nfit_flush->flush = flush;
+ memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
flush->device_handle, flush->hint_count);
@@ -614,7 +672,6 @@ static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
{
u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
struct nfit_memdev *nfit_memdev;
- struct nfit_flush *nfit_flush;
struct nfit_bdw *nfit_bdw;
struct nfit_idt *nfit_idt;
u16 idt_idx, range_index;
@@ -647,14 +704,6 @@ static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
nfit_mem->idt_bdw = nfit_idt->idt;
break;
}
-
- list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
- if (nfit_flush->flush->device_handle !=
- nfit_memdev->memdev->device_handle)
- continue;
- nfit_mem->nfit_flush = nfit_flush;
- break;
- }
break;
}
}
@@ -675,6 +724,7 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
}
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+ struct nfit_flush *nfit_flush;
struct nfit_dcr *nfit_dcr;
u32 device_handle;
u16 dcr;
@@ -721,6 +771,28 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
break;
}
+ list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
+ struct acpi_nfit_flush_address *flush;
+ u16 i;
+
+ if (nfit_flush->flush->device_handle != device_handle)
+ continue;
+ nfit_mem->nfit_flush = nfit_flush;
+ flush = nfit_flush->flush;
+ nfit_mem->flush_wpq = devm_kzalloc(acpi_desc->dev,
+ flush->hint_count
+ * sizeof(struct resource), GFP_KERNEL);
+ if (!nfit_mem->flush_wpq)
+ return -ENOMEM;
+ for (i = 0; i < flush->hint_count; i++) {
+ struct resource *res = &nfit_mem->flush_wpq[i];
+
+ res->start = flush->hint_address[i];
+ res->end = res->start + 8 - 1;
+ }
+ break;
+ }
+
if (dcr && !nfit_mem->dcr) {
dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
spa->range_index, dcr);
@@ -806,14 +878,85 @@ static ssize_t revision_show(struct device *dev,
}
static DEVICE_ATTR_RO(revision);
+/*
+ * This shows the number of full Address Range Scrubs that have been
+ * completed since driver load time. Userspace can wait on this using
+ * select/poll etc. A '+' at the end indicates an ARS is in progress
+ */
+static ssize_t scrub_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
+ ssize_t rc = -ENXIO;
+
+ device_lock(dev);
+ nd_desc = dev_get_drvdata(dev);
+ if (nd_desc) {
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+ rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
+ (work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+ }
+ device_unlock(dev);
+ return rc;
+}
+
+static ssize_t scrub_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
+ ssize_t rc;
+ long val;
+
+ rc = kstrtol(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (val != 1)
+ return -EINVAL;
+
+ device_lock(dev);
+ nd_desc = dev_get_drvdata(dev);
+ if (nd_desc) {
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+ rc = acpi_nfit_ars_rescan(acpi_desc);
+ }
+ device_unlock(dev);
+ if (rc)
+ return rc;
+ return size;
+}
+static DEVICE_ATTR_RW(scrub);
+
+static bool ars_supported(struct nvdimm_bus *nvdimm_bus)
+{
+ struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+ const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START
+ | 1 << ND_CMD_ARS_STATUS;
+
+ return (nd_desc->cmd_mask & mask) == mask;
+}
+
+static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+ if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus))
+ return 0;
+ return a->mode;
+}
+
static struct attribute *acpi_nfit_attributes[] = {
&dev_attr_revision.attr,
+ &dev_attr_scrub.attr,
NULL,
};
static struct attribute_group acpi_nfit_attribute_group = {
.name = "nfit",
.attrs = acpi_nfit_attributes,
+ .is_visible = nfit_visible,
};
static const struct attribute_group *acpi_nfit_attribute_groups[] = {
@@ -1130,11 +1273,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
}
/*
- * Until standardization materializes we need to consider up to 3
+ * Until standardization materializes we need to consider 4
* different command sets. Note, that checking for function0 (bit0)
* tells us if any commands are reachable through this uuid.
*/
- for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++)
+ for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
break;
@@ -1144,12 +1287,14 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
dsm_mask = 0x3fe;
if (disable_vendor_specific)
dsm_mask &= ~(1 << ND_CMD_VENDOR);
- } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1)
+ } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) {
dsm_mask = 0x1c3c76;
- else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
+ } else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
dsm_mask = 0x1fe;
if (disable_vendor_specific)
dsm_mask &= ~(1 << 8);
+ } else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
+ dsm_mask = 0xffffffff;
} else {
dev_dbg(dev, "unknown dimm command family\n");
nfit_mem->family = -1;
@@ -1171,6 +1316,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
int dimm_count = 0;
list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+ struct acpi_nfit_flush_address *flush;
unsigned long flags = 0, cmd_mask;
struct nvdimm *nvdimm;
u32 device_handle;
@@ -1204,9 +1350,12 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
cmd_mask |= nfit_mem->dsm_mask;
+ flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
+ : NULL;
nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
acpi_nfit_dimm_attribute_groups,
- flags, cmd_mask);
+ flags, cmd_mask, flush ? flush->hint_count : 0,
+ nfit_mem->flush_wpq);
if (!nvdimm)
return -ENOMEM;
@@ -1374,24 +1523,6 @@ static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
return mmio->base_offset + line_offset + table_offset + sub_line_offset;
}
-static void wmb_blk(struct nfit_blk *nfit_blk)
-{
-
- if (nfit_blk->nvdimm_flush) {
- /*
- * The first wmb() is needed to 'sfence' all previous writes
- * such that they are architecturally visible for the platform
- * buffer flush. Note that we've already arranged for pmem
- * writes to avoid the cache via arch_memcpy_to_pmem(). The
- * final wmb() ensures ordering for the NVDIMM flush write.
- */
- wmb();
- writeq(1, nfit_blk->nvdimm_flush);
- wmb();
- } else
- wmb_pmem();
-}
-
static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
{
struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
@@ -1426,7 +1557,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
offset = to_interleave_offset(offset, mmio);
writeq(cmd, mmio->addr.base + offset);
- wmb_blk(nfit_blk);
+ nvdimm_flush(nfit_blk->nd_region);
if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
readq(mmio->addr.base + offset);
@@ -1477,7 +1608,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
}
if (rw)
- wmb_blk(nfit_blk);
+ nvdimm_flush(nfit_blk->nd_region);
rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
return rc;
@@ -1509,125 +1640,6 @@ static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
return rc;
}
-static void nfit_spa_mapping_release(struct kref *kref)
-{
- struct nfit_spa_mapping *spa_map = to_spa_map(kref);
- struct acpi_nfit_system_address *spa = spa_map->spa;
- struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc;
-
- WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
- dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
- if (spa_map->type == SPA_MAP_APERTURE)
- memunmap((void __force *)spa_map->addr.aperture);
- else
- iounmap(spa_map->addr.base);
- release_mem_region(spa->address, spa->length);
- list_del(&spa_map->list);
- kfree(spa_map);
-}
-
-static struct nfit_spa_mapping *find_spa_mapping(
- struct acpi_nfit_desc *acpi_desc,
- struct acpi_nfit_system_address *spa)
-{
- struct nfit_spa_mapping *spa_map;
-
- WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
- list_for_each_entry(spa_map, &acpi_desc->spa_maps, list)
- if (spa_map->spa == spa)
- return spa_map;
-
- return NULL;
-}
-
-static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc,
- struct acpi_nfit_system_address *spa)
-{
- struct nfit_spa_mapping *spa_map;
-
- mutex_lock(&acpi_desc->spa_map_mutex);
- spa_map = find_spa_mapping(acpi_desc, spa);
-
- if (spa_map)
- kref_put(&spa_map->kref, nfit_spa_mapping_release);
- mutex_unlock(&acpi_desc->spa_map_mutex);
-}
-
-static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
- struct acpi_nfit_system_address *spa, enum spa_map_type type)
-{
- resource_size_t start = spa->address;
- resource_size_t n = spa->length;
- struct nfit_spa_mapping *spa_map;
- struct resource *res;
-
- WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
-
- spa_map = find_spa_mapping(acpi_desc, spa);
- if (spa_map) {
- kref_get(&spa_map->kref);
- return spa_map->addr.base;
- }
-
- spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
- if (!spa_map)
- return NULL;
-
- INIT_LIST_HEAD(&spa_map->list);
- spa_map->spa = spa;
- kref_init(&spa_map->kref);
- spa_map->acpi_desc = acpi_desc;
-
- res = request_mem_region(start, n, dev_name(acpi_desc->dev));
- if (!res)
- goto err_mem;
-
- spa_map->type = type;
- if (type == SPA_MAP_APERTURE)
- spa_map->addr.aperture = (void __pmem *)memremap(start, n,
- ARCH_MEMREMAP_PMEM);
- else
- spa_map->addr.base = ioremap_nocache(start, n);
-
-
- if (!spa_map->addr.base)
- goto err_map;
-
- list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
- return spa_map->addr.base;
-
- err_map:
- release_mem_region(start, n);
- err_mem:
- kfree(spa_map);
- return NULL;
-}
-
-/**
- * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges
- * @nvdimm_bus: NFIT-bus that provided the spa table entry
- * @nfit_spa: spa table to map
- * @type: aperture or control region
- *
- * In the case where block-data-window apertures and
- * dimm-control-regions are interleaved they will end up sharing a
- * single request_mem_region() + ioremap() for the address range. In
- * the style of devm nfit_spa_map() mappings are automatically dropped
- * when all region devices referencing the same mapping are disabled /
- * unbound.
- */
-static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
- struct acpi_nfit_system_address *spa, enum spa_map_type type)
-{
- void __iomem *iomem;
-
- mutex_lock(&acpi_desc->spa_map_mutex);
- iomem = __nfit_spa_map(acpi_desc, spa, type);
- mutex_unlock(&acpi_desc->spa_map_mutex);
-
- return iomem;
-}
-
static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
struct acpi_nfit_interleave *idt, u16 interleave_ways)
{
@@ -1669,9 +1681,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
struct device *dev)
{
struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
- struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
struct nd_blk_region *ndbr = to_nd_blk_region(dev);
- struct nfit_flush *nfit_flush;
struct nfit_blk_mmio *mmio;
struct nfit_blk *nfit_blk;
struct nfit_mem *nfit_mem;
@@ -1697,8 +1707,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
/* map block aperture memory */
nfit_blk->bdw_offset = nfit_mem->bdw->offset;
mmio = &nfit_blk->mmio[BDW];
- mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
- SPA_MAP_APERTURE);
+ mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
+ nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM);
if (!mmio->addr.base) {
dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
nvdimm_name(nvdimm));
@@ -1720,8 +1730,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
mmio = &nfit_blk->mmio[DCR];
- mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
- SPA_MAP_CONTROL);
+ mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
+ nfit_mem->spa_dcr->length);
if (!mmio->addr.base) {
dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
nvdimm_name(nvdimm));
@@ -1746,15 +1756,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
return rc;
}
- nfit_flush = nfit_mem->nfit_flush;
- if (nfit_flush && nfit_flush->flush->hint_count != 0) {
- nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev,
- nfit_flush->flush->hint_address[0], 8);
- if (!nfit_blk->nvdimm_flush)
- return -ENOMEM;
- }
-
- if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush)
+ if (nvdimm_has_flush(nfit_blk->nd_region) < 0)
dev_warn(dev, "unable to guarantee persistence of writes\n");
if (mmio->line_size == 0)
@@ -1773,29 +1775,6 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
return 0;
}
-static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
- struct device *dev)
-{
- struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
- struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
- struct nd_blk_region *ndbr = to_nd_blk_region(dev);
- struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
- int i;
-
- if (!nfit_blk)
- return; /* never enabled */
-
- /* auto-free BLK spa mappings */
- for (i = 0; i < 2; i++) {
- struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
-
- if (mmio->addr.base)
- nfit_spa_unmap(acpi_desc, mmio->spa);
- }
- nd_blk_region_set_provider_data(ndbr, NULL);
- /* devm will free nfit_blk */
-}
-
static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
{
@@ -1919,11 +1898,11 @@ static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
if (ret)
return ret;
- ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res);
- if (ret) {
- remove_resource(res);
+ ret = devm_add_action_or_reset(acpi_desc->dev,
+ acpi_nfit_remove_resource,
+ res);
+ if (ret)
return ret;
- }
return 0;
}
@@ -1969,7 +1948,6 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
ndr_desc->num_mappings = blk_valid;
ndbr_desc = to_blk_region_desc(ndr_desc);
ndbr_desc->enable = acpi_nfit_blk_region_enable;
- ndbr_desc->disable = acpi_nfit_blk_region_disable;
ndbr_desc->do_io = acpi_desc->blk_do_io;
nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
ndr_desc);
@@ -1981,6 +1959,14 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
return 0;
}
+static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
+{
+ return (nfit_spa_type(spa) == NFIT_SPA_VDISK ||
+ nfit_spa_type(spa) == NFIT_SPA_VCD ||
+ nfit_spa_type(spa) == NFIT_SPA_PDISK ||
+ nfit_spa_type(spa) == NFIT_SPA_PCD);
+}
+
static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
struct nfit_spa *nfit_spa)
{
@@ -1996,7 +1982,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
if (nfit_spa->nd_region)
return 0;
- if (spa->range_index == 0) {
+ if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
__func__);
return 0;
@@ -2060,6 +2046,11 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
ndr_desc);
if (!nfit_spa->nd_region)
rc = -ENOMEM;
+ } else if (nfit_spa_is_virtual(spa)) {
+ nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
+ ndr_desc);
+ if (!nfit_spa->nd_region)
+ rc = -ENOMEM;
}
out:
@@ -2139,7 +2130,7 @@ static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
unsigned int tmo = scrub_timeout;
int rc;
- if (nfit_spa->ars_done || !nfit_spa->nd_region)
+ if (!nfit_spa->ars_required || !nfit_spa->nd_region)
return;
rc = ars_start(acpi_desc, nfit_spa);
@@ -2228,7 +2219,9 @@ static void acpi_nfit_scrub(struct work_struct *work)
* firmware initiated scrubs to complete and then we go search for the
* affected spa regions to mark them scanned. In the second phase we
* initiate a directed scrub for every range that was not scrubbed in
- * phase 1.
+ * phase 1. If we're called for a 'rescan', we harmlessly pass through
+ * the first phase, but really only care about running phase 2, where
+ * regions can be notified of new poison.
*/
/* process platform firmware initiated scrubs */
@@ -2331,14 +2324,17 @@ static void acpi_nfit_scrub(struct work_struct *work)
* Flag all the ranges that still need scrubbing, but
* register them now to make data available.
*/
- if (nfit_spa->nd_region)
- nfit_spa->ars_done = 1;
- else
+ if (!nfit_spa->nd_region) {
+ nfit_spa->ars_required = 1;
acpi_nfit_register_region(acpi_desc, nfit_spa);
+ }
}
list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
acpi_nfit_async_scrub(acpi_desc, nfit_spa);
+ acpi_desc->scrub_count++;
+ if (acpi_desc->scrub_count_state)
+ sysfs_notify_dirent(acpi_desc->scrub_count_state);
mutex_unlock(&acpi_desc->init_mutex);
}
@@ -2376,14 +2372,89 @@ static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
return 0;
}
-int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
+static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
+{
+ struct device *dev = acpi_desc->dev;
+ struct kernfs_node *nfit;
+ struct device *bus_dev;
+
+ if (!ars_supported(acpi_desc->nvdimm_bus))
+ return 0;
+
+ bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
+ nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit");
+ if (!nfit) {
+ dev_err(dev, "sysfs_get_dirent 'nfit' failed\n");
+ return -ENODEV;
+ }
+ acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub");
+ sysfs_put(nfit);
+ if (!acpi_desc->scrub_count_state) {
+ dev_err(dev, "sysfs_get_dirent 'scrub' failed\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static void acpi_nfit_destruct(void *data)
+{
+ struct acpi_nfit_desc *acpi_desc = data;
+ struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
+
+ /*
+ * Destruct under acpi_desc_lock so that nfit_handle_mce does not
+ * race teardown
+ */
+ mutex_lock(&acpi_desc_lock);
+ acpi_desc->cancel = 1;
+ /*
+ * Bounce the nvdimm bus lock to make sure any in-flight
+ * acpi_nfit_ars_rescan() submissions have had a chance to
+ * either submit or see ->cancel set.
+ */
+ device_lock(bus_dev);
+ device_unlock(bus_dev);
+
+ flush_workqueue(nfit_wq);
+ if (acpi_desc->scrub_count_state)
+ sysfs_put(acpi_desc->scrub_count_state);
+ nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+ acpi_desc->nvdimm_bus = NULL;
+ list_del(&acpi_desc->list);
+ mutex_unlock(&acpi_desc_lock);
+}
+
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
{
struct device *dev = acpi_desc->dev;
struct nfit_table_prev prev;
const void *end;
- u8 *data;
int rc;
+ if (!acpi_desc->nvdimm_bus) {
+ acpi_nfit_init_dsms(acpi_desc);
+
+ acpi_desc->nvdimm_bus = nvdimm_bus_register(dev,
+ &acpi_desc->nd_desc);
+ if (!acpi_desc->nvdimm_bus)
+ return -ENOMEM;
+
+ rc = devm_add_action_or_reset(dev, acpi_nfit_destruct,
+ acpi_desc);
+ if (rc)
+ return rc;
+
+ rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
+ if (rc)
+ return rc;
+
+ /* register this acpi_desc for mce notifications */
+ mutex_lock(&acpi_desc_lock);
+ list_add_tail(&acpi_desc->list, &acpi_descs);
+ mutex_unlock(&acpi_desc_lock);
+ }
+
mutex_lock(&acpi_desc->init_mutex);
INIT_LIST_HEAD(&prev.spas);
@@ -2406,7 +2477,6 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
list_cut_position(&prev.flushes, &acpi_desc->flushes,
acpi_desc->flushes.prev);
- data = (u8 *) acpi_desc->nfit;
end = data + sz;
while (!IS_ERR_OR_NULL(data))
data = add_table(acpi_desc, &prev, data, end);
@@ -2422,12 +2492,9 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
if (rc)
goto out_unlock;
- if (nfit_mem_init(acpi_desc) != 0) {
- rc = -ENOMEM;
+ rc = nfit_mem_init(acpi_desc);
+ if (rc)
goto out_unlock;
- }
-
- acpi_nfit_init_dsms(acpi_desc);
rc = acpi_nfit_register_dimms(acpi_desc);
if (rc)
@@ -2496,6 +2563,33 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
return 0;
}
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nfit_spa *nfit_spa;
+
+ if (work_busy(&acpi_desc->work))
+ return -EBUSY;
+
+ if (acpi_desc->cancel)
+ return 0;
+
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+
+ if (nfit_spa_type(spa) != NFIT_SPA_PM)
+ continue;
+
+ nfit_spa->ars_required = 1;
+ }
+ queue_work(nfit_wq, &acpi_desc->work);
+ dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
+ mutex_unlock(&acpi_desc->init_mutex);
+
+ return 0;
+}
+
void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
{
struct nvdimm_bus_descriptor *nd_desc;
@@ -2505,12 +2599,12 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
nd_desc = &acpi_desc->nd_desc;
nd_desc->provider_name = "ACPI.NFIT";
+ nd_desc->module = THIS_MODULE;
nd_desc->ndctl = acpi_nfit_ctl;
nd_desc->flush_probe = acpi_nfit_flush_probe;
nd_desc->clear_to_send = acpi_nfit_clear_to_send;
nd_desc->attr_groups = acpi_nfit_attribute_groups;
- INIT_LIST_HEAD(&acpi_desc->spa_maps);
INIT_LIST_HEAD(&acpi_desc->spas);
INIT_LIST_HEAD(&acpi_desc->dcrs);
INIT_LIST_HEAD(&acpi_desc->bdws);
@@ -2518,7 +2612,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
INIT_LIST_HEAD(&acpi_desc->flushes);
INIT_LIST_HEAD(&acpi_desc->memdevs);
INIT_LIST_HEAD(&acpi_desc->dimms);
- mutex_init(&acpi_desc->spa_map_mutex);
+ INIT_LIST_HEAD(&acpi_desc->list);
mutex_init(&acpi_desc->init_mutex);
INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
}
@@ -2532,7 +2626,7 @@ static int acpi_nfit_add(struct acpi_device *adev)
struct acpi_table_header *tbl;
acpi_status status = AE_OK;
acpi_size sz;
- int rc;
+ int rc = 0;
status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz);
if (ACPI_FAILURE(status)) {
@@ -2545,50 +2639,33 @@ static int acpi_nfit_add(struct acpi_device *adev)
if (!acpi_desc)
return -ENOMEM;
acpi_nfit_desc_init(acpi_desc, &adev->dev);
- acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
- if (!acpi_desc->nvdimm_bus)
- return -ENOMEM;
- /*
- * Save the acpi header for later and then skip it,
- * making nfit point to the first nfit table header.
- */
+ /* Save the acpi header for exporting the revision via sysfs */
acpi_desc->acpi_header = *tbl;
- acpi_desc->nfit = (void *) tbl + sizeof(struct acpi_table_nfit);
- sz -= sizeof(struct acpi_table_nfit);
/* Evaluate _FIT and override with that if present */
status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
if (ACPI_SUCCESS(status) && buf.length > 0) {
- union acpi_object *obj;
- /*
- * Adjust for the acpi_object header of the _FIT
- */
- obj = buf.pointer;
- if (obj->type == ACPI_TYPE_BUFFER) {
- acpi_desc->nfit =
- (struct acpi_nfit_header *)obj->buffer.pointer;
- sz = obj->buffer.length;
- } else
+ union acpi_object *obj = buf.pointer;
+
+ if (obj->type == ACPI_TYPE_BUFFER)
+ rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
+ obj->buffer.length);
+ else
dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
__func__, (int) obj->type);
- }
-
- rc = acpi_nfit_init(acpi_desc, sz);
- if (rc) {
- nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
- return rc;
- }
- return 0;
+ kfree(buf.pointer);
+ } else
+ /* skip over the lead-in header table */
+ rc = acpi_nfit_init(acpi_desc, (void *) tbl
+ + sizeof(struct acpi_table_nfit),
+ sz - sizeof(struct acpi_table_nfit));
+ return rc;
}
static int acpi_nfit_remove(struct acpi_device *adev)
{
- struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
-
- acpi_desc->cancel = 1;
- flush_workqueue(nfit_wq);
- nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+ /* see acpi_nfit_destruct */
return 0;
}
@@ -2596,9 +2673,8 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
{
struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
- struct acpi_nfit_header *nfit_saved;
- union acpi_object *obj;
struct device *dev = &adev->dev;
+ union acpi_object *obj;
acpi_status status;
int ret;
@@ -2616,9 +2692,6 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
if (!acpi_desc)
goto out_unlock;
acpi_nfit_desc_init(acpi_desc, &adev->dev);
- acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
- if (!acpi_desc->nvdimm_bus)
- goto out_unlock;
} else {
/*
* Finish previous registration before considering new
@@ -2634,21 +2707,14 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
goto out_unlock;
}
- nfit_saved = acpi_desc->nfit;
obj = buf.pointer;
if (obj->type == ACPI_TYPE_BUFFER) {
- acpi_desc->nfit =
- (struct acpi_nfit_header *)obj->buffer.pointer;
- ret = acpi_nfit_init(acpi_desc, obj->buffer.length);
- if (ret) {
- /* Merge failed, restore old nfit, and exit */
- acpi_desc->nfit = nfit_saved;
+ ret = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
+ obj->buffer.length);
+ if (ret)
dev_err(dev, "failed to merge updated NFIT\n");
- }
- } else {
- /* Bad _FIT, restore old nfit */
+ } else
dev_err(dev, "Invalid _FIT\n");
- }
kfree(buf.pointer);
out_unlock:
@@ -2693,18 +2759,23 @@ static __init int nfit_init(void)
acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
+ acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
nfit_wq = create_singlethread_workqueue("nfit");
if (!nfit_wq)
return -ENOMEM;
+ nfit_mce_register();
+
return acpi_bus_register_driver(&acpi_nfit_driver);
}
static __exit void nfit_exit(void)
{
+ nfit_mce_unregister();
acpi_bus_unregister_driver(&acpi_nfit_driver);
destroy_workqueue(nfit_wq);
+ WARN_ON(!list_empty(&acpi_descs));
}
module_init(nfit_init);
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
new file mode 100644
index 000000000000..4c745bf389fe
--- /dev/null
+++ b/drivers/acpi/nfit/mce.c
@@ -0,0 +1,89 @@
+/*
+ * NFIT - Machine Check Handler
+ *
+ * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/notifier.h>
+#include <linux/acpi.h>
+#include <asm/mce.h>
+#include "nfit.h"
+
+static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct mce *mce = (struct mce *)data;
+ struct acpi_nfit_desc *acpi_desc;
+ struct nfit_spa *nfit_spa;
+
+ /* We only care about memory errors */
+ if (!(mce->status & MCACOD))
+ return NOTIFY_DONE;
+
+ /*
+ * mce->addr contains the physical addr accessed that caused the
+ * machine check. We need to walk through the list of NFITs, and see
+ * if any of them matches that address, and only then start a scrub.
+ */
+ mutex_lock(&acpi_desc_lock);
+ list_for_each_entry(acpi_desc, &acpi_descs, list) {
+ struct device *dev = acpi_desc->dev;
+ int found_match = 0;
+
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+
+ if (nfit_spa_type(spa) == NFIT_SPA_PM)
+ continue;
+ /* find the spa that covers the mce addr */
+ if (spa->address > mce->addr)
+ continue;
+ if ((spa->address + spa->length - 1) < mce->addr)
+ continue;
+ found_match = 1;
+ dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
+ __func__, spa->range_index, spa->address,
+ spa->length);
+ /*
+ * We can break at the first match because we're going
+ * to rescan all the SPA ranges. There shouldn't be any
+ * aliasing anyway.
+ */
+ break;
+ }
+ mutex_unlock(&acpi_desc->init_mutex);
+
+ /*
+ * We can ignore an -EBUSY here because if an ARS is already
+ * in progress, just let that be the last authoritative one
+ */
+ if (found_match)
+ acpi_nfit_ars_rescan(acpi_desc);
+ }
+
+ mutex_unlock(&acpi_desc_lock);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nfit_mce_dec = {
+ .notifier_call = nfit_handle_mce,
+};
+
+void nfit_mce_register(void)
+{
+ mce_register_decode_chain(&nfit_mce_dec);
+}
+
+void nfit_mce_unregister(void)
+{
+ mce_unregister_decode_chain(&nfit_mce_dec);
+}
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit/nfit.h
index 02b9ea1e8d2e..e894ded24d99 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -16,6 +16,7 @@
#define __NFIT_H__
#include <linux/workqueue.h>
#include <linux/libnvdimm.h>
+#include <linux/ndctl.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/acpi.h>
@@ -31,6 +32,9 @@
#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
+/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
+#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
+
#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
| ACPI_NFIT_MEM_NOT_ARMED)
@@ -40,6 +44,7 @@ enum nfit_uuids {
NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
+ NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
NFIT_SPA_VOLATILE,
NFIT_SPA_PM,
NFIT_SPA_DCR,
@@ -74,37 +79,37 @@ enum {
};
struct nfit_spa {
- struct acpi_nfit_system_address *spa;
struct list_head list;
struct nd_region *nd_region;
- unsigned int ars_done:1;
+ unsigned int ars_required:1;
u32 clear_err_unit;
u32 max_ars;
+ struct acpi_nfit_system_address spa[0];
};
struct nfit_dcr {
- struct acpi_nfit_control_region *dcr;
struct list_head list;
+ struct acpi_nfit_control_region dcr[0];
};
struct nfit_bdw {
- struct acpi_nfit_data_region *bdw;
struct list_head list;
+ struct acpi_nfit_data_region bdw[0];
};
struct nfit_idt {
- struct acpi_nfit_interleave *idt;
struct list_head list;
+ struct acpi_nfit_interleave idt[0];
};
struct nfit_flush {
- struct acpi_nfit_flush_address *flush;
struct list_head list;
+ struct acpi_nfit_flush_address flush[0];
};
struct nfit_memdev {
- struct acpi_nfit_memory_map *memdev;
struct list_head list;
+ struct acpi_nfit_memory_map memdev[0];
};
/* assembled tables for a given dimm/memory-device */
@@ -123,6 +128,7 @@ struct nfit_mem {
struct list_head list;
struct acpi_device *adev;
struct acpi_nfit_desc *acpi_desc;
+ struct resource *flush_wpq;
unsigned long dsm_mask;
int family;
};
@@ -130,10 +136,7 @@ struct nfit_mem {
struct acpi_nfit_desc {
struct nvdimm_bus_descriptor nd_desc;
struct acpi_table_header acpi_header;
- struct acpi_nfit_header *nfit;
- struct mutex spa_map_mutex;
struct mutex init_mutex;
- struct list_head spa_maps;
struct list_head memdevs;
struct list_head flushes;
struct list_head dimms;
@@ -146,6 +149,9 @@ struct acpi_nfit_desc {
struct nd_cmd_ars_status *ars_status;
size_t ars_status_size;
struct work_struct work;
+ struct list_head list;
+ struct kernfs_node *scrub_count_state;
+ unsigned int scrub_count;
unsigned int cancel:1;
unsigned long dimm_cmd_force_en;
unsigned long bus_cmd_force_en;
@@ -161,7 +167,7 @@ enum nd_blk_mmio_selector {
struct nd_blk_addr {
union {
void __iomem *base;
- void __pmem *aperture;
+ void *aperture;
};
};
@@ -180,28 +186,26 @@ struct nfit_blk {
u64 bdw_offset; /* post interleave offset */
u64 stat_offset;
u64 cmd_offset;
- void __iomem *nvdimm_flush;
u32 dimm_flags;
};
-enum spa_map_type {
- SPA_MAP_CONTROL,
- SPA_MAP_APERTURE,
-};
-
-struct nfit_spa_mapping {
- struct acpi_nfit_desc *acpi_desc;
- struct acpi_nfit_system_address *spa;
- struct list_head list;
- struct kref kref;
- enum spa_map_type type;
- struct nd_blk_addr addr;
-};
+extern struct list_head acpi_descs;
+extern struct mutex acpi_desc_lock;
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
-static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
+#ifdef CONFIG_X86_MCE
+void nfit_mce_register(void);
+void nfit_mce_unregister(void);
+#else
+static inline void nfit_mce_register(void)
{
- return container_of(kref, struct nfit_spa_mapping, kref);
}
+static inline void nfit_mce_unregister(void)
+{
+}
+#endif
+
+int nfit_spa_type(struct acpi_nfit_system_address *spa);
static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
struct nfit_mem *nfit_mem)
@@ -218,6 +222,6 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
}
const u8 *to_nfit_uuid(enum nfit_uuids id);
-int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz);
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
#endif /* __NFIT_H__ */
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index c04bd9bc39fd..5f1fe4e6208d 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -381,7 +381,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
#ifdef CONFIG_BLK_DEV_RAM_DAX
static long brd_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size)
+ void **kaddr, pfn_t *pfn, long size)
{
struct brd_device *brd = bdev->bd_disk->private_data;
struct page *page;
@@ -391,7 +391,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
page = brd_insert_page(brd, sector);
if (!page)
return -ENOSPC;
- *kaddr = (void __pmem *)page_address(page);
+ *kaddr = page_address(page);
*pfn = page_to_pfn_t(page);
return PAGE_SIZE;
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index b891a129b275..803f3953b341 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -211,11 +211,9 @@ int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
}
dax_dev->dev = dev;
- rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
- if (rc) {
- unregister_dax_dev(dev);
+ rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
+ if (rc)
return rc;
- }
return 0;
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 55d510e36cd1..dfb168568af1 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -102,21 +102,19 @@ static int dax_pmem_probe(struct device *dev)
if (rc)
return rc;
- rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
- if (rc) {
- dax_pmem_percpu_exit(&dax_pmem->ref);
+ rc = devm_add_action_or_reset(dev, dax_pmem_percpu_exit,
+ &dax_pmem->ref);
+ if (rc)
return rc;
- }
addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
- rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
- if (rc) {
- dax_pmem_percpu_kill(&dax_pmem->ref);
+ rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
+ &dax_pmem->ref);
+ if (rc)
return rc;
- }
nd_region = to_nd_region(dev->parent);
dax_region = alloc_dax_region(dev, nd_region->id, &res,
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 7c8a3bf07884..124c2432ac9c 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -1,6 +1,7 @@
menuconfig LIBNVDIMM
tristate "NVDIMM (Non-Volatile Memory Device) Support"
depends on PHYS_ADDR_T_64BIT
+ depends on HAS_IOMEM
depends on BLK_DEV
help
Generic support for non-volatile memory devices including
@@ -19,7 +20,6 @@ if LIBNVDIMM
config BLK_DEV_PMEM
tristate "PMEM: Persistent memory block device support"
default LIBNVDIMM
- depends on HAS_IOMEM
select ND_BTT if BTT
select ND_PFN if NVDIMM_PFN
help
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 495e06d9f7e7..dfe691cf4d74 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -267,10 +267,8 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return -ENOMEM;
- if (devm_add_action(dev, nd_blk_release_queue, q)) {
- blk_cleanup_queue(q);
+ if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
return -ENOMEM;
- }
blk_queue_make_request(q, nd_blk_make_request);
blk_queue_max_hw_sectors(q, UINT_MAX);
@@ -282,10 +280,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
disk = alloc_disk(0);
if (!disk)
return -ENOMEM;
- if (devm_add_action(dev, nd_blk_release_disk, disk)) {
- put_disk(disk);
- return -ENOMEM;
- }
disk->driverfs_dev = dev;
disk->first_minor = 0;
@@ -296,6 +290,9 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
set_capacity(disk, 0);
add_disk(disk);
+ if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
+ return -ENOMEM;
+
if (nsblk_meta_size(nsblk)) {
int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 816d0dae6398..3fa7919f94a8 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -198,8 +198,7 @@ struct device *nd_btt_create(struct nd_region *nd_region)
{
struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);
- if (dev)
- __nd_device_register(dev);
+ __nd_device_register(dev);
return dev;
}
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index f085f8bceae8..46d7e555b044 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -31,6 +31,7 @@
int nvdimm_major;
static int nvdimm_bus_major;
static struct class *nd_class;
+static DEFINE_IDA(nd_ida);
static int to_nd_device_type(struct device *dev)
{
@@ -60,20 +61,13 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
to_nd_device_type(dev));
}
-static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
-{
- struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
-
- return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
-}
-
static struct module *to_bus_provider(struct device *dev)
{
/* pin bus providers while regions are enabled */
if (is_nd_pmem(dev) || is_nd_blk(dev)) {
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
- return nvdimm_bus->module;
+ return nvdimm_bus->nd_desc->module;
}
return NULL;
}
@@ -136,6 +130,21 @@ static int nvdimm_bus_remove(struct device *dev)
return rc;
}
+static void nvdimm_bus_shutdown(struct device *dev)
+{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct nd_device_driver *nd_drv = NULL;
+
+ if (dev->driver)
+ nd_drv = to_nd_device_driver(dev->driver);
+
+ if (nd_drv && nd_drv->shutdown) {
+ nd_drv->shutdown(dev);
+ dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n",
+ dev->driver->name, dev_name(dev));
+ }
+}
+
void nd_device_notify(struct device *dev, enum nvdimm_event event)
{
device_lock(dev);
@@ -208,14 +217,187 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
}
EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);
+
static struct bus_type nvdimm_bus_type = {
.name = "nd",
.uevent = nvdimm_bus_uevent,
.match = nvdimm_bus_match,
.probe = nvdimm_bus_probe,
.remove = nvdimm_bus_remove,
+ .shutdown = nvdimm_bus_shutdown,
+};
+
+static void nvdimm_bus_release(struct device *dev)
+{
+ struct nvdimm_bus *nvdimm_bus;
+
+ nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+ ida_simple_remove(&nd_ida, nvdimm_bus->id);
+ kfree(nvdimm_bus);
+}
+
+static bool is_nvdimm_bus(struct device *dev)
+{
+ return dev->release == nvdimm_bus_release;
+}
+
+struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+{
+ struct device *dev;
+
+ for (dev = nd_dev; dev; dev = dev->parent)
+ if (is_nvdimm_bus(dev))
+ break;
+ dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
+ if (dev)
+ return to_nvdimm_bus(dev);
+ return NULL;
+}
+
+struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
+{
+ struct nvdimm_bus *nvdimm_bus;
+
+ nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+ WARN_ON(!is_nvdimm_bus(dev));
+ return nvdimm_bus;
+}
+EXPORT_SYMBOL_GPL(to_nvdimm_bus);
+
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+ struct nvdimm_bus_descriptor *nd_desc)
+{
+ struct nvdimm_bus *nvdimm_bus;
+ int rc;
+
+ nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
+ if (!nvdimm_bus)
+ return NULL;
+ INIT_LIST_HEAD(&nvdimm_bus->list);
+ INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
+ INIT_LIST_HEAD(&nvdimm_bus->poison_list);
+ init_waitqueue_head(&nvdimm_bus->probe_wait);
+ nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
+ mutex_init(&nvdimm_bus->reconfig_mutex);
+ if (nvdimm_bus->id < 0) {
+ kfree(nvdimm_bus);
+ return NULL;
+ }
+ nvdimm_bus->nd_desc = nd_desc;
+ nvdimm_bus->dev.parent = parent;
+ nvdimm_bus->dev.release = nvdimm_bus_release;
+ nvdimm_bus->dev.groups = nd_desc->attr_groups;
+ nvdimm_bus->dev.bus = &nvdimm_bus_type;
+ dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
+ rc = device_register(&nvdimm_bus->dev);
+ if (rc) {
+ dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
+ goto err;
+ }
+
+ return nvdimm_bus;
+ err:
+ put_device(&nvdimm_bus->dev);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_register);
+
+void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
+{
+ if (!nvdimm_bus)
+ return;
+ device_unregister(&nvdimm_bus->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
+
+static int child_unregister(struct device *dev, void *data)
+{
+ /*
+ * the singular ndctl class device per bus needs to be
+ * "device_destroy"ed, so skip it here
+ *
+ * i.e. remove classless children
+ */
+ if (dev->class)
+ /* pass */;
+ else
+ nd_device_unregister(dev, ND_SYNC);
+ return 0;
+}
+
+static void free_poison_list(struct list_head *poison_list)
+{
+ struct nd_poison *pl, *next;
+
+ list_for_each_entry_safe(pl, next, poison_list, list) {
+ list_del(&pl->list);
+ kfree(pl);
+ }
+ list_del_init(poison_list);
+}
+
+static int nd_bus_remove(struct device *dev)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+ mutex_lock(&nvdimm_bus_list_mutex);
+ list_del_init(&nvdimm_bus->list);
+ mutex_unlock(&nvdimm_bus_list_mutex);
+
+ nd_synchronize();
+ device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
+
+ nvdimm_bus_lock(&nvdimm_bus->dev);
+ free_poison_list(&nvdimm_bus->poison_list);
+ nvdimm_bus_unlock(&nvdimm_bus->dev);
+
+ nvdimm_bus_destroy_ndctl(nvdimm_bus);
+
+ return 0;
+}
+
+static int nd_bus_probe(struct device *dev)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ int rc;
+
+ rc = nvdimm_bus_create_ndctl(nvdimm_bus);
+ if (rc)
+ return rc;
+
+ mutex_lock(&nvdimm_bus_list_mutex);
+ list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
+ mutex_unlock(&nvdimm_bus_list_mutex);
+
+ /* enable bus provider attributes to look up their local context */
+ dev_set_drvdata(dev, nvdimm_bus->nd_desc);
+
+ return 0;
+}
+
+static struct nd_device_driver nd_bus_driver = {
+ .probe = nd_bus_probe,
+ .remove = nd_bus_remove,
+ .drv = {
+ .name = "nd_bus",
+ .suppress_bind_attrs = true,
+ .bus = &nvdimm_bus_type,
+ .owner = THIS_MODULE,
+ .mod_name = KBUILD_MODNAME,
+ },
};
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
+{
+ struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
+
+ if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver)
+ return true;
+
+ return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
+}
+
static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);
void nd_synchronize(void)
@@ -395,12 +577,10 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
"ndctl%d", nvdimm_bus->id);
- if (IS_ERR(dev)) {
+ if (IS_ERR(dev))
dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
nvdimm_bus->id, PTR_ERR(dev));
- return PTR_ERR(dev);
- }
- return 0;
+ return PTR_ERR_OR_ZERO(dev);
}
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
@@ -850,8 +1030,14 @@ int __init nvdimm_bus_init(void)
goto err_class;
}
+ rc = driver_register(&nd_bus_driver.drv);
+ if (rc)
+ goto err_nd_bus;
+
return 0;
+ err_nd_bus:
+ class_destroy(nd_class);
err_class:
unregister_chrdev(nvdimm_major, "dimmctl");
err_dimm_chrdev:
@@ -864,8 +1050,10 @@ int __init nvdimm_bus_init(void)
void nvdimm_bus_exit(void)
{
+ driver_unregister(&nd_bus_driver.drv);
class_destroy(nd_class);
unregister_chrdev(nvdimm_bus_major, "ndctl");
unregister_chrdev(nvdimm_major, "dimmctl");
bus_unregister(&nvdimm_bus_type);
+ ida_destroy(&nd_ida);
}
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 8b2e3c4fb0ad..d5dc80c48b4c 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -240,7 +240,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
return memcpy_from_pmem(buf, nsio->addr + offset, size);
} else {
memcpy_to_pmem(nsio->addr + offset, buf, size);
- wmb_pmem();
+ nvdimm_flush(to_nd_region(ndns->dev.parent));
}
return 0;
@@ -266,9 +266,8 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)
nsio->addr = devm_memremap(dev, res->start, resource_size(res),
ARCH_MEMREMAP_PMEM);
- if (IS_ERR(nsio->addr))
- return PTR_ERR(nsio->addr);
- return 0;
+
+ return PTR_ERR_OR_ZERO(nsio->addr);
}
EXPORT_SYMBOL_GPL(devm_nsio_enable);
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index be89764315c2..715583f69d28 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -20,12 +20,12 @@
#include <linux/ndctl.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/io.h>
#include "nd-core.h"
#include "nd.h"
LIST_HEAD(nvdimm_bus_list);
DEFINE_MUTEX(nvdimm_bus_list_mutex);
-static DEFINE_IDA(nd_ida);
void nvdimm_bus_lock(struct device *dev)
{
@@ -57,6 +57,127 @@ bool is_nvdimm_bus_locked(struct device *dev)
}
EXPORT_SYMBOL(is_nvdimm_bus_locked);
+struct nvdimm_map {
+ struct nvdimm_bus *nvdimm_bus;
+ struct list_head list;
+ resource_size_t offset;
+ unsigned long flags;
+ size_t size;
+ union {
+ void *mem;
+ void __iomem *iomem;
+ };
+ struct kref kref;
+};
+
+static struct nvdimm_map *find_nvdimm_map(struct device *dev,
+ resource_size_t offset)
+{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct nvdimm_map *nvdimm_map;
+
+ list_for_each_entry(nvdimm_map, &nvdimm_bus->mapping_list, list)
+ if (nvdimm_map->offset == offset)
+ return nvdimm_map;
+ return NULL;
+}
+
+static struct nvdimm_map *alloc_nvdimm_map(struct device *dev,
+ resource_size_t offset, size_t size, unsigned long flags)
+{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct nvdimm_map *nvdimm_map;
+
+ nvdimm_map = kzalloc(sizeof(*nvdimm_map), GFP_KERNEL);
+ if (!nvdimm_map)
+ return NULL;
+
+ INIT_LIST_HEAD(&nvdimm_map->list);
+ nvdimm_map->nvdimm_bus = nvdimm_bus;
+ nvdimm_map->offset = offset;
+ nvdimm_map->flags = flags;
+ nvdimm_map->size = size;
+ kref_init(&nvdimm_map->kref);
+
+ if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev)))
+ goto err_request_region;
+
+ if (flags)
+ nvdimm_map->mem = memremap(offset, size, flags);
+ else
+ nvdimm_map->iomem = ioremap(offset, size);
+
+ if (!nvdimm_map->mem)
+ goto err_map;
+
+ dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "%s: bus unlocked!",
+ __func__);
+ list_add(&nvdimm_map->list, &nvdimm_bus->mapping_list);
+
+ return nvdimm_map;
+
+ err_map:
+ release_mem_region(offset, size);
+ err_request_region:
+ kfree(nvdimm_map);
+ return NULL;
+}
+
+static void nvdimm_map_release(struct kref *kref)
+{
+ struct nvdimm_bus *nvdimm_bus;
+ struct nvdimm_map *nvdimm_map;
+
+ nvdimm_map = container_of(kref, struct nvdimm_map, kref);
+ nvdimm_bus = nvdimm_map->nvdimm_bus;
+
+ dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
+ list_del(&nvdimm_map->list);
+ if (nvdimm_map->flags)
+ memunmap(nvdimm_map->mem);
+ else
+ iounmap(nvdimm_map->iomem);
+ release_mem_region(nvdimm_map->offset, nvdimm_map->size);
+ kfree(nvdimm_map);
+}
+
+static void nvdimm_map_put(void *data)
+{
+ struct nvdimm_map *nvdimm_map = data;
+ struct nvdimm_bus *nvdimm_bus = nvdimm_map->nvdimm_bus;
+
+ nvdimm_bus_lock(&nvdimm_bus->dev);
+ kref_put(&nvdimm_map->kref, nvdimm_map_release);
+ nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+
+/**
+ * devm_nvdimm_memremap - map a resource that is shared across regions
+ * @dev: device that will own a reference to the shared mapping
+ * @offset: physical base address of the mapping
+ * @size: mapping size
+ * @flags: memremap flags, or, if zero, perform an ioremap instead
+ */
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+ size_t size, unsigned long flags)
+{
+ struct nvdimm_map *nvdimm_map;
+
+ nvdimm_bus_lock(dev);
+ nvdimm_map = find_nvdimm_map(dev, offset);
+ if (!nvdimm_map)
+ nvdimm_map = alloc_nvdimm_map(dev, offset, size, flags);
+ else
+ kref_get(&nvdimm_map->kref);
+ nvdimm_bus_unlock(dev);
+
+ if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map))
+ return NULL;
+
+ return nvdimm_map->mem;
+}
+EXPORT_SYMBOL_GPL(devm_nvdimm_memremap);
+
u64 nd_fletcher64(void *addr, size_t len, bool le)
{
u32 *buf = addr;
@@ -73,25 +194,6 @@ u64 nd_fletcher64(void *addr, size_t len, bool le)
}
EXPORT_SYMBOL_GPL(nd_fletcher64);
-static void nvdimm_bus_release(struct device *dev)
-{
- struct nvdimm_bus *nvdimm_bus;
-
- nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
- ida_simple_remove(&nd_ida, nvdimm_bus->id);
- kfree(nvdimm_bus);
-}
-
-struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
-{
- struct nvdimm_bus *nvdimm_bus;
-
- nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
- WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
- return nvdimm_bus;
-}
-EXPORT_SYMBOL_GPL(to_nvdimm_bus);
-
struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
{
/* struct nvdimm_bus definition is private to libnvdimm */
@@ -99,18 +201,12 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
}
EXPORT_SYMBOL_GPL(to_nd_desc);
-struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus)
{
- struct device *dev;
-
- for (dev = nd_dev; dev; dev = dev->parent)
- if (dev->release == nvdimm_bus_release)
- break;
- dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
- if (dev)
- return to_nvdimm_bus(dev);
- return NULL;
+ /* struct nvdimm_bus definition is private to libnvdimm */
+ return &nvdimm_bus->dev;
}
+EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev);
static bool is_uuid_sep(char sep)
{
@@ -325,51 +421,6 @@ struct attribute_group nvdimm_bus_attribute_group = {
};
EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
- struct nvdimm_bus_descriptor *nd_desc, struct module *module)
-{
- struct nvdimm_bus *nvdimm_bus;
- int rc;
-
- nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
- if (!nvdimm_bus)
- return NULL;
- INIT_LIST_HEAD(&nvdimm_bus->list);
- INIT_LIST_HEAD(&nvdimm_bus->poison_list);
- init_waitqueue_head(&nvdimm_bus->probe_wait);
- nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
- mutex_init(&nvdimm_bus->reconfig_mutex);
- if (nvdimm_bus->id < 0) {
- kfree(nvdimm_bus);
- return NULL;
- }
- nvdimm_bus->nd_desc = nd_desc;
- nvdimm_bus->module = module;
- nvdimm_bus->dev.parent = parent;
- nvdimm_bus->dev.release = nvdimm_bus_release;
- nvdimm_bus->dev.groups = nd_desc->attr_groups;
- dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
- rc = device_register(&nvdimm_bus->dev);
- if (rc) {
- dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
- goto err;
- }
-
- rc = nvdimm_bus_create_ndctl(nvdimm_bus);
- if (rc)
- goto err;
-
- mutex_lock(&nvdimm_bus_list_mutex);
- list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
- mutex_unlock(&nvdimm_bus_list_mutex);
-
- return nvdimm_bus;
- err:
- put_device(&nvdimm_bus->dev);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
-
static void set_badblock(struct badblocks *bb, sector_t s, int num)
{
dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
@@ -545,54 +596,6 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
-static void free_poison_list(struct list_head *poison_list)
-{
- struct nd_poison *pl, *next;
-
- list_for_each_entry_safe(pl, next, poison_list, list) {
- list_del(&pl->list);
- kfree(pl);
- }
- list_del_init(poison_list);
-}
-
-static int child_unregister(struct device *dev, void *data)
-{
- /*
- * the singular ndctl class device per bus needs to be
- * "device_destroy"ed, so skip it here
- *
- * i.e. remove classless children
- */
- if (dev->class)
- /* pass */;
- else
- nd_device_unregister(dev, ND_SYNC);
- return 0;
-}
-
-void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
-{
- if (!nvdimm_bus)
- return;
-
- mutex_lock(&nvdimm_bus_list_mutex);
- list_del_init(&nvdimm_bus->list);
- mutex_unlock(&nvdimm_bus_list_mutex);
-
- nd_synchronize();
- device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
-
- nvdimm_bus_lock(&nvdimm_bus->dev);
- free_poison_list(&nvdimm_bus->poison_list);
- nvdimm_bus_unlock(&nvdimm_bus->dev);
-
- nvdimm_bus_destroy_ndctl(nvdimm_bus);
-
- device_unregister(&nvdimm_bus->dev);
-}
-EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
-
#ifdef CONFIG_BLK_DEV_INTEGRITY
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
@@ -601,7 +604,8 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
if (meta_size == 0)
return 0;
- bi.profile = NULL;
+ memset(&bi, 0, sizeof(bi));
+
bi.tuple_size = meta_size;
bi.tag_size = meta_size;
@@ -650,7 +654,6 @@ static __exit void libnvdimm_exit(void)
nvdimm_bus_exit();
nd_region_devs_exit();
nvdimm_devs_exit();
- ida_destroy(&nd_ida);
}
MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index bbde28d3dec5..d9bba5edd8dc 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -346,7 +346,8 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
const struct attribute_group **groups, unsigned long flags,
- unsigned long cmd_mask)
+ unsigned long cmd_mask, int num_flush,
+ struct resource *flush_wpq)
{
struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
struct device *dev;
@@ -362,6 +363,8 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
nvdimm->provider_data = provider_data;
nvdimm->flags = flags;
nvdimm->cmd_mask = cmd_mask;
+ nvdimm->num_flush = num_flush;
+ nvdimm->flush_wpq = flush_wpq;
atomic_set(&nvdimm->busy, 0);
dev = &nvdimm->dev;
dev_set_name(dev, "nmem%d", nvdimm->id);
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
index 95825b38559a..11ea90120542 100644
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -47,6 +47,7 @@ static int e820_pmem_probe(struct platform_device *pdev)
nd_desc.attr_groups = e820_pmem_attribute_groups;
nd_desc.provider_name = "e820";
+ nd_desc.module = THIS_MODULE;
nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
if (!nvdimm_bus)
goto err;
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 284cdaa268cf..38ce6bbbc170 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -26,11 +26,11 @@ extern int nvdimm_major;
struct nvdimm_bus {
struct nvdimm_bus_descriptor *nd_desc;
wait_queue_head_t probe_wait;
- struct module *module;
struct list_head list;
struct device dev;
int id, probe_active;
struct list_head poison_list;
+ struct list_head mapping_list;
struct mutex reconfig_mutex;
};
@@ -40,7 +40,8 @@ struct nvdimm {
unsigned long cmd_mask;
struct device dev;
atomic_t busy;
- int id;
+ int id, num_flush;
+ struct resource *flush_wpq;
};
bool is_nvdimm(struct device *dev);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index d0ac93c31dda..40476399d227 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -49,9 +49,11 @@ struct nvdimm_drvdata {
struct kref kref;
};
-struct nd_region_namespaces {
- int count;
- int active;
+struct nd_region_data {
+ int ns_count;
+ int ns_active;
+ unsigned int flush_mask;
+ void __iomem *flush_wpq[0][0];
};
static inline struct nd_namespace_index *to_namespace_index(
@@ -119,7 +121,6 @@ struct nd_region {
struct nd_blk_region {
int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
- void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
void *iobuf, u64 len, int rw);
void *blk_provider_data;
@@ -325,6 +326,7 @@ static inline void devm_nsio_disable(struct device *dev,
}
#endif
int nd_blk_region_init(struct nd_region *nd_region);
+int nd_region_activate(struct nd_region *nd_region);
void __nd_iostat_start(struct bio *bio, unsigned long *start);
static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
{
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 608fc4464574..9f75eb85cf7c 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -29,27 +29,28 @@
#include <linux/slab.h>
#include <linux/pmem.h>
#include <linux/nd.h>
+#include "pmem.h"
#include "pfn.h"
#include "nd.h"
-struct pmem_device {
- /* One contiguous memory region per device */
- phys_addr_t phys_addr;
- /* when non-zero this device is hosting a 'pfn' instance */
- phys_addr_t data_offset;
- u64 pfn_flags;
- void __pmem *virt_addr;
- /* immutable base size of the namespace */
- size_t size;
- /* trim size when namespace capacity has been section aligned */
- u32 pfn_pad;
- struct badblocks bb;
-};
+static struct device *to_dev(struct pmem_device *pmem)
+{
+ /*
+ * nvdimm bus services need a 'dev' parameter, and we record the device
+ * at init in bb.dev.
+ */
+ return pmem->bb.dev;
+}
+
+static struct nd_region *to_region(struct pmem_device *pmem)
+{
+ return to_nd_region(to_dev(pmem)->parent);
+}
static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
unsigned int len)
{
- struct device *dev = pmem->bb.dev;
+ struct device *dev = to_dev(pmem);
sector_t sector;
long cleared;
@@ -57,7 +58,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
if (cleared > 0 && cleared / 512) {
- dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
+ dev_dbg(dev, "%s: %#llx clear %ld sector%s\n",
__func__, (unsigned long long) sector,
cleared / 512, cleared / 512 > 1 ? "s" : "");
badblocks_clear(&pmem->bb, sector, cleared / 512);
@@ -73,7 +74,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
bool bad_pmem = false;
void *mem = kmap_atomic(page);
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
- void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
+ void *pmem_addr = pmem->virt_addr + pmem_off;
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
bad_pmem = true;
@@ -112,6 +113,11 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
return rc;
}
+/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */
+#ifndef REQ_FLUSH
+#define REQ_FLUSH REQ_PREFLUSH
+#endif
+
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
int rc = 0;
@@ -120,6 +126,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
struct bio_vec bvec;
struct bvec_iter iter;
struct pmem_device *pmem = q->queuedata;
+ struct nd_region *nd_region = to_region(pmem);
+
+ if (bio->bi_rw & REQ_FLUSH)
+ nvdimm_flush(nd_region);
do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter) {
@@ -134,8 +144,8 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
if (do_acct)
nd_iostat_end(bio, start);
- if (bio_data_dir(bio))
- wmb_pmem();
+ if (bio->bi_rw & REQ_FUA)
+ nvdimm_flush(nd_region);
bio_endio(bio);
return BLK_QC_T_NONE;
@@ -148,8 +158,6 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
int rc;
rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
- if (rw & WRITE)
- wmb_pmem();
/*
* The ->rw_page interface is subtle and tricky. The core
@@ -163,8 +171,9 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
return rc;
}
-static long pmem_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size)
+/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
+__weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
+ void **kaddr, pfn_t *pfn, long size)
{
struct pmem_device *pmem = bdev->bd_queue->queuedata;
resource_size_t offset = sector * 512 + pmem->data_offset;
@@ -195,7 +204,7 @@ static void pmem_release_queue(void *q)
blk_cleanup_queue(q);
}
-void pmem_release_disk(void *disk)
+static void pmem_release_disk(void *disk)
{
del_gendisk(disk);
put_disk(disk);
@@ -205,6 +214,7 @@ static int pmem_attach_disk(struct device *dev,
struct nd_namespace_common *ndns)
{
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+ struct nd_region *nd_region = to_nd_region(dev->parent);
struct vmem_altmap __altmap, *altmap = NULL;
struct resource *res = &nsio->res;
struct nd_pfn *nd_pfn = NULL;
@@ -234,7 +244,7 @@ static int pmem_attach_disk(struct device *dev,
dev_set_drvdata(dev, pmem);
pmem->phys_addr = res->start;
pmem->size = resource_size(res);
- if (!arch_has_wmb_pmem())
+ if (nvdimm_has_flush(nd_region) < 0)
dev_warn(dev, "unable to guarantee persistence of writes\n");
if (!devm_request_mem_region(dev, res->start, resource_size(res),
@@ -269,15 +279,14 @@ static int pmem_attach_disk(struct device *dev,
* At release time the queue must be dead before
* devm_memremap_pages is unwound
*/
- if (devm_add_action(dev, pmem_release_queue, q)) {
- blk_cleanup_queue(q);
+ if (devm_add_action_or_reset(dev, pmem_release_queue, q))
return -ENOMEM;
- }
if (IS_ERR(addr))
return PTR_ERR(addr);
- pmem->virt_addr = (void __pmem *) addr;
+ pmem->virt_addr = addr;
+ blk_queue_write_cache(q, true, true);
blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_max_hw_sectors(q, UINT_MAX);
@@ -288,10 +297,6 @@ static int pmem_attach_disk(struct device *dev,
disk = alloc_disk_node(0, nid);
if (!disk)
return -ENOMEM;
- if (devm_add_action(dev, pmem_release_disk, disk)) {
- put_disk(disk);
- return -ENOMEM;
- }
disk->fops = &pmem_fops;
disk->queue = q;
@@ -302,9 +307,13 @@ static int pmem_attach_disk(struct device *dev,
/ 512);
if (devm_init_badblocks(dev, &pmem->bb))
return -ENOMEM;
- nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res);
+ nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
disk->bb = &pmem->bb;
add_disk(disk);
+
+ if (devm_add_action_or_reset(dev, pmem_release_disk, disk))
+ return -ENOMEM;
+
revalidate_disk(disk);
return 0;
@@ -340,13 +349,20 @@ static int nd_pmem_remove(struct device *dev)
{
if (is_nd_btt(dev))
nvdimm_namespace_detach_btt(to_nd_btt(dev));
+ nvdimm_flush(to_nd_region(dev->parent));
+
return 0;
}
+static void nd_pmem_shutdown(struct device *dev)
+{
+ nvdimm_flush(to_nd_region(dev->parent));
+}
+
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
{
- struct nd_region *nd_region = to_nd_region(dev->parent);
struct pmem_device *pmem = dev_get_drvdata(dev);
+ struct nd_region *nd_region = to_region(pmem);
resource_size_t offset = 0, end_trunc = 0;
struct nd_namespace_common *ndns;
struct nd_namespace_io *nsio;
@@ -382,6 +398,7 @@ static struct nd_device_driver nd_pmem_driver = {
.probe = nd_pmem_probe,
.remove = nd_pmem_remove,
.notify = nd_pmem_notify,
+ .shutdown = nd_pmem_shutdown,
.drv = {
.name = "nd_pmem",
},
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
new file mode 100644
index 000000000000..b4ee4f71b4a1
--- /dev/null
+++ b/drivers/nvdimm/pmem.h
@@ -0,0 +1,24 @@
+#ifndef __NVDIMM_PMEM_H__
+#define __NVDIMM_PMEM_H__
+#include <linux/badblocks.h>
+#include <linux/types.h>
+#include <linux/pfn_t.h>
+#include <linux/fs.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+ void **kaddr, pfn_t *pfn, long size);
+/* this definition is in it's own header for tools/testing/nvdimm to consume */
+struct pmem_device {
+ /* One contiguous memory region per device */
+ phys_addr_t phys_addr;
+ /* when non-zero this device is hosting a 'pfn' instance */
+ phys_addr_t data_offset;
+ u64 pfn_flags;
+ void *virt_addr;
+ /* immutable base size of the namespace */
+ size_t size;
+ /* trim size when namespace capacity has been section aligned */
+ u32 pfn_pad;
+ struct badblocks bb;
+};
+#endif /* __NVDIMM_PMEM_H__ */
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 05a912359939..8f241772ec0b 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -20,7 +20,7 @@ static int nd_region_probe(struct device *dev)
{
int err, rc;
static unsigned long once;
- struct nd_region_namespaces *num_ns;
+ struct nd_region_data *ndrd;
struct nd_region *nd_region = to_nd_region(dev);
if (nd_region->num_lanes > num_online_cpus()
@@ -33,21 +33,21 @@ static int nd_region_probe(struct device *dev)
nd_region->num_lanes);
}
+ rc = nd_region_activate(nd_region);
+ if (rc)
+ return rc;
+
rc = nd_blk_region_init(nd_region);
if (rc)
return rc;
rc = nd_region_register_namespaces(nd_region, &err);
- num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
- if (!num_ns)
- return -ENOMEM;
-
if (rc < 0)
return rc;
- num_ns->active = rc;
- num_ns->count = rc + err;
- dev_set_drvdata(dev, num_ns);
+ ndrd = dev_get_drvdata(dev);
+ ndrd->ns_active = rc;
+ ndrd->ns_count = rc + err;
if (rc && err && rc == err)
return -ENODEV;
@@ -82,6 +82,8 @@ static int nd_region_remove(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev);
+ device_for_each_child(dev, NULL, child_unregister);
+
/* flush attribute readers and disable */
nvdimm_bus_lock(dev);
nd_region->ns_seed = NULL;
@@ -91,7 +93,6 @@ static int nd_region_remove(struct device *dev)
dev_set_drvdata(dev, NULL);
nvdimm_bus_unlock(dev);
- device_for_each_child(dev, NULL, child_unregister);
return 0;
}
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 40fcfea26fbb..e8d5ba7b29af 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -14,13 +14,97 @@
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/pmem.h>
#include <linux/sort.h>
#include <linux/io.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"
+/*
+ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
+ * irrelevant.
+ */
+#include <linux/io-64-nonatomic-hi-lo.h>
+
static DEFINE_IDA(region_ida);
+static DEFINE_PER_CPU(int, flush_idx);
+
+static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
+ struct nd_region_data *ndrd)
+{
+ int i, j;
+
+ dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
+ nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
+ for (i = 0; i < nvdimm->num_flush; i++) {
+ struct resource *res = &nvdimm->flush_wpq[i];
+ unsigned long pfn = PHYS_PFN(res->start);
+ void __iomem *flush_page;
+
+ /* check if flush hints share a page */
+ for (j = 0; j < i; j++) {
+ struct resource *res_j = &nvdimm->flush_wpq[j];
+ unsigned long pfn_j = PHYS_PFN(res_j->start);
+
+ if (pfn == pfn_j)
+ break;
+ }
+
+ if (j < i)
+ flush_page = (void __iomem *) ((unsigned long)
+ ndrd->flush_wpq[dimm][j] & PAGE_MASK);
+ else
+ flush_page = devm_nvdimm_ioremap(dev,
+ PHYS_PFN(pfn), PAGE_SIZE);
+ if (!flush_page)
+ return -ENXIO;
+ ndrd->flush_wpq[dimm][i] = flush_page
+ + (res->start & ~PAGE_MASK);
+ }
+
+ return 0;
+}
+
+int nd_region_activate(struct nd_region *nd_region)
+{
+ int i, num_flush = 0;
+ struct nd_region_data *ndrd;
+ struct device *dev = &nd_region->dev;
+ size_t flush_data_size = sizeof(void *);
+
+ nvdimm_bus_lock(&nd_region->dev);
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+ /* at least one null hint slot per-dimm for the "no-hint" case */
+ flush_data_size += sizeof(void *);
+ num_flush = min_not_zero(num_flush, nvdimm->num_flush);
+ if (!nvdimm->num_flush)
+ continue;
+ flush_data_size += nvdimm->num_flush * sizeof(void *);
+ }
+ nvdimm_bus_unlock(&nd_region->dev);
+
+ ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
+ if (!ndrd)
+ return -ENOMEM;
+ dev_set_drvdata(dev, ndrd);
+
+ ndrd->flush_mask = (1 << ilog2(num_flush)) - 1;
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+ int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);
+
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
static void nd_region_release(struct device *dev)
{
@@ -242,12 +326,12 @@ static DEVICE_ATTR_RO(available_size);
static ssize_t init_namespaces_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct nd_region_namespaces *num_ns = dev_get_drvdata(dev);
+ struct nd_region_data *ndrd = dev_get_drvdata(dev);
ssize_t rc;
nvdimm_bus_lock(dev);
- if (num_ns)
- rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count);
+ if (ndrd)
+ rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
else
rc = -ENXIO;
nvdimm_bus_unlock(dev);
@@ -433,8 +517,6 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
if (is_nd_pmem(dev))
return;
-
- to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
}
if (dev->parent && is_nd_blk(dev->parent) && probe) {
nd_region = to_nd_region(dev->parent);
@@ -698,7 +780,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
if (ndbr) {
nd_region = &ndbr->nd_region;
ndbr->enable = ndbr_desc->enable;
- ndbr->disable = ndbr_desc->disable;
ndbr->do_io = ndbr_desc->do_io;
}
region_buf = ndbr;
@@ -794,6 +875,67 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
+/**
+ * nvdimm_flush - flush any posted write queues between the cpu and pmem media
+ * @nd_region: blk or interleaved pmem region
+ */
+void nvdimm_flush(struct nd_region *nd_region)
+{
+ struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+ int i, idx;
+
+ /*
+ * Try to encourage some diversity in flush hint addresses
+ * across cpus assuming a limited number of flush hints.
+ */
+ idx = this_cpu_read(flush_idx);
+ idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
+
+ /*
+ * The first wmb() is needed to 'sfence' all previous writes
+ * such that they are architecturally visible for the platform
+ * buffer flush. Note that we've already arranged for pmem
+ * writes to avoid the cache via arch_memcpy_to_pmem(). The
+ * final wmb() ensures ordering for the NVDIMM flush write.
+ */
+ wmb();
+ for (i = 0; i < nd_region->ndr_mappings; i++)
+ if (ndrd->flush_wpq[i][0])
+ writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]);
+ wmb();
+}
+EXPORT_SYMBOL_GPL(nvdimm_flush);
+
+/**
+ * nvdimm_has_flush - determine write flushing requirements
+ * @nd_region: blk or interleaved pmem region
+ *
+ * Returns 1 if writes require flushing
+ * Returns 0 if writes do not require flushing
+ * Returns -ENXIO if flushing capability can not be determined
+ */
+int nvdimm_has_flush(struct nd_region *nd_region)
+{
+ struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+ int i;
+
+ /* no nvdimm == flushing capability unknown */
+ if (nd_region->ndr_mappings == 0)
+ return -ENXIO;
+
+ for (i = 0; i < nd_region->ndr_mappings; i++)
+ /* flush hints present, flushing required */
+ if (ndrd->flush_wpq[i][0])
+ return 1;
+
+ /*
+ * The platform defines dimm devices without hints, assume
+ * platform persistence mechanism like ADR
+ */
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvdimm_has_flush);
+
void __exit nd_region_devs_exit(void)
{
ida_destroy(&region_ida);
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index bed53c46dd90..023c5c975dc0 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -31,7 +31,7 @@ static void dcssblk_release(struct gendisk *disk, fmode_t mode);
static blk_qc_t dcssblk_make_request(struct request_queue *q,
struct bio *bio);
static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
- void __pmem **kaddr, pfn_t *pfn, long size);
+ void **kaddr, pfn_t *pfn, long size);
static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
@@ -884,7 +884,7 @@ fail:
static long
dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
- void __pmem **kaddr, pfn_t *pfn, long size)
+ void **kaddr, pfn_t *pfn, long size)
{
struct dcssblk_dev_info *dev_info;
unsigned long offset, dev_sz;
@@ -894,7 +894,7 @@ dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
return -ENODEV;
dev_sz = dev_info->end - dev_info->start;
offset = secnum * 512;
- *kaddr = (void __pmem *) (dev_info->start + offset);
+ *kaddr = (void *) dev_info->start + offset;
*pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
return dev_sz - offset;