diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-28 17:22:07 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-28 17:38:16 -0700 |
commit | f0c98ebc57c2d5e535bc4f9167f35650d2ba3c90 (patch) | |
tree | ad584aa321c0a2dbdaa49e0754f6c9f233b79a48 /include | |
parent | d94ba9e7d8d5c821d0442f13b30b0140c1109c38 (diff) | |
parent | 0606263f24f3d64960de742c55894190b5df903b (diff) | |
download | lwn-f0c98ebc57c2d5e535bc4f9167f35650d2ba3c90.tar.gz lwn-f0c98ebc57c2d5e535bc4f9167f35650d2ba3c90.zip |
Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
- Replace pcommit with ADR / directed-flushing.
The pcommit instruction, which has not shipped on any product, is
deprecated. Instead, the requirement is that platforms implement
either ADR, or provide one or more flush addresses per nvdimm.
ADR (Asynchronous DRAM Refresh) flushes data in posted write buffers
to the memory controller on a power-fail event.
Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware
Interface Table (NFIT) sub-structure: "Flush Hint Address Structure".
A flush hint is an mmio address that, when written and fenced, assures
that all previous posted writes targeting a given dimm have been
flushed to media.
- On-demand ARS (address range scrub).
Linux uses the results of the ACPI ARS commands to track bad blocks
in pmem devices. When latent errors are detected we re-scrub the
media to refresh the bad block list; userspace can also request a
re-scrub at any time.
- Support for the Microsoft DSM (device specific method) command
format.
- Support for EDK2/OVMF virtual disk device memory ranges.
- Various fixes and cleanups across the subsystem.
* tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits)
libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register"
nfit: do an ARS scrub on hitting a latent media error
nfit: move to nfit/ sub-directory
nfit, libnvdimm: allow an ARS scrub to be triggered on demand
libnvdimm: register nvdimm_bus devices with an nd_bus driver
pmem: clarify a debug print in pmem_clear_poison
x86/insn: remove pcommit
Revert "KVM: x86: add pcommit support"
nfit, tools/testing/nvdimm/: unify shutdown paths
libnvdimm: move ->module to struct nvdimm_bus_descriptor
nfit: cleanup acpi_nfit_init calling convention
nfit: fix _FIT evaluation memory leak + use after free
tools/testing/nvdimm: add manufacturing_{date|location} dimm properties
tools/testing/nvdimm: add virtual ramdisk range
acpi, nfit: treat virtual ramdisk SPA as pmem region
pmem: kill __pmem address space
pmem: kill wmb_pmem()
libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes
fs/dax: remove wmb_pmem()
libnvdimm, pmem: flush posted-write queues on shutdown
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/blkdev.h | 6 | ||||
-rw-r--r-- | include/linux/compiler.h | 2 | ||||
-rw-r--r-- | include/linux/device-mapper.h | 2 | ||||
-rw-r--r-- | include/linux/libnvdimm.h | 24 | ||||
-rw-r--r-- | include/linux/nd.h | 3 | ||||
-rw-r--r-- | include/linux/pfn_t.h | 5 | ||||
-rw-r--r-- | include/linux/pmem.h | 117 | ||||
-rw-r--r-- | include/uapi/linux/ndctl.h | 1 |
8 files changed, 50 insertions, 110 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c96db9c22d10..adf33079771e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1665,7 +1665,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req, */ struct blk_dax_ctl { sector_t sector; - void __pmem *addr; + void *addr; long size; pfn_t pfn; }; @@ -1676,8 +1676,8 @@ struct block_device_operations { int (*rw_page)(struct block_device *, sector_t, struct page *, int rw); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); - long (*direct_access)(struct block_device *, sector_t, void __pmem **, - pfn_t *, long); + long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *, + long); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2e853b679a5d..1bb954842725 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -17,7 +17,6 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? 
({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) -# define __pmem __attribute__((noderef, address_space(5))) #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) #else /* CONFIG_SPARSE_RCU_POINTER */ @@ -45,7 +44,6 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __cond_lock(x,c) (c) # define __percpu # define __rcu -# define __pmem # define __private # define ACCESS_PRIVATE(p, member) ((p)->member) #endif /* __CHECKER__ */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index b0db857f334b..91acfce74a22 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -131,7 +131,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); * >= 0 : the number of bytes accessible at the address */ typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector, - void __pmem **kaddr, pfn_t *pfn, long size); + void **kaddr, pfn_t *pfn, long size); void dm_error(const char *message); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 0c3c30cbbea5..b519e137b9b7 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -52,6 +52,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nd_namespace_label; struct nvdimm_drvdata; + struct nd_mapping { struct nvdimm *nvdimm; struct nd_namespace_label **labels; @@ -69,6 +70,7 @@ struct nd_mapping { struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; unsigned long cmd_mask; + struct module *module; char *provider_name; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); @@ -99,13 +101,21 @@ struct nd_region_desc { unsigned long flags; }; +struct device; +void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags); +static inline void __iomem *devm_nvdimm_ioremap(struct device *dev, + resource_size_t offset, size_t size) +{ + return 
(void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0); +} + struct nvdimm_bus; struct module; struct device; struct nd_blk_region; struct nd_blk_region_desc { int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); - void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); struct nd_region_desc ndr_desc; @@ -119,22 +129,22 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( } int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); -struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, - struct nvdimm_bus_descriptor *nfit_desc, struct module *module); -#define nvdimm_bus_register(parent, desc) \ - __nvdimm_bus_register(parent, desc, THIS_MODULE) +struct nvdimm_bus *nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nfit_desc); void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); struct nvdimm_bus *to_nvdimm_bus(struct device *dev); struct nvdimm *to_nvdimm(struct device *dev); struct nd_region *to_nd_region(struct device *dev); struct nd_blk_region *to_nd_blk_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); +struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus); const char *nvdimm_name(struct nvdimm *nvdimm); unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, - unsigned long cmd_mask); + unsigned long cmd_mask, int num_flush, + struct resource *flush_wpq); const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, @@ -156,4 +166,6 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); unsigned int 
nd_region_acquire_lane(struct nd_region *nd_region); void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); +void nvdimm_flush(struct nd_region *nd_region); +int nvdimm_has_flush(struct nd_region *nd_region); #endif /* __LIBNVDIMM_H__ */ diff --git a/include/linux/nd.h b/include/linux/nd.h index aee2761d294c..f1ea426d6a5e 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -26,6 +26,7 @@ struct nd_device_driver { unsigned long type; int (*probe)(struct device *dev); int (*remove)(struct device *dev); + void (*shutdown)(struct device *dev); void (*notify)(struct device *dev, enum nvdimm_event event); }; @@ -67,7 +68,7 @@ struct nd_namespace_io { struct nd_namespace_common common; struct resource res; resource_size_t size; - void __pmem *addr; + void *addr; struct badblocks bb; }; diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 94994810c7c0..a3d90b9da18d 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -28,7 +28,10 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn) return __pfn_to_pfn_t(pfn, 0); } -extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags); +static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags) +{ + return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); +} static inline bool pfn_t_has_page(pfn_t pfn) { diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 57d146fe44dd..e856c2cb0fe8 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -26,47 +26,35 @@ * calling these symbols with arch_has_pmem_api() and redirect to the * implementation in asm/pmem.h. 
*/ -static inline bool __arch_has_wmb_pmem(void) -{ - return false; -} - -static inline void arch_wmb_pmem(void) -{ - BUG(); -} - -static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, - size_t n) +static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) { BUG(); } -static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, - size_t n) +static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) { BUG(); return -EFAULT; } -static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, +static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, struct iov_iter *i) { BUG(); return 0; } -static inline void arch_clear_pmem(void __pmem *addr, size_t size) +static inline void arch_clear_pmem(void *addr, size_t size) { BUG(); } -static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) +static inline void arch_wb_cache_pmem(void *addr, size_t size) { BUG(); } -static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +static inline void arch_invalidate_pmem(void *addr, size_t size) { BUG(); } @@ -77,13 +65,6 @@ static inline bool arch_has_pmem_api(void) return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); } -static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src, - size_t size) -{ - memcpy(dst, (void __force *) src, size); - return 0; -} - /* * memcpy_from_pmem - read from persistent memory with error handling * @dst: destination buffer @@ -92,54 +73,13 @@ static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src, * * Returns 0 on success negative error code on failure. 
*/ -static inline int memcpy_from_pmem(void *dst, void __pmem const *src, - size_t size) +static inline int memcpy_from_pmem(void *dst, void const *src, size_t size) { if (arch_has_pmem_api()) return arch_memcpy_from_pmem(dst, src, size); else - return default_memcpy_from_pmem(dst, src, size); -} - -/** - * arch_has_wmb_pmem - true if wmb_pmem() ensures durability - * - * For a given cpu implementation within an architecture it is possible - * that wmb_pmem() resolves to a nop. In the case this returns - * false, pmem api users are unable to ensure durability and may want to - * fall back to a different data consistency model, or otherwise notify - * the user. - */ -static inline bool arch_has_wmb_pmem(void) -{ - return arch_has_pmem_api() && __arch_has_wmb_pmem(); -} - -/* - * These defaults seek to offer decent performance and minimize the - * window between i/o completion and writes being durable on media. - * However, it is undefined / architecture specific whether - * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for - * making data durable relative to i/o completion. - */ -static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src, - size_t size) -{ - memcpy((void __force *) dst, src, size); -} - -static inline size_t default_copy_from_iter_pmem(void __pmem *addr, - size_t bytes, struct iov_iter *i) -{ - return copy_from_iter_nocache((void __force *)addr, bytes, i); -} - -static inline void default_clear_pmem(void __pmem *addr, size_t size) -{ - if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0) - clear_page((void __force *)addr); - else - memset((void __force *)addr, 0, size); + memcpy(dst, src, size); + return 0; } /** @@ -152,29 +92,14 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size) * being effectively evicted from, or never written to, the processor * cache hierarchy after the copy completes. 
After memcpy_to_pmem() * data may still reside in cpu or platform buffers, so this operation - * must be followed by a wmb_pmem(). + * must be followed by a blkdev_issue_flush() on the pmem block device. */ -static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) +static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) { if (arch_has_pmem_api()) arch_memcpy_to_pmem(dst, src, n); else - default_memcpy_to_pmem(dst, src, n); -} - -/** - * wmb_pmem - synchronize writes to persistent memory - * - * After a series of memcpy_to_pmem() operations this drains data from - * cpu write buffers and any platform (memory controller) buffers to - * ensure that written data is durable on persistent memory media. - */ -static inline void wmb_pmem(void) -{ - if (arch_has_wmb_pmem()) - arch_wmb_pmem(); - else - wmb(); + memcpy(dst, src, n); } /** @@ -184,14 +109,14 @@ static inline void wmb_pmem(void) * @i: iterator with source data * * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. - * This function requires explicit ordering with a wmb_pmem() call. + * See blkdev_issue_flush() note for memcpy_to_pmem(). */ -static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes, +static inline size_t copy_from_iter_pmem(void *addr, size_t bytes, struct iov_iter *i) { if (arch_has_pmem_api()) return arch_copy_from_iter_pmem(addr, bytes, i); - return default_copy_from_iter_pmem(addr, bytes, i); + return copy_from_iter_nocache(addr, bytes, i); } /** @@ -200,14 +125,14 @@ static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes, * @size: number of bytes to zero * * Write zeros into the memory range starting at 'addr' for 'size' bytes. - * This function requires explicit ordering with a wmb_pmem() call. + * See blkdev_issue_flush() note for memcpy_to_pmem(). 
*/ -static inline void clear_pmem(void __pmem *addr, size_t size) +static inline void clear_pmem(void *addr, size_t size) { if (arch_has_pmem_api()) arch_clear_pmem(addr, size); else - default_clear_pmem(addr, size); + memset(addr, 0, size); } /** @@ -218,7 +143,7 @@ static inline void clear_pmem(void __pmem *addr, size_t size) * For platforms that support clearing poison this flushes any poisoned * ranges out of the cache */ -static inline void invalidate_pmem(void __pmem *addr, size_t size) +static inline void invalidate_pmem(void *addr, size_t size) { if (arch_has_pmem_api()) arch_invalidate_pmem(addr, size); @@ -230,9 +155,9 @@ static inline void invalidate_pmem(void __pmem *addr, size_t size) * @size: number of bytes to write back * * Write back the processor cache range starting at 'addr' for 'size' bytes. - * This function requires explicit ordering with a wmb_pmem() call. + * See blkdev_issue_flush() note for memcpy_to_pmem(). */ -static inline void wb_cache_pmem(void __pmem *addr, size_t size) +static inline void wb_cache_pmem(void *addr, size_t size) { if (arch_has_pmem_api()) arch_wb_cache_pmem(addr, size); diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 309915f74492..ba5a8c79652a 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -298,6 +298,7 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_INTEL 0 #define NVDIMM_FAMILY_HPE1 1 #define NVDIMM_FAMILY_HPE2 2 +#define NVDIMM_FAMILY_MSFT 3 #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) |