diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-07 09:44:06 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-07 09:44:06 -0700 |
commit | b6ffe9ba46016f8351896ccee33bebcd0e5ea7c0 (patch) | |
tree | 839a5a070eabe8851797330ea77ca7eb7c93bcc1 /include | |
parent | 9f45efb9286268e01d5022d34a58a68f53ca3072 (diff) | |
parent | 9d92573fff3ec70785ef1815cc80573f70e7a921 (diff) | |
download | lwn-b6ffe9ba46016f8351896ccee33bebcd0e5ea7c0.tar.gz lwn-b6ffe9ba46016f8351896ccee33bebcd0e5ea7c0.zip |
Merge tag 'libnvdimm-for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"libnvdimm updates for the latest ACPI and UEFI specifications. This
pull request also includes new 'struct dax_operations' methods that make
it possible to undo the abuse of copy_user_nocache() for copy operations to pmem.
The dax work originally missed 4.12 to address concerns raised by Al.
Summary:
- Introduce the _flushcache() family of memory copy helpers and use
them for persistent memory write operations on x86. The
_flushcache() semantic indicates that the cache is either bypassed
for the copy operation (movnt) or any lines dirtied by the copy
operation are written back (clwb, clflushopt, or clflush).
- Extend dax_operations with ->copy_from_iter() and ->flush()
operations. These operations and other infrastructure updates allow
all persistent memory specific dax functionality to be pushed into
libnvdimm and the pmem driver directly. It also allows dax-specific
sysfs attributes to be linked to a host device, for example:
/sys/block/pmem0/dax/write_cache
- Add support for the new NVDIMM platform/firmware mechanisms
introduced in ACPI 6.2 and UEFI 2.7. This support includes the v1.2
namespace label format, extensions to the address-range-scrub
command set, new error injection commands, and a new BTT
(block-translation-table) layout. These updates support inter-OS
and pre-OS compatibility.
- Fix a longstanding memory corruption bug in nfit_test.
- Make the pmem and nvdimm-region 'badblocks' sysfs files poll(2)
capable.
- Miscellaneous fixes and small updates across libnvdimm and the nfit
driver.
Acknowledgements that came after the branch was pushed: commit
6aa734a2f38e ("libnvdimm, region, pmem: fix 'badblocks'
sysfs_get_dirent() reference lifetime") was reviewed by Toshi Kani
<toshi.kani@hpe.com>"
* tag 'libnvdimm-for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (42 commits)
libnvdimm, namespace: record 'lbasize' for pmem namespaces
acpi/nfit: Issue Start ARS to retrieve existing records
libnvdimm: New ACPI 6.2 DSM functions
acpi, nfit: Show bus_dsm_mask in sysfs
libnvdimm, acpi, nfit: Add bus level dsm mask for pass thru.
acpi, nfit: Enable DSM pass thru for root functions.
libnvdimm: passthru functions clear to send
libnvdimm, btt: convert some info messages to warn/err
libnvdimm, region, pmem: fix 'badblocks' sysfs_get_dirent() reference lifetime
libnvdimm: fix the clear-error check in nsio_rw_bytes
libnvdimm, btt: fix btt_rw_page not returning errors
acpi, nfit: quiet invalid block-aperture-region warnings
libnvdimm, btt: BTT updates for UEFI 2.7 format
acpi, nfit: constify *_attribute_group
libnvdimm, pmem: disable dax flushing when pmem is fronting a volatile region
libnvdimm, pmem, dax: export a cache control attribute
dax: convert to bitmask for flags
dax: remove default copy_from_iter fallback
libnvdimm, nfit: enable support for volatile ranges
libnvdimm, pmem: fix persistence warning
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/dax.h | 12 |
-rw-r--r-- | include/linux/device-mapper.h | 6 |
-rw-r--r-- | include/linux/libnvdimm.h | 11 |
-rw-r--r-- | include/linux/nd.h | 13 |
-rw-r--r-- | include/linux/pmem.h | 142 |
-rw-r--r-- | include/linux/string.h | 6 |
-rw-r--r-- | include/linux/uio.h | 15 |
-rw-r--r-- | include/uapi/linux/ndctl.h | 42 |
8 files changed, 103 insertions, 144 deletions
diff --git a/include/linux/dax.h b/include/linux/dax.h index 5ec1f6c47716..8f39db7439c3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -16,8 +16,15 @@ struct dax_operations { */ long (*direct_access)(struct dax_device *, pgoff_t, long, void **, pfn_t *); + /* copy_from_iter: required operation for fs-dax direct-i/o */ + size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, + struct iov_iter *); + /* flush: optional driver-specific cache management after writes */ + void (*flush)(struct dax_device *, pgoff_t, void *, size_t); }; +extern struct attribute_group dax_attribute_group; + #if IS_ENABLED(CONFIG_DAX) struct dax_device *dax_get_by_host(const char *host); void put_dax(struct dax_device *dax_dev); @@ -75,6 +82,11 @@ void kill_dax(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); +size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i); +void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t size); +void dax_write_cache(struct dax_device *dax_dev, bool wc); /* * We use lowest available bit in exceptional entry for locking, one bit for diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 0c1b50ad23b0..1473455d0341 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -132,6 +132,10 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); +typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i); +typedef void (*dm_dax_flush_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, + size_t size); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); @@ 
-181,6 +185,8 @@ struct target_type { dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; + dm_dax_copy_from_iter_fn dax_copy_from_iter; + dm_dax_flush_fn dax_flush; /* For internal device-mapper use. */ struct list_head list; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 6c807017128d..f3d3e6af8838 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/sizes.h> #include <linux/types.h> +#include <linux/uuid.h> enum { /* when a dimm supports both PMEM and BLK access a label is required */ @@ -54,6 +55,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; + unsigned long bus_dsm_mask; unsigned long cmd_mask; struct module *module; char *provider_name; @@ -71,9 +73,14 @@ struct nd_cmd_desc { }; struct nd_interleave_set { - u64 cookie; + /* v1.1 definition of the interleave-set-cookie algorithm */ + u64 cookie1; + /* v1.2 definition of the interleave-set-cookie algorithm */ + u64 cookie2; /* compatibility with initial buggy Linux implementation */ u64 altcookie; + + guid_t type_guid; }; struct nd_mapping_desc { @@ -159,9 +166,11 @@ void *nd_region_provider_data(struct nd_region *nd_region); void *nd_blk_region_provider_data(struct nd_blk_region *ndbr); void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data); struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); +unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr); unsigned int nd_region_acquire_lane(struct nd_region *nd_region); void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); void nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); +int nvdimm_has_cache(struct nd_region *nd_region); #endif /* __LIBNVDIMM_H__ */ diff 
--git a/include/linux/nd.h b/include/linux/nd.h index 194b8e002ea7..5dc6b695437d 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -21,6 +21,15 @@ enum nvdimm_event { NVDIMM_REVALIDATE_POISON, }; +enum nvdimm_claim_class { + NVDIMM_CCLASS_NONE, + NVDIMM_CCLASS_BTT, + NVDIMM_CCLASS_BTT2, + NVDIMM_CCLASS_PFN, + NVDIMM_CCLASS_DAX, + NVDIMM_CCLASS_UNKNOWN, +}; + struct nd_device_driver { struct device_driver drv; unsigned long type; @@ -41,12 +50,14 @@ static inline struct nd_device_driver *to_nd_device_driver( * @force_raw: ignore other personalities for the namespace (e.g. btt) * @dev: device model node * @claim: when set a another personality has taken ownership of the namespace + * @claim_class: restrict claim type to a given class * @rw_bytes: access the raw namespace capacity with byte-aligned transfers */ struct nd_namespace_common { int force_raw; struct device dev; struct device *claim; + enum nvdimm_claim_class claim_class; int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, void *buf, size_t size, int rw, unsigned long flags); }; @@ -75,12 +86,14 @@ struct nd_namespace_io { /** * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory * @nsio: device and system physical address range to drive + * @lbasize: logical sector size for the namespace in block-device-mode * @alt_name: namespace name supplied in the dimm label * @uuid: namespace name supplied in the dimm label * @id: ida allocated id */ struct nd_namespace_pmem { struct nd_namespace_io nsio; + unsigned long lbasize; char *alt_name; u8 *uuid; int id; diff --git a/include/linux/pmem.h b/include/linux/pmem.h deleted file mode 100644 index 71ecf3d46aac..000000000000 --- a/include/linux/pmem.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright(c) 2015 Intel Corporation. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#ifndef __PMEM_H__ -#define __PMEM_H__ - -#include <linux/io.h> -#include <linux/uio.h> - -#ifdef CONFIG_ARCH_HAS_PMEM_API -#define ARCH_MEMREMAP_PMEM MEMREMAP_WB -#include <asm/pmem.h> -#else -#define ARCH_MEMREMAP_PMEM MEMREMAP_WT -/* - * These are simply here to enable compilation, all call sites gate - * calling these symbols with arch_has_pmem_api() and redirect to the - * implementation in asm/pmem.h. - */ -static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) -{ - BUG(); -} - -static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, - struct iov_iter *i) -{ - BUG(); - return 0; -} - -static inline void arch_clear_pmem(void *addr, size_t size) -{ - BUG(); -} - -static inline void arch_wb_cache_pmem(void *addr, size_t size) -{ - BUG(); -} - -static inline void arch_invalidate_pmem(void *addr, size_t size) -{ - BUG(); -} -#endif - -static inline bool arch_has_pmem_api(void) -{ - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); -} - -/** - * memcpy_to_pmem - copy data to persistent memory - * @dst: destination buffer for the copy - * @src: source buffer for the copy - * @n: length of the copy in bytes - * - * Perform a memory copy that results in the destination of the copy - * being effectively evicted from, or never written to, the processor - * cache hierarchy after the copy completes. After memcpy_to_pmem() - * data may still reside in cpu or platform buffers, so this operation - * must be followed by a blkdev_issue_flush() on the pmem block device. 
- */ -static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) -{ - if (arch_has_pmem_api()) - arch_memcpy_to_pmem(dst, src, n); - else - memcpy(dst, src, n); -} - -/** - * copy_from_iter_pmem - copy data from an iterator to PMEM - * @addr: PMEM destination address - * @bytes: number of bytes to copy - * @i: iterator with source data - * - * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. - * See blkdev_issue_flush() note for memcpy_to_pmem(). - */ -static inline size_t copy_from_iter_pmem(void *addr, size_t bytes, - struct iov_iter *i) -{ - if (arch_has_pmem_api()) - return arch_copy_from_iter_pmem(addr, bytes, i); - return copy_from_iter_nocache(addr, bytes, i); -} - -/** - * clear_pmem - zero a PMEM memory range - * @addr: virtual start address - * @size: number of bytes to zero - * - * Write zeros into the memory range starting at 'addr' for 'size' bytes. - * See blkdev_issue_flush() note for memcpy_to_pmem(). - */ -static inline void clear_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_clear_pmem(addr, size); - else - memset(addr, 0, size); -} - -/** - * invalidate_pmem - flush a pmem range from the cache hierarchy - * @addr: virtual start address - * @size: bytes to invalidate (internally aligned to cache line size) - * - * For platforms that support clearing poison this flushes any poisoned - * ranges out of the cache - */ -static inline void invalidate_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_invalidate_pmem(addr, size); -} - -/** - * wb_cache_pmem - write back processor cache for PMEM memory range - * @addr: virtual start address - * @size: number of bytes to write back - * - * Write back the processor cache range starting at 'addr' for 'size' bytes. - * See blkdev_issue_flush() note for memcpy_to_pmem(). 
- */ -static inline void wb_cache_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_wb_cache_pmem(addr, size); -} -#endif /* __PMEM_H__ */ diff --git a/include/linux/string.h b/include/linux/string.h index 537918f8a98e..7439d83eaa33 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -122,6 +122,12 @@ static inline __must_check int memcpy_mcsafe(void *dst, const void *src, return 0; } #endif +#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE +static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) +{ + memcpy(dst, src, cnt); +} +#endif void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *s, char old, char new); diff --git a/include/linux/uio.h b/include/linux/uio.h index f2d36a3d3005..55cd54a0e941 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -95,6 +95,21 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +/* + * Note, users like pmem that depend on the stricter semantics of + * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for + * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the + * destination is flushed from the cache on return. 
+ */ +size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); +#else +static inline size_t copy_from_iter_flushcache(void *addr, size_t bytes, + struct iov_iter *i) +{ + return copy_from_iter_nocache(addr, bytes, i); +} +#endif bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i); size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 7ad3863cb88b..6d3c54264d8e 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -105,7 +105,8 @@ struct nd_cmd_ars_cap { __u32 status; __u32 max_ars_out; __u32 clear_err_unit; - __u32 reserved; + __u16 flags; + __u16 reserved; } __packed; struct nd_cmd_ars_start { @@ -144,6 +145,43 @@ struct nd_cmd_clear_error { __u64 cleared; } __packed; +struct nd_cmd_trans_spa { + __u64 spa; + __u32 status; + __u8 flags; + __u8 _reserved[3]; + __u64 trans_length; + __u32 num_nvdimms; + struct nd_nvdimm_device { + __u32 nfit_device_handle; + __u32 _reserved; + __u64 dpa; + } __packed devices[0]; + +} __packed; + +struct nd_cmd_ars_err_inj { + __u64 err_inj_spa_range_base; + __u64 err_inj_spa_range_length; + __u8 err_inj_options; + __u32 status; +} __packed; + +struct nd_cmd_ars_err_inj_clr { + __u64 err_inj_clr_spa_range_base; + __u64 err_inj_clr_spa_range_length; + __u32 status; +} __packed; + +struct nd_cmd_ars_err_inj_stat { + __u32 status; + __u32 inj_err_rec_count; + struct nd_error_stat_query_record { + __u64 err_inj_stat_spa_range_base; + __u64 err_inj_stat_spa_range_length; + } __packed record[0]; +} __packed; + enum { ND_CMD_IMPLEMENTED = 0, @@ -169,6 +207,7 @@ enum { enum { ND_ARS_VOLATILE = 1, ND_ARS_PERSISTENT = 2, + ND_ARS_RETURN_PREV_DATA = 1 << 1, ND_CONFIG_LOCKED = 1, }; @@ -179,6 +218,7 @@ static inline const char *nvdimm_bus_cmd_name(unsigned cmd) [ND_CMD_ARS_START] = "ars_start", [ND_CMD_ARS_STATUS] = "ars_status", 
[ND_CMD_CLEAR_ERROR] = "clear_error", + [ND_CMD_CALL] = "cmd_call", }; if (cmd < ARRAY_SIZE(names) && names[cmd]) |