summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/acpi.h4
-rw-r--r--include/linux/backing-dev.h39
-rw-r--r--include/linux/bio.h47
-rw-r--r--include/linux/bitmap.h33
-rw-r--r--include/linux/bug.h72
-rw-r--r--include/linux/build_bug.h84
-rw-r--r--include/linux/bvec.h41
-rw-r--r--include/linux/ceph/ceph_features.h264
-rw-r--r--include/linux/ceph/ceph_fs.h1
-rw-r--r--include/linux/ceph/decode.h60
-rw-r--r--include/linux/ceph/libceph.h49
-rw-r--r--include/linux/ceph/messenger.h2
-rw-r--r--include/linux/ceph/osd_client.h70
-rw-r--r--include/linux/ceph/osdmap.h41
-rw-r--r--include/linux/ceph/rados.h6
-rw-r--r--include/linux/clk.h22
-rw-r--r--include/linux/cpu_cooling.h32
-rw-r--r--include/linux/cpufreq.h14
-rw-r--r--include/linux/crash_core.h7
-rw-r--r--include/linux/crush/crush.h66
-rw-r--r--include/linux/crush/mapper.h9
-rw-r--r--include/linux/dax.h5
-rw-r--r--include/linux/dcache.h4
-rw-r--r--include/linux/eventpoll.h5
-rw-r--r--include/linux/extable.h5
-rw-r--r--include/linux/fs.h10
-rw-r--r--include/linux/fwnode.h69
-rw-r--r--include/linux/gfp.h56
-rw-r--r--include/linux/huge_mm.h45
-rw-r--r--include/linux/hugetlb.h39
-rw-r--r--include/linux/i2c.h2
-rw-r--r--include/linux/i2c/i2c-sh_mobile.h11
-rw-r--r--include/linux/initrd.h3
-rw-r--r--include/linux/intel-svm.h20
-rw-r--r--include/linux/ipc.h3
-rw-r--r--include/linux/kernel.h10
-rw-r--r--include/linux/kexec.h2
-rw-r--r--include/linux/khugepaged.h3
-rw-r--r--include/linux/list_lru.h1
-rw-r--r--include/linux/lockd/lockd.h4
-rw-r--r--include/linux/lockd/xdr.h26
-rw-r--r--include/linux/lockd/xdr4.h26
-rw-r--r--include/linux/mfd/cros_ec.h19
-rw-r--r--include/linux/mfd/cros_ec_commands.h42
-rw-r--r--include/linux/mfd/cros_ec_lpc_mec.h90
-rw-r--r--include/linux/mfd/cros_ec_lpc_reg.h61
-rw-r--r--include/linux/migrate.h16
-rw-r--r--include/linux/mmzone.h8
-rw-r--r--include/linux/mtd/nand.h80
-rw-r--r--include/linux/mtd/partitions.h7
-rw-r--r--include/linux/mtd/spi-nor.h161
-rw-r--r--include/linux/nfs4.h1
-rw-r--r--include/linux/nfs_fs.h1
-rw-r--r--include/linux/nfs_fs_sb.h2
-rw-r--r--include/linux/nfs_page.h4
-rw-r--r--include/linux/nfs_xdr.h31
-rw-r--r--include/linux/nmi.h57
-rw-r--r--include/linux/ntb.h721
-rw-r--r--include/linux/nvme-fc.h23
-rw-r--r--include/linux/nvmem-provider.h3
-rw-r--r--include/linux/of.h2
-rw-r--r--include/linux/page_ref.h1
-rw-r--r--include/linux/platform_data/usb-ohci-s3c2410.h2
-rw-r--r--include/linux/property.h5
-rw-r--r--include/linux/random.h21
-rw-r--r--include/linux/rtc.h21
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/linux/sched/coredump.h5
-rw-r--r--include/linux/sem.h24
-rw-r--r--include/linux/slab.h3
-rw-r--r--include/linux/string.h201
-rw-r--r--include/linux/sunrpc/clnt.h6
-rw-r--r--include/linux/sunrpc/sched.h2
-rw-r--r--include/linux/sunrpc/svc.h23
-rw-r--r--include/linux/sunrpc/svc_rdma.h46
-rw-r--r--include/linux/sunrpc/xdr.h15
-rw-r--r--include/linux/swap.h6
-rw-r--r--include/linux/swapops.h9
-rw-r--r--include/linux/sysctl.h5
-rw-r--r--include/linux/t10-pi.h2
-rw-r--r--include/linux/vfio.h2
81 files changed, 2420 insertions, 621 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 99f96df83dd8..c749eef1daa1 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -57,6 +57,9 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
acpi_fwnode_handle(adev) : NULL)
#define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev))
+
+extern const struct fwnode_operations acpi_fwnode_ops;
+
static inline struct fwnode_handle *acpi_alloc_fwnode_static(void)
{
struct fwnode_handle *fwnode;
@@ -66,6 +69,7 @@ static inline struct fwnode_handle *acpi_alloc_fwnode_static(void)
return NULL;
fwnode->type = FWNODE_ACPI_STATIC;
+ fwnode->ops = &acpi_fwnode_ops;
return fwnode;
}
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index ace73f96eb1e..854e1bdd0b2a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -69,34 +69,14 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
}
-static inline void __inc_wb_stat(struct bdi_writeback *wb,
- enum wb_stat_item item)
-{
- __add_wb_stat(wb, item, 1);
-}
-
static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
- unsigned long flags;
-
- local_irq_save(flags);
- __inc_wb_stat(wb, item);
- local_irq_restore(flags);
-}
-
-static inline void __dec_wb_stat(struct bdi_writeback *wb,
- enum wb_stat_item item)
-{
- __add_wb_stat(wb, item, -1);
+ __add_wb_stat(wb, item, 1);
}
static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
- unsigned long flags;
-
- local_irq_save(flags);
- __dec_wb_stat(wb, item);
- local_irq_restore(flags);
+ __add_wb_stat(wb, item, -1);
}
static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
@@ -104,22 +84,9 @@ static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
return percpu_counter_read_positive(&wb->stat[item]);
}
-static inline s64 __wb_stat_sum(struct bdi_writeback *wb,
- enum wb_stat_item item)
-{
- return percpu_counter_sum_positive(&wb->stat[item]);
-}
-
static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
{
- s64 sum;
- unsigned long flags;
-
- local_irq_save(flags);
- sum = __wb_stat_sum(wb, item);
- local_irq_restore(flags);
-
- return sum;
+ return percpu_counter_sum_positive(&wb->stat[item]);
}
extern void wb_writeout_inc(struct bdi_writeback *wb);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 664a27da276d..7b1cf4ba0902 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -165,10 +165,27 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
{
iter->bi_sector += bytes >> 9;
- if (bio_no_advance_iter(bio))
+ if (bio_no_advance_iter(bio)) {
iter->bi_size -= bytes;
- else
+ iter->bi_done += bytes;
+ } else {
bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+ /* TODO: It is reasonable to complete bio with error here. */
+ }
+}
+
+static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter,
+ unsigned int bytes)
+{
+ iter->bi_sector -= bytes >> 9;
+
+ if (bio_no_advance_iter(bio)) {
+ iter->bi_size += bytes;
+ iter->bi_done -= bytes;
+ return true;
+ }
+
+ return bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
}
#define __bio_for_each_segment(bvl, bio, iter, start) \
@@ -303,8 +320,6 @@ struct bio_integrity_payload {
struct bvec_iter bip_iter;
- bio_end_io_t *bip_end_io; /* saved I/O completion fn */
-
unsigned short bip_slab; /* slab the bip came from */
unsigned short bip_vcnt; /* # of integrity bio_vecs */
unsigned short bip_max_vcnt; /* integrity bio_vec slots */
@@ -722,13 +737,10 @@ struct biovec_slab {
bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
-extern void bio_integrity_free(struct bio *);
extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
-extern bool bio_integrity_enabled(struct bio *bio);
-extern int bio_integrity_prep(struct bio *);
-extern void bio_integrity_endio(struct bio *);
+extern bool bio_integrity_prep(struct bio *);
extern void bio_integrity_advance(struct bio *, unsigned int);
-extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
+extern void bio_integrity_trim(struct bio *);
extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
extern int bioset_integrity_create(struct bio_set *, int);
extern void bioset_integrity_free(struct bio_set *);
@@ -741,11 +753,6 @@ static inline void *bio_integrity(struct bio *bio)
return NULL;
}
-static inline bool bio_integrity_enabled(struct bio *bio)
-{
- return false;
-}
-
static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
{
return 0;
@@ -756,14 +763,9 @@ static inline void bioset_integrity_free (struct bio_set *bs)
return;
}
-static inline int bio_integrity_prep(struct bio *bio)
-{
- return 0;
-}
-
-static inline void bio_integrity_free(struct bio *bio)
+static inline bool bio_integrity_prep(struct bio *bio)
{
- return;
+ return true;
}
static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
@@ -778,8 +780,7 @@ static inline void bio_integrity_advance(struct bio *bio,
return;
}
-static inline void bio_integrity_trim(struct bio *bio, unsigned int offset,
- unsigned int sectors)
+static inline void bio_integrity_trim(struct bio *bio)
{
return;
}
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3b77588a9360..5797ca6fdfe2 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -112,9 +112,8 @@ extern int __bitmap_intersects(const unsigned long *bitmap1,
extern int __bitmap_subset(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
-
-extern void bitmap_set(unsigned long *map, unsigned int start, int len);
-extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
+extern void __bitmap_set(unsigned long *map, unsigned int start, int len);
+extern void __bitmap_clear(unsigned long *map, unsigned int start, int len);
extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
unsigned long size,
@@ -267,10 +266,8 @@ static inline int bitmap_equal(const unsigned long *src1,
{
if (small_const_nbits(nbits))
return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
-#ifdef CONFIG_S390
- if (__builtin_constant_p(nbits) && (nbits % BITS_PER_LONG) == 0)
+ if (__builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
return !memcmp(src1, src2, nbits / 8);
-#endif
return __bitmap_equal(src1, src2, nbits);
}
@@ -315,6 +312,30 @@ static __always_inline int bitmap_weight(const unsigned long *src, unsigned int
return __bitmap_weight(src, nbits);
}
+static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
+ unsigned int nbits)
+{
+ if (__builtin_constant_p(nbits) && nbits == 1)
+ __set_bit(start, map);
+ else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+ __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+ memset((char *)map + start / 8, 0xff, nbits / 8);
+ else
+ __bitmap_set(map, start, nbits);
+}
+
+static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
+ unsigned int nbits)
+{
+ if (__builtin_constant_p(nbits) && nbits == 1)
+ __clear_bit(start, map);
+ else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+ __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+ memset((char *)map + start / 8, 0, nbits / 8);
+ else
+ __bitmap_clear(map, start, nbits);
+}
+
static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
unsigned int shift, int nbits)
{
diff --git a/include/linux/bug.h b/include/linux/bug.h
index 687b557fc5eb..5d5554c874fd 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -3,6 +3,7 @@
#include <asm/bug.h>
#include <linux/compiler.h>
+#include <linux/build_bug.h>
enum bug_trap_type {
BUG_TRAP_TYPE_NONE = 0,
@@ -13,80 +14,9 @@ enum bug_trap_type {
struct pt_regs;
#ifdef __CHECKER__
-#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
-#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
-#define BUILD_BUG_ON_ZERO(e) (0)
-#define BUILD_BUG_ON_NULL(e) ((void*)0)
-#define BUILD_BUG_ON_INVALID(e) (0)
-#define BUILD_BUG_ON_MSG(cond, msg) (0)
-#define BUILD_BUG_ON(condition) (0)
-#define BUILD_BUG() (0)
#define MAYBE_BUILD_BUG_ON(cond) (0)
#else /* __CHECKER__ */
-/* Force a compilation error if a constant expression is not a power of 2 */
-#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \
- BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
-#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \
- BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
-
-/* Force a compilation error if condition is true, but also produce a
- result (of value 0 and type size_t), so the expression can be used
- e.g. in a structure initializer (or where-ever else comma expressions
- aren't permitted). */
-#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
-#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
-
-/*
- * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
- * expression but avoids the generation of any code, even if that expression
- * has side-effects.
- */
-#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
-
-/**
- * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
- * error message.
- * @condition: the condition which the compiler should know is false.
- *
- * See BUILD_BUG_ON for description.
- */
-#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
-
-/**
- * BUILD_BUG_ON - break compile if a condition is true.
- * @condition: the condition which the compiler should know is false.
- *
- * If you have some code which relies on certain constants being equal, or
- * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
- * detect if someone changes it.
- *
- * The implementation uses gcc's reluctance to create a negative array, but gcc
- * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
- * inline functions). Luckily, in 4.3 they added the "error" function
- * attribute just for this type of case. Thus, we use a negative sized array
- * (should always create an error on gcc versions older than 4.4) and then call
- * an undefined function with the error attribute (should always create an
- * error on gcc 4.3 and later). If for some reason, neither creates a
- * compile-time error, we'll still have a link-time error, which is harder to
- * track down.
- */
-#ifndef __OPTIMIZE__
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-#else
-#define BUILD_BUG_ON(condition) \
- BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
-#endif
-
-/**
- * BUILD_BUG - break compile if used.
- *
- * If you have some code that you expect the compiler to eliminate at
- * build time, you should use BUILD_BUG to detect if it is
- * unexpectedly used.
- */
-#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
-
#define MAYBE_BUILD_BUG_ON(cond) \
do { \
if (__builtin_constant_p((cond))) \
diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
new file mode 100644
index 000000000000..b7d22d60008a
--- /dev/null
+++ b/include/linux/build_bug.h
@@ -0,0 +1,84 @@
+#ifndef _LINUX_BUILD_BUG_H
+#define _LINUX_BUILD_BUG_H
+
+#include <linux/compiler.h>
+
+#ifdef __CHECKER__
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
+#define BUILD_BUG_ON_ZERO(e) (0)
+#define BUILD_BUG_ON_NULL(e) ((void *)0)
+#define BUILD_BUG_ON_INVALID(e) (0)
+#define BUILD_BUG_ON_MSG(cond, msg) (0)
+#define BUILD_BUG_ON(condition) (0)
+#define BUILD_BUG() (0)
+#else /* __CHECKER__ */
+
+/* Force a compilation error if a constant expression is not a power of 2 */
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \
+ BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \
+ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+
+/*
+ * Force a compilation error if condition is true, but also produce a
+ * result (of value 0 and type size_t), so the expression can be used
+ * e.g. in a structure initializer (or where-ever else comma expressions
+ * aren't permitted).
+ */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); }))
+
+/*
+ * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
+ * expression but avoids the generation of any code, even if that expression
+ * has side-effects.
+ */
+#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
+
+/**
+ * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
+ * error message.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * See BUILD_BUG_ON for description.
+ */
+#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
+
+/**
+ * BUILD_BUG_ON - break compile if a condition is true.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * If you have some code which relies on certain constants being equal, or
+ * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
+ * detect if someone changes it.
+ *
+ * The implementation uses gcc's reluctance to create a negative array, but gcc
+ * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
+ * inline functions). Luckily, in 4.3 they added the "error" function
+ * attribute just for this type of case. Thus, we use a negative sized array
+ * (should always create an error on gcc versions older than 4.4) and then call
+ * an undefined function with the error attribute (should always create an
+ * error on gcc 4.3 and later). If for some reason, neither creates a
+ * compile-time error, we'll still have a link-time error, which is harder to
+ * track down.
+ */
+#ifndef __OPTIMIZE__
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#else
+#define BUILD_BUG_ON(condition) \
+ BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
+#endif
+
+/**
+ * BUILD_BUG - break compile if used.
+ *
+ * If you have some code that you expect the compiler to eliminate at
+ * build time, you should use BUILD_BUG to detect if it is
+ * unexpectedly used.
+ */
+#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
+
+#endif /* __CHECKER__ */
+
+#endif /* _LINUX_BUILD_BUG_H */
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 89b65b82d98f..ec8a4d7af6bd 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -22,6 +22,7 @@
#include <linux/kernel.h>
#include <linux/bug.h>
+#include <linux/errno.h>
/*
* was unsigned short, but we might as well be ready for > 64kB I/O pages
@@ -39,6 +40,8 @@ struct bvec_iter {
unsigned int bi_idx; /* current index into bvl_vec */
+ unsigned int bi_done; /* number of bytes completed */
+
unsigned int bi_bvec_done; /* number of bytes completed in
current bvec */
};
@@ -66,12 +69,14 @@ struct bvec_iter {
.bv_offset = bvec_iter_offset((bvec), (iter)), \
})
-static inline void bvec_iter_advance(const struct bio_vec *bv,
- struct bvec_iter *iter,
- unsigned bytes)
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+ struct bvec_iter *iter, unsigned bytes)
{
- WARN_ONCE(bytes > iter->bi_size,
- "Attempted to advance past end of bvec iter\n");
+ if (WARN_ONCE(bytes > iter->bi_size,
+ "Attempted to advance past end of bvec iter\n")) {
+ iter->bi_size = 0;
+ return false;
+ }
while (bytes) {
unsigned iter_len = bvec_iter_len(bv, *iter);
@@ -80,12 +85,38 @@ static inline void bvec_iter_advance(const struct bio_vec *bv,
bytes -= len;
iter->bi_size -= len;
iter->bi_bvec_done += len;
+ iter->bi_done += len;
if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
iter->bi_bvec_done = 0;
iter->bi_idx++;
}
}
+ return true;
+}
+
+static inline bool bvec_iter_rewind(const struct bio_vec *bv,
+ struct bvec_iter *iter,
+ unsigned int bytes)
+{
+ while (bytes) {
+ unsigned len = min(bytes, iter->bi_bvec_done);
+
+ if (iter->bi_bvec_done == 0) {
+ if (WARN_ONCE(iter->bi_idx == 0,
+ "Attempted to rewind iter beyond "
+ "bvec's boundaries\n")) {
+ return false;
+ }
+ iter->bi_idx--;
+ iter->bi_bvec_done = __bvec_iter_bvec(bv, *iter)->bv_len;
+ continue;
+ }
+ bytes -= len;
+ iter->bi_size += len;
+ iter->bi_bvec_done -= len;
+ }
+ return true;
}
#define for_each_bvec(bvl, bio_vec, iter, start) \
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index fd8b2953c78f..f0f6c537b64c 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -2,103 +2,174 @@
#define __CEPH_FEATURES
/*
- * feature bits
+ * Each time we reclaim bits for reuse we need to specify another bit
+ * that, if present, indicates we have the new incarnation of that
+ * feature. Base case is 1 (first use).
*/
-#define CEPH_FEATURE_UID (1ULL<<0)
-#define CEPH_FEATURE_NOSRCADDR (1ULL<<1)
-#define CEPH_FEATURE_MONCLOCKCHECK (1ULL<<2)
-#define CEPH_FEATURE_FLOCK (1ULL<<3)
-#define CEPH_FEATURE_SUBSCRIBE2 (1ULL<<4)
-#define CEPH_FEATURE_MONNAMES (1ULL<<5)
-#define CEPH_FEATURE_RECONNECT_SEQ (1ULL<<6)
-#define CEPH_FEATURE_DIRLAYOUTHASH (1ULL<<7)
-#define CEPH_FEATURE_OBJECTLOCATOR (1ULL<<8)
-#define CEPH_FEATURE_PGID64 (1ULL<<9)
-#define CEPH_FEATURE_INCSUBOSDMAP (1ULL<<10)
-#define CEPH_FEATURE_PGPOOL3 (1ULL<<11)
-#define CEPH_FEATURE_OSDREPLYMUX (1ULL<<12)
-#define CEPH_FEATURE_OSDENC (1ULL<<13)
-#define CEPH_FEATURE_OMAP (1ULL<<14)
-#define CEPH_FEATURE_MONENC (1ULL<<15)
-#define CEPH_FEATURE_QUERY_T (1ULL<<16)
-#define CEPH_FEATURE_INDEP_PG_MAP (1ULL<<17)
-#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18)
-#define CEPH_FEATURE_CHUNKY_SCRUB (1ULL<<19)
-#define CEPH_FEATURE_MON_NULLROUTE (1ULL<<20)
-#define CEPH_FEATURE_MON_GV (1ULL<<21)
-#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22)
-#define CEPH_FEATURE_MSG_AUTH (1ULL<<23)
-#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24)
-#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25)
-#define CEPH_FEATURE_CREATEPOOLID (1ULL<<26)
-#define CEPH_FEATURE_REPLY_CREATE_INODE (1ULL<<27)
-#define CEPH_FEATURE_OSD_HBMSGS (1ULL<<28)
-#define CEPH_FEATURE_MDSENC (1ULL<<29)
-#define CEPH_FEATURE_OSDHASHPSPOOL (1ULL<<30)
-#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31)
-#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32)
-#define CEPH_FEATURE_MON_SCRUB (1ULL<<33)
-#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34)
-#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35)
-#define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */
-#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37)
-#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
-#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */
-/* The process supports new-style OSDMap encoding. Monitors also use
- this bit to determine if peers support NAK messages. */
-#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39)
-#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40)
-#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41)
-#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */
-#define CEPH_FEATURE_MSGR_KEEPALIVE2 (1ULL<<42)
-#define CEPH_FEATURE_OSD_POOLRESEND (1ULL<<43)
-#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
-#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
-#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
-#define CEPH_FEATURE_OSD_REPOP (1ULL<<46) /* overlap with fadvise */
-#define CEPH_FEATURE_OSD_OBJECT_DIGEST (1ULL<<46) /* overlap with fadvise */
-#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */
-#define CEPH_FEATURE_MDS_QUOTA (1ULL<<47)
-#define CEPH_FEATURE_CRUSH_V4 (1ULL<<48) /* straw2 buckets */
-#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
-// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
-#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */
-#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
-#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
-#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
-#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
-#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
-#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
-#define CEPH_FEATURE_NEW_OSDOP_ENCODING (1ULL<<56) /* New, v7 encoding */
-#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
-#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
-#define CEPH_FEATURE_CRUSH_TUNABLES5 (1ULL<<58) /* chooseleaf stable mode */
-// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
-#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */
-#define CEPH_FEATURE_FS_FILE_LAYOUT_V2 (1ULL<<58) /* file_layout_t */
+#define CEPH_FEATURE_INCARNATION_1 (0ull)
+#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
+
+#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \
+ const static uint64_t CEPH_FEATURE_##name = (1ULL<<bit); \
+ const static uint64_t CEPH_FEATUREMASK_##name = \
+ (1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
+
+/* this bit is ignored but still advertised by release *when* */
+#define DEFINE_CEPH_FEATURE_DEPRECATED(bit, incarnation, name, when) \
+ const static uint64_t DEPRECATED_CEPH_FEATURE_##name = (1ULL<<bit); \
+ const static uint64_t DEPRECATED_CEPH_FEATUREMASK_##name = \
+ (1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
/*
- * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
- * vector to evaluate to 64 bit ~0. To cope, we designate 1ULL << 63
- * to mean 33 bit ~0, and introduce a helper below to do the
- * translation.
+ * this bit is ignored by release *unused* and not advertised by
+ * release *unadvertised*
+ */
+#define DEFINE_CEPH_FEATURE_RETIRED(bit, inc, name, unused, unadvertised)
+
+
+/*
+ * test for a feature. this test is safer than a typical mask against
+ * the bit because it ensures that we have the bit AND the marker for the
+ * bit's incarnation. this must be used in any case where the features
+ * bits may include an old meaning of the bit.
+ */
+#define CEPH_HAVE_FEATURE(x, name) \
+ (((x) & (CEPH_FEATUREMASK_##name)) == (CEPH_FEATUREMASK_##name))
+
+
+/*
+ * Notes on deprecation:
+ *
+ * A *major* release is a release through which all upgrades must pass
+ * (e.g., jewel). For example, no pre-jewel server will ever talk to
+ * a post-jewel server (mon, osd, etc).
+ *
+ * For feature bits used *only* on the server-side:
+ *
+ * - In the first phase we indicate that a feature is DEPRECATED as of
+ * a particular release. This is the first major release X (say,
+ * jewel) that does not depend on its peers advertising the feature.
+ * That is, it safely assumes its peers all have the feature. We
+ * indicate this with the DEPRECATED macro. For example,
+ *
+ * DEFINE_CEPH_FEATURE_DEPRECATED( 2, 1, MONCLOCKCHECK, JEWEL)
+ *
+ * because 10.2.z (jewel) did not care if its peers advertised this
+ * feature bit.
+ *
+ * - In the second phase we stop advertising the the bit and call it
+ * RETIRED. This can normally be done in the *next* major release
+ * following the one in which we marked the feature DEPRECATED. In
+ * the above example, for 12.0.z (luminous) we can say:
+ *
+ * DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
*
- * This was introduced by ceph.git commit
- * 9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8
- * and fixed by ceph.git commit
- * 4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c
+ * - The bit can be reused in the first post-luminous release, 13.0.z
+ * (m).
+ *
+ * This ensures that no two versions who have different meanings for
+ * the bit ever speak to each other.
*/
-#define CEPH_FEATURE_RESERVED (1ULL<<63)
-
-static inline u64 ceph_sanitize_features(u64 features)
-{
- if (features & CEPH_FEATURE_RESERVED) {
- /* everything through OSD_SNAPMAPPER */
- return 0x1ffffffffull;
- } else {
- return features;
- }
-}
+
+DEFINE_CEPH_FEATURE( 0, 1, UID)
+DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
+DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
+DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
+DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
+DEFINE_CEPH_FEATURE( 6, 1, RECONNECT_SEQ)
+DEFINE_CEPH_FEATURE( 7, 1, DIRLAYOUTHASH)
+DEFINE_CEPH_FEATURE( 8, 1, OBJECTLOCATOR)
+DEFINE_CEPH_FEATURE( 9, 1, PGID64)
+DEFINE_CEPH_FEATURE(10, 1, INCSUBOSDMAP)
+DEFINE_CEPH_FEATURE(11, 1, PGPOOL3)
+DEFINE_CEPH_FEATURE(12, 1, OSDREPLYMUX)
+DEFINE_CEPH_FEATURE(13, 1, OSDENC)
+DEFINE_CEPH_FEATURE_RETIRED(14, 1, OMAP, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(14, 2, SERVER_KRAKEN)
+DEFINE_CEPH_FEATURE(15, 1, MONENC)
+DEFINE_CEPH_FEATURE_RETIRED(16, 1, QUERY_T, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(17, 1, INDEP_PG_MAP, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(18, 1, CRUSH_TUNABLES)
+DEFINE_CEPH_FEATURE_RETIRED(19, 1, CHUNKY_SCRUB, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(20, 1, MON_NULLROUTE, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(21, 1, MON_GV, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(21, 2, SERVER_LUMINOUS)
+DEFINE_CEPH_FEATURE(21, 2, RESEND_ON_SPLIT) // overlap
+DEFINE_CEPH_FEATURE(21, 2, RADOS_BACKOFF) // overlap
+DEFINE_CEPH_FEATURE(21, 2, OSDMAP_PG_UPMAP) // overlap
+DEFINE_CEPH_FEATURE(21, 2, CRUSH_CHOOSE_ARGS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(22, 1, BACKFILL_RESERVATION, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(23, 1, MSG_AUTH)
+DEFINE_CEPH_FEATURE_RETIRED(24, 1, RECOVERY_RESERVATION, JEWEL, LUNINOUS)
+
+DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
+DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
+DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
+DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(28, 2, SERVER_M)
+DEFINE_CEPH_FEATURE(29, 1, MDSENC)
+DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
+DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me
+DEFINE_CEPH_FEATURE_RETIRED(32, 1, OSD_SNAPMAPPER, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(33, 1, MON_SCRUB, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(34, 1, OSD_PACKED_RECOVERY, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(35, 1, OSD_CACHEPOOL)
+DEFINE_CEPH_FEATURE(36, 1, CRUSH_V2)
+DEFINE_CEPH_FEATURE(37, 1, EXPORT_PEER)
+DEFINE_CEPH_FEATURE(38, 1, OSD_ERASURE_CODES)
+DEFINE_CEPH_FEATURE(38, 1, OSD_OSD_TMAP2OMAP) // overlap
+DEFINE_CEPH_FEATURE(39, 1, OSDMAP_ENC)
+DEFINE_CEPH_FEATURE(40, 1, MDS_INLINE_DATA)
+DEFINE_CEPH_FEATURE(41, 1, CRUSH_TUNABLES3)
+DEFINE_CEPH_FEATURE(41, 1, OSD_PRIMARY_AFFINITY) // overlap
+DEFINE_CEPH_FEATURE(42, 1, MSGR_KEEPALIVE2)
+DEFINE_CEPH_FEATURE(43, 1, OSD_POOLRESEND)
+DEFINE_CEPH_FEATURE(44, 1, ERASURE_CODE_PLUGINS_V2)
+DEFINE_CEPH_FEATURE_RETIRED(45, 1, OSD_SET_ALLOC_HINT, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(46, 1, OSD_FADVISE_FLAGS)
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_REPOP, JEWEL, LUMINOUS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_OBJECT_DIGEST, JEWEL, LUMINOUS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_TRANSACTION_MAY_LAYOUT, JEWEL, LUMINOUS) // overlap
+
+DEFINE_CEPH_FEATURE(47, 1, MDS_QUOTA)
+DEFINE_CEPH_FEATURE(48, 1, CRUSH_V4)
+DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_MIN_SIZE_RECOVERY, JEWEL, LUMINOUS)
+DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_PROXY_FEATURES, JEWEL, LUMINOUS) // overlap
+
+DEFINE_CEPH_FEATURE(50, 1, MON_METADATA)
+DEFINE_CEPH_FEATURE(51, 1, OSD_BITWISE_HOBJ_SORT)
+DEFINE_CEPH_FEATURE(52, 1, OSD_PROXY_WRITE_FEATURES)
+DEFINE_CEPH_FEATURE(53, 1, ERASURE_CODE_PLUGINS_V3)
+DEFINE_CEPH_FEATURE(54, 1, OSD_HITSET_GMT)
+DEFINE_CEPH_FEATURE(55, 1, HAMMER_0_94_4)
+DEFINE_CEPH_FEATURE(56, 1, NEW_OSDOP_ENCODING)
+DEFINE_CEPH_FEATURE(57, 1, MON_STATEFUL_SUB)
+DEFINE_CEPH_FEATURE(57, 1, MON_ROUTE_OSDMAP) // overlap
+DEFINE_CEPH_FEATURE(57, 1, OSDSUBOP_NO_SNAPCONTEXT) // overlap
+DEFINE_CEPH_FEATURE(57, 1, SERVER_JEWEL) // overlap
+DEFINE_CEPH_FEATURE(58, 1, CRUSH_TUNABLES5)
+DEFINE_CEPH_FEATURE(58, 1, NEW_OSDOPREPLY_ENCODING) // overlap
+DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
+DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
+DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
+DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
+DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING) // *do not share this bit*
+
+DEFINE_CEPH_FEATURE(61, 1, RESERVED2) // unused, but slow down!
+DEFINE_CEPH_FEATURE(62, 1, RESERVED) // do not use; used as a sentinal
+DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
+
/*
* Features supported.
@@ -113,6 +184,11 @@ static inline u64 ceph_sanitize_features(u64 features)
CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC | \
CEPH_FEATURE_CRUSH_TUNABLES | \
+ CEPH_FEATURE_SERVER_LUMINOUS | \
+ CEPH_FEATURE_RESEND_ON_SPLIT | \
+ CEPH_FEATURE_RADOS_BACKOFF | \
+ CEPH_FEATURE_OSDMAP_PG_UPMAP | \
+ CEPH_FEATURE_CRUSH_CHOOSE_ARGS | \
CEPH_FEATURE_MSG_AUTH | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE | \
@@ -126,7 +202,11 @@ static inline u64 ceph_sanitize_features(u64 features)
CEPH_FEATURE_CRUSH_TUNABLES3 | \
CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \
CEPH_FEATURE_MSGR_KEEPALIVE2 | \
+ CEPH_FEATURE_OSD_POOLRESEND | \
CEPH_FEATURE_CRUSH_V4 | \
+ CEPH_FEATURE_NEW_OSDOP_ENCODING | \
+ CEPH_FEATURE_SERVER_JEWEL | \
+ CEPH_FEATURE_MON_STATEFUL_SUB | \
CEPH_FEATURE_CRUSH_TUNABLES5 | \
CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index ad078ebe25d6..edf5b04b918a 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -147,6 +147,7 @@ struct ceph_dir_layout {
#define CEPH_MSG_OSD_OP 42
#define CEPH_MSG_OSD_OPREPLY 43
#define CEPH_MSG_WATCH_NOTIFY 44
+#define CEPH_MSG_OSD_BACKOFF 61
/* watch-notify operations */
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index f990f2cc907a..14af9b70d301 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -133,6 +133,66 @@ bad:
}
/*
+ * skip helpers
+ */
+#define ceph_decode_skip_n(p, end, n, bad) \
+ do { \
+ ceph_decode_need(p, end, n, bad); \
+ *p += n; \
+ } while (0)
+
+#define ceph_decode_skip_64(p, end, bad) \
+ceph_decode_skip_n(p, end, sizeof(u64), bad)
+
+#define ceph_decode_skip_32(p, end, bad) \
+ceph_decode_skip_n(p, end, sizeof(u32), bad)
+
+#define ceph_decode_skip_16(p, end, bad) \
+ceph_decode_skip_n(p, end, sizeof(u16), bad)
+
+#define ceph_decode_skip_8(p, end, bad) \
+ceph_decode_skip_n(p, end, sizeof(u8), bad)
+
+#define ceph_decode_skip_string(p, end, bad) \
+ do { \
+ u32 len; \
+ \
+ ceph_decode_32_safe(p, end, len, bad); \
+ ceph_decode_skip_n(p, end, len, bad); \
+ } while (0)
+
+#define ceph_decode_skip_set(p, end, type, bad) \
+ do { \
+ u32 len; \
+ \
+ ceph_decode_32_safe(p, end, len, bad); \
+ while (len--) \
+ ceph_decode_skip_##type(p, end, bad); \
+ } while (0)
+
+#define ceph_decode_skip_map(p, end, ktype, vtype, bad) \
+ do { \
+ u32 len; \
+ \
+ ceph_decode_32_safe(p, end, len, bad); \
+ while (len--) { \
+ ceph_decode_skip_##ktype(p, end, bad); \
+ ceph_decode_skip_##vtype(p, end, bad); \
+ } \
+ } while (0)
+
+#define ceph_decode_skip_map_of_map(p, end, ktype1, ktype2, vtype2, bad) \
+ do { \
+ u32 len; \
+ \
+ ceph_decode_32_safe(p, end, len, bad); \
+ while (len--) { \
+ ceph_decode_skip_##ktype1(p, end, bad); \
+ ceph_decode_skip_map(p, end, ktype2, vtype2, bad); \
+ } \
+ } while (0)
+
+/*
* struct ceph_timespec <-> struct timespec
*/
static inline void ceph_decode_timespec(struct timespec *ts,
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 3229ae6c7846..8a79587e1317 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -184,10 +184,11 @@ static inline int calc_pages_for(u64 off, u64 len)
(off >> PAGE_SHIFT);
}
-/*
- * These are not meant to be generic - an integer key is assumed.
- */
-#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \
+#define RB_BYVAL(a) (a)
+#define RB_BYPTR(a) (&(a))
+#define RB_CMP3WAY(a, b) ((a) < (b) ? -1 : (a) > (b))
+
+#define DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \
static void insert_##name(struct rb_root *root, type *t) \
{ \
struct rb_node **n = &root->rb_node; \
@@ -197,11 +198,13 @@ static void insert_##name(struct rb_root *root, type *t) \
\
while (*n) { \
type *cur = rb_entry(*n, type, nodefld); \
+ int cmp; \
\
parent = *n; \
- if (t->keyfld < cur->keyfld) \
+ cmp = cmpexp(keyexp(t->keyfld), keyexp(cur->keyfld)); \
+ if (cmp < 0) \
n = &(*n)->rb_left; \
- else if (t->keyfld > cur->keyfld) \
+ else if (cmp > 0) \
n = &(*n)->rb_right; \
else \
BUG(); \
@@ -217,19 +220,24 @@ static void erase_##name(struct rb_root *root, type *t) \
RB_CLEAR_NODE(&t->nodefld); \
}
-#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \
-extern type __lookup_##name##_key; \
-static type *lookup_##name(struct rb_root *root, \
- typeof(__lookup_##name##_key.keyfld) key) \
+/*
+ * @lookup_param_type is a parameter and not constructed from (@type,
+ * @keyfld) with typeof() because adding const is too unwieldy.
+ */
+#define DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp, \
+ lookup_param_type, nodefld) \
+static type *lookup_##name(struct rb_root *root, lookup_param_type key) \
{ \
struct rb_node *n = root->rb_node; \
\
while (n) { \
type *cur = rb_entry(n, type, nodefld); \
+ int cmp; \
\
- if (key < cur->keyfld) \
+ cmp = cmpexp(key, keyexp(cur->keyfld)); \
+ if (cmp < 0) \
n = n->rb_left; \
- else if (key > cur->keyfld) \
+ else if (cmp > 0) \
n = n->rb_right; \
else \
return cur; \
@@ -238,6 +246,23 @@ static type *lookup_##name(struct rb_root *root, \
return NULL; \
}
+#define DEFINE_RB_FUNCS2(name, type, keyfld, cmpexp, keyexp, \
+ lookup_param_type, nodefld) \
+DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \
+DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp, \
+ lookup_param_type, nodefld)
+
+/*
+ * Shorthands for integer keys.
+ */
+#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \
+DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, nodefld)
+
+#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \
+extern type __lookup_##name##_key; \
+DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, \
+ typeof(__lookup_##name##_key.keyfld), nodefld)
+
#define DEFINE_RB_FUNCS(name, type, keyfld, nodefld) \
DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \
DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c5c4c713e00f..fbd94d9fa5dd 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -44,6 +44,8 @@ struct ceph_connection_operations {
struct ceph_msg_header *hdr,
int *skip);
+ void (*reencode_message) (struct ceph_msg *msg);
+
int (*sign_message) (struct ceph_msg *msg);
int (*check_message_signature) (struct ceph_msg *msg);
};
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 85650b415e73..c6d96a5f46fd 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -1,6 +1,7 @@
#ifndef _FS_CEPH_OSD_CLIENT_H
#define _FS_CEPH_OSD_CLIENT_H
+#include <linux/bitrev.h>
#include <linux/completion.h>
#include <linux/kref.h>
#include <linux/mempool.h>
@@ -36,6 +37,8 @@ struct ceph_osd {
struct ceph_connection o_con;
struct rb_root o_requests;
struct rb_root o_linger_requests;
+ struct rb_root o_backoff_mappings;
+ struct rb_root o_backoffs_by_id;
struct list_head o_osd_lru;
struct ceph_auth_handshake o_auth;
unsigned long lru_ttl;
@@ -136,7 +139,8 @@ struct ceph_osd_request_target {
struct ceph_object_id target_oid;
struct ceph_object_locator target_oloc;
- struct ceph_pg pgid;
+ struct ceph_pg pgid; /* last raw pg we mapped to */
+ struct ceph_spg spgid; /* last actual spg we mapped to */
u32 pg_num;
u32 pg_num_mask;
struct ceph_osds acting;
@@ -148,6 +152,9 @@ struct ceph_osd_request_target {
unsigned int flags; /* CEPH_OSD_FLAG_* */
bool paused;
+ u32 epoch;
+ u32 last_force_resend;
+
int osd;
};
@@ -193,7 +200,6 @@ struct ceph_osd_request {
unsigned long r_stamp; /* jiffies, send or check time */
unsigned long r_start_stamp; /* jiffies */
int r_attempts;
- u32 r_last_force_resend;
u32 r_map_dne_bound;
struct ceph_osd_req_op r_ops[];
@@ -203,6 +209,23 @@ struct ceph_request_redirect {
struct ceph_object_locator oloc;
};
+/*
+ * osd request identifier
+ *
+ * caller name + incarnation# + tid to unique identify this request
+ */
+struct ceph_osd_reqid {
+ struct ceph_entity_name name;
+ __le64 tid;
+ __le32 inc;
+} __packed;
+
+struct ceph_blkin_trace_info {
+ __le64 trace_id;
+ __le64 span_id;
+ __le64 parent_span_id;
+} __packed;
+
typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
u64 notifier_id, void *data, size_t data_len);
typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
@@ -221,7 +244,6 @@ struct ceph_osd_linger_request {
struct list_head pending_lworks;
struct ceph_osd_request_target t;
- u32 last_force_resend;
u32 map_dne_bound;
struct timespec mtime;
@@ -256,6 +278,48 @@ struct ceph_watch_item {
struct ceph_entity_addr addr;
};
+struct ceph_spg_mapping {
+ struct rb_node node;
+ struct ceph_spg spgid;
+
+ struct rb_root backoffs;
+};
+
+struct ceph_hobject_id {
+ void *key;
+ size_t key_len;
+ void *oid;
+ size_t oid_len;
+ u64 snapid;
+ u32 hash;
+ u8 is_max;
+ void *nspace;
+ size_t nspace_len;
+ s64 pool;
+
+ /* cache */
+ u32 hash_reverse_bits;
+};
+
+static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
+{
+ hoid->hash_reverse_bits = bitrev32(hoid->hash);
+}
+
+/*
+ * PG-wide backoff: [begin, end)
+ * per-object backoff: begin == end
+ */
+struct ceph_osd_backoff {
+ struct rb_node spg_node;
+ struct rb_node id_node;
+
+ struct ceph_spg spgid;
+ u64 id;
+ struct ceph_hobject_id *begin;
+ struct ceph_hobject_id *end;
+};
+
#define CEPH_LINGER_ID_START 0xffff000000000000ULL
struct ceph_osd_client {
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 938656f70807..a0996cb9faed 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -24,7 +24,15 @@ struct ceph_pg {
uint32_t seed;
};
+#define CEPH_SPG_NOSHARD -1
+
+struct ceph_spg {
+ struct ceph_pg pgid;
+ s8 shard;
+};
+
int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs);
+int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
#define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id
together */
@@ -135,10 +143,14 @@ struct ceph_pg_mapping {
struct {
int len;
int osds[];
- } pg_temp;
+ } pg_temp, pg_upmap;
struct {
int osd;
} primary_temp;
+ struct {
+ int len;
+ int from_to[][2];
+ } pg_upmap_items;
};
};
@@ -150,13 +162,17 @@ struct ceph_osdmap {
u32 flags; /* CEPH_OSDMAP_* */
u32 max_osd; /* size of osd_state, _offload, _addr arrays */
- u8 *osd_state; /* CEPH_OSD_* */
+ u32 *osd_state; /* CEPH_OSD_* */
u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */
struct ceph_entity_addr *osd_addr;
struct rb_root pg_temp;
struct rb_root primary_temp;
+ /* remap (post-CRUSH, pre-up) */
+ struct rb_root pg_upmap; /* PG := raw set */
+ struct rb_root pg_upmap_items; /* from -> to within raw set */
+
u32 *osd_primary_affinity;
struct rb_root pg_pools;
@@ -187,7 +203,7 @@ static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd)
return !ceph_osd_is_up(map, osd);
}
-extern char *ceph_osdmap_state_str(char *str, int len, int state);
+char *ceph_osdmap_state_str(char *str, int len, u32 state);
extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd);
static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
@@ -198,11 +214,13 @@ static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
return &map->osd_addr[osd];
}
+#define CEPH_PGID_ENCODING_LEN (1 + 8 + 4 + 4)
+
static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
{
__u8 version;
- if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
+ if (!ceph_has_room(p, end, CEPH_PGID_ENCODING_LEN)) {
pr_warn("incomplete pg encoding\n");
return -EINVAL;
}
@@ -240,6 +258,8 @@ static inline void ceph_osds_init(struct ceph_osds *set)
void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src);
+bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
+ u32 new_pg_num);
bool ceph_is_new_interval(const struct ceph_osds *old_acting,
const struct ceph_osds *new_acting,
const struct ceph_osds *old_up,
@@ -262,15 +282,24 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
u64 off, u64 len,
u64 *bno, u64 *oxoff, u64 *oxlen);
+int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
+ const struct ceph_object_id *oid,
+ const struct ceph_object_locator *oloc,
+ struct ceph_pg *raw_pgid);
int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
- struct ceph_object_id *oid,
- struct ceph_object_locator *oloc,
+ const struct ceph_object_id *oid,
+ const struct ceph_object_locator *oloc,
struct ceph_pg *raw_pgid);
void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pi,
const struct ceph_pg *raw_pgid,
struct ceph_osds *up,
struct ceph_osds *acting);
+bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+ struct ceph_pg_pool_info *pi,
+ const struct ceph_pg *raw_pgid,
+ struct ceph_spg *spgid);
int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
const struct ceph_pg *raw_pgid);
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 5d0018782d50..385db08bb8b2 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -439,6 +439,12 @@ enum {
const char *ceph_osd_watch_op_name(int o);
+enum {
+ CEPH_OSD_BACKOFF_OP_BLOCK = 1,
+ CEPH_OSD_BACKOFF_OP_ACK_BLOCK = 2,
+ CEPH_OSD_BACKOFF_OP_UNBLOCK = 3,
+};
+
/*
* an individual object operation. each may be accompanied by some data
* payload
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 91bd464f4c9b..12c96d94d1fa 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -657,6 +657,28 @@ static inline void clk_disable_unprepare(struct clk *clk)
clk_unprepare(clk);
}
+static inline int clk_bulk_prepare_enable(int num_clks,
+ struct clk_bulk_data *clks)
+{
+ int ret;
+
+ ret = clk_bulk_prepare(num_clks, clks);
+ if (ret)
+ return ret;
+ ret = clk_bulk_enable(num_clks, clks);
+ if (ret)
+ clk_bulk_unprepare(num_clks, clks);
+
+ return ret;
+}
+
+static inline void clk_bulk_disable_unprepare(int num_clks,
+ struct clk_bulk_data *clks)
+{
+ clk_bulk_disable(num_clks, clks);
+ clk_bulk_unprepare(num_clks, clks);
+}
+
#if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK)
struct clk *of_clk_get(struct device_node *np, int index);
struct clk *of_clk_get_by_name(struct device_node *np, const char *name);
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index c156f5082758..d4292ebc5c8b 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -28,47 +28,49 @@
#include <linux/thermal.h>
#include <linux/cpumask.h>
+struct cpufreq_policy;
+
typedef int (*get_static_t)(cpumask_t *cpumask, int interval,
unsigned long voltage, u32 *power);
#ifdef CONFIG_CPU_THERMAL
/**
* cpufreq_cooling_register - function to create cpufreq cooling device.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy.
*/
struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus);
+cpufreq_cooling_register(struct cpufreq_policy *policy);
struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy,
u32 capacitance, get_static_t plat_static_func);
/**
* of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
* @np: a valid struct device_node to the cooling device device tree node.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy.
*/
#ifdef CONFIG_THERMAL_OF
struct thermal_cooling_device *
of_cpufreq_cooling_register(struct device_node *np,
- const struct cpumask *clip_cpus);
+ struct cpufreq_policy *policy);
struct thermal_cooling_device *
of_cpufreq_power_cooling_register(struct device_node *np,
- const struct cpumask *clip_cpus,
+ struct cpufreq_policy *policy,
u32 capacitance,
get_static_t plat_static_func);
#else
static inline struct thermal_cooling_device *
of_cpufreq_cooling_register(struct device_node *np,
- const struct cpumask *clip_cpus)
+ struct cpufreq_policy *policy)
{
return ERR_PTR(-ENOSYS);
}
static inline struct thermal_cooling_device *
of_cpufreq_power_cooling_register(struct device_node *np,
- const struct cpumask *clip_cpus,
+ struct cpufreq_policy *policy,
u32 capacitance,
get_static_t plat_static_func)
{
@@ -82,15 +84,14 @@ of_cpufreq_power_cooling_register(struct device_node *np,
*/
void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev);
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq);
#else /* !CONFIG_CPU_THERMAL */
static inline struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus)
+cpufreq_cooling_register(struct cpufreq_policy *policy)
{
return ERR_PTR(-ENOSYS);
}
static inline struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy,
u32 capacitance, get_static_t plat_static_func)
{
return NULL;
@@ -98,14 +99,14 @@ cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
static inline struct thermal_cooling_device *
of_cpufreq_cooling_register(struct device_node *np,
- const struct cpumask *clip_cpus)
+ struct cpufreq_policy *policy)
{
return ERR_PTR(-ENOSYS);
}
static inline struct thermal_cooling_device *
of_cpufreq_power_cooling_register(struct device_node *np,
- const struct cpumask *clip_cpus,
+ struct cpufreq_policy *policy,
u32 capacitance,
get_static_t plat_static_func)
{
@@ -117,11 +118,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
return;
}
-static inline
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
-{
- return THERMAL_CSTATE_INVALID;
-}
#endif /* CONFIG_CPU_THERMAL */
#endif /* __CPU_COOLING_H__ */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 905117bd5012..f10a9b3761cd 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -862,6 +862,20 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
return -EINVAL;
}
}
+
+static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy)
+{
+ struct cpufreq_frequency_table *pos;
+ int count = 0;
+
+ if (unlikely(!policy->freq_table))
+ return 0;
+
+ cpufreq_for_each_valid_entry(pos, policy->freq_table)
+ count++;
+
+ return count;
+}
#else
static inline int cpufreq_boost_trigger_state(int state)
{
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 4090a42578a8..2df2118fbe13 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -19,7 +19,7 @@
CRASH_CORE_NOTE_NAME_BYTES + \
CRASH_CORE_NOTE_DESC_BYTES)
-#define VMCOREINFO_BYTES (4096)
+#define VMCOREINFO_BYTES PAGE_SIZE
#define VMCOREINFO_NOTE_NAME "VMCOREINFO"
#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \
@@ -28,6 +28,7 @@
typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
+void crash_update_vmcoreinfo_safecopy(void *ptr);
void crash_save_vmcoreinfo(void);
void arch_crash_save_vmcoreinfo(void);
__printf(1, 2)
@@ -56,9 +57,7 @@ phys_addr_t paddr_vmcoreinfo_note(void);
#define VMCOREINFO_CONFIG(name) \
vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
-extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
-extern size_t vmcoreinfo_size;
-extern size_t vmcoreinfo_max_size;
+extern u32 *vmcoreinfo_note;
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len);
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index fbecbd089d75..92e165d417a6 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -2,6 +2,7 @@
#define CEPH_CRUSH_CRUSH_H
#ifdef __KERNEL__
+# include <linux/rbtree.h>
# include <linux/types.h>
#else
# include "crush_compat.h"
@@ -137,6 +138,68 @@ struct crush_bucket {
};
+/** @ingroup API
+ *
+ * Replacement weights for each item in a bucket. The size of the
+ * array must be exactly the size of the straw2 bucket, just as the
+ * item_weights array.
+ *
+ */
+struct crush_weight_set {
+ __u32 *weights; /*!< 16.16 fixed point weights
+ in the same order as items */
+ __u32 size; /*!< size of the __weights__ array */
+};
+
+/** @ingroup API
+ *
+ * Replacement weights and ids for a given straw2 bucket, for
+ * placement purposes.
+ *
+ * When crush_do_rule() chooses the Nth item from a straw2 bucket, the
+ * replacement weights found at __weight_set[N]__ are used instead of
+ * the weights from __item_weights__. If __N__ is greater than
+ * __weight_set_size__, the weights found at __weight_set_size-1__ are
+ * used instead. For instance if __weight_set__ is:
+ *
+ * [ [ 0x10000, 0x20000 ], // position 0
+ * [ 0x20000, 0x40000 ] ] // position 1
+ *
+ * choosing the 0th item will use position 0 weights [ 0x10000, 0x20000 ]
+ * choosing the 1th item will use position 1 weights [ 0x20000, 0x40000 ]
+ * choosing the 2th item will use position 1 weights [ 0x20000, 0x40000 ]
+ * etc.
+ *
+ */
+struct crush_choose_arg {
+ __s32 *ids; /*!< values to use instead of items */
+ __u32 ids_size; /*!< size of the __ids__ array */
+ struct crush_weight_set *weight_set; /*!< weight replacements for
+ a given position */
+ __u32 weight_set_size; /*!< size of the __weight_set__ array */
+};
+
+/** @ingroup API
+ *
+ * Replacement weights and ids for each bucket in the crushmap. The
+ * __size__ of the __args__ array must be exactly the same as the
+ * __map->max_buckets__.
+ *
+ * The __crush_choose_arg__ at index N will be used when choosing
+ * an item from the bucket __map->buckets[N]__ bucket, provided it
+ * is a straw2 bucket.
+ *
+ */
+struct crush_choose_arg_map {
+#ifdef __KERNEL__
+ struct rb_node node;
+ u64 choose_args_index;
+#endif
+ struct crush_choose_arg *args; /*!< replacement for each bucket
+ in the crushmap */
+ __u32 size; /*!< size of the __args__ array */
+};
+
struct crush_bucket_uniform {
struct crush_bucket h;
__u32 item_weight; /* 16-bit fixed point; all items equally weighted */
@@ -236,6 +299,9 @@ struct crush_map {
__u32 allowed_bucket_algs;
__u32 *choose_tries;
+#else
+ /* CrushWrapper::choose_args */
+ struct rb_root choose_args;
#endif
};
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index c95e19e1ff11..141edabb947e 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -11,11 +11,10 @@
#include "crush.h"
extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
-extern int crush_do_rule(const struct crush_map *map,
- int ruleno,
- int x, int *result, int result_max,
- const __u32 *weights, int weight_max,
- void *cwin);
+int crush_do_rule(const struct crush_map *map,
+ int ruleno, int x, int *result, int result_max,
+ const __u32 *weight, int weight_max,
+ void *cwin, const struct crush_choose_arg *choose_args);
/*
* Returns the exact amount of workspace that will need to be used
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8f39db7439c3..794811875732 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -154,11 +154,6 @@ static inline unsigned int dax_radix_order(void *entry)
#endif
int dax_pfn_mkwrite(struct vm_fault *vmf);
-static inline bool vma_is_dax(struct vm_area_struct *vma)
-{
- return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
-}
-
static inline bool dax_mapping(struct address_space *mapping)
{
return mapping->host && IS_DAX(mapping->host);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 025727bf6797..c706eaac692e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -592,8 +592,8 @@ static inline struct inode *d_real_inode(const struct dentry *dentry)
}
struct name_snapshot {
- const char *name;
- char inline_name[DNAME_INLINE_LEN];
+ const unsigned char *name;
+ unsigned char inline_name[DNAME_INLINE_LEN];
};
void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
void release_dentry_name_snapshot(struct name_snapshot *);
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 6daf6d4971f6..2f14ac73d01d 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -14,6 +14,7 @@
#define _LINUX_EVENTPOLL_H
#include <uapi/linux/eventpoll.h>
+#include <uapi/linux/kcmp.h>
/* Forward declarations to avoid compiler errors */
@@ -22,6 +23,10 @@ struct file;
#ifdef CONFIG_EPOLL
+#ifdef CONFIG_CHECKPOINT_RESTORE
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
+#endif
+
/* Used to initialize the epoll bits inside the "struct file" */
static inline void eventpoll_init_file(struct file *file)
{
diff --git a/include/linux/extable.h b/include/linux/extable.h
index 7effea4b257d..28addad0dda7 100644
--- a/include/linux/extable.h
+++ b/include/linux/extable.h
@@ -2,13 +2,14 @@
#define _LINUX_EXTABLE_H
#include <linux/stddef.h> /* for NULL */
+#include <linux/types.h>
struct module;
struct exception_table_entry;
const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
- const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+ const size_t num,
unsigned long value);
void sort_extable(struct exception_table_entry *start,
struct exception_table_entry *finish);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0cfa47125d52..976aaa1af82a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -18,6 +18,7 @@
#include <linux/bug.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
+#include <linux/mm_types.h>
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fcntl.h>
@@ -1954,6 +1955,9 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
* wb stat updates to grab mapping->tree_lock. See
* inode_switch_wb_work_fn() for details.
*
+ * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
+ * and work dirs among overlayfs mounts.
+ *
* Q: What is the difference between I_WILL_FREE and I_FREEING?
*/
#define I_DIRTY_SYNC (1 << 0)
@@ -1974,6 +1978,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
#define __I_DIRTY_TIME_EXPIRED 12
#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED)
#define I_WB_SWITCH (1 << 13)
+#define I_OVL_INUSE (1 << 14)
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
@@ -3127,6 +3132,11 @@ static inline bool io_is_direct(struct file *filp)
return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
}
+static inline bool vma_is_dax(struct vm_area_struct *vma)
+{
+ return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
+}
+
static inline int iocb_flags(struct file *file)
{
int res = 0;
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 3dff2398a5f0..9ab375419189 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -12,6 +12,8 @@
#ifndef _LINUX_FWNODE_H_
#define _LINUX_FWNODE_H_
+#include <linux/types.h>
+
enum fwnode_type {
FWNODE_INVALID = 0,
FWNODE_OF,
@@ -22,9 +24,12 @@ enum fwnode_type {
FWNODE_IRQCHIP
};
+struct fwnode_operations;
+
struct fwnode_handle {
enum fwnode_type type;
struct fwnode_handle *secondary;
+ const struct fwnode_operations *ops;
};
/**
@@ -39,4 +44,68 @@ struct fwnode_endpoint {
const struct fwnode_handle *local_fwnode;
};
+/**
+ * struct fwnode_operations - Operations for fwnode interface
+ * @get: Get a reference to an fwnode.
+ * @put: Put a reference to an fwnode.
+ * @property_present: Return true if a property is present.
+ * @property_read_integer_array: Read an array of integer properties. Return
+ * zero on success, a negative error code
+ * otherwise.
+ * @property_read_string_array: Read an array of string properties. Return zero
+ * on success, a negative error code otherwise.
+ * @get_parent: Return the parent of an fwnode.
+ * @get_next_child_node: Return the next child node in an iteration.
+ * @get_named_child_node: Return a child node with a given name.
+ * @graph_get_next_endpoint: Return an endpoint node in an iteration.
+ * @graph_get_remote_endpoint: Return the remote endpoint node of a local
+ * endpoint node.
+ * @graph_get_port_parent: Return the parent node of a port node.
+ * @graph_parse_endpoint: Parse endpoint for port and endpoint id.
+ */
+struct fwnode_operations {
+ void (*get)(struct fwnode_handle *fwnode);
+ void (*put)(struct fwnode_handle *fwnode);
+ bool (*device_is_available)(struct fwnode_handle *fwnode);
+ bool (*property_present)(struct fwnode_handle *fwnode,
+ const char *propname);
+ int (*property_read_int_array)(struct fwnode_handle *fwnode,
+ const char *propname,
+ unsigned int elem_size, void *val,
+ size_t nval);
+ int (*property_read_string_array)(struct fwnode_handle *fwnode_handle,
+ const char *propname,
+ const char **val, size_t nval);
+ struct fwnode_handle *(*get_parent)(struct fwnode_handle *fwnode);
+ struct fwnode_handle *
+ (*get_next_child_node)(struct fwnode_handle *fwnode,
+ struct fwnode_handle *child);
+ struct fwnode_handle *
+ (*get_named_child_node)(struct fwnode_handle *fwnode, const char *name);
+ struct fwnode_handle *
+ (*graph_get_next_endpoint)(struct fwnode_handle *fwnode,
+ struct fwnode_handle *prev);
+ struct fwnode_handle *
+ (*graph_get_remote_endpoint)(struct fwnode_handle *fwnode);
+ struct fwnode_handle *
+ (*graph_get_port_parent)(struct fwnode_handle *fwnode);
+ int (*graph_parse_endpoint)(struct fwnode_handle *fwnode,
+ struct fwnode_endpoint *endpoint);
+};
+
+#define fwnode_has_op(fwnode, op) \
+ ((fwnode) && (fwnode)->ops && (fwnode)->ops->op)
+#define fwnode_call_int_op(fwnode, op, ...) \
+ (fwnode ? (fwnode_has_op(fwnode, op) ? \
+ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \
+ -EINVAL)
+#define fwnode_call_ptr_op(fwnode, op, ...) \
+ (fwnode_has_op(fwnode, op) ? \
+ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL)
+#define fwnode_call_void_op(fwnode, op, ...) \
+ do { \
+ if (fwnode_has_op(fwnode, op)) \
+ (fwnode)->ops->op(fwnode, ## __VA_ARGS__); \
+ } while (false)
+
#endif
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 4c6656f1fee7..bcfb9f7c46f5 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -25,7 +25,7 @@ struct vm_area_struct;
#define ___GFP_FS 0x80u
#define ___GFP_COLD 0x100u
#define ___GFP_NOWARN 0x200u
-#define ___GFP_REPEAT 0x400u
+#define ___GFP_RETRY_MAYFAIL 0x400u
#define ___GFP_NOFAIL 0x800u
#define ___GFP_NORETRY 0x1000u
#define ___GFP_MEMALLOC 0x2000u
@@ -136,26 +136,56 @@ struct vm_area_struct;
*
* __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
*
- * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
- * _might_ fail. This depends upon the particular VM implementation.
+ * The default allocator behavior depends on the request size. We have a concept
+ * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER).
+ * !costly allocations are too essential to fail so they are implicitly
+ * non-failing by default (with some exceptions like OOM victims might fail so
+ * the caller still has to check for failures) while costly requests try to be
+ * not disruptive and back off even without invoking the OOM killer.
+ * The following three modifiers might be used to override some of these
+ * implicit rules
+ *
+ * __GFP_NORETRY: The VM implementation will try only very lightweight
+ * memory direct reclaim to get some memory under memory pressure (thus
+ * it can sleep). It will avoid disruptive actions like OOM killer. The
+ * caller must handle the failure which is quite likely to happen under
+ * heavy memory pressure. The flag is suitable when failure can easily be
+ * handled at small cost, such as reduced throughput
+ *
+ * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ * procedures that have previously failed if there is some indication
+ * that progress has been made else where. It can wait for other
+ * tasks to attempt high level approaches to freeing memory such as
+ * compaction (which removes fragmentation) and page-out.
+ * There is still a definite limit to the number of retries, but it is
+ * a larger limit than with __GFP_NORETRY.
+ * Allocations with this flag may fail, but only when there is
+ * genuinely little unused memory. While these allocations do not
+ * directly trigger the OOM killer, their failure indicates that
+ * the system is likely to need to use the OOM killer soon. The
+ * caller must handle failure, but can reasonably do so by failing
+ * a higher-level request, or completing it only in a much less
+ * efficient manner.
+ * If the allocation does fail, and the caller is in a position to
+ * free some non-essential memory, doing so could benefit the system
+ * as a whole.
*
* __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- * cannot handle allocation failures. New users should be evaluated carefully
- * (and the flag should be used only when there is no reasonable failure
- * policy) but it is definitely preferable to use the flag rather than
- * opencode endless loop around allocator.
- *
- * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
- * return NULL when direct reclaim and memory compaction have failed to allow
- * the allocation to succeed. The OOM killer is not called with the current
- * implementation.
+ * cannot handle allocation failures. The allocation could block
+ * indefinitely but will never return with failure. Testing for
+ * failure is pointless.
+ * New users should be evaluated carefully (and the flag should be
+ * used only when there is no reasonable failure policy) but it is
+ * definitely preferable to use the flag rather than opencode endless
+ * loop around allocator.
+ * Using this flag for costly allocations is _highly_ discouraged.
*/
#define __GFP_IO ((__force gfp_t)___GFP_IO)
#define __GFP_FS ((__force gfp_t)___GFP_FS)
#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
-#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT)
+#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL)
#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL)
#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index d3b3e8fcc717..ee696347f928 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -1,6 +1,10 @@
#ifndef _LINUX_HUGE_MM_H
#define _LINUX_HUGE_MM_H
+#include <linux/sched/coredump.h>
+
+#include <linux/fs.h> /* only for vma_is_dax() */
+
extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
@@ -85,14 +89,32 @@ extern struct kobj_attribute shmem_enabled_attr;
extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
-#define transparent_hugepage_enabled(__vma) \
- ((transparent_hugepage_flags & \
- (1<<TRANSPARENT_HUGEPAGE_FLAG) || \
- (transparent_hugepage_flags & \
- (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \
- ((__vma)->vm_flags & VM_HUGEPAGE))) && \
- !((__vma)->vm_flags & VM_NOHUGEPAGE) && \
- !is_vma_temporary_stack(__vma))
+extern unsigned long transparent_hugepage_flags;
+
+static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & VM_NOHUGEPAGE)
+ return false;
+
+ if (is_vma_temporary_stack(vma))
+ return false;
+
+ if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+ return false;
+
+ if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
+ return true;
+
+ if (vma_is_dax(vma))
+ return true;
+
+ if (transparent_hugepage_flags &
+ (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
+ return !!(vma->vm_flags & VM_HUGEPAGE);
+
+ return false;
+}
+
#define transparent_hugepage_use_zero_page() \
(transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
@@ -104,8 +126,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
#define transparent_hugepage_debug_cow() 0
#endif /* CONFIG_DEBUG_VM */
-extern unsigned long transparent_hugepage_flags;
-
extern unsigned long thp_get_unmapped_area(struct file *filp,
unsigned long addr, unsigned long len, unsigned long pgoff,
unsigned long flags);
@@ -224,7 +244,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm);
#define hpage_nr_pages(x) 1
-#define transparent_hugepage_enabled(__vma) 0
+static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+ return false;
+}
static inline void prep_transhuge_page(struct page *page) {}
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 46bfb702e7d6..8d9fe131a240 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -116,7 +116,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
-int dequeue_hwpoisoned_huge_page(struct page *page);
bool isolate_huge_page(struct page *page, struct list_head *list);
void putback_active_hugepage(struct page *page);
void free_huge_page(struct page *page);
@@ -192,10 +191,6 @@ static inline void hugetlb_show_meminfo(void)
#define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
src_addr, pagep) ({ BUG(); 0; })
#define huge_pte_offset(mm, address, sz) 0
-static inline int dequeue_hwpoisoned_huge_page(struct page *page)
-{
- return 0;
-}
static inline bool isolate_huge_page(struct page *page, struct list_head *list)
{
@@ -354,6 +349,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
struct page *alloc_huge_page_node(struct hstate *h, int nid);
struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
+struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);
@@ -472,6 +469,7 @@ static inline pgoff_t basepage_index(struct page *page)
return __basepage_index(page);
}
+extern int dissolve_free_huge_page(struct page *page);
extern int dissolve_free_huge_pages(unsigned long start_pfn,
unsigned long end_pfn);
static inline bool hugepage_migration_supported(struct hstate *h)
@@ -528,6 +526,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
struct hstate {};
#define alloc_huge_page(v, a, r) NULL
#define alloc_huge_page_node(h, nid) NULL
+#define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL
#define alloc_huge_page_noerr(v, a, r) NULL
#define alloc_bootmem_huge_page(h) NULL
#define hstate_file(f) NULL
@@ -550,15 +549,37 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)
{
return 1;
}
-#define hstate_index_to_shift(index) 0
-#define hstate_index(h) 0
+
+static inline unsigned hstate_index_to_shift(unsigned index)
+{
+ return 0;
+}
+
+static inline int hstate_index(struct hstate *h)
+{
+ return 0;
+}
static inline pgoff_t basepage_index(struct page *page)
{
return page->index;
}
-#define dissolve_free_huge_pages(s, e) 0
-#define hugepage_migration_supported(h) false
+
+static inline int dissolve_free_huge_page(struct page *page)
+{
+ return 0;
+}
+
+static inline int dissolve_free_huge_pages(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ return 0;
+}
+
+static inline bool hugepage_migration_supported(struct hstate *h)
+{
+ return false;
+}
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 72d0ece70ed3..00ca5b86a753 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -295,6 +295,8 @@ static inline int i2c_slave_event(struct i2c_client *client,
{
return client->slave_cb(client, event, val);
}
+#else
+static inline bool i2c_detect_slave_mode(struct device *dev) { return false; }
#endif
/**
diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h
deleted file mode 100644
index 06e3089795fb..000000000000
--- a/include/linux/i2c/i2c-sh_mobile.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __I2C_SH_MOBILE_H__
-#define __I2C_SH_MOBILE_H__
-
-#include <linux/platform_device.h>
-
-struct i2c_sh_mobile_platform_data {
- unsigned long bus_speed;
- unsigned int clks_per_count;
-};
-
-#endif /* __I2C_SH_MOBILE_H__ */
diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index 55289d261b4f..bc67b767f9ce 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h
@@ -10,6 +10,9 @@ extern int rd_prompt;
/* starting block # of image */
extern int rd_image_start;
+/* size of a single RAM disk */
+extern unsigned long rd_size;
+
/* 1 if it is not an error if initrd_start < memory_start */
extern int initrd_below_start_ok;
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 3c25794042f9..99bc5b3ae26e 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -102,6 +102,21 @@ extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags,
*/
extern int intel_svm_unbind_mm(struct device *dev, int pasid);
+/**
+ * intel_svm_is_pasid_valid() - check if pasid is valid
+ * @dev: Device for which PASID was allocated
+ * @pasid: PASID value to be checked
+ *
+ * This function checks if the specified pasid is still valid. A
+ * valid pasid means the backing mm is still having a valid user.
+ * For kernel callers init_mm is always valid. for other mm, if mm->mm_users
+ * is non-zero, it is valid.
+ *
+ * returns -EINVAL if invalid pasid, 0 if pasid ref count is invalid
+ * 1 if pasid is valid.
+ */
+extern int intel_svm_is_pasid_valid(struct device *dev, int pasid);
+
#else /* CONFIG_INTEL_IOMMU_SVM */
static inline int intel_svm_bind_mm(struct device *dev, int *pasid,
@@ -114,6 +129,11 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid)
{
BUG();
}
+
+static int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_INTEL_IOMMU_SVM */
#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL))
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 71fd92d81b26..5591f055e13f 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -20,6 +20,9 @@ struct kern_ipc_perm {
umode_t mode;
unsigned long seq;
void *security;
+
+ struct rcu_head rcu;
+ atomic_t refcount;
} ____cacheline_aligned_in_smp;
#endif /* _LINUX_IPC_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 1c91f26e2996..bd6d96cf80b1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -11,6 +11,7 @@
#include <linux/log2.h>
#include <linux/typecheck.h>
#include <linux/printk.h>
+#include <linux/build_bug.h>
#include <asm/byteorder.h>
#include <uapi/linux/kernel.h>
@@ -854,9 +855,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
* @member: the name of the member within the struct.
*
*/
-#define container_of(ptr, type, member) ({ \
- const typeof( ((type *)0)->member ) *__mptr = (ptr); \
- (type *)( (char *)__mptr - offsetof(type,member) );})
+#define container_of(ptr, type, member) ({ \
+ void *__mptr = (void *)(ptr); \
+ BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
+ !__same_type(*(ptr), void), \
+ "pointer type mismatch in container_of()"); \
+ ((type *)(__mptr - offsetof(type, member))); })
/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 65888418fb69..dd056fab9e35 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -172,6 +172,7 @@ struct kimage {
unsigned long start;
struct page *control_code_page;
struct page *swap_page;
+ void *vmcoreinfo_data_copy; /* locates in the crash memory */
unsigned long nr_segments;
struct kexec_segment segment[KEXEC_SEGMENT_MAX];
@@ -241,6 +242,7 @@ extern void crash_kexec(struct pt_regs *);
int kexec_should_crash(struct task_struct *);
int kexec_crash_loaded(void);
void crash_save_cpu(struct pt_regs *regs, int cpu);
+extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 5d9a400af509..f0d7335336cd 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -48,7 +48,8 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
if ((khugepaged_always() ||
(khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
- !(vm_flags & VM_NOHUGEPAGE))
+ !(vm_flags & VM_NOHUGEPAGE) &&
+ !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
if (__khugepaged_enter(vma->vm_mm))
return -ENOMEM;
return 0;
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index cb0ba9f2a9a2..fa7fd03cb5f9 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -44,6 +44,7 @@ struct list_lru_node {
/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
struct list_lru_memcg *memcg_lrus;
#endif
+ long nr_items;
} ____cacheline_aligned_in_smp;
struct list_lru {
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 41f7b6a04d69..3eca67728366 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -192,9 +192,9 @@ struct nlm_block {
* Global variables
*/
extern const struct rpc_program nlm_program;
-extern struct svc_procedure nlmsvc_procedures[];
+extern const struct svc_procedure nlmsvc_procedures[];
#ifdef CONFIG_LOCKD_V4
-extern struct svc_procedure nlmsvc_procedures4[];
+extern const struct svc_procedure nlmsvc_procedures4[];
#endif
extern int nlmsvc_grace_period;
extern unsigned long nlmsvc_timeout;
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d39ed1cc5fbf..7acbecc21a40 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -95,19 +95,19 @@ struct nlm_reboot {
*/
#define NLMSVC_XDRSIZE sizeof(struct nlm_args)
-int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
+int nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
+int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
+int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
+int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
+int nlmsvc_encode_res(struct svc_rqst *, __be32 *);
+int nlmsvc_decode_res(struct svc_rqst *, __be32 *);
+int nlmsvc_encode_void(struct svc_rqst *, __be32 *);
+int nlmsvc_decode_void(struct svc_rqst *, __be32 *);
+int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
+int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
+int nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
+int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
/*
int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index e58c88b52ce1..bf1645609225 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -23,19 +23,19 @@
-int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *);
-int nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *);
-int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
+int nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
+int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
+int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
+int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
+int nlm4svc_encode_res(struct svc_rqst *, __be32 *);
+int nlm4svc_decode_res(struct svc_rqst *, __be32 *);
+int nlm4svc_encode_void(struct svc_rqst *, __be32 *);
+int nlm4svc_decode_void(struct svc_rqst *, __be32 *);
+int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
+int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
+int nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
+int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
/*
int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 28baee63eaf6..4e887ba22635 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -149,6 +149,7 @@ struct cros_ec_device {
struct ec_response_get_next_event event_data;
int event_size;
+ u32 host_event_wake_mask;
};
/**
@@ -172,6 +173,8 @@ struct cros_ec_platform {
u16 cmd_offset;
};
+struct cros_ec_debugfs;
+
/*
* struct cros_ec_dev - ChromeOS EC device entry point
*
@@ -179,6 +182,7 @@ struct cros_ec_platform {
* @cdev: Character device structure in /dev
* @ec_dev: cros_ec_device structure to talk to the physical device
* @dev: pointer to the platform device
+ * @debug_info: cros_ec_debugfs structure for debugging information
* @cmd_offset: offset to apply for each command.
*/
struct cros_ec_dev {
@@ -186,6 +190,7 @@ struct cros_ec_dev {
struct cdev cdev;
struct cros_ec_device *ec_dev;
struct device *dev;
+ struct cros_ec_debugfs *debug_info;
u16 cmd_offset;
u32 features[2];
};
@@ -295,10 +300,22 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev);
* cros_ec_get_next_event - Fetch next event from the ChromeOS EC
*
* @ec_dev: Device to fetch event from
+ * @wake_event: Pointer to a bool set to true upon return if the event might be
+ * treated as a wake event. Ignored if null.
*
* Returns: 0 on success, Linux error number on failure
*/
-int cros_ec_get_next_event(struct cros_ec_device *ec_dev);
+int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event);
+
+/**
+ * cros_ec_get_host_event - Return a mask of event set by the EC.
+ *
+ * When MKBP is supported, when the EC raises an interrupt,
+ * We collect the events raised and call the functions in the ec notifier.
+ *
+ * This function is a helper to know which events are raised.
+ */
+u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
/* sysfs stuff */
extern struct attribute_group cros_ec_attr_group;
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index c93e7e0300ef..190c8f4afa02 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -625,6 +625,10 @@ struct ec_params_get_cmd_versions {
uint8_t cmd; /* Command to check */
} __packed;
+struct ec_params_get_cmd_versions_v1 {
+ uint16_t cmd; /* Command to check */
+} __packed;
+
struct ec_response_get_cmd_versions {
/*
* Mask of supported versions; use EC_VER_MASK() to compare with a
@@ -1158,13 +1162,20 @@ struct lightbar_params_v1 {
struct rgb_s color[8]; /* 0-3 are Google colors */
} __packed;
+/* Lightbar program */
+#define EC_LB_PROG_LEN 192
+struct lightbar_program {
+ uint8_t size;
+ uint8_t data[EC_LB_PROG_LEN];
+};
+
struct ec_params_lightbar {
uint8_t cmd; /* Command (see enum lightbar_command) */
union {
struct {
/* no args */
} dump, off, on, init, get_seq, get_params_v0, get_params_v1,
- version, get_brightness, get_demo;
+ version, get_brightness, get_demo, suspend, resume;
struct {
uint8_t num;
@@ -1182,8 +1193,13 @@ struct ec_params_lightbar {
uint8_t led;
} get_rgb;
+ struct {
+ uint8_t enable;
+ } manual_suspend_ctrl;
+
struct lightbar_params_v0 set_params_v0;
struct lightbar_params_v1 set_params_v1;
+ struct lightbar_program set_program;
};
} __packed;
@@ -1216,7 +1232,8 @@ struct ec_response_lightbar {
struct {
/* no return params */
} off, on, init, set_brightness, seq, reg, set_rgb,
- demo, set_params_v0, set_params_v1;
+ demo, set_params_v0, set_params_v1,
+ set_program, manual_suspend_ctrl, suspend, resume;
};
} __packed;
@@ -1240,6 +1257,10 @@ enum lightbar_command {
LIGHTBAR_CMD_GET_DEMO = 15,
LIGHTBAR_CMD_GET_PARAMS_V1 = 16,
LIGHTBAR_CMD_SET_PARAMS_V1 = 17,
+ LIGHTBAR_CMD_SET_PROGRAM = 18,
+ LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL = 19,
+ LIGHTBAR_CMD_SUSPEND = 20,
+ LIGHTBAR_CMD_RESUME = 21,
LIGHTBAR_NUM_CMDS
};
@@ -2285,13 +2306,28 @@ struct ec_params_charge_control {
#define EC_CMD_CONSOLE_SNAPSHOT 0x97
/*
- * Read next chunk of data from saved snapshot.
+ * Read data from the saved snapshot. If the subcmd parameter is
+ * CONSOLE_READ_NEXT, this will return data starting from the beginning of
+ * the latest snapshot. If it is CONSOLE_READ_RECENT, it will start from the
+ * end of the previous snapshot.
+ *
+ * The params are only looked at in version >= 1 of this command. Prior
+ * versions will just default to CONSOLE_READ_NEXT behavior.
*
* Response is null-terminated string. Empty string, if there is no more
* remaining output.
*/
#define EC_CMD_CONSOLE_READ 0x98
+enum ec_console_read_subcmd {
+ CONSOLE_READ_NEXT = 0,
+ CONSOLE_READ_RECENT
+};
+
+struct ec_params_console_read_v1 {
+ uint8_t subcmd; /* enum ec_console_read_subcmd */
+} __packed;
+
/*****************************************************************************/
/*
diff --git a/include/linux/mfd/cros_ec_lpc_mec.h b/include/linux/mfd/cros_ec_lpc_mec.h
new file mode 100644
index 000000000000..176496ddc66c
--- /dev/null
+++ b/include/linux/mfd/cros_ec_lpc_mec.h
@@ -0,0 +1,90 @@
+/*
+ * cros_ec_lpc_mec - LPC variant I/O for Microchip EC
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing,
+ * but everything else (including deghosting) is done here. The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#ifndef __LINUX_MFD_CROS_EC_MEC_H
+#define __LINUX_MFD_CROS_EC_MEC_H
+
+#include <linux/mfd/cros_ec_commands.h>
+
+enum cros_ec_lpc_mec_emi_access_mode {
+ /* 8-bit access */
+ ACCESS_TYPE_BYTE = 0x0,
+ /* 16-bit access */
+ ACCESS_TYPE_WORD = 0x1,
+ /* 32-bit access */
+ ACCESS_TYPE_LONG = 0x2,
+ /*
+ * 32-bit access, read or write of MEC_EMI_EC_DATA_B3 causes the
+ * EC data register to be incremented.
+ */
+ ACCESS_TYPE_LONG_AUTO_INCREMENT = 0x3,
+};
+
+enum cros_ec_lpc_mec_io_type {
+ MEC_IO_READ,
+ MEC_IO_WRITE,
+};
+
+/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */
+#define MEC_EMI_RANGE_START EC_HOST_CMD_REGION0
+#define MEC_EMI_RANGE_END (EC_LPC_ADDR_MEMMAP + EC_MEMMAP_SIZE)
+
+/* EMI registers are relative to base */
+#define MEC_EMI_BASE 0x800
+#define MEC_EMI_HOST_TO_EC (MEC_EMI_BASE + 0)
+#define MEC_EMI_EC_TO_HOST (MEC_EMI_BASE + 1)
+#define MEC_EMI_EC_ADDRESS_B0 (MEC_EMI_BASE + 2)
+#define MEC_EMI_EC_ADDRESS_B1 (MEC_EMI_BASE + 3)
+#define MEC_EMI_EC_DATA_B0 (MEC_EMI_BASE + 4)
+#define MEC_EMI_EC_DATA_B1 (MEC_EMI_BASE + 5)
+#define MEC_EMI_EC_DATA_B2 (MEC_EMI_BASE + 6)
+#define MEC_EMI_EC_DATA_B3 (MEC_EMI_BASE + 7)
+
+/*
+ * cros_ec_lpc_mec_init
+ *
+ * Initialize MEC I/O.
+ */
+void cros_ec_lpc_mec_init(void);
+
+/*
+ * cros_ec_lpc_mec_destroy
+ *
+ * Cleanup MEC I/O.
+ */
+void cros_ec_lpc_mec_destroy(void);
+
+/**
+ * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port
+ *
+ * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request
+ * @offset: Base read / write address
+ * @length: Number of bytes to read / write
+ * @buf: Destination / source buffer
+ *
+ * @return 8-bit checksum of all bytes read / written
+ */
+u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type,
+ unsigned int offset, unsigned int length, u8 *buf);
+
+#endif /* __LINUX_MFD_CROS_EC_MEC_H */
diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h
new file mode 100644
index 000000000000..5560bef63c2b
--- /dev/null
+++ b/include/linux/mfd/cros_ec_lpc_reg.h
@@ -0,0 +1,61 @@
+/*
+ * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing,
+ * but everything else (including deghosting) is done here. The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#ifndef __LINUX_MFD_CROS_EC_REG_H
+#define __LINUX_MFD_CROS_EC_REG_H
+
+/**
+ * cros_ec_lpc_read_bytes - Read bytes from a given LPC-mapped address.
+ * Returns 8-bit checksum of all bytes read.
+ *
+ * @offset: Base read address
+ * @length: Number of bytes to read
+ * @dest: Destination buffer
+ */
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest);
+
+/**
+ * cros_ec_lpc_write_bytes - Write bytes to a given LPC-mapped address.
+ * Returns 8-bit checksum of all bytes written.
+ *
+ * @offset: Base write address
+ * @length: Number of bytes to write
+ * @msg: Write data buffer
+ */
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg);
+
+/**
+ * cros_ec_lpc_reg_init
+ *
+ * Initialize register I/O.
+ */
+void cros_ec_lpc_reg_init(void);
+
+/**
+ * cros_ec_lpc_reg_destroy
+ *
+ * Cleanup reg I/O.
+ */
+void cros_ec_lpc_reg_destroy(void);
+
+#endif /* __LINUX_MFD_CROS_EC_REG_H */
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 48e24844b3c5..3e0d405dc842 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -4,6 +4,7 @@
#include <linux/mm.h>
#include <linux/mempolicy.h>
#include <linux/migrate_mode.h>
+#include <linux/hugetlb.h>
typedef struct page *new_page_t(struct page *page, unsigned long private,
int **reason);
@@ -30,6 +31,21 @@ enum migrate_reason {
/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */
extern char *migrate_reason_names[MR_TYPES];
+static inline struct page *new_page_nodemask(struct page *page,
+ int preferred_nid, nodemask_t *nodemask)
+{
+ gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
+
+ if (PageHuge(page))
+ return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
+ preferred_nid, nodemask);
+
+ if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
+ gfp_mask |= __GFP_HIGHMEM;
+
+ return __alloc_pages_nodemask(gfp_mask, 0, preferred_nid, nodemask);
+}
+
#ifdef CONFIG_MIGRATION
extern void putback_movable_pages(struct list_head *l);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7e8f100cb56d..fc14b8b3f6ce 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -603,12 +603,9 @@ extern struct page *mem_map;
#endif
/*
- * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
- * (mostly NUMA machines?) to denote a higher-level memory zone than the
- * zone denotes.
- *
* On NUMA machines, each NUMA node would have a pg_data_t to describe
- * it's memory layout.
+ * it's memory layout. On UMA machines there is a single pglist_data which
+ * describes the whole memory.
*
* Memory statistics and page replacement data structures are maintained on a
* per-zone basis.
@@ -1058,6 +1055,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
!defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
static inline unsigned long early_pfn_to_nid(unsigned long pfn)
{
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
return 0;
}
#endif
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index de0d889e4fe1..892148c448cc 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -107,6 +107,8 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
#define NAND_STATUS_READY 0x40
#define NAND_STATUS_WP 0x80
+#define NAND_DATA_IFACE_CHECK_ONLY -1
+
/*
* Constants for ECC_MODES
*/
@@ -116,6 +118,7 @@ typedef enum {
NAND_ECC_HW,
NAND_ECC_HW_SYNDROME,
NAND_ECC_HW_OOB_FIRST,
+ NAND_ECC_ON_DIE,
} nand_ecc_modes_t;
enum nand_ecc_algo {
@@ -257,6 +260,8 @@ struct nand_chip;
/* Vendor-specific feature address (Micron) */
#define ONFI_FEATURE_ADDR_READ_RETRY 0x89
+#define ONFI_FEATURE_ON_DIE_ECC 0x90
+#define ONFI_FEATURE_ON_DIE_ECC_EN BIT(3)
/* ONFI subfeature parameters length */
#define ONFI_SUBFEATURE_PARAM_LEN 4
@@ -477,6 +482,44 @@ static inline void nand_hw_control_init(struct nand_hw_control *nfc)
}
/**
+ * struct nand_ecc_step_info - ECC step information of ECC engine
+ * @stepsize: data bytes per ECC step
+ * @strengths: array of supported strengths
+ * @nstrengths: number of supported strengths
+ */
+struct nand_ecc_step_info {
+ int stepsize;
+ const int *strengths;
+ int nstrengths;
+};
+
+/**
+ * struct nand_ecc_caps - capability of ECC engine
+ * @stepinfos: array of ECC step information
+ * @nstepinfos: number of ECC step information
+ * @calc_ecc_bytes: driver's hook to calculate ECC bytes per step
+ */
+struct nand_ecc_caps {
+ const struct nand_ecc_step_info *stepinfos;
+ int nstepinfos;
+ int (*calc_ecc_bytes)(int step_size, int strength);
+};
+
+/* a shorthand to generate struct nand_ecc_caps with only one ECC stepsize */
+#define NAND_ECC_CAPS_SINGLE(__name, __calc, __step, ...) \
+static const int __name##_strengths[] = { __VA_ARGS__ }; \
+static const struct nand_ecc_step_info __name##_stepinfo = { \
+ .stepsize = __step, \
+ .strengths = __name##_strengths, \
+ .nstrengths = ARRAY_SIZE(__name##_strengths), \
+}; \
+static const struct nand_ecc_caps __name = { \
+ .stepinfos = &__name##_stepinfo, \
+ .nstepinfos = 1, \
+ .calc_ecc_bytes = __calc, \
+}
+
+/**
* struct nand_ecc_ctrl - Control structure for ECC
* @mode: ECC mode
* @algo: ECC algorithm
@@ -815,7 +858,10 @@ struct nand_manufacturer_ops {
* @read_retries: [INTERN] the number of read retry modes supported
* @onfi_set_features: [REPLACEABLE] set the features for ONFI nand
* @onfi_get_features: [REPLACEABLE] get the features for ONFI nand
- * @setup_data_interface: [OPTIONAL] setup the data interface and timing
+ * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If
+ * chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this
+ * means the configuration should not be applied but
+ * only checked.
* @bbt: [INTERN] bad block table pointer
* @bbt_td: [REPLACEABLE] bad block table descriptor for flash
* lookup.
@@ -826,9 +872,6 @@ struct nand_manufacturer_ops {
* structure which is shared among multiple independent
* devices.
* @priv: [OPTIONAL] pointer to private chip data
- * @errstat: [OPTIONAL] hardware specific function to perform
- * additional error status checks (determine if errors are
- * correctable).
* @manufacturer: [INTERN] Contains manufacturer information
*/
@@ -852,16 +895,13 @@ struct nand_chip {
int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this);
int (*erase)(struct mtd_info *mtd, int page);
int (*scan_bbt)(struct mtd_info *mtd);
- int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
- int status, int page);
int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip,
int feature_addr, uint8_t *subfeature_para);
int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip,
int feature_addr, uint8_t *subfeature_para);
int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode);
- int (*setup_data_interface)(struct mtd_info *mtd,
- const struct nand_data_interface *conf,
- bool check_only);
+ int (*setup_data_interface)(struct mtd_info *mtd, int chipnr,
+ const struct nand_data_interface *conf);
int chip_delay;
@@ -1244,6 +1284,15 @@ int nand_check_erased_ecc_chunk(void *data, int datalen,
void *extraoob, int extraooblen,
int threshold);
+int nand_check_ecc_caps(struct nand_chip *chip,
+ const struct nand_ecc_caps *caps, int oobavail);
+
+int nand_match_ecc_req(struct nand_chip *chip,
+ const struct nand_ecc_caps *caps, int oobavail);
+
+int nand_maximize_ecc(struct nand_chip *chip,
+ const struct nand_ecc_caps *caps, int oobavail);
+
/* Default write_oob implementation */
int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
@@ -1258,6 +1307,19 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
int page);
+/* Stub used by drivers that do not support GET/SET FEATURES operations */
+int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd,
+ struct nand_chip *chip, int addr,
+ u8 *subfeature_param);
+
+/* Default read_page_raw implementation */
+int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+ uint8_t *buf, int oob_required, int page);
+
+/* Default write_page_raw implementation */
+int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+ const uint8_t *buf, int oob_required, int page);
+
/* Reset and initialize a NAND device */
int nand_reset(struct nand_chip *chip, int chipnr);
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 06df1e06b6e0..c4beb70dacbd 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -20,6 +20,12 @@
*
* For each partition, these fields are available:
* name: string that will be used to label the partition's MTD device.
+ * types: some partitions can be containers using specific format to describe
+ * embedded subpartitions / volumes. E.g. many home routers use "firmware"
+ * partition that contains at least kernel and rootfs. In such case an
+ * extra parser is needed that will detect these dynamic partitions and
+ * report them to the MTD subsystem. If set this property stores an array
+ * of parser names to use when looking for subpartitions.
* size: the partition size; if defined as MTDPART_SIZ_FULL, the partition
* will extend to the end of the master MTD device.
* offset: absolute starting position within the master MTD device; if
@@ -38,6 +44,7 @@
struct mtd_partition {
const char *name; /* identifier string */
+ const char *const *types; /* names of parsers to use if any */
uint64_t size; /* partition size */
uint64_t offset; /* offset within the master MTD space */
uint32_t mask_flags; /* master MTD flags to mask out for this partition */
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index f2a718030476..55faa2f07cca 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -73,6 +73,15 @@
#define SPINOR_OP_BE_32K_4B 0x5c /* Erase 32KiB block */
#define SPINOR_OP_SE_4B 0xdc /* Sector erase (usually 64KiB) */
+/* Double Transfer Rate opcodes - defined in JEDEC JESD216B. */
+#define SPINOR_OP_READ_1_1_1_DTR 0x0d
+#define SPINOR_OP_READ_1_2_2_DTR 0xbd
+#define SPINOR_OP_READ_1_4_4_DTR 0xed
+
+#define SPINOR_OP_READ_1_1_1_DTR_4B 0x0e
+#define SPINOR_OP_READ_1_2_2_DTR_4B 0xbe
+#define SPINOR_OP_READ_1_4_4_DTR_4B 0xee
+
/* Used for SST flashes only. */
#define SPINOR_OP_BP 0x02 /* Byte program */
#define SPINOR_OP_WRDI 0x04 /* Write disable */
@@ -119,13 +128,81 @@
/* Configuration Register bits. */
#define CR_QUAD_EN_SPAN BIT(1) /* Spansion Quad I/O */
-enum read_mode {
- SPI_NOR_NORMAL = 0,
- SPI_NOR_FAST,
- SPI_NOR_DUAL,
- SPI_NOR_QUAD,
+/* Supported SPI protocols */
+#define SNOR_PROTO_INST_MASK GENMASK(23, 16)
+#define SNOR_PROTO_INST_SHIFT 16
+#define SNOR_PROTO_INST(_nbits) \
+ ((((unsigned long)(_nbits)) << SNOR_PROTO_INST_SHIFT) & \
+ SNOR_PROTO_INST_MASK)
+
+#define SNOR_PROTO_ADDR_MASK GENMASK(15, 8)
+#define SNOR_PROTO_ADDR_SHIFT 8
+#define SNOR_PROTO_ADDR(_nbits) \
+ ((((unsigned long)(_nbits)) << SNOR_PROTO_ADDR_SHIFT) & \
+ SNOR_PROTO_ADDR_MASK)
+
+#define SNOR_PROTO_DATA_MASK GENMASK(7, 0)
+#define SNOR_PROTO_DATA_SHIFT 0
+#define SNOR_PROTO_DATA(_nbits) \
+ ((((unsigned long)(_nbits)) << SNOR_PROTO_DATA_SHIFT) & \
+ SNOR_PROTO_DATA_MASK)
+
+#define SNOR_PROTO_IS_DTR BIT(24) /* Double Transfer Rate */
+
+#define SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits) \
+ (SNOR_PROTO_INST(_inst_nbits) | \
+ SNOR_PROTO_ADDR(_addr_nbits) | \
+ SNOR_PROTO_DATA(_data_nbits))
+#define SNOR_PROTO_DTR(_inst_nbits, _addr_nbits, _data_nbits) \
+ (SNOR_PROTO_IS_DTR | \
+ SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits))
+
+enum spi_nor_protocol {
+ SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1),
+ SNOR_PROTO_1_1_2 = SNOR_PROTO_STR(1, 1, 2),
+ SNOR_PROTO_1_1_4 = SNOR_PROTO_STR(1, 1, 4),
+ SNOR_PROTO_1_1_8 = SNOR_PROTO_STR(1, 1, 8),
+ SNOR_PROTO_1_2_2 = SNOR_PROTO_STR(1, 2, 2),
+ SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4),
+ SNOR_PROTO_1_8_8 = SNOR_PROTO_STR(1, 8, 8),
+ SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2),
+ SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4),
+ SNOR_PROTO_8_8_8 = SNOR_PROTO_STR(8, 8, 8),
+
+ SNOR_PROTO_1_1_1_DTR = SNOR_PROTO_DTR(1, 1, 1),
+ SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2),
+ SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4),
+ SNOR_PROTO_1_8_8_DTR = SNOR_PROTO_DTR(1, 8, 8),
};
+static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto)
+{
+ return !!(proto & SNOR_PROTO_IS_DTR);
+}
+
+static inline u8 spi_nor_get_protocol_inst_nbits(enum spi_nor_protocol proto)
+{
+ return ((unsigned long)(proto & SNOR_PROTO_INST_MASK)) >>
+ SNOR_PROTO_INST_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_addr_nbits(enum spi_nor_protocol proto)
+{
+ return ((unsigned long)(proto & SNOR_PROTO_ADDR_MASK)) >>
+ SNOR_PROTO_ADDR_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_data_nbits(enum spi_nor_protocol proto)
+{
+ return ((unsigned long)(proto & SNOR_PROTO_DATA_MASK)) >>
+ SNOR_PROTO_DATA_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_width(enum spi_nor_protocol proto)
+{
+ return spi_nor_get_protocol_data_nbits(proto);
+}
+
#define SPI_NOR_MAX_CMD_SIZE 8
enum spi_nor_ops {
SPI_NOR_OPS_READ = 0,
@@ -154,9 +231,11 @@ enum spi_nor_option_flags {
* @read_opcode: the read opcode
* @read_dummy: the dummy needed by the read operation
* @program_opcode: the program opcode
- * @flash_read: the mode of the read
* @sst_write_second: used by the SST write operation
* @flags: flag options for the current SPI-NOR (SNOR_F_*)
+ * @read_proto: the SPI protocol for read operations
+ * @write_proto: the SPI protocol for write operations
+ * @reg_proto the SPI protocol for read_reg/write_reg/erase operations
* @cmd_buf: used by the write_reg
* @prepare: [OPTIONAL] do some preparations for the
* read/write/erase/lock/unlock operations
@@ -185,7 +264,9 @@ struct spi_nor {
u8 read_opcode;
u8 read_dummy;
u8 program_opcode;
- enum read_mode flash_read;
+ enum spi_nor_protocol read_proto;
+ enum spi_nor_protocol write_proto;
+ enum spi_nor_protocol reg_proto;
bool sst_write_second;
u32 flags;
u8 cmd_buf[SPI_NOR_MAX_CMD_SIZE];
@@ -220,10 +301,71 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
}
/**
+ * struct spi_nor_hwcaps - Structure for describing the hardware capabilies
+ * supported by the SPI controller (bus master).
+ * @mask: the bitmask listing all the supported hw capabilies
+ */
+struct spi_nor_hwcaps {
+ u32 mask;
+};
+
+/*
+ *(Fast) Read capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * As a matter of performances, it is relevant to use Octo SPI protocols first,
+ * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly
+ * (Slow) Read.
+ */
+#define SNOR_HWCAPS_READ_MASK GENMASK(14, 0)
+#define SNOR_HWCAPS_READ BIT(0)
+#define SNOR_HWCAPS_READ_FAST BIT(1)
+#define SNOR_HWCAPS_READ_1_1_1_DTR BIT(2)
+
+#define SNOR_HWCAPS_READ_DUAL GENMASK(6, 3)
+#define SNOR_HWCAPS_READ_1_1_2 BIT(3)
+#define SNOR_HWCAPS_READ_1_2_2 BIT(4)
+#define SNOR_HWCAPS_READ_2_2_2 BIT(5)
+#define SNOR_HWCAPS_READ_1_2_2_DTR BIT(6)
+
+#define SNOR_HWCAPS_READ_QUAD GENMASK(10, 7)
+#define SNOR_HWCAPS_READ_1_1_4 BIT(7)
+#define SNOR_HWCAPS_READ_1_4_4 BIT(8)
+#define SNOR_HWCAPS_READ_4_4_4 BIT(9)
+#define SNOR_HWCAPS_READ_1_4_4_DTR BIT(10)
+
+#define SNOR_HWCPAS_READ_OCTO GENMASK(14, 11)
+#define SNOR_HWCAPS_READ_1_1_8 BIT(11)
+#define SNOR_HWCAPS_READ_1_8_8 BIT(12)
+#define SNOR_HWCAPS_READ_8_8_8 BIT(13)
+#define SNOR_HWCAPS_READ_1_8_8_DTR BIT(14)
+
+/*
+ * Page Program capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * Like (Fast) Read capabilities, Octo/Quad SPI protocols are preferred to the
+ * legacy SPI 1-1-1 protocol.
+ * Note that Dual Page Programs are not supported because there is no existing
+ * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
+ * implements such commands.
+ */
+#define SNOR_HWCAPS_PP_MASK GENMASK(22, 16)
+#define SNOR_HWCAPS_PP BIT(16)
+
+#define SNOR_HWCAPS_PP_QUAD GENMASK(19, 17)
+#define SNOR_HWCAPS_PP_1_1_4 BIT(17)
+#define SNOR_HWCAPS_PP_1_4_4 BIT(18)
+#define SNOR_HWCAPS_PP_4_4_4 BIT(19)
+
+#define SNOR_HWCAPS_PP_OCTO GENMASK(22, 20)
+#define SNOR_HWCAPS_PP_1_1_8 BIT(20)
+#define SNOR_HWCAPS_PP_1_8_8 BIT(21)
+#define SNOR_HWCAPS_PP_8_8_8 BIT(22)
+
+/**
* spi_nor_scan() - scan the SPI NOR
* @nor: the spi_nor structure
* @name: the chip type name
- * @mode: the read mode supported by the driver
+ * @hwcaps: the hardware capabilities supported by the controller driver
*
* The drivers can use this fuction to scan the SPI NOR.
* In the scanning, it will try to get all the necessary information to
@@ -233,6 +375,7 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
*
* Return: 0 for success, others for failure.
*/
-int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode);
+int spi_nor_scan(struct spi_nor *nor, const char *name,
+ const struct spi_nor_hwcaps *hwcaps);
#endif
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 1b1ca04820a3..47239c336688 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -479,6 +479,7 @@ enum {
NFSPROC4_CLNT_ACCESS,
NFSPROC4_CLNT_GETATTR,
NFSPROC4_CLNT_LOOKUP,
+ NFSPROC4_CLNT_LOOKUPP,
NFSPROC4_CLNT_LOOKUP_ROOT,
NFSPROC4_CLNT_REMOVE,
NFSPROC4_CLNT_RENAME,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index bb0eb2c9acca..e52cc55ac300 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -332,6 +332,7 @@ extern void nfs_zap_caches(struct inode *);
extern void nfs_invalidate_atime(struct inode *);
extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
struct nfs_fattr *, struct nfs4_label *);
+struct inode *nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *);
extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e418a1096662..74c44665e6d3 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -42,6 +42,7 @@ struct nfs_client {
#define NFS_CS_MIGRATION 2 /* - transparent state migr */
#define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */
#define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */
+#define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
@@ -210,6 +211,7 @@ struct nfs_server {
unsigned long mig_status;
#define NFS_MIG_IN_TRANSITION (1)
#define NFS_MIG_FAILED (2)
+#define NFS_MIG_TSM_POSSIBLE (3)
void (*destroy)(struct nfs_server *);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 247cc3d3498f..d67b67ae6c8b 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -33,6 +33,8 @@ enum {
PG_UPTODATE, /* page group sync bit in read path */
PG_WB_END, /* page group sync bit in write path */
PG_REMOVE, /* page group sync bit in write path */
+ PG_CONTENDED1, /* Is someone waiting for a lock? */
+ PG_CONTENDED2, /* Is someone waiting for a lock? */
};
struct nfs_inode;
@@ -93,8 +95,8 @@ struct nfs_pageio_descriptor {
const struct rpc_call_ops *pg_rpc_callops;
const struct nfs_pgio_completion_ops *pg_completion_ops;
struct pnfs_layout_segment *pg_lseg;
+ struct nfs_io_completion *pg_io_completion;
struct nfs_direct_req *pg_dreq;
- void *pg_layout_private;
unsigned int pg_bsize; /* default bsize for mirrors */
u32 pg_mirror_count;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b28c83475ee8..ca3bcc4ed4e5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -878,7 +878,7 @@ struct nfs3_readdirargs {
struct nfs_fh * fh;
__u64 cookie;
__be32 verf[2];
- int plus;
+ bool plus;
unsigned int count;
struct page ** pages;
};
@@ -909,7 +909,7 @@ struct nfs3_linkres {
struct nfs3_readdirres {
struct nfs_fattr * dir_attr;
__be32 * verf;
- int plus;
+ bool plus;
};
struct nfs3_getaclres {
@@ -1012,7 +1012,6 @@ struct nfs4_link_res {
struct nfs_fattr * dir_attr;
};
-
struct nfs4_lookup_arg {
struct nfs4_sequence_args seq_args;
const struct nfs_fh * dir_fh;
@@ -1028,6 +1027,20 @@ struct nfs4_lookup_res {
struct nfs4_label *label;
};
+struct nfs4_lookupp_arg {
+ struct nfs4_sequence_args seq_args;
+ const struct nfs_fh *fh;
+ const u32 *bitmask;
+};
+
+struct nfs4_lookupp_res {
+ struct nfs4_sequence_res seq_res;
+ const struct nfs_server *server;
+ struct nfs_fattr *fattr;
+ struct nfs_fh *fh;
+ struct nfs4_label *label;
+};
+
struct nfs4_lookup_root_arg {
struct nfs4_sequence_args seq_args;
const u32 * bitmask;
@@ -1053,7 +1066,7 @@ struct nfs4_readdir_arg {
struct page ** pages; /* zero-copy data */
unsigned int pgbase; /* zero-copy data */
const u32 * bitmask;
- int plus;
+ bool plus;
};
struct nfs4_readdir_res {
@@ -1422,6 +1435,7 @@ enum {
NFS_IOHDR_STAT,
};
+struct nfs_io_completion;
struct nfs_pgio_header {
struct inode *inode;
struct rpc_cred *cred;
@@ -1435,8 +1449,8 @@ struct nfs_pgio_header {
void (*release) (struct nfs_pgio_header *hdr);
const struct nfs_pgio_completion_ops *completion_ops;
const struct nfs_rw_ops *rw_ops;
+ struct nfs_io_completion *io_completion;
struct nfs_direct_req *dreq;
- void *layout_private;
spinlock_t lock;
/* fields protected by lock */
int pnfs_error;
@@ -1533,6 +1547,7 @@ struct nfs_renamedata {
struct nfs_fattr new_fattr;
void (*complete)(struct rpc_task *, struct nfs_renamedata *);
long timeout;
+ bool cancelled;
};
struct nfs_access_entry;
@@ -1567,6 +1582,8 @@ struct nfs_rpc_ops {
int (*lookup) (struct inode *, const struct qstr *,
struct nfs_fh *, struct nfs_fattr *,
struct nfs4_label *);
+ int (*lookupp) (struct inode *, struct nfs_fh *,
+ struct nfs_fattr *, struct nfs4_label *);
int (*access) (struct inode *, struct nfs_access_entry *);
int (*readlink)(struct inode *, struct page *, unsigned int,
unsigned int);
@@ -1585,7 +1602,7 @@ struct nfs_rpc_ops {
int (*mkdir) (struct inode *, struct dentry *, struct iattr *);
int (*rmdir) (struct inode *, const struct qstr *);
int (*readdir) (struct dentry *, struct rpc_cred *,
- u64, struct page **, unsigned int, int);
+ u64, struct page **, unsigned int, bool);
int (*mknod) (struct inode *, struct dentry *, struct iattr *,
dev_t);
int (*statfs) (struct nfs_server *, struct nfs_fh *,
@@ -1595,7 +1612,7 @@ struct nfs_rpc_ops {
int (*pathconf) (struct nfs_server *, struct nfs_fh *,
struct nfs_pathconf *);
int (*set_capabilities)(struct nfs_server *, struct nfs_fh *);
- int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
+ int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, bool);
int (*pgio_rpc_prepare)(struct rpc_task *,
struct nfs_pgio_header *);
void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index aa3cd0878270..8aa01fd859fb 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -6,18 +6,26 @@
#include <linux/sched.h>
#include <asm/irq.h>
+#if defined(CONFIG_HAVE_NMI_WATCHDOG)
+#include <asm/nmi.h>
+#endif
#ifdef CONFIG_LOCKUP_DETECTOR
+void lockup_detector_init(void);
+#else
+static inline void lockup_detector_init(void)
+{
+}
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
extern void touch_softlockup_watchdog_sched(void);
extern void touch_softlockup_watchdog(void);
extern void touch_softlockup_watchdog_sync(void);
extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
-extern unsigned int hardlockup_panic;
-void lockup_detector_init(void);
+extern int soft_watchdog_enabled;
+extern atomic_t watchdog_park_in_progress;
#else
static inline void touch_softlockup_watchdog_sched(void)
{
@@ -31,9 +39,6 @@ static inline void touch_softlockup_watchdog_sync(void)
static inline void touch_all_softlockup_watchdogs(void)
{
}
-static inline void lockup_detector_init(void)
-{
-}
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
@@ -61,6 +66,21 @@ static inline void reset_hung_task_detector(void)
#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
+#if defined(CONFIG_HARDLOCKUP_DETECTOR)
+extern void hardlockup_detector_disable(void);
+extern unsigned int hardlockup_panic;
+#else
+static inline void hardlockup_detector_disable(void) {}
+#endif
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
+extern void arch_touch_nmi_watchdog(void);
+#else
+#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+#endif
+
/**
* touch_nmi_watchdog - restart NMI watchdog timeout.
*
@@ -68,21 +88,11 @@ static inline void reset_hung_task_detector(void)
* may be used to reset the timeout - for code which intentionally
* disables interrupts for a long time. This call is stateless.
*/
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-#include <asm/nmi.h>
-extern void touch_nmi_watchdog(void);
-#else
static inline void touch_nmi_watchdog(void)
{
+ arch_touch_nmi_watchdog();
touch_softlockup_watchdog();
}
-#endif
-
-#if defined(CONFIG_HARDLOCKUP_DETECTOR)
-extern void hardlockup_detector_disable(void);
-#else
-static inline void hardlockup_detector_disable(void) {}
-#endif
/*
* Create trigger_all_cpu_backtrace() out of the arch-provided
@@ -139,15 +149,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
}
#endif
-#ifdef CONFIG_LOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
u64 hw_nmi_get_sample_period(int watchdog_thresh);
+#endif
+
+#ifdef CONFIG_LOCKUP_DETECTOR
extern int nmi_watchdog_enabled;
-extern int soft_watchdog_enabled;
extern int watchdog_user_enabled;
extern int watchdog_thresh;
extern unsigned long watchdog_enabled;
+extern struct cpumask watchdog_cpumask;
extern unsigned long *watchdog_cpumask_bits;
-extern atomic_t watchdog_park_in_progress;
+extern int __read_mostly watchdog_suspended;
#ifdef CONFIG_SMP
extern int sysctl_softlockup_all_cpu_backtrace;
extern int sysctl_hardlockup_all_cpu_backtrace;
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index de87ceac110e..609e232c00da 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -5,6 +5,7 @@
* GPL LICENSE SUMMARY
*
* Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ * Copyright (C) 2016 T-Platforms. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -18,6 +19,7 @@
* BSD LICENSE
*
* Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ * Copyright (C) 2016 T-Platforms. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -106,6 +108,7 @@ static inline char *ntb_topo_string(enum ntb_topo topo)
* @NTB_SPEED_GEN1: Link is trained to gen1 speed.
* @NTB_SPEED_GEN2: Link is trained to gen2 speed.
* @NTB_SPEED_GEN3: Link is trained to gen3 speed.
+ * @NTB_SPEED_GEN4: Link is trained to gen4 speed.
*/
enum ntb_speed {
NTB_SPEED_AUTO = -1,
@@ -113,6 +116,7 @@ enum ntb_speed {
NTB_SPEED_GEN1 = 1,
NTB_SPEED_GEN2 = 2,
NTB_SPEED_GEN3 = 3,
+ NTB_SPEED_GEN4 = 4
};
/**
@@ -140,6 +144,20 @@ enum ntb_width {
};
/**
+ * enum ntb_default_port - NTB default port number
+ * @NTB_PORT_PRI_USD: Default port of the NTB_TOPO_PRI/NTB_TOPO_B2B_USD
+ * topologies
+ * @NTB_PORT_SEC_DSD: Default port of the NTB_TOPO_SEC/NTB_TOPO_B2B_DSD
+ * topologies
+ */
+enum ntb_default_port {
+ NTB_PORT_PRI_USD,
+ NTB_PORT_SEC_DSD
+};
+#define NTB_DEF_PEER_CNT (1)
+#define NTB_DEF_PEER_IDX (0)
+
+/**
* struct ntb_client_ops - ntb client operations
* @probe: Notify client of a new device.
* @remove: Notify client to remove a device.
@@ -162,10 +180,12 @@ static inline int ntb_client_ops_is_valid(const struct ntb_client_ops *ops)
* struct ntb_ctx_ops - ntb driver context operations
* @link_event: See ntb_link_event().
* @db_event: See ntb_db_event().
+ * @msg_event: See ntb_msg_event().
*/
struct ntb_ctx_ops {
void (*link_event)(void *ctx);
void (*db_event)(void *ctx, int db_vector);
+ void (*msg_event)(void *ctx);
};
static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
@@ -174,18 +194,27 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
return
/* ops->link_event && */
/* ops->db_event && */
+ /* ops->msg_event && */
1;
}
/**
* struct ntb_ctx_ops - ntb device operations
- * @mw_count: See ntb_mw_count().
- * @mw_get_range: See ntb_mw_get_range().
- * @mw_set_trans: See ntb_mw_set_trans().
- * @mw_clear_trans: See ntb_mw_clear_trans().
+ * @port_number: See ntb_port_number().
+ * @peer_port_count: See ntb_peer_port_count().
+ * @peer_port_number: See ntb_peer_port_number().
+ * @peer_port_idx: See ntb_peer_port_idx().
* @link_is_up: See ntb_link_is_up().
* @link_enable: See ntb_link_enable().
* @link_disable: See ntb_link_disable().
+ * @mw_count: See ntb_mw_count().
+ * @mw_get_align: See ntb_mw_get_align().
+ * @mw_set_trans: See ntb_mw_set_trans().
+ * @mw_clear_trans: See ntb_mw_clear_trans().
+ * @peer_mw_count: See ntb_peer_mw_count().
+ * @peer_mw_get_addr: See ntb_peer_mw_get_addr().
+ * @peer_mw_set_trans: See ntb_peer_mw_set_trans().
+ * @peer_mw_clear_trans:See ntb_peer_mw_clear_trans().
* @db_is_unsafe: See ntb_db_is_unsafe().
* @db_valid_mask: See ntb_db_valid_mask().
* @db_vector_count: See ntb_db_vector_count().
@@ -210,22 +239,43 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
* @peer_spad_addr: See ntb_peer_spad_addr().
* @peer_spad_read: See ntb_peer_spad_read().
* @peer_spad_write: See ntb_peer_spad_write().
+ * @msg_count: See ntb_msg_count().
+ * @msg_inbits: See ntb_msg_inbits().
+ * @msg_outbits: See ntb_msg_outbits().
+ * @msg_read_sts: See ntb_msg_read_sts().
+ * @msg_clear_sts: See ntb_msg_clear_sts().
+ * @msg_set_mask: See ntb_msg_set_mask().
+ * @msg_clear_mask: See ntb_msg_clear_mask().
+ * @msg_read: See ntb_msg_read().
+ * @msg_write: See ntb_msg_write().
*/
struct ntb_dev_ops {
- int (*mw_count)(struct ntb_dev *ntb);
- int (*mw_get_range)(struct ntb_dev *ntb, int idx,
- phys_addr_t *base, resource_size_t *size,
- resource_size_t *align, resource_size_t *align_size);
- int (*mw_set_trans)(struct ntb_dev *ntb, int idx,
- dma_addr_t addr, resource_size_t size);
- int (*mw_clear_trans)(struct ntb_dev *ntb, int idx);
+ int (*port_number)(struct ntb_dev *ntb);
+ int (*peer_port_count)(struct ntb_dev *ntb);
+ int (*peer_port_number)(struct ntb_dev *ntb, int pidx);
+ int (*peer_port_idx)(struct ntb_dev *ntb, int port);
- int (*link_is_up)(struct ntb_dev *ntb,
+ u64 (*link_is_up)(struct ntb_dev *ntb,
enum ntb_speed *speed, enum ntb_width *width);
int (*link_enable)(struct ntb_dev *ntb,
enum ntb_speed max_speed, enum ntb_width max_width);
int (*link_disable)(struct ntb_dev *ntb);
+ int (*mw_count)(struct ntb_dev *ntb, int pidx);
+ int (*mw_get_align)(struct ntb_dev *ntb, int pidx, int widx,
+ resource_size_t *addr_align,
+ resource_size_t *size_align,
+ resource_size_t *size_max);
+ int (*mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx,
+ dma_addr_t addr, resource_size_t size);
+ int (*mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx);
+ int (*peer_mw_count)(struct ntb_dev *ntb);
+ int (*peer_mw_get_addr)(struct ntb_dev *ntb, int widx,
+ phys_addr_t *base, resource_size_t *size);
+ int (*peer_mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx,
+ u64 addr, resource_size_t size);
+ int (*peer_mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx);
+
int (*db_is_unsafe)(struct ntb_dev *ntb);
u64 (*db_valid_mask)(struct ntb_dev *ntb);
int (*db_vector_count)(struct ntb_dev *ntb);
@@ -252,32 +302,55 @@ struct ntb_dev_ops {
int (*spad_is_unsafe)(struct ntb_dev *ntb);
int (*spad_count)(struct ntb_dev *ntb);
- u32 (*spad_read)(struct ntb_dev *ntb, int idx);
- int (*spad_write)(struct ntb_dev *ntb, int idx, u32 val);
+ u32 (*spad_read)(struct ntb_dev *ntb, int sidx);
+ int (*spad_write)(struct ntb_dev *ntb, int sidx, u32 val);
- int (*peer_spad_addr)(struct ntb_dev *ntb, int idx,
+ int (*peer_spad_addr)(struct ntb_dev *ntb, int pidx, int sidx,
phys_addr_t *spad_addr);
- u32 (*peer_spad_read)(struct ntb_dev *ntb, int idx);
- int (*peer_spad_write)(struct ntb_dev *ntb, int idx, u32 val);
+ u32 (*peer_spad_read)(struct ntb_dev *ntb, int pidx, int sidx);
+ int (*peer_spad_write)(struct ntb_dev *ntb, int pidx, int sidx,
+ u32 val);
+
+ int (*msg_count)(struct ntb_dev *ntb);
+ u64 (*msg_inbits)(struct ntb_dev *ntb);
+ u64 (*msg_outbits)(struct ntb_dev *ntb);
+ u64 (*msg_read_sts)(struct ntb_dev *ntb);
+ int (*msg_clear_sts)(struct ntb_dev *ntb, u64 sts_bits);
+ int (*msg_set_mask)(struct ntb_dev *ntb, u64 mask_bits);
+ int (*msg_clear_mask)(struct ntb_dev *ntb, u64 mask_bits);
+ int (*msg_read)(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg);
+ int (*msg_write)(struct ntb_dev *ntb, int midx, int pidx, u32 msg);
};
static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
{
/* commented callbacks are not required: */
return
- ops->mw_count &&
- ops->mw_get_range &&
- ops->mw_set_trans &&
- /* ops->mw_clear_trans && */
+ /* Port operations are required for multiport devices */
+ !ops->peer_port_count == !ops->port_number &&
+ !ops->peer_port_number == !ops->port_number &&
+ !ops->peer_port_idx == !ops->port_number &&
+
+ /* Link operations are required */
ops->link_is_up &&
ops->link_enable &&
ops->link_disable &&
+
+ /* One or both MW interfaces should be developed */
+ ops->mw_count &&
+ ops->mw_get_align &&
+ (ops->mw_set_trans ||
+ ops->peer_mw_set_trans) &&
+ /* ops->mw_clear_trans && */
+ ops->peer_mw_count &&
+ ops->peer_mw_get_addr &&
+ /* ops->peer_mw_clear_trans && */
+
+ /* Doorbell operations are mostly required */
/* ops->db_is_unsafe && */
ops->db_valid_mask &&
-
/* both set, or both unset */
- (!ops->db_vector_count == !ops->db_vector_mask) &&
-
+ (!ops->db_vector_count == !ops->db_vector_mask) &&
ops->db_read &&
/* ops->db_set && */
ops->db_clear &&
@@ -291,13 +364,24 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
/* ops->peer_db_read_mask && */
/* ops->peer_db_set_mask && */
/* ops->peer_db_clear_mask && */
- /* ops->spad_is_unsafe && */
- ops->spad_count &&
- ops->spad_read &&
- ops->spad_write &&
- /* ops->peer_spad_addr && */
- /* ops->peer_spad_read && */
- ops->peer_spad_write &&
+
+ /* Scrachpads interface is optional */
+ /* !ops->spad_is_unsafe == !ops->spad_count && */
+ !ops->spad_read == !ops->spad_count &&
+ !ops->spad_write == !ops->spad_count &&
+ /* !ops->peer_spad_addr == !ops->spad_count && */
+ /* !ops->peer_spad_read == !ops->spad_count && */
+ !ops->peer_spad_write == !ops->spad_count &&
+
+ /* Messaging interface is optional */
+ !ops->msg_inbits == !ops->msg_count &&
+ !ops->msg_outbits == !ops->msg_count &&
+ !ops->msg_read_sts == !ops->msg_count &&
+ !ops->msg_clear_sts == !ops->msg_count &&
+ /* !ops->msg_set_mask == !ops->msg_count && */
+ /* !ops->msg_clear_mask == !ops->msg_count && */
+ !ops->msg_read == !ops->msg_count &&
+ !ops->msg_write == !ops->msg_count &&
1;
}
@@ -310,13 +394,12 @@ struct ntb_client {
struct device_driver drv;
const struct ntb_client_ops ops;
};
-
#define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv)
/**
* struct ntb_device - ntb device
* @dev: Linux device object.
- * @pdev: Pci device entry of the ntb.
+ * @pdev: PCI device entry of the ntb.
* @topo: Detected topology of the ntb.
* @ops: See &ntb_dev_ops.
* @ctx: See &ntb_ctx_ops.
@@ -337,7 +420,6 @@ struct ntb_dev {
/* block unregister until device is fully released */
struct completion released;
};
-
#define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev)
/**
@@ -434,86 +516,152 @@ void ntb_link_event(struct ntb_dev *ntb);
* multiple interrupt vectors for doorbells, the vector number indicates which
* vector received the interrupt. The vector number is relative to the first
* vector used for doorbells, starting at zero, and must be less than
- ** ntb_db_vector_count(). The driver may call ntb_db_read() to check which
+ * ntb_db_vector_count(). The driver may call ntb_db_read() to check which
* doorbell bits need service, and ntb_db_vector_mask() to determine which of
* those bits are associated with the vector number.
*/
void ntb_db_event(struct ntb_dev *ntb, int vector);
/**
- * ntb_mw_count() - get the number of memory windows
+ * ntb_msg_event() - notify driver context of a message event
* @ntb: NTB device context.
*
- * Hardware and topology may support a different number of memory windows.
+ * Notify the driver context of a message event. If hardware supports
+ * message registers, this event indicates, that a new message arrived in
+ * some incoming message register or last sent message couldn't be delivered.
+ * The events can be masked/unmasked by the methods ntb_msg_set_mask() and
+ * ntb_msg_clear_mask().
+ */
+void ntb_msg_event(struct ntb_dev *ntb);
+
+/**
+ * ntb_default_port_number() - get the default local port number
+ * @ntb: NTB device context.
*
- * Return: the number of memory windows.
+ * If hardware driver doesn't specify port_number() callback method, the NTB
+ * is considered with just two ports. So this method returns default local
+ * port number in compliance with topology.
+ *
+ * NOTE Don't call this method directly. The ntb_port_number() function should
+ * be used instead.
+ *
+ * Return: the default local port number
+ */
+int ntb_default_port_number(struct ntb_dev *ntb);
+
+/**
+ * ntb_default_port_count() - get the default number of peer device ports
+ * @ntb: NTB device context.
+ *
+ * By default hardware driver supports just one peer device.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_count() function
+ * should be used instead.
+ *
+ * Return: the default number of peer ports
+ */
+int ntb_default_peer_port_count(struct ntb_dev *ntb);
+
+/**
+ * ntb_default_peer_port_number() - get the default peer port by given index
+ * @ntb: NTB device context.
+ * @idx: Peer port index (should not differ from zero).
+ *
+ * By default hardware driver supports just one peer device, so this method
+ * shall return the corresponding value from enum ntb_default_port.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_number() function
+ * should be used instead.
+ *
+ * Return: the peer device port or negative value indicating an error
+ */
+int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx);
+
+/**
+ * ntb_default_peer_port_idx() - get the default peer device port index by
+ * given port number
+ * @ntb: NTB device context.
+ * @port: Peer port number (should be one of enum ntb_default_port).
+ *
+ * By default hardware driver supports just one peer device, so while
+ * specified port-argument indicates peer port from enum ntb_default_port,
+ * the return value shall be zero.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_idx() function
+ * should be used instead.
+ *
+ * Return: the peer port index or negative value indicating an error
+ */
+int ntb_default_peer_port_idx(struct ntb_dev *ntb, int port);
+
+/**
+ * ntb_port_number() - get the local port number
+ * @ntb: NTB device context.
+ *
+ * Hardware must support at least simple two-ports ntb connection
+ *
+ * Return: the local port number
*/
-static inline int ntb_mw_count(struct ntb_dev *ntb)
+static inline int ntb_port_number(struct ntb_dev *ntb)
{
- return ntb->ops->mw_count(ntb);
+ if (!ntb->ops->port_number)
+ return ntb_default_port_number(ntb);
+
+ return ntb->ops->port_number(ntb);
}
/**
- * ntb_mw_get_range() - get the range of a memory window
+ * ntb_peer_port_count() - get the number of peer device ports
* @ntb: NTB device context.
- * @idx: Memory window number.
- * @base: OUT - the base address for mapping the memory window
- * @size: OUT - the size for mapping the memory window
- * @align: OUT - the base alignment for translating the memory window
- * @align_size: OUT - the size alignment for translating the memory window
*
- * Get the range of a memory window. NULL may be given for any output
- * parameter if the value is not needed. The base and size may be used for
- * mapping the memory window, to access the peer memory. The alignment and
- * size may be used for translating the memory window, for the peer to access
- * memory on the local system.
+ * Hardware may support an access to memory of several remote domains
+ * over multi-port NTB devices. This method returns the number of peers,
+ * local device can have shared memory with.
*
- * Return: Zero on success, otherwise an error number.
+ * Return: the number of peer ports
*/
-static inline int ntb_mw_get_range(struct ntb_dev *ntb, int idx,
- phys_addr_t *base, resource_size_t *size,
- resource_size_t *align, resource_size_t *align_size)
+static inline int ntb_peer_port_count(struct ntb_dev *ntb)
{
- return ntb->ops->mw_get_range(ntb, idx, base, size,
- align, align_size);
+ if (!ntb->ops->peer_port_count)
+ return ntb_default_peer_port_count(ntb);
+
+ return ntb->ops->peer_port_count(ntb);
}
/**
- * ntb_mw_set_trans() - set the translation of a memory window
+ * ntb_peer_port_number() - get the peer port by given index
* @ntb: NTB device context.
- * @idx: Memory window number.
- * @addr: The dma address local memory to expose to the peer.
- * @size: The size of the local memory to expose to the peer.
+ * @pidx: Peer port index.
*
- * Set the translation of a memory window. The peer may access local memory
- * through the window starting at the address, up to the size. The address
- * must be aligned to the alignment specified by ntb_mw_get_range(). The size
- * must be aligned to the size alignment specified by ntb_mw_get_range().
+ * Peer ports are continuously enumerated by NTB API logic, so this method
+ * lets to retrieve port real number by its index.
*
- * Return: Zero on success, otherwise an error number.
+ * Return: the peer device port or negative value indicating an error
*/
-static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
- dma_addr_t addr, resource_size_t size)
+static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx)
{
- return ntb->ops->mw_set_trans(ntb, idx, addr, size);
+ if (!ntb->ops->peer_port_number)
+ return ntb_default_peer_port_number(ntb, pidx);
+
+ return ntb->ops->peer_port_number(ntb, pidx);
}
/**
- * ntb_mw_clear_trans() - clear the translation of a memory window
+ * ntb_peer_port_idx() - get the peer device port index by given port number
* @ntb: NTB device context.
- * @idx: Memory window number.
+ * @port: Peer port number.
*
- * Clear the translation of a memory window. The peer may no longer access
- * local memory through the window.
+ * Inverse operation of ntb_peer_port_number(), so one can get port index
+ * by specified port number.
*
- * Return: Zero on success, otherwise an error number.
+ * Return: the peer port index or negative value indicating an error
*/
-static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx)
+static inline int ntb_peer_port_idx(struct ntb_dev *ntb, int port)
{
- if (!ntb->ops->mw_clear_trans)
- return ntb->ops->mw_set_trans(ntb, idx, 0, 0);
+ if (!ntb->ops->peer_port_idx)
+ return ntb_default_peer_port_idx(ntb, port);
- return ntb->ops->mw_clear_trans(ntb, idx);
+ return ntb->ops->peer_port_idx(ntb, port);
}
/**
@@ -526,25 +674,26 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx)
* state once after every link event. It is safe to query the link state in
* the context of the link event callback.
*
- * Return: One if the link is up, zero if the link is down, otherwise a
- * negative value indicating the error number.
+ * Return: bitfield of indexed ports link state: bit is set/cleared if the
+ * link is up/down respectively.
*/
-static inline int ntb_link_is_up(struct ntb_dev *ntb,
+static inline u64 ntb_link_is_up(struct ntb_dev *ntb,
enum ntb_speed *speed, enum ntb_width *width)
{
return ntb->ops->link_is_up(ntb, speed, width);
}
/**
- * ntb_link_enable() - enable the link on the secondary side of the ntb
+ * ntb_link_enable() - enable the local port ntb connection
* @ntb: NTB device context.
* @max_speed: The maximum link speed expressed as PCIe generation number.
* @max_width: The maximum link width expressed as the number of PCIe lanes.
*
- * Enable the link on the secondary side of the ntb. This can only be done
- * from the primary side of the ntb in primary or b2b topology. The ntb device
- * should train the link to its maximum speed and width, or the requested speed
- * and width, whichever is smaller, if supported.
+ * Enable the NTB/PCIe link on the local or remote (for bridge-to-bridge
+ * topology) side of the bridge. If it's supported the ntb device should train
+ * the link to its maximum speed and width, or the requested speed and width,
+ * whichever is smaller. Some hardware doesn't support PCIe link training, so
+ * the last two arguments will be ignored then.
*
* Return: Zero on success, otherwise an error number.
*/
@@ -556,14 +705,14 @@ static inline int ntb_link_enable(struct ntb_dev *ntb,
}
/**
- * ntb_link_disable() - disable the link on the secondary side of the ntb
+ * ntb_link_disable() - disable the local port ntb connection
* @ntb: NTB device context.
*
- * Disable the link on the secondary side of the ntb. This can only be
- * done from the primary side of the ntb in primary or b2b topology. The ntb
- * device should disable the link. Returning from this call must indicate that
- * a barrier has passed, though with no more writes may pass in either
- * direction across the link, except if this call returns an error number.
+ * Disable the link on the local or remote (for b2b topology) of the ntb.
+ * The ntb device should disable the link. Returning from this call must
+ * indicate that a barrier has passed, though with no more writes may pass in
+ * either direction across the link, except if this call returns an error
+ * number.
*
* Return: Zero on success, otherwise an error number.
*/
@@ -573,6 +722,183 @@ static inline int ntb_link_disable(struct ntb_dev *ntb)
}
/**
+ * ntb_mw_count() - get the number of inbound memory windows, which could
+ * be created for a specified peer device
+ * @ntb: NTB device context.
+ * @pidx: Port index of peer device.
+ *
+ * Hardware and topology may support a different number of memory windows.
+ * Moreover different peer devices can support different number of memory
+ * windows. Simply speaking this method returns the number of possible inbound
+ * memory windows to share with specified peer device.
+ *
+ * Return: the number of memory windows.
+ */
+static inline int ntb_mw_count(struct ntb_dev *ntb, int pidx)
+{
+ return ntb->ops->mw_count(ntb, pidx);
+}
+
+/**
+ * ntb_mw_get_align() - get the restriction parameters of inbound memory window
+ * @ntb: NTB device context.
+ * @pidx: Port index of peer device.
+ * @widx: Memory window index.
+ * @addr_align: OUT - the base alignment for translating the memory window
+ * @size_align: OUT - the size alignment for translating the memory window
+ * @size_max: OUT - the maximum size of the memory window
+ *
+ * Get the alignments of an inbound memory window with specified index.
+ * NULL may be given for any output parameter if the value is not needed.
+ * The alignment and size parameters may be used for allocation of proper
+ * shared memory.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx,
+ resource_size_t *addr_align,
+ resource_size_t *size_align,
+ resource_size_t *size_max)
+{
+ return ntb->ops->mw_get_align(ntb, pidx, widx, addr_align, size_align,
+ size_max);
+}
+
+/**
+ * ntb_mw_set_trans() - set the translation of an inbound memory window
+ * @ntb: NTB device context.
+ * @pidx: Port index of peer device.
+ * @widx: Memory window index.
+ * @addr: The dma address of local memory to expose to the peer.
+ * @size: The size of the local memory to expose to the peer.
+ *
+ * Set the translation of a memory window. The peer may access local memory
+ * through the window starting at the address, up to the size. The address
+ * and size must be aligned in compliance with restrictions of
+ * ntb_mw_get_align(). The region size should not exceed the size_max parameter
+ * of that method.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+ dma_addr_t addr, resource_size_t size)
+{
+ if (!ntb->ops->mw_set_trans)
+ return 0;
+
+ return ntb->ops->mw_set_trans(ntb, pidx, widx, addr, size);
+}
+
+/**
+ * ntb_mw_clear_trans() - clear the translation address of an inbound memory
+ * window
+ * @ntb: NTB device context.
+ * @pidx: Port index of peer device.
+ * @widx: Memory window index.
+ *
+ * Clear the translation of an inbound memory window. The peer may no longer
+ * access local memory through the window.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int pidx, int widx)
+{
+ if (!ntb->ops->mw_clear_trans)
+ return ntb_mw_set_trans(ntb, pidx, widx, 0, 0);
+
+ return ntb->ops->mw_clear_trans(ntb, pidx, widx);
+}
+
+/**
+ * ntb_peer_mw_count() - get the number of outbound memory windows, which could
+ * be mapped to access a shared memory
+ * @ntb: NTB device context.
+ *
+ * Hardware and topology may support a different number of memory windows.
+ * This method returns the number of outbound memory windows supported by
+ * local device.
+ *
+ * Return: the number of memory windows.
+ */
+static inline int ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+ return ntb->ops->peer_mw_count(ntb);
+}
+
+/**
+ * ntb_peer_mw_get_addr() - get map address of an outbound memory window
+ * @ntb: NTB device context.
+ * @widx: Memory window index (within ntb_peer_mw_count() return value).
+ * @base: OUT - the base address of mapping region.
+ * @size: OUT - the size of mapping region.
+ *
+ * Get base and size of memory region to map. NULL may be given for any output
+ * parameter if the value is not needed. The base and size may be used for
+ * mapping the memory window, to access the peer memory.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_peer_mw_get_addr(struct ntb_dev *ntb, int widx,
+ phys_addr_t *base, resource_size_t *size)
+{
+ return ntb->ops->peer_mw_get_addr(ntb, widx, base, size);
+}
+
+/**
+ * ntb_peer_mw_set_trans() - set a translation address of a memory window
+ * retrieved from a peer device
+ * @ntb: NTB device context.
+ * @pidx: Port index of peer device the translation address received from.
+ * @widx: Memory window index.
+ * @addr: The dma address of the shared memory to access.
+ * @size: The size of the shared memory to access.
+ *
+ * Set the translation of an outbound memory window. The local device may
+ * access shared memory allocated by a peer device sent the address.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface, so a translation address can be only set on the side,
+ * where shared memory (inbound memory windows) is allocated.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+ u64 addr, resource_size_t size)
+{
+ if (!ntb->ops->peer_mw_set_trans)
+ return 0;
+
+ return ntb->ops->peer_mw_set_trans(ntb, pidx, widx, addr, size);
+}
+
+/**
+ * ntb_peer_mw_clear_trans() - clear the translation address of an outbound
+ * memory window
+ * @ntb: NTB device context.
+ * @pidx: Port index of peer device.
+ * @widx: Memory window index.
+ *
+ * Clear the translation of a outbound memory window. The local device may no
+ * longer access a shared memory through the window.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_peer_mw_clear_trans(struct ntb_dev *ntb, int pidx,
+ int widx)
+{
+ if (!ntb->ops->peer_mw_clear_trans)
+ return ntb_peer_mw_set_trans(ntb, pidx, widx, 0, 0);
+
+ return ntb->ops->peer_mw_clear_trans(ntb, pidx, widx);
+}
+
+/**
* ntb_db_is_unsafe() - check if it is safe to use hardware doorbell
* @ntb: NTB device context.
*
@@ -900,47 +1226,58 @@ static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb)
* @ntb: NTB device context.
*
* Hardware and topology may support a different number of scratchpads.
+ * Although it must be the same for all ports per NTB device.
*
* Return: the number of scratchpads.
*/
static inline int ntb_spad_count(struct ntb_dev *ntb)
{
+ if (!ntb->ops->spad_count)
+ return 0;
+
return ntb->ops->spad_count(ntb);
}
/**
* ntb_spad_read() - read the local scratchpad register
* @ntb: NTB device context.
- * @idx: Scratchpad index.
+ * @sidx: Scratchpad index.
*
* Read the local scratchpad register, and return the value.
*
* Return: The value of the local scratchpad register.
*/
-static inline u32 ntb_spad_read(struct ntb_dev *ntb, int idx)
+static inline u32 ntb_spad_read(struct ntb_dev *ntb, int sidx)
{
- return ntb->ops->spad_read(ntb, idx);
+ if (!ntb->ops->spad_read)
+ return ~(u32)0;
+
+ return ntb->ops->spad_read(ntb, sidx);
}
/**
* ntb_spad_write() - write the local scratchpad register
* @ntb: NTB device context.
- * @idx: Scratchpad index.
+ * @sidx: Scratchpad index.
* @val: Scratchpad value.
*
* Write the value to the local scratchpad register.
*
* Return: Zero on success, otherwise an error number.
*/
-static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+static inline int ntb_spad_write(struct ntb_dev *ntb, int sidx, u32 val)
{
- return ntb->ops->spad_write(ntb, idx, val);
+ if (!ntb->ops->spad_write)
+ return -EINVAL;
+
+ return ntb->ops->spad_write(ntb, sidx, val);
}
/**
* ntb_peer_spad_addr() - address of the peer scratchpad register
* @ntb: NTB device context.
- * @idx: Scratchpad index.
+ * @pidx: Port index of peer device.
+ * @sidx: Scratchpad index.
* @spad_addr: OUT - The address of the peer scratchpad register.
*
* Return the address of the peer doorbell register. This may be used, for
@@ -948,45 +1285,213 @@ static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
*
* Return: Zero on success, otherwise an error number.
*/
-static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx,
phys_addr_t *spad_addr)
{
if (!ntb->ops->peer_spad_addr)
return -EINVAL;
- return ntb->ops->peer_spad_addr(ntb, idx, spad_addr);
+ return ntb->ops->peer_spad_addr(ntb, pidx, sidx, spad_addr);
}
/**
* ntb_peer_spad_read() - read the peer scratchpad register
* @ntb: NTB device context.
- * @idx: Scratchpad index.
+ * @pidx: Port index of peer device.
+ * @sidx: Scratchpad index.
*
* Read the peer scratchpad register, and return the value.
*
* Return: The value of the local scratchpad register.
*/
-static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
{
if (!ntb->ops->peer_spad_read)
- return 0;
+ return ~(u32)0;
- return ntb->ops->peer_spad_read(ntb, idx);
+ return ntb->ops->peer_spad_read(ntb, pidx, sidx);
}
/**
* ntb_peer_spad_write() - write the peer scratchpad register
* @ntb: NTB device context.
- * @idx: Scratchpad index.
+ * @pidx: Port index of peer device.
+ * @sidx: Scratchpad index.
* @val: Scratchpad value.
*
* Write the value to the peer scratchpad register.
*
* Return: Zero on success, otherwise an error number.
*/
-static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx,
+ u32 val)
+{
+ if (!ntb->ops->peer_spad_write)
+ return -EINVAL;
+
+ return ntb->ops->peer_spad_write(ntb, pidx, sidx, val);
+}
+
+/**
+ * ntb_msg_count() - get the number of message registers
+ * @ntb: NTB device context.
+ *
+ * Hardware may support a different number of message registers.
+ *
+ * Return: the number of message registers.
+ */
+static inline int ntb_msg_count(struct ntb_dev *ntb)
+{
+ if (!ntb->ops->msg_count)
+ return 0;
+
+ return ntb->ops->msg_count(ntb);
+}
+
+/**
+ * ntb_msg_inbits() - get a bitfield of inbound message registers status
+ * @ntb: NTB device context.
+ *
+ * The method returns the bitfield of status and mask registers, which related
+ * to inbound message registers.
+ *
+ * Return: bitfield of inbound message registers.
+ */
+static inline u64 ntb_msg_inbits(struct ntb_dev *ntb)
{
- return ntb->ops->peer_spad_write(ntb, idx, val);
+ if (!ntb->ops->msg_inbits)
+ return 0;
+
+ return ntb->ops->msg_inbits(ntb);
+}
+
+/**
+ * ntb_msg_outbits() - get a bitfield of outbound message registers status
+ * @ntb: NTB device context.
+ *
+ * The method returns the bitfield of status and mask registers, which related
+ * to outbound message registers.
+ *
+ * Return: bitfield of outbound message registers.
+ */
+static inline u64 ntb_msg_outbits(struct ntb_dev *ntb)
+{
+ if (!ntb->ops->msg_outbits)
+ return 0;
+
+ return ntb->ops->msg_outbits(ntb);
+}
+
+/**
+ * ntb_msg_read_sts() - read the message registers status
+ * @ntb: NTB device context.
+ *
+ * Read the status of message register. Inbound and outbound message registers
+ * related bits can be filtered by masks retrieved from ntb_msg_inbits() and
+ * ntb_msg_outbits().
+ *
+ * Return: status bits of message registers
+ */
+static inline u64 ntb_msg_read_sts(struct ntb_dev *ntb)
+{
+ if (!ntb->ops->msg_read_sts)
+ return 0;
+
+ return ntb->ops->msg_read_sts(ntb);
+}
+
+/**
+ * ntb_msg_clear_sts() - clear status bits of message registers
+ * @ntb: NTB device context.
+ * @sts_bits: Status bits to clear.
+ *
+ * Clear bits in the status register.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_clear_sts(struct ntb_dev *ntb, u64 sts_bits)
+{
+ if (!ntb->ops->msg_clear_sts)
+ return -EINVAL;
+
+ return ntb->ops->msg_clear_sts(ntb, sts_bits);
+}
+
+/**
+ * ntb_msg_set_mask() - set mask of message register status bits
+ * @ntb: NTB device context.
+ * @mask_bits: Mask bits.
+ *
+ * Mask the message registers status bits from raising the message event.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_set_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+ if (!ntb->ops->msg_set_mask)
+ return -EINVAL;
+
+ return ntb->ops->msg_set_mask(ntb, mask_bits);
+}
+
+/**
+ * ntb_msg_clear_mask() - clear message registers mask
+ * @ntb: NTB device context.
+ * @mask_bits: Mask bits to clear.
+ *
+ * Clear bits in the message events mask register.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+ if (!ntb->ops->msg_clear_mask)
+ return -EINVAL;
+
+ return ntb->ops->msg_clear_mask(ntb, mask_bits);
+}
+
+/**
+ * ntb_msg_read() - read message register with specified index
+ * @ntb: NTB device context.
+ * @midx: Message register index
+ * @pidx: OUT - Port index of peer device a message retrieved from
+ * @msg: OUT - Data
+ *
+ * Read data from the specified message register. Source port index of a
+ * message is retrieved as well.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx,
+ u32 *msg)
+{
+ if (!ntb->ops->msg_read)
+ return -EINVAL;
+
+ return ntb->ops->msg_read(ntb, midx, pidx, msg);
+}
+
+/**
+ * ntb_msg_write() - write data to the specified message register
+ * @ntb: NTB device context.
+ * @midx: Message register index
+ * @pidx: Port index of peer device a message being sent to
+ * @msg: Data to send
+ *
+ * Send data to a specified peer device using the defined message register.
+ * Message event can be raised if the midx registers isn't empty while
+ * calling this method and the corresponding interrupt isn't masked.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_write(struct ntb_dev *ntb, int midx, int pidx,
+ u32 msg)
+{
+ if (!ntb->ops->msg_write)
+ return -EINVAL;
+
+ return ntb->ops->msg_write(ntb, midx, pidx, msg);
}
#endif
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index bc711a10be05..21c37e39e41a 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -17,6 +17,7 @@
/*
* This file contains definitions relative to FC-NVME r1.14 (16-020vB).
+ * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content.
*/
#ifndef _NVME_FC_H
@@ -193,9 +194,21 @@ struct fcnvme_lsdesc_cr_assoc_cmd {
uuid_t hostid;
u8 hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
u8 subnqn[FCNVME_ASSOC_SUBNQN_LEN];
- u8 rsvd632[384];
+ __be32 rsvd584[108]; /* pad to 1016 bytes,
+ * which makes overall LS rqst
+ * payload 1024 bytes
+ */
};
+#define FCNVME_LSDESC_CRA_CMD_DESC_MINLEN \
+ offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, rsvd584)
+
+#define FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN \
+ (FCNVME_LSDESC_CRA_CMD_DESC_MINLEN - \
+ offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, ersp_ratio))
+
+
+
/* FCNVME_LSDESC_CREATE_CONN_CMD */
struct fcnvme_lsdesc_cr_conn_cmd {
__be32 desc_tag; /* FCNVME_LSDESC_xxx */
@@ -273,6 +286,14 @@ struct fcnvme_ls_cr_assoc_rqst {
struct fcnvme_lsdesc_cr_assoc_cmd assoc_cmd;
};
+#define FCNVME_LSDESC_CRA_RQST_MINLEN \
+ (offsetof(struct fcnvme_ls_cr_assoc_rqst, assoc_cmd) + \
+ FCNVME_LSDESC_CRA_CMD_DESC_MINLEN)
+
+#define FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN \
+ FCNVME_LSDESC_CRA_CMD_DESC_MINLEN
+
+
struct fcnvme_ls_cr_assoc_acc {
struct fcnvme_ls_acc_hdr hdr;
struct fcnvme_lsdesc_assoc_id associd;
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index cd93416d762e..497706f5adca 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -12,6 +12,9 @@
#ifndef _LINUX_NVMEM_PROVIDER_H
#define _LINUX_NVMEM_PROVIDER_H
+#include <linux/err.h>
+#include <linux/errno.h>
+
struct nvmem_device;
struct nvmem_cell_info;
typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset,
diff --git a/include/linux/of.h b/include/linux/of.h
index fa089a2789a0..4a8a70916237 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -100,10 +100,12 @@ struct of_reconfig_data {
/* initialize a node */
extern struct kobj_type of_node_ktype;
+extern const struct fwnode_operations of_fwnode_ops;
static inline void of_node_init(struct device_node *node)
{
kobject_init(&node->kobj, &of_node_ktype);
node->fwnode.type = FWNODE_OF;
+ node->fwnode.ops = &of_fwnode_ops;
}
/* true when node is initialized */
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 610e13271918..1fd71733aa68 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -174,6 +174,7 @@ static inline void page_ref_unfreeze(struct page *page, int count)
VM_BUG_ON_PAGE(page_count(page) != 0, page);
VM_BUG_ON(count == 0);
+ smp_mb();
atomic_set(&page->_refcount, count);
if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze))
__page_ref_unfreeze(page, count);
diff --git a/include/linux/platform_data/usb-ohci-s3c2410.h b/include/linux/platform_data/usb-ohci-s3c2410.h
index 7fa1fbefc3f2..cc7554ae6e8b 100644
--- a/include/linux/platform_data/usb-ohci-s3c2410.h
+++ b/include/linux/platform_data/usb-ohci-s3c2410.h
@@ -31,7 +31,7 @@ struct s3c2410_hcd_info {
void (*report_oc)(struct s3c2410_hcd_info *, int ports);
};
-static void inline s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
+static inline void s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
{
if (info->report_oc != NULL) {
(info->report_oc)(info, ports);
diff --git a/include/linux/property.h b/include/linux/property.h
index 2f482616a2f2..7e77039e6b81 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -51,6 +51,7 @@ int device_property_read_string(struct device *dev, const char *propname,
int device_property_match_string(struct device *dev,
const char *propname, const char *string);
+bool fwnode_device_is_available(struct fwnode_handle *fwnode);
bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname);
int fwnode_property_read_u8_array(struct fwnode_handle *fwnode,
const char *propname, u8 *val,
@@ -274,12 +275,16 @@ void *device_get_mac_address(struct device *dev, char *addr, int alen);
struct fwnode_handle *fwnode_graph_get_next_endpoint(
struct fwnode_handle *fwnode, struct fwnode_handle *prev);
+struct fwnode_handle *
+fwnode_graph_get_port_parent(struct fwnode_handle *fwnode);
struct fwnode_handle *fwnode_graph_get_remote_port_parent(
struct fwnode_handle *fwnode);
struct fwnode_handle *fwnode_graph_get_remote_port(
struct fwnode_handle *fwnode);
struct fwnode_handle *fwnode_graph_get_remote_endpoint(
struct fwnode_handle *fwnode);
+struct fwnode_handle *fwnode_graph_get_remote_node(struct fwnode_handle *fwnode,
+ u32 port, u32 endpoint);
int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
struct fwnode_endpoint *endpoint);
diff --git a/include/linux/random.h b/include/linux/random.h
index ed5c3838780d..1fa0dc880bd7 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -57,6 +57,27 @@ static inline unsigned long get_random_long(void)
#endif
}
+/*
+ * On 64-bit architectures, protect against non-terminated C string overflows
+ * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
+ */
+#ifdef CONFIG_64BIT
+# ifdef __LITTLE_ENDIAN
+# define CANARY_MASK 0xffffffffffffff00UL
+# else /* big endian, 64 bits: */
+# define CANARY_MASK 0x00ffffffffffffffUL
+# endif
+#else /* 32 bits: */
+# define CANARY_MASK 0xffffffffUL
+#endif
+
+static inline unsigned long get_random_canary(void)
+{
+ unsigned long val = get_random_long();
+
+ return val & CANARY_MASK;
+}
+
unsigned long randomize_page(unsigned long start, unsigned long range);
u32 prandom_u32(void);
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index b693adac853b..0a0f0d14a5fb 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/interrupt.h>
+#include <linux/nvmem-provider.h>
#include <uapi/linux/rtc.h>
extern int rtc_month_days(unsigned int month, unsigned int year);
@@ -32,17 +33,11 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs)
return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs);
}
-/**
- * Deprecated. Use rtc_time64_to_tm().
- */
static inline void rtc_time_to_tm(unsigned long time, struct rtc_time *tm)
{
rtc_time64_to_tm(time, tm);
}
-/**
- * Deprecated. Use rtc_tm_to_time64().
- */
static inline int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time)
{
*time = rtc_tm_to_time64(tm);
@@ -116,7 +111,6 @@ struct rtc_device {
struct module *owner;
int id;
- char name[RTC_DEVICE_NAME_SIZE];
const struct rtc_class_ops *ops;
struct mutex ops_lock;
@@ -143,6 +137,14 @@ struct rtc_device {
/* Some hardware can't support UIE mode */
int uie_unsupported;
+ bool registered;
+
+ struct nvmem_config *nvmem_config;
+ struct nvmem_device *nvmem;
+ /* Old ABI support */
+ bool nvram_old_abi;
+ struct bin_attribute *nvram;
+
#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
struct work_struct uie_task;
struct timer_list uie_timer;
@@ -164,6 +166,8 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev,
const char *name,
const struct rtc_class_ops *ops,
struct module *owner);
+struct rtc_device *devm_rtc_allocate_device(struct device *dev);
+int __rtc_register_device(struct module *owner, struct rtc_device *rtc);
extern void rtc_device_unregister(struct rtc_device *rtc);
extern void devm_rtc_device_unregister(struct device *dev,
struct rtc_device *rtc);
@@ -219,6 +223,9 @@ static inline bool is_leap_year(unsigned int year)
return (!(year % 4) && (year % 100)) || !(year % 400);
}
+#define rtc_register_device(device) \
+ __rtc_register_device(THIS_MODULE, device)
+
#ifdef CONFIG_RTC_HCTOSYS_DEVICE
extern int rtc_hctosys_ret;
#else
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 20814b7d7d70..2ba9ec93423f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -974,6 +974,7 @@ struct task_struct {
#ifdef CONFIG_FAULT_INJECTION
int make_it_fail;
+ unsigned int fail_nth;
#endif
/*
* When (nr_dirtied >= nr_dirtied_pause), it's time to call
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 69eedcef8f03..98ae0d05aa32 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -68,7 +68,10 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */
#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */
#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
+#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */
+#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
-#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
+#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
+ MMF_DISABLE_THP_MASK)
#endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9edec926e9d9..be5cf2ea14ad 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -8,11 +8,29 @@
struct task_struct;
+/* One semaphore structure for each semaphore in the system. */
+struct sem {
+ int semval; /* current value */
+ /*
+ * PID of the process that last modified the semaphore. For
+ * Linux, specifically these are:
+ * - semop
+ * - semctl, via SETVAL and SETALL.
+ * - at task exit when performing undo adjustments (see exit_sem).
+ */
+ int sempid;
+ spinlock_t lock; /* spinlock for fine-grained semtimedop */
+ struct list_head pending_alter; /* pending single-sop operations */
+ /* that alter the semaphore */
+ struct list_head pending_const; /* pending single-sop operations */
+ /* that do not alter the semaphore*/
+ time_t sem_otime; /* candidate for sem_otime */
+} ____cacheline_aligned_in_smp;
+
/* One sem_array data structure for each set of semaphores in the system. */
struct sem_array {
struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */
- time_t sem_ctime; /* last change time */
- struct sem *sem_base; /* ptr to first semaphore in array */
+ time_t sem_ctime; /* create/last semctl() time */
struct list_head pending_alter; /* pending operations */
/* that alter the array */
struct list_head pending_const; /* pending complex operations */
@@ -21,6 +39,8 @@ struct sem_array {
int sem_nsems; /* no. of semaphores in array */
int complex_count; /* pending complex operations */
unsigned int use_global_lock;/* >0: global lock required */
+
+ struct sem sems[];
};
#ifdef CONFIG_SYSVIPC
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 04a7f7993e67..41473df6dfb0 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -471,7 +471,8 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
*
* %__GFP_NOWARN - If allocation fails, don't issue any warnings.
*
- * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
+ * %__GFP_RETRY_MAYFAIL - Try really hard to succeed the allocation but fail
+ * eventually.
*
* There are other flags available as well, but these are not intended
* for general use, and so are not documented here. For a full list of
diff --git a/include/linux/string.h b/include/linux/string.h
index 7439d83eaa33..049866760e8b 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -193,4 +193,205 @@ static inline const char *kbasename(const char *path)
return tail ? tail + 1 : path;
}
+#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline))
+#define __RENAME(x) __asm__(#x)
+
+void fortify_panic(const char *name) __noreturn __cold;
+void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter");
+void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter");
+void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
+
+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
+__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __write_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __builtin_strncpy(p, q, size);
+}
+
+__FORTIFY_INLINE char *strcat(char *p, const char *q)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (p_size == (size_t)-1)
+ return __builtin_strcat(p, q);
+ if (strlcat(p, q, p_size) >= p_size)
+ fortify_panic(__func__);
+ return p;
+}
+
+__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
+{
+ __kernel_size_t ret;
+ size_t p_size = __builtin_object_size(p, 0);
+ if (p_size == (size_t)-1)
+ return __builtin_strlen(p);
+ ret = strnlen(p, p_size);
+ if (p_size <= ret)
+ fortify_panic(__func__);
+ return ret;
+}
+
+extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+ if (p_size <= ret && maxlen != ret)
+ fortify_panic(__func__);
+ return ret;
+}
+
+/* defined after fortified strlen to reuse it */
+extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
+__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
+{
+ size_t ret;
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ return __real_strlcpy(p, q, size);
+ ret = strlen(q);
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ if (__builtin_constant_p(len) && len >= p_size)
+ __write_overflow();
+ if (len >= p_size)
+ fortify_panic(__func__);
+ __builtin_memcpy(p, q, len);
+ p[len] = '\0';
+ }
+ return ret;
+}
+
+/* defined after fortified strlen and strnlen to reuse them */
+__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
+{
+ size_t p_len, copy_len;
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ return __builtin_strncat(p, q, count);
+ p_len = strlen(p);
+ copy_len = strnlen(q, count);
+ if (p_size < p_len + copy_len + 1)
+ fortify_panic(__func__);
+ __builtin_memcpy(p + p_len, q, copy_len);
+ p[p_len + copy_len] = '\0';
+ return p;
+}
+
+__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __write_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __builtin_memset(p, c, size);
+}
+
+__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (__builtin_constant_p(size)) {
+ if (p_size < size)
+ __write_overflow();
+ if (q_size < size)
+ __read_overflow2();
+ }
+ if (p_size < size || q_size < size)
+ fortify_panic(__func__);
+ return __builtin_memcpy(p, q, size);
+}
+
+__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (__builtin_constant_p(size)) {
+ if (p_size < size)
+ __write_overflow();
+ if (q_size < size)
+ __read_overflow2();
+ }
+ if (p_size < size || q_size < size)
+ fortify_panic(__func__);
+ return __builtin_memmove(p, q, size);
+}
+
+extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
+__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __real_memscan(p, c, size);
+}
+
+__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (__builtin_constant_p(size)) {
+ if (p_size < size)
+ __read_overflow();
+ if (q_size < size)
+ __read_overflow2();
+ }
+ if (p_size < size || q_size < size)
+ fortify_panic(__func__);
+ return __builtin_memcmp(p, q, size);
+}
+
+__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __builtin_memchr(p, c, size);
+}
+
+void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
+__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __real_memchr_inv(p, c, size);
+}
+
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
+__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __real_kmemdup(p, size, gfp);
+}
+
+/* defined after fortified strlen and memcpy to reuse them */
+__FORTIFY_INLINE char *strcpy(char *p, const char *q)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ return __builtin_strcpy(p, q);
+ memcpy(p, q, strlen(q) + 1);
+ return p;
+}
+
+#endif
+
#endif /* _LINUX_STRING_H_ */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6095ecba0dde..55ef67bea06b 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -39,7 +39,7 @@ struct rpc_clnt {
struct list_head cl_tasks; /* List of tasks */
spinlock_t cl_lock; /* spinlock */
struct rpc_xprt __rcu * cl_xprt; /* transport */
- struct rpc_procinfo * cl_procinfo; /* procedure info */
+ const struct rpc_procinfo *cl_procinfo; /* procedure info */
u32 cl_prog, /* RPC program number */
cl_vers, /* RPC version number */
cl_maxproc; /* max procedure number */
@@ -87,7 +87,8 @@ struct rpc_program {
struct rpc_version {
u32 number; /* version number */
unsigned int nrprocs; /* number of procs */
- struct rpc_procinfo * procs; /* procedure array */
+ const struct rpc_procinfo *procs; /* procedure array */
+ unsigned int *counts; /* call counts */
};
/*
@@ -99,7 +100,6 @@ struct rpc_procinfo {
kxdrdproc_t p_decode; /* XDR decode function */
unsigned int p_arglen; /* argument hdr length (u32) */
unsigned int p_replen; /* reply hdr length (u32) */
- unsigned int p_count; /* call count */
unsigned int p_timer; /* Which RTT timer to use */
u32 p_statidx; /* Which procedure to account */
const char * p_name; /* name of procedure */
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 9d7529ffc4ce..50a99a117da7 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -22,7 +22,7 @@
*/
struct rpc_procinfo;
struct rpc_message {
- struct rpc_procinfo * rpc_proc; /* Procedure information */
+ const struct rpc_procinfo *rpc_proc; /* Procedure information */
void * rpc_argp; /* Arguments */
void * rpc_resp; /* Result */
struct rpc_cred * rpc_cred; /* Credentials */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 11cef5a7bc87..a3f8af9bd543 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -237,7 +237,7 @@ struct svc_rqst {
struct svc_serv * rq_server; /* RPC service definition */
struct svc_pool * rq_pool; /* thread pool */
- struct svc_procedure * rq_procinfo; /* procedure info */
+ const struct svc_procedure *rq_procinfo;/* procedure info */
struct auth_ops * rq_authop; /* authentication flavour */
struct svc_cred rq_cred; /* auth info */
void * rq_xprt_ctxt; /* transport specific context ptr */
@@ -246,7 +246,7 @@ struct svc_rqst {
size_t rq_xprt_hlen; /* xprt header len */
struct xdr_buf rq_arg;
struct xdr_buf rq_res;
- struct page * rq_pages[RPCSVC_MAXPAGES];
+ struct page *rq_pages[RPCSVC_MAXPAGES + 1];
struct page * *rq_respages; /* points into rq_pages */
struct page * *rq_next_page; /* next reply page to use */
struct page * *rq_page_end; /* one past the last page */
@@ -384,7 +384,7 @@ struct svc_program {
unsigned int pg_lovers; /* lowest version */
unsigned int pg_hivers; /* highest version */
unsigned int pg_nvers; /* number of versions */
- struct svc_version ** pg_vers; /* version array */
+ const struct svc_version **pg_vers; /* version array */
char * pg_name; /* service name */
char * pg_class; /* class name: services sharing authentication */
struct svc_stat * pg_stats; /* rpc statistics */
@@ -397,7 +397,8 @@ struct svc_program {
struct svc_version {
u32 vs_vers; /* version number */
u32 vs_nproc; /* number of procedures */
- struct svc_procedure * vs_proc; /* per-procedure info */
+ const struct svc_procedure *vs_proc; /* per-procedure info */
+ unsigned int *vs_count; /* call counts */
u32 vs_xdrsize; /* xdrsize needed for this version */
/* Don't register with rpcbind */
@@ -419,15 +420,17 @@ struct svc_version {
/*
* RPC procedure info
*/
-typedef __be32 (*svc_procfunc)(struct svc_rqst *, void *argp, void *resp);
struct svc_procedure {
- svc_procfunc pc_func; /* process the request */
- kxdrproc_t pc_decode; /* XDR decode args */
- kxdrproc_t pc_encode; /* XDR encode result */
- kxdrproc_t pc_release; /* XDR free result */
+ /* process the request: */
+ __be32 (*pc_func)(struct svc_rqst *);
+ /* XDR decode args: */
+ int (*pc_decode)(struct svc_rqst *, __be32 *data);
+ /* XDR encode result: */
+ int (*pc_encode)(struct svc_rqst *, __be32 *data);
+ /* XDR free result: */
+ void (*pc_release)(struct svc_rqst *);
unsigned int pc_argsize; /* argument struct size */
unsigned int pc_ressize; /* result struct size */
- unsigned int pc_count; /* call count */
unsigned int pc_cachetype; /* cache info (NFS) */
unsigned int pc_xdrressize; /* maximum size of XDR reply */
};
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index f3787d800ba4..995c6fe9ee90 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -77,46 +77,25 @@ extern atomic_t rdma_stat_sq_prod;
*/
struct svc_rdma_op_ctxt {
struct list_head list;
- struct svc_rdma_op_ctxt *read_hdr;
- struct svc_rdma_fastreg_mr *frmr;
- int hdr_count;
struct xdr_buf arg;
struct ib_cqe cqe;
- struct ib_cqe reg_cqe;
- struct ib_cqe inv_cqe;
u32 byte_len;
- u32 position;
struct svcxprt_rdma *xprt;
- unsigned long flags;
enum dma_data_direction direction;
int count;
unsigned int mapped_sges;
+ int hdr_count;
struct ib_send_wr send_wr;
struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE];
struct page *pages[RPCSVC_MAXPAGES];
};
-struct svc_rdma_fastreg_mr {
- struct ib_mr *mr;
- struct scatterlist *sg;
- int sg_nents;
- unsigned long access_flags;
- enum dma_data_direction direction;
- struct list_head frmr_list;
-};
-
-#define RDMACTXT_F_LAST_CTXT 2
-
-#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */
-#define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */
-
struct svcxprt_rdma {
struct svc_xprt sc_xprt; /* SVC transport structure */
struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
struct list_head sc_accept_q; /* Conn. waiting accept */
int sc_ord; /* RDMA read limit */
int sc_max_sge;
- int sc_max_sge_rd; /* max sge for read target */
bool sc_snd_w_inv; /* OK to use Send With Invalidate */
atomic_t sc_sq_avail; /* SQEs ready to be consumed */
@@ -141,14 +120,6 @@ struct svcxprt_rdma {
struct ib_qp *sc_qp;
struct ib_cq *sc_rq_cq;
struct ib_cq *sc_sq_cq;
- int (*sc_reader)(struct svcxprt_rdma *,
- struct svc_rqst *,
- struct svc_rdma_op_ctxt *,
- int *, u32 *, u32, u32, u64, bool);
- u32 sc_dev_caps; /* distilled device caps */
- unsigned int sc_frmr_pg_list_len;
- struct list_head sc_frmr_q;
- spinlock_t sc_frmr_q_lock;
spinlock_t sc_lock; /* transport lock */
@@ -185,20 +156,14 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
__be32 *rdma_resp,
struct xdr_buf *rcvbuf);
-/* svc_rdma_marshal.c */
-extern int svc_rdma_xdr_decode_req(struct xdr_buf *);
-
/* svc_rdma_recvfrom.c */
extern int svc_rdma_recvfrom(struct svc_rqst *);
-extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *,
- struct svc_rdma_op_ctxt *, int *, u32 *,
- u32, u32, u64, bool);
-extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
- struct svc_rdma_op_ctxt *, int *, u32 *,
- u32, u32, u64, bool);
/* svc_rdma_rw.c */
extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
+extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head, __be32 *p);
extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
__be32 *wr_ch, struct xdr_buf *xdr);
extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
@@ -226,9 +191,6 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
-extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
- struct svc_rdma_fastreg_mr *);
extern void svc_sq_reap(struct svcxprt_rdma *);
extern void svc_rq_reap(struct svcxprt_rdma *);
extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 054c8cde18f3..261b48a2701d 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -17,6 +17,8 @@
#include <asm/unaligned.h>
#include <linux/scatterlist.h>
+struct rpc_rqst;
+
/*
* Buffer adjustment
*/
@@ -33,13 +35,6 @@ struct xdr_netobj {
};
/*
- * This is the legacy generic XDR function. rqstp is either a rpc_rqst
- * (client side) or svc_rqst pointer (server side).
- * Encode functions always assume there's enough room in the buffer.
- */
-typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
-
-/*
* Basic structure for transmission/reception of a client XDR message.
* Features a header (for a linear buffer containing RPC headers
* and the data payload for short messages), and then an array of
@@ -222,8 +217,10 @@ struct xdr_stream {
/*
* These are the xdr_stream style generic XDR encode and decode functions.
*/
-typedef void (*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
-typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ const void *obj);
+typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ void *obj);
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5ab1c98c7d27..d83d28e53e62 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -277,6 +277,7 @@ extern void mark_page_accessed(struct page *);
extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_all(void);
+extern void lru_add_drain_all_cpuslocked(void);
extern void rotate_reclaimable_page(struct page *page);
extern void deactivate_file_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
@@ -331,7 +332,7 @@ extern void kswapd_stop(int nid);
#include <linux/blk_types.h> /* for bio_end_io_t */
/* linux/mm/page_io.c */
-extern int swap_readpage(struct page *);
+extern int swap_readpage(struct page *page, bool do_poll);
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
extern void end_swap_bio_write(struct bio *bio);
extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
@@ -362,7 +363,8 @@ extern void free_page_and_swap_cache(struct page *);
extern void free_pages_and_swap_cache(struct page **, int);
extern struct page *lookup_swap_cache(swp_entry_t);
extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
- struct vm_area_struct *vma, unsigned long addr);
+ struct vm_area_struct *vma, unsigned long addr,
+ bool do_poll);
extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
struct vm_area_struct *vma, unsigned long addr,
bool *new_page_allocated);
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 5c3a5f3e7eec..c5ff7b217ee6 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -196,15 +196,6 @@ static inline void num_poisoned_pages_dec(void)
atomic_long_dec(&num_poisoned_pages);
}
-static inline void num_poisoned_pages_add(long num)
-{
- atomic_long_add(num, &num_poisoned_pages);
-}
-
-static inline void num_poisoned_pages_sub(long num)
-{
- atomic_long_sub(num, &num_poisoned_pages);
-}
#else
static inline swp_entry_t make_hwpoison_entry(struct page *page)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 80d07816def0..3a89b9ff4cdc 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -47,6 +47,9 @@ extern int proc_douintvec(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int proc_dointvec_minmax(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
+extern int proc_douintvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
extern int proc_dointvec_jiffies(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
@@ -143,7 +146,7 @@ struct ctl_table_header
struct ctl_table_set *set;
struct ctl_dir *parent;
struct ctl_node *node;
- struct list_head inodes; /* head for proc_inode->sysctl_inodes */
+ struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */
};
struct ctl_dir {
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 9375d23a24e7..635a3c5706bd 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -33,6 +33,8 @@ struct t10_pi_tuple {
__be32 ref_tag; /* Target LBA or indirect LBA */
};
+#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
+#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
extern const struct blk_integrity_profile t10_pi_type1_crc;
extern const struct blk_integrity_profile t10_pi_type1_ip;
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index f57076b958b7..586809abb273 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -97,6 +97,8 @@ extern void vfio_unregister_iommu_driver(
*/
extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
extern void vfio_group_put_external_user(struct vfio_group *group);
+extern bool vfio_external_group_match_file(struct vfio_group *group,
+ struct file *filep);
extern int vfio_external_user_iommu_id(struct vfio_group *group);
extern long vfio_external_check_extension(struct vfio_group *group,
unsigned long arg);