From ba65dc5ef16f82fba77869cecf7a7d515f61446b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2016 11:32:47 -0400 Subject: much milder d_walk() race d_walk() relies upon the tree not getting rearranged under it without rename_lock being touched. And we do grab rename_lock around the places that change the tree topology. Unfortunately, branch reordering is just as bad from d_walk() POV and we have two places that do it without touching rename_lock - one in handling of cursors (for ramfs-style directories) and another in autofs. autofs one is a separate story; this commit deals with the cursors. * mark cursor dentries explicitly at allocation time * make __dentry_kill() leave ->d_child.next pointing to the next non-cursor sibling, making sure that it won't be moved around unnoticed before the parent is relocked on ascend-to-parent path in d_walk(). * make d_walk() skip cursors explicitly; strictly speaking it's not necessary (all callbacks we pass to d_walk() are no-ops on cursors), but it makes analysis easier. Signed-off-by: Al Viro --- include/linux/dcache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 484c8792da82..bcd0c64e3ed8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -212,6 +212,7 @@ struct dentry_operations { #define DCACHE_OP_REAL 0x08000000 #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ +#define DCACHE_DENTRY_CURSOR 0x20000000 extern seqlock_t rename_lock; -- cgit v1.2.3 From c3ec5e5ce9cea1f369a5a8ad69d6471680796bc6 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 9 Jun 2016 18:05:09 +0100 Subject: net: diag: add missing declarations The functions inet_diag_msg_common_fill and inet_diag_msg_attrs_fill seem to have been missed from the include/linux/inet_diag.h header file. Add them to fix the following warnings: net/ipv4/inet_diag.c:69:6: warning: symbol 'inet_diag_msg_common_fill' was not declared. Should it be static? net/ipv4/inet_diag.c:108:5: warning: symbol 'inet_diag_msg_attrs_fill' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 6 ++++++ net/sctp/sctp_diag.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 7c27fa1030e8..feb04ea20f11 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -52,6 +52,12 @@ struct sock *inet_diag_find_one_icsk(struct net *net, int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); +void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); + +int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns); + extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); #endif /* _INET_DIAG_H_ */ diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 1ce724b87618..f69edcf219e5 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -3,12 +3,6 @@ #include #include -extern void inet_diag_msg_common_fill(struct inet_diag_msg *r, - struct sock *sk); -extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns); - static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info); -- cgit v1.2.3 From 8588ac097b49ce8802f11541d9cd6f6667badb34 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 11 Jun 2016 12:20:27 +0800 Subject: netfilter: nf_tables: reject loops from set element jump to chain Liping Zhang says: "Users may add such a wrong nft rules successfully, which will cause an endless jump loop: # nft add rule filter test tcp dport vmap {1: jump test} This is because before we commit, the element in the current anonymous set is inactive, so osp->walk will skip this element and miss the validate check." To resolve this problem, this patch passes the generation mask to the walk function through the iter container structure depending on the code path: 1) If we're dumping the elements, then we have to check if the element is active in the current generation. Thus, we check for the current bit in the genmask. 2) If we're checking for loops, then we have to check if the element is active in the next generation, as we're in the middle of a transaction. Thus, we check for the next bit in the genmask. Based on original patch from Liping Zhang. Reported-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso Tested-by: Liping Zhang --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 15 +++++++++------ net/netfilter/nft_hash.c | 3 +-- net/netfilter/nft_rbtree.c | 3 +-- 4 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 092235458691..f7c291ff4074 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -167,6 +167,7 @@ struct nft_set_elem { struct nft_set; struct nft_set_iter { + u8 genmask; unsigned int count; unsigned int skip; int err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 492f6f8efdda..0fd69988f00b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2951,6 +2951,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, goto bind; } + iter.genmask = nft_genmask_next(ctx->net); iter.skip = 0; iter.count = 0; iter.err = 0; @@ -3192,12 +3193,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto nla_put_failure; - args.cb = cb; - args.skb = skb; - args.iter.skip = cb->args[0]; - args.iter.count = 0; - args.iter.err = 0; - args.iter.fn = nf_tables_dump_setelem; + args.cb = cb; + args.skb = skb; + args.iter.genmask = nft_genmask_cur(ctx.net); + args.iter.skip = cb->args[0]; + args.iter.count = 0; + args.iter.err = 0; + args.iter.fn = nf_tables_dump_setelem; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -4284,6 +4286,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, binding->chain != chain) continue; + iter.genmask = nft_genmask_next(ctx->net); iter.skip = 0; iter.count = 0; iter.err = 0; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 6fa016564f90..f39c53a159eb 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -189,7 +189,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); @@ -218,7 +217,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, goto cont; if (nft_set_elem_expired(&he->ext)) goto cont; - if (!nft_set_elem_active(&he->ext, genmask)) + if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; elem.priv = he; diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index f762094af7c1..7201d57b5a93 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -211,7 +211,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { @@ -219,7 +218,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; - if (!nft_set_elem_active(&rbe->ext, genmask)) + if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; elem.priv = rbe; -- cgit v1.2.3 From daddef76c3deaaa7922f9d7b18edbf0a061215c3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 15 Jun 2016 11:14:53 +0200 Subject: net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG The implementation of net_dbg_ratelimited in the CONFIG_DYNAMIC_DEBUG case was added with 2c94b5373 ("net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case"). The implementation strategy was to take the usual definition of the dynamic_pr_debug macro, but alter it by adding a call to "net_ratelimit()" in the if statement. This is, in fact, the correct approach. However, while doing this, the author of the commit forgot to surround fmt by pr_fmt, resulting in unprefixed log messages appearing in the console. So, this commit adds back the pr_fmt(fmt) invocation, making net_dbg_ratelimited properly consistent across DEBUG, no DEBUG, and DYNAMIC_DEBUG cases, and bringing parity with the behavior of dynamic_pr_debug as well. Fixes: 2c94b5373 ("net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case") Signed-off-by: Jason A. Donenfeld Cc: Tim Bingham Signed-off-by: David S. Miller --- include/linux/net.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 9aa49a05fe38..25aa03b51c4e 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -251,7 +251,8 @@ do { \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ net_ratelimit()) \ - __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__); \ + __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \ + ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ -- cgit v1.2.3 From e582615ad33dbd39623084a02e95567b116e1eea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Jun 2016 06:24:00 -0700 Subject: gre: fix error handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) gre_parse_header() can be called from gre_err() At this point transport header points to ICMP header, not the inner header. 2) We can not really change transport header as ipgre_err() will later assume transport header still points to ICMP header (using icmp_hdr()) 3) pskb_may_pull() logic in gre_parse_header() really works if we are interested at zone pointed by skb->data 4) As Jiri explained in commit b7f8fe251e46 ("gre: do not pull header in ICMP error processing") we should not pull headers in error handler. So this fix : A) changes gre_parse_header() to use skb->data instead of skb_transport_header() B) Adds a nhs parameter to gre_parse_header() so that we can skip the not pulled IP header from error path. This offset is 0 for normal receive path. C) remove obsolete IPV6 includes Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Maciej Żenczykowski Cc: Jiri Benc Signed-off-by: David S. Miller --- include/net/gre.h | 2 +- net/ipv4/gre_demux.c | 10 +++++----- net/ipv4/ip_gre.c | 12 ++++-------- net/ipv6/ip6_gre.c | 2 +- 4 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/gre.h b/include/net/gre.h index 5dce30a6abe3..7a54a31d1d4c 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -26,7 +26,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version); struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type); int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, __be16 proto); + bool *csum_err, __be16 proto, int nhs); static inline int gre_calc_hlen(__be16 o_flags) { diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 4c39f4fd332a..de1d119a4497 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -62,26 +62,26 @@ EXPORT_SYMBOL_GPL(gre_del_protocol); /* Fills in tpi and returns header length to be pulled. */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, __be16 proto) + bool *csum_err, __be16 proto, int nhs) { const struct gre_base_hdr *greh; __be32 *options; int hdr_len; - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr)))) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; tpi->flags = gre_flags_to_tnl_flags(greh->flags); hdr_len = gre_calc_hlen(tpi->flags); - if (!pskb_may_pull(skb, hdr_len)) + if (!pskb_may_pull(skb, nhs + hdr_len)) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); tpi->proto = greh->protocol; options = (__be32 *)(greh + 1); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 07c5cf1838d8..1d000af7f561 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -49,12 +49,6 @@ #include #include -#if IS_ENABLED(CONFIG_IPV6) -#include -#include -#include -#endif - /* Problems & solutions -------------------- @@ -217,12 +211,14 @@ static void gre_err(struct sk_buff *skb, u32 info) * by themselves??? */ + const struct iphdr *iph = (struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) { + if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), + iph->ihl * 4) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -338,7 +334,7 @@ static int gre_rcv(struct sk_buff *skb) } #endif - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); if (hdr_len < 0) goto drop; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index fdc9de276ab1..776d145113e1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb) bool csum_err = false; int hdr_len; - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0); if (hdr_len < 0) goto drop; -- cgit v1.2.3 From 19de99f70b87fcc3338da52a89c439b088cbff71 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Jun 2016 18:25:38 -0700 Subject: bpf: fix matching of data/data_end in verifier The ctx structure passed into bpf programs is different depending on bpf program type. The verifier incorrectly marked ctx->data and ctx->data_end access based on ctx offset only. That caused loads in tracing programs int bpf_prog(struct pt_regs *ctx) { .. ctx->ax .. } to be incorrectly marked as PTR_TO_PACKET which later caused verifier to reject the program that was actually valid in tracing context. Fix this by doing program type specific matching of ctx offsets. Fixes: 969bf05eb3ce ("bpf: direct packet access") Reported-by: Sasha Goldshtein Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 28 +++++++++++++++++++++++++++- kernel/bpf/verifier.c | 41 +++++++---------------------------------- kernel/trace/bpf_trace.c | 6 ++++-- net/core/filter.c | 16 ++++++++++++++-- 4 files changed, 52 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8ee27b8afe81..8269cafc6eb1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -111,6 +111,31 @@ enum bpf_access_type { BPF_WRITE = 2 }; +/* types of values stored in eBPF registers */ +enum bpf_reg_type { + NOT_INIT = 0, /* nothing was written into register */ + UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ + PTR_TO_CTX, /* reg points to bpf_context */ + CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ + PTR_TO_MAP_VALUE, /* reg points to map element value */ + PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ + FRAME_PTR, /* reg == frame_pointer */ + PTR_TO_STACK, /* reg == frame_pointer + imm */ + CONST_IMM, /* constant integer value */ + + /* PTR_TO_PACKET represents: + * skb->data + * skb->data + imm + * skb->data + (u16) var + * skb->data + (u16) var + imm + * if (range > 0) then [ptr, ptr + range - off) is safe to access + * if (id > 0) means that some 'var' was added + * if (off > 0) menas that 'imm' was added + */ + PTR_TO_PACKET, + PTR_TO_PACKET_END, /* skb->data + headlen */ +}; + struct bpf_prog; struct bpf_verifier_ops { @@ -120,7 +145,8 @@ struct bpf_verifier_ops { /* return true if 'size' wide access at offset 'off' within bpf_context * with 'type' (read or write) is allowed */ - bool (*is_valid_access)(int off, int size, enum bpf_access_type type); + bool (*is_valid_access)(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type); u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 668e07903c8f..eec9f90ba030 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -126,31 +126,6 @@ * are set to NOT_INIT to indicate that they are no longer readable. */ -/* types of values stored in eBPF registers */ -enum bpf_reg_type { - NOT_INIT = 0, /* nothing was written into register */ - UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ - PTR_TO_CTX, /* reg points to bpf_context */ - CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ - PTR_TO_MAP_VALUE, /* reg points to map element value */ - PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ - FRAME_PTR, /* reg == frame_pointer */ - PTR_TO_STACK, /* reg == frame_pointer + imm */ - CONST_IMM, /* constant integer value */ - - /* PTR_TO_PACKET represents: - * skb->data - * skb->data + imm - * skb->data + (u16) var - * skb->data + (u16) var + imm - * if (range > 0) then [ptr, ptr + range - off) is safe to access - * if (id > 0) means that some 'var' was added - * if (off > 0) menas that 'imm' was added - */ - PTR_TO_PACKET, - PTR_TO_PACKET_END, /* skb->data + headlen */ -}; - struct reg_state { enum bpf_reg_type type; union { @@ -695,10 +670,10 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, /* check access to 'struct bpf_context' fields */ static int check_ctx_access(struct verifier_env *env, int off, int size, - enum bpf_access_type t) + enum bpf_access_type t, enum bpf_reg_type *reg_type) { if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t)) { + env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) { /* remember the offset of last byte accessed in ctx */ if (env->prog->aux->max_ctx_offset < off + size) env->prog->aux->max_ctx_offset = off + size; @@ -798,21 +773,19 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, mark_reg_unknown_value(state->regs, value_regno); } else if (reg->type == PTR_TO_CTX) { + enum bpf_reg_type reg_type = UNKNOWN_VALUE; + if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose("R%d leaks addr into ctx\n", value_regno); return -EACCES; } - err = check_ctx_access(env, off, size, t); + err = check_ctx_access(env, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { mark_reg_unknown_value(state->regs, value_regno); - if (off == offsetof(struct __sk_buff, data) && - env->allow_ptr_leaks) + if (env->allow_ptr_leaks) /* note that reg.[id|off|range] == 0 */ - state->regs[value_regno].type = PTR_TO_PACKET; - else if (off == offsetof(struct __sk_buff, data_end) && - env->allow_ptr_leaks) - state->regs[value_regno].type = PTR_TO_PACKET_END; + state->regs[value_regno].type = reg_type; } } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 720b7bb01d43..e7af6cb9d5cf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -349,7 +349,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func } /* bpf+kprobe programs can access fields of 'struct pt_regs' */ -static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type) +static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type) { /* check bounds */ if (off < 0 || off >= sizeof(struct pt_regs)) @@ -427,7 +428,8 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) } } -static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type) +static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type) { if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) return false; diff --git a/net/core/filter.c b/net/core/filter.c index 68adb5f52110..c4b330c85c02 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2085,7 +2085,8 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) } static bool sk_filter_is_valid_access(int off, int size, - enum bpf_access_type type) + enum bpf_access_type type, + enum bpf_reg_type *reg_type) { switch (off) { case offsetof(struct __sk_buff, tc_classid): @@ -2108,7 +2109,8 @@ static bool sk_filter_is_valid_access(int off, int size, } static bool tc_cls_act_is_valid_access(int off, int size, - enum bpf_access_type type) + enum bpf_access_type type, + enum bpf_reg_type *reg_type) { if (type == BPF_WRITE) { switch (off) { @@ -2123,6 +2125,16 @@ static bool tc_cls_act_is_valid_access(int off, int size, return false; } } + + switch (off) { + case offsetof(struct __sk_buff, data): + *reg_type = PTR_TO_PACKET; + break; + case offsetof(struct __sk_buff, data_end): + *reg_type = PTR_TO_PACKET_END; + break; + } + return __is_valid_access(off, size, type); } -- cgit v1.2.3 From 8f45927c3cae4db85887700e5415286f766cbaf9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Jun 2016 12:54:18 +0200 Subject: netfilter: xt_SYNPROXY: add missing header to Kbuild Matt Whitlock says: Without this line, the file xt_SYNPROXY.h does not get installed in /usr/include/linux/netfilter/, and thus user-space programs cannot make use of it. Reported-by: Matt Whitlock Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 1d973d2ba417..cd26d7a0fd07 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -33,6 +33,7 @@ header-y += xt_NFLOG.h header-y += xt_NFQUEUE.h header-y += xt_RATEEST.h header-y += xt_SECMARK.h +header-y += xt_SYNPROXY.h header-y += xt_TCPMSS.h header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h -- cgit v1.2.3 From 1463847e93fe693e89c52b03ab4ede6800d717c1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Jun 2016 12:54:18 +0200 Subject: netfilter: xt_SYNPROXY: include missing ./usr/include/linux/netfilter/xt_SYNPROXY.h:11: found __[us]{8,16,32,64} type without #include Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_SYNPROXY.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/xt_SYNPROXY.h b/include/uapi/linux/netfilter/xt_SYNPROXY.h index 2d59fbaa93c6..ca67e61d2a61 100644 --- a/include/uapi/linux/netfilter/xt_SYNPROXY.h +++ b/include/uapi/linux/netfilter/xt_SYNPROXY.h @@ -1,6 +1,8 @@ #ifndef _XT_SYNPROXY_H #define _XT_SYNPROXY_H +#include + #define XT_SYNPROXY_OPT_MSS 0x01 #define XT_SYNPROXY_OPT_WSCALE 0x02 #define XT_SYNPROXY_OPT_SACK_PERM 0x04 -- cgit v1.2.3 From 3720b69bafe9ccb989b9a8604f3e3f89b3610685 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 18 Jun 2016 10:10:26 -0700 Subject: Input: add BUS_CEC type Inputs can come in over the HDMI CEC bus, so add a new type for this. Signed-off-by: Hans Verkuil Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 01113841190d..c51494119817 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -247,6 +247,7 @@ struct input_mask { #define BUS_ATARI 0x1B #define BUS_SPI 0x1C #define BUS_RMI 0x1D +#define BUS_CEC 0x1E /* * MT_TOOL types -- cgit v1.2.3 From 488326947cd1f038da8d2c9068a0d07b913b7983 Mon Sep 17 00:00:00 2001 From: Kamil Debski Date: Sat, 18 Jun 2016 10:10:56 -0700 Subject: Input: add HDMI CEC specific keycodes Add HDMI CEC specific keycodes to the keycodes definition. Signed-off-by: Kamil Debski Signed-off-by: Hans Verkuil Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 87cf351bab03..737fa32faad4 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -611,6 +611,37 @@ #define KEY_KBDINPUTASSIST_ACCEPT 0x264 #define KEY_KBDINPUTASSIST_CANCEL 0x265 +/* Diagonal movement keys */ +#define KEY_RIGHT_UP 0x266 +#define KEY_RIGHT_DOWN 0x267 +#define KEY_LEFT_UP 0x268 +#define KEY_LEFT_DOWN 0x269 + +#define KEY_ROOT_MENU 0x26a /* Show Device's Root Menu */ +/* Show Top Menu of the Media (e.g. DVD) */ +#define KEY_MEDIA_TOP_MENU 0x26b +#define KEY_NUMERIC_11 0x26c +#define KEY_NUMERIC_12 0x26d +/* + * Toggle Audio Description: refers to an audio service that helps blind and + * visually impaired consumers understand the action in a program. Note: in + * some countries this is referred to as "Video Description". + */ +#define KEY_AUDIO_DESC 0x26e +#define KEY_3D_MODE 0x26f +#define KEY_NEXT_FAVORITE 0x270 +#define KEY_STOP_RECORD 0x271 +#define KEY_PAUSE_RECORD 0x272 +#define KEY_VOD 0x273 /* Video on Demand */ +#define KEY_UNMUTE 0x274 +#define KEY_FASTREVERSE 0x275 +#define KEY_SLOWREVERSE 0x276 +/* + * Control a data application associated with the currently viewed channel, + * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) + */ +#define KEY_DATA 0x275 + #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 #define BTN_TRIGGER_HAPPY2 0x2c1 -- cgit v1.2.3 From a0d26d5a4fc8e13993279f788deeb08069e73b69 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 19 Jun 2016 15:18:13 +0300 Subject: qed*: Don't reset statistics on inner reload Several user APIs can cause driver to perform an inner-reload. Currently, doing this would cause the HW/FW statistics of the adapter to reset, which isn't the expected behavior [statistics should only reset on explicit unloads]. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 3 ++- drivers/net/ethernet/qlogic/qede/qede_main.c | 7 ++++--- include/linux/qed/qed_eth_if.h | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 7e7ae83453d5..aada4c7e095f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1745,7 +1745,8 @@ static int qed_start_vport(struct qed_dev *cdev, start.vport_id, start.mtu); } - qed_reset_vport_stats(cdev); + if (params->clear_stats) + qed_reset_vport_stats(cdev); return 0; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5733d1888223..f8e11f953acb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -3231,7 +3231,7 @@ static int qede_stop_queues(struct qede_dev *edev) return rc; } -static int qede_start_queues(struct qede_dev *edev) +static int qede_start_queues(struct qede_dev *edev, bool clear_stats) { int rc, tc, i; int vlan_removal_en = 1; @@ -3462,6 +3462,7 @@ out: enum qede_load_mode { QEDE_LOAD_NORMAL, + QEDE_LOAD_RELOAD, }; static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) @@ -3500,7 +3501,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) goto err3; DP_INFO(edev, "Setup IRQs succeeded\n"); - rc = qede_start_queues(edev); + rc = qede_start_queues(edev, mode != QEDE_LOAD_RELOAD); if (rc) goto err4; DP_INFO(edev, "Start VPORT, RXQ and TXQ succeeded\n"); @@ -3555,7 +3556,7 @@ void qede_reload(struct qede_dev *edev, if (func) func(edev, args); - qede_load(edev, QEDE_LOAD_NORMAL); + qede_load(edev, QEDE_LOAD_RELOAD); mutex_lock(&edev->qede_lock); qede_config_rx_mode(edev->ndev); diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 6ae8cb4a61d3..6c876a63558d 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -49,6 +49,7 @@ struct qed_start_vport_params { bool drop_ttl0; u8 vport_id; u16 mtu; + bool clear_stats; }; struct qed_stop_rxq_params { -- cgit v1.2.3 From 9d76931180557270796f9631e2c79b9c7bb3c9fb Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 21 Jun 2016 14:20:03 +0300 Subject: net/mlx4_en: Avoid unregister_netdev at shutdown flow This allows a clean shutdown, even if some netdev clients do not release their reference from this netdev. It is enough to release the HW resources only as the kernel is shutting down. Fixes: 2ba5fbd62b25 ('net/mlx4_core: Handle AER flow properly') Signed-off-by: Eran Ben Elisha Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 17 +++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/main.c | 5 ++++- include/linux/mlx4/device.h | 1 + 3 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 43f505e2d291..0c0dfd6cdca6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2036,11 +2036,20 @@ err: return -ENOMEM; } +static void mlx4_en_shutdown(struct net_device *dev) +{ + rtnl_lock(); + netif_device_detach(dev); + mlx4_en_close(dev); + rtnl_unlock(); +} void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + bool shutdown = mdev->dev->persist->interface_state & + MLX4_INTERFACE_STATE_SHUTDOWN; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); @@ -2048,7 +2057,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev) if (priv->registered) { devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev, priv->port)); - unregister_netdev(dev); + if (shutdown) + mlx4_en_shutdown(dev); + else + unregister_netdev(dev); } if (priv->allocated) @@ -2073,7 +2085,8 @@ void mlx4_en_destroy_netdev(struct net_device *dev) kfree(priv->tx_ring); kfree(priv->tx_cq); - free_netdev(dev); + if (!shutdown) + free_netdev(dev); } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 372ebfa880f5..546fab0ecc3b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4135,8 +4135,11 @@ static void mlx4_shutdown(struct pci_dev *pdev) mlx4_info(persist->dev, "mlx4_shutdown was called\n"); mutex_lock(&persist->interface_state_mutex); - if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) { + /* Notify mlx4 clients that the kernel is being shut down */ + persist->interface_state |= MLX4_INTERFACE_STATE_SHUTDOWN; mlx4_unload_one(pdev); + } mutex_unlock(&persist->interface_state_mutex); } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 80dec87a94f8..d46a0e7f144d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -466,6 +466,7 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, + MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ -- cgit v1.2.3 From c9b254955b9f8814966f5dabd34c39d0e0a2b437 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 22 Jun 2016 17:27:26 +0300 Subject: IB/mlx5: Fix post send fence logic If the caller specified IB_SEND_FENCE in the send flags of the work request and no previous work request stated that the successive one should be fenced, the work request would be executed without a fence. This could result in RDMA read or atomic operations failure due to a MR being invalidated. Fix this by adding the mlx5 enumeration for fencing RDMA/atomic operations and fix the logic to apply this. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 7 ++++--- include/linux/mlx5/qp.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ce434228a5ea..ce0a7ab35a22 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3332,10 +3332,11 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) return MLX5_FENCE_MODE_SMALL_AND_FENCE; else return fence; - - } else { - return 0; + } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { + return MLX5_FENCE_MODE_FENCE; } + + return 0; } static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index e4e29882fdfd..630f66a186b7 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -172,6 +172,7 @@ enum { enum { MLX5_FENCE_MODE_NONE = 0 << 5, MLX5_FENCE_MODE_INITIATOR_SMALL = 1 << 5, + MLX5_FENCE_MODE_FENCE = 2 << 5, MLX5_FENCE_MODE_STRONG_ORDERING = 3 << 5, MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, }; -- cgit v1.2.3 From c755f4afa66ad3ed98870bd3254f37c47fb2c800 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 22 Jun 2016 13:29:33 -0700 Subject: IB/rdmavt: Correct qp_priv_alloc() return value test The current drivers return errors from this calldown wrapped in an ERR_PTR(). The rdmavt code incorrectly tests for NULL. The code is fixed to use IS_ERR() and change ret according to the driver return value. Cc: Stable # 4.6+ Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 4 +++- include/rdma/rdma_vt.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d04f4fe522b5..41ba7e9cadaa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -699,8 +699,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * initialization that is needed. */ priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); - if (!priv) + if (IS_ERR(priv)) { + ret = priv; goto bail_qp; + } qp->priv = priv; qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 16274e2133cd..9c9a27d42aaa 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -203,7 +203,9 @@ struct rvt_driver_provided { /* * Allocate a private queue pair data structure for driver specific - * information which is opaque to rdmavt. + * information which is opaque to rdmavt. Errors are returned via + * ERR_PTR(err). The driver is free to return NULL or a valid + * pointer. */ void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); -- cgit v1.2.3 From 067a7cd06f7bf860f2e3415394b065b9a0983802 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 20 Jun 2016 13:37:18 -0700 Subject: act_ife: only acquire tcf_lock for existing actions Alexey reported that we have GFP_KERNEL allocation when holding the spinlock tcf_lock. Actually we don't have to take that spinlock for all the cases, especially for the new one we just create. To modify the existing actions, we still need this spinlock to make sure the whole update is atomic. For net-next, we can get rid of this spinlock because we already hold the RTNL lock on slow path, and on fast path we can use RCU to protect the metalist. Joint work with Jamal. Reported-by: Alexey Khoroshilov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_ife.h | 6 ++--- net/sched/act_ife.c | 55 +++++++++++++++++++++++++-------------------- 2 files changed, 34 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index dc9a09aefb33..c55facd17b7e 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -36,7 +36,7 @@ struct tcf_meta_ops { int (*encode)(struct sk_buff *, void *, struct tcf_meta_info *); int (*decode)(struct sk_buff *, void *, u16 len); int (*get)(struct sk_buff *skb, struct tcf_meta_info *mi); - int (*alloc)(struct tcf_meta_info *, void *); + int (*alloc)(struct tcf_meta_info *, void *, gfp_t); void (*release)(struct tcf_meta_info *); int (*validate)(void *val, int len); struct module *owner; @@ -48,8 +48,8 @@ int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval); -int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval); -int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval); +int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); +int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi); int ife_validate_meta_u32(void *val, int len); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 658046dfe02d..e4076598f214 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -106,9 +106,9 @@ int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_get_meta_u16); -int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u32), gfp); if (!mi->metaval) return -ENOMEM; @@ -116,9 +116,9 @@ int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) } EXPORT_SYMBOL_GPL(ife_alloc_meta_u32); -int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u16), gfp); if (!mi->metaval) return -ENOMEM; @@ -240,10 +240,10 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len) + void *val, int len, bool exists) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -251,11 +251,13 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); request_module("ifemeta%u", metaid); rtnl_lock(); - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); #endif } @@ -272,10 +274,10 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len) + int len, bool exists) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -284,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, if (!ops) return -ENOENT; - mi = kzalloc(sizeof(*mi), GFP_KERNEL); + mi = kzalloc(sizeof(*mi), exists ? GFP_ATOMIC : GFP_KERNEL); if (!mi) { /*put back what find_ife_oplist took */ module_put(ops->owner); @@ -294,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, mi->metaid = metaid; mi->ops = ops; if (len > 0) { - ret = ops->alloc(mi, metaval); + ret = ops->alloc(mi, metaval, exists ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); module_put(ops->owner); @@ -307,14 +309,14 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, return ret; } -static int use_all_metadata(struct tcf_ife_info *ife) +static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; int rc = 0; int installed = 0; list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0); + rc = add_metainfo(ife, o->metaid, NULL, 0, exists); if (rc == 0) installed += 1; } @@ -385,8 +387,9 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind) spin_unlock_bh(&ife->tcf_lock); } -/* under ife->tcf_lock */ -static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) +/* under ife->tcf_lock for existing action */ +static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, + bool exists) { int len = 0; int rc = 0; @@ -398,11 +401,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len); + rc = load_metaops_and_vet(ife, i, val, len, exists); if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len); + rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } @@ -474,7 +477,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, saddr = nla_data(tb[TCA_IFE_SMAC]); } - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; if (parm->flags & IFE_ENCODE) { @@ -504,11 +508,12 @@ metadata_parse_err: if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } - err = populate_metalist(ife, tb2); + err = populate_metalist(ife, tb2, exists); if (err) goto metadata_parse_err; @@ -518,17 +523,19 @@ metadata_parse_err: * as we can. You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife); + err = use_all_metadata(ife, exists); if (err) { if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } } - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(tn, a); -- cgit v1.2.3 From 33cdcee04be3b4482be97393167e7561b2584e1e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 22 Jun 2016 09:25:14 +0200 Subject: pwm: Fix pwm_apply_args() Commit 5ec803edcb70 ("pwm: Add core infrastructure to allow atomic updates"), implemented pwm_disable() as a wrapper around pwm_apply_state(), and then, commit ef2bf4997f7d ("pwm: Improve args checking in pwm_apply_state()") added missing checks on the ->period value in pwm_apply_state() to ensure we were not passing inappropriate values to the ->config() or ->apply() methods. The conjunction of these 2 commits led to a case where pwm_disable() was no longer succeeding, thus preventing the polarity setting done in pwm_apply_args(). Set a valid period in pwm_apply_args() to ensure polarity setting won't be rejected. Signed-off-by: Boris Brezillon Reported-by: Geert Uytterhoeven Suggested-by: Brian Norris Fixes: 5ec803edcb70 ("pwm: Add core infrastructure to allow atomic updates") Tested-by: Geert Uytterhoeven Reviewed-by: Brian Norris Signed-off-by: Thierry Reding --- include/linux/pwm.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 908b67c847cd..c038ae36b10e 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -464,6 +464,8 @@ static inline bool pwm_can_sleep(struct pwm_device *pwm) static inline void pwm_apply_args(struct pwm_device *pwm) { + struct pwm_state state = { }; + /* * PWM users calling pwm_apply_args() expect to have a fresh config * where the polarity and period are set according to pwm_args info. @@ -476,18 +478,20 @@ static inline void pwm_apply_args(struct pwm_device *pwm) * at startup (even if they are actually enabled), thus authorizing * polarity setting. * - * Instead of setting ->enabled to false, we call pwm_disable() - * before pwm_set_polarity() to ensure that everything is configured - * as expected, and the PWM is really disabled when the user request - * it. + * To fulfill this requirement, we apply a new state which disables + * the PWM device and set the reference period and polarity config. * * Note that PWM users requiring a smooth handover between the * bootloader and the kernel (like critical regulators controlled by * PWM devices) will have to switch to the atomic API and avoid calling * pwm_apply_args(). */ - pwm_disable(pwm); - pwm_set_polarity(pwm, pwm->args.polarity); + + state.enabled = false; + state.polarity = pwm->args.polarity; + state.period = pwm->args.period; + + pwm_apply_state(pwm, &state); } struct pwm_lookup { -- cgit v1.2.3 From 4c5ea0a9cd02d6aa8adc86e100b2a4cff8d614ff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 21 Jun 2016 18:52:17 +0200 Subject: locking/static_key: Fix concurrent static_key_slow_inc() The following scenario is possible: CPU 1 CPU 2 static_key_slow_inc() atomic_inc_not_zero() -> key.enabled == 0, no increment jump_label_lock() atomic_inc_return() -> key.enabled == 1 now static_key_slow_inc() atomic_inc_not_zero() -> key.enabled == 1, inc to 2 return ** static key is wrong! jump_label_update() jump_label_unlock() Testing the static key at the point marked by (**) will follow the wrong path for jumps that have not been patched yet. This can actually happen when creating many KVM virtual machines with userspace LAPIC emulation; just run several copies of the following program: #include #include #include #include int main(void) { for (;;) { int kvmfd = open("/dev/kvm", O_RDONLY); int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0); close(ioctl(vmfd, KVM_CREATE_VCPU, 1)); close(vmfd); close(kvmfd); } return 0; } Every KVM_CREATE_VCPU ioctl will attempt a static_key_slow_inc() call. The static key's purpose is to skip NULL pointer checks and indeed one of the processes eventually dereferences NULL. As explained in the commit that introduced the bug: 706249c222f6 ("locking/static_keys: Rework update logic") jump_label_update() needs key.enabled to be true. The solution adopted here is to temporarily make key.enabled == -1, and use go down the slow path when key.enabled <= 0. Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Peter Zijlstra (Intel) Cc: # v4.3+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 706249c222f6 ("locking/static_keys: Rework update logic") Link: http://lkml.kernel.org/r/1466527937-69798-1-git-send-email-pbonzini@redhat.com [ Small stylistic edits to the changelog and the code. ] Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 16 +++++++++++++--- kernel/jump_label.c | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0536524bb9eb..68904469fba1 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -117,13 +117,18 @@ struct module; #include +#ifdef HAVE_JUMP_LABEL + static inline int static_key_count(struct static_key *key) { - return atomic_read(&key->enabled); + /* + * -1 means the first static_key_slow_inc() is in progress. + * static_key_enabled() must return true, so return 1 here. + */ + int n = atomic_read(&key->enabled); + return n >= 0 ? n : 1; } -#ifdef HAVE_JUMP_LABEL - #define JUMP_TYPE_FALSE 0UL #define JUMP_TYPE_TRUE 1UL #define JUMP_TYPE_MASK 1UL @@ -162,6 +167,11 @@ extern void jump_label_apply_nops(struct module *mod); #else /* !HAVE_JUMP_LABEL */ +static inline int static_key_count(struct static_key *key) +{ + return atomic_read(&key->enabled); +} + static __always_inline void jump_label_init(void) { static_key_initialized = true; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 05254eeb4b4e..4b353e0be121 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -58,13 +58,36 @@ static void jump_label_update(struct static_key *key); void static_key_slow_inc(struct static_key *key) { + int v, v1; + STATIC_KEY_CHECK_USE(); - if (atomic_inc_not_zero(&key->enabled)) - return; + + /* + * Careful if we get concurrent static_key_slow_inc() calls; + * later calls must wait for the first one to _finish_ the + * jump_label_update() process. At the same time, however, + * the jump_label_update() call below wants to see + * static_key_enabled(&key) for jumps to be updated properly. + * + * So give a special meaning to negative key->enabled: it sends + * static_key_slow_inc() down the slow path, and it is non-zero + * so it counts as "enabled" in jump_label_update(). Note that + * atomic_inc_unless_negative() checks >= 0, so roll our own. + */ + for (v = atomic_read(&key->enabled); v > 0; v = v1) { + v1 = atomic_cmpxchg(&key->enabled, v, v + 1); + if (likely(v1 == v)) + return; + } jump_label_lock(); - if (atomic_inc_return(&key->enabled) == 1) + if (atomic_read(&key->enabled) == 0) { + atomic_set(&key->enabled, -1); jump_label_update(key); + atomic_set(&key->enabled, 1); + } else { + atomic_inc(&key->enabled); + } jump_label_unlock(); } EXPORT_SYMBOL_GPL(static_key_slow_inc); @@ -72,6 +95,13 @@ EXPORT_SYMBOL_GPL(static_key_slow_inc); static void __static_key_slow_dec(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { + /* + * The negative count check is valid even when a negative + * key->enabled is in use by static_key_slow_inc(); a + * __static_key_slow_dec() before the first static_key_slow_inc() + * returns is unbalanced, because all other static_key_slow_inc() + * instances block while the update is in progress. + */ if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { WARN(atomic_read(&key->enabled) < 0, "jump label: negative count!\n"); -- cgit v1.2.3 From b235beea9e996a4d36fed6cfef4801a3e7d7a9a5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 15:09:37 -0700 Subject: Clarify naming of thread info/stack allocators We've had the thread info allocated together with the thread stack for most architectures for a long time (since the thread_info was split off from the task struct), but that is about to change. But the patches that move the thread info to be off-stack (and a part of the task struct instead) made it clear how confused the allocator and freeing functions are. Because the common case was that we share an allocation with the thread stack and the thread_info, the two pointers were identical. That identity then meant that we would have things like ti = alloc_thread_info_node(tsk, node); ... tsk->stack = ti; which certainly _worked_ (since stack and thread_info have the same value), but is rather confusing: why are we assigning a thread_info to the stack? And if we move the thread_info away, the "confusing" code just gets to be entirely bogus. So remove all this confusion, and make it clear that we are doing the stack allocation by renaming and clarifying the function names to be about the stack. The fact that the thread_info then shares the allocation is an implementation detail, and not really about the allocation itself. This is a pure renaming and type fix: we pass in the same pointer, it's just that we clarify what the pointer means. The ia64 code that actually only has one single allocation (for all of task_struct, thread_info and kernel thread stack) now looks a bit odd, but since "tsk->stack" is actually not even used there, that oddity doesn't matter. It would be a separate thing to clean that up, I intentionally left the ia64 changes as a pure brute-force renaming and type change. Acked-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- arch/Kconfig | 4 +-- arch/ia64/Kconfig | 2 +- arch/ia64/include/asm/thread_info.h | 8 +++--- arch/mn10300/include/asm/thread_info.h | 2 +- arch/mn10300/kernel/kgdb.c | 3 +- arch/tile/include/asm/thread_info.h | 2 +- arch/tile/kernel/process.c | 3 +- include/linux/sched.h | 2 +- init/main.c | 4 +-- kernel/fork.c | 50 +++++++++++++++++----------------- 10 files changed, 41 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index e9734796531f..15996290fed4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -226,8 +226,8 @@ config ARCH_INIT_TASK config ARCH_TASK_STRUCT_ALLOCATOR bool -# Select if arch has its private alloc_thread_info() function -config ARCH_THREAD_INFO_ALLOCATOR +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR bool # Select if arch wants to size task_struct dynamically via arch_task_struct_size: diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index f80758cb7157..e109ee95e919 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -45,7 +45,7 @@ config IA64 select GENERIC_SMP_IDLE_THREAD select ARCH_INIT_TASK select ARCH_TASK_STRUCT_ALLOCATOR - select ARCH_THREAD_INFO_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA select GENERIC_TIME_VSYSCALL_OLD select SYSCTL_ARCH_UNALIGN_NO_WARN diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index aa995b67c3f5..d1212b84fb83 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,15 +48,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_info_node(tsk, node) \ - ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define alloc_thread_stack_node(tsk, node) \ + ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) +#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_info(ti) /* nothing */ +#define free_thread_stack(ti) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 4861a78c7160..f5f90bbf019d 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void) } #ifndef CONFIG_KGDB -void arch_release_thread_info(struct thread_info *ti); +void arch_release_thread_stack(unsigned long *stack); #endif #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c index 99770823451a..2d7986c386fe 100644 --- a/arch/mn10300/kernel/kgdb.c +++ b/arch/mn10300/kernel/kgdb.c @@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs) * single-step state is cleared. At this point the breakpoints should have * been removed by __switch_to(). */ -void arch_release_thread_info(struct thread_info *ti) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *ti = (void *)stack; if (kgdb_sstep_thread == ti) { kgdb_sstep_thread = NULL; diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 4b7cef9e94e0..c1467ac59ce6 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -78,7 +78,7 @@ struct thread_info { #ifndef __ASSEMBLY__ -void arch_release_thread_info(struct thread_info *info); +void arch_release_thread_stack(unsigned long *stack); /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 6b705ccc9cc1..a465d8372edd 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -73,8 +73,9 @@ void arch_cpu_idle(void) /* * Release a thread_info structure */ -void arch_release_thread_info(struct thread_info *info) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *info = (void *)stack; struct single_step_state *step_state = info->step_state; if (step_state) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e42ada26345..253538f29ade 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3007,7 +3007,7 @@ static inline int object_is_on_stack(void *obj) return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } -extern void thread_info_cache_init(void); +extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE static inline unsigned long stack_not_used(struct task_struct *p) diff --git a/init/main.c b/init/main.c index 4c17fda5c2ff..826fd57fa3aa 100644 --- a/init/main.c +++ b/init/main.c @@ -453,7 +453,7 @@ void __init __weak smp_setup_processor_id(void) } # if THREAD_SIZE >= PAGE_SIZE -void __init __weak thread_info_cache_init(void) +void __init __weak thread_stack_cache_init(void) { } #endif @@ -627,7 +627,7 @@ asmlinkage __visible void __init start_kernel(void) /* Should be run before the first non-init thread is created */ init_espfix_bsp(); #endif - thread_info_cache_init(); + thread_stack_cache_init(); cred_init(); fork_init(); proc_caches_init(); diff --git a/kernel/fork.c b/kernel/fork.c index 5c2c355aa97f..37b9439b8c07 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -148,18 +148,18 @@ static inline void free_task_struct(struct task_struct *tsk) } #endif -void __weak arch_release_thread_info(struct thread_info *ti) +void __weak arch_release_thread_stack(unsigned long *stack) { } -#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR +#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a * kmemcache based allocator. */ # if THREAD_SIZE >= PAGE_SIZE -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP, @@ -172,33 +172,33 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, return page ? page_address(page) : NULL; } -static inline void free_thread_info(struct thread_info *ti) +static inline void free_thread_stack(unsigned long *stack) { - struct page *page = virt_to_page(ti); + struct page *page = virt_to_page(stack); memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK, -(1 << THREAD_SIZE_ORDER)); __free_kmem_pages(page, THREAD_SIZE_ORDER); } # else -static struct kmem_cache *thread_info_cache; +static struct kmem_cache *thread_stack_cache; -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static struct thread_info *alloc_thread_stack_node(struct task_struct *tsk, int node) { - return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node); + return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); } -static void free_thread_info(struct thread_info *ti) +static void free_stack(unsigned long *stack) { - kmem_cache_free(thread_info_cache, ti); + kmem_cache_free(thread_stack_cache, stack); } -void thread_info_cache_init(void) +void thread_stack_cache_init(void) { - thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, + thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE, THREAD_SIZE, 0, NULL); - BUG_ON(thread_info_cache == NULL); + BUG_ON(thread_stack_cache == NULL); } # endif #endif @@ -221,9 +221,9 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -static void account_kernel_stack(struct thread_info *ti, int account) +static void account_kernel_stack(unsigned long *stack, int account) { - struct zone *zone = page_zone(virt_to_page(ti)); + struct zone *zone = page_zone(virt_to_page(stack)); mod_zone_page_state(zone, NR_KERNEL_STACK, account); } @@ -231,8 +231,8 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { account_kernel_stack(tsk->stack, -1); - arch_release_thread_info(tsk->stack); - free_thread_info(tsk->stack); + arch_release_thread_stack(tsk->stack); + free_thread_stack(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); put_seccomp_filter(tsk); @@ -343,7 +343,7 @@ void set_task_stack_end_magic(struct task_struct *tsk) static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; - struct thread_info *ti; + unsigned long *stack; int err; if (node == NUMA_NO_NODE) @@ -352,15 +352,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (!tsk) return NULL; - ti = alloc_thread_info_node(tsk, node); - if (!ti) + stack = alloc_thread_stack_node(tsk, node); + if (!stack) goto free_tsk; err = arch_dup_task_struct(tsk, orig); if (err) - goto free_ti; + goto free_stack; - tsk->stack = ti; + tsk->stack = stack; #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under @@ -392,14 +392,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; - account_kernel_stack(ti, 1); + account_kernel_stack(stack, 1); kcov_task_init(tsk); return tsk; -free_ti: - free_thread_info(ti); +free_stack: + free_thread_stack(stack); free_tsk: free_task_struct(tsk); return NULL; -- cgit v1.2.3 From 7e8b3dfef16375dbfeb1f36a83eb9f27117c51fd Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 23 Jun 2016 14:54:37 -0400 Subject: USB: EHCI: declare hostpc register as zero-length array The HOSTPC extension registers found in some EHCI implementations form a variable-length array, with one element for each port. Therefore the hostpc field in struct ehci_regs should be declared as a zero-length array, not a single-element array. This fixes a problem reported by UBSAN. Signed-off-by: Alan Stern Reported-by: Wilfried Klaebe Tested-by: Wilfried Klaebe CC: Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ehci_def.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index 966889a20ea3..e479033bd782 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -180,11 +180,11 @@ struct ehci_regs { * PORTSCx */ /* HOSTPC: offset 0x84 */ - u32 hostpc[1]; /* HOSTPC extension */ + u32 hostpc[0]; /* HOSTPC extension */ #define HOSTPC_PHCD (1<<22) /* Phy clock disable */ #define HOSTPC_PSPD (3<<25) /* Port speed detection */ - u32 reserved5[16]; + u32 reserved5[17]; /* USBMODE_EX: offset 0xc8 */ u32 usbmode_ex; /* USB Device mode extension */ -- cgit v1.2.3 From 7f1a00b6fcd0e3c19beba2e92d157dc0c2cf3494 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 17:07:33 -0700 Subject: fix up initial thread stack pointer vs thread_info confusion The INIT_TASK() initializer was similarly confused about the stack vs thread_info allocation that the allocators had, and that were fixed in commit b235beea9e99 ("Clarify naming of thread info/stack allocators"). The task ->stack pointer only incidentally ends up having the same value as the thread_info, and in fact that will change. So fix the initial task struct initializer to point to 'init_stack' instead of 'init_thread_info', and make sure the ia64 definition for that exists. This actually makes the ia64 tsk->stack pointer be sensible for the initial task, but not for any other task. As mentioned in commit b235beea9e99, that whole pointer isn't actually used on ia64, since task_stack_page() there just points to the (single) allocation. All the other architectures seem to have copied the 'init_stack' definition, even if it tended to be generally unusued. Signed-off-by: Linus Torvalds --- arch/ia64/kernel/init_task.c | 1 + include/linux/init_task.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index f9efe9739d3f..0eaa89f3defd 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * handled. This is done by having a special ".data..init_task" section... */ #define init_thread_info init_task_mem.s.thread_info +#define init_stack init_task_mem.stack union { struct { diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f2cb8d45513d..f8834f820ec2 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -190,7 +190,7 @@ extern struct task_group root_task_group; #define INIT_TASK(tsk) \ { \ .state = 0, \ - .stack = &init_thread_info, \ + .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ .prio = MAX_PRIO-20, \ -- cgit v1.2.3 From 9b75a867cc9ddbafcaf35029358ac500f2635ff3 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 24 Jun 2016 14:49:34 -0700 Subject: mm: mempool: kasan: don't poot mempool objects in quarantine Currently we may put reserved by mempool elements into quarantine via kasan_kfree(). This is totally wrong since quarantine may really free these objects. So when mempool will try to use such element, use-after-free will happen. Or mempool may decide that it no longer need that element and double-free it. So don't put object into quarantine in kasan_kfree(), just poison it. Rename kasan_kfree() to kasan_poison_kfree() to respect that. Also, we shouldn't use kasan_slab_alloc()/kasan_krealloc() in kasan_unpoison_element() because those functions may update allocation stacktrace. This would be wrong for the most of the remove_element call sites. (The only call site where we may want to update alloc stacktrace is in mempool_alloc(). Kmemleak solves this by calling kmemleak_update_trace(), so we could make something like that too. But this is out of scope of this patch). Fixes: 55834c59098d ("mm: kasan: initial memory quarantine implementation") Link: http://lkml.kernel.org/r/575977C3.1010905@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Kuthonuzo Luruo Acked-by: Alexander Potapenko Cc: Dmitriy Vyukov Cc: Kostya Serebryany Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 11 +++++++---- mm/kasan/kasan.c | 6 +++--- mm/mempool.c | 12 ++++-------- 3 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 611927f5870d..ac4b3c46a84d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -59,14 +59,13 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(const void *ptr); -void kasan_kfree(void *ptr); +void kasan_poison_kfree(void *ptr); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); bool kasan_slab_free(struct kmem_cache *s, void *object); -void kasan_poison_slab_free(struct kmem_cache *s, void *object); struct kasan_cache { int alloc_meta_offset; @@ -76,6 +75,9 @@ struct kasan_cache { int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); +size_t ksize(const void *); +static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -102,7 +104,7 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache, static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} static inline void kasan_kfree_large(const void *ptr) {} -static inline void kasan_kfree(void *ptr) {} +static inline void kasan_poison_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) {} static inline void kasan_krealloc(const void *object, size_t new_size, @@ -114,11 +116,12 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object) { return false; } -static inline void kasan_poison_slab_free(struct kmem_cache *s, void *object) {} static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} +static inline void kasan_unpoison_slab(const void *ptr) { } + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 28439acda6ec..6845f9294696 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -508,7 +508,7 @@ void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags) kasan_kmalloc(cache, object, cache->object_size, flags); } -void kasan_poison_slab_free(struct kmem_cache *cache, void *object) +static void kasan_poison_slab_free(struct kmem_cache *cache, void *object) { unsigned long size = cache->object_size; unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); @@ -626,7 +626,7 @@ void kasan_krealloc(const void *object, size_t size, gfp_t flags) kasan_kmalloc(page->slab_cache, object, size, flags); } -void kasan_kfree(void *ptr) +void kasan_poison_kfree(void *ptr) { struct page *page; @@ -636,7 +636,7 @@ void kasan_kfree(void *ptr) kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), KASAN_FREE_PAGE); else - kasan_slab_free(page->slab_cache, ptr); + kasan_poison_slab_free(page->slab_cache, ptr); } void kasan_kfree_large(const void *ptr) diff --git a/mm/mempool.c b/mm/mempool.c index 9e075f829d0d..8f65464da5de 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -104,20 +104,16 @@ static inline void poison_element(mempool_t *pool, void *element) static void kasan_poison_element(mempool_t *pool, void *element) { - if (pool->alloc == mempool_alloc_slab) - kasan_poison_slab_free(pool->pool_data, element); - if (pool->alloc == mempool_kmalloc) - kasan_kfree(element); + if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) + kasan_poison_kfree(element); if (pool->alloc == mempool_alloc_pages) kasan_free_pages(element, (unsigned long)pool->pool_data); } static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags) { - if (pool->alloc == mempool_alloc_slab) - kasan_slab_alloc(pool->pool_data, element, flags); - if (pool->alloc == mempool_kmalloc) - kasan_krealloc(element, (size_t)pool->pool_data, flags); + if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) + kasan_unpoison_slab(element); if (pool->alloc == mempool_alloc_pages) kasan_alloc_pages(element, (unsigned long)pool->pool_data); } -- cgit v1.2.3 From 315d09bf30c2b436a1fdac86d31c24380cd56c4f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 24 Jun 2016 14:49:45 -0700 Subject: Revert "mm: make faultaround produce old ptes" This reverts commit 5c0a85fad949212b3e059692deecdeed74ae7ec7. The commit causes ~6% regression in unixbench. Let's revert it for now and consider other solution for reclaim problem later. Link: http://lkml.kernel.org/r/1465893750-44080-2-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: "Huang, Ying" Cc: Linus Torvalds Cc: Rik van Riel Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Vinayak Menon Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- mm/filemap.c | 2 +- mm/memory.c | 23 +++++------------------ 3 files changed, 7 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5df5feb49575..ece042dfe23c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -602,7 +602,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) } void do_set_pte(struct vm_area_struct *vma, unsigned long address, - struct page *page, pte_t *pte, bool write, bool anon, bool old); + struct page *page, pte_t *pte, bool write, bool anon); #endif /* diff --git a/mm/filemap.c b/mm/filemap.c index 00ae878b2a38..20f3b1f33f0e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2186,7 +2186,7 @@ repeat: if (file->f_ra.mmap_miss > 0) file->f_ra.mmap_miss--; addr = address + (page->index - vmf->pgoff) * PAGE_SIZE; - do_set_pte(vma, addr, page, pte, false, false, true); + do_set_pte(vma, addr, page, pte, false, false); unlock_page(page); goto next; unlock: diff --git a/mm/memory.c b/mm/memory.c index 15322b73636b..61fe7e7b56bf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2877,7 +2877,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, * vm_ops->map_pages. */ void do_set_pte(struct vm_area_struct *vma, unsigned long address, - struct page *page, pte_t *pte, bool write, bool anon, bool old) + struct page *page, pte_t *pte, bool write, bool anon) { pte_t entry; @@ -2885,8 +2885,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, entry = mk_pte(page, vma->vm_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); - if (old) - entry = pte_mkold(entry); if (anon) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address, false); @@ -3032,20 +3030,9 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, */ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); - if (!pte_same(*pte, orig_pte)) - goto unlock_out; do_fault_around(vma, address, pte, pgoff, flags); - /* Check if the fault is handled by faultaround */ - if (!pte_same(*pte, orig_pte)) { - /* - * Faultaround produce old pte, but the pte we've - * handler fault for should be young. - */ - pte_t entry = pte_mkyoung(*pte); - if (ptep_set_access_flags(vma, address, pte, entry, 0)) - update_mmu_cache(vma, address, pte); + if (!pte_same(*pte, orig_pte)) goto unlock_out; - } pte_unmap_unlock(pte, ptl); } @@ -3060,7 +3047,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, put_page(fault_page); return ret; } - do_set_pte(vma, address, fault_page, pte, false, false, false); + do_set_pte(vma, address, fault_page, pte, false, false); unlock_page(fault_page); unlock_out: pte_unmap_unlock(pte, ptl); @@ -3111,7 +3098,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } goto uncharge_out; } - do_set_pte(vma, address, new_page, pte, true, true, false); + do_set_pte(vma, address, new_page, pte, true, true); mem_cgroup_commit_charge(new_page, memcg, false, false); lru_cache_add_active_or_unevictable(new_page, vma); pte_unmap_unlock(pte, ptl); @@ -3164,7 +3151,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, put_page(fault_page); return ret; } - do_set_pte(vma, address, fault_page, pte, true, false, false); + do_set_pte(vma, address, fault_page, pte, true, false); pte_unmap_unlock(pte, ptl); if (set_page_dirty(fault_page)) -- cgit v1.2.3 From 0efce9da1255f13d910842b62cb14371044a3c50 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 8 Jun 2016 11:38:55 +0100 Subject: arm64: KVM: fix build with CONFIG_ARM_PMU disabled When CONFIG_ARM_PMU is disabled, we get the following build error: arch/arm64/kvm/sys_regs.c: In function 'pmu_counter_idx_valid': arch/arm64/kvm/sys_regs.c:564:27: error: 'ARMV8_PMU_CYCLE_IDX' undeclared (first use in this function) if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) ^ arch/arm64/kvm/sys_regs.c:564:27: note: each undeclared identifier is reported only once for each function it appears in arch/arm64/kvm/sys_regs.c: In function 'access_pmu_evcntr': arch/arm64/kvm/sys_regs.c:592:10: error: 'ARMV8_PMU_CYCLE_IDX' undeclared (first use in this function) idx = ARMV8_PMU_CYCLE_IDX; ^ arch/arm64/kvm/sys_regs.c: In function 'access_pmu_evtyper': arch/arm64/kvm/sys_regs.c:638:14: error: 'ARMV8_PMU_CYCLE_IDX' undeclared (first use in this function) if (idx == ARMV8_PMU_CYCLE_IDX) ^ arch/arm64/kvm/hyp/switch.c:86:15: error: 'ARMV8_PMU_USERENR_MASK' undeclared (first use in this function) write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); This patch fixes the build with CONFIG_ARM_PMU disabled. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Sudeep Holla Signed-off-by: Christoffer Dall --- include/kvm/arm_pmu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index fe389ac31489..92e7e97ca8ff 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -18,13 +18,13 @@ #ifndef __ASM_ARM_KVM_PMU_H #define __ASM_ARM_KVM_PMU_H -#ifdef CONFIG_KVM_ARM_PMU - #include #include #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) +#ifdef CONFIG_KVM_ARM_PMU + struct kvm_pmc { u8 idx; /* index into the pmu->pmc array */ struct perf_event *perf_event; -- cgit v1.2.3 From 9a0fee2b552b1235fb1706ae1fc664ae74573be8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Jun 2016 16:02:35 -0400 Subject: sock_diag: do not broadcast raw socket destruction Diag intends to broadcast tcp_sk and udp_sk socket destruction. Testing sk->sk_protocol for IPPROTO_TCP/IPPROTO_UDP alone is not sufficient for this. Raw sockets can have the same type. Add a test for sk->sk_type. Fixes: eb4cb008529c ("sock_diag: define destruction multicast groups") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 4018b48f2b3b..a0596ca0e80a 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -36,6 +36,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) { switch (sk->sk_family) { case AF_INET: + if (sk->sk_type == SOCK_RAW) + return SKNLGRP_NONE; + switch (sk->sk_protocol) { case IPPROTO_TCP: return SKNLGRP_INET_TCP_DESTROY; @@ -45,6 +48,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) return SKNLGRP_NONE; } case AF_INET6: + if (sk->sk_type == SOCK_RAW) + return SKNLGRP_NONE; + switch (sk->sk_protocol) { case IPPROTO_TCP: return SKNLGRP_INET6_TCP_DESTROY; -- cgit v1.2.3 From 3f5be2da8565c1cce5655bb0948fcc957c6eb6c6 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 28 Jun 2016 12:07:50 -0400 Subject: audit: move audit_get_tty to reduce scope and kabi changes The only users of audit_get_tty and audit_put_tty are internal to audit, so move it out of include/linux/audit.h to kernel.h and create a proper function rather than inlining it. This also reduces kABI changes. Suggested-by: Paul Moore Signed-off-by: Richard Guy Briggs [PM: line wrapped description] Signed-off-by: Paul Moore --- include/linux/audit.h | 24 ------------------------ kernel/audit.c | 17 +++++++++++++++++ kernel/audit.h | 4 ++++ kernel/auditsc.c | 1 - 4 files changed, 21 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 32cdafb312d8..b40ed5df5542 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -26,7 +26,6 @@ #include #include #include -#include #define AUDIT_INO_UNSET ((unsigned long)-1) #define AUDIT_DEV_UNSET ((dev_t)-1) @@ -344,23 +343,6 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk) return tsk->sessionid; } -static inline struct tty_struct *audit_get_tty(struct task_struct *tsk) -{ - struct tty_struct *tty = NULL; - unsigned long flags; - - spin_lock_irqsave(&tsk->sighand->siglock, flags); - if (tsk->signal) - tty = tty_kref_get(tsk->signal->tty); - spin_unlock_irqrestore(&tsk->sighand->siglock, flags); - return tty; -} - -static inline void audit_put_tty(struct tty_struct *tty) -{ - tty_kref_put(tty); -} - extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); extern void __audit_bprm(struct linux_binprm *bprm); @@ -518,12 +500,6 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk) { return -1; } -static inline struct tty_struct *audit_get_tty(struct task_struct *tsk) -{ - return NULL; -} -static inline void audit_put_tty(struct tty_struct *tty) -{ } static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { } static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, diff --git a/kernel/audit.c b/kernel/audit.c index 384374a1d232..d5971010e44a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1866,6 +1866,23 @@ out_null: audit_log_format(ab, " exe=(null)"); } +struct tty_struct *audit_get_tty(struct task_struct *tsk) +{ + struct tty_struct *tty = NULL; + unsigned long flags; + + spin_lock_irqsave(&tsk->sighand->siglock, flags); + if (tsk->signal) + tty = tty_kref_get(tsk->signal->tty); + spin_unlock_irqrestore(&tsk->sighand->siglock, flags); + return tty; +} + +void audit_put_tty(struct tty_struct *tty) +{ + tty_kref_put(tty); +} + void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { const struct cred *cred; diff --git a/kernel/audit.h b/kernel/audit.h index cbbe6bb6496e..a492f4c4e710 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -23,6 +23,7 @@ #include #include #include +#include /* AUDIT_NAMES is the number of slots we reserve in the audit_context * for saving names from getname(). If we get more names we will allocate @@ -262,6 +263,9 @@ extern struct audit_entry *audit_dupe_rule(struct audit_krule *old); extern void audit_log_d_path_exe(struct audit_buffer *ab, struct mm_struct *mm); +extern struct tty_struct *audit_get_tty(struct task_struct *tsk); +extern void audit_put_tty(struct tty_struct *tty); + /* audit watch functions */ #ifdef CONFIG_AUDIT_WATCH extern void audit_put_watch(struct audit_watch *watch); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 33dafa70229d..60a354eed2fa 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -63,7 +63,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 42482b4546336ecd93acdd95e435bafd7a4c581c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 23 Jun 2016 14:50:35 -0700 Subject: drm/i915: Add more Kabylake PCI IDs. The spec has been updated adding new PCI IDs. Signed-off-by: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Link: http://patchwork.freedesktop.org/patch/msgid/1466718636-19675-1-git-send-email-rodrigo.vivi@intel.com (cherry picked from commit 33d9391d3020e069dca98fa87a604c037beb2b9e) Signed-off-by: Jani Nikula --- include/drm/i915_pciids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 9094599a1150..87dde1c48fbf 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -309,6 +309,7 @@ INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \ INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \ INTEL_VGA_DEVICE(0x5902, info), /* DT GT1 */ \ + INTEL_VGA_DEVICE(0x5908, info), /* Halo GT1 */ \ INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \ INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */ @@ -322,7 +323,9 @@ INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */ #define INTEL_KBL_GT3_IDS(info) \ + INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \ + INTEL_VGA_DEVICE(0x5927, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \ INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */ -- cgit v1.2.3 From 6b9dc6a474fd4e025c615ed4582735360005727c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 23 Jun 2016 14:50:36 -0700 Subject: drm/i915: Removing PCI IDs that are no longer listed as Kabylake. This is unusual. Usually IDs listed on early stages of platform definition are kept there as reserved for later use. However these IDs here are not listed anymore in any of steppings and devices IDs tables for Kabylake on configurations overview section of BSpec. So it is better removing them before they become used in any other future platform. Signed-off-by: Rodrigo Vivi Reviewed-by: Dhinakaran Pandiyan Link: http://patchwork.freedesktop.org/patch/msgid/1466718636-19675-2-git-send-email-rodrigo.vivi@intel.com (cherry picked from commit a922eb8d4581c883c37ce6e12dca9ff2cb1ea723) Signed-off-by: Jani Nikula --- include/drm/i915_pciids.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 87dde1c48fbf..33466bfc6440 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -325,15 +325,10 @@ #define INTEL_KBL_GT3_IDS(info) \ INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \ - INTEL_VGA_DEVICE(0x5927, info), /* ULT GT3 */ \ - INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \ - INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */ + INTEL_VGA_DEVICE(0x5927, info) /* ULT GT3 */ #define INTEL_KBL_GT4_IDS(info) \ - INTEL_VGA_DEVICE(0x5932, info), /* DT GT4 */ \ - INTEL_VGA_DEVICE(0x593B, info), /* Halo GT4 */ \ - INTEL_VGA_DEVICE(0x593A, info), /* SRV GT4 */ \ - INTEL_VGA_DEVICE(0x593D, info) /* WKS GT4 */ + INTEL_VGA_DEVICE(0x593B, info) /* Halo GT4 */ #define INTEL_KBL_IDS(info) \ INTEL_KBL_GT1_IDS(info), \ -- cgit v1.2.3 From ceb56070359b7329b5678b5d95a376fcb24767be Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 27 Jun 2016 21:38:11 +0200 Subject: bpf, perf: delay release of BPF prog after grace period Commit dead9f29ddcc ("perf: Fix race in BPF program unregister") moved destruction of BPF program from free_event_rcu() callback to __free_event(), which is problematic if used with tail calls: if prog A is attached as trace event directly, but at the same time present in a tail call map used by another trace event program elsewhere, then we need to delay destruction via RCU grace period since it can still be in use by the program doing the tail call (the prog first needs to be dropped from the tail call map, then trace event with prog A attached destroyed, so we get immediate destruction). Fixes: dead9f29ddcc ("perf: Fix race in BPF program unregister") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Jann Horn Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++++ kernel/events/core.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8269cafc6eb1..0de4de6dd43e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -264,6 +264,10 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) static inline void bpf_prog_put(struct bpf_prog *prog) { } + +static inline void bpf_prog_put_rcu(struct bpf_prog *prog) +{ +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 274450efea90..d00c47b67a97 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7531,7 +7531,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event) prog = event->tp_event->prog; if (prog) { event->tp_event->prog = NULL; - bpf_prog_put(prog); + bpf_prog_put_rcu(prog); } } -- cgit v1.2.3 From 3c35f6edc09b239a60de87a5aeb78563fc372704 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 16:56:49 +0100 Subject: reset: Reorder inline reset_control_get*() wrappers We're about to split the current API into two, where consumers will be forced to be explicit when requesting reset lines. The choice will be to either the call the *_exclusive or *_shared variant depending on whether they can actually tolorate not being asserted when that request is made. The new API will look like this once reorded and complete: reset_control_get_exclusive() reset_control_get_shared() reset_control_get_optional_exclusive() reset_control_get_optional_shared() of_reset_control_get_exclusive() of_reset_control_get_shared() of_reset_control_get_exclusive_by_index() of_reset_control_get_shared_by_index() devm_reset_control_get_exclusive() devm_reset_control_get_shared() devm_reset_control_get_optional_exclusive() devm_reset_control_get_optional_shared() devm_reset_control_get_exclusive_by_index() devm_reset_control_get_shared_by_index() Signed-off-by: Lee Jones Signed-off-by: Philipp Zabel --- include/linux/reset.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index ec0306ce7b92..33eaf11dabe2 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -107,12 +107,6 @@ static inline struct reset_control *__must_check reset_control_get( return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } -static inline struct reset_control *reset_control_get_optional( - struct device *dev, const char *id) -{ - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); -} - /** * reset_control_get_shared - Lookup and obtain a shared reference to a * reset controller. @@ -141,6 +135,12 @@ static inline struct reset_control *reset_control_get_shared( return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); } +static inline struct reset_control *reset_control_get_optional( + struct device *dev, const char *id) +{ + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); +} + /** * of_reset_control_get - Lookup and obtain an exclusive reference to a * reset controller. @@ -191,6 +191,21 @@ static inline struct reset_control *__must_check devm_reset_control_get( return __devm_reset_control_get(dev, id, 0, 0); } +/** + * devm_reset_control_get_shared - resource managed reset_control_get_shared() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_shared(). For reset controllers returned from + * this function, reset_control_put() is called automatically on driver detach. + * See reset_control_get_shared() for more information. + */ +static inline struct reset_control *devm_reset_control_get_shared( + struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, 1); +} + static inline struct reset_control *devm_reset_control_get_optional( struct device *dev, const char *id) { @@ -212,21 +227,6 @@ static inline struct reset_control *devm_reset_control_get_by_index( return __devm_reset_control_get(dev, NULL, index, 0); } -/** - * devm_reset_control_get_shared - resource managed reset_control_get_shared() - * @dev: device to be reset by the controller - * @id: reset line name - * - * Managed reset_control_get_shared(). For reset controllers returned from - * this function, reset_control_put() is called automatically on driver detach. - * See reset_control_get_shared() for more information. - */ -static inline struct reset_control *devm_reset_control_get_shared( - struct device *dev, const char *id) -{ - return __devm_reset_control_get(dev, id, 0, 1); -} - /** * devm_reset_control_get_shared_by_index - resource managed * reset_control_get_shared -- cgit v1.2.3 From a53e35db70d1638002e40d495d096181dbb0fe16 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 16:56:50 +0100 Subject: reset: Ensure drivers are explicit when requesting reset lines Phasing out generic reset line requests enables us to make some better decisions on when and how to (de)assert said lines. If an 'exclusive' line is requested, we know a device *requires* a reset and that it's preferable to act upon a request right away. However, if a 'shared' reset line is requested, we can reasonably assume sure that placing a device into reset isn't a hard requirement, but probably a measure to save power and is thus able to cope with not being asserted if another device is still in use. In order allow gentle adoption and not to forcing all consumers to move to the API immediately, causing administration headache between subsystems, this patch adds some temporary stand-in shim-calls. This will ease the burden at merge time and allow subsystems to migrate over to the new API in a more realistic time-frame. Signed-off-by: Lee Jones Signed-off-by: Philipp Zabel --- include/linux/reset.h | 106 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index 33eaf11dabe2..9cf4cf39bf1d 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -84,8 +84,8 @@ static inline struct reset_control *__devm_reset_control_get( #endif /* CONFIG_RESET_CONTROLLER */ /** - * reset_control_get - Lookup and obtain an exclusive reference to a - * reset controller. + * reset_control_get_exclusive - Lookup and obtain an exclusive reference + * to a reset controller. * @dev: device to be reset by the controller * @id: reset line name * @@ -98,8 +98,8 @@ static inline struct reset_control *__devm_reset_control_get( * * Use of id names is optional. */ -static inline struct reset_control *__must_check reset_control_get( - struct device *dev, const char *id) +static inline struct reset_control * +__must_check reset_control_get_exclusive(struct device *dev, const char *id) { #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); @@ -135,15 +135,15 @@ static inline struct reset_control *reset_control_get_shared( return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); } -static inline struct reset_control *reset_control_get_optional( +static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } /** - * of_reset_control_get - Lookup and obtain an exclusive reference to a - * reset controller. + * of_reset_control_get_exclusive - Lookup and obtain an exclusive reference + * to a reset controller. * @node: device to be reset by the controller * @id: reset line name * @@ -151,15 +151,16 @@ static inline struct reset_control *reset_control_get_optional( * * Use of id names is optional. */ -static inline struct reset_control *of_reset_control_get( +static inline struct reset_control *of_reset_control_get_exclusive( struct device_node *node, const char *id) { return __of_reset_control_get(node, id, 0, 0); } /** - * of_reset_control_get_by_index - Lookup and obtain an exclusive reference to - * a reset controller by index. + * of_reset_control_get_exclusive_by_index - Lookup and obtain an exclusive + * reference to a reset controller + * by index. * @node: device to be reset by the controller * @index: index of the reset controller * @@ -167,23 +168,27 @@ static inline struct reset_control *of_reset_control_get( * in whatever order. Returns a struct reset_control or IS_ERR() condition * containing errno. */ -static inline struct reset_control *of_reset_control_get_by_index( +static inline struct reset_control *of_reset_control_get_exclusive_by_index( struct device_node *node, int index) { return __of_reset_control_get(node, NULL, index, 0); } /** - * devm_reset_control_get - resource managed reset_control_get() + * devm_reset_control_get_exclusive - resource managed + * reset_control_get_exclusive() * @dev: device to be reset by the controller * @id: reset line name * - * Managed reset_control_get(). For reset controllers returned from this - * function, reset_control_put() is called automatically on driver detach. - * See reset_control_get() for more information. + * Managed reset_control_get_exclusive(). For reset controllers returned + * from this function, reset_control_put() is called automatically on driver + * detach. + * + * See reset_control_get_exclusive() for more information. */ -static inline struct reset_control *__must_check devm_reset_control_get( - struct device *dev, const char *id) +static inline struct reset_control * +__must_check devm_reset_control_get_exclusive(struct device *dev, + const char *id) { #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); @@ -206,23 +211,26 @@ static inline struct reset_control *devm_reset_control_get_shared( return __devm_reset_control_get(dev, id, 0, 1); } -static inline struct reset_control *devm_reset_control_get_optional( +static inline struct reset_control *devm_reset_control_get_optional_exclusive( struct device *dev, const char *id) { return __devm_reset_control_get(dev, id, 0, 0); } /** - * devm_reset_control_get_by_index - resource managed reset_control_get + * devm_reset_control_get_exclusive_by_index - resource managed + * reset_control_get_exclusive() * @dev: device to be reset by the controller * @index: index of the reset controller * - * Managed reset_control_get(). For reset controllers returned from this - * function, reset_control_put() is called automatically on driver detach. - * See reset_control_get() for more information. + * Managed reset_control_get_exclusive(). For reset controllers returned from + * this function, reset_control_put() is called automatically on driver + * detach. + * + * See reset_control_get_exclusive() for more information. */ -static inline struct reset_control *devm_reset_control_get_by_index( - struct device *dev, int index) +static inline struct reset_control * +devm_reset_control_get_exclusive_by_index(struct device *dev, int index) { return __devm_reset_control_get(dev, NULL, index, 0); } @@ -243,4 +251,54 @@ static inline struct reset_control *devm_reset_control_get_shared_by_index( return __devm_reset_control_get(dev, NULL, index, 1); } +/* + * TEMPORARY calls to use during transition: + * + * of_reset_control_get() => of_reset_control_get_exclusive() + * + * These inline function calls will be removed once all consumers + * have been moved over to the new explicit API. + */ +static inline struct reset_control *reset_control_get( + struct device *dev, const char *id) +{ + return reset_control_get_exclusive(dev, id); +} + +static inline struct reset_control *reset_control_get_optional( + struct device *dev, const char *id) +{ + return reset_control_get_optional_exclusive(dev, id); +} + +static inline struct reset_control *of_reset_control_get( + struct device_node *node, const char *id) +{ + return of_reset_control_get_exclusive(node, id); +} + +static inline struct reset_control *of_reset_control_get_by_index( + struct device_node *node, int index) +{ + return of_reset_control_get_exclusive_by_index(node, index); +} + +static inline struct reset_control *devm_reset_control_get( + struct device *dev, const char *id) +{ + return devm_reset_control_get_exclusive(dev, id); +} + +static inline struct reset_control *devm_reset_control_get_optional( + struct device *dev, const char *id) +{ + return devm_reset_control_get_optional_exclusive(dev, id); + +} + +static inline struct reset_control *devm_reset_control_get_by_index( + struct device *dev, int index) +{ + return devm_reset_control_get_exclusive_by_index(dev, index); +} #endif -- cgit v1.2.3 From 40faee8ee471ae526344cd5c62ff520d356de841 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 16:56:51 +0100 Subject: reset: Supply *_shared variant calls when using of_* API Consumers need to be able to specify whether they are requesting an 'exclusive' or 'shared' reset line no matter which API (of_*, devm_*, etc) they are using. This change allows users of the of_* API in particular to specify that their request is for a 'shared' line. Signed-off-by: Lee Jones Signed-off-by: Philipp Zabel --- include/linux/reset.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index 9cf4cf39bf1d..fd69240907c8 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -157,6 +157,31 @@ static inline struct reset_control *of_reset_control_get_exclusive( return __of_reset_control_get(node, id, 0, 0); } +/** + * of_reset_control_get_shared - Lookup and obtain an shared reference + * to a reset controller. + * @node: device to be reset by the controller + * @id: reset line name + * + * When a reset-control is shared, the behavior of reset_control_assert / + * deassert is changed, the reset-core will keep track of a deassert_count + * and only (re-)assert the reset after reset_control_assert has been called + * as many times as reset_control_deassert was called. Also see the remark + * about shared reset-controls in the reset_control_assert docs. + * + * Calling reset_control_assert without first calling reset_control_deassert + * is not allowed on a shared reset control. Calling reset_control_reset is + * also not allowed on a shared reset control. + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * Use of id names is optional. + */ +static inline struct reset_control *of_reset_control_get_shared( + struct device_node *node, const char *id) +{ + return __of_reset_control_get(node, id, 0, 1); +} + /** * of_reset_control_get_exclusive_by_index - Lookup and obtain an exclusive * reference to a reset controller @@ -174,6 +199,34 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index( return __of_reset_control_get(node, NULL, index, 0); } +/** + * of_reset_control_get_shared_by_index - Lookup and obtain an shared + * reference to a reset controller + * by index. + * @node: device to be reset by the controller + * @index: index of the reset controller + * + * When a reset-control is shared, the behavior of reset_control_assert / + * deassert is changed, the reset-core will keep track of a deassert_count + * and only (re-)assert the reset after reset_control_assert has been called + * as many times as reset_control_deassert was called. Also see the remark + * about shared reset-controls in the reset_control_assert docs. + * + * Calling reset_control_assert without first calling reset_control_deassert + * is not allowed on a shared reset control. Calling reset_control_reset is + * also not allowed on a shared reset control. + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * This is to be used to perform a list of resets for a device or power domain + * in whatever order. Returns a struct reset_control or IS_ERR() condition + * containing errno. + */ +static inline struct reset_control *of_reset_control_get_shared_by_index( + struct device_node *node, int index) +{ + return __of_reset_control_get(node, NULL, index, 1); +} + /** * devm_reset_control_get_exclusive - resource managed * reset_control_get_exclusive() -- cgit v1.2.3 From c33d61a0c400c50f378ef03a386e646abca292ca Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 16:56:52 +0100 Subject: reset: Supply *_shared variant calls when using *_optional APIs Consumers need to be able to specify whether they are requesting an 'exclusive' or 'shared' reset line no matter which API (of_*, devm_*, etc) they are using. This change allows users of the optional_* API in particular to specify that their request is for a 'shared' line. Signed-off-by: Lee Jones Signed-off-by: Philipp Zabel --- include/linux/reset.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index fd69240907c8..c358106611af 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -141,6 +141,12 @@ static inline struct reset_control *reset_control_get_optional_exclusive( return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } +static inline struct reset_control *reset_control_get_optional_shared( + struct device *dev, const char *id) +{ + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); +} + /** * of_reset_control_get_exclusive - Lookup and obtain an exclusive reference * to a reset controller. @@ -270,6 +276,12 @@ static inline struct reset_control *devm_reset_control_get_optional_exclusive( return __devm_reset_control_get(dev, id, 0, 0); } +static inline struct reset_control *devm_reset_control_get_optional_shared( + struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, 1); +} + /** * devm_reset_control_get_exclusive_by_index - resource managed * reset_control_get_exclusive() -- cgit v1.2.3 From 0bcc0eab363aa80f79769324bf3f2ab7781840f2 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 6 Jun 2016 16:56:53 +0100 Subject: reset: TRIVIAL: Add line break at same place for similar APIs Standardise the way inline functions: devm_reset_control_get_shared_by_index devm_reset_control_get_exclusive_by_index ... are formatted. Signed-off-by: Lee Jones Signed-off-by: Philipp Zabel --- include/linux/reset.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index c358106611af..45a4abeb6acb 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -310,8 +310,8 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index) * this function, reset_control_put() is called automatically on driver detach. * See reset_control_get_shared() for more information. */ -static inline struct reset_control *devm_reset_control_get_shared_by_index( - struct device *dev, int index) +static inline struct reset_control * +devm_reset_control_get_shared_by_index(struct device *dev, int index) { return __devm_reset_control_get(dev, NULL, index, 1); } -- cgit v1.2.3 From ba4a1c28a92df55b9263e838068b92eaa80c7850 Mon Sep 17 00:00:00 2001 From: Steve Twiss Date: Mon, 27 Jun 2016 16:06:36 +0100 Subject: mfd: da9053: Fix compiler warning message for uninitialised variable Fix compiler warning caused by an uninitialised variable inside da9052_group_write() function. Defaulting the value to zero covers the trivial case. Signed-off-by: Steve Twiss Reported-by: Geert Uytterhoeven Signed-off-by: Lee Jones --- include/linux/mfd/da9052/da9052.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index c18a4c19d6fc..ce9230af09c2 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -171,7 +171,7 @@ static inline int da9052_group_read(struct da9052 *da9052, unsigned char reg, static inline int da9052_group_write(struct da9052 *da9052, unsigned char reg, unsigned reg_cnt, unsigned char *val) { - int ret; + int ret = 0; int i; for (i = 0; i < reg_cnt; i++) { -- cgit v1.2.3 From 5c672ab3f0ee0f78f7acad183f34db0f8781a200 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 30 Jun 2016 13:10:49 +0200 Subject: fuse: serialize dirops by default Negotiate with userspace filesystems whether they support parallel readdir and lookup. Disable parallelism by default for fear of breaking fuse filesystems. Signed-off-by: Miklos Szeredi Fixes: 9902af79c01a ("parallel lookups: actual switch to rwsem") Fixes: d9b3dbdcfd62 ("fuse: switch to ->iterate_shared()") --- fs/fuse/dir.c | 4 ++++ fs/fuse/fuse_i.h | 9 +++++++++ fs/fuse/inode.c | 19 ++++++++++++++++++- include/uapi/linux/fuse.h | 7 ++++++- 4 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ccd4971cc6c1..264f07c7754e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -341,8 +341,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, struct dentry *newent; bool outarg_valid = true; + fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); + fuse_unlock_inode(dir); if (err == -ENOENT) { outarg_valid = false; err = 0; @@ -1341,7 +1343,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx) fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, FUSE_READDIR); } + fuse_lock_inode(inode); fuse_request_send(fc, req); + fuse_unlock_inode(inode); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index eddbe02c4028..929c383432b0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -110,6 +110,9 @@ struct fuse_inode { /** Miscellaneous bits describing inode state */ unsigned long state; + + /** Lock for serializing lookup and readdir for back compatibility*/ + struct mutex mutex; }; /** FUSE inode state bits */ @@ -540,6 +543,9 @@ struct fuse_conn { /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; + /** allow parallel lookups and readdir (default is serialized) */ + unsigned parallel_dirops:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -956,4 +962,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, void fuse_set_initialized(struct fuse_conn *fc); +void fuse_unlock_inode(struct inode *inode); +void fuse_lock_inode(struct inode *inode); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1ce67668a8e1..9961d8432ce3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); init_waitqueue_head(&fi->page_waitq); + mutex_init(&fi->mutex); fi->forget = fuse_alloc_forget(); if (!fi->forget) { kmem_cache_free(fuse_inode_cachep, inode); @@ -117,6 +118,7 @@ static void fuse_destroy_inode(struct inode *inode) struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(!list_empty(&fi->write_files)); BUG_ON(!list_empty(&fi->queued_writes)); + mutex_destroy(&fi->mutex); kfree(fi->forget); call_rcu(&inode->i_rcu, fuse_i_callback); } @@ -351,6 +353,18 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, return 0; } +void fuse_lock_inode(struct inode *inode) +{ + if (!get_fuse_conn(inode)->parallel_dirops) + mutex_lock(&get_fuse_inode(inode)->mutex); +} + +void fuse_unlock_inode(struct inode *inode) +{ + if (!get_fuse_conn(inode)->parallel_dirops) + mutex_unlock(&get_fuse_inode(inode)->mutex); +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -898,6 +912,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_dio = 1; if (arg->flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; + if (arg->flags & FUSE_PARALLEL_DIROPS) + fc->parallel_dirops = 1; if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; } else { @@ -928,7 +944,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | - FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT; + FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | + FUSE_PARALLEL_DIROPS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 5974fae54e12..27e17363263a 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -105,6 +105,9 @@ * * 7.24 * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support + * + * 7.25 + * - add FUSE_PARALLEL_DIROPS */ #ifndef _LINUX_FUSE_H @@ -140,7 +143,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 24 +#define FUSE_KERNEL_MINOR_VERSION 25 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -234,6 +237,7 @@ struct fuse_file_lock { * FUSE_ASYNC_DIO: asynchronous direct I/O submission * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens + * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -253,6 +257,7 @@ struct fuse_file_lock { #define FUSE_ASYNC_DIO (1 << 15) #define FUSE_WRITEBACK_CACHE (1 << 16) #define FUSE_NO_OPEN_SUPPORT (1 << 17) +#define FUSE_PARALLEL_DIROPS (1 << 18) /** * CUSE INIT request/reply flags -- cgit v1.2.3