From 9b3eb54106cf6acd03f07cf0ab01c13676a226c2 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 3 May 2017 16:43:19 +0200 Subject: xfrm: fix stack access out of bounds with CONFIG_XFRM_SUB_POLICY When CONFIG_XFRM_SUB_POLICY=y, xfrm_dst stores a copy of the flowi for that dst. Unfortunately, the code that allocates and fills this copy doesn't care about what type of flowi (flowi, flowi4, flowi6) gets passed. In multiple code paths (from raw_sendmsg, from TCP when replying to a FIN, in vxlan, geneve, and gre), the flowi that gets passed to xfrm is actually an on-stack flowi4, so we end up reading stuff from the stack past the end of the flowi4 struct. Since xfrm_dst->origin isn't used anywhere following commit ca116922afa8 ("xfrm: Eliminate "fl" and "pol" args to xfrm_bundle_ok()."), just get rid of it. xfrm_dst->partner isn't used either, so get rid of that too. Fixes: 9d6ec938019c ("ipv4: Use flowi4 in public route lookup interfaces.") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6793a30c66b1..7e7e2b0d2915 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -979,10 +979,6 @@ struct xfrm_dst { struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; -#ifdef CONFIG_XFRM_SUB_POLICY - struct flowi *origin; - struct xfrm_selector *partner; -#endif u32 xfrm_genid; u32 policy_genid; u32 route_mtu_cached; @@ -998,12 +994,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) dst_release(xdst->route); if (likely(xdst->u.dst.xfrm)) xfrm_state_put(xdst->u.dst.xfrm); -#ifdef CONFIG_XFRM_SUB_POLICY - kfree(xdst->origin); - xdst->origin = NULL; - kfree(xdst->partner); - xdst->partner = NULL; -#endif } #endif -- cgit v1.2.3 From c2372c20425bd75a5527b3e2204059762190f6ca Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 22 May 2017 13:09:20 +0200 Subject: of/platform: Make of_platform_device_destroy globally visible of_platform_device_destroy is the counterpart to of_platform_device_create which is a non-static function. After creating a platform device it might be neccessary to destroy it to deal with -EPROBE_DEFER where a repeated of_platform_device_create call would fail otherwise. Therefore also make of_platform_device_destroy globally visible. Signed-off-by: Jan Glauber Acked-by: Rob Herring Signed-off-by: Ulf Hansson --- drivers/of/platform.c | 3 ++- include/linux/of_platform.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 71fecc2debfc..703a42118ffc 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -523,7 +523,7 @@ static int __init of_platform_default_populate_init(void) arch_initcall_sync(of_platform_default_populate_init); #endif -static int of_platform_device_destroy(struct device *dev, void *data) +int of_platform_device_destroy(struct device *dev, void *data) { /* Do not touch devices not populated from the device tree */ if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) @@ -544,6 +544,7 @@ static int of_platform_device_destroy(struct device *dev, void *data) of_node_clear_flag(dev->of_node, OF_POPULATED_BUS); return 0; } +EXPORT_SYMBOL_GPL(of_platform_device_destroy); /** * of_platform_depopulate() - Remove devices populated from device tree diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index dc8224ae28d5..e0d1946270f3 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -64,6 +64,7 @@ extern struct platform_device *of_platform_device_create(struct device_node *np, const char *bus_id, struct device *parent); +extern int of_platform_device_destroy(struct device *dev, void *data); extern int of_platform_bus_probe(struct device_node *root, const struct of_device_id *matches, struct device *parent); -- cgit v1.2.3 From 7254a50a5db40ca6739ddf37e0a45e6912532b2c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 22 May 2017 23:05:05 +0800 Subject: blk-mq: remove blk_mq_abort_requeue_list() No one uses it any more, so remove it. Reviewed-by: Keith Busch Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig --- block/blk-mq.c | 19 ------------------- include/linux/blk-mq.h | 1 - 2 files changed, 20 deletions(-) (limited to 'include') diff --git a/block/blk-mq.c b/block/blk-mq.c index a69ad122ed66..f2224ffd225d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); -void blk_mq_abort_requeue_list(struct request_queue *q) -{ - unsigned long flags; - LIST_HEAD(rq_list); - - spin_lock_irqsave(&q->requeue_lock, flags); - list_splice_init(&q->requeue_list, &rq_list); - spin_unlock_irqrestore(&q->requeue_lock, flags); - - while (!list_empty(&rq_list)) { - struct request *rq; - - rq = list_first_entry(&rq_list, struct request, queuelist); - list_del_init(&rq->queuelist); - blk_mq_end_request(rq, -EIO); - } -} -EXPORT_SYMBOL(blk_mq_abort_requeue_list); - struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) { if (tag < tags->nr_tags) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c47aa248c640..fcd641032f8d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -238,7 +238,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -void blk_mq_abort_requeue_list(struct request_queue *q); void blk_mq_complete_request(struct request *rq); bool blk_mq_queue_stopped(struct request_queue *q); -- cgit v1.2.3 From c70d9d809fdeecedb96972457ee45c49a232d97f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 May 2017 15:40:12 -0500 Subject: ptrace: Properly initialize ptracer_cred on fork When I introduced ptracer_cred I failed to consider the weirdness of fork where the task_struct copies the old value by default. This winds up leaving ptracer_cred set even when a process forks and the child process does not wind up being ptraced. Because ptracer_cred is not set on non-ptraced processes whose parents were ptraced this has broken the ability of the enlightenment window manager to start setuid children. Fix this by properly initializing ptracer_cred in ptrace_init_task This must be done with a little bit of care to preserve the current value of ptracer_cred when ptrace carries through fork. Re-reading the ptracer_cred from the ptracing process at this point is inconsistent with how PT_PTRACE_CAP has been maintained all of these years. Tested-by: Takashi Iwai Fixes: 64b875f7ac8a ("ptrace: Capture the ptracer's creds not PT_PTRACE_CAP") Signed-off-by: "Eric W. Biederman" --- include/linux/ptrace.h | 7 +++++-- kernel/ptrace.c | 20 +++++++++++++------- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 422bc2e4cb6a..ef3eb8bbfee4 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -54,7 +54,8 @@ extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, - struct task_struct *new_parent); + struct task_struct *new_parent, + const struct cred *ptracer_cred); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 @@ -206,7 +207,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) if (unlikely(ptrace) && current->ptrace) { child->ptrace = current->ptrace; - __ptrace_link(child, current->parent); + __ptrace_link(child, current->parent, current->ptracer_cred); if (child->ptrace & PT_SEIZED) task_set_jobctl_pending(child, JOBCTL_TRAP_STOP); @@ -215,6 +216,8 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) set_tsk_thread_flag(child, TIF_SIGPENDING); } + else + child->ptracer_cred = NULL; } /** diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 266ddcc1d8bb..60f356d91060 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, } +void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, + const struct cred *ptracer_cred) +{ + BUG_ON(!list_empty(&child->ptrace_entry)); + list_add(&child->ptrace_entry, &new_parent->ptraced); + child->parent = new_parent; + child->ptracer_cred = get_cred(ptracer_cred); +} + /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. * * Must be called with the tasklist lock write-held. */ -void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) +static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - BUG_ON(!list_empty(&child->ptrace_entry)); - list_add(&child->ptrace_entry, &new_parent->ptraced); - child->parent = new_parent; rcu_read_lock(); - child->ptracer_cred = get_cred(__task_cred(new_parent)); + __ptrace_link(child, new_parent, __task_cred(new_parent)); rcu_read_unlock(); } @@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request, flags |= PT_SEIZED; task->ptrace = flags; - __ptrace_link(task, current); + ptrace_link(task, current); /* SEIZE doesn't trap tracee on attach */ if (!seize) @@ -459,7 +465,7 @@ static int ptrace_traceme(void) */ if (!ret && !(current->real_parent->flags & PF_EXITING)) { current->ptrace = PT_PTRACED; - __ptrace_link(current, current->real_parent); + ptrace_link(current, current->real_parent); } } write_unlock_irq(&tasklist_lock); -- cgit v1.2.3 From 3aa4266405a6c2e03eb0ff12d7c573d3d903da4c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 10 May 2017 13:48:41 +0300 Subject: net/sched: act_csum: Add accessors for offloading drivers Add the accessors for realizing if this is a csum action, and for which fields checksum is needed. Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/net/tc_act/tc_csum.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index f31fb6331a53..3248beaf16b0 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -3,6 +3,7 @@ #include #include +#include struct tcf_csum { struct tc_action common; @@ -11,4 +12,18 @@ struct tcf_csum { }; #define to_tcf_csum(a) ((struct tcf_csum *)a) +static inline bool is_tcf_csum(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_CSUM) + return true; +#endif + return false; +} + +static inline u32 tcf_csum_update_flags(const struct tc_action *a) +{ + return to_tcf_csum(a)->update_flags; +} + #endif /* __NET_TC_CSUM_H */ -- cgit v1.2.3 From 73dd3a4839c1d27c36d4dcc92e1ff44225ecbeb7 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 23 Feb 2017 11:19:36 +0200 Subject: net/mlx5: Avoid using pending command interface slots Currently when firmware command gets stuck or it takes long time to complete, the driver command will get timeout and the command slot is freed and can be used for new commands, and if the firmware receive new command on the old busy slot its behavior is unexpected and this could be harmful. To fix this when the driver command gets timeout we return failure, but we don't free the command slot and we wait for the firmware to explicitly respond to that command. Once all the entries are busy we will stop processing new firmware commands. Fixes: 9cba4ebcf374 ('net/mlx5: Fix potential deadlock in command mode change') Signed-off-by: Mohamad Haj Yahia Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 41 +++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- include/linux/mlx5/driver.h | 7 +++- 4 files changed, 44 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 5bdaf3d545b2..10d282841f5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -774,7 +774,7 @@ static void cb_timeout_handler(struct work_struct *work) mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } static void cmd_work_handler(struct work_struct *work) @@ -804,6 +804,7 @@ static void cmd_work_handler(struct work_struct *work) } cmd->ent_arr[ent->idx] = ent; + set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); @@ -825,6 +826,20 @@ static void cmd_work_handler(struct work_struct *work) if (ent->callback) schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + /* Skip sending command to fw if internal error */ + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + u8 status = 0; + u32 drv_synd; + + ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); + MLX5_SET(mbox_out, ent->out, status, status); + MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); + + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + return; + } + /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -835,7 +850,7 @@ static void cmd_work_handler(struct work_struct *work) poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT)); } } @@ -879,7 +894,7 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) wait_for_completion(&ent->done); } else if (!wait_for_completion_timeout(&ent->done, timeout)) { ent->ret = -ETIMEDOUT; - mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); } err = ent->ret; @@ -1375,7 +1390,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1395,6 +1410,19 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) struct semaphore *sem; ent = cmd->ent_arr[i]; + + /* if we already completed the command, ignore it */ + if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, + &ent->state)) { + /* only real completion can free the cmd slot */ + if (!forced) { + mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", + ent->idx); + free_ent(cmd, ent->idx); + } + continue; + } + if (ent->callback) cancel_delayed_work(&ent->cb_timeout_work); if (ent->page_queue) @@ -1417,7 +1445,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); } - free_ent(cmd, ent->idx); + + /* only real completion will free the entry slot */ + if (!forced) + free_ent(cmd, ent->idx); if (ent->callback) { ds = ent->ts2 - ent->ts1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ea5d8d37a75c..33eae5ad2fb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -422,7 +422,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_CMD: - mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector)); + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); break; case MLX5_EVENT_TYPE_PORT_CHANGE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index d0515391d33b..44f59b1d6f0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -90,7 +90,7 @@ static void trigger_cmd_completions(struct mlx5_core_dev *dev) spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); mlx5_core_dbg(dev, "vector 0x%llx\n", vector); - mlx5_cmd_comp_handler(dev, vector); + mlx5_cmd_comp_handler(dev, vector, true); return; no_trig: diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bcdf739ee41a..93273d9ea4d1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -787,7 +787,12 @@ enum { typedef void (*mlx5_cmd_cbk_t)(int status, void *context); +enum { + MLX5_CMD_ENT_STATE_PENDING_COMP, +}; + struct mlx5_cmd_work_ent { + unsigned long state; struct mlx5_cmd_msg *in; struct mlx5_cmd_msg *out; void *uout; @@ -976,7 +981,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, -- cgit v1.2.3 From 7f65b1f5adc5f8496ca8bec4947de66fefe36220 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 22 May 2017 14:50:30 +0200 Subject: cdc-ether: divorce initialisation with a filter reset and a generic method Some devices need their multicast filter reset but others are crashed by that. So the methods need to be separated. Signed-off-by: Oliver Neukum Reported-by: "Ridgway, Keith" Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 31 ++++++++++++++++++++++++------- include/linux/usb/usbnet.h | 1 + 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index f3ae88fdf332..8ab281b478f2 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -310,6 +310,26 @@ skip: return -ENODEV; } + return 0; + +bad_desc: + dev_info(&dev->udev->dev, "bad CDC descriptors\n"); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind); + + +/* like usbnet_generic_cdc_bind() but handles filter initialization + * correctly + */ +int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf) +{ + int rv; + + rv = usbnet_generic_cdc_bind(dev, intf); + if (rv < 0) + goto bail_out; + /* Some devices don't initialise properly. In particular * the packet filter is not reset. There are devices that * don't do reset all the way. So the packet filter should @@ -317,13 +337,10 @@ skip: */ usbnet_cdc_update_filter(dev); - return 0; - -bad_desc: - dev_info(&dev->udev->dev, "bad CDC descriptors\n"); - return -ENODEV; +bail_out: + return rv; } -EXPORT_SYMBOL_GPL(usbnet_generic_cdc_bind); +EXPORT_SYMBOL_GPL(usbnet_ether_cdc_bind); void usbnet_cdc_unbind(struct usbnet *dev, struct usb_interface *intf) { @@ -417,7 +434,7 @@ int usbnet_cdc_bind(struct usbnet *dev, struct usb_interface *intf) BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data) < sizeof(struct cdc_state))); - status = usbnet_generic_cdc_bind(dev, intf); + status = usbnet_ether_cdc_bind(dev, intf); if (status < 0) return status; diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 7dffa5624ea6..97116379db5f 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -206,6 +206,7 @@ struct cdc_state { }; extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *); +extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf); extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_status(struct usbnet *, struct urb *); -- cgit v1.2.3 From 12e8b570e732eaa5eae3a2895ba3fbcf91bde2b4 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 22 May 2017 20:13:07 +0200 Subject: mlx5: fix bug reading rss_hash_type from CQE Masks for extracting part of the Completion Queue Entry (CQE) field rss_hash_type was swapped, namely CQE_RSS_HTYPE_IP and CQE_RSS_HTYPE_L4. The bug resulted in setting skb->l4_hash, even-though the rss_hash_type indicated that hash was NOT computed over the L4 (UDP or TCP) part of the packet. Added comments from the datasheet, to make it more clear what these masks are selecting. Signed-off-by: Jesper Dangaard Brouer Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index dd9a263ed368..a940ec6a046c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -787,8 +787,14 @@ enum { }; enum { - CQE_RSS_HTYPE_IP = 0x3 << 6, - CQE_RSS_HTYPE_L4 = 0x3 << 2, + CQE_RSS_HTYPE_IP = 0x3 << 2, + /* cqe->rss_hash_type[3:2] - IP destination selected for hash + * (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved) + */ + CQE_RSS_HTYPE_L4 = 0x3 << 6, + /* cqe->rss_hash_type[7:6] - L4 destination selected for hash + * (00 = none, 01 = TCP. 10 = UDP, 11 = IPSEC.SPI + */ }; enum { -- cgit v1.2.3 From 6f4dbd149d2a151b89d1a5bbf7530ee5546c7908 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 May 2017 11:33:16 +0200 Subject: libceph: use kbasename() and kill ceph_file_part() Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/ceph_debug.h | 6 +++--- net/ceph/ceph_common.c | 13 ------------- 2 files changed, 3 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index aa2e19182d99..51c5bd64bd00 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -3,6 +3,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include + #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG /* @@ -12,12 +14,10 @@ */ # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) -extern const char *ceph_file_part(const char *s, int len); # define dout(fmt, ...) \ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ - ceph_file_part(__FILE__, sizeof(__FILE__)), \ - __LINE__, ##__VA_ARGS__) + kbasename(__FILE__), __LINE__, ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4fd02831beed..47e94b560ba0 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -56,19 +56,6 @@ static const struct kernel_param_ops param_ops_supported_features = { module_param_cb(supported_features, ¶m_ops_supported_features, NULL, S_IRUGO); -/* - * find filename portion of a path (/foo/bar/baz -> baz) - */ -const char *ceph_file_part(const char *s, int len) -{ - const char *e = s + len; - - while (e != s && *(e-1) != '/') - e--; - return e; -} -EXPORT_SYMBOL(ceph_file_part); - const char *ceph_msg_type_name(int type) { switch (type) { -- cgit v1.2.3 From 4d071c3238987325b9e50e33051a40d1cce311cc Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 23 May 2017 14:18:17 -0500 Subject: PCI/PM: Add needs_resume flag to avoid suspend complete optimization Some drivers - like i915 - may not support the system suspend direct complete optimization due to differences in their runtime and system suspend sequence. Add a flag that when set resumes the device before calling the driver's system suspend handlers which effectively disables the optimization. Needed by a future patch fixing suspend/resume on i915. Suggested by Rafael. Signed-off-by: Imre Deak Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Cc: stable@vger.kernel.org --- drivers/pci/pci.c | 3 ++- include/linux/pci.h | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b01bd5bba8e6..563901cd9c06 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2144,7 +2144,8 @@ bool pci_dev_keep_suspended(struct pci_dev *pci_dev) if (!pm_runtime_suspended(dev) || pci_target_state(pci_dev) != pci_dev->current_state - || platform_pci_need_resume(pci_dev)) + || platform_pci_need_resume(pci_dev) + || (pci_dev->dev_flags & PCI_DEV_FLAGS_NEEDS_RESUME)) return false; /* diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..df7dd9021646 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -183,6 +183,11 @@ enum pci_dev_flags { PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9), /* Do not use FLR even if device advertises PCI_AF_CAP */ PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), + /* + * Resume before calling the driver's system suspend hooks, disabling + * the direct_complete optimization. + */ + PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11), }; enum pci_irq_reroute_variant { -- cgit v1.2.3 From 35d2f80b07bbe03fb358afb0bdeff7437a7d67ff Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:41 -0400 Subject: vlan: Fix tcp checksum offloads in Q-in-Q vlans It appears that TCP checksum offloading has been broken for Q-in-Q vlans. The behavior was execerbated by the series commit afb0bc972b52 ("Merge branch 'stacked_vlan_tso'") that that enabled accleleration features on stacked vlans. However, event without that series, it is possible to trigger this issue. It just requires a lot more specialized configuration. The root cause is the interaction between how netdev_intersect_features() works, the features actually set on the vlan devices and HW having the ability to run checksum with longer headers. The issue starts when netdev_interesect_features() replaces NETIF_F_HW_CSUM with a combination of NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM, if the HW advertises IP|IPV6 specific checksums. This happens for tagged and multi-tagged packets. However, HW that enables IP|IPV6 checksum offloading doesn't gurantee that packets with arbitrarily long headers can be checksummed. This patch disables IP|IPV6 checksums on the packet for multi-tagged packets. CC: Toshiaki Makita CC: Michal Kubecek Signed-off-by: Vladislav Yasevich Acked-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8d5fcd6284ce..283dc2f5364d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, netdev_features_t features) { - if (skb_vlan_tagged_multi(skb)) - features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + if (skb_vlan_tagged_multi(skb)) { + /* In the case of multi-tagged packets, use a direct mask + * instead of using netdev_interesect_features(), to make + * sure that only devices supporting NETIF_F_HW_CSUM will + * have checksum offloading support. + */ + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + } return features; } -- cgit v1.2.3 From 614d0d77b49a9b131e58b77473698ab5b2c525b7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:09 +0200 Subject: bpf: add various verifier test cases This patch adds various verifier test cases: 1) A test case for the pruning issue when tracking alignment is used. 2) Various PTR_TO_MAP_VALUE_OR_NULL tests to make sure pointer arithmetic turns such register into UNKNOWN_VALUE type. 3) Test cases for the special treatment of LD_ABS/LD_IND to make sure verifier doesn't break calling convention here. Latter is needed, since f.e. arm64 JIT uses r1 - r5 for storing temporary data, so they really must be marked as NOT_INIT. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 10 ++ tools/include/linux/filter.h | 10 ++ tools/testing/selftests/bpf/test_verifier.c | 239 +++++++++++++++++++++++++++- 3 files changed, 255 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 56197f82af45..62d948f80730 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -272,6 +272,16 @@ struct bpf_prog_aux; .off = OFF, \ .imm = IMM }) +/* Unconditional jumps, goto pc + off16 */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 390d7c9685fd..4ce25d43e8e3 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -208,6 +208,16 @@ .off = OFF, \ .imm = IMM }) +/* Unconditional jumps, goto pc + off16 */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3773562056da..cabb19b1e371 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -49,6 +49,7 @@ #define MAX_NR_MAPS 4 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) +#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) struct bpf_test { const char *descr; @@ -2614,6 +2615,30 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test17 (pruning, alignment)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14), + BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_JMP_A(-6), + }, + .errstr = "misaligned packet access off 2+15+-4 size 4", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { @@ -3340,6 +3365,70 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, + { + "alu ops on ptr_to_map_value_or_null, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "alu ops on ptr_to_map_value_or_null, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "alu ops on ptr_to_map_value_or_null, 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, { "invalid memory access with multiple map_lookup_elem calls", .insns = { @@ -4937,7 +5026,149 @@ static struct bpf_test tests[] = { .fixup_map_in_map = { 3 }, .errstr = "R1 type=map_value_or_null expected=map_ptr", .result = REJECT, - } + }, + { + "ld_abs: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "ld_ind: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 1), + BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -5059,9 +5290,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, do_test_fixup(test, prog, map_fds); - fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, "GPL", 0, bpf_vlog, - sizeof(bpf_vlog)); + fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog)); expected_ret = unpriv && test->result_unpriv != UNDEF ? test->result_unpriv : test->result; -- cgit v1.2.3 From 83b4605b0c16cde5b00c8cf192408d51eab75402 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 May 2017 18:59:54 +0200 Subject: PCI/msi: fix the pci_alloc_irq_vectors_affinity stub We need to return an error for any call that asks for MSI / MSI-X vectors only, so that non-trivial fallback logic can work properly. Also valid dev->irq and use the "correct" errno value based on feedback from Linus. Signed-off-by: Christoph Hellwig Reported-by: Steven Rostedt Fixes: aff17164 ("PCI: Provide sensible IRQ vector alloc/free routines") Signed-off-by: Linus Torvalds --- include/linux/pci.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..fc2e832d7b9c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1342,9 +1342,9 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, const struct irq_affinity *aff_desc) { - if (min_vecs > 1) - return -EINVAL; - return 1; + if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq) + return 1; + return -ENOSPC; } static inline void pci_free_irq_vectors(struct pci_dev *dev) -- cgit v1.2.3 From 3fb07daff8e99243366a081e5129560734de4ada Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 May 2017 14:27:35 -0700 Subject: ipv4: add reference counting to metrics Andrey Konovalov reported crashes in ipv4_mtu() I could reproduce the issue with KASAN kernels, between 10.246.7.151 and 10.246.7.152 : 1) 20 concurrent netperf -t TCP_RR -H 10.246.7.152 -l 1000 & 2) At the same time run following loop : while : do ip ro add 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500 ip ro del 10.246.7.152 dev eth0 src 10.246.7.151 mtu 1500 done Cong Wang attempted to add back rt->fi in commit 82486aa6f1b9 ("ipv4: restore rt->fi for reference counting") but this proved to add some issues that were complex to solve. Instead, I suggested to add a refcount to the metrics themselves, being a standalone object (in particular, no reference to other objects) I tried to make this patch as small as possible to ease its backport, instead of being super clean. Note that we believe that only ipv4 dst need to take care of the metric refcount. But if this is wrong, this patch adds the basic infrastructure to extend this to other families. Many thanks to Julian Anastasov for reviewing this patch, and Cong Wang for his efforts on this problem. Fixes: 2860583fe840 ("ipv4: Kill rt->fi") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Reviewed-by: Julian Anastasov Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/net/dst.h | 8 +++++++- include/net/ip_fib.h | 10 +++++----- net/core/dst.c | 23 ++++++++++++++--------- net/ipv4/fib_semantics.c | 17 ++++++++++------- net/ipv4/route.c | 10 +++++++++- 5 files changed, 45 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 049af33da3b6..cfc043784166 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -107,10 +107,16 @@ struct dst_entry { }; }; +struct dst_metrics { + u32 metrics[RTAX_MAX]; + atomic_t refcnt; +}; +extern const struct dst_metrics dst_default_metrics; + u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL +#define DST_METRICS_REFCOUNTED 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 6692c5758b33..f7f6aa789c61 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -114,11 +114,11 @@ struct fib_info { __be32 fib_prefsrc; u32 fib_tb_id; u32 fib_priority; - u32 *fib_metrics; -#define fib_mtu fib_metrics[RTAX_MTU-1] -#define fib_window fib_metrics[RTAX_WINDOW-1] -#define fib_rtt fib_metrics[RTAX_RTT-1] -#define fib_advmss fib_metrics[RTAX_ADVMSS-1] + struct dst_metrics *fib_metrics; +#define fib_mtu fib_metrics->metrics[RTAX_MTU-1] +#define fib_window fib_metrics->metrics[RTAX_WINDOW-1] +#define fib_rtt fib_metrics->metrics[RTAX_RTT-1] +#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; diff --git a/net/core/dst.c b/net/core/dst.c index 960e503b5a52..6192f11beec9 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard_out); -const u32 dst_default_metrics[RTAX_MAX + 1] = { +const struct dst_metrics dst_default_metrics = { /* This initializer is needed to force linker to place this variable * into const section. Otherwise it might end into bss section. * We really want to avoid false sharing on this variable, and catch * any writes on it. */ - [RTAX_MAX] = 0xdeadbeef, + .refcnt = ATOMIC_INIT(1), }; void dst_init(struct dst_entry *dst, struct dst_ops *ops, @@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, if (dev) dev_hold(dev); dst->ops = ops; - dst_init_metrics(dst, dst_default_metrics, true); + dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; dst->path = dst; dst->from = NULL; @@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release); u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) { - u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); if (p) { - u32 *old_p = __DST_METRICS_PTR(old); + struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); unsigned long prev, new; - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + atomic_set(&p->refcnt, 1); + memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); new = (unsigned long) p; prev = cmpxchg(&dst->_metrics, old, new); if (prev != old) { kfree(p); - p = __DST_METRICS_PTR(prev); + p = (struct dst_metrics *)__DST_METRICS_PTR(prev); if (prev & DST_METRICS_READ_ONLY) p = NULL; + } else if (prev & DST_METRICS_REFCOUNTED) { + if (atomic_dec_and_test(&old_p->refcnt)) + kfree(old_p); } } - return p; + BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0); + return (u32 *)p; } EXPORT_SYMBOL(dst_cow_metrics_generic); @@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) { unsigned long prev, new; - new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY; + new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY; prev = cmpxchg(&dst->_metrics, old, new); if (prev == old) kfree(__DST_METRICS_PTR(old)); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da449ddb8cc1..ad9ad4aab5da 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -203,6 +203,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); + struct dst_metrics *m; change_nexthops(fi) { if (nexthop_nh->nh_dev) @@ -213,8 +214,9 @@ static void free_fib_info_rcu(struct rcu_head *head) rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - if (fi->fib_metrics != (u32 *) dst_default_metrics) - kfree(fi->fib_metrics); + m = fi->fib_metrics; + if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) + kfree(m); kfree(fi); } @@ -971,11 +973,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; - fi->fib_metrics[type - 1] = val; + fi->fib_metrics->metrics[type - 1] = val; } if (ecn_ca) - fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; return 0; } @@ -1033,11 +1035,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; if (cfg->fc_mx) { - fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); if (!fi->fib_metrics) goto failure; + atomic_set(&fi->fib_metrics->refcnt, 1); } else - fi->fib_metrics = (u32 *) dst_default_metrics; + fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; @@ -1238,7 +1241,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, if (fi->fib_priority && nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) goto nla_put_failure; - if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) + if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) goto nla_put_failure; if (fi->fib_prefsrc && diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 655d9eebe43e..6883b3d4ba8f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1385,8 +1385,12 @@ static void rt_add_uncached_list(struct rtable *rt) static void ipv4_dst_destroy(struct dst_entry *dst) { + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rtable *rt = (struct rtable *) dst; + if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt)) + kfree(p); + if (!list_empty(&rt->rt_uncached)) { struct uncached_list *ul = rt->rt_uncached_list; @@ -1438,7 +1442,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->rt_gateway = nh->nh_gw; rt->rt_uses_gateway = 1; } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); + dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); + if (fi->fib_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + atomic_inc(&fi->fib_metrics->refcnt); + } #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif -- cgit v1.2.3