From 1dae9f1187189bc09ff6d25ca97ead711f7e26f9 Mon Sep 17 00:00:00 2001 From: Anastasia Kovaleva Date: Thu, 3 Oct 2024 13:44:31 +0300 Subject: net: Fix an unsafe loop on the list The kernel may crash when deleting a genetlink family if there are still listeners for that family: Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [c000000000c080bc] netlink_update_socket_mc+0x3c/0xc0 LR [c000000000c0f764] __netlink_clear_multicast_users+0x74/0xc0 Call Trace: __netlink_clear_multicast_users+0x74/0xc0 genl_unregister_family+0xd4/0x2d0 Change the unsafe loop on the list to a safe one, because inside the loop there is an element removal from this list. Fixes: b8273570f802 ("genetlink: fix netns vs. netlink table locking (2)") Cc: stable@vger.kernel.org Signed-off-by: Anastasia Kovaleva Reviewed-by: Dmitry Bogdanov Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241003104431.12391-1-a.kovaleva@yadro.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index c58ca8dd561b..db29c39e19a7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -894,6 +894,8 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry_safe(__sk, tmp, list, sk_node) #define sk_for_each_bound(__sk, list) \ hlist_for_each_entry(__sk, list, sk_bind_node) +#define sk_for_each_bound_safe(__sk, tmp, list) \ + hlist_for_each_entry_safe(__sk, tmp, list, sk_bind_node) /** * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset -- cgit v1.2.3 From 3cb7cf1540ddff5473d6baeb530228d19bc97b8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Oct 2024 18:41:30 +0000 Subject: net/sched: accept TCA_STAB only for root qdisc Most qdiscs maintain their backlog using qdisc_pkt_len(skb) on the assumption it is invariant between the enqueue() and dequeue() handlers. 
Unfortunately syzbot can crash a host rather easily using a TBF + SFQ combination, with an STAB on SFQ [1] We can't support TCA_STAB on arbitrary level, this would require to maintain per-qdisc storage. [1] [ 88.796496] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 88.798611] #PF: supervisor read access in kernel mode [ 88.799014] #PF: error_code(0x0000) - not-present page [ 88.799506] PGD 0 P4D 0 [ 88.799829] Oops: Oops: 0000 [#1] SMP NOPTI [ 88.800569] CPU: 14 UID: 0 PID: 2053 Comm: b371744477 Not tainted 6.12.0-rc1-virtme #1117 [ 88.801107] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 88.801779] RIP: 0010:sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq [ 88.802544] Code: 0f b7 50 12 48 8d 04 d5 00 00 00 00 48 89 d6 48 29 d0 48 8b 91 c0 01 00 00 48 c1 e0 03 48 01 c2 66 83 7a 1a 00 7e c0 48 8b 3a <4c> 8b 07 4c 89 02 49 89 50 08 48 c7 47 08 00 00 00 00 48 c7 07 00 All code ======== 0: 0f b7 50 12 movzwl 0x12(%rax),%edx 4: 48 8d 04 d5 00 00 00 lea 0x0(,%rdx,8),%rax b: 00 c: 48 89 d6 mov %rdx,%rsi f: 48 29 d0 sub %rdx,%rax 12: 48 8b 91 c0 01 00 00 mov 0x1c0(%rcx),%rdx 19: 48 c1 e0 03 shl $0x3,%rax 1d: 48 01 c2 add %rax,%rdx 20: 66 83 7a 1a 00 cmpw $0x0,0x1a(%rdx) 25: 7e c0 jle 0xffffffffffffffe7 27: 48 8b 3a mov (%rdx),%rdi 2a:* 4c 8b 07 mov (%rdi),%r8 <-- trapping instruction 2d: 4c 89 02 mov %r8,(%rdx) 30: 49 89 50 08 mov %rdx,0x8(%r8) 34: 48 c7 47 08 00 00 00 movq $0x0,0x8(%rdi) 3b: 00 3c: 48 rex.W 3d: c7 .byte 0xc7 3e: 07 (bad) ... Code starting with the faulting instruction =========================================== 0: 4c 8b 07 mov (%rdi),%r8 3: 4c 89 02 mov %r8,(%rdx) 6: 49 89 50 08 mov %rdx,0x8(%r8) a: 48 c7 47 08 00 00 00 movq $0x0,0x8(%rdi) 11: 00 12: 48 rex.W 13: c7 .byte 0xc7 14: 07 (bad) ... 
[ 88.803721] RSP: 0018:ffff9a1f892b7d58 EFLAGS: 00000206 [ 88.804032] RAX: 0000000000000000 RBX: ffff9a1f8420c800 RCX: ffff9a1f8420c800 [ 88.804560] RDX: ffff9a1f81bc1440 RSI: 0000000000000000 RDI: 0000000000000000 [ 88.805056] RBP: ffffffffc04bb0e0 R08: 0000000000000001 R09: 00000000ff7f9a1f [ 88.805473] R10: 000000000001001b R11: 0000000000009a1f R12: 0000000000000140 [ 88.806194] R13: 0000000000000001 R14: ffff9a1f886df400 R15: ffff9a1f886df4ac [ 88.806734] FS: 00007f445601a740(0000) GS:ffff9a2e7fd80000(0000) knlGS:0000000000000000 [ 88.807225] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 88.807672] CR2: 0000000000000000 CR3: 000000050cc46000 CR4: 00000000000006f0 [ 88.808165] Call Trace: [ 88.808459] [ 88.808710] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) [ 88.809261] ? page_fault_oops (arch/x86/mm/fault.c:715) [ 88.809561] ? exc_page_fault (./arch/x86/include/asm/irqflags.h:26 ./arch/x86/include/asm/irqflags.h:87 ./arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539) [ 88.809806] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623) [ 88.810074] ? 
sfq_dequeue (net/sched/sch_sfq.c:272 net/sched/sch_sfq.c:499) sch_sfq [ 88.810411] sfq_reset (net/sched/sch_sfq.c:525) sch_sfq [ 88.810671] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036) [ 88.810950] tbf_reset (./include/linux/timekeeping.h:169 net/sched/sch_tbf.c:334) sch_tbf [ 88.811208] qdisc_reset (./include/linux/skbuff.h:2135 ./include/linux/skbuff.h:2441 ./include/linux/skbuff.h:3304 ./include/linux/skbuff.h:3310 net/sched/sch_generic.c:1036) [ 88.811484] netif_set_real_num_tx_queues (./include/linux/spinlock.h:396 ./include/net/sch_generic.h:768 net/core/dev.c:2958) [ 88.811870] __tun_detach (drivers/net/tun.c:590 drivers/net/tun.c:673) [ 88.812271] tun_chr_close (drivers/net/tun.c:702 drivers/net/tun.c:3517) [ 88.812505] __fput (fs/file_table.c:432 (discriminator 1)) [ 88.812735] task_work_run (kernel/task_work.c:230) [ 88.813016] do_exit (kernel/exit.c:940) [ 88.813372] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:58 (discriminator 4)) [ 88.813639] ? handle_mm_fault (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/memcontrol.h:1022 ./include/linux/memcontrol.h:1045 ./include/linux/memcontrol.h:1052 mm/memory.c:5928 mm/memory.c:6088) [ 88.813867] do_group_exit (kernel/exit.c:1070) [ 88.814138] __x64_sys_exit_group (kernel/exit.c:1099) [ 88.814490] x64_sys_call (??:?) 
[ 88.814791] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) [ 88.815012] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) [ 88.815495] RIP: 0033:0x7f44560f1975 Fixes: 175f9c1bba9b ("net_sched: Add size table for qdiscs") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Daniel Borkmann Link: https://patch.msgid.link/20241007184130.3960565-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 1 - net/sched/sch_api.c | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 79edd5b5e3c9..5d74fa7e694c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -848,7 +848,6 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - qdisc_calculate_pkt_len(skb, sch); return sch->enqueue(skb, sch, to_free); } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 74afc210527d..2eefa4783879 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -593,7 +593,6 @@ out: pkt_len = 1; qdisc_skb_cb(skb)->pkt_len = pkt_len; } -EXPORT_SYMBOL(__qdisc_calculate_pkt_len); void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) { @@ -1201,6 +1200,12 @@ skip: return -EINVAL; } + if (new && + !(parent->flags & TCQ_F_MQROOT) && + rcu_access_pointer(new->stab)) { + NL_SET_ERR_MSG(extack, "STAB not supported on a non root"); + return -EINVAL; + } err = cops->graft(parent, cl, new, &old, extack); if (err) return err; -- cgit v1.2.3 From 07cc7b0b942bf55ef1a471470ecda8d2a6a6541f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 11:47:32 -0700 Subject: rtnetlink: Add bulk registration helpers for rtnetlink message handlers. 
Before commit addf9b90de22 ("net: rtnetlink: use rcu to free rtnl message handlers"), once rtnl_msg_handlers[protocol] was allocated, the following rtnl_register_module() for the same protocol never failed. However, after the commit, rtnl_msg_handlers[protocol][msgtype] needs to be allocated in each rtnl_register_module(), so each call could fail. Many callers of rtnl_register_module() do not handle the returned error, so error handling would have to be added in many places. To handle that easily, let's add wrapper functions for bulk registration of rtnetlink message handlers. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 17 +++++++++++++++++ net/core/rtnetlink.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index b45d57b5968a..2d3eb7cb4dff 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -29,6 +29,15 @@ static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype) return msgtype & RTNL_KIND_MASK; } +struct rtnl_msg_handler { + struct module *owner; + int protocol; + int msgtype; + rtnl_doit_func doit; + rtnl_dumpit_func dumpit; + int flags; +}; + void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_register_module(struct module *owner, int protocol, int msgtype, @@ -36,6 +45,14 @@ int rtnl_register_module(struct module *owner, int protocol, int msgtype, int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); +int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n); +void __rtnl_unregister_many(const struct rtnl_msg_handler *handlers, int n); + +#define rtnl_register_many(handlers) \ + __rtnl_register_many(handlers, ARRAY_SIZE(handlers)) +#define rtnl_unregister_many(handlers) \ + __rtnl_unregister_many(handlers, ARRAY_SIZE(handlers)) + static inline int rtnl_msg_family(const struct nlmsghdr *nlh) { if
(nlmsg_len(nlh) >= sizeof(struct rtgenmsg)) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f0a520987085..e30e7ea0207d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -384,6 +384,35 @@ void rtnl_unregister_all(int protocol) } EXPORT_SYMBOL_GPL(rtnl_unregister_all); +int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n) +{ + const struct rtnl_msg_handler *handler; + int i, err; + + for (i = 0, handler = handlers; i < n; i++, handler++) { + err = rtnl_register_internal(handler->owner, handler->protocol, + handler->msgtype, handler->doit, + handler->dumpit, handler->flags); + if (err) { + __rtnl_unregister_many(handlers, i); + break; + } + } + + return err; +} +EXPORT_SYMBOL_GPL(__rtnl_register_many); + +void __rtnl_unregister_many(const struct rtnl_msg_handler *handlers, int n) +{ + const struct rtnl_msg_handler *handler; + int i; + + for (i = n - 1, handler = handlers + n - 1; i >= 0; i--, handler--) + rtnl_unregister(handler->protocol, handler->msgtype); +} +EXPORT_SYMBOL_GPL(__rtnl_unregister_many); + static LIST_HEAD(link_ops); static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) -- cgit v1.2.3 From d51705614f668254cc5def7490df76f9680b4659 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 11:47:35 -0700 Subject: mctp: Handle error of rtnl_register_module(). Since introduced, mctp has been ignoring the returned value of rtnl_register_module(), which could fail silently. Handling the error allows users to view a module as an all-or-nothing thing in terms of the rtnetlink functionality. This prevents syzkaller from reporting spurious errors from its tests, where OOM often occurs and module is automatically loaded. Let's handle the errors by rtnl_register_many(). 
Fixes: 583be982d934 ("mctp: Add device handling and netlink interface") Fixes: 831119f88781 ("mctp: Add neighbour netlink interface") Fixes: 06d2f4c583a7 ("mctp: Add netlink route management") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- include/net/mctp.h | 2 +- net/mctp/af_mctp.c | 6 +++++- net/mctp/device.c | 30 ++++++++++++++++++------------ net/mctp/neigh.c | 31 +++++++++++++++++++------------ net/mctp/route.c | 33 +++++++++++++++++++++++---------- 5 files changed, 66 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/net/mctp.h b/include/net/mctp.h index 7b17c52e8ce2..28d59ae94ca3 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -295,7 +295,7 @@ void mctp_neigh_remove_dev(struct mctp_dev *mdev); int mctp_routes_init(void); void mctp_routes_exit(void); -void mctp_device_init(void); +int mctp_device_init(void); void mctp_device_exit(void); #endif /* __NET_MCTP_H */ diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 43288b408fde..f6de136008f6 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -756,10 +756,14 @@ static __init int mctp_init(void) if (rc) goto err_unreg_routes; - mctp_device_init(); + rc = mctp_device_init(); + if (rc) + goto err_unreg_neigh; return 0; +err_unreg_neigh: + mctp_neigh_exit(); err_unreg_routes: mctp_routes_exit(); err_unreg_proto: diff --git a/net/mctp/device.c b/net/mctp/device.c index acb97b257428..85cc5f31f1e7 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -524,25 +524,31 @@ static struct notifier_block mctp_dev_nb = { .priority = ADDRCONF_NOTIFY_PRIORITY, }; -void __init mctp_device_init(void) +static const struct rtnl_msg_handler mctp_device_rtnl_msg_handlers[] = { + {THIS_MODULE, PF_MCTP, RTM_NEWADDR, mctp_rtm_newaddr, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_DELADDR, mctp_rtm_deladdr, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_GETADDR, NULL, mctp_dump_addrinfo, 0}, +}; + +int __init mctp_device_init(void) { - 
register_netdevice_notifier(&mctp_dev_nb); + int err; - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR, - NULL, mctp_dump_addrinfo, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR, - mctp_rtm_newaddr, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELADDR, - mctp_rtm_deladdr, NULL, 0); + register_netdevice_notifier(&mctp_dev_nb); rtnl_af_register(&mctp_af_ops); + + err = rtnl_register_many(mctp_device_rtnl_msg_handlers); + if (err) { + rtnl_af_unregister(&mctp_af_ops); + unregister_netdevice_notifier(&mctp_dev_nb); + } + + return err; } void __exit mctp_device_exit(void) { + rtnl_unregister_many(mctp_device_rtnl_msg_handlers); rtnl_af_unregister(&mctp_af_ops); - rtnl_unregister(PF_MCTP, RTM_DELADDR); - rtnl_unregister(PF_MCTP, RTM_NEWADDR); - rtnl_unregister(PF_MCTP, RTM_GETADDR); - unregister_netdevice_notifier(&mctp_dev_nb); } diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c index ffa0f9e0983f..590f642413e4 100644 --- a/net/mctp/neigh.c +++ b/net/mctp/neigh.c @@ -322,22 +322,29 @@ static struct pernet_operations mctp_net_ops = { .exit = mctp_neigh_net_exit, }; +static const struct rtnl_msg_handler mctp_neigh_rtnl_msg_handlers[] = { + {THIS_MODULE, PF_MCTP, RTM_NEWNEIGH, mctp_rtm_newneigh, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_DELNEIGH, mctp_rtm_delneigh, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_GETNEIGH, NULL, mctp_rtm_getneigh, 0}, +}; + int __init mctp_neigh_init(void) { - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWNEIGH, - mctp_rtm_newneigh, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELNEIGH, - mctp_rtm_delneigh, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETNEIGH, - NULL, mctp_rtm_getneigh, 0); - - return register_pernet_subsys(&mctp_net_ops); + int err; + + err = register_pernet_subsys(&mctp_net_ops); + if (err) + return err; + + err = rtnl_register_many(mctp_neigh_rtnl_msg_handlers); + if (err) + unregister_pernet_subsys(&mctp_net_ops); + + return err; } -void __exit 
mctp_neigh_exit(void) +void mctp_neigh_exit(void) { + rtnl_unregister_many(mctp_neigh_rtnl_msg_handlers); unregister_pernet_subsys(&mctp_net_ops); - rtnl_unregister(PF_MCTP, RTM_GETNEIGH); - rtnl_unregister(PF_MCTP, RTM_DELNEIGH); - rtnl_unregister(PF_MCTP, RTM_NEWNEIGH); } diff --git a/net/mctp/route.c b/net/mctp/route.c index eefd7834d9a0..597e9cf5aa64 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -1474,26 +1474,39 @@ static struct pernet_operations mctp_net_ops = { .exit = mctp_routes_net_exit, }; +static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = { + {THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0}, +}; + int __init mctp_routes_init(void) { + int err; + dev_add_pack(&mctp_packet_type); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE, - NULL, mctp_dump_rtinfo, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE, - mctp_newroute, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE, - mctp_delroute, NULL, 0); + err = register_pernet_subsys(&mctp_net_ops); + if (err) + goto err_pernet; + + err = rtnl_register_many(mctp_route_rtnl_msg_handlers); + if (err) + goto err_rtnl; - return register_pernet_subsys(&mctp_net_ops); + return 0; + +err_rtnl: + unregister_pernet_subsys(&mctp_net_ops); +err_pernet: + dev_remove_pack(&mctp_packet_type); + return err; } void mctp_routes_exit(void) { + rtnl_unregister_many(mctp_route_rtnl_msg_handlers); unregister_pernet_subsys(&mctp_net_ops); - rtnl_unregister(PF_MCTP, RTM_DELROUTE); - rtnl_unregister(PF_MCTP, RTM_NEWROUTE); - rtnl_unregister(PF_MCTP, RTM_GETROUTE); dev_remove_pack(&mctp_packet_type); } -- cgit v1.2.3