diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-28 13:09:37 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-28 13:09:37 -0700 |
commit | 50108c352db70405b3d71d8099d0b3adc3b3352c (patch) | |
tree | 61ae73b4f540704f9727b6c24ef306de6a7da5fd /net/ipv4/inet_fragment.c | |
parent | 8d025e2092e29bfd13e56c78e22af25fac83c8ec (diff) | |
parent | 18685451fc4e546fc0e718580d32df3c0e5c8272 (diff) | |
download | lwn-50108c352db70405b3d71d8099d0b3adc3b3352c.tar.gz lwn-50108c352db70405b3d71d8099d0b3adc3b3352c.zip |
Merge tag 'net-6.9-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
"Including fixes from bpf, WiFi and netfilter.
Current release - regressions:
- ipv6: fix address dump when IPv6 is disabled on an interface
Current release - new code bugs:
- bpf: temporarily disable atomic operations in BPF arena
- nexthop: fix uninitialized variable in nla_put_nh_group_stats()
Previous releases - regressions:
- bpf: protect against int overflow for stack access size
- hsr: fix the promiscuous mode in offload mode
- wifi: don't always use FW dump trig
- tls: adjust recv return with async crypto and failed copy to
userspace
- tcp: properly terminate timers for kernel sockets
- ice: fix memory corruption bug with suspend and rebuild
- at803x: fix kernel panic with at8031_probe
- qeth: handle deferred cc1
Previous releases - always broken:
- bpf: fix bug in BPF_LDX_MEMSX
- netfilter: reject table flag and netdev basechain updates
- inet_defrag: prevent sk release while still in use
- wifi: pick the version of SESSION_PROTECTION_NOTIF
- wwan: t7xx: split 64bit accesses to fix alignment issues
- mlxbf_gige: call request_irq() after NAPI initialized
- hns3: fix kernel crash when devlink reload during pf
initialization"
* tag 'net-6.9-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (81 commits)
inet: inet_defrag: prevent sk release while still in use
Octeontx2-af: fix pause frame configuration in GMP mode
net: lan743x: Add set RFE read fifo threshold for PCI1x1x chips
net: bcmasp: Remove phy_{suspend/resume}
net: bcmasp: Bring up unimac after PHY link up
net: phy: qcom: at803x: fix kernel panic with at8031_probe
netfilter: arptables: Select NETFILTER_FAMILY_ARP when building arp_tables.c
netfilter: nf_tables: skip netdev hook unregistration if table is dormant
netfilter: nf_tables: reject table flag and netdev basechain updates
netfilter: nf_tables: reject destroy command to remove basechain hooks
bpf: update BPF LSM designated reviewer list
bpf: Protect against int overflow for stack access size
bpf: Check bloom filter map value size
bpf: fix warning for crash_kexec
selftests: netdevsim: set test timeout to 10 minutes
net: wan: framer: Add missing static inline qualifiers
mlxbf_gige: call request_irq() after NAPI initialized
tls: get psock ref after taking rxlock to avoid leak
selftests: tls: add test with a partially invalid iov
tls: adjust recv return with async crypto and failed copy to userspace
...
Diffstat (limited to 'net/ipv4/inet_fragment.c')
-rw-r--r-- | net/ipv4/inet_fragment.c | 70 |
1 files changed, 57 insertions, 13 deletions
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 7072fc0783ef..c88c9034d630 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,6 +24,8 @@ #include <net/ip.h> #include <net/ipv6.h> +#include "../core/sock_destructor.h" + /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. @@ -39,6 +41,7 @@ struct ipfrag_skb_cb { }; struct sk_buff *next_frag; int frag_run_len; + int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) @@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, */ if (!last) fragrun_create(q, skb); /* First fragment. */ - else if (last->ip_defrag_offset + last->len < end) { + else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ - if (offset < last->ip_defrag_offset + last->len) + if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; - if (offset == last->ip_defrag_offset + last->len) + if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); @@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, parent = *rbn; curr = rb_to_skb(parent); - curr_run_end = curr->ip_defrag_offset + + curr_run_end = FRAG_CB(curr)->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; - if (end <= curr->ip_defrag_offset) + if (end <= FRAG_CB(curr)->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; - else if (offset >= curr->ip_defrag_offset && + else if (offset >= FRAG_CB(curr)->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else @@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, rb_insert_color(&skb->rbnode, &q->rb_fragments); } - skb->ip_defrag_offset = offset; + FRAG_CB(skb)->ip_defrag_offset = offset; return IPFRAG_OK; } @@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); - struct sk_buff **nextp; + void (*destructor)(struct sk_buff *); + unsigned int orig_truesize = 0; + struct sk_buff **nextp = NULL; + struct sock *sk = skb->sk; int delta; + if (sk && is_skb_wmem(skb)) { + /* TX: skb->sk might have been passed as argument to + * dst->output and must remain valid until tx completes. + * + * Move sk to reassembled skb and fix up wmem accounting. + */ + orig_truesize = skb->truesize; + destructor = skb->destructor; + } + if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - return NULL; + if (!fp) { + head = skb; + goto out_restore_sk; + } FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; @@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; + + if (orig_truesize) { + /* prevent skb_morph from releasing sk */ + skb->sk = NULL; + skb->destructor = NULL; + } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, @@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, consume_skb(head); head = skb; } - WARN_ON(head->ip_defrag_offset != 0); + WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) - return NULL; + goto out_restore_sk; delta += head->truesize; if (delta) @@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) - return NULL; + goto out_restore_sk; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) @@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, nextp = &skb_shinfo(head)->frag_list; } +out_restore_sk: + if (orig_truesize) { + int ts_delta = head->truesize - orig_truesize; + + /* if this reassembled skb is fragmented later, + * fraglist skbs will get skb->sk assigned from head->sk, + * and each frag skb will be released via sock_wfree. + * + * Update sk_wmem_alloc. + */ + head->sk = sk; + head->destructor = destructor; + refcount_add(ts_delta, &sk->sk_wmem_alloc); + } + return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); @@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { + struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; + const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; @@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, head->prev = NULL; head->tstamp = q->stamp; head->mono_delivery_time = q->mono_delivery_time; + + if (sk) + refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); |