From a11e1d432b51f63ba698d044441284a661f01144 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jun 2018 09:43:44 -0700 Subject: Revert changes to convert to ->poll_mask() and aio IOCB_CMD_POLL The poll() changes were not well thought out, and completely unexplained. They also caused a huge performance regression, because "->poll()" was no longer a trivial file operation that just called down to the underlying file operations, but instead did at least two indirect calls. Indirect calls are sadly slow now with the Spectre mitigation, but the performance problem could at least be largely mitigated by changing the "->get_poll_head()" operation to just have a per-file-descriptor pointer to the poll head instead. That gets rid of one of the new indirections. But that doesn't fix the new complexity that is completely unwarranted for the regular case. The (undocumented) reason for the poll() changes was some alleged AIO poll race fixing, but we don't make the common case slower and more complex for some uncommon special case, so this all really needs way more explanations and most likely a fundamental redesign. [ This revert is a revert of about 30 different commits, not reverted individually because that would just be unnecessarily messy - Linus ] Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- include/linux/skbuff.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c86885954994..164cdedf6012 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3252,7 +3252,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); -__poll_t datagram_poll_mask(struct socket *sock, __poll_t events); +__poll_t datagram_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, -- cgit v1.2.3 From 8b7008620b8452728cadead460a36f64ed78c460 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 11 Jul 2018 14:39:42 +0200 Subject: net: Don't copy pfmemalloc flag in __copy_skb_header() The pfmemalloc flag indicates that the skb was allocated from the PFMEMALLOC reserves, and the flag is currently copied on skb copy and clone. However, an skb copied from an skb flagged with pfmemalloc wasn't necessarily allocated from PFMEMALLOC reserves, and on the other hand an skb allocated that way might be copied from an skb that wasn't. So we should not copy the flag on skb copy, and rather decide whether to allow an skb to be associated with sockets unrelated to page reclaim depending only on how it was allocated. Move the pfmemalloc flag before headers_start[0] using an existing 1-bit hole, so that __copy_skb_header() doesn't copy it. When cloning, we'll now take care of this flag explicitly, contravening to the warning comment of __skb_clone(). While at it, restore the newline usage introduced by commit b19372273164 ("net: reorganize sk_buff for faster __copy_skb_header()") to visually separate bytes used in bitfields after headers_start[0], that was gone after commit a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage area"), and describe the pfmemalloc flag in the kernel-doc structure comment. This doesn't change the size of sk_buff or cacheline boundaries, but consolidates the 15 bits hole before tc_index into a 2 bytes hole before csum, that could now be filled more easily. Reported-by: Patrick Talbert Fixes: c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 +++++----- net/core/skbuff.c | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 164cdedf6012..610a201126ee 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -630,6 +630,7 @@ typedef unsigned char *sk_buff_data_t; * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue + * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -735,7 +736,7 @@ struct sk_buff { peeked:1, head_frag:1, xmit_more:1, - __unused:1; /* one bit hole */ + pfmemalloc:1; /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() @@ -754,31 +755,30 @@ struct sk_buff { __u8 __pkt_type_offset[0]; __u8 pkt_type:3; - __u8 pfmemalloc:1; __u8 ignore_df:1; - __u8 nf_trace:1; __u8 ip_summed:2; __u8 ooo_okay:1; + __u8 l4_hash:1; __u8 sw_hash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; - __u8 no_fcs:1; /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; + __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 csum_not_inet:1; - __u8 dst_pending_confirm:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif __u8 ipvs_property:1; + __u8 inner_protocol_type:1; __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV diff --git a/net/core/skbuff.c b/net/core/skbuff.c index eba8dae22c25..4df3164bb5fc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -858,6 +858,8 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) n->cloned = 1; n->nohdr = 0; n->peeked = 0; + if (skb->pfmemalloc) + n->pfmemalloc = 1; n->destructor = NULL; C(tail); C(end); -- cgit v1.2.3