diff options
author | David S. Miller <davem@davemloft.net> | 2011-04-19 11:24:06 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-04-19 11:24:06 -0700 |
commit | 4805347c1eb12cfe79f42a12a5442ee01590a9c0 (patch) | |
tree | cb9d8b3cfc78d8411f0cea7b06c6472cdd35bb9a /include/linux/netfilter | |
parent | e1943424e43974f85b82bb31eaf832823bf49ce7 (diff) | |
parent | 91eb7c08c6cb3b8eeba1c61f5753c56dcb77f018 (diff) | |
download | lwn-4805347c1eb12cfe79f42a12a5442ee01590a9c0.tar.gz lwn-4805347c1eb12cfe79f42a12a5442ee01590a9c0.zip |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
Diffstat (limited to 'include/linux/netfilter')
-rw-r--r-- | include/linux/netfilter/ipset/ip_set_getport.h | 2 | ||||
-rw-r--r-- | include/linux/netfilter/x_tables.h | 96 |
2 files changed, 44 insertions, 54 deletions
diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h index 5aebd170f899..90d09300e954 100644 --- a/include/linux/netfilter/ipset/ip_set_getport.h +++ b/include/linux/netfilter/ipset/ip_set_getport.h @@ -22,7 +22,9 @@ static inline bool ip_set_proto_with_ports(u8 proto) { switch (proto) { case IPPROTO_TCP: + case IPPROTO_SCTP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: return true; } return false; diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 37219525ff6f..32cddf78b13e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -456,72 +456,60 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); -/* - * Per-CPU spinlock associated with per-cpu table entries, and - * with a counter for the "reading" side that allows a recursive - * reader to avoid taking the lock and deadlocking. - * - * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu. - * It needs to ensure that the rules are not being changed while the packet - * is being processed. In some cases, the read lock will be acquired - * twice on the same CPU; this is okay because of the count. - * - * "writing" is used when reading counters. - * During replace any readers that are using the old tables have to complete - * before freeing the old table. This is handled by the write locking - * necessary for reading the counters. +/** + * xt_recseq - recursive seqcount for netfilter use + * + * Packet processing changes the seqcount only if no recursion happened + * get_counters() can use read_seqcount_begin()/read_seqcount_retry(), + * because we use the normal seqcount convention : + * Low order bit set to 1 if a writer is active. */ -struct xt_info_lock { - seqlock_t lock; - unsigned char readers; -}; -DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks); +DECLARE_PER_CPU(seqcount_t, xt_recseq); -/* - * Note: we need to ensure that preemption is disabled before acquiring - * the per-cpu-variable, so we do it as a two step process rather than - * using "spin_lock_bh()". - * - * We _also_ need to disable bottom half processing before updating our - * nesting count, to make sure that the only kind of re-entrancy is this - * code being called by itself: since the count+lock is not an atomic - * operation, we can allow no races. +/** + * xt_write_recseq_begin - start of a write section * - * _Only_ that special combination of being per-cpu and never getting - * re-entered asynchronously means that the count is safe. + * Begin packet processing : all readers must wait the end + * 1) Must be called with preemption disabled + * 2) softirqs must be disabled too (or we should use irqsafe_cpu_add()) + * Returns : + * 1 if no recursion on this cpu + * 0 if recursion detected */ -static inline void xt_info_rdlock_bh(void) +static inline unsigned int xt_write_recseq_begin(void) { - struct xt_info_lock *lock; + unsigned int addend; - local_bh_disable(); - lock = &__get_cpu_var(xt_info_locks); - if (likely(!lock->readers++)) - write_seqlock(&lock->lock); -} + /* + * Low order bit of sequence is set if we already + * called xt_write_recseq_begin(). + */ + addend = (__this_cpu_read(xt_recseq.sequence) + 1) & 1; -static inline void xt_info_rdunlock_bh(void) -{ - struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); + /* + * This is kind of a write_seqcount_begin(), but addend is 0 or 1 + * We dont check addend value to avoid a test and conditional jump, + * since addend is most likely 1 + */ + __this_cpu_add(xt_recseq.sequence, addend); + smp_wmb(); - if (likely(!--lock->readers)) - write_sequnlock(&lock->lock); - local_bh_enable(); + return addend; } -/* - * The "writer" side needs to get exclusive access to the lock, - * regardless of readers. This must be called with bottom half - * processing (and thus also preemption) disabled. +/** + * xt_write_recseq_end - end of a write section + * @addend: return value from previous xt_write_recseq_begin() + * + * End packet processing : all readers can proceed + * 1) Must be called with preemption disabled + * 2) softirqs must be disabled too (or we should use irqsafe_cpu_add()) */ -static inline void xt_info_wrlock(unsigned int cpu) -{ - write_seqlock(&per_cpu(xt_info_locks, cpu).lock); -} - -static inline void xt_info_wrunlock(unsigned int cpu) +static inline void xt_write_recseq_end(unsigned int addend) { - write_sequnlock(&per_cpu(xt_info_locks, cpu).lock); + /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ + smp_wmb(); + __this_cpu_add(xt_recseq.sequence, addend); } /* |