summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2013-04-07 12:22:06 -0400
committerDavid S. Miller <davem@davemloft.net>2013-04-07 12:22:06 -0400
commitd16658206a3a9a1cbe2cc062bee9ea7e782293a5 (patch)
treee7374985d7ec3bcfa77616ca449debfefa6849d9 /include
parent19952cc4f8f572493293a8caed27c4be89c5fc9d (diff)
parentb8dd6a223eb86d537c2c6d8d28916c1f0ba3ea3c (diff)
downloadlwn-d16658206a3a9a1cbe2cc062bee9ea7e782293a5.tar.gz
lwn-d16658206a3a9a1cbe2cc062bee9ea7e782293a5.zip
Merge branch 'master' of git://1984.lsi.us.es/nf-next
Pablo Neira Ayuso says: ==================== The following patchset contains Netfilter and IPVS updates for your net-next tree, most relevantly they are: * Add net namespace support to NFLOG, ULOG and ebt_ulog and NFQUEUE. The LOG and ebt_log target has been also adapted, but they still depend on the syslog netnamespace that seems to be missing, from Gao Feng. * Don't lose indications of congestion in IPv6 fragmentation handling, from Hannes Frederic Sowa.i * IPVS conversion to use RCU, including some code consolidation patches and optimizations, also some from Julian Anastasov. * cpu fanout support for NFQUEUE, from Holger Eitzenberger. * Better error reporting to userspace when dropping packets from all our _*_[xfrm|route]_me_harder functions, from Patrick McHardy. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/netfilter.h5
-rw-r--r--include/linux/skbuff.h35
-rw-r--r--include/net/ip_vs.h130
-rw-r--r--include/net/net_namespace.h2
-rw-r--r--include/net/netfilter/nf_log.h14
-rw-r--r--include/net/netns/netfilter.h18
-rw-r--r--include/uapi/linux/netfilter/xt_NFQUEUE.h9
-rw-r--r--include/uapi/linux/netfilter_ipv6/ip6t_frag.h4
8 files changed, 163 insertions, 54 deletions
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index ee142846f56a..0060fde3160e 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -289,11 +289,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
#endif
}
-#ifdef CONFIG_PROC_FS
-#include <linux/proc_fs.h>
-extern struct proc_dir_entry *proc_net_netfilter;
-#endif
-
#else /* !CONFIG_NETFILTER */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 878e0ee81068..364e2440a7ee 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -575,7 +575,40 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
skb->_skb_refdst = (unsigned long)dst;
}
-extern void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst);
+extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
+ bool force);
+
+/**
+ * skb_dst_set_noref - sets skb dst, hopefully, without taking reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst.
+ * If dst entry is cached, we do not take reference and dst_release
+ * will be avoided by refdst_drop. If dst entry is not cached, we take
+ * reference, so that last dst_release can destroy the dst immediately.
+ */
+static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+ __skb_dst_set_noref(skb, dst, false);
+}
+
+/**
+ * skb_dst_set_noref_force - sets skb dst, without taking reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst.
+ * No reference is taken and no dst_release will be called. While for
+ * cached dsts deferred reclaim is a basic feature, for entries that are
+ * not cached it is caller's job to guarantee that last dst_release for
+ * provided dst happens when nobody uses it, eg. after a RCU grace period.
+ */
+static inline void skb_dst_set_noref_force(struct sk_buff *skb,
+ struct dst_entry *dst)
+{
+ __skb_dst_set_noref(skb, dst, true);
+}
/**
* skb_dst_is_noref - Test if skb dst isn't refcounted
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index bee87badabef..f9f5b057b480 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -233,6 +233,21 @@ static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
dst->ip = src->ip;
}
+static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst,
+ const union nf_inet_addr *src)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ dst->in6 = src->in6;
+ return;
+ }
+#endif
+ dst->ip = src->ip;
+ dst->all[1] = 0;
+ dst->all[2] = 0;
+ dst->all[3] = 0;
+}
+
static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
const union nf_inet_addr *b)
{
@@ -344,8 +359,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
#define LeaveFunction(level) do {} while (0)
#endif
-#define IP_VS_WAIT_WHILE(expr) while (expr) { cpu_relax(); }
-
/*
* The port number of FTP service (in network order).
@@ -566,20 +579,19 @@ struct ip_vs_conn_param {
*/
struct ip_vs_conn {
struct hlist_node c_list; /* hashed list heads */
-#ifdef CONFIG_NET_NS
- struct net *net; /* Name space */
-#endif
/* Protocol, addresses and port numbers */
- u16 af; /* address family */
__be16 cport;
- __be16 vport;
__be16 dport;
- __u32 fwmark; /* Fire wall mark from skb */
+ __be16 vport;
+ u16 af; /* address family */
union nf_inet_addr caddr; /* client address */
union nf_inet_addr vaddr; /* virtual address */
union nf_inet_addr daddr; /* destination address */
volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */
+#ifdef CONFIG_NET_NS
+ struct net *net; /* Name space */
+#endif
/* counter and timer */
atomic_t refcnt; /* reference count */
@@ -593,6 +605,7 @@ struct ip_vs_conn {
* state transition triggerd
* synchronization
*/
+ __u32 fwmark; /* Fire wall mark from skb */
unsigned long sync_endtime; /* jiffies + sent_retries */
/* Control members */
@@ -620,6 +633,8 @@ struct ip_vs_conn {
const struct ip_vs_pe *pe;
char *pe_data;
__u8 pe_data_len;
+
+ struct rcu_head rcu_head;
};
/*
@@ -695,10 +710,9 @@ struct ip_vs_dest_user_kern {
* and the forwarding entries
*/
struct ip_vs_service {
- struct list_head s_list; /* for normal service table */
- struct list_head f_list; /* for fwmark-based service table */
+ struct hlist_node s_list; /* for normal service table */
+ struct hlist_node f_list; /* for fwmark-based service table */
atomic_t refcnt; /* reference counter */
- atomic_t usecnt; /* use counter */
u16 af; /* address family */
__u16 protocol; /* which protocol (TCP/UDP) */
@@ -713,25 +727,35 @@ struct ip_vs_service {
struct list_head destinations; /* real server d-linked list */
__u32 num_dests; /* number of servers */
struct ip_vs_stats stats; /* statistics for the service */
- struct ip_vs_app *inc; /* bind conns to this app inc */
/* for scheduling */
- struct ip_vs_scheduler *scheduler; /* bound scheduler object */
- rwlock_t sched_lock; /* lock sched_data */
+ struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
+ spinlock_t sched_lock; /* lock sched_data */
void *sched_data; /* scheduler application data */
/* alternate persistence engine */
- struct ip_vs_pe *pe;
+ struct ip_vs_pe __rcu *pe;
+
+ struct rcu_head rcu_head;
};
+/* Information for cached dst */
+struct ip_vs_dest_dst {
+ struct dst_entry *dst_cache; /* destination cache entry */
+ u32 dst_cookie;
+ union nf_inet_addr dst_saddr;
+ struct rcu_head rcu_head;
+};
+/* In grace period after removing */
+#define IP_VS_DEST_STATE_REMOVING 0x01
/*
* The real server destination forwarding entry
* with ip address, port number, and so on.
*/
struct ip_vs_dest {
struct list_head n_list; /* for the dests in the service */
- struct list_head d_list; /* for table with all the dests */
+ struct hlist_node d_list; /* for table with all the dests */
u16 af; /* address family */
__be16 port; /* port number of the server */
@@ -742,6 +766,7 @@ struct ip_vs_dest {
atomic_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */
+ unsigned long state; /* state flags */
/* connection counters and thresholds */
atomic_t activeconns; /* active connections */
@@ -752,10 +777,7 @@ struct ip_vs_dest {
/* for destination cache */
spinlock_t dst_lock; /* lock of dst_cache */
- struct dst_entry *dst_cache; /* destination cache entry */
- u32 dst_rtos; /* RT_TOS(tos) for dst */
- u32 dst_cookie;
- union nf_inet_addr dst_saddr;
+ struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */
/* for virtual service */
struct ip_vs_service *svc; /* service it belongs to */
@@ -763,6 +785,10 @@ struct ip_vs_dest {
__be16 vport; /* virtual port number */
union nf_inet_addr vaddr; /* virtual IP address */
__u32 vfwmark; /* firewall mark of service */
+
+ struct list_head t_list; /* in dest_trash */
+ struct rcu_head rcu_head;
+ unsigned int in_rs_table:1; /* we are in rs_table */
};
@@ -778,9 +804,13 @@ struct ip_vs_scheduler {
/* scheduler initializing service */
int (*init_service)(struct ip_vs_service *svc);
/* scheduling service finish */
- int (*done_service)(struct ip_vs_service *svc);
- /* scheduler updating service */
- int (*update_service)(struct ip_vs_service *svc);
+ void (*done_service)(struct ip_vs_service *svc);
+ /* dest is linked */
+ int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
+ /* dest is unlinked */
+ int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
+ /* dest is updated */
+ int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
/* selecting a server from the given service */
struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
@@ -819,6 +849,7 @@ struct ip_vs_app {
struct ip_vs_app *app; /* its real application */
__be16 port; /* port number in net order */
atomic_t usecnt; /* usage counter */
+ struct rcu_head rcu_head;
/*
* output hook: Process packet in inout direction, diff set for TCP.
@@ -881,6 +912,9 @@ struct ipvs_master_sync_state {
struct netns_ipvs *ipvs;
};
+/* How much time to keep dests in trash */
+#define IP_VS_DEST_TRASH_PERIOD (120 * HZ)
+
/* IPVS in network namespace */
struct netns_ipvs {
int gen; /* Generation */
@@ -892,7 +926,7 @@ struct netns_ipvs {
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
- struct list_head rs_table[IP_VS_RTAB_SIZE];
+ struct hlist_head rs_table[IP_VS_RTAB_SIZE];
/* ip_vs_app */
struct list_head app_list;
/* ip_vs_proto */
@@ -904,7 +938,6 @@ struct netns_ipvs {
#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
struct list_head tcp_apps[TCP_APP_TAB_SIZE];
- spinlock_t tcp_app_lock;
#endif
/* ip_vs_proto_udp */
#ifdef CONFIG_IP_VS_PROTO_UDP
@@ -912,7 +945,6 @@ struct netns_ipvs {
#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
struct list_head udp_apps[UDP_APP_TAB_SIZE];
- spinlock_t udp_app_lock;
#endif
/* ip_vs_proto_sctp */
#ifdef CONFIG_IP_VS_PROTO_SCTP
@@ -921,7 +953,6 @@ struct netns_ipvs {
#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
/* Hash table for SCTP application incarnations */
struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
- spinlock_t sctp_app_lock;
#endif
/* ip_vs_conn */
atomic_t conn_count; /* connection counter */
@@ -931,9 +962,10 @@ struct netns_ipvs {
int num_services; /* no of virtual services */
- rwlock_t rs_lock; /* real services table */
/* Trash for destinations */
struct list_head dest_trash;
+ spinlock_t dest_trash_lock;
+ struct timer_list dest_trash_timer; /* expiration timer */
/* Service counters */
atomic_t ftpsvc_counter;
atomic_t nullsvc_counter;
@@ -1181,9 +1213,19 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph,
int inverse);
+/* Get reference to gain full access to conn.
+ * By default, RCU read-side critical sections have access only to
+ * conn fields and its PE data, see ip_vs_conn_rcu_free() for reference.
+ */
+static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
+{
+ return atomic_inc_not_zero(&cp->refcnt);
+}
+
/* put back the conn without restarting its timer */
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
{
+ smp_mb__before_atomic_dec();
atomic_dec(&cp->refcnt);
}
extern void ip_vs_conn_put(struct ip_vs_conn *cp);
@@ -1298,8 +1340,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
-void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
-void ip_vs_unbind_pe(struct ip_vs_service *svc);
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
@@ -1346,7 +1386,8 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int ip_vs_bind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *scheduler);
-extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc);
+extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
+ struct ip_vs_scheduler *sched);
extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn *
@@ -1366,17 +1407,12 @@ extern struct ip_vs_stats ip_vs_stats;
extern int sysctl_ip_vs_sync_ver;
extern struct ip_vs_service *
-ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
-static inline void ip_vs_service_put(struct ip_vs_service *svc)
-{
- atomic_dec(&svc->usecnt);
-}
-
-extern struct ip_vs_dest *
-ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
- const union nf_inet_addr *daddr, __be16 dport);
+extern bool
+ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
+ const union nf_inet_addr *daddr, __be16 dport);
extern int ip_vs_use_count_inc(void);
extern void ip_vs_use_count_dec(void);
@@ -1388,8 +1424,18 @@ extern struct ip_vs_dest *
ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
__u16 protocol, __u32 fwmark, __u32 flags);
-extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
+extern void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
+static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
+{
+ atomic_inc(&dest->refcnt);
+}
+
+static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
+{
+ smp_mb__before_atomic_dec();
+ atomic_dec(&dest->refcnt);
+}
/*
* IPVS sync daemon data and function prototypes
@@ -1428,7 +1474,7 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, int offset,
unsigned int hooknum, struct ip_vs_iphdr *iph);
-extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
+extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head);
#ifdef CONFIG_IP_VS_IPV6
extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index de644bcd8613..b17697827482 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -17,6 +17,7 @@
#include <net/netns/ipv6.h>
#include <net/netns/sctp.h>
#include <net/netns/dccp.h>
+#include <net/netns/netfilter.h>
#include <net/netns/x_tables.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netns/conntrack.h>
@@ -94,6 +95,7 @@ struct net {
struct netns_dccp dccp;
#endif
#ifdef CONFIG_NETFILTER
+ struct netns_nf nf;
struct netns_xt xt;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct netns_ct ct;
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index e991bd0a27af..31f1fb9eb784 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -49,12 +49,18 @@ struct nf_logger {
int nf_log_register(u_int8_t pf, struct nf_logger *logger);
void nf_log_unregister(struct nf_logger *logger);
-int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger);
-void nf_log_unbind_pf(u_int8_t pf);
+void nf_log_set(struct net *net, u_int8_t pf,
+ const struct nf_logger *logger);
+void nf_log_unset(struct net *net, const struct nf_logger *logger);
+
+int nf_log_bind_pf(struct net *net, u_int8_t pf,
+ const struct nf_logger *logger);
+void nf_log_unbind_pf(struct net *net, u_int8_t pf);
/* Calls the registered backend logging function */
-__printf(7, 8)
-void nf_log_packet(u_int8_t pf,
+__printf(8, 9)
+void nf_log_packet(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
new file mode 100644
index 000000000000..88740024ccf3
--- /dev/null
+++ b/include/net/netns/netfilter.h
@@ -0,0 +1,18 @@
+#ifndef __NETNS_NETFILTER_H
+#define __NETNS_NETFILTER_H
+
+#include <linux/proc_fs.h>
+#include <linux/netfilter.h>
+
+struct nf_logger;
+
+struct netns_nf {
+#if defined CONFIG_PROC_FS
+ struct proc_dir_entry *proc_netfilter;
+#endif
+ const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO];
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_header *nf_log_dir_header;
+#endif
+};
+#endif
diff --git a/include/uapi/linux/netfilter/xt_NFQUEUE.h b/include/uapi/linux/netfilter/xt_NFQUEUE.h
index 9eafdbbb401c..8bb5fe657d34 100644
--- a/include/uapi/linux/netfilter/xt_NFQUEUE.h
+++ b/include/uapi/linux/netfilter/xt_NFQUEUE.h
@@ -26,4 +26,13 @@ struct xt_NFQ_info_v2 {
__u16 bypass;
};
+struct xt_NFQ_info_v3 {
+ __u16 queuenum;
+ __u16 queues_total;
+ __u16 flags;
+#define NFQ_FLAG_BYPASS 0x01 /* for compatibility with v2 */
+#define NFQ_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */
+#define NFQ_FLAG_MASK 0x03
+};
+
#endif /* _XT_NFQ_TARGET_H */
diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_frag.h b/include/uapi/linux/netfilter_ipv6/ip6t_frag.h
index b47f61b9e082..dfd8bc2268cf 100644
--- a/include/uapi/linux/netfilter_ipv6/ip6t_frag.h
+++ b/include/uapi/linux/netfilter_ipv6/ip6t_frag.h
@@ -4,9 +4,9 @@
#include <linux/types.h>
struct ip6t_frag {
- __u32 ids[2]; /* Security Parameter Index */
+ __u32 ids[2]; /* Identification range */
__u32 hdrlen; /* Header Length */
- __u8 flags; /* */
+ __u8 flags; /* Flags */
__u8 invflags; /* Inverse flags */
};