diff options
Diffstat (limited to 'include/net/ip_vs.h')
-rw-r--r-- | include/net/ip_vs.h | 171 |
1 files changed, 160 insertions, 11 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ff1804a0c469..c6c61100d244 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -29,6 +29,7 @@ #include <net/netfilter/nf_conntrack.h> #endif #include <net/net_namespace.h> /* Netw namespace */ +#include <linux/sched/isolation.h> #define IP_VS_HDR_INVERSE 1 #define IP_VS_HDR_ICMP 2 @@ -42,6 +43,8 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; +extern struct mutex __ip_vs_mutex; + struct ip_vs_iphdr { int hdr_flags; /* ipvs flags */ __u32 off; /* Where IP or IPv4 header starts */ @@ -351,11 +354,11 @@ struct ip_vs_seq { /* counters per cpu */ struct ip_vs_counters { - __u64 conns; /* connections scheduled */ - __u64 inpkts; /* incoming packets */ - __u64 outpkts; /* outgoing packets */ - __u64 inbytes; /* incoming bytes */ - __u64 outbytes; /* outgoing bytes */ + u64_stats_t conns; /* connections scheduled */ + u64_stats_t inpkts; /* incoming packets */ + u64_stats_t outpkts; /* outgoing packets */ + u64_stats_t inbytes; /* incoming bytes */ + u64_stats_t outbytes; /* outgoing bytes */ }; /* Stats per cpu */ struct ip_vs_cpu_stats { @@ -363,9 +366,12 @@ struct ip_vs_cpu_stats { struct u64_stats_sync syncp; }; +/* Default nice for estimator kthreads */ +#define IPVS_EST_NICE 0 + /* IPVS statistics objects */ struct ip_vs_estimator { - struct list_head list; + struct hlist_node list; u64 last_inbytes; u64 last_outbytes; @@ -378,6 +384,10 @@ struct ip_vs_estimator { u64 outpps; u64 inbps; u64 outbps; + + s32 ktid:16, /* kthread ID, -1=temp list */ + ktrow:8, /* row/tick ID for kthread */ + ktcid:8; /* chain ID for kthread tick */ }; /* @@ -405,6 +415,76 @@ struct ip_vs_stats { struct ip_vs_kstats kstats0; /* reset values */ }; +struct ip_vs_stats_rcu { + struct ip_vs_stats s; + struct rcu_head rcu_head; +}; + +int ip_vs_stats_init_alloc(struct ip_vs_stats *s); +struct ip_vs_stats *ip_vs_stats_alloc(void); +void ip_vs_stats_release(struct ip_vs_stats *stats); +void ip_vs_stats_free(struct ip_vs_stats *stats); + +/* Process estimators in multiple timer ticks (20/50/100, see ktrow) */ +#define IPVS_EST_NTICKS 50 +/* Estimation uses a 2-second period containing ticks (in jiffies) */ +#define IPVS_EST_TICK ((2 * HZ) / IPVS_EST_NTICKS) + +/* Limit of CPU load per kthread (8 for 12.5%), ratio of CPU capacity (1/C). + * Value of 4 and above ensures kthreads will take work without exceeding + * the CPU capacity under different circumstances. + */ +#define IPVS_EST_LOAD_DIVISOR 8 + +/* Kthreads should not have work that exceeds the CPU load above 50% */ +#define IPVS_EST_CPU_KTHREADS (IPVS_EST_LOAD_DIVISOR / 2) + +/* Desired number of chains per timer tick (chain load factor in 100us units), + * 48=4.8ms of 40ms tick (12% CPU usage): + * 2 sec * 1000 ms in sec * 10 (100us in ms) / 8 (12.5%) / 50 + */ +#define IPVS_EST_CHAIN_FACTOR \ + ALIGN_DOWN(2 * 1000 * 10 / IPVS_EST_LOAD_DIVISOR / IPVS_EST_NTICKS, 8) + +/* Compiled number of chains per tick + * The defines should match cond_resched_rcu + */ +#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) +#define IPVS_EST_TICK_CHAINS IPVS_EST_CHAIN_FACTOR +#else +#define IPVS_EST_TICK_CHAINS 1 +#endif + +#if IPVS_EST_NTICKS > 127 +#error Too many timer ticks for ktrow +#endif + +/* Multiple chains processed in same tick */ +struct ip_vs_est_tick_data { + struct hlist_head chains[IPVS_EST_TICK_CHAINS]; + DECLARE_BITMAP(present, IPVS_EST_TICK_CHAINS); + DECLARE_BITMAP(full, IPVS_EST_TICK_CHAINS); + int chain_len[IPVS_EST_TICK_CHAINS]; +}; + +/* Context for estimation kthread */ +struct ip_vs_est_kt_data { + struct netns_ipvs *ipvs; + struct task_struct *task; /* task if running */ + struct ip_vs_est_tick_data __rcu *ticks[IPVS_EST_NTICKS]; + DECLARE_BITMAP(avail, IPVS_EST_NTICKS); /* tick has space for ests */ + unsigned long est_timer; /* estimation timer (jiffies) */ + struct ip_vs_stats *calc_stats; /* Used for calculation */ + int tick_len[IPVS_EST_NTICKS]; /* est count */ + int id; /* ktid per netns */ + int chain_max; /* max ests per tick chain */ + int tick_max; /* max ests per tick */ + int est_count; /* attached ests to kthread */ + int est_max_count; /* max ests per kthread */ + int add_row; /* row for new ests */ + int est_row; /* estimated row */ +}; + struct dst_entry; struct iphdr; struct ip_vs_conn; @@ -688,6 +768,7 @@ struct ip_vs_dest { union nf_inet_addr vaddr; /* virtual IP address */ __u32 vfwmark; /* firewall mark of service */ + struct rcu_head rcu_head; struct list_head t_list; /* in dest_trash */ unsigned int in_rs_table:1; /* we are in rs_table */ }; @@ -869,7 +950,7 @@ struct netns_ipvs { atomic_t conn_count; /* connection counter */ /* ip_vs_ctl */ - struct ip_vs_stats tot_stats; /* Statistics & est. */ + struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. */ int num_services; /* no of virtual services */ int num_services6; /* IPv6 virtual services */ @@ -932,6 +1013,12 @@ struct netns_ipvs { int sysctl_schedule_icmp; int sysctl_ignore_tunneled; int sysctl_run_estimation; +#ifdef CONFIG_SYSCTL + cpumask_var_t sysctl_est_cpulist; /* kthread cpumask */ + int est_cpulist_valid; /* cpulist set */ + int sysctl_est_nice; /* kthread nice */ + int est_stopped; /* stop tasks */ +#endif /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -942,9 +1029,17 @@ struct netns_ipvs { struct ctl_table_header *lblcr_ctl_header; struct ctl_table *lblcr_ctl_table; /* ip_vs_est */ - struct list_head est_list; /* estimator list */ - spinlock_t est_lock; - struct timer_list est_timer; /* Estimation timer */ + struct delayed_work est_reload_work;/* Reload kthread tasks */ + struct mutex est_mutex; /* protect kthread tasks */ + struct hlist_head est_temp_list; /* Ests during calc phase */ + struct ip_vs_est_kt_data **est_kt_arr; /* Array of kthread data ptrs */ + unsigned long est_max_threads;/* Hard limit of kthreads */ + int est_calc_phase; /* Calculation phase */ + int est_chain_max; /* Calculated chain_max */ + int est_kt_count; /* Allocated ptrs */ + int est_add_ktid; /* ktid where to add ests */ + atomic_t est_genid; /* kthreads reload genid */ + atomic_t est_genid_done; /* applied genid */ /* ip_vs_sync */ spinlock_t sync_lock; struct ipvs_master_sync_state *ms; @@ -1077,6 +1172,19 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return ipvs->sysctl_run_estimation; } +static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +{ + if (ipvs->est_cpulist_valid) + return ipvs->sysctl_est_cpulist; + else + return housekeeping_cpumask(HK_TYPE_KTHREAD); +} + +static inline int sysctl_est_nice(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_est_nice; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1174,6 +1282,16 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return 1; } +static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +{ + return housekeeping_cpumask(HK_TYPE_KTHREAD); +} + +static inline int sysctl_est_nice(struct netns_ipvs *ipvs) +{ + return IPVS_EST_NICE; +} + #endif /* IPVS core functions @@ -1475,10 +1593,41 @@ int stop_sync_thread(struct netns_ipvs *ipvs, int state); void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts); /* IPVS rate estimator prototypes (from ip_vs_est.c) */ -void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); +int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_zero_estimator(struct ip_vs_stats *stats); void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats); +void ip_vs_est_reload_start(struct netns_ipvs *ipvs); +int ip_vs_est_kthread_start(struct netns_ipvs *ipvs, + struct ip_vs_est_kt_data *kd); +void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd); + +static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs) +{ +#ifdef CONFIG_SYSCTL + /* Stop tasks while cpulist is empty or if disabled with flag */ + ipvs->est_stopped = !sysctl_run_estimation(ipvs) || + (ipvs->est_cpulist_valid && + cpumask_empty(sysctl_est_cpulist(ipvs))); +#endif +} + +static inline bool ip_vs_est_stopped(struct netns_ipvs *ipvs) +{ +#ifdef CONFIG_SYSCTL + return ipvs->est_stopped; +#else + return false; +#endif +} + +static inline int ip_vs_est_max_threads(struct netns_ipvs *ipvs) +{ + unsigned int limit = IPVS_EST_CPU_KTHREADS * + cpumask_weight(sysctl_est_cpulist(ipvs)); + + return max(1U, limit); +} /* Various IPVS packet transmitters (from ip_vs_xmit.c) */ int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, |