padata: make the sequence counter an atomic_t

Using a spinlock to atomically increase a counter sounds wrong -- we have atomic_t for this! Also move 'seq_nr' to a different cache line than 'lock' to reduce cache line thrashing. This has the nice side effect of decreasing the size of struct parallel_data from 192 to 128 bytes for an x86-64 build, i.e. occupying only two instead of three cache lines. These changes result in a 5% performance increase on an IPsec test run using pcrypt. Btw., the seq_lock spinlock was never explicitly initialized -- one more reason to get rid of it. Signed-off-by: Mathias Krause <mathias.krause@secunet.com> Acked-by: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author: Mathias Krause <mathias.krause@secunet.com> 2013-10-25 12:14:15 +0200
committer: Herbert Xu <herbert@gondor.apana.org.au> 2013-10-30 12:02:58 +0800
commit: 0b6b098efcddac2bf4e2a895c9b655560bbfcee4 (patch)
tree: 0facc6f31504a5be471d4592f99bce0b28f995e2
parent: cfc6f11b768a9bdda17aac280474de1f0e344fea (diff)
download: lwn-0b6b098efcddac2bf4e2a895c9b655560bbfcee4.tar.gz
lwn-0b6b098efcddac2bf4e2a895c9b655560bbfcee4.zip
2 files changed, 5 insertions, 7 deletions
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 86292beebfe2..438694650471 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -129,10 +129,9 @@ struct parallel_data {
 	struct padata_serial_queue	__percpu *squeue;
 	atomic_t			reorder_objects;
 	atomic_t			refcnt;
+	atomic_t			seq_nr;
 	struct padata_cpumask		cpumask;
 	spinlock_t                      lock ____cacheline_aligned;
-	spinlock_t                      seq_lock;
-	unsigned int			seq_nr;
 	unsigned int			processed;
 	struct timer_list		timer;
 };
diff --git a/kernel/padata.c b/kernel/padata.c
index 07af2c95dcfe..2abd25d79cc8 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -46,6 +46,7 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 
 static int padata_cpu_hash(struct parallel_data *pd)
 {
+	unsigned int seq_nr;
 	int cpu_index;
 
 	/*
@@ -53,10 +54,8 @@ static int padata_cpu_hash(struct parallel_data *pd)
 	 * seq_nr mod. number of cpus in use.
 	 */
 
-	spin_lock(&pd->seq_lock);
-	cpu_index =  pd->seq_nr % cpumask_weight(pd->cpumask.pcpu);
-	pd->seq_nr++;
-	spin_unlock(&pd->seq_lock);
+	seq_nr = atomic_inc_return(&pd->seq_nr);
+	cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
 
 	return padata_index_to_cpu(pd, cpu_index);
 }
@@ -429,7 +428,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 	padata_init_pqueues(pd);
 	padata_init_squeues(pd);
 	setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
-	pd->seq_nr = 0;
+	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
 	atomic_set(&pd->refcnt, 0);
 	pd->pinst = pinst;
author	Mathias Krause <mathias.krause@secunet.com>	2013-10-25 12:14:15 +0200
committer	Herbert Xu <herbert@gondor.apana.org.au>	2013-10-30 12:02:58 +0800
commit	0b6b098efcddac2bf4e2a895c9b655560bbfcee4 (patch)
tree	0facc6f31504a5be471d4592f99bce0b28f995e2
parent	cfc6f11b768a9bdda17aac280474de1f0e344fea (diff)
download	lwn-0b6b098efcddac2bf4e2a895c9b655560bbfcee4.tar.gz lwn-0b6b098efcddac2bf4e2a895c9b655560bbfcee4.zip