summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorBob Peterson <rpeterso@redhat.com>2008-03-10 18:17:47 -0500
committerSteven Whitehouse <swhiteho@redhat.com>2008-03-31 10:41:39 +0100
commit1f466a47e8a3a3e3b527b3285c7b9c8a837fb7ec (patch)
tree355f6084118d4ee9a986e07e5154eaa0e25b834a /fs
parentd82661d96993ac4efc1d54259ea85ffcd9b8bec6 (diff)
downloadlwn-1f466a47e8a3a3e3b527b3285c7b9c8a837fb7ec.tar.gz
lwn-1f466a47e8a3a3e3b527b3285c7b9c8a837fb7ec.zip
[GFS2] Faster gfs2_bitfit algorithm
This version of the gfs2_bitfit algorithm includes the latest suggestions from Steve Whitehouse. It is typically eight to ten times faster than the version we're using today. If there is a lot of metadata mixed in (lots of small files) the algorithm is often 15 times faster, and given the right conditions, I've seen peaks of 20 times faster. Signed-off-by: Bob Peterson <rpeterso@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/gfs2/rgrp.c93
1 files changed, 61 insertions, 32 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 4291375cecc6..7e8f0b1d6c6e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
+#include <linux/prefetch.h>
#include "gfs2.h"
#include "incore.h"
@@ -33,6 +34,16 @@
#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)
+#if BITS_PER_LONG == 32
+#define LBITMASK (0x55555555UL)
+#define LBITSKIP55 (0x55555555UL)
+#define LBITSKIP00 (0x00000000UL)
+#else
+#define LBITMASK (0x5555555555555555UL)
+#define LBITSKIP55 (0x5555555555555555UL)
+#define LBITSKIP00 (0x0000000000000000UL)
+#endif
+
/*
* These routines are used by the resource group routines (rgrp.c)
* to keep track of block allocation. Each block is represented by two
@@ -138,45 +149,63 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
u8 old_state)
{
- const u8 *byte;
- u32 blk = goal;
- unsigned int bit, bitlong;
- const unsigned long *plong;
-#if BITS_PER_LONG == 32
- const unsigned long plong55 = 0x55555555;
-#else
- const unsigned long plong55 = 0x5555555555555555;
-#endif
-
- byte = buffer + (goal / GFS2_NBBY);
- plong = (const unsigned long *)(buffer + (goal / GFS2_NBBY));
- bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
- bitlong = bit;
-
- while (byte < buffer + buflen) {
-
- if (bitlong == 0 && old_state == 0 && *plong == plong55) {
- plong++;
- byte += sizeof(unsigned long);
- blk += sizeof(unsigned long) * GFS2_NBBY;
- continue;
+ const u8 *byte, *start, *end;
+ int bit, startbit;
+ u32 g1, g2, misaligned;
+ unsigned long *plong;
+ unsigned long lskipval;
+
+ lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
+ g1 = (goal / GFS2_NBBY);
+ start = buffer + g1;
+ byte = start;
+ end = buffer + buflen;
+ g2 = ALIGN(g1, sizeof(unsigned long));
+ plong = (unsigned long *)(buffer + g2);
+ startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
+ misaligned = g2 - g1;
+ if (!misaligned)
+ goto ulong_aligned;
+/* parse the bitmap a byte at a time */
+misaligned:
+ while (byte < end) {
+ if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
+ return goal +
+ (((byte - start) * GFS2_NBBY) +
+ ((bit - startbit) >> 1));
}
- if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
- return blk;
bit += GFS2_BIT_SIZE;
- if (bit >= 8) {
+ if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
bit = 0;
byte++;
+ misaligned--;
+ if (!misaligned) {
+ plong = (unsigned long *)byte;
+ goto ulong_aligned;
+ }
}
- bitlong += GFS2_BIT_SIZE;
- if (bitlong >= sizeof(unsigned long) * 8) {
- bitlong = 0;
- plong++;
- }
-
- blk++;
}
+ return BFITNOENT;
+/* parse the bitmap a unsigned long at a time */
+ulong_aligned:
+ /* Stop at "end - 1" or else prefetch can go past the end and segfault.
+ We could "if" it but we'd lose some of the performance gained.
+ This way will only slow down searching the very last 4/8 bytes
+ depending on architecture. I've experimented with several ways
+ of writing this section such as using an else before the goto
+ but this one seems to be the fastest. */
+ while ((unsigned char *)plong < end - 1) {
+ prefetch(plong + 1);
+ if (((*plong) & LBITMASK) != lskipval)
+ break;
+ plong++;
+ }
+ if ((unsigned char *)plong < end) {
+ byte = (const u8 *)plong;
+ misaligned += sizeof(unsigned long) - 1;
+ goto misaligned;
+ }
return BFITNOENT;
}