summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin K. Petersen <martin.petersen@oracle.com>2010-03-16 20:30:01 -0400
committerGreg Kroah-Hartman <gregkh@suse.de>2010-04-01 15:58:56 -0700
commit9b2ff973b075293e16e148e57a1856498e23e95d (patch)
tree2aac23425e42c6ac2f5949aaa4347c141bb17373
parente548510b30a5b74f9be9c501d955ca90c7d1a8d0 (diff)
downloadlwn-9b2ff973b075293e16e148e57a1856498e23e95d.tar.gz
lwn-9b2ff973b075293e16e148e57a1856498e23e95d.zip
block: Backport of various I/O topology fixes from 2.6.33 and 2.6.34
block: Backport of various I/O topology fixes from 2.6.33 and 2.6.34 The stacking code incorrectly scaled up the data offset in some cases causing misaligned devices to report alignment. Rewrite the stacking algorithm to remedy this. (Upstream commit 9504e0864b58b4a304820dcf3755f1da80d5e72f) The top device misalignment flag would not be set if the added bottom device was already misaligned as opposed to causing a stacking failure. Also massage the reporting so that an error is only returned if adding the bottom device caused the misalignment. I.e. don't return an error if the top is already flagged as misaligned. (Upstream commit fe0b393f2c0a0d23a9bc9ed7dc51a1ee511098bd) lcm() was defined to take integer-sized arguments. The supplied arguments are multiplied, however, causing us to overflow given sufficiently large input. That in turn led to incorrect optimal I/O size reporting in some cases. Switch lcm() over to unsigned long similar to gcd() and move the function from blk-settings.c to lib. Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--block/blk-settings.c93
-rw-r--r--include/linux/lcm.h8
-rw-r--r--lib/Makefile2
-rw-r--r--lib/lcm.c15
4 files changed, 92 insertions, 26 deletions
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d5aa8865c644..9651c0a5abe0 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -8,6 +8,7 @@
#include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/gcd.h>
+#include <linux/lcm.h>
#include "blk.h"
@@ -490,18 +491,31 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
/**
* blk_stack_limits - adjust queue_limits for stacked devices
- * @t: the stacking driver limits (top)
- * @b: the underlying queue limits (bottom)
+ * @t: the stacking driver limits (top device)
+ * @b: the underlying queue limits (bottom, component device)
* @offset: offset to beginning of data within component device
*
* Description:
- * Merges two queue_limit structs. Returns 0 if alignment didn't
- * change. Returns -1 if adding the bottom device caused
- * misalignment.
+ * This function is used by stacking drivers like MD and DM to ensure
+ * that all component devices have compatible block sizes and
+ * alignments. The stacking driver must provide a queue_limits
+ * struct (top) and then iteratively call the stacking function for
+ * all component (bottom) devices. The stacking function will
+ * attempt to combine the values and ensure proper alignment.
+ *
+ * Returns 0 if the top and bottom queue_limits are compatible. The
+ * top device's block sizes and alignment offsets may be adjusted to
+ * ensure alignment with the bottom device. If no compatible sizes
+ * and alignments exist, -1 is returned and the resulting top
+ * queue_limits will have the misaligned flag set to indicate that
+ * the alignment_offset is undefined.
*/
int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset)
{
+ sector_t alignment;
+ unsigned int top, bottom, ret = 0;
+
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
@@ -518,6 +532,26 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_segment_size = min_not_zero(t->max_segment_size,
b->max_segment_size);
+ t->misaligned |= b->misaligned;
+
+ alignment = queue_limit_alignment_offset(b, offset);
+
+ /* Bottom device has different alignment. Check that it is
+ * compatible with the current top alignment.
+ */
+ if (t->alignment_offset != alignment) {
+
+ top = max(t->physical_block_size, t->io_min)
+ + t->alignment_offset;
+ bottom = max(b->physical_block_size, b->io_min) + alignment;
+
+ /* Verify that top and bottom intervals line up */
+ if (max(top, bottom) & (min(top, bottom) - 1)) {
+ t->misaligned = 1;
+ ret = -1;
+ }
+ }
+
t->logical_block_size = max(t->logical_block_size,
b->logical_block_size);
@@ -525,37 +559,46 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->physical_block_size);
t->io_min = max(t->io_min, b->io_min);
+ t->io_opt = lcm(t->io_opt, b->io_opt);
+
t->no_cluster |= b->no_cluster;
- /* Bottom device offset aligned? */
- if (offset &&
- (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
+ /* Physical block size a multiple of the logical block size? */
+ if (t->physical_block_size & (t->logical_block_size - 1)) {
+ t->physical_block_size = t->logical_block_size;
t->misaligned = 1;
- return -1;
+ ret = -1;
}
- /* If top has no alignment offset, inherit from bottom */
- if (!t->alignment_offset)
- t->alignment_offset =
- b->alignment_offset & (b->physical_block_size - 1);
+ /* Minimum I/O a multiple of the physical block size? */
+ if (t->io_min & (t->physical_block_size - 1)) {
+ t->io_min = t->physical_block_size;
+ t->misaligned = 1;
+ ret = -1;
+ }
- /* Top device aligned on logical block boundary? */
- if (t->alignment_offset & (t->logical_block_size - 1)) {
+ /* Optimal I/O a multiple of the physical block size? */
+ if (t->io_opt & (t->physical_block_size - 1)) {
+ t->io_opt = 0;
t->misaligned = 1;
- return -1;
+ ret = -1;
}
- /* Find lcm() of optimal I/O size */
- if (t->io_opt && b->io_opt)
- t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt);
- else if (b->io_opt)
- t->io_opt = b->io_opt;
+ /* Find lowest common alignment_offset */
+ t->alignment_offset = lcm(t->alignment_offset, alignment)
+ & (max(t->physical_block_size, t->io_min) - 1);
- /* Verify that optimal I/O size is a multiple of io_min */
- if (t->io_min && t->io_opt % t->io_min)
- return -1;
+ /* Verify that new alignment_offset is on a logical block boundary */
+ if (t->alignment_offset & (t->logical_block_size - 1)) {
+ t->misaligned = 1;
+ ret = -1;
+ }
- return 0;
+ /* Discard */
+ t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
+ b->max_discard_sectors);
+
+ return ret;
}
EXPORT_SYMBOL(blk_stack_limits);
diff --git a/include/linux/lcm.h b/include/linux/lcm.h
new file mode 100644
index 000000000000..7bf01d779b45
--- /dev/null
+++ b/include/linux/lcm.h
@@ -0,0 +1,8 @@
+#ifndef _LCM_H
+#define _LCM_H
+
+#include <linux/compiler.h>
+
+unsigned long lcm(unsigned long a, unsigned long b) __attribute_const__;
+
+#endif /* _LCM_H */
diff --git a/lib/Makefile b/lib/Makefile
index 2e78277eff9d..452f188c11e0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o
obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
- string_helpers.o gcd.o
+ string_helpers.o gcd.o lcm.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/lcm.c b/lib/lcm.c
new file mode 100644
index 000000000000..157cd88a6ffc
--- /dev/null
+++ b/lib/lcm.c
@@ -0,0 +1,15 @@
+#include <linux/kernel.h>
+#include <linux/gcd.h>
+#include <linux/module.h>
+
+/* Lowest common multiple */
+unsigned long lcm(unsigned long a, unsigned long b)
+{
+ if (a && b)
+ return (a * b) / gcd(a, b);
+ else if (b)
+ return b;
+
+ return a;
+}
+EXPORT_SYMBOL_GPL(lcm);