summaryrefslogtreecommitdiff
path: root/include/linux/rbtree_augmented.h
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2015-05-27 11:09:36 +0930
committerRusty Russell <rusty@rustcorp.com.au>2015-05-28 11:32:04 +0930
commitd72da4a4d973d8a0a0d3c97e7cdebf287fbe3a99 (patch)
treeb3fbbf22c0f1bbc175dce958cf99241aee665f4c /include/linux/rbtree_augmented.h
parent0be964be0d45084245673c971d72a4b51690231d (diff)
downloadlwn-d72da4a4d973d8a0a0d3c97e7cdebf287fbe3a99.tar.gz
lwn-d72da4a4d973d8a0a0d3c97e7cdebf287fbe3a99.zip
rbtree: Make lockless searches non-fatal
Change the insert and erase code such that lockless searches are non-fatal. In and of itself an rbtree cannot be correctly searched while in-modification, we can however provide weaker guarantees that will allow the rbtree to be used in conjunction with other techniques, such as latches; see 9b0fd802e8c0 ("seqcount: Add raw_write_seqcount_latch()"). For this to work we need the following guarantees from the rbtree code: 1) a lockless reader must not see partial stores, this would allow it to observe nodes that are invalid memory. 2) there must not be (temporary) loops in the tree structure in the modifier's program order, this would cause a lookup which interrupts the modifier to get stuck indefinitely. For 1) we must use WRITE_ONCE() for all updates to the tree structure; in particular this patch only does rb_{left,right} as those are the only element required for simple searches. It generates slightly worse code, probably because volatile. But in pointer chasing heavy code a few instructions more should not matter. For 2) I have carefully audited the code and drawn every intermediate link state and not found a loop. Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: David Woodhouse <David.Woodhouse@intel.com> Cc: Rik van Riel <riel@redhat.com> Reviewed-by: Michel Lespinasse <walken@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'include/linux/rbtree_augmented.h')
-rw-r--r--include/linux/rbtree_augmented.h21
1 files changed, 14 insertions, 7 deletions
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 378c5ee75f78..14d7b831b63a 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -123,11 +123,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
{
if (parent) {
if (parent->rb_left == old)
- parent->rb_left = new;
+ WRITE_ONCE(parent->rb_left, new);
else
- parent->rb_right = new;
+ WRITE_ONCE(parent->rb_right, new);
} else
- root->rb_node = new;
+ WRITE_ONCE(root->rb_node, new);
}
extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
@@ -137,7 +137,8 @@ static __always_inline struct rb_node *
__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
const struct rb_augment_callbacks *augment)
{
- struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+ struct rb_node *child = node->rb_right;
+ struct rb_node *tmp = node->rb_left;
struct rb_node *parent, *rebalance;
unsigned long pc;
@@ -167,6 +168,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
tmp = parent;
} else {
struct rb_node *successor = child, *child2;
+
tmp = child->rb_left;
if (!tmp) {
/*
@@ -180,6 +182,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
*/
parent = successor;
child2 = successor->rb_right;
+
augment->copy(node, successor);
} else {
/*
@@ -201,19 +204,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
successor = tmp;
tmp = tmp->rb_left;
} while (tmp);
- parent->rb_left = child2 = successor->rb_right;
- successor->rb_right = child;
+ child2 = successor->rb_right;
+ WRITE_ONCE(parent->rb_left, child2);
+ WRITE_ONCE(successor->rb_right, child);
rb_set_parent(child, successor);
+
augment->copy(node, successor);
augment->propagate(parent, successor);
}
- successor->rb_left = tmp = node->rb_left;
+ tmp = node->rb_left;
+ WRITE_ONCE(successor->rb_left, tmp);
rb_set_parent(tmp, successor);
pc = node->__rb_parent_color;
tmp = __rb_parent(pc);
__rb_change_child(node, successor, tmp, root);
+
if (child2) {
successor->__rb_parent_color = pc;
rb_set_parent_color(child2, parent, RB_BLACK);