summaryrefslogtreecommitdiffstats
path: root/lib/radix-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/radix-tree.c')
-rw-r--r--lib/radix-tree.c375
1 files changed, 287 insertions, 88 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7bf7d4e60e43..eaea14b8f2ca 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -22,20 +22,21 @@
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
#include <linux/cpu.h>
#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/idr.h>
#include <linux/init.h>
#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/radix-tree.h>
+#include <linux/kmemleak.h>
#include <linux/percpu.h>
+#include <linux/preempt.h> /* in_interrupt() */
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
#include <linux/slab.h>
-#include <linux/kmemleak.h>
-#include <linux/cpu.h>
#include <linux/string.h>
-#include <linux/bitops.h>
-#include <linux/rcupdate.h>
-#include <linux/preempt.h> /* in_interrupt() */
/* Number of nodes in fully populated tree of given height */
@@ -60,6 +61,15 @@ static struct kmem_cache *radix_tree_node_cachep;
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
/*
+ * The IDR does not have to be as high as the radix tree since it uses
+ * signed integers, not unsigned longs.
+ */
+#define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1)
+#define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \
+ RADIX_TREE_MAP_SHIFT))
+#define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1)
+
+/*
* Per-cpu pool of preloaded nodes
*/
struct radix_tree_preload {
@@ -149,27 +159,32 @@ static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
{
- root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
+ root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
}
static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
- root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
+ root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
}
static inline void root_tag_clear_all(struct radix_tree_root *root)
{
- root->gfp_mask &= __GFP_BITS_MASK;
+ root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1;
}
static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
{
- return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
+ return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT));
}
static inline unsigned root_tags_get(const struct radix_tree_root *root)
{
- return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT;
+ return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT;
+}
+
+static inline bool is_idr(const struct radix_tree_root *root)
+{
+ return !!(root->gfp_mask & ROOT_IS_IDR);
}
/*
@@ -187,6 +202,11 @@ static inline int any_tag_set(const struct radix_tree_node *node,
return 0;
}
+static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
+{
+ bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
+}
+
/**
* radix_tree_find_next_bit - find the next set bit in a memory region
*
@@ -240,6 +260,13 @@ static inline unsigned long node_maxindex(const struct radix_tree_node *node)
return shift_maxindex(node->shift);
}
+static unsigned long next_index(unsigned long index,
+ const struct radix_tree_node *node,
+ unsigned long offset)
+{
+ return (index & ~node_maxindex(node)) + (offset << node->shift);
+}
+
#ifndef __KERNEL__
static void dump_node(struct radix_tree_node *node, unsigned long index)
{
@@ -278,11 +305,52 @@ static void radix_tree_dump(struct radix_tree_root *root)
{
pr_debug("radix root: %p rnode %p tags %x\n",
root, root->rnode,
- root->gfp_mask >> __GFP_BITS_SHIFT);
+ root->gfp_mask >> ROOT_TAG_SHIFT);
if (!radix_tree_is_internal_node(root->rnode))
return;
dump_node(entry_to_node(root->rnode), 0);
}
+
+static void dump_ida_node(void *entry, unsigned long index)
+{
+ unsigned long i;
+
+ if (!entry)
+ return;
+
+ if (radix_tree_is_internal_node(entry)) {
+ struct radix_tree_node *node = entry_to_node(entry);
+
+ pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
+ node, node->offset, index * IDA_BITMAP_BITS,
+ ((index | node_maxindex(node)) + 1) *
+ IDA_BITMAP_BITS - 1,
+ node->parent, node->tags[0][0], node->shift,
+ node->count);
+ for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
+ dump_ida_node(node->slots[i],
+ index | (i << node->shift));
+ } else {
+ struct ida_bitmap *bitmap = entry;
+
+ pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
+ (int)(index & RADIX_TREE_MAP_MASK),
+ index * IDA_BITMAP_BITS,
+ (index + 1) * IDA_BITMAP_BITS - 1);
+ for (i = 0; i < IDA_BITMAP_LONGS; i++)
+ pr_cont(" %lx", bitmap->bitmap[i]);
+ pr_cont("\n");
+ }
+}
+
+static void ida_dump(struct ida *ida)
+{
+ struct radix_tree_root *root = &ida->ida_rt;
+ pr_debug("ida: %p %p free %d bitmap %p\n", ida, root->rnode,
+ root->gfp_mask >> ROOT_TAG_SHIFT,
+ ida->free_bitmap);
+ dump_ida_node(root->rnode, 0);
+}
#endif
/*
@@ -290,13 +358,11 @@ static void radix_tree_dump(struct radix_tree_root *root)
* that the caller has pinned this thread of control to the current CPU.
*/
static struct radix_tree_node *
-radix_tree_node_alloc(struct radix_tree_root *root,
- struct radix_tree_node *parent,
+radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
unsigned int shift, unsigned int offset,
unsigned int count, unsigned int exceptional)
{
struct radix_tree_node *ret = NULL;
- gfp_t gfp_mask = root_gfp_mask(root);
/*
* Preload code isn't irq safe and it doesn't make sense to use
@@ -533,7 +599,7 @@ static unsigned radix_tree_load_root(const struct radix_tree_root *root,
/*
* Extend a radix tree so it can store key @index.
*/
-static int radix_tree_extend(struct radix_tree_root *root,
+static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
unsigned long index, unsigned int shift)
{
struct radix_tree_node *slot;
@@ -546,19 +612,27 @@ static int radix_tree_extend(struct radix_tree_root *root,
maxshift += RADIX_TREE_MAP_SHIFT;
slot = root->rnode;
- if (!slot)
+ if (!slot && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
goto out;
do {
- struct radix_tree_node *node = radix_tree_node_alloc(root,
- NULL, shift, 0, 1, 0);
+ struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
+ shift, 0, 1, 0);
if (!node)
return -ENOMEM;
- /* Propagate the aggregated tag info into the new root */
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
- if (root_tag_get(root, tag))
- tag_set(node, tag, 0);
+ if (is_idr(root)) {
+ all_tag_set(node, IDR_FREE);
+ if (!root_tag_get(root, IDR_FREE)) {
+ tag_clear(node, IDR_FREE, 0);
+ root_tag_set(root, IDR_FREE);
+ }
+ } else {
+ /* Propagate the aggregated tag info to the new child */
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ if (root_tag_get(root, tag))
+ tag_set(node, tag, 0);
+ }
}
BUG_ON(shift > BITS_PER_LONG);
@@ -619,6 +693,8 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
* one (root->rnode) as far as dependent read barriers go.
*/
root->rnode = child;
+ if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
+ root_tag_clear(root, IDR_FREE);
/*
* We have a dilemma here. The node's slot[0] must not be
@@ -674,7 +750,12 @@ static bool delete_node(struct radix_tree_root *root,
parent->slots[node->offset] = NULL;
parent->count--;
} else {
- root_tag_clear_all(root);
+ /*
+ * Shouldn't the tags already have all been cleared
+ * by the caller?
+ */
+ if (!is_idr(root))
+ root_tag_clear_all(root);
root->rnode = NULL;
}
@@ -714,6 +795,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
unsigned long maxindex;
unsigned int shift, offset = 0;
unsigned long max = index | ((1UL << order) - 1);
+ gfp_t gfp = root_gfp_mask(root);
shift = radix_tree_load_root(root, &child, &maxindex);
@@ -721,7 +803,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
if (order > 0 && max == ((1UL << order) - 1))
max++;
if (max > maxindex) {
- int error = radix_tree_extend(root, max, shift);
+ int error = radix_tree_extend(root, gfp, max, shift);
if (error < 0)
return error;
shift = error;
@@ -732,7 +814,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
shift -= RADIX_TREE_MAP_SHIFT;
if (child == NULL) {
/* Have to add a child node. */
- child = radix_tree_node_alloc(root, node, shift,
+ child = radix_tree_node_alloc(gfp, node, shift,
offset, 0, 0);
if (!child)
return -ENOMEM;
@@ -755,7 +837,6 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
return 0;
}
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
/*
* Free any nodes below this node. The tree is presumed to not need
* shrinking, and any user data in the tree is presumed to not need a
@@ -791,6 +872,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
}
}
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
static inline int insert_entries(struct radix_tree_node *node, void **slot,
void *item, unsigned order, bool replace)
{
@@ -996,69 +1078,70 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
}
EXPORT_SYMBOL(radix_tree_lookup);
-static inline int slot_count(struct radix_tree_node *node,
- void **slot)
+static inline void replace_sibling_entries(struct radix_tree_node *node,
+ void **slot, int count, int exceptional)
{
- int n = 1;
#ifdef CONFIG_RADIX_TREE_MULTIORDER
void *ptr = node_to_entry(slot);
- unsigned offset = get_slot_offset(node, slot);
- int i;
+ unsigned offset = get_slot_offset(node, slot) + 1;
- for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
- if (node->slots[offset + i] != ptr)
+ while (offset < RADIX_TREE_MAP_SIZE) {
+ if (node->slots[offset] != ptr)
break;
- n++;
+ if (count < 0) {
+ node->slots[offset] = NULL;
+ node->count--;
+ }
+ node->exceptional += exceptional;
+ offset++;
}
#endif
- return n;
}
-static void replace_slot(struct radix_tree_root *root,
- struct radix_tree_node *node,
- void **slot, void *item,
- bool warn_typeswitch)
+static void replace_slot(void **slot, void *item, struct radix_tree_node *node,
+ int count, int exceptional)
{
- void *old = rcu_dereference_raw(*slot);
- int count, exceptional;
-
- WARN_ON_ONCE(radix_tree_is_internal_node(item));
-
- count = !!item - !!old;
- exceptional = !!radix_tree_exceptional_entry(item) -
- !!radix_tree_exceptional_entry(old);
-
- WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
+ if (WARN_ON_ONCE(radix_tree_is_internal_node(item)))
+ return;
- if (node) {
+ if (node && (count || exceptional)) {
node->count += count;
- if (exceptional) {
- exceptional *= slot_count(node, slot);
- node->exceptional += exceptional;
- }
+ node->exceptional += exceptional;
+ replace_sibling_entries(node, slot, count, exceptional);
}
rcu_assign_pointer(*slot, item);
}
-static inline void delete_sibling_entries(struct radix_tree_node *node,
- void **slot)
+static bool node_tag_get(const struct radix_tree_root *root,
+ const struct radix_tree_node *node,
+ unsigned int tag, unsigned int offset)
{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
- bool exceptional = radix_tree_exceptional_entry(*slot);
- void *ptr = node_to_entry(slot);
- unsigned offset = get_slot_offset(node, slot);
- int i;
+ if (node)
+ return tag_get(node, tag, offset);
+ return root_tag_get(root, tag);
+}
- for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
- if (node->slots[offset + i] != ptr)
- break;
- node->slots[offset + i] = NULL;
- node->count--;
- if (exceptional)
- node->exceptional--;
+/*
+ * IDR users want to be able to store NULL in the tree, so if the slot isn't
+ * free, don't adjust the count, even if it's transitioning between NULL and
+ * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still
+ * have empty bits, but it only stores NULL in slots when they're being
+ * deleted.
+ */
+static int calculate_count(struct radix_tree_root *root,
+ struct radix_tree_node *node, void **slot,
+ void *item, void *old)
+{
+ if (is_idr(root)) {
+ unsigned offset = get_slot_offset(node, slot);
+ bool free = node_tag_get(root, node, IDR_FREE, offset);
+ if (!free)
+ return 0;
+ if (!old)
+ return 1;
}
-#endif
+ return !!item - !!old;
}
/**
@@ -1078,15 +1161,19 @@ void __radix_tree_replace(struct radix_tree_root *root,
void **slot, void *item,
radix_tree_update_node_t update_node, void *private)
{
- if (!item)
- delete_sibling_entries(node, slot);
+ void *old = rcu_dereference_raw(*slot);
+ int exceptional = !!radix_tree_exceptional_entry(item) -
+ !!radix_tree_exceptional_entry(old);
+ int count = calculate_count(root, node, slot, item, old);
+
/*
* This function supports replacing exceptional entries and
* deleting entries, but that needs accounting against the
* node unless the slot is root->rnode.
*/
- replace_slot(root, node, slot, item,
- !node && slot != (void **)&root->rnode);
+ WARN_ON_ONCE(!node && (slot != (void **)&root->rnode) &&
+ (count || exceptional));
+ replace_slot(slot, item, node, count, exceptional);
if (!node)
return;
@@ -1116,7 +1203,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
void radix_tree_replace_slot(struct radix_tree_root *root,
void **slot, void *item)
{
- replace_slot(root, NULL, slot, item, true);
+ __radix_tree_replace(root, NULL, slot, item, NULL, NULL);
}
/**
@@ -1191,6 +1278,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
void **slot;
unsigned int offset, end;
unsigned n, tag, tags = 0;
+ gfp_t gfp = root_gfp_mask(root);
if (!__radix_tree_lookup(root, index, &parent, &slot))
return -ENOENT;
@@ -1228,7 +1316,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
for (;;) {
if (node->shift > order) {
- child = radix_tree_node_alloc(root, node,
+ child = radix_tree_node_alloc(gfp, node,
node->shift - RADIX_TREE_MAP_SHIFT,
offset, 0, 0);
if (!child)
@@ -1444,8 +1532,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root,
radix_tree_load_root(root, &node, &maxindex);
if (index > maxindex)
return 0;
- if (node == NULL)
- return 0;
while (radix_tree_is_internal_node(node)) {
unsigned offset;
@@ -1453,8 +1539,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root,
parent = entry_to_node(node);
offset = radix_tree_descend(parent, &node, index);
- if (!node)
- return 0;
if (!tag_get(parent, tag, offset))
return 0;
if (node == RADIX_TREE_RETRY)
@@ -1481,6 +1565,11 @@ static void set_iter_tags(struct radix_tree_iter *iter,
unsigned tag_long = offset / BITS_PER_LONG;
unsigned tag_bit = offset % BITS_PER_LONG;
+ if (!node) {
+ iter->tags = 1;
+ return;
+ }
+
iter->tags = node->tags[tag][tag_long] >> tag_bit;
/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
@@ -1873,13 +1962,18 @@ void __radix_tree_delete_node(struct radix_tree_root *root,
static bool __radix_tree_delete(struct radix_tree_root *root,
struct radix_tree_node *node, void **slot)
{
+ void *old = rcu_dereference_raw(*slot);
+ int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0;
unsigned offset = get_slot_offset(node, slot);
int tag;
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- node_tag_clear(root, node, tag, offset);
+ if (is_idr(root))
+ node_tag_set(root, node, IDR_FREE, offset);
+ else
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+ node_tag_clear(root, node, tag, offset);
- replace_slot(root, node, slot, NULL, true);
+ replace_slot(slot, NULL, node, -1, exceptional);
return node && delete_node(root, node, NULL, NULL);
}
@@ -1916,12 +2010,13 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
void *radix_tree_delete_item(struct radix_tree_root *root,
unsigned long index, void *item)
{
- struct radix_tree_node *node;
+ struct radix_tree_node *node = NULL;
void **slot;
void *entry;
entry = __radix_tree_lookup(root, index, &node, &slot);
- if (!entry)
+ if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
+ get_slot_offset(node, slot))))
return NULL;
if (item && entry != item)
@@ -1957,8 +2052,7 @@ void radix_tree_clear_tags(struct radix_tree_root *root,
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
node_tag_clear(root, node, tag, offset);
} else {
- /* Clear root node tags */
- root->gfp_mask &= __GFP_BITS_MASK;
+ root_tag_clear_all(root);
}
}
@@ -1973,6 +2067,111 @@ int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
}
EXPORT_SYMBOL(radix_tree_tagged);
+/**
+ * idr_preload - preload for idr_alloc()
+ * @gfp_mask: allocation mask to use for preloading
+ *
+ * Preallocate memory to use for the next call to idr_alloc(). This function
+ * returns with preemption disabled. It will be enabled by idr_preload_end().
+ */
+void idr_preload(gfp_t gfp_mask)
+{
+ __radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE);
+}
+EXPORT_SYMBOL(idr_preload);
+
+void **idr_get_free(struct radix_tree_root *root,
+ struct radix_tree_iter *iter, gfp_t gfp, int end)
+{
+ struct radix_tree_node *node = NULL, *child;
+ void **slot = (void **)&root->rnode;
+ unsigned long maxindex, start = iter->next_index;
+ unsigned long max = end > 0 ? end - 1 : INT_MAX;
+ unsigned int shift, offset = 0;
+
+ grow:
+ shift = radix_tree_load_root(root, &child, &maxindex);
+ if (!radix_tree_tagged(root, IDR_FREE))
+ start = max(start, maxindex + 1);
+ if (start > max)
+ return ERR_PTR(-ENOSPC);
+
+ if (start > maxindex) {
+ int error = radix_tree_extend(root, gfp, start, shift);
+ if (error < 0)
+ return ERR_PTR(error);
+ shift = error;
+ child = rcu_dereference_raw(root->rnode);
+ }
+
+ while (shift) {
+ shift -= RADIX_TREE_MAP_SHIFT;
+ if (child == NULL) {
+ /* Have to add a child node. */
+ child = radix_tree_node_alloc(gfp, node, shift, offset,
+ 0, 0);
+ if (!child)
+ return ERR_PTR(-ENOMEM);
+ all_tag_set(child, IDR_FREE);
+ rcu_assign_pointer(*slot, node_to_entry(child));
+ if (node)
+ node->count++;
+ } else if (!radix_tree_is_internal_node(child))
+ break;
+
+ node = entry_to_node(child);
+ offset = radix_tree_descend(node, &child, start);
+ if (!tag_get(node, IDR_FREE, offset)) {
+ offset = radix_tree_find_next_bit(node, IDR_FREE,
+ offset + 1);
+ start = next_index(start, node, offset);
+ if (start > max)
+ return ERR_PTR(-ENOSPC);
+ while (offset == RADIX_TREE_MAP_SIZE) {
+ offset = node->offset + 1;
+ node = node->parent;
+ if (!node)
+ goto grow;
+ shift = node->shift;
+ }
+ child = rcu_dereference_raw(node->slots[offset]);
+ }
+ slot = &node->slots[offset];
+ }
+
+ iter->index = start;
+ if (node)
+ iter->next_index = 1 + min(max, (start | node_maxindex(node)));
+ else
+ iter->next_index = 1;
+ iter->node = node;
+ __set_iter_shift(iter, shift);
+ set_iter_tags(iter, node, offset, IDR_FREE);
+
+ return slot;
+}
+
+/**
+ * idr_destroy - release all internal memory from an IDR
+ * @idr: idr handle
+ *
+ * After this function is called, the IDR is empty, and may be reused or
+ * the data structure containing it may be freed.
+ *
+ * A typical clean-up sequence for objects stored in an idr tree will use
+ * idr_for_each() to free all objects, if necessary, then idr_destroy() to
+ * free the memory used to keep track of those objects.
+ */
+void idr_destroy(struct idr *idr)
+{
+ struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode);
+ if (radix_tree_is_internal_node(node))
+ radix_tree_free_nodes(node);
+ idr->idr_rt.rnode = NULL;
+ root_tag_set(&idr->idr_rt, IDR_FREE);
+}
+EXPORT_SYMBOL(idr_destroy);
+
static void
radix_tree_node_ctor(void *arg)
{