* Copyright (C) 2001 Momchil Velikov
  * Portions Copyright (C) 2001 Christoph Hellwig
  * Copyright (C) 2006 Nick Piggin
+ * Copyright (C) 2012 Konstantin Khlebnikov
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
        preempt_enable();
 }
 
+/**
+ * struct radix_tree_iter - radix tree iterator state
+ *
+ * @index:     index of current slot
+ * @next_index:        one beyond the last index for this chunk
+ * @tags:      bit-mask for tag-iterating; bit 0 describes the slot at @index
+ *
+ * This radix tree iterator works in terms of "chunks" of slots.  A chunk is a
+ * subinterval of slots contained within one radix tree leaf node.  It is
+ * described by a pointer to its first slot and a struct radix_tree_iter
+ * which holds the chunk's position in the tree and its size.  For tagged
+ * iteration radix_tree_iter also holds the slots' bit-mask for one chosen
+ * radix tree tag.
+ *
+ * The number of slots left in the chunk, counting the current one, is
+ * @next_index - @index.
+ */
+struct radix_tree_iter {
+       unsigned long   index;
+       unsigned long   next_index;
+       unsigned long   tags;
+};
+
+#define RADIX_TREE_ITER_TAG_MASK       0x00FF  /* low byte of flags: tag index */
+#define RADIX_TREE_ITER_TAGGED         0x0100  /* look up tagged slots only */
+#define RADIX_TREE_ITER_CONTIG         0x0200  /* stop at the first hole */
+
+/**
+ * radix_tree_iter_init - prepare an iterator for a new walk
+ *
+ * @iter:      iterator state to set up
+ * @start:     index at which the walk begins
+ * Returns:    NULL, so the result can seed the caller's slot pointer
+ */
+static __always_inline void **
+radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
+{
+       /* radix_tree_next_chunk() begins its lookup at ->next_index. */
+       iter->next_index = start;
+
+       /*
+        * A zero ->index tells radix_tree_next_chunk() that a zero
+        * ->next_index is a genuine starting point and not the result of
+        * wrapping past ~0UL.  See the comment in radix_tree_next_chunk().
+        */
+       iter->index = 0;
+
+       /*
+        * ->tags is intentionally not written here.  A successful tagged
+        * chunk lookup fills it in; after a failed or non-tagged lookup
+        * nobody looks at it.
+        */
+       return NULL;
+}
+
+/**
+ * radix_tree_next_chunk - find next chunk of slots for iteration
+ *
+ * @root:      radix tree root
+ * @iter:      iterator state
+ * @flags:     RADIX_TREE_ITER_* flags, with the tag index in the bits
+ *             covered by RADIX_TREE_ITER_TAG_MASK
+ * Returns:    pointer to chunk first slot, or NULL if there are no more left
+ *
+ * This function looks up the next chunk in the radix tree starting from
+ * @iter->next_index.  It returns a pointer to the chunk's first slot.
+ * It also fills @iter with data about the chunk: its position in the tree
+ * (index), its end (next_index), and a bit mask for tagged iteration (tags).
+ */
+void **radix_tree_next_chunk(struct radix_tree_root *root,
+                            struct radix_tree_iter *iter, unsigned flags);
+
+/**
+ * radix_tree_chunk_size - number of slots left in the current chunk
+ *
+ * @iter:      pointer to radix tree iterator
+ * Returns:    distance from @iter->index to @iter->next_index
+ */
+static __always_inline unsigned
+radix_tree_chunk_size(struct radix_tree_iter *iter)
+{
+       unsigned long first = iter->index;
+       unsigned long end = iter->next_index;
+
+       return end - first;
+}
+
+/**
+ * radix_tree_next_slot - find next slot in chunk
+ *
+ * @slot:      pointer to current slot
+ * @iter:      pointer to iterator state
+ * @flags:     RADIX_TREE_ITER_*, should be constant
+ * Returns:    pointer to next slot, or NULL if there are no more left
+ *
+ * This function updates @iter->index in the case of a successful lookup.
+ * For tagged lookup it also eats @iter->tags.
+ *
+ * When a non-tagged RADIX_TREE_ITER_CONTIG lookup stops on an empty slot,
+ * @iter->next_index is reset to zero so that the following
+ * radix_tree_next_chunk() call ends the iteration instead of resuming in
+ * the next chunk.
+ */
+static __always_inline void **
+radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
+{
+       if (flags & RADIX_TREE_ITER_TAGGED) {
+               /* Bit 0 described the current slot; drop it. */
+               iter->tags >>= 1;
+               if (likely(iter->tags & 1ul)) {
+                       iter->index++;
+                       return slot + 1;
+               }
+               if (!(flags & RADIX_TREE_ITER_CONTIG) && likely(iter->tags)) {
+                       /* Jump over the run of untagged slots in one step. */
+                       unsigned offset = __ffs(iter->tags);
+
+                       iter->tags >>= offset;
+                       iter->index += offset + 1;
+                       return slot + offset + 1;
+               }
+       } else {
+               unsigned size = radix_tree_chunk_size(iter) - 1;
+
+               while (size--) {
+                       slot++;
+                       iter->index++;
+                       if (likely(*slot))
+                               return slot;
+                       if (flags & RADIX_TREE_ITER_CONTIG) {
+                               /*
+                                * Hole inside the chunk: forbid switching to
+                                * the next chunk.  radix_tree_next_chunk()
+                                * returns NULL when ->next_index is zero and
+                                * ->index is not.
+                                */
+                               iter->next_index = 0;
+                               break;
+                       }
+               }
+       }
+       return NULL;
+}
+
+/**
+ * radix_tree_for_each_chunk - iterate over chunks
+ *
+ * @slot:      the void** variable for pointer to chunk first slot
+ * @root:      the struct radix_tree_root pointer
+ * @iter:      the struct radix_tree_iter pointer
+ * @start:     iteration starting index
+ * @flags:     RADIX_TREE_ITER_* and tag index
+ *
+ * @slot is first set to the NULL returned by radix_tree_iter_init().  The
+ * loop body then runs once for every chunk returned by
+ * radix_tree_next_chunk() and ends when that function returns NULL.
+ *
+ * Locks can be released and reacquired between iterations.
+ */
+#define radix_tree_for_each_chunk(slot, root, iter, start, flags)      \
+       for (slot = radix_tree_iter_init(iter, start) ;                 \
+             (slot = radix_tree_next_chunk(root, iter, flags)) ;)
+
+/**
+ * radix_tree_for_each_chunk_slot - iterate over slots in one chunk
+ *
+ * @slot:      the void** variable, at the beginning points to chunk first slot
+ * @iter:      the struct radix_tree_iter pointer
+ * @flags:     RADIX_TREE_ITER_*, should be constant
+ *
+ * This macro is designed to be nested inside radix_tree_for_each_chunk().
+ * @slot points to the radix tree slot, @iter->index contains its index.
+ *
+ * The loop has no initializer: it starts from the incoming value of @slot
+ * and advances with radix_tree_next_slot() until that returns NULL, so
+ * @slot is NULL when the loop finishes normally.
+ */
+#define radix_tree_for_each_chunk_slot(slot, iter, flags)              \
+       for (; slot ; slot = radix_tree_next_slot(slot, iter, flags))
+
+/**
+ * radix_tree_for_each_slot - iterate over non-empty slots
+ *
+ * @slot:      the void** variable for pointer to slot
+ * @root:      the struct radix_tree_root pointer
+ * @iter:      the struct radix_tree_iter pointer
+ * @start:     iteration starting index
+ *
+ * @slot points to radix tree slot, @iter->index contains its index.
+ *
+ * radix_tree_next_chunk() is called (with no flags) only when @slot is NULL,
+ * that is at the start and each time radix_tree_next_slot() finds nothing
+ * more in the current chunk.  The loop ends when radix_tree_next_chunk()
+ * returns NULL.
+ */
+#define radix_tree_for_each_slot(slot, root, iter, start)              \
+       for (slot = radix_tree_iter_init(iter, start) ;                 \
+            slot || (slot = radix_tree_next_chunk(root, iter, 0)) ;    \
+            slot = radix_tree_next_slot(slot, iter, 0))
+
+/**
+ * radix_tree_for_each_contig - iterate over contiguous slots
+ *
+ * @slot:      the void** variable for pointer to slot
+ * @root:      the struct radix_tree_root pointer
+ * @iter:      the struct radix_tree_iter pointer
+ * @start:     iteration starting index
+ *
+ * @slot points to radix tree slot, @iter->index contains its index.
+ *
+ * Same loop shape as radix_tree_for_each_slot(), but RADIX_TREE_ITER_CONTIG
+ * is passed to both radix_tree_next_chunk() and radix_tree_next_slot().
+ */
+#define radix_tree_for_each_contig(slot, root, iter, start)            \
+       for (slot = radix_tree_iter_init(iter, start) ;                 \
+            slot || (slot = radix_tree_next_chunk(root, iter,          \
+                               RADIX_TREE_ITER_CONTIG)) ;              \
+            slot = radix_tree_next_slot(slot, iter,                    \
+                               RADIX_TREE_ITER_CONTIG))
+
+/**
+ * radix_tree_for_each_tagged - iterate over tagged slots
+ *
+ * @slot:      the void** variable for pointer to slot
+ * @root:      the struct radix_tree_root pointer
+ * @iter:      the struct radix_tree_iter pointer
+ * @start:     iteration starting index
+ * @tag:       tag index
+ *
+ * @slot points to radix tree slot, @iter->index contains its index.
+ *
+ * @tag is parenthesized in the expansion so that an arbitrary expression
+ * (for example a conditional) can be passed without being re-associated
+ * with the RADIX_TREE_ITER_TAGGED bit-or.
+ */
+#define radix_tree_for_each_tagged(slot, root, iter, start, tag)       \
+       for (slot = radix_tree_iter_init(iter, start) ;                 \
+            slot || (slot = radix_tree_next_chunk(root, iter,          \
+                             RADIX_TREE_ITER_TAGGED | (tag))) ;        \
+            slot = radix_tree_next_slot(slot, iter,                    \
+                               RADIX_TREE_ITER_TAGGED))
+
 #endif /* _LINUX_RADIX_TREE_H */
 
  * Portions Copyright (C) 2001 Christoph Hellwig
  * Copyright (C) 2005 SGI, Christoph Lameter
  * Copyright (C) 2006 Nick Piggin
+ * Copyright (C) 2012 Konstantin Khlebnikov
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
        }
        return 0;
 }
+
+/**
+ * radix_tree_find_next_bit - locate the next set bit in a bitmap
+ *
+ * @addr: start of the bitmap
+ * @size: number of bits in the bitmap
+ * @offset: bit number at which the search begins
+ *
+ * A variant of find_next_bit() that the compiler can unroll when @size is a
+ * compile-time constant; otherwise it simply calls find_next_bit().
+ * The bits from @size up to roundup(@size, BITS_PER_LONG) must be zero.
+ * Returns the offset of the bit found, or @size if there is none.
+ */
+static __always_inline unsigned long
+radix_tree_find_next_bit(const unsigned long *addr,
+                        unsigned long size, unsigned long offset)
+{
+       const unsigned long *word;
+       unsigned long bits;
+
+       if (!__builtin_constant_p(size))
+               return find_next_bit(addr, size, offset);
+
+       if (offset >= size)
+               return size;
+
+       /* First word: discard the bits below the starting offset. */
+       word = addr + offset / BITS_PER_LONG;
+       bits = *word >> (offset % BITS_PER_LONG);
+       if (bits)
+               return offset + __ffs(bits);
+
+       /* Remaining words are examined whole, one long at a time. */
+       for (offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
+            offset < size; offset += BITS_PER_LONG) {
+               bits = *++word;
+               if (bits)
+                       return offset + __ffs(bits);
+       }
+
+       return size;
+}
+
 /*
  * This assumes that the caller has performed appropriate preallocation, and
  * that the caller has pinned this thread of control to the current CPU.
 }
 EXPORT_SYMBOL(radix_tree_tag_get);
 
+/**
+ * radix_tree_next_chunk - find next chunk of slots for iteration
+ *
+ * @root:      radix tree root
+ * @iter:      iterator state
+ * @flags:     RADIX_TREE_ITER_* flags and tag index
+ * Returns:    pointer to chunk first slot, or NULL if iteration is over
+ *
+ * The lookup starts at @iter->next_index.  With RADIX_TREE_ITER_TAGGED set,
+ * only slots whose tag bit (tag index taken from the low byte of @flags) is
+ * set are considered; otherwise any non-NULL slot qualifies.  With
+ * RADIX_TREE_ITER_CONTIG set, the lookup returns NULL as soon as it meets a
+ * hole instead of skipping over it.
+ *
+ * On success @iter->index is the index of the returned slot and
+ * @iter->next_index is one beyond the last index of the chunk.  @iter->tags
+ * is written only for tagged lookups and for the single-slot tree case.
+ */
+void **radix_tree_next_chunk(struct radix_tree_root *root,
+                            struct radix_tree_iter *iter, unsigned flags)
+{
+       unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
+       struct radix_tree_node *rnode, *node;
+       unsigned long index, offset;
+
+       if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
+               return NULL;
+
+       /*
+        * Catch next_index overflow after ~0UL. iter->index never overflows
+        * during iterating; it can be zero only at the beginning.
+        * And we cannot overflow iter->next_index in a single step,
+        * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
+        *
+        * So a zero next_index together with a non-zero index means the
+        * previous chunk ended at the top of the index space (or the caller
+        * zeroed next_index on purpose): there is nothing more to return.
+        */
+       index = iter->next_index;
+       if (!index && iter->index)
+               return NULL;
+
+       rnode = rcu_dereference_raw(root->rnode);
+       if (radix_tree_is_indirect_ptr(rnode)) {
+               rnode = indirect_to_ptr(rnode);
+       } else if (rnode && !index) {
+               /*
+                * Single-slot tree: the root pointer itself is the only slot,
+                * at index 0, so the chunk is exactly that one slot.
+                */
+               iter->index = 0;
+               iter->next_index = 1;
+               iter->tags = 1;
+               return (void **)&root->rnode;
+       } else
+               return NULL;
+
+restart:
+       shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT;
+       offset = index >> shift;
+
+       /* Index outside of the tree: beyond what the root node covers */
+       if (offset >= RADIX_TREE_MAP_SIZE)
+               return NULL;
+
+       node = rnode;
+       while (1) {
+               if ((flags & RADIX_TREE_ITER_TAGGED) ?
+                               !test_bit(offset, node->tags[tag]) :
+                               !node->slots[offset]) {
+                       /*
+                        * Hole detected: the slot is empty or, for a tagged
+                        * lookup, its tag bit is clear.  Unless a contiguous
+                        * walk was requested, scan forward in this node for
+                        * the next candidate and move index to it.  If the
+                        * node has none (offset == RADIX_TREE_MAP_SIZE),
+                        * index ends up at the first index past this node
+                        * and the descent restarts from the root.
+                        */
+                       if (flags & RADIX_TREE_ITER_CONTIG)
+                               return NULL;
+
+                       if (flags & RADIX_TREE_ITER_TAGGED)
+                               offset = radix_tree_find_next_bit(
+                                               node->tags[tag],
+                                               RADIX_TREE_MAP_SIZE,
+                                               offset + 1);
+                       else
+                               while (++offset < RADIX_TREE_MAP_SIZE) {
+                                       if (node->slots[offset])
+                                               break;
+                               }
+                       index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
+                       index += offset << shift;
+                       /* Overflow after ~0UL: index wrapped around to zero */
+                       if (!index)
+                               return NULL;
+                       if (offset == RADIX_TREE_MAP_SIZE)
+                               goto restart;
+               }
+
+               /* This is leaf-node: offset now selects the chunk's first slot */
+               if (!shift)
+                       break;
+
+               node = rcu_dereference_raw(node->slots[offset]);
+               if (node == NULL)
+                       goto restart;
+               shift -= RADIX_TREE_MAP_SHIFT;
+               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+       }
+
+       /*
+        * Update the iterator state: the chunk runs from index up to the end
+        * of this leaf node.
+        */
+       iter->index = index;
+       iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1;
+
+       /*
+        * Construct iter->tags bit-mask from node->tags[tag] array, shifted
+        * so that bit 0 corresponds to the slot at index.
+        */
+       if (flags & RADIX_TREE_ITER_TAGGED) {
+               unsigned tag_long, tag_bit;
+
+               tag_long = offset / BITS_PER_LONG;
+               tag_bit  = offset % BITS_PER_LONG;
+               iter->tags = node->tags[tag][tag_long] >> tag_bit;
+               /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
+               if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
+                       /* Pick tags from next element */
+                       if (tag_bit)
+                               iter->tags |= node->tags[tag][tag_long + 1] <<
+                                               (BITS_PER_LONG - tag_bit);
+                       /*
+                        * Clip chunk size, here only BITS_PER_LONG tags:
+                        * iter->tags cannot describe more slots than that.
+                        */
+                       iter->next_index = index + BITS_PER_LONG;
+               }
+       }
+
+       return node->slots + offset;
+}
+EXPORT_SYMBOL(radix_tree_next_chunk);
+
 /**
  * radix_tree_range_tag_if_tagged - for each item in given range set given
  *                                tag if item has another tag set