up_write(&root->rwsem);
}
-/*
- * Attach the anon_vmas from src to dst.
- * Returns 0 on success, -ENOMEM on failure.
- *
- * anon_vma_clone() is called by vma_expand(), vma_merge(), __split_vma(),
- * copy_vma() and anon_vma_fork(). The first four want an exact copy of src,
- * while the last one, anon_vma_fork(), may try to reuse an existing anon_vma to
- * prevent endless growth of anon_vma. Since dst->anon_vma is set to NULL before
- * call, we can identify this case by checking (!dst->anon_vma &&
- * src->anon_vma).
- *
- * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
- * and reuse existing anon_vma which has no vmas and only one child anon_vma.
- * This prevents degradation of anon_vma hierarchy to endless linear chain in
- * case of constantly forking task. On the other hand, an anon_vma with more
- * than one child isn't reused even if there was no alive vma, thus rmap
- * walker has a good chance of avoiding scanning the whole hierarchy when it
- * searches where page is mapped.
+/*
+ * Sanity-check the preconditions anon_vma_clone() relies upon: the correct
+ * mmap lock is held and @dst's anon_vma state is consistent with @src's.
+ *
+ * These are warn-once diagnostics (VM_WARN_ON_ONCE) - violations are
+ * reported but execution continues.
+ */
+static void check_anon_vma_clone(struct vm_area_struct *dst,
+ struct vm_area_struct *src)
+{
+ /* The write lock must be held. */
+ mmap_assert_write_locked(src->vm_mm);
+ /* If not a fork (implied by dst->anon_vma) then must be on same mm. */
+ VM_WARN_ON_ONCE(dst->anon_vma && dst->vm_mm != src->vm_mm);
+
+ /* If we have anything to do src->anon_vma must be provided. */
+ VM_WARN_ON_ONCE(!src->anon_vma && !list_empty(&src->anon_vma_chain));
+ VM_WARN_ON_ONCE(!src->anon_vma && dst->anon_vma);
+ /* We are establishing a new anon_vma_chain. */
+ VM_WARN_ON_ONCE(!list_empty(&dst->anon_vma_chain));
+ /*
+ * On fork, dst->anon_vma is set NULL (temporarily). Otherwise, anon_vma
+ * must be the same across dst and src.
+ */
+ VM_WARN_ON_ONCE(dst->anon_vma && dst->anon_vma != src->anon_vma);
+}
+
+static void cleanup_partial_anon_vmas(struct vm_area_struct *vma);
+
+/**
+ * anon_vma_clone - Establishes new anon_vma_chain objects in @dst linking to
+ * all of the anon_vma objects contained within @src's anon_vma_chain.
+ * @dst: The destination VMA with an empty anon_vma_chain.
+ * @src: The source VMA we wish to duplicate.
+ *
+ * This is the heart of the VMA side of the anon_vma implementation - we invoke
+ * this function whenever we need to set up a new VMA's anon_vma state.
+ *
+ * This is invoked for:
+ *
+ * - VMA Merge, but only when @dst is unfaulted and @src is faulted - meaning we
+ * clone @src into @dst.
+ * - VMA split.
+ * - VMA (m)remap.
+ * - Fork of faulted VMA.
+ *
+ * In all cases other than fork this is simply a duplication. Fork additionally
+ * adds a new active anon_vma.
+ *
+ * ONLY in the case of fork do we try to 'reuse' existing anon_vma's in an
+ * anon_vma hierarchy, reusing anon_vma's which have no VMA associated with them
+ * but do have a single child. This is to avoid waste of memory when repeatedly
+ * forking.
+ *
+ * Returns: 0 on success, -ENOMEM on failure.
 */
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
struct anon_vma_chain *avc, *pavc;
struct anon_vma *root = NULL;
+ check_anon_vma_clone(dst, src);
+
list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma;
return 0;
enomem_failure:
- /*
- * dst->anon_vma is dropped here otherwise its num_active_vmas can
- * be incorrectly decremented in unlink_anon_vmas().
- * We can safely do this because callers of anon_vma_clone() don't care
- * about dst->anon_vma if anon_vma_clone() failed.
- */
- dst->anon_vma = NULL;
- unlink_anon_vmas(dst);
+ /* Free the anon_vma_chain links already established in dst. */
+ cleanup_partial_anon_vmas(dst);
return -ENOMEM;
}
return -ENOMEM;
}
+/*
+ * In the unfortunate case of anon_vma_clone() failing to allocate memory we
+ * have to clean things up.
+ *
+ * On clone we hold the exclusive mmap write lock, so we can't race
+ * unlink_anon_vmas(). Since we're cloning, we know we can't have empty
+ * anon_vma's, since existing anon_vma's are what we're cloning from.
+ *
+ * So this function needs only traverse the anon_vma_chain and free each
+ * allocated anon_vma_chain.
+ *
+ * NOTE(review): the paragraph above states the exclusive mmap write lock is
+ * held, but the body only asserts mmap_assert_locked() (held in any mode) -
+ * confirm whether mmap_assert_write_locked() would better document and
+ * enforce the stated contract.
+ */
+static void cleanup_partial_anon_vmas(struct vm_area_struct *vma)
+{
+ struct anon_vma_chain *avc, *next;
+ struct anon_vma *root = NULL;
+
+ /*
+ * We exclude everybody else from being able to modify anon_vma's
+ * underneath us.
+ */
+ mmap_assert_locked(vma->vm_mm);
+
+ list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
+ struct anon_vma *anon_vma = avc->anon_vma;
+
+ /* All anon_vma's share the same root. */
+ if (!root) {
+ root = anon_vma->root;
+ anon_vma_lock_write(root);
+ }
+
+ /* Unlink this avc from both the interval tree and the VMA's chain. */
+ anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
+ list_del(&avc->same_vma);
+ anon_vma_chain_free(avc);
+ }
+
+ /* root is only set if the chain was non-empty. */
+ if (root)
+ anon_vma_unlock_write(root);
+}
+
+/**
+ * unlink_anon_vmas() - remove all links between a VMA and anon_vma's, freeing
+ * anon_vma_chain objects.
+ * @vma: The VMA whose links to anon_vma objects is to be severed.
+ *
+ * As part of this process, anon_vma_chain objects are freed, each
+ * anon_vma's num_children and num_active_vmas counts are updated as
+ * required and, if the relevant anon_vma references no further VMAs, its
+ * reference count is decremented.
+ */
void unlink_anon_vmas(struct vm_area_struct *vma)
{
struct anon_vma_chain *avc, *next;
struct anon_vma *root = NULL;
+ /* Always hold mmap lock, read-lock on unmap possibly. */
+ mmap_assert_locked(vma->vm_mm);
+
+ /* Unfaulted is a no-op. */
+ VM_WARN_ON_ONCE(!vma->anon_vma && !list_empty(&vma->anon_vma_chain));
+
/*
* Unlink each anon_vma chained to the VMA. This list is ordered
* from newest to oldest, ensuring the root anon_vma gets freed last.