vma->vm_lock_seq = UINT_MAX;
}
-static inline bool is_vma_writer_only(int refcnt)
+/*
+ * This function determines whether the input VMA reference count describes a
+ * VMA which has excluded all VMA read locks.
+ *
+ * Readers are considered excluded when VM_REFCNT_EXCLUDE_READERS_FLAG is set
+ * in the count and nothing beyond the attached reference remains, i.e. the
+ * count is VM_REFCNT_EXCLUDE_READERS_FLAG (detached) or
+ * VM_REFCNT_EXCLUDE_READERS_FLAG + 1 (attached).
+ *
+ * In the case of a detached VMA, we may incorrectly indicate that readers are
+ * excluded when one remains, because in that scenario we target a refcount of
+ * VM_REFCNT_EXCLUDE_READERS_FLAG, rather than the attached target of
+ * VM_REFCNT_EXCLUDE_READERS_FLAG + 1.
+ *
+ * However, the race window for that is very small so it is unlikely.
+ *
+ * Returns: true if readers are excluded, false otherwise.
+ */
+static inline bool __vma_are_readers_excluded(int refcnt)
{
/*
- * With a writer and no readers, refcnt is VM_REFCNT_EXCLUDE_READERS_FLAG
- * if the vma is detached and (VM_REFCNT_EXCLUDE_READERS_FLAG + 1) if it is
- * attached. Waiting on a detached vma happens only in
- * vma_mark_detached() and is a rare case, therefore most of the time
- * there will be no unnecessary wakeup.
- *
* See the comment describing the vm_area_struct->vm_refcnt field for
* details of possible refcnt values.
*/
return (refcnt & VM_REFCNT_EXCLUDE_READERS_FLAG) &&
refcnt <= VM_REFCNT_EXCLUDE_READERS_FLAG + 1;
}
+/*
+ * Drop one reference from vma->vm_refcnt and report what is left.
+ *
+ * Returns: the reference count as it stood immediately after the decrement.
+ * A return value of zero means the VMA is now detached.
+ */
+static inline __must_check unsigned int
+__vma_refcount_put_return(struct vm_area_struct *vma)
+{
+	int prev;
+
+	/* On a non-final put, derive the new count from the old one. */
+	return __refcount_dec_and_test(&vma->vm_refcnt, &prev) ? 0 : prev - 1;
+}
+
+/**
+ * vma_refcount_put() - Drop reference count in VMA vm_refcnt field due to a
+ * read-lock being dropped.
+ * @vma: The VMA whose reference count we wish to decrement.
+ *
+ * If we were the last reader, wake up threads waiting to obtain an exclusive
+ * lock.
+ *
+ * The VMA may be freed once the reference has been dropped, so vma->vm_mm is
+ * copied beforehand and only that copy is used for the wakeup.
+ */
static inline void vma_refcount_put(struct vm_area_struct *vma)
{
- /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
+ /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt. */
struct mm_struct *mm = vma->vm_mm;
- int oldcnt;
+ int newcnt;
rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
- if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
- if (is_vma_writer_only(oldcnt - 1))
- rcuwait_wake_up(&mm->vma_writer_wait);
- }
+ newcnt = __vma_refcount_put_return(vma);
+ /*
+ * __vma_enter_locked() may be sleeping waiting for readers to drop
+ * their reference count, so wake it up if we were the last reader
+ * blocking it from being acquired.
+ *
+ * Other readers may race with us and temporarily increment the
+ * reference count. The race window is very small, but it might
+ * cause spurious wakeups.
+ *
+ * A zero newcnt means the VMA is now detached (see
+ * __vma_refcount_put_return()); no wakeup is issued in that case.
+ */
+ if (newcnt && __vma_are_readers_excluded(newcnt))
+ rcuwait_wake_up(&mm->vma_writer_wait);
}
/*
vma_assert_attached(vma);
/*
- * We are the only writer, so no need to use vma_refcount_put().
- * The condition below is unlikely because the vma has been already
- * write-locked and readers can increment vm_refcnt only temporarily
- * before they check vm_lock_seq, realize the vma is locked and drop
- * back the vm_refcnt. That is a narrow window for observing a raised
- * vm_refcnt.
+ * This condition - that the VMA is still attached (refcnt > 0) - is
+ * unlikely, because the vma has already been write-locked and readers
+ * can increment vm_refcnt only temporarily before they check
+ * vm_lock_seq, realize the vma is locked and drop the vm_refcnt
+ * again. That is a narrow window for observing a raised vm_refcnt.
*
* See the comment describing the vm_area_struct->vm_refcnt field for
* details of possible refcnt values.
*/
- if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
+ if (unlikely(__vma_refcount_put_return(vma))) {
/* Wait until vma is detached with no readers. */
if (__vma_enter_locked(vma, true, TASK_UNINTERRUPTIBLE)) {
bool detached;
+ /*
+ * Once this is complete, no readers can increment the
+ * reference count, and the VMA is marked detached.
+ */
__vma_exit_locked(vma, &detached);
WARN_ON_ONCE(!detached);
}