}
/*
- * Must be called with "mtx" held.
+ * Charge the user's epoll_watches quota, allocate a fresh epitem for
+ * @tfile/@fd, and initialize its fields. The returned item is not yet
+ * linked into any data structure; the caller must install it via
+ * ep_register_epitem() (which takes over on success) or kmem_cache_free()
+ * it and decrement epoll_watches on its own.
+ *
+ * Returns ERR_PTR(-ENOSPC) if the quota is exceeded, ERR_PTR(-ENOMEM)
+ * if the slab allocation fails.
*/
-static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
- struct file *tfile, int fd, int full_check)
+static struct epitem *ep_alloc_epitem(struct eventpoll *ep,
+ const struct epoll_event *event,
+ struct file *tfile, int fd)
{
- int error, pwake = 0;
- __poll_t revents;
struct epitem *epi;
- struct ep_pqueue epq;
- struct eventpoll *tep = NULL;
-
- if (is_file_epoll(tfile))
- tep = tfile->private_data;
-
- lockdep_assert_irqs_enabled();
if (unlikely(percpu_counter_compare(&ep->user->epoll_watches,
max_user_watches) >= 0))
- return -ENOSPC;
+ return ERR_PTR(-ENOSPC);
percpu_counter_inc(&ep->user->epoll_watches);
- if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL))) {
+ epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL);
+ if (unlikely(!epi)) {
percpu_counter_dec(&ep->user->epoll_watches);
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
}
- /* Item initialization follow here ... */
INIT_LIST_HEAD(&epi->rdllink);
epi->ep = ep;
ep_set_ffd(&epi->ffd, tfile, fd);
epi->event = *event;
epi->next = EP_UNACTIVE_PTR;
+ return epi;
+}
+
+/*
+ * Install @epi into its target file's f_ep hlist and into @ep's rbtree,
+ * taking one additional reference on @ep for the lifetime of the item.
+ *
+ * If @tep is non-NULL, the target file is itself an eventpoll; we hold
+ * tep->mtx at subclass 1 across the attach + rbtree insert to serialize
+ * with the target side. RB tree ops are protected by @ep->mtx, which
+ * the caller already holds.
+ *
+ * On failure the epi is freed and the epoll_watches counter decremented,
+ * matching ep_alloc_epitem()'s allocation. After this returns
+ * successfully, ep_insert()'s later error paths use ep_remove() for
+ * unwind; that cannot drop @ep's refcount to zero because the ep file
+ * itself still holds the original reference.
+ */
+static int ep_register_epitem(struct eventpoll *ep, struct epitem *epi,
+ struct eventpoll *tep, int full_check)
+{
+ struct file *tfile = epi->ffd.file;
+ int error;
+
if (tep)
mutex_lock_nested(&tep->mtx, 1);
- /* Add the current item to the list of active epoll hook for this file */
- if (unlikely(ep_attach_file(tfile, epi) < 0)) {
+
+ error = ep_attach_file(tfile, epi);
+ if (unlikely(error)) {
if (tep)
mutex_unlock(&tep->mtx);
kmem_cache_free(epi_cache, epi);
percpu_counter_dec(&ep->user->epoll_watches);
- return -ENOMEM;
+ return error;
}
if (full_check && !tep)
list_file(tfile);
- /*
- * Add the current item to the RB tree. All RB tree operations are
- * protected by "mtx", and ep_insert() is called with "mtx" held.
- */
ep_rbtree_insert(ep, epi);
+
if (tep)
mutex_unlock(&tep->mtx);
- /*
- * ep_remove() calls in the later error paths can't lead to
- * ep_free() as the ep file itself still holds an ep reference.
- */
ep_get(ep);
+ return 0;
+}
+
+/*
+ * Must be called with "mtx" held.
+ */
+static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
+ struct file *tfile, int fd, int full_check)
+{
+ int error, pwake = 0;
+ __poll_t revents;
+ struct epitem *epi;
+ struct ep_pqueue epq;
+ struct eventpoll *tep = NULL;
+
+ if (is_file_epoll(tfile))
+ tep = tfile->private_data;
+
+ lockdep_assert_irqs_enabled();
- /* now check if we've created too many backpaths */
+ epi = ep_alloc_epitem(ep, event, tfile, fd);
+ if (IS_ERR(epi))
+ return PTR_ERR(epi);
+
+ error = ep_register_epitem(ep, epi, tep, full_check);
+ if (error)
+ return error;
+
+ /* Reject the insert if the new link would create too many back-paths. */
if (unlikely(full_check && reverse_path_check())) {
ep_remove(ep, epi);
return -EINVAL;
*/
revents = ep_item_poll(epi, &epq.pt, 1);
- /*
- * We have to check if something went wrong during the poll wait queue
- * install process. Namely an allocation for a wait queue failed due
- * high memory pressure.
- */
+ /* ep_ptable_queue_proc() signals allocation failure by clearing epq.epi. */
if (unlikely(!epq.epi)) {
ep_remove(ep, epi);
return -ENOMEM;
}
- /* We have to drop the new item inside our item list to keep track of it */
+ /* Drop the new item onto the ready list if it is already ready. */
spin_lock_irq(&ep->lock);
- /* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
- /* If the file is already "ready" we drop it inside the ready list */
if (revents && !ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
- /* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
wake_up(&ep->wq);
if (waitqueue_active(&ep->poll_wait))