BUG/MAJOR: fd/threads: close a race on closing connections after takeover

author Willy Tarreau <w@1wt.eu>

Tue, 7 Mar 2023 18:11:02 +0000 (10:11 -0800)

committer Willy Tarreau <w@1wt.eu>

Thu, 9 Mar 2023 13:01:48 +0000 (14:01 +0100)
author Willy Tarreau <w@1wt.eu>
Tue, 7 Mar 2023 18:11:02 +0000 (10:11 -0800)
committer Willy Tarreau <w@1wt.eu>
Thu, 9 Mar 2023 13:01:48 +0000 (14:01 +0100)
diff --git a/include/haproxy/fd-t.h b/include/haproxy/fd-t.h

index b2ca87cb175033e8476e3ad1462a5a6174385b00..d0c3134cdb719ca9e35620a96a680c6f865ef284 100644 (file)
--- a/include/haproxy/fd-t.h
+++ b/include/haproxy/fd-t.h
@@ -71,6 +71,7 @@ enum {
  #define FD_EXPORTED_BIT    20  /* FD is exported and must not be closed */
  #define FD_EXCL_SYSCALL_BIT 21 /* a syscall claims exclusivity on this FD */
  #define FD_DISOWN_BIT      22  /* this fd will be closed by some external code */
+#define FD_MUST_CLOSE_BIT  23  /* this fd will be closed by some external code */
  
  
  /* and flag values */
@@ -112,6 +113,7 @@ enum {
  #define FD_EXPORTED         (1U << FD_EXPORTED_BIT)
  #define FD_EXCL_SYSCALL     (1U << FD_EXCL_SYSCALL_BIT)
  #define FD_DISOWN           (1U << FD_DISOWN_BIT)
+#define FD_MUST_CLOSE       (1U << FD_MUST_CLOSE_BIT)
  
  /* This function is used to report flags in debugging tools. Please reflect
   * below any single-bit flag addition above in the same order via the
diff --git a/src/fd.c b/src/fd.c

index 889b314225c9ed824687d7a4b248b987a90edde6..706aba829cc18f6238cd9dd7af7db244acb79398 100644 (file)
--- a/src/fd.c
+++ b/src/fd.c
@@ -419,12 +419,21 @@ void fd_delete(int fd)
          * will not take new bits in its running_mask so we have the guarantee
          * that the last thread eliminating running_mask is the one allowed to
          * safely delete the FD. Most of the time it will be the current thread.
+        * We still need to set and check the one-shot flag FD_MUST_CLOSE
+        * to take care of the rare cases where a thread wakes up on late I/O
+        * before the thread_mask is zero, and sets its bit in the running_mask
+        * just after the current thread finishes clearing its own bit, hence
+        * the two threads see themselves as last ones (which they really are).
          */
  
         HA_ATOMIC_OR(&fdtab[fd].running_mask, ti->ltid_bit);
+       HA_ATOMIC_OR(&fdtab[fd].state, FD_MUST_CLOSE);
         HA_ATOMIC_STORE(&fdtab[fd].thread_mask, 0);
-       if (fd_clr_running(fd) == ti->ltid_bit)
-               _fd_delete_orphan(fd);
+       if (fd_clr_running(fd) == ti->ltid_bit) {
+               if (HA_ATOMIC_BTR(&fdtab[fd].state, FD_MUST_CLOSE_BIT)) {
+                       _fd_delete_orphan(fd);
+               }
+       }
  }
  
  /* makes the new fd non-blocking and clears all other O_* flags; this is meant
@@ -587,29 +596,47 @@ int fd_update_events(int fd, uint evts)
                 return FD_UPDT_CLOSED;
         }
  
-       /* do nothing if the FD was taken over under us */
+       /* Do not take running_mask if not strictly needed (will trigger a
+        * cosmetic BUG_ON() in fd_insert() anyway if done).
+        */
+       tmask = _HA_ATOMIC_LOAD(&fdtab[fd].thread_mask);
+       if (!(tmask & ti->ltid_bit))
+               goto do_update;
+
+       HA_ATOMIC_OR(&fdtab[fd].running_mask, ti->ltid_bit);
+
+       /* From this point, our bit may possibly be in thread_mask, but it may
+        * still vanish, either because a takeover completed just before taking
+        * the bit above with the new owner deleting the FD, or because a
+        * takeover started just before taking the bit. In order to make sure a
+        * started takeover is complete, we need to verify that all bits of
+        * running_mask are present in thread_mask, since takeover first takes
+        * running then atomically replaces thread_mask. Once it's stable, if
+        * our bit remains there, no further takeover may happen because we
+        * hold running, but if our bit is not there it means we've lost the
+        * takeover race and have to decline touching the FD. Regarding the
+        * risk of deletion, our bit in running_mask prevents fd_delete() from
+        * finalizing the close, and the caller will leave the FD with a zero
+        * thread_mask and the FD_MUST_CLOSE flag set. It will then be our
+        * responsibility to close it.
+        */
         do {
-               /* make sure we read a synchronous copy of rmask and tmask
-                * (tmask is only up to date if it reflects all of rmask's
-                * bits).
-                */
-               do {
-                       rmask = _HA_ATOMIC_LOAD(&fdtab[fd].running_mask);
-                       tmask = _HA_ATOMIC_LOAD(&fdtab[fd].thread_mask);
-               } while (rmask & ~tmask);
+               rmask = _HA_ATOMIC_LOAD(&fdtab[fd].running_mask);
+               tmask = _HA_ATOMIC_LOAD(&fdtab[fd].thread_mask);
+               rmask &= ~ti->ltid_bit;
+       } while (rmask & ~tmask);
  
-               if (!(tmask & ti->ltid_bit)) {
-                       /* a takeover has started */
-                       activity[tid].poll_skip_fd++;
+       /* Now tmask is stable. Do nothing if the FD was taken over under us */
  
-                       /* Let the poller know this FD was lost */
-                       if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, ti->ltid))
-                               fd_updt[fd_nbupdt++] = fd;
+       if (!(tmask & ti->ltid_bit)) {
+               /* a takeover has started */
+               activity[tid].poll_skip_fd++;
  
-                       fd_drop_tgid(fd);
-                       return FD_UPDT_MIGRATED;
-               }
-       } while (!HA_ATOMIC_CAS(&fdtab[fd].running_mask, &rmask, rmask | ti->ltid_bit));
+               if (fd_clr_running(fd) == ti->ltid_bit)
+                       goto closed_or_migrated;
+
+               goto do_update;
+       }
  
         /* with running we're safe now, we can drop the reference */
         fd_drop_tgid(fd);
@@ -706,16 +733,16 @@ int fd_update_events(int fd, uint evts)
  
         /* another thread might have attempted to close this FD in the mean
          * time (e.g. timeout task) striking on a previous thread and closing.
-        * This is detected by both thread_mask and running_mask being 0 after
+        * This is detected by us being the last owners of a running_mask bit,
+        * and the thread_mask being zero. At the moment we release the running
+        * bit, a takeover may also happen, so in practice we check for our loss
+        * of the thread_mask bitboth thread_mask and running_mask being 0 after
          * we remove ourselves last. There is no risk the FD gets reassigned
          * to a different group since it's not released until the real close()
          * in _fd_delete_orphan().
          */
-       if (fd_clr_running(fd) == ti->ltid_bit && !tmask) {
-               fd_drop_tgid(fd);
-               _fd_delete_orphan(fd);
-               return FD_UPDT_CLOSED;
-       }
+       if (fd_clr_running(fd) == ti->ltid_bit && !(tmask & ti->ltid_bit))
+               goto closed_or_migrated;
  
         /* we had to stop this FD and it still must be stopped after the I/O
          * cb's changes, so let's program an update for this.
@@ -729,6 +756,35 @@ int fd_update_events(int fd, uint evts)
  
         fd_drop_tgid(fd);
         return FD_UPDT_DONE;
+
+ closed_or_migrated:
+       /* We only come here once we've last dropped running and the FD is
+        * not for us as per !(tmask & tid_bit). It may imply we're
+        * responsible for closing it. Otherwise it's just a migration.
+        */
+       if (HA_ATOMIC_BTR(&fdtab[fd].state, FD_MUST_CLOSE_BIT)) {
+               fd_drop_tgid(fd);
+               _fd_delete_orphan(fd);
+               return FD_UPDT_CLOSED;
+       }
+
+       /* So we were alone, no close bit, at best the FD was migrated, at
+        * worst it's in the process of being closed by another thread. We must
+        * be ultra-careful as it can be re-inserted by yet another thread as
+        * the result of socket() or accept(). Let's just tell the poller the
+        * FD was lost. If it was closed it was already removed and this will
+        * only cost an update for nothing.
+        */
+
+ do_update:
+       /* The FD is not closed but we don't want the poller to wake up for
+        * it anymore.
+        */
+       if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, ti->ltid))
+               fd_updt[fd_nbupdt++] = fd;
+
+       fd_drop_tgid(fd);
+       return FD_UPDT_MIGRATED;
  }
  
  /* This is used by pollers at boot time to re-register desired events for
author	Willy Tarreau <w@1wt.eu>
	Tue, 7 Mar 2023 18:11:02 +0000 (10:11 -0800)
committer	Willy Tarreau <w@1wt.eu>
	Thu, 9 Mar 2023 13:01:48 +0000 (14:01 +0100)
include/haproxy/fd-t.h		patch \| blob \| blame \| history
src/fd.c		patch \| blob \| blame \| history