sched: Don't get KIST stuck in an infinite loop

author Matt Traudt <sirmatt@ksu.edu>

Wed, 27 Sep 2017 20:11:05 +0000 (16:11 -0400)

committer David Goulet <dgoulet@torproject.org>

Fri, 29 Sep 2017 15:06:31 +0000 (11:06 -0400)
author Matt Traudt <sirmatt@ksu.edu>
Wed, 27 Sep 2017 20:11:05 +0000 (16:11 -0400)
committer David Goulet <dgoulet@torproject.org>
Fri, 29 Sep 2017 15:06:31 +0000 (11:06 -0400)
diff --git a/changes/bug23676 b/changes/bug23676

new file mode 100644 (file)

index 0000000..90f9e31
--- /dev/null
+++ b/changes/bug23676
@@ -0,0 +1,6 @@
+  o Major bugfixes (scheduler):
+    If a channel is put into the scheduler's pending list, then it starts
+    closing, and then if the scheduler runs before it finishes closing, the
+    scheduler will get stuck trying to flush its cells while the lower layers
+    refuse to cooperate. Fix that race condition by given the scheduler an
+    escape method. Fixes bug 23676; bugfix on 0.3.2.1-alpha
diff --git a/src/or/scheduler_kist.c b/src/or/scheduler_kist.c

index 9e960cdef95bf7d0bf74f4b22f04ca5875d64d66..2dddd3e9a4914f133a00e9142f1cce8d8325714d 100644 (file)
--- a/src/or/scheduler_kist.c
+++ b/src/or/scheduler_kist.c
@@ -591,8 +591,23 @@ kist_scheduler_run(void)
        if (flush_result > 0) {
          update_socket_written(&socket_table, chan, flush_result *
                                (CELL_MAX_NETWORK_SIZE + TLS_PER_CELL_OVERHEAD));
+      } else {
+        /* XXX: This can happen because tor sometimes does flush in an
+         * opportunistic way cells from the circuit to the outbuf so the
+         * channel can end up here without having anything to flush nor needed
+         * to write to the kernel. Hopefully we'll fix that soon but for now
+         * we have to handle this case which happens kind of often. */
+        log_debug(LD_SCHED,
+                 "We didn't flush anything on a chan that we think "
+                 "can write and wants to write. The channel's state is '%s' "
+                 "and in scheduler state %d. We're going to mark it as "
+                 "waiting_for_cells (as that's most likely the issue) and "
+                 "stop scheduling it this round.",
+                 channel_state_to_string(chan->state),
+                 chan->scheduler_state);
+        chan->scheduler_state = SCHED_CHAN_WAITING_FOR_CELLS;
+        continue;
        }
-      /* XXX What if we didn't flush? */
      }
  
      /* Decide what to do with the channel now */
author	Matt Traudt <sirmatt@ksu.edu>
	Wed, 27 Sep 2017 20:11:05 +0000 (16:11 -0400)
committer	David Goulet <dgoulet@torproject.org>
	Fri, 29 Sep 2017 15:06:31 +0000 (11:06 -0400)
changes/bug23676	[new file with mode: 0644]	patch \| blob
src/or/scheduler_kist.c		patch \| blob \| blame \| history