]> git.ipfire.org Git - thirdparty/tor.git/commitdiff
Split the circuit timeout and close codepaths.
authorMike Perry <mikeperry-git@fscked.org>
Tue, 15 Jun 2010 08:13:49 +0000 (01:13 -0700)
committerMike Perry <mikeperry-git@fscked.org>
Wed, 16 Jun 2010 03:04:42 +0000 (20:04 -0700)
We need to record different statistics at point of timeout, vs the point
of forcible closing.

Also, give some better names to constants and state file variables
to indicate they are not dealing with timeouts, but abandoned circuits.

src/or/circuitbuild.c
src/or/circuituse.c
src/or/config.c
src/or/or.h
src/test/test.c

index b1335cc24c1e5bc57675b67d7c5d94bb49ac9261..f244aeaff0f551b22f14de3ad4e6eb705e82dfaa 100644 (file)
@@ -290,7 +290,7 @@ circuit_build_times_init(circuit_build_times_t *cbt)
 }
 
 /**
- * Rewind our timeout history by n timeout positions.
+ * Rewind our build time history by n positions.
  */
 static void
 circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
@@ -317,7 +317,7 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
 }
 
 /**
- * Add a new timeout value <b>time</b> to the set of build times. Time
+ * Add a new build time value <b>time</b> to the set of build times. Time
  * units are milliseconds.
  *
  * circuit_build_times <b>cbt</a> is a circular array, so loop around when
@@ -359,7 +359,7 @@ circuit_build_times_max(circuit_build_times_t *cbt)
   build_time_t max_build_time = 0;
   for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
     if (cbt->circuit_build_times[i] > max_build_time
-            && cbt->circuit_build_times[i] != CBT_BUILD_TIMEOUT)
+            && cbt->circuit_build_times[i] != CBT_BUILD_ABANDONED)
       max_build_time = cbt->circuit_build_times[i];
   }
   return max_build_time;
@@ -407,7 +407,7 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt,
   // calculate histogram
   for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
     if (cbt->circuit_build_times[i] == 0
-            || cbt->circuit_build_times[i] == CBT_BUILD_TIMEOUT)
+            || cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED)
       continue; /* 0 <-> uninitialized */
 
     c = (cbt->circuit_build_times[i] / CBT_BIN_WIDTH);
@@ -489,11 +489,11 @@ circuit_build_times_update_state(circuit_build_times_t *cbt,
   *next = NULL;
 
   state->TotalBuildTimes = cbt->total_build_times;
-  state->CircuitBuildTimeoutCount = 0;
+  state->CircuitBuildAbandonedCount = 0;
 
   for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) {
-    if (cbt->circuit_build_times[i] == CBT_BUILD_TIMEOUT)
-      state->CircuitBuildTimeoutCount++;
+    if (cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED)
+      state->CircuitBuildAbandonedCount++;
   }
 
   for (i = 0; i < nbins; i++) {
@@ -567,7 +567,7 @@ circuit_build_times_filter_timeouts(circuit_build_times_t *cbt)
     if (cbt->circuit_build_times[i] > max_timeout) {
       build_time_t replaced = cbt->circuit_build_times[i];
       num_filtered++;
-      cbt->circuit_build_times[i] = CBT_BUILD_TIMEOUT;
+      cbt->circuit_build_times[i] = CBT_BUILD_ABANDONED;
 
       log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced,
                cbt->circuit_build_times[i]);
@@ -644,7 +644,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
         break;
       }
 
-      if (loaded_cnt+count+state->CircuitBuildTimeoutCount
+      if (loaded_cnt+count+state->CircuitBuildAbandonedCount
             > state->TotalBuildTimes) {
         log_warn(LD_CIRC,
                  "Too many build times in state file. "
@@ -665,9 +665,9 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
   }
 
   log_info(LD_CIRC,
-           "Adding %d timeouts.", state->CircuitBuildTimeoutCount);
-  for (i=0; i < state->CircuitBuildTimeoutCount; i++) {
-    loaded_times[loaded_cnt++] = CBT_BUILD_TIMEOUT;
+           "Adding %d timeouts.", state->CircuitBuildAbandonedCount);
+  for (i=0; i < state->CircuitBuildAbandonedCount; i++) {
+    loaded_times[loaded_cnt++] = CBT_BUILD_ABANDONED;
   }
 
   if (loaded_cnt != state->TotalBuildTimes) {
@@ -693,7 +693,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
 
   circuit_build_times_set_timeout(cbt);
 
-  if (!state->CircuitBuildTimeoutCount && cbt->total_build_times) {
+  if (!state->CircuitBuildAbandonedCount && cbt->total_build_times) {
     circuit_build_times_filter_timeouts(cbt);
   }
 
@@ -715,7 +715,7 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
 {
   build_time_t *x=cbt->circuit_build_times;
   double a = 0;
-  int n=0,i=0,timeout_count=0;
+  int n=0,i=0,abandoned_count=0;
   build_time_t max_time=0;
 
   /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */
@@ -732,8 +732,8 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
 
     if (x[i] < cbt->Xm) {
       a += tor_mathlog(cbt->Xm);
-    } else if (x[i] == CBT_BUILD_TIMEOUT) {
-      timeout_count++;
+    } else if (x[i] == CBT_BUILD_ABANDONED) {
+      abandoned_count++;
     } else {
       a += tor_mathlog(x[i]);
       if (x[i] > max_time)
@@ -750,13 +750,13 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
 
   tor_assert(max_time > 0);
 
-  a += timeout_count*tor_mathlog(max_time);
+  a += abandoned_count*tor_mathlog(max_time);
 
   a -= n*tor_mathlog(cbt->Xm);
   // Estimator comes from Eq #4 in:
   // "Bayesian estimation based on trimmed samples from Pareto populations"
   // by Arturo J. Fernández. We are right-censored only.
-  a = (n-timeout_count)/a;
+  a = (n-abandoned_count)/a;
 
   cbt->alpha = a;
 }
@@ -907,16 +907,27 @@ circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
 }
 
 /**
- * A circuit just timed out. If there has been no recent network activity
- * at all, but this circuit was launched back when we thought the network
- * was live, increment the number of "nonlive" circuit timeouts.
- *
- * Also distinguish between whether it failed before the first hop
- * and record that in our history for later deciding if the network has
- * changed.
+ * A circuit just timed out. If it failed after the first hop, record it
+ * in our history for later deciding if the network speed has changed.
  */
 static void
 circuit_build_times_network_timeout(circuit_build_times_t *cbt,
+                                    int did_onehop)
+{
+  if (did_onehop) {
+    cbt->liveness.timeouts_after_firsthop[cbt->liveness.after_firsthop_idx]=1;
+    cbt->liveness.after_firsthop_idx++;
+    cbt->liveness.after_firsthop_idx %= cbt->liveness.num_recent_circs;
+  }
+}
+
+/**
+ * A circuit was just forcibly closed. If there has been no recent network
+ * activity at all, but this circuit was launched back when we thought the
+ * network was live, increment the number of "nonlive" circuit timeouts.
+ */
+static void
+circuit_build_times_network_close(circuit_build_times_t *cbt,
                                     int did_onehop, time_t start_time)
 {
   time_t now = time(NULL);
@@ -940,11 +951,6 @@ circuit_build_times_network_timeout(circuit_build_times_t *cbt,
                start_time, now);
     }
     cbt->liveness.nonlive_timeouts++;
-  } else if (did_onehop) {
-    /* Count a one-hop timeout */
-    cbt->liveness.timeouts_after_firsthop[cbt->liveness.after_firsthop_idx]=1;
-    cbt->liveness.after_firsthop_idx++;
-    cbt->liveness.after_firsthop_idx %= cbt->liveness.num_recent_circs;
   }
 }
 
@@ -1010,8 +1016,6 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt)
  * Also resets the entire timeout history in this case and causes us
  * to restart the process of building test circuits and estimating a
  * new timeout.
- *
- * XXX: All this may have been broken by the close_ms change!
  */
 int
 circuit_build_times_network_check_changed(circuit_build_times_t *cbt)
@@ -1091,7 +1095,7 @@ circuit_build_times_timeout_rate(const circuit_build_times_t *cbt)
  * update our timeout estimate.
  */
 int
-circuit_build_times_add_timeout(circuit_build_times_t *cbt,
+circuit_build_times_count_close(circuit_build_times_t *cbt,
                                 int did_onehop,
                                 time_t start_time)
 {
@@ -1101,22 +1105,38 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt,
     return 0;
   }
 
-  // XXX: All this has changed due to close_ms
-  circuit_build_times_network_timeout(cbt, did_onehop, start_time);
+  /* Record this force-close to help determine if the network is dead */
+  circuit_build_times_network_close(cbt, did_onehop, start_time);
 
   /* Only count timeouts if network is live.. */
   if (!circuit_build_times_network_check_live(cbt)) {
     return 0;
   }
 
-  /* If there are a ton of timeouts, we should reduce
-   * the circuit build timeout */
-  if (circuit_build_times_network_check_changed(cbt)) {
-    return 0;
+  circuit_build_times_add_time(cbt, CBT_BUILD_ABANDONED);
+  return 1;
+}
+
+/**
+ * Update timeout counts to determine if we need to expire
+ * our build time history due to excessive timeouts.
+ */
+void
+circuit_build_times_count_timeout(circuit_build_times_t *cbt,
+                                  int did_onehop)
+{
+  if (circuit_build_times_disabled()) {
+    cbt->close_ms = cbt->timeout_ms
+                  = circuit_build_times_get_initial_timeout();
+    return;
   }
 
-  circuit_build_times_add_time(cbt, CBT_BUILD_TIMEOUT);
-  return 1;
+  circuit_build_times_network_timeout(cbt, did_onehop);
+
+  /* If there are a ton of timeouts, we should reset
+   * the circuit build timeout.
+   */
+  circuit_build_times_network_check_changed(cbt);
 }
 
 /**
@@ -1804,9 +1824,15 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
           log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. "
                               "Assuming clock jump.", timediff);
         } else if (!circuit_build_times_disabled()) {
-          circuit_build_times_add_time(&circ_times, (build_time_t)timediff);
-          circuit_build_times_network_circ_success(&circ_times);
-          circuit_build_times_set_timeout(&circ_times);
+          /* Don't count circuit times if the network was not live */
+          if (circuit_build_times_network_check_live(&circ_times)) {
+            circuit_build_times_add_time(&circ_times, (build_time_t)timediff);
+            circuit_build_times_set_timeout(&circ_times);
+          }
+
+          if (circ->_base.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
+            circuit_build_times_network_circ_success(&circ_times);
+          }
         }
       }
       log_info(LD_CIRC,"circuit built!");
@@ -1829,6 +1855,10 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
       }
       circuit_rep_hist_note_result(circ);
       circuit_has_opened(circ); /* do other actions as necessary */
+
+      /* We're done with measurement circuits here. Just close them */
+      if (circ->_base.purpose == CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT)
+        circuit_mark_for_close(TO_CIRCUIT(circ), END_CIRC_REASON_FINISHED);
       return 0;
     }
 
index 5127ca6e3de8269fcb7a21f79602d5169ea2340f..5027eb5a285c3f001aec6195aef65a35a98ecb37 100644 (file)
@@ -368,6 +368,8 @@ circuit_expire_building(time_t now)
        * Switch their purpose and wait. */
       if (victim->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
         victim->purpose = CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT;
+        circuit_build_times_count_timeout(&circ_times,
+                                          first_hop_succeeded);
         continue;
       }
 
@@ -380,7 +382,7 @@ circuit_expire_building(time_t now)
         log_notice(LD_CIRC,
                    "Extremely large value for circuit build timeout: %lds. "
                    "Assuming clock jump.", now - victim->timestamp_created);
-      } else if (circuit_build_times_add_timeout(&circ_times,
+      } else if (circuit_build_times_count_close(&circ_times,
                                           first_hop_succeeded,
                                           victim->timestamp_created)) {
         circuit_build_times_set_timeout(&circ_times);
index c3b4fc933e04213ac28e466dc5d3dcbe70a0e08a..bcdfbd717a0948c3492e974c88c8a2fe22985d9a 100644 (file)
@@ -421,7 +421,7 @@ static config_var_t _state_vars[] = {
   V(LastWritten,                      ISOTIME,  NULL),
 
   V(TotalBuildTimes,                  UINT,     NULL),
-  V(CircuitBuildTimeoutCount,         UINT,     "0"),
+  V(CircuitBuildAbandonedCount,         UINT,     "0"),
   VAR("CircuitBuildTimeBin",          LINELIST_S, BuildtimeHistogram, NULL),
   VAR("BuildtimeHistogram",           LINELIST_V, BuildtimeHistogram, NULL),
 
index 1f1ffe00eb956c8f459506f77026b1337de48204..e1483e64660daa86f6d33c7021b3cccb8a44cdc5 100644 (file)
@@ -2842,7 +2842,7 @@ typedef struct {
   /** Build time histogram */
   config_line_t * BuildtimeHistogram;
   unsigned int TotalBuildTimes;
-  unsigned int CircuitBuildTimeoutCount;
+  unsigned int CircuitBuildAbandonedCount;
 
   /** What version of Tor wrote this state file? */
   char *TorVersion;
@@ -3027,7 +3027,12 @@ void entry_guards_free_all(void);
 
 /** A build_time_t is milliseconds */
 typedef uint32_t build_time_t;
-#define CBT_BUILD_TIMEOUT ((build_time_t)(INT32_MAX-1))
+
+/**
+ * CBT_BUILD_ABANDONED is our flag value to represent a force-closed
+ * circuit (Aka a 'right-censored' pareto value).
+ */
+#define CBT_BUILD_ABANDONED ((build_time_t)(INT32_MAX-1))
 #define CBT_BUILD_TIME_MAX ((build_time_t)(INT32_MAX))
 
 /** Save state every 10 circuits */
@@ -3152,7 +3157,9 @@ void circuit_build_times_update_state(circuit_build_times_t *cbt,
                                       or_state_t *state);
 int circuit_build_times_parse_state(circuit_build_times_t *cbt,
                                     or_state_t *state, char **msg);
-int circuit_build_times_add_timeout(circuit_build_times_t *cbt,
+void circuit_build_times_count_timeout(circuit_build_times_t *cbt,
+                                       int did_onehop);
+int circuit_build_times_count_close(circuit_build_times_t *cbt,
                                     int did_onehop, time_t start_time);
 void circuit_build_times_set_timeout(circuit_build_times_t *cbt);
 int circuit_build_times_add_time(circuit_build_times_t *cbt,
index 6cfb2f76b62592227494d97321f8cbe498c2c591..fbcfedbc9a538c606ea7ce566651a49de2ddf55a 100644 (file)
@@ -520,20 +520,20 @@ test_circuit_timeout(void)
     int build_times_idx = 0;
     int total_build_times = 0;
 
-    final.timeout_ms = CBT_DEFAULT_TIMEOUT_INITIAL_VALUE;
-    estimate.timeout_ms = CBT_DEFAULT_TIMEOUT_INITIAL_VALUE;
+    final.close_ms = final.timeout_ms = CBT_DEFAULT_TIMEOUT_INITIAL_VALUE;
+    estimate.close_ms = estimate.timeout_ms
+                      = CBT_DEFAULT_TIMEOUT_INITIAL_VALUE;
 
     for (i = 0; i < CBT_DEFAULT_RECENT_CIRCUITS*2; i++) {
       circuit_build_times_network_circ_success(&estimate);
       circuit_build_times_add_time(&estimate,
             circuit_build_times_generate_sample(&estimate, 0,
                 CBT_DEFAULT_QUANTILE_CUTOFF/100.0));
-      estimate.have_computed_timeout = 1;
+
       circuit_build_times_network_circ_success(&estimate);
       circuit_build_times_add_time(&final,
             circuit_build_times_generate_sample(&final, 0,
                 CBT_DEFAULT_QUANTILE_CUTOFF/100.0));
-      final.have_computed_timeout = 1;
     }
 
     test_assert(!circuit_build_times_network_check_changed(&estimate));
@@ -549,26 +549,22 @@ test_circuit_timeout(void)
       test_assert(circuit_build_times_network_check_live(&estimate));
       test_assert(circuit_build_times_network_check_live(&final));
 
-      if (circuit_build_times_add_timeout(&estimate, 0,
-                 (time_t)(approx_time()-estimate.timeout_ms/1000.0-1)))
-        estimate.have_computed_timeout = 1;
-      if (circuit_build_times_add_timeout(&final, 0,
-                 (time_t)(approx_time()-final.timeout_ms/1000.0-1)))
-        final.have_computed_timeout = 1;
+      circuit_build_times_count_close(&estimate, 0,
+                 (time_t)(approx_time()-estimate.close_ms/1000.0-1));
+      circuit_build_times_count_close(&final, 0,
+                 (time_t)(approx_time()-final.close_ms/1000.0-1));
     }
 
     test_assert(!circuit_build_times_network_check_live(&estimate));
     test_assert(!circuit_build_times_network_check_live(&final));
 
     for ( ; i < CBT_NETWORK_NONLIVE_DISCARD_COUNT; i++) {
-      if (circuit_build_times_add_timeout(&estimate, 0,
-                (time_t)(approx_time()-estimate.timeout_ms/1000.0-1)))
-        estimate.have_computed_timeout = 1;
+      circuit_build_times_count_close(&estimate, 0,
+                (time_t)(approx_time()-estimate.close_ms/1000.0-1));
 
       if (i < CBT_NETWORK_NONLIVE_DISCARD_COUNT-1) {
-        if (circuit_build_times_add_timeout(&final, 0,
-                (time_t)(approx_time()-final.timeout_ms/1000.0-1)))
-          final.have_computed_timeout = 1;
+        circuit_build_times_count_close(&final, 0,
+                (time_t)(approx_time()-final.close_ms/1000.0-1));
       }
     }
 
@@ -592,12 +588,10 @@ test_circuit_timeout(void)
     circuit_build_times_network_is_live(&final);
 
     for (i = 0; i < CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT; i++) {
-      if (circuit_build_times_add_timeout(&estimate, 1, approx_time()-1))
-        estimate.have_computed_timeout = 1;
+      circuit_build_times_count_timeout(&estimate, 1);
 
       if (i < CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT-1) {
-        if (circuit_build_times_add_timeout(&final, 1, approx_time()-1))
-          final.have_computed_timeout = 1;
+        circuit_build_times_count_timeout(&final, 1);
       }
     }
 
@@ -608,8 +602,7 @@ test_circuit_timeout(void)
     test_assert(circuit_build_times_network_check_live(&estimate));
     test_assert(circuit_build_times_network_check_live(&final));
 
-    if (circuit_build_times_add_timeout(&final, 1, approx_time()-1))
-      final.have_computed_timeout = 1;
+    circuit_build_times_count_timeout(&final, 1);
   }
 
 done: