git.ipfire.org Git - thirdparty/tor.git/commitdiff
The third piece of the fix for bug 969
author: Roger Dingledine <arma@torproject.org>
Sat, 20 Jun 2009 09:25:14 +0000 (05:25 -0400)
committer: Nick Mathewson <nickm@torproject.org>
Tue, 30 Jun 2009 14:10:13 +0000 (10:10 -0400)
When we write out our stability info, detect relays that have slipped
through the cracks. Log about them and correct the problem.

If we continue to see a lot of these over time, it means there's another
spot where relays fall out of the routerlist without being marked as
unreachable.

src/or/main.c
src/or/or.h
src/or/rephist.c

index 8fc712bba39745565af17c2d61c9de423dded547..60c42aaae38d76733a99dc05fcdad87629bc78dd 100644 (file)
@@ -903,7 +903,7 @@ run_scheduled_events(time_t now)
     time_to_downrate_stability = rep_hist_downrate_old_runs(now);
   if (authdir_mode_tests_reachability(options)) {
     if (time_to_save_stability < now) {
-      if (time_to_save_stability && rep_hist_record_mtbf_data()<0) {
+      if (time_to_save_stability && rep_hist_record_mtbf_data(now, 1)<0) {
         log_warn(LD_GENERAL, "Couldn't store mtbf data.");
       }
 #define SAVE_STABILITY_INTERVAL (30*60)
@@ -1955,14 +1955,15 @@ tor_cleanup(void)
   /* Remove our pid file. We don't care if there was an error when we
    * unlink, nothing we could do about it anyways. */
   if (options->command == CMD_RUN_TOR) {
+    time_t now = time(NULL);
     if (options->PidFile)
       unlink(options->PidFile);
     if (accounting_is_enabled(options))
-      accounting_record_bandwidth_usage(time(NULL), get_or_state());
+      accounting_record_bandwidth_usage(now, get_or_state());
     or_state_mark_dirty(get_or_state(), 0); /* force an immediate save. */
-    or_state_save(time(NULL));
+    or_state_save(now);
     if (authdir_mode_tests_reachability(options))
-      rep_hist_record_mtbf_data();
+      rep_hist_record_mtbf_data(now, 0);
   }
 #ifdef USE_DMALLOC
   dmalloc_log_stats();
index f37b417fe5f18e78b29bb9161763be9595cd0664..eddeda1531dbcae145cb9e685e9033c907f68194 100644 (file)
@@ -3970,7 +3970,7 @@ void rep_history_clean(time_t before);
 
 void rep_hist_note_router_reachable(const char *id, time_t when);
 void rep_hist_note_router_unreachable(const char *id, time_t when);
-int rep_hist_record_mtbf_data(void);
+int rep_hist_record_mtbf_data(time_t now, int missing_means_down);
 int rep_hist_load_mtbf_data(time_t now);
 
 time_t rep_hist_downrate_old_runs(time_t now);
index 11e040c9452a71f978baeca1316b6522bd16ac11..13fdb58b5e87a40d7f4327037b99db206cc43cfd 100644 (file)
@@ -683,9 +683,13 @@ rep_history_clean(time_t before)
   }
 }
 
-/** Write MTBF data to disk.  Returns 0 on success, negative on failure. */
+/** Write MTBF data to disk. Return 0 on success, negative on failure.
+ *
+ * If <b>missing_means_down</b>, then if we're about to write an entry
+ * that is still considered up but isn't in our routerlist, consider it
+ * to be down. */
 int
-rep_hist_record_mtbf_data(void)
+rep_hist_record_mtbf_data(time_t now, int missing_means_down)
 {
   char time_buf[ISO_TIME_LEN+1];
 
@@ -745,6 +749,18 @@ rep_hist_record_mtbf_data(void)
     hist = (or_history_t*) or_history_p;
 
     base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN);
+
+    if (missing_means_down && hist->start_of_run &&
+        !router_get_by_digest(digest)) {
+      /* We think this relay is running, but it's not listed in our
+       * routerlist. Somehow it fell out without telling us it went
+       * down. Complain and also correct it. */
+      log_info(LD_HIST,
+               "Relay '%s' is listed as up in rephist, but it's not in "
+               "our routerlist. Correcting.", dbuf);
+      rep_hist_note_router_unreachable(digest, now);
+    }
+
     PRINTF((f, "R %s\n", dbuf));
     if (hist->start_of_run > 0) {
       format_iso_time(time_buf, hist->start_of_run);