]> git.ipfire.org Git - thirdparty/tor.git/commitdiff
Select fallbacks by bandwidth rather than consensus weight
author: teor (Tim Wilson-Brown) <teor2345@gmail.com>
Fri, 8 Apr 2016 13:53:24 +0000 (23:53 +1000)
committer: teor (Tim Wilson-Brown) <teor2345@gmail.com>
Fri, 15 Apr 2016 02:26:37 +0000 (12:26 +1000)
But as advertised bandwidth is controlled by relays,
use consensus weight and median weight to bandwidth ratio
to approximate measured bandwidth.

Includes minor comment changes and parameter reordering.

scripts/maint/updateFallbackDirs.py

index 312049608c8339292ddc4629f31010c19efcf9ed..44a7318fc856a6de0cc04a62c64f03f5970e98f8 100755 (executable)
@@ -112,17 +112,13 @@ CUTOFF_GUARD = .95
 # .00 means no bad exits
 PERMITTED_BADEXIT = .00
 
-# Clients will time out after 30 seconds trying to download a consensus
-# So allow fallback directories half that to deliver a consensus
-# The exact download times might change based on the network connection
-# running this script, but only by a few seconds
-# There is also about a second of python overhead
-CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0
-# If the relay fails a consensus check, retry the download
-# This avoids delisting a relay due to transient network conditions
-CONSENSUS_DOWNLOAD_RETRY = True
+# older entries' weights are adjusted with ALPHA^(age in days)
+AGE_ALPHA = 0.99
+
+# this factor is used to scale OnionOO entries to [0,1]
+ONIONOO_SCALE_ONE = 999.
 
-## List Length Limits
+## Fallback Count Limits
 
 # The target for these parameters is 20% of the guards in the network
 # This is around 200 as of October 2015
@@ -130,37 +126,53 @@ _FB_POG = 0.2
 FALLBACK_PROPORTION_OF_GUARDS = None if OUTPUT_CANDIDATES else _FB_POG
 
 # We want exactly 100 fallbacks for the initial release
-# Limit the number of fallbacks (eliminating lowest by weight)
+# This gives us scope to add extra fallbacks to the list as needed
+# Limit the number of fallbacks (eliminating lowest by advertised bandwidth)
 MAX_FALLBACK_COUNT = None if OUTPUT_CANDIDATES else 100
 # Emit a C #error if the number of fallbacks is below
 MIN_FALLBACK_COUNT = 100
 
-## Fallback Weight Settings
+## Fallback Bandwidth Requirements
 
-# Any fallback with the Exit flag has its consensus weight multipled by this
-EXIT_WEIGHT_FRACTION = 1.0
+# Any fallback with the Exit flag has its bandwidth multiplied by this fraction
+# to make sure we aren't further overloading exits
+# (Set to 1.0, because we asked that only lightly loaded exits opt-in,
+# and the extra load really isn't that much for large relays.)
+EXIT_BANDWIDTH_FRACTION = 1.0
 
-# If a single fallback's consensus weight is too low, it's pointless adding it
+# If a single fallback's bandwidth is too low, it's pointless adding it
 # We expect fallbacks to handle an extra 30 kilobytes per second of traffic
-# Make sure they support a hundred times that
-MIN_CONSENSUS_WEIGHT = 30.0 * 100.0
+# Make sure they can support a hundred times the expected extra load
+# (Use 102.4 to make it come out nicely in MB/s)
+# We convert this to a consensus weight before applying the filter,
+# because all the bandwidth amounts are specified by the relay
+MIN_BANDWIDTH = 102.4 * 30.0 * 1024.0
+
+# Clients will time out after 30 seconds trying to download a consensus
+# So allow fallback directories half that to deliver a consensus
+# The exact download times might change based on the network connection
+# running this script, but only by a few seconds
+# There is also about a second of python overhead
+CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0
+# If the relay fails a consensus check, retry the download
+# This avoids delisting a relay due to transient network conditions
+CONSENSUS_DOWNLOAD_RETRY = True
+
+## Fallback Weights for Client Selection
 
 # All fallback weights are equal, and set to the value below
 # Authorities are weighted 1.0 by default
 # Clients use these weights to select fallbacks and authorities at random
 # If there are 100 fallbacks and 9 authorities:
-#  - each fallback is chosen with probability 10/(1000 + 9) ~= 0.99%
-#  - each authority is chosen with probability 1/(1000 + 9) ~= 0.09%
+#  - each fallback is chosen with probability 10.0/(10.0*100 + 1.0*9) ~= 0.99%
+#  - each authority is chosen with probability 1.0/(10.0*100 + 1.0*9) ~= 0.09%
+# A client choosing a bootstrap directory server will choose a fallback for
+# 10.0/(10.0*100 + 1.0*9) * 100 = 99.1% of attempts, and an authority for
+# 1.0/(10.0*100 + 1.0*9) * 9 = 0.9% of attempts.
+# (This disregards the bootstrap schedules, where clients start by choosing
+# from fallbacks & authorities, then later choose from only authorities.)
 FALLBACK_OUTPUT_WEIGHT = 10.0
 
-## Other Configuration Parameters
-
-# older entries' weights are adjusted with ALPHA^(age in days)
-AGE_ALPHA = 0.99
-
-# this factor is used to scale OnionOO entries to [0,1]
-ONIONOO_SCALE_ONE = 999.
-
 ## Parsing Functions
 
 def parse_ts(t):
@@ -448,6 +460,11 @@ class Candidate(object):
       details['contact'] = None
     if not 'flags' in details or details['flags'] is None:
       details['flags'] = []
+    if (not 'advertised_bandwidth' in details
+        or details['advertised_bandwidth'] is None):
+      # relays without advertised bandwidth have it calculated from their
+      # consensus weight
+      details['advertised_bandwidth'] = 0
     details['last_changed_address_or_port'] = parse_ts(
                                       details['last_changed_address_or_port'])
     self._data = details
@@ -462,10 +479,6 @@ class Candidate(object):
     self._compute_ipv6addr()
     if self.ipv6addr is None:
       logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
-    # Reduce the weight of exits to EXIT_WEIGHT_FRACTION * consensus_weight
-    if self.is_exit():
-      exit_weight = self._data['consensus_weight'] * EXIT_WEIGHT_FRACTION
-      self._data['consensus_weight'] = exit_weight
 
   def _stable_sort_or_addresses(self):
     # replace self._data['or_addresses'] with a stable ordering,
@@ -754,11 +767,9 @@ class Candidate(object):
       logging.info('%s not a candidate: guard avg too low (%lf)',
                    self._fpr, self._guard)
       return False
-    if (MIN_CONSENSUS_WEIGHT is not None
-        and self._data['consensus_weight'] < MIN_CONSENSUS_WEIGHT):
-      logging.info('%s not a candidate: consensus weight %.0f too low, must ' +
-                   'be at least %.0f', self._fpr,
-                   self._data['consensus_weight'], MIN_CONSENSUS_WEIGHT)
+    if (not self._data.has_key('consensus_weight')
+        or self._data['consensus_weight'] < 1):
+      logging.info('%s not a candidate: consensus weight invalid', self._fpr)
       return False
     return True
 
@@ -889,6 +900,30 @@ class Candidate(object):
                             ipv6 if has_ipv6 else value)
     return False
 
+  def cw_to_bw_factor(self):
+    # any relays with a missing or zero consensus weight are not candidates
+    # any relays with a missing advertised bandwidth have it set to zero
+    return self._data['advertised_bandwidth'] / self._data['consensus_weight']
+
+  # since advertised_bandwidth is reported by the relay, it can be gamed
+  # to avoid this, use the median consensus weight to bandwidth factor to
+  # estimate this relay's measured bandwidth, and make that the upper limit
+  def measured_bandwidth(self, median_cw_to_bw_factor):
+    cw_to_bw= median_cw_to_bw_factor
+    # Reduce exit bandwidth to make sure we're not overloading them
+    if self.is_exit():
+      cw_to_bw *= EXIT_BANDWIDTH_FRACTION
+    measured_bandwidth = self._data['consensus_weight'] * cw_to_bw
+    if self._data['advertised_bandwidth'] != 0:
+      # limit advertised bandwidth (if available) to measured bandwidth
+      return min(measured_bandwidth, self._data['advertised_bandwidth'])
+    else:
+      return measured_bandwidth
+
+  def set_measured_bandwidth(self, median_cw_to_bw_factor):
+    self._data['measured_bandwidth'] = self.measured_bandwidth(
+                                                      median_cw_to_bw_factor)
+
   def is_exit(self):
     return 'Exit' in self._data['flags']
 
@@ -1056,8 +1091,8 @@ class CandidateList(dict):
     logging.debug('Loading details document.')
     d = fetch('details',
         fields=('fingerprint,nickname,contact,last_changed_address_or_port,' +
-                'consensus_weight,or_addresses,dir_address,' +
-                'recommended_version,flags'))
+                'consensus_weight,advertised_bandwidth,or_addresses,' +
+                'dir_address,recommended_version,flags'))
     logging.debug('Loading details document done.')
 
     if not 'relays' in d: raise Exception("No relays found in document.")
@@ -1083,15 +1118,24 @@ class CandidateList(dict):
         guard_count += 1
     return guard_count
 
-  # Find fallbacks that fit the uptime, stability, and flags criteria
+  # Find fallbacks that fit the uptime, stability, and flags criteria,
+  # and make an array of them in self.fallbacks
   def compute_fallbacks(self):
     self.fallbacks = map(lambda x: self[x],
-                      sorted(
-                        filter(lambda x: self[x].is_candidate(),
-                               self.keys()),
-                        key=lambda x: self[x]._data['consensus_weight'],
+                         filter(lambda x: self[x].is_candidate(),
+                                self.keys()))
+
+  # sort fallbacks by their consensus weight to advertised bandwidth factor,
+  # lowest to highest
+  # used to find the median cw_to_bw_factor()
+  def sort_fallbacks_by_cw_to_bw_factor(self):
+    self.fallbacks.sort(key=lambda x: self[x].cw_to_bw_factor())
+
+  # sort fallbacks by their measured bandwidth, highest to lowest
+  # calculate_measured_bandwidth before calling this
+  def sort_fallbacks_by_measured_bandwidth(self):
+    self.fallbacks.sort(key=lambda x: self[x].self._data['measured_bandwidth'],
                         reverse=True)
-                      )
 
   @staticmethod
   def load_relaylist(file_name):
@@ -1194,13 +1238,64 @@ class CandidateList(dict):
     return '/* Whitelist & blacklist excluded %d of %d candidates. */'%(
                                                 excluded_count, initial_count)
 
-  def fallback_min_weight(self):
+  # calculate each fallback's measured bandwidth based on the median
+# consensus weight to advertised bandwidth ratio
+  def calculate_measured_bandwidth(self):
+    self.sort_fallbacks_by_cw_to_bw_factor()
+    median_fallback = self.fallback_median(True)
+    median_cw_to_bw_factor = median_fallback.cw_to_bw_factor()
+    for f in self.fallbacks:
+      f.set_measured_bandwidth(median_cw_to_bw_factor)
+
+  # remove relays with low measured bandwidth from the fallback list
+  # calculate_measured_bandwidth for each relay before calling this
+  def remove_low_bandwidth_relays(self):
+    if MIN_BANDWIDTH is None:
+      return
+    above_min_bw_fallbacks = []
+    for f in self.fallbacks:
+      if f._data['measured_bandwidth'] >= MIN_BANDWIDTH:
+        above_min_bw_fallbacks.append(f)
+      else:
+        # the bandwidth we log here is limited by the relay's consensus weight
+        # as well as its advertised bandwidth. See set_measured_bandwidth
+        # for details
+        logging.info('%s not a candidate: bandwidth %.1fMB/s too low, must ' +
+                     'be at least %.1fMB/s', f._fpr,
+                     f._data['measured_bandwidth']/(1024.0*1024.0),
+                     MIN_BANDWIDTH/(1024.0*1024.0))
+    self.fallbacks = above_min_bw_fallbacks
+
+  # the minimum fallback in the list
+  # call one of the sort_fallbacks_* functions before calling this
+  def fallback_min(self):
     if len(self.fallbacks) > 0:
       return self.fallbacks[-1]
     else:
       return None
 
-  def fallback_max_weight(self):
+  # the median fallback in the list
+  # call one of the sort_fallbacks_* functions before calling this
+  def fallback_median(self, require_advertised_bandwidth):
+    # use the low-median when there are an even number of fallbacks,
+    # for consistency with the bandwidth authorities
+    if len(self.fallbacks) > 0:
+      median_position = (len(self.fallbacks) - 1) / 2
+      if not require_advertised_bandwidth:
+        return self.fallbacks[median_position]
+      # if we need advertised_bandwidth but this relay doesn't have it,
+      # move to a fallback with greater consensus weight until we find one
+      while not self.fallbacks[median_position]._data['advertised_bandwidth']:
+        median_position += 1
+        if median_position >= len(self.fallbacks):
+          return None
+      return self.fallbacks[median_position]
+    else:
+      return None
+
+  # the maximum fallback in the list
+  # call one of the sort_fallbacks_* functions before calling this
+  def fallback_max(self):
     if len(self.fallbacks) > 0:
       return self.fallbacks[0]
     else:
@@ -1211,7 +1306,7 @@ class CandidateList(dict):
     # Report:
     #  whether we checked consensus download times
     #  the number of fallback directories (and limits/exclusions, if relevant)
-    #  min & max fallback weights
+    #  min & max fallback bandwidths
     #  #error if below minimum count
     if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
       s = '/* Checked %s%s%s DirPorts served a consensus within %.1fs. */'%(
@@ -1243,11 +1338,12 @@ class CandidateList(dict):
       s += 'Excluded:     %d (Eligible Count Exceeded Target Count)'%(
                                               eligible_count - fallback_count)
       s += '\n'
-    min_fb = self.fallback_min_weight()
-    min_weight = min_fb._data['consensus_weight']
-    max_fb = self.fallback_max_weight()
-    max_weight = max_fb._data['consensus_weight']
-    s += 'Consensus Weight Range: %d - %d'%(min_weight, max_weight)
+    min_fb = self.fallback_min()
+    min_bw = min_fb._data['measured_bandwidth']
+    max_fb = self.fallback_max()
+    max_bw = max_fb._data['measured_bandwidth']
+    s += 'Bandwidth Range: %.1f - %.1f MB/s'%(min_bw/(1024.0*1024.0),
+                                              max_bw/(1024.0*1024.0))
     s += '\n'
     s += '*/'
     if fallback_count < MIN_FALLBACK_COUNT:
@@ -1293,6 +1389,14 @@ def list_fallbacks():
   print candidates.summarise_filters(initial_count, excluded_count)
   eligible_count = len(candidates.fallbacks)
 
+  # calculate the measured bandwidth of each relay,
+  # then remove low-bandwidth relays
+  candidates.calculate_measured_bandwidth()
+  candidates.remove_low_bandwidth_relays()
+  # make sure the list is sorted by bandwidth when we output it
+  # so that we include the active fallbacks with the greatest bandwidth
+  candidates.sort_fallbacks_by_measured_bandwidth()
+
   # print the raw fallback list
   #for x in candidates.fallbacks:
   #  print x.fallbackdir_line(True)