]> git.ipfire.org Git - ipfire.org.git/commitdiff
downloads: Refactor mirror handling
authorMichael Tremer <michael.tremer@ipfire.org>
Thu, 1 Nov 2018 12:30:47 +0000 (12:30 +0000)
committerMichael Tremer <michael.tremer@ipfire.org>
Thu, 1 Nov 2018 12:30:47 +0000 (12:30 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/backend/base.py
src/backend/countries.py
src/backend/mirrors.py
src/web/base.py
src/web/download.py

index d7d6e328984da06dcbccd60fc6e5b977bbefdb73..065d717de8ceb92ffe29e25cc060ab9dda422ec8 100644 (file)
@@ -43,7 +43,6 @@ class Backend(object):
 
                # Initialize backend modules.
                self.accounts = accounts.Accounts(self)
-               self.downloads = mirrors.Downloads(self)
                self.geoip = geoip.GeoIP(self)
                self.fireinfo = fireinfo.Fireinfo(self)
                self.iuse = iuse.IUse(self)
index edb63017c20229e4ef22927a3d31c3616516f5a6..d5a898fb8b9d5ad20069ae5d42251a4824a92f02 100644 (file)
@@ -2,6 +2,44 @@
 
 import iso3166
 
+ZONES = {
+       # Europe
+       "EU": ["AD", "AL", "AT", "AX", "BA", "BE", "BG", "BY", "CH", "CZ", "DE", "DK", "EE",
+                       "ES", "EU", "FI", "FO", "FR", "FX", "GB", "GG", "GI", "GR", "HR", "HU", "IE",
+                       "IM", "IS", "IT", "JE", "LI", "LT", "LU", "LV", "MC", "MD", "ME", "MK", "MT",
+                       "NL", "NO", "PL", "PT", "RO", "RS", "RU", "SE", "SI", "SJ", "SK", "SM", "TR",
+                       "UA", "VA"],
+
+       # Asia
+       "AS": ["AE", "AF", "AM", "AP", "AZ", "BD", "BH", "BN", "BT", "CC", "CN", "CX", "CY",
+                       "GE", "HK", "ID", "IL", "IN", "IO", "IQ", "IR", "JO", "JP", "KG", "KH", "KP",
+                       "KR", "KW", "KZ", "LA", "LB", "LK", "MM", "MN", "MO", "MV", "MY", "NP", "OM",
+                       "PH", "PK", "PS", "QA", "SA", "SG", "SY", "TH", "TJ", "TL", "TM", "TW", "UZ",
+                       "VN", "YE"],
+
+       # North America
+       "NA": ["AG", "AI", "AN", "AW", "BB", "BL", "BM", "BS", "BZ", "CA", "CR", "CU", "DM",
+                       "DO", "GD", "GL", "GP", "GT", "HN", "HT", "JM", "KN", "KY", "LC", "MF", "MQ",
+                       "MS", "MX", "NI", "PA", "PM", "PR", "SV", "TC", "TT", "US", "VC", "VG", "VI"],
+
+       # South America
+       "SA": ["AR", "BO", "BR", "CL", "CO", "EC", "FK", "GF", "GY", "PE", "PY", "SR", "UY", "VE"],
+
+       # Africa
+       "AF": ["AO", "BF", "BI", "BJ", "BW", "CD", "CF", "CG", "CI", "CM", "CV", "DJ", "DZ",
+                       "EG", "EH", "ER", "ET", "GA", "GH", "GM", "GN", "GQ", "GW", "KE", "KM", "LR",
+                       "LS", "LY", "MA", "MG", "ML", "MR", "MU", "MW", "MZ", "NA", "NE", "NG", "RE",
+                       "RW", "SC", "SD", "SH", "SL", "SN", "SO", "ST", "SZ", "TD", "TG", "TN", "TZ",
+                       "UG", "YT", "ZA", "ZM", "ZW"],
+
+       # Antarctica
+       "AN": ["AQ", "BV", "GS", "HM", "TF"],
+
+       # Oceania
+       "OC": ["AS", "AU", "CK", "FJ", "FM", "GU", "KI", "MH", "MP", "NC", "NF", "NR", "NU", "NZ",
+                       "PF", "PG", "PN", "PW", "SB", "TK", "TO", "TV", "UM", "VU", "WF", "WS"],
+}
+
 def get_name(code):
        try:
                return iso3166.countries_by_alpha2[code].name
@@ -10,3 +48,14 @@ def get_name(code):
 
 def get_all(locale=None):
        return sorted(iso3166.countries, key=lambda c: c.name)
+
+# Find the zone whose list in ZONES contains the given country code.
+# Returns the zone key; falls off the end (returning None) when no zone
+# lists the code.
+def get_zone(country_code):
+       for zone in ZONES:
+               if country_code in ZONES[zone]:
+                       return zone
+
+# Return the list of country codes stored in ZONES for the given zone key
+# (the stored list itself, not a copy), or a new empty list when the key
+# is not present in ZONES.
+def get_in_zone(zone):
+       try:
+               return ZONES[zone]
+       except KeyError:
+               return []
index 97526943f7475bff90d33af4648305e80f6d989f..f0c30e69b5e894538f6ab671f2b3cdc4a6fdbd68 100644 (file)
@@ -11,75 +11,10 @@ import tornado.httpclient
 import tornado.netutil
 import urllib.parse
 
+from . import countries
 from .misc import Object
 from .decorators import *
 
-class Downloads(Object):
-       @property
-       def total(self):
-               ret = self.db.get("SELECT COUNT(*) AS total FROM log_download")
-
-               return ret.total
-
-       @property
-       def today(self):
-               ret = self.db.get("SELECT COUNT(*) AS today FROM log_download WHERE date::date = NOW()::date")
-
-               return ret.today
-
-       @property
-       def yesterday(self):
-               ret = self.db.get("SELECT COUNT(*) AS yesterday FROM log_download WHERE date::date = (NOW() - INTERVAL '1 day')::date")
-
-               return ret.yesterday
-
-       @property
-       def daily_map(self):
-               ret = self.db.query("WITH downloads AS (SELECT * FROM log_download \
-                       WHERE DATE(date) BETWEEN (NOW()::date - INTERVAL '30 days') AND DATE(NOW())) \
-                       SELECT DATE(date) AS date, COUNT(*) AS count FROM downloads \
-                       GROUP BY DATE(date) ORDER BY date")
-
-               return ret
-
-       def get_countries(self, duration="all"):
-               query = "SELECT country_code, count(country_code) AS count FROM log_download"
-
-               if duration == "today":
-                       query += " WHERE date::date = NOW()::date"
-
-               query += " GROUP BY country_code ORDER BY count DESC"
-
-               results = self.db.query(query)
-               ret = []
-
-               count = sum([o.count for o in results])
-               if count:
-                       for res in results:
-                               ret.append((res.country_code, res.count / count))
-
-               return ret
-
-       def get_mirror_load(self, duration="all"):
-               query = "SELECT mirror, COUNT(mirror) AS count FROM log_download"
-
-               if duration == "today":
-                       query += " WHERE date::date = NOW()::date"
-
-               query += " GROUP BY mirror ORDER BY count DESC"
-
-               results = self.db.query(query)
-               ret = {}
-
-               count = sum([o.count for o in results])
-               if count:
-                       for res in results:
-                               mirror = self.mirrors.get(res.mirror)
-                               ret[mirror.hostname] = res.count / count
-
-               return ret
-
-
 class Mirrors(Object):
        def _get_mirrors(self, query, *args):
                res = self.db.query(query, *args)
@@ -87,6 +22,12 @@ class Mirrors(Object):
                for row in res:
                        yield Mirror(self.backend, row.id, data=row)
 
+       # Single-result counterpart of _get_mirrors(): passes the query and its
+       # arguments to self.db.get() and wraps a truthy result in a Mirror,
+       # handing the result over as the Mirror's data. Returns None
+       # (implicitly) when self.db.get() returns nothing truthy.
+       def _get_mirror(self, query, *args):
+               res = self.db.get(query, *args)
+
+               if res:
+                       return Mirror(self.backend, res.id, data=res)
+
        def __iter__(self):
                mirrors = self._get_mirrors("SELECT * FROM mirrors \
                        WHERE enabled IS TRUE ORDER BY hostname")
@@ -100,86 +41,32 @@ class Mirrors(Object):
        def get(self, id):
                return Mirror(self.backend, id)
 
-       def get_by_hostname(self, hostname):
-               ret = self.db.get("SELECT * FROM mirrors WHERE hostname = %s", hostname)
-
-               if ret:
-                       return Mirror(self.backend, ret.id, ret)
-
-       def get_with_file(self, filename, country=None):
-               # XXX quick and dirty solution - needs a performance boost
-               mirror_ids = [m.mirror for m in self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename)]
-
-               #if country:
-               #       # Sort out all mirrors that are not preferred to the given country
-               #       for mirror in self.get_for_country(country):
-               #               if not mirror.id in mirror_ids:
-               #                       mirror_ids.remove(mirror.id)
-
-               mirrors = []
-               for mirror_id in mirror_ids:
-                       mirror = self.get(mirror_id)
-                       if not mirror.state == "UP":
-                               continue
-                       mirrors.append(mirror)
-
-               return mirrors
-
-       def get_for_location(self, location, max_distance=4000, filename=None):
-               if not location:
-                       return []
-
-               if filename:
-                       res = self.db.query("\
-                               WITH client AS (SELECT point(%s, %s) AS location) \
-                               SELECT * FROM mirrors WHERE mirrors.state = %s \
-                                       AND mirrors.id IN ( \
-                                               SELECT mirror FROM mirror_files WHERE filename = %s \
-                                       ) AND mirrors.id IN ( \
-                                       SELECT id FROM mirrors_locations, client \
-                                               WHERE geodistance(mirrors_locations.location, client.location) <= %s \
-                               )",
-                               location.latitude, location.longitude, "UP", filename, max_distance)
-               else:
-                       res = self.db.query("\
-                               WITH client AS (SELECT point(%s, %s) AS location) \
-                               SELECT * FROM mirrors WHERE mirrors.state = %s AND mirrors.id IN ( \
-                                       SELECT id FROM mirrors_locations, client \
-                                               WHERE geodistance(mirrors_locations.location, client.location) <= %s \
-                               )",
-                               location.latitude, location.longitude, "UP", max_distance)
-
-               mirrors = []
-               for row in res:
-                       mirror = Mirror(self.backend, row.id, row)
-                       mirrors.append(mirror)
-
-               return sorted(mirrors, reverse=True)
-
-       def get_all_files(self):
-               files = []
-
-               for mirror in self:
-                       if not mirror.state == "UP":
-                               continue
-
-                       for file in mirror.filelist:
-                               if not file in files:
-                                       files.append(file)
-
-               return files
-
-       def get_random(self, filename=None):
-               if filename:
-                       ret = self.db.get("SELECT * FROM mirrors WHERE state = %s \
-                               AND mirrors.id IN (SELECT mirror FROM mirror_files \
-                                       WHERE filename = %s) ORDER BY RANDOM() LIMIT 1", "UP", filename)
-               else:
-                       ret = self.db.get("SELECT * FROM mirrors WHERE state = %s \
-                               ORDER BY RANDOM() LIMIT 1", "UP")
+       # Pick one mirror that has the given filename in mirror_files.
+       #
+       # When country_code is given, first try a randomly ordered pick among
+       # enabled mirrors in state "UP" whose country_code is in the same zone
+       # (see countries.get_zone / countries.get_in_zone). If that finds
+       # nothing, or no country_code was given, fall back to a random enabled
+       # "UP" mirror that serves the file, regardless of country.
+       #
+       # Returns the result of _get_mirror() for whichever query is used last,
+       # i.e. a Mirror or None.
+       def get_for_download(self, filename, country_code=None):
+               # Try to find a good mirror for this country first
+               if country_code:
+                       # get_zone() returns None for codes not listed in any zone;
+                       # get_in_zone(None) then yields an empty list, so the query
+                       # below presumably matches no mirror and we fall through
+                       # (confirm how the DB driver adapts an empty list for ANY()).
+                       zone = countries.get_zone(country_code)
+
+                       mirror = self._get_mirror("SELECT mirrors.* FROM mirror_files files \
+                               LEFT JOIN mirrors ON files.mirror = mirrors.id \
+                               WHERE files.filename = %s \
+                                       AND mirrors.enabled IS TRUE AND mirrors.state = %s \
+                                       AND mirrors.country_code = ANY(%s) \
+                                       ORDER BY RANDOM() LIMIT 1", filename, "UP",
+                                       countries.get_in_zone(zone))
+
+                       if mirror:
+                               return mirror
+
+               # Get a random mirror that serves the file
+               return self._get_mirror("SELECT mirrors.* FROM mirror_files files \
+                       LEFT JOIN mirrors ON files.mirror = mirrors.id \
+                       WHERE files.filename = %s \
+                               AND mirrors.enabled IS TRUE AND mirrors.state = %s \
+                               ORDER BY RANDOM() LIMIT 1", filename, "UP")
 
-               if ret:
-                       return Mirror(self.backend, ret.id, ret)
+       # Look up a mirror by the exact value of its hostname column.
+       # Returns whatever _get_mirror() returns: a Mirror, or None when the
+       # query yields no row. Unlike __iter__, the query does not filter on
+       # the enabled flag.
+       def get_by_hostname(self, hostname):
+               return self._get_mirror("SELECT * FROM mirrors \
+                       WHERE hostname = %s", hostname)
 
        def file_exists(self, filename):
                ret = self.db.get("SELECT 1 FROM mirror_files \
@@ -191,90 +78,6 @@ class Mirrors(Object):
                return False
 
 
-class MirrorSet(Object):
-       def __init__(self, backend, mirrors):
-               Object.__init__(self, backend)
-
-               self._mirrors = mirrors
-
-       def __add__(self, other):
-               mirrors = []
-
-               for mirror in self._mirrors + other._mirrors:
-                       if mirror in mirrors:
-                               continue
-
-                       mirrors.append(mirror)
-
-               return MirrorSet(self.backend, mirrors)
-
-       def __sub__(self, other):
-               mirrors = self._mirrors[:]
-
-               for mirror in other._mirrors:
-                       if mirror in mirrors:
-                               mirrors.remove(mirror)
-
-               return MirrorSet(self.backend, mirrors)
-
-       def __iter__(self):
-               return iter(self._mirrors)
-
-       def __len__(self):
-               return len(self._mirrors)
-
-       def __str__(self):
-               return "<MirrorSet %s>" % ", ".join([m.hostname for m in self._mirrors])
-
-       def get_with_file(self, filename):
-               with_file = [m.mirror for m in self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename)]
-
-               mirrors = []
-               for mirror in self._mirrors:
-                       if mirror.id in with_file:
-                               mirrors.append(mirror)
-
-               return MirrorSet(self.backend, mirrors)
-
-       def get_random(self):
-               mirrors = []
-               for mirror in self._mirrors:
-                       for i in range(0, mirror.priority):
-                               mirrors.append(mirror)
-
-               return random.choice(mirrors)
-
-       def get_for_location(self, location):
-               distance = 2500
-               mirrors = []
-
-               if location:
-                       while len(mirrors) <= 3 and distance <= 8000:
-                               for mirror in self._mirrors:
-                                       if mirror in mirrors:
-                                               continue
-
-                                       mirror_distance = mirror.distance_to(location)
-                                       if mirror_distance is None:
-                                               continue
-
-                                       if mirror_distance <= distance:
-                                               mirrors.append(mirror)
-
-                               distance *= 1.2
-
-               return MirrorSet(self.backend, mirrors)
-
-       def get_with_state(self, state):
-               mirrors = []
-
-               for mirror in self._mirrors:
-                       if mirror.state == state:
-                               mirrors.append(mirror)
-
-               return MirrorSet(self.backend, mirrors)
-
-
 class Mirror(Object):
        def init(self, id, data=None):
                self.id   = id
@@ -348,13 +151,16 @@ class Mirror(Object):
 
        @property
        def country_code(self):
-               if self.location:
-                       return self.location.country
+               return self.data.country_code
 
        @property
        def country_name(self):
                return self.geoip.get_country_name(self.country_code)
 
+       @property
+       def zone(self):
+               # Zone key (e.g. "EU") for this mirror's country, or None when
+               # the country is not listed in any zone. The zone table in the
+               # countries module lists two-letter country codes, so the lookup
+               # must use the code; a country name would never match.
+               return countries.get_zone(self.country_code)
+
        @lazy_property
        def asn(self):
                return self.geoip.get_asn(self.address)
@@ -495,60 +301,6 @@ class Mirror(Object):
 
                logging.info("Successfully updated mirror filelist from %s" % self.hostname)
 
-       @property
-       def prefer_for_countries(self):
-               countries = self.data.get("prefer_for_countries", "")
-               if countries:
-                       return sorted(countries.split(", "))
-
-               return []
-
-       @property
-       def prefer_for_countries_names(self):
-               countries = [self.geoip.get_country_name(c.upper()) for c in self.prefer_for_countries]
-
-               return sorted(countries)
-
-       def distance_to(self, location, ignore_preference=False):
-               if not location:
-                       return None
-
-               country_code = None
-               if location.country:
-                       country_code = location.country.lower()
-
-               if not ignore_preference and country_code in self.prefer_for_countries:
-                       return 0
-
-               # http://www.movable-type.co.uk/scripts/latlong.html
-
-               if self.latitude is None:
-                       return None
-
-               if self.longitude is None:
-                       return None
-
-               earth = 6371 # km
-               delta_lat = math.radians(self.latitude - location.latitude)
-               delta_lon = math.radians(self.longitude - location.longitude)
-
-               lat1 = math.radians(self.latitude)
-               lat2 = math.radians(location.latitude)
-
-               a = math.sin(delta_lat / 2) ** 2
-               a += math.cos(lat1) * math.cos(lat2) * (math.sin(delta_lon / 2) ** 2)
-
-               b1 = math.sqrt(a)
-               b2 = math.sqrt(1 - a)
-
-               c = 2 * math.atan2(b1, b2)
-
-               return c * earth
-
-       @property
-       def priority(self):
-               return self.data.get("priority", 10)
-
        @property
        def development(self):
                return self.data.get("mirrorlist_devel", False)
@@ -557,25 +309,22 @@ class Mirror(Object):
        def mirrorlist(self):
                return self.data.get("mirrorlist", False)
 
-       @property
+       @lazy_property
        def addresses(self):
-               if not hasattr(self, "__addresses"):
-                       try:
-                               addrinfo = socket.getaddrinfo(self.hostname, 0, socket.AF_UNSPEC, socket.SOCK_STREAM)
-                       except:
-                               raise Exception("Could not resolve %s" % self.hostname)
-
-                       ret = []
-                       for family, socktype, proto, canonname, address in addrinfo:
-                               if family == socket.AF_INET:
-                                       address, port = address
-                               elif family == socket.AF_INET6:
-                                       address, port, flowid, scopeid = address
-                               ret.append((family, address))
-
-                       self.__addresses = ret
-
-               return self.__addresses
+               try:
+                       addrinfo = socket.getaddrinfo(self.hostname, 0, socket.AF_UNSPEC, socket.SOCK_STREAM)
+               except:
+                       raise Exception("Could not resolve %s" % self.hostname)
+
+               ret = []
+               for family, socktype, proto, canonname, address in addrinfo:
+                       if family == socket.AF_INET:
+                               address, port = address
+                       elif family == socket.AF_INET6:
+                               address, port, flowid, scopeid = address
+                       ret.append((family, address))
+
+               return ret
 
        @property
        def addresses6(self):
index 91ad2d728df9ed3eafb69c63a47ce775cc8b665b..96df315e4d8bb64b267f29ed9309ee8a9ddcdff7 100644 (file)
@@ -9,6 +9,7 @@ import time
 import tornado.locale
 import tornado.web
 
+from ..decorators import *
 from .. import util
 
 class BaseHandler(tornado.web.RequestHandler):
@@ -71,6 +72,13 @@ class BaseHandler(tornado.web.RequestHandler):
                # Return the last IP if nothing else worked
                return remote_ips.pop()
 
+       # Result of self.backend.geoip.get_country() for the address returned
+       # by get_remote_ip(); None (implicitly) when no remote IP is available.
+       # Declared with @lazy_property from ..decorators - presumably evaluated
+       # once per handler instance and then cached; confirm in the decorator.
+       @lazy_property
+       def current_country_code(self):
+               remote_ip = self.get_remote_ip()
+
+               if remote_ip:
+                       return self.backend.geoip.get_country(remote_ip)
+
        def get_remote_location(self):
                if not hasattr(self, "__remote_location"):
                        remote_ip = self.get_remote_ip()
index 0941c256d3438f9474d4666f4dd44c0372bf3c4f..32fcd84a7a4b37ed8b2f43fa64b9b3f0aaba75be 100644 (file)
@@ -1,10 +1,9 @@
 #!/usr/bin/python
 
-import random
+import logging
 import tornado.web
 
 from . import base
-from . import ui_modules
 
 class IndexHandler(base.BaseHandler):
        def get(self):
@@ -35,47 +34,24 @@ class FileHandler(base.BaseHandler):
        def prepare(self):
                self.set_header("Pragma", "no-cache")
 
-       def head(self, filename):
-               self.redirect_to_mirror(filename)
-
        def get(self, filename):
-               self.redirect_to_mirror(filename, log_download=True)
-
-       def find_mirror(self, filename):
-               exists = self.mirrors.file_exists(filename)
-               if not exists:
-                       raise tornado.web.HTTPError(404, "File not found: %s" % filename)
-
-               # Find mirrors located near to the user.
-               # If we have not found any, we use a random one.
-               remote_location = self.get_remote_location()
-
-               if remote_location:
-                       mirrors = self.mirrors.get_for_location(remote_location, filename=filename)
-
-                       if mirrors:
-                               return random.choice(mirrors)
-
-               return self.mirrors.get_random(filename=filename)
-
-       def redirect_to_mirror(self, filename, log_download=False):
-               # Find a random mirror.
-               mirror = self.find_mirror(filename)
+               mirror = self.backend.mirrors.get_for_download(filename,
+                       country_code=self.current_country_code)
 
-               # Construct the redirection URL.
-               download_url = mirror.build_url(filename)
+               # Send 404 if no mirror was found
+               if not mirror:
+                       raise tornado.web.HTTPError(404, "Could not find a mirror for %s" % filename)
 
-               # Redirect the request.
-               self.redirect(download_url)
+               # Construct the redirection URL
+               url = mirror.build_url(filename)
+               if not url:
+                       raise tornado.web.HTTPError(500, "Could not get download URL")
 
-               if not log_download:
-                       return
+               # Log something
+               logging.info("Sending client from %s to %s" % (self.current_country_code or "unknown", url))
 
-               remote_location = self.get_remote_location()
-               if remote_location:
-                       country_code = remote_location.country
-               else:
-                       country_code = None
+               # Redirect the request
+               self.redirect(url)
 
-               self.db.execute("INSERT INTO log_download(filename, mirror, country_code) \
-                       VALUES(%s, %s, %s)", filename, mirror.id, country_code)
+       # Allow HEAD
+       head = get