From f110a9ff126d4244072f31534297b18ba279b702 Mon Sep 17 00:00:00 2001 From: Michael Tremer Date: Thu, 1 Nov 2018 12:30:47 +0000 Subject: [PATCH] downloads: Refactor mirror handling Signed-off-by: Michael Tremer --- src/backend/base.py | 1 - src/backend/countries.py | 49 ++++++ src/backend/mirrors.py | 355 ++++++--------------------------------- src/web/base.py | 8 + src/web/download.py | 56 ++---- 5 files changed, 125 insertions(+), 344 deletions(-) diff --git a/src/backend/base.py b/src/backend/base.py index d7d6e328..065d717d 100644 --- a/src/backend/base.py +++ b/src/backend/base.py @@ -43,7 +43,6 @@ class Backend(object): # Initialize backend modules. self.accounts = accounts.Accounts(self) - self.downloads = mirrors.Downloads(self) self.geoip = geoip.GeoIP(self) self.fireinfo = fireinfo.Fireinfo(self) self.iuse = iuse.IUse(self) diff --git a/src/backend/countries.py b/src/backend/countries.py index edb63017..d5a898fb 100644 --- a/src/backend/countries.py +++ b/src/backend/countries.py @@ -2,6 +2,44 @@ import iso3166 +ZONES = { + # Europe + "EU": ["AD", "AL", "AT", "AX", "BA", "BE", "BG", "BY", "CH", "CZ", "DE", "DK", "EE", + "ES", "EU", "FI", "FO", "FR", "FX", "GB", "GG", "GI", "GR", "HR", "HU", "IE", + "IM", "IS", "IT", "JE", "LI", "LT", "LU", "LV", "MC", "MD", "ME", "MK", "MT", + "NL", "NO", "PL", "PT", "RO", "RS", "RU", "SE", "SI", "SJ", "SK", "SM", "TR", + "UA", "VA"], + + # Asia + "AS": ["AE", "AF", "AM", "AP", "AZ", "BD", "BH", "BN", "BT", "CC", "CN", "CX", "CY", + "GE", "HK", "ID", "IL", "IN", "IO", "IQ", "IR", "JO", "JP", "KG", "KH", "KP", + "KR", "KW", "KZ", "LA", "LB", "LK", "MM", "MN", "MO", "MV", "MY", "NP", "OM", + "PH", "PK", "PS", "QA", "SA", "SG", "SY", "TH", "TJ", "TL", "TM", "TW", "UZ", + "VN", "YE"], + + # North America + "NA": ["AG", "AI", "AN", "AW", "BB", "BL", "BM", "BS", "BZ", "CA", "CR", "CU", "DM", + "DO", "GD", "GL", "GP", "GT", "HN", "HT", "JM", "KN", "KY", "LC", "MF", "MQ", + "MS", "MX", "NI", "PA", "PM", "PR", "SV", "TC", "TT", 
"US", "VC", "VG", "VI"], + + # South America + "SA": ["AR", "BO", "BR", "CL", "CO", "EC", "FK", "GF", "GY", "PE", "PY", "SR", "UY", "VE"], + + # Africa + "AF": ["AO", "BF", "BI", "BJ", "BW", "CD", "CF", "CG", "CI", "CM", "CV", "DJ", "DZ", + "EG", "EH", "ER", "ET", "GA", "GH", "GM", "GN", "GQ", "GW", "KE", "KM", "LR", + "LS", "LY", "MA", "MG", "ML", "MR", "MU", "MW", "MZ", "NA", "NE", "NG", "RE", + "RW", "SC", "SD", "SH", "SL", "SN", "SO", "ST", "SZ", "TD", "TG", "TN", "TZ", + "UG", "YT", "ZA", "ZM", "ZW"], + + # Antarctica + "AN": ["AQ", "BV", "GS", "HM", "TF"], + + # Oceania + "OC": ["AS", "AU", "CK", "FJ", "FM", "GU", "KI", "MH", "MP", "NC", "NF", "NR", "NU", "NZ", + "PF", "PG", "PN", "PW", "SB", "TK", "TO", "TV", "UM", "VU", "WF", "WS"], +} + def get_name(code): try: return iso3166.countries_by_alpha2[code].name @@ -10,3 +48,14 @@ def get_name(code): def get_all(locale=None): return sorted(iso3166.countries, key=lambda c: c.name) + +def get_zone(country_code): + for zone in ZONES: + if country_code in ZONES[zone]: + return zone + +def get_in_zone(zone): + try: + return ZONES[zone] + except KeyError: + return [] diff --git a/src/backend/mirrors.py b/src/backend/mirrors.py index 97526943..f0c30e69 100644 --- a/src/backend/mirrors.py +++ b/src/backend/mirrors.py @@ -11,75 +11,10 @@ import tornado.httpclient import tornado.netutil import urllib.parse +from . 
import countries from .misc import Object from .decorators import * -class Downloads(Object): - @property - def total(self): - ret = self.db.get("SELECT COUNT(*) AS total FROM log_download") - - return ret.total - - @property - def today(self): - ret = self.db.get("SELECT COUNT(*) AS today FROM log_download WHERE date::date = NOW()::date") - - return ret.today - - @property - def yesterday(self): - ret = self.db.get("SELECT COUNT(*) AS yesterday FROM log_download WHERE date::date = (NOW() - INTERVAL '1 day')::date") - - return ret.yesterday - - @property - def daily_map(self): - ret = self.db.query("WITH downloads AS (SELECT * FROM log_download \ - WHERE DATE(date) BETWEEN (NOW()::date - INTERVAL '30 days') AND DATE(NOW())) \ - SELECT DATE(date) AS date, COUNT(*) AS count FROM downloads \ - GROUP BY DATE(date) ORDER BY date") - - return ret - - def get_countries(self, duration="all"): - query = "SELECT country_code, count(country_code) AS count FROM log_download" - - if duration == "today": - query += " WHERE date::date = NOW()::date" - - query += " GROUP BY country_code ORDER BY count DESC" - - results = self.db.query(query) - ret = [] - - count = sum([o.count for o in results]) - if count: - for res in results: - ret.append((res.country_code, res.count / count)) - - return ret - - def get_mirror_load(self, duration="all"): - query = "SELECT mirror, COUNT(mirror) AS count FROM log_download" - - if duration == "today": - query += " WHERE date::date = NOW()::date" - - query += " GROUP BY mirror ORDER BY count DESC" - - results = self.db.query(query) - ret = {} - - count = sum([o.count for o in results]) - if count: - for res in results: - mirror = self.mirrors.get(res.mirror) - ret[mirror.hostname] = res.count / count - - return ret - - class Mirrors(Object): def _get_mirrors(self, query, *args): res = self.db.query(query, *args) @@ -87,6 +22,12 @@ class Mirrors(Object): for row in res: yield Mirror(self.backend, row.id, data=row) + def _get_mirror(self, query, 
*args): + res = self.db.get(query, *args) + + if res: + return Mirror(self.backend, res.id, data=res) + def __iter__(self): mirrors = self._get_mirrors("SELECT * FROM mirrors \ WHERE enabled IS TRUE ORDER BY hostname") @@ -100,86 +41,32 @@ class Mirrors(Object): def get(self, id): return Mirror(self.backend, id) - def get_by_hostname(self, hostname): - ret = self.db.get("SELECT * FROM mirrors WHERE hostname = %s", hostname) - - if ret: - return Mirror(self.backend, ret.id, ret) - - def get_with_file(self, filename, country=None): - # XXX quick and dirty solution - needs a performance boost - mirror_ids = [m.mirror for m in self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename)] - - #if country: - # # Sort out all mirrors that are not preferred to the given country - # for mirror in self.get_for_country(country): - # if not mirror.id in mirror_ids: - # mirror_ids.remove(mirror.id) - - mirrors = [] - for mirror_id in mirror_ids: - mirror = self.get(mirror_id) - if not mirror.state == "UP": - continue - mirrors.append(mirror) - - return mirrors - - def get_for_location(self, location, max_distance=4000, filename=None): - if not location: - return [] - - if filename: - res = self.db.query("\ - WITH client AS (SELECT point(%s, %s) AS location) \ - SELECT * FROM mirrors WHERE mirrors.state = %s \ - AND mirrors.id IN ( \ - SELECT mirror FROM mirror_files WHERE filename = %s \ - ) AND mirrors.id IN ( \ - SELECT id FROM mirrors_locations, client \ - WHERE geodistance(mirrors_locations.location, client.location) <= %s \ - )", - location.latitude, location.longitude, "UP", filename, max_distance) - else: - res = self.db.query("\ - WITH client AS (SELECT point(%s, %s) AS location) \ - SELECT * FROM mirrors WHERE mirrors.state = %s AND mirrors.id IN ( \ - SELECT id FROM mirrors_locations, client \ - WHERE geodistance(mirrors_locations.location, client.location) <= %s \ - )", - location.latitude, location.longitude, "UP", max_distance) - - mirrors = [] - 
for row in res: - mirror = Mirror(self.backend, row.id, row) - mirrors.append(mirror) - - return sorted(mirrors, reverse=True) - - def get_all_files(self): - files = [] - - for mirror in self: - if not mirror.state == "UP": - continue - - for file in mirror.filelist: - if not file in files: - files.append(file) - - return files - - def get_random(self, filename=None): - if filename: - ret = self.db.get("SELECT * FROM mirrors WHERE state = %s \ - AND mirrors.id IN (SELECT mirror FROM mirror_files \ - WHERE filename = %s) ORDER BY RANDOM() LIMIT 1", "UP", filename) - else: - ret = self.db.get("SELECT * FROM mirrors WHERE state = %s \ - ORDER BY RANDOM() LIMIT 1", "UP") + def get_for_download(self, filename, country_code=None): + # Try to find a good mirror for this country first + if country_code: + zone = countries.get_zone(country_code) + + mirror = self._get_mirror("SELECT mirrors.* FROM mirror_files files \ + LEFT JOIN mirrors ON files.mirror = mirrors.id \ + WHERE files.filename = %s \ + AND mirrors.enabled IS TRUE AND mirrors.state = %s \ + AND mirrors.country_code = ANY(%s) \ + ORDER BY RANDOM() LIMIT 1", filename, "UP", + countries.get_in_zone(zone)) + + if mirror: + return mirror + + # Get a random mirror that serves the file + return self._get_mirror("SELECT mirrors.* FROM mirror_files files \ + LEFT JOIN mirrors ON files.mirror = mirrors.id \ + WHERE files.filename = %s \ + AND mirrors.enabled IS TRUE AND mirrors.state = %s \ + ORDER BY RANDOM() LIMIT 1", filename, "UP") - if ret: - return Mirror(self.backend, ret.id, ret) + def get_by_hostname(self, hostname): + return self._get_mirror("SELECT * FROM mirrors \ + WHERE hostname = %s", hostname) def file_exists(self, filename): ret = self.db.get("SELECT 1 FROM mirror_files \ @@ -191,90 +78,6 @@ class Mirrors(Object): return False -class MirrorSet(Object): - def __init__(self, backend, mirrors): - Object.__init__(self, backend) - - self._mirrors = mirrors - - def __add__(self, other): - mirrors = [] - - for 
mirror in self._mirrors + other._mirrors: - if mirror in mirrors: - continue - - mirrors.append(mirror) - - return MirrorSet(self.backend, mirrors) - - def __sub__(self, other): - mirrors = self._mirrors[:] - - for mirror in other._mirrors: - if mirror in mirrors: - mirrors.remove(mirror) - - return MirrorSet(self.backend, mirrors) - - def __iter__(self): - return iter(self._mirrors) - - def __len__(self): - return len(self._mirrors) - - def __str__(self): - return "" % ", ".join([m.hostname for m in self._mirrors]) - - def get_with_file(self, filename): - with_file = [m.mirror for m in self.db.query("SELECT mirror FROM mirror_files WHERE filename=%s", filename)] - - mirrors = [] - for mirror in self._mirrors: - if mirror.id in with_file: - mirrors.append(mirror) - - return MirrorSet(self.backend, mirrors) - - def get_random(self): - mirrors = [] - for mirror in self._mirrors: - for i in range(0, mirror.priority): - mirrors.append(mirror) - - return random.choice(mirrors) - - def get_for_location(self, location): - distance = 2500 - mirrors = [] - - if location: - while len(mirrors) <= 3 and distance <= 8000: - for mirror in self._mirrors: - if mirror in mirrors: - continue - - mirror_distance = mirror.distance_to(location) - if mirror_distance is None: - continue - - if mirror_distance <= distance: - mirrors.append(mirror) - - distance *= 1.2 - - return MirrorSet(self.backend, mirrors) - - def get_with_state(self, state): - mirrors = [] - - for mirror in self._mirrors: - if mirror.state == state: - mirrors.append(mirror) - - return MirrorSet(self.backend, mirrors) - - class Mirror(Object): def init(self, id, data=None): self.id = id @@ -348,13 +151,16 @@ class Mirror(Object): @property def country_code(self): - if self.location: - return self.location.country + return self.data.country_code @property def country_name(self): return self.geoip.get_country_name(self.country_code) + @property + def zone(self): + return countries.get_zone(self.country_code) + 
@lazy_property def asn(self): return self.geoip.get_asn(self.address) @@ -495,60 +301,6 @@ class Mirror(Object): logging.info("Successfully updated mirror filelist from %s" % self.hostname) - @property - def prefer_for_countries(self): - countries = self.data.get("prefer_for_countries", "") - if countries: - return sorted(countries.split(", ")) - - return [] - - @property - def prefer_for_countries_names(self): - countries = [self.geoip.get_country_name(c.upper()) for c in self.prefer_for_countries] - - return sorted(countries) - - def distance_to(self, location, ignore_preference=False): - if not location: - return None - - country_code = None - if location.country: - country_code = location.country.lower() - - if not ignore_preference and country_code in self.prefer_for_countries: - return 0 - - # http://www.movable-type.co.uk/scripts/latlong.html - - if self.latitude is None: - return None - - if self.longitude is None: - return None - - earth = 6371 # km - delta_lat = math.radians(self.latitude - location.latitude) - delta_lon = math.radians(self.longitude - location.longitude) - - lat1 = math.radians(self.latitude) - lat2 = math.radians(location.latitude) - - a = math.sin(delta_lat / 2) ** 2 - a += math.cos(lat1) * math.cos(lat2) * (math.sin(delta_lon / 2) ** 2) - - b1 = math.sqrt(a) - b2 = math.sqrt(1 - a) - - c = 2 * math.atan2(b1, b2) - - return c * earth - - @property - def priority(self): - return self.data.get("priority", 10) - @property def development(self): return self.data.get("mirrorlist_devel", False) @@ -557,25 +309,22 @@ class Mirror(Object): def mirrorlist(self): return self.data.get("mirrorlist", False) - @property + @lazy_property def addresses(self): - if not hasattr(self, "__addresses"): - try: - addrinfo = socket.getaddrinfo(self.hostname, 0, socket.AF_UNSPEC, socket.SOCK_STREAM) - except: - raise Exception("Could not resolve %s" % self.hostname) - - ret = [] - for family, socktype, proto, canonname, address in addrinfo: - if family == 
socket.AF_INET: - address, port = address - elif family == socket.AF_INET6: - address, port, flowid, scopeid = address - ret.append((family, address)) - - self.__addresses = ret - - return self.__addresses + try: + addrinfo = socket.getaddrinfo(self.hostname, 0, socket.AF_UNSPEC, socket.SOCK_STREAM) + except: + raise Exception("Could not resolve %s" % self.hostname) + + ret = [] + for family, socktype, proto, canonname, address in addrinfo: + if family == socket.AF_INET: + address, port = address + elif family == socket.AF_INET6: + address, port, flowid, scopeid = address + ret.append((family, address)) + + return ret @property def addresses6(self): diff --git a/src/web/base.py b/src/web/base.py index 91ad2d72..96df315e 100644 --- a/src/web/base.py +++ b/src/web/base.py @@ -9,6 +9,7 @@ import time import tornado.locale import tornado.web +from ..decorators import * from .. import util class BaseHandler(tornado.web.RequestHandler): @@ -71,6 +72,13 @@ class BaseHandler(tornado.web.RequestHandler): # Return the last IP if nothing else worked return remote_ips.pop() + @lazy_property + def current_country_code(self): + remote_ip = self.get_remote_ip() + + if remote_ip: + return self.backend.geoip.get_country(remote_ip) + def get_remote_location(self): if not hasattr(self, "__remote_location"): remote_ip = self.get_remote_ip() diff --git a/src/web/download.py b/src/web/download.py index 0941c256..32fcd84a 100644 --- a/src/web/download.py +++ b/src/web/download.py @@ -1,10 +1,9 @@ #!/usr/bin/python -import random +import logging import tornado.web from . import base -from . 
import ui_modules class IndexHandler(base.BaseHandler): def get(self): @@ -35,47 +34,24 @@ class FileHandler(base.BaseHandler): def prepare(self): self.set_header("Pragma", "no-cache") - def head(self, filename): - self.redirect_to_mirror(filename) - def get(self, filename): - self.redirect_to_mirror(filename, log_download=True) - - def find_mirror(self, filename): - exists = self.mirrors.file_exists(filename) - if not exists: - raise tornado.web.HTTPError(404, "File not found: %s" % filename) - - # Find mirrors located near to the user. - # If we have not found any, we use a random one. - remote_location = self.get_remote_location() - - if remote_location: - mirrors = self.mirrors.get_for_location(remote_location, filename=filename) - - if mirrors: - return random.choice(mirrors) - - return self.mirrors.get_random(filename=filename) - - def redirect_to_mirror(self, filename, log_download=False): - # Find a random mirror. - mirror = self.find_mirror(filename) + mirror = self.backend.mirrors.get_for_download(filename, + country_code=self.current_country_code) - # Construct the redirection URL. - download_url = mirror.build_url(filename) + # Send 404 if no mirror was found + if not mirror: + raise tornado.web.HTTPError(404, "Could not find a mirror for %s" % filename) - # Redirect the request. - self.redirect(download_url) + # Construct the redirection URL + url = mirror.build_url(filename) + if not url: + raise tornado.web.HTTPError(500, "Could not get download URL") - if not log_download: - return + # Log something + logging.info("Sending client from %s to %s" % (self.current_country_code or "unknown", url)) - remote_location = self.get_remote_location() - if remote_location: - country_code = remote_location.country - else: - country_code = None + # Redirect the request + self.redirect(url) - self.db.execute("INSERT INTO log_download(filename, mirror, country_code) \ - VALUES(%s, %s, %s)", filename, mirror.id, country_code) + # Allow HEAD + head = get -- 2.39.2