From: Michael Tremer Date: Fri, 6 Oct 2017 18:17:26 +0000 (+0100) Subject: Replace geoip database by local database X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d3e7a9fb825ef43ffb75fd28ab49562cea96875d;p=pbs.git Replace geoip database by local database Fixes #11493 The MySQL server had a copy of a geoip database which was queried to estimate the position of a host on the globe. The database was huge and querying it was slow because MySQL is not very good with spatial queries. On top of that, this precision wasn't really needed. A local database is way quicker to query and provides us with the country an IP address is assumed to be in. That will allow us to select a few mirror servers as preferred mirrors. Signed-off-by: Michael Tremer --- diff --git a/Makefile.am b/Makefile.am index 499d0b57..1b9720c1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -58,7 +58,8 @@ AM_V_XSLT_0 = @echo " XSLT " $@; # ------------------------------------------------------------------------------ dist_doc_DATA = \ - COPYING + COPYING \ + README # ------------------------------------------------------------------------------ @@ -84,6 +85,7 @@ buildservice_PYTHON = \ src/buildservice/database.py \ src/buildservice/decorators.py \ src/buildservice/distribution.py \ + src/buildservice/geoip.py \ src/buildservice/git.py \ src/buildservice/keys.py \ src/buildservice/logs.py \ @@ -418,6 +420,14 @@ dist_cron_DATA = \ crondir = $(sysconfdir)/cron.d +dist_geoip_DATA = \ + src/geoip/GeoLite2-Country.mmdb + +geoipdir = $(datadir)/geoip + +EXTRA_DIST += \ + src/tools/update-geoip-database.sh + # ------------------------------------------------------------------------------ substitutions = \ diff --git a/README b/README new file mode 100644 index 00000000..1fd01549 --- /dev/null +++ b/README @@ -0,0 +1,8 @@ + +REQUIREMENTS + Python modules: + * geoip2 + + +This product includes GeoLite2 data created by MaxMind, available from + https://dev.maxmind.com/geoip/geoip2/geolite2/ diff --git 
a/pbs.conf.sample b/pbs.conf.sample index 373cbd2b..6578f86e 100644 --- a/pbs.conf.sample +++ b/pbs.conf.sample @@ -5,11 +5,3 @@ host = mysql-master.ipfire.org user = pakfire pass = pakfire db = pakfire - -[geoip-database] -; Credentials to the geoip database. - -host = mysql-master.ipfire.org -user = pakfire -pass = pakfire -db = geoip diff --git a/src/buildservice/__init__.py b/src/buildservice/__init__.py index a36947d1..d82cae1e 100644 --- a/src/buildservice/__init__.py +++ b/src/buildservice/__init__.py @@ -14,6 +14,7 @@ from . import builds from . import cache from . import database from . import distribution +from . import geoip from . import keys from . import logs from . import messages @@ -38,16 +39,13 @@ class Backend(object): # Read configuration file. self.config = self.read_config(config_file) - # Connect to databases. - self.geoip_db = self.connect_database("geoip-database") - # Global pakfire settings (from database). self.settings = settings.Settings(self) self.arches = arches.Arches(self) self.builds = builds.Builds(self) self.cache = cache.Cache(self) - self.geoip = mirrors.GeoIP(self) + self.geoip = geoip.GeoIP(self) self.jobs = builds.Jobs(self) self.builders = builders.Builders(self) self.distros = distribution.Distributions(self) diff --git a/src/buildservice/base.py b/src/buildservice/base.py index a69306f3..ecae7f36 100644 --- a/src/buildservice/base.py +++ b/src/buildservice/base.py @@ -40,10 +40,6 @@ class Object(object): """ return self.backend - @property - def geoip(self): - return self.backend.geoip - class DataObject(Object): # Table name diff --git a/src/buildservice/constants.py b/src/buildservice/constants.py index b7ba2573..192c2c13 100644 --- a/src/buildservice/constants.py +++ b/src/buildservice/constants.py @@ -5,6 +5,9 @@ import os.path # Import all constants from the pakfire module. 
from pakfire.constants import * +# XXX must be set by configure +DATADIR = "/usr/share/pakfire-build-service" + PAKFIRE_DIR = "/pakfire" PACKAGES_DIR = os.path.join(PAKFIRE_DIR, "packages") BUILD_RELEASE_DIR = os.path.join(PACKAGES_DIR, "release") diff --git a/src/buildservice/geoip.py b/src/buildservice/geoip.py new file mode 100644 index 00000000..ea95f5c3 --- /dev/null +++ b/src/buildservice/geoip.py @@ -0,0 +1,28 @@ +#!/usr/bin/python + +import geoip2.database +import geoip2.errors +import os.path + +from . import base + +from .constants import DATADIR + +class GeoIP(base.Object): + def init(self): + path = os.path.join(DATADIR, "geoip/GeoLite2-Country.mmdb") + + # Open the database + self.db = geoip2.database.Reader(path) + + def guess_from_address(self, address): + # Query the database + try: + result = self.db.country(address) + + # Return nothing if the address could not be found + except geoip2.errors.AddressNotFoundError: + return + + if result: + return result.country.iso_code diff --git a/src/buildservice/mirrors.py b/src/buildservice/mirrors.py index 4f093df2..5c8bf140 100644 --- a/src/buildservice/mirrors.py +++ b/src/buildservice/mirrors.py @@ -7,47 +7,7 @@ import socket from . import base from . import logs -class GeoIP(object): - def __init__(self, pakfire): - self.pakfire = pakfire - - self.db = self.pakfire.geoip_db - - def __encode_ip(self, addr): - # We get a tuple if there were proxy headers. 
- addr = addr.split(", ") - if addr: - addr = addr[-1] - - # ip is calculated as described in http://dev.maxmind.com/geoip/csv - try: - a1, a2, a3, a4 = addr.split(".") - - a1 = int(a1) - a2 = int(a2) - a3 = int(a3) - a4 = int(a4) - except: - return 0 - - return (16777216 * a1) + (65536 * a2) + (256 * a3) + a4 - - def get_all(self, addr): - addr = self.__encode_ip(addr) - - ret = self.db.get("\ - SELECT * FROM locations \ - JOIN addresses ON addresses.location = locations.id \ - WHERE \ - %s BETWEEN addresses.start_ip_num AND addresses.end_ip_num \ - LIMIT 1", addr) - - # If location was not determinable - if ret and ret.latitude == 0 and ret.longitude == 0: - return None - - return ret - +from .decorators import lazy_property class Mirrors(base.Object): def get_all(self): @@ -104,28 +64,23 @@ class Mirrors(base.Object): return Mirror(self.pakfire, mirror.id) def get_for_location(self, addr): - distance = 10 + country_code = self.backend.geoip.guess_from_address(addr) - # Get all mirrors in here. - _mirrors = self.get_all() + # Cannot return any good mirrors if location is unknown + if not country_code: + return [] mirrors = [] - while len(mirrors) <= 2 and distance <= 270: - for mirror in _mirrors: - if not mirror.enabled: - continue - if mirror in mirrors: - continue + # Walk through all mirrors + for mirror in self.get_all(): + if not mirror.enabled: + continue - # Cannot calc the distance for mirrors when their location is unknown. 
- if mirror.location is None: - continue + if mirror.country_code == country_code: + mirrors.append(mirror) - if mirror.distance_to(addr) <= distance: - mirrors.append(mirror) - - distance *= 1.2 + # XXX needs to search for nearby countries return mirrors @@ -307,52 +262,9 @@ class Mirror(base.Object): def address(self): return socket.gethostbyname(self.hostname) - @property - def location(self): - if self._location is None: - self._location = self.geoip.get_all(self.address) - - return self._location - - @property + @lazy_property def country_code(self): - if self.location: - return self.location.country_code - - return "UNKNOWN" - - @property - def latitude(self): - if self.location: - return self.location.latitude - - return 0 - - @property - def longitude(self): - if self.location: - return self.location.longitude - - return 0 - - def distance_to(self, addr): - location = self.geoip.get_all(addr) - if not location: - return 0 - - #if location.country_code.lower() in self.prefer_for_countries: - # return 0 - - distance_vector = ( - self.latitude - location.latitude, - self.longitude - location.longitude - ) - - distance = 0 - for i in distance_vector: - distance += i**2 - - return math.sqrt(distance) + return self.backend.geoip.guess_from_address(self.address) or "UNKNOWN" def get_history(self, *args, **kwargs): kwargs["mirror"] = self diff --git a/src/geoip/GeoLite2-Country.mmdb b/src/geoip/GeoLite2-Country.mmdb new file mode 100644 index 00000000..31b6d67e Binary files /dev/null and b/src/geoip/GeoLite2-Country.mmdb differ diff --git a/src/templates/mirrors-detail.html b/src/templates/mirrors-detail.html index 6804b986..4751d2dd 100644 --- a/src/templates/mirrors-detail.html +++ b/src/templates/mirrors-detail.html @@ -95,23 +95,9 @@
- {% if mirror.longitude and mirror.latitude %} - -

- {{ _("View larger map") }} - - - © OpenStreetMap contributors, CC-BY-SA -

-

- {{ _("The location of the mirror server is estimated by the IP address.") }} -

- {% else %} -

- {{ _("The location of the mirror server could not be estimated.") }} -

- {% end %} +

+ {{ _("The location of the mirror server could not be estimated.") }} +

diff --git a/src/tools/update-geoip-database.sh b/src/tools/update-geoip-database.sh new file mode 100755 index 00000000..25446721 --- /dev/null +++ b/src/tools/update-geoip-database.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +URL="http://geolite.maxmind.com/download/geoip/database/GeoLite2-Country.tar.gz" + +tmpfile=$(mktemp) + +# Download the file +if ! wget "${URL}" -O "${tmpfile}"; then + echo "Could not download the database file" >&2 + unlink "${tmpfile}" + + exit 1 +fi + +# Extract database from tarball +if ! tar xvOf "${tmpfile}" "*/GeoLite2-Country.mmdb" > src/geoip/GeoLite2-Country.mmdb; then + echo "Could not extract the database" >&2 + unlink "${tmpfile}" + + exit 1 +fi + +echo "OK" +exit 0