]> git.ipfire.org Git - people/jschlag/pbs.git/commitdiff
Replace geoip database by local database
authorMichael Tremer <michael.tremer@ipfire.org>
Fri, 6 Oct 2017 18:17:26 +0000 (19:17 +0100)
committerMichael Tremer <michael.tremer@ipfire.org>
Fri, 6 Oct 2017 18:17:26 +0000 (19:17 +0100)
Fixes #11493

The MySQL server had a copy of a geoip database which was queried
to estimate the position of a host on the globe.

The database was huge and querying it was slow because
MySQL is not very good with spatial queries. On top of that,
this precision wasn't really needed.

A local database is way quicker to query and provides us
with the country an IP address is assumed to be in. That
will allow us to select a few mirror servers as preferred
mirrors.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
Makefile.am
README [new file with mode: 0644]
pbs.conf.sample
src/buildservice/__init__.py
src/buildservice/base.py
src/buildservice/constants.py
src/buildservice/geoip.py [new file with mode: 0644]
src/buildservice/mirrors.py
src/geoip/GeoLite2-Country.mmdb [new file with mode: 0644]
src/templates/mirrors-detail.html
src/tools/update-geoip-database.sh [new file with mode: 0755]

index 499d0b571379aca1a448af0d5918a49712480c37..1b9720c1f3149ba18d979aa30874286748d81d8c 100644 (file)
@@ -58,7 +58,8 @@ AM_V_XSLT_0 = @echo "  XSLT    " $@;
 # ------------------------------------------------------------------------------
 
 dist_doc_DATA = \
-       COPYING
+       COPYING \
+       README
 
 # ------------------------------------------------------------------------------
 
@@ -84,6 +85,7 @@ buildservice_PYTHON = \
        src/buildservice/database.py \
        src/buildservice/decorators.py \
        src/buildservice/distribution.py \
+       src/buildservice/geoip.py \
        src/buildservice/git.py \
        src/buildservice/keys.py \
        src/buildservice/logs.py \
@@ -418,6 +420,14 @@ dist_cron_DATA = \
 
 crondir = $(sysconfdir)/cron.d
 
+dist_geoip_DATA = \
+       src/geoip/GeoLite2-Country.mmdb
+
+geoipdir = $(datadir)/geoip
+
+EXTRA_DIST += \
+       src/tools/update-geoip-database.sh
+
 # ------------------------------------------------------------------------------
 
 substitutions = \
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..1fd0154
--- /dev/null
+++ b/README
@@ -0,0 +1,8 @@
+
+REQUIREMENTS
+       Python modules:
+               * geoip2
+
+
+This product includes GeoLite2 data created by MaxMind, available from
+  https://dev.maxmind.com/geoip/geoip2/geolite2/
index 373cbd2bed888c4608454d740ec6e9a6d8bcff51..6578f86e70e717901e13afc4adcde3de0f6aab7d 100644 (file)
@@ -5,11 +5,3 @@ host  = mysql-master.ipfire.org
 user  = pakfire
 pass  = pakfire
 db    = pakfire
-
-[geoip-database]
-; Credentials to the geoip database.
-
-host  = mysql-master.ipfire.org
-user  = pakfire
-pass  = pakfire
-db    = geoip
index a36947d1a9484d7f2048803a14c818b441acd7c8..d82cae1e526947c5e2f4c2b7a79d8f3ea5c40e00 100644 (file)
@@ -14,6 +14,7 @@ from . import builds
 from . import cache
 from . import database
 from . import distribution
+from . import geoip
 from . import keys
 from . import logs
 from . import messages
@@ -38,16 +39,13 @@ class Backend(object):
                # Read configuration file.
                self.config = self.read_config(config_file)
 
-               # Connect to databases.
-               self.geoip_db = self.connect_database("geoip-database")
-
                # Global pakfire settings (from database).
                self.settings = settings.Settings(self)
 
                self.arches      = arches.Arches(self)
                self.builds      = builds.Builds(self)
                self.cache       = cache.Cache(self)
-               self.geoip       = mirrors.GeoIP(self)
+               self.geoip       = geoip.GeoIP(self)
                self.jobs        = builds.Jobs(self)
                self.builders    = builders.Builders(self)
                self.distros     = distribution.Distributions(self)
index a69306f3e45306616f5037020d7ced7b31a94c26..ecae7f3619d731cd353939622a369e862e5fdc9a 100644 (file)
@@ -40,10 +40,6 @@ class Object(object):
                """
                return self.backend
 
-       @property
-       def geoip(self):
-               return self.backend.geoip
-
 
 class DataObject(Object):
        # Table name
index b7ba25734e0b02764d31613b6b3b2939a72a0236..192c2c134cfe42bc04737bdbc83c9ce1a397c30d 100644 (file)
@@ -5,6 +5,9 @@ import os.path
 # Import all constants from the pakfire module.
 from pakfire.constants import *
 
+# XXX must be set by configure
+DATADIR = "/usr/share/pakfire-build-service"
+
 PAKFIRE_DIR  = "/pakfire"
 PACKAGES_DIR = os.path.join(PAKFIRE_DIR, "packages")
 BUILD_RELEASE_DIR = os.path.join(PACKAGES_DIR, "release")
diff --git a/src/buildservice/geoip.py b/src/buildservice/geoip.py
new file mode 100644 (file)
index 0000000..ea95f5c
--- /dev/null
@@ -0,0 +1,28 @@
+#!/usr/bin/python
+
+import geoip2.database
+import geoip2.errors
+import os.path
+
+from . import base
+
+from .constants import DATADIR
+
+class GeoIP(base.Object):
+       def init(self):
+               path = os.path.join(DATADIR, "geoip/GeoLite2-Country.mmdb")
+
+               # Open the database
+               self.db = geoip2.database.Reader(path)
+
+       def guess_from_address(self, address):
+               # Query the database
+               try:
+                       result = self.db.country(address)
+
+               # Return nothing if the address could not be found
+               except geoip2.errors.AddressNotFoundError:
+                      return
+
+               if result:
+                       return result.country.iso_code
index 4f093df224c8aedc007c16d087fffd0fb108b9ab..5c8bf1403ca6f1bfeb4eff59046be38f27aca443 100644 (file)
@@ -7,47 +7,7 @@ import socket
 from . import base
 from . import logs
 
-class GeoIP(object):
-       def __init__(self, pakfire):
-               self.pakfire = pakfire
-
-               self.db = self.pakfire.geoip_db
-
-       def __encode_ip(self, addr):
-               # We get a tuple if there were proxy headers.
-               addr = addr.split(", ")
-               if addr:
-                       addr = addr[-1]
-
-               # ip is calculated as described in http://dev.maxmind.com/geoip/csv
-               try:
-                       a1, a2, a3, a4 = addr.split(".")
-
-                       a1 = int(a1)
-                       a2 = int(a2)
-                       a3 = int(a3)
-                       a4 = int(a4)
-               except:
-                       return 0
-
-               return (16777216 * a1) + (65536 * a2) + (256 * a3) + a4
-
-       def get_all(self, addr):
-               addr = self.__encode_ip(addr)
-
-               ret = self.db.get("\
-                       SELECT * FROM locations \
-                               JOIN addresses ON addresses.location = locations.id \
-                       WHERE \
-                               %s BETWEEN addresses.start_ip_num AND addresses.end_ip_num \
-                       LIMIT 1", addr)
-
-               # If location was not determinable
-               if ret and ret.latitude == 0 and ret.longitude == 0:
-                       return None
-
-               return ret
-
+from .decorators import lazy_property
 
 class Mirrors(base.Object):
        def get_all(self):
@@ -104,28 +64,23 @@ class Mirrors(base.Object):
                return Mirror(self.pakfire, mirror.id)
 
        def get_for_location(self, addr):
-               distance = 10
+               country_code = self.backend.geoip.guess_from_address(addr)
 
-               # Get all mirrors in here.
-               _mirrors = self.get_all()
+               # Cannot return any good mirrors if location is unknown
+               if not country_code:
+                       return []
 
                mirrors = []
-               while len(mirrors) <= 2 and distance <= 270:
-                       for mirror in _mirrors:
-                               if not mirror.enabled:
-                                       continue
 
-                               if mirror in mirrors:
-                                       continue
+               # Walk through all mirrors
+               for mirror in self.get_all():
+                       if not mirror.enabled:
+                               continue
 
-                               # Cannot calc the distance for mirrors when their location is unknown.
-                               if mirror.location is None:
-                                       continue
+                       if mirror.country_code == country_code:
+                               mirrors.append(mirror)
 
-                               if mirror.distance_to(addr) <= distance:
-                                       mirrors.append(mirror)
-
-                       distance *= 1.2
+                       # XXX needs to search for nearby countries
 
                return mirrors
 
@@ -307,52 +262,9 @@ class Mirror(base.Object):
        def address(self):
                return socket.gethostbyname(self.hostname)
 
-       @property
-       def location(self):
-               if self._location is None:
-                       self._location = self.geoip.get_all(self.address)
-
-               return self._location
-
-       @property
+       @lazy_property
        def country_code(self):
-               if self.location:
-                       return self.location.country_code
-                       
-               return "UNKNOWN"
-
-       @property
-       def latitude(self):
-               if self.location:
-                       return self.location.latitude
-
-               return 0
-
-       @property
-       def longitude(self):
-               if self.location:
-                       return self.location.longitude
-
-               return 0
-
-       def distance_to(self, addr):
-               location = self.geoip.get_all(addr)
-               if not location:
-                       return 0
-
-               #if location.country_code.lower() in self.prefer_for_countries:
-               #       return 0
-
-               distance_vector = (
-                       self.latitude - location.latitude,
-                       self.longitude - location.longitude
-               )
-
-               distance = 0
-               for i in distance_vector:
-                       distance += i**2
-
-               return math.sqrt(distance)
+               return self.backend.geoip.guess_from_address(self.address) or "UNKNOWN"
 
        def get_history(self, *args, **kwargs):
                kwargs["mirror"] = self
diff --git a/src/geoip/GeoLite2-Country.mmdb b/src/geoip/GeoLite2-Country.mmdb
new file mode 100644 (file)
index 0000000..31b6d67
Binary files /dev/null and b/src/geoip/GeoLite2-Country.mmdb differ
index 6804b986805c072e1837cb8a3b3cdc822381c9c6..4751d2dd054174012dc408f834c91909d4619b8d 100644 (file)
                </div>
 
                <div class="span6">
-                       {% if mirror.longitude and mirror.latitude %}
-                               <iframe width="100%" height="350" frameborder="0" scrolling="no" marginheight="0" marginwidth="0"
-                                       src="http://www.openstreetmap.org/export/embed.html?bbox={{ mirror.longitude - 4 }},{{ mirror.latitude - 4 }},{{ mirror.longitude + 4 }},{{ mirror.latitude + 4 }}&amp;layer=mapquest&amp;marker={{ mirror.latitude }},{{ mirror.longitude }}" style="border: 1px solid black">
-                               </iframe>
-                               <p>
-                                       <a href="http://www.openstreetmap.org/?lat={{ mirror.latitude }}&amp;lon={{ mirror.longitude }}&amp;zoom=8&amp;layers=M&amp;mlat={{ mirror.latitude }}&amp;mlon={{ mirror.longitude }}" target="_blank">{{ _("View larger map") }}</a>
-                                       -
-                                       &copy; <a href="http://www.openstreetmap.org/" target="_blank">OpenStreetMap</a> contributors, CC-BY-SA
-                               </p>
-                               <p class="muted">
-                                       {{ _("The location of the mirror server is estimated by the IP address.") }}
-                               </p>
-                       {% else %}
-                               <p class="muted">
-                                       {{ _("The location of the mirror server could not be estimated.") }}
-                               </p>
-                       {% end %}
+                       <p class="muted">
+                               {{ _("The location of the mirror server could not be estimated.") }}
+                       </p>
                </div>
        </div>
 
diff --git a/src/tools/update-geoip-database.sh b/src/tools/update-geoip-database.sh
new file mode 100755 (executable)
index 0000000..2544672
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+URL="http://geolite.maxmind.com/download/geoip/database/GeoLite2-Country.tar.gz"
+
+tmpfile=$(mktemp)
+
+# Download the file
+if ! wget "${URL}" -O "${tmpfile}"; then
+       echo "Could not download the database file" >&2
+       unlink "${tmpfile}"
+
+       exit 1
+fi
+
+# Extract database from tarball
+if ! tar xvOf "${tmpfile}" "*/GeoLite2-Country.mmdb" > src/geoip/GeoLite2-Country.mmdb; then
+       echo "Could not extract the database" >&2
+       unlink "${tmpfile}"
+
+       exit 1
+fi
+
+echo "OK"
+exit 0