From: Jochen Sprickerhof
Date: Wed, 6 Jul 2022 16:26:35 +0000 (+0200)
Subject: Add upstream patches for Python
X-Git-Tag: debian/0.9.13-1~9
X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=4e9e69c20fde8fab36e0c2c922fb554bc0d4541d;p=location%2Fdebian%2Flibloc.git

Add upstream patches for Python
---

diff --git a/debian/patches/0001-Make-sources-around-that-we-can-run-tests-without-lo.patch b/debian/patches/0001-Make-sources-around-that-we-can-run-tests-without-lo.patch
new file mode 100644
index 0000000..9cea44f
--- /dev/null
+++ b/debian/patches/0001-Make-sources-around-that-we-can-run-tests-without-lo.patch
@@ -0,0 +1,6756 @@
+From: Michael Tremer
+Date: Thu, 14 Apr 2022 18:31:56 +0000
+Subject: Make sources around that we can run tests without location installed
+
+In order to run the test suite, we need to make the Python module
+loadable from the build directory so that we first of all test the right
+code and that it just works without running "make install" first.
+
+Signed-off-by: Michael Tremer
+---
+ .gitignore | 5 +-
+ Makefile.am | 37 +-
+ po/POTFILES.in | 18 +-
+ src/python/__init__.py.in | 26 -
+ src/python/database.py | 213 -----
+ src/python/downloader.py | 211 -----
+ src/python/export.py | 291 -------
+ src/python/i18n.py | 26 -
+ src/python/importer.py | 250 ------
+ src/python/location-importer.in | 1535 -------------------------------------
+ src/python/location.in | 644 ----------------
+ src/python/location/__init__.py | 24 +
+ src/python/location/database.py | 213 +++++
+ src/python/location/downloader.py | 211 +++++
+ src/python/location/export.py | 291 +++++++
+ src/python/location/i18n.py | 26 +
+ src/python/location/importer.py | 250 ++++++
+ src/python/location/logger.py | 46 ++
+ src/python/logger.py | 46 --
+ src/scripts/location-importer.in | 1535 +++++++++++++++++++++++++++++++++++++
+ src/scripts/location.in | 644 ++++++++++++++++
+ 21 files changed, 3266 insertions(+), 3276 deletions(-)
+ delete mode 100644 src/python/__init__.py.in
+ delete mode 100644 src/python/database.py
+ delete mode 100644 src/python/downloader.py
+ delete mode 100644 src/python/export.py
+ delete mode 100644 src/python/i18n.py
+ delete mode 100644 src/python/importer.py
+ delete mode 100644 src/python/location-importer.in
+ delete mode 100644 src/python/location.in
+ create mode 100644 src/python/location/__init__.py
+ create mode 100644 src/python/location/database.py
+ create mode 100644 src/python/location/downloader.py
+ create mode 100644 src/python/location/export.py
+ create mode 100644 src/python/location/i18n.py
+ create mode 100644 src/python/location/importer.py
+ create mode 100644 src/python/location/logger.py
+ delete mode 100644 src/python/logger.py
+ create mode 100644 src/scripts/location-importer.in
+ create mode 100644 src/scripts/location.in
+
+diff --git a/.gitignore b/.gitignore
+index f04b70e..20bc895 100644
+--- a/.gitignore
++++ b/.gitignore
+@@ -15,9 +15,8 @@ Makefile.in
+ /*.db.xz
+ /libtool
+ /stamp-h1
+-/src/python/location
+-/src/python/location-importer
+-/src/python/__init__.py
++/src/scripts/location
++/src/scripts/location-importer
+ /src/systemd/location-update.service
+ /src/systemd/location-update.timer
+ /test.db
+diff --git a/Makefile.am b/Makefile.am
+index 983cb4a..38ce961 100644
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -175,21 +175,13 @@ CLEANFILES += \
+ src/libloc.pc
+
+ dist_pkgpython_PYTHON = \
+- src/python/database.py \
+- src/python/downloader.py \
+- src/python/export.py \
+- src/python/i18n.py \
+- 
src/python/importer.py \ +- src/python/logger.py +- +-pkgpython_PYTHON = \ +- src/python/__init__.py +- +-EXTRA_DIST += \ +- src/python/__init__.py.in +- +-CLEANFILES += \ +- src/python/__init__.py ++ src/python/location/__init__.py \ ++ src/python/location/database.py \ ++ src/python/location/downloader.py \ ++ src/python/location/export.py \ ++ src/python/location/i18n.py \ ++ src/python/location/importer.py \ ++ src/python/location/logger.py + + pyexec_LTLIBRARIES = \ + src/python/_location.la +@@ -275,16 +267,16 @@ uninstall-perl: + $(DESTDIR)/$(prefix)/man/man3/Location.3pm + + bin_SCRIPTS = \ +- src/python/location \ +- src/python/location-importer ++ src/scripts/location \ ++ src/scripts/location-importer + + EXTRA_DIST += \ +- src/python/location.in \ +- src/python/location-importer.in ++ src/scripts/location.in \ ++ src/scripts/location-importer.in + + CLEANFILES += \ +- src/python/location \ +- src/python/location-importer ++ src/scripts/location \ ++ src/scripts/location-importer + + # ------------------------------------------------------------------------------ + +@@ -321,6 +313,7 @@ TESTS_LDADD = \ + src/libloc-internal.la + + TESTS_ENVIRONMENT = \ ++ PYTHONPATH=$(abs_srcdir)/src/python:$(abs_builddir)/src/python/.libs \ + TEST_DATA_DIR="$(abs_top_srcdir)/tests/data" + + TESTS = \ +@@ -334,7 +327,7 @@ CLEANFILES += \ + testdata.db + + testdata.db: examples/python/create-database.py +- PYTHONPATH=$(abs_builddir)/src/python/.libs \ ++ PYTHONPATH=$(abs_srcdir)/src/python:$(abs_builddir)/src/python/.libs \ + ABS_SRCDIR="$(abs_srcdir)" \ + $(PYTHON) $< $@ + +diff --git a/po/POTFILES.in b/po/POTFILES.in +index 5d2cc46..5f5afa8 100644 +--- a/po/POTFILES.in ++++ b/po/POTFILES.in +@@ -1,12 +1,12 @@ + src/libloc.pc.in +-src/python/__init__.py.in +-src/python/database.py +-src/python/downloader.py +-src/python/export.py +-src/python/i18n.py +-src/python/importer.py +-src/python/location-importer.in +-src/python/location.in +-src/python/logger.py ++src/python/location/__init__.py ++src/python/location/database.py ++src/python/location/downloader.py ++src/python/location/export.py ++src/python/location/i18n.py ++src/python/location/importer.py ++src/python/location/logger.py ++src/scripts/location-importer.in ++src/scripts/location.in + src/systemd/location-update.service.in + src/systemd/location-update.timer.in +diff --git a/src/python/__init__.py.in b/src/python/__init__.py.in +deleted file mode 100644 +index bd94d35..0000000 +--- a/src/python/__init__.py.in ++++ /dev/null +@@ -1,26 +0,0 @@ +-#!/usr/bin/python3 +-############################################################################### +-# # +-# libloc - A library to determine the location of someone on the Internet # +-# # +-# Copyright (C) 2020 IPFire Development Team # +-# # +-# This library is free software; you can redistribute it and/or # +-# modify it under the terms of the GNU Lesser General Public # +-# License as published by the Free Software Foundation; either # +-# version 2.1 of the License, or (at your option) any later version. # +-# # +-# This library is distributed in the hope that it will be useful, # +-# but WITHOUT ANY WARRANTY; without even the implied warranty of # +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +-# Lesser General Public License for more details. # +-# # +-############################################################################### +- +-__version__ = "@VERSION@" +- +-# Import everything from the C module +-from _location import * +- +-# Initialise logging +-from . 
import logger +diff --git a/src/python/database.py b/src/python/database.py +deleted file mode 100644 +index 5d79941..0000000 +--- a/src/python/database.py ++++ /dev/null +@@ -1,213 +0,0 @@ +-#!/usr/bin/env python +- +-""" +- A lightweight wrapper around psycopg2. +- +- Originally part of the Tornado framework. The tornado.database module +- is slated for removal in Tornado 3.0, and it is now available separately +- as torndb. +-""" +- +-import logging +-import psycopg2 +- +-log = logging.getLogger("location.database") +-log.propagate = 1 +- +-class Connection(object): +- """ +- A lightweight wrapper around MySQLdb DB-API connections. +- +- The main value we provide is wrapping rows in a dict/object so that +- columns can be accessed by name. Typical usage:: +- +- db = torndb.Connection("localhost", "mydatabase") +- for article in db.query("SELECT * FROM articles"): +- print article.title +- +- Cursors are hidden by the implementation, but other than that, the methods +- are very similar to the DB-API. +- +- We explicitly set the timezone to UTC and the character encoding to +- UTF-8 on all connections to avoid time zone and encoding errors. +- """ +- def __init__(self, host, database, user=None, password=None): +- self.host = host +- self.database = database +- +- self._db = None +- self._db_args = { +- "host" : host, +- "database" : database, +- "user" : user, +- "password" : password, +- "sslmode" : "require", +- } +- +- try: +- self.reconnect() +- except Exception: +- log.error("Cannot connect to database on %s", self.host, exc_info=True) +- +- def __del__(self): +- self.close() +- +- def close(self): +- """ +- Closes this database connection. +- """ +- if getattr(self, "_db", None) is not None: +- self._db.close() +- self._db = None +- +- def reconnect(self): +- """ +- Closes the existing database connection and re-opens it. +- """ +- self.close() +- +- self._db = psycopg2.connect(**self._db_args) +- self._db.autocommit = True +- +- # Initialize the timezone setting. +- self.execute("SET TIMEZONE TO 'UTC'") +- +- def query(self, query, *parameters, **kwparameters): +- """ +- Returns a row list for the given query and parameters. +- """ +- cursor = self._cursor() +- try: +- self._execute(cursor, query, parameters, kwparameters) +- column_names = [d[0] for d in cursor.description] +- return [Row(zip(column_names, row)) for row in cursor] +- finally: +- cursor.close() +- +- def get(self, query, *parameters, **kwparameters): +- """ +- Returns the first row returned for the given query. +- """ +- rows = self.query(query, *parameters, **kwparameters) +- if not rows: +- return None +- elif len(rows) > 1: +- raise Exception("Multiple rows returned for Database.get() query") +- else: +- return rows[0] +- +- def execute(self, query, *parameters, **kwparameters): +- """ +- Executes the given query, returning the lastrowid from the query. +- """ +- return self.execute_lastrowid(query, *parameters, **kwparameters) +- +- def execute_lastrowid(self, query, *parameters, **kwparameters): +- """ +- Executes the given query, returning the lastrowid from the query. +- """ +- cursor = self._cursor() +- try: +- self._execute(cursor, query, parameters, kwparameters) +- return cursor.lastrowid +- finally: +- cursor.close() +- +- def execute_rowcount(self, query, *parameters, **kwparameters): +- """ +- Executes the given query, returning the rowcount from the query. 
+- """ +- cursor = self._cursor() +- try: +- self._execute(cursor, query, parameters, kwparameters) +- return cursor.rowcount +- finally: +- cursor.close() +- +- def executemany(self, query, parameters): +- """ +- Executes the given query against all the given param sequences. +- +- We return the lastrowid from the query. +- """ +- return self.executemany_lastrowid(query, parameters) +- +- def executemany_lastrowid(self, query, parameters): +- """ +- Executes the given query against all the given param sequences. +- +- We return the lastrowid from the query. +- """ +- cursor = self._cursor() +- try: +- cursor.executemany(query, parameters) +- return cursor.lastrowid +- finally: +- cursor.close() +- +- def executemany_rowcount(self, query, parameters): +- """ +- Executes the given query against all the given param sequences. +- +- We return the rowcount from the query. +- """ +- cursor = self._cursor() +- +- try: +- cursor.executemany(query, parameters) +- return cursor.rowcount +- finally: +- cursor.close() +- +- def _ensure_connected(self): +- if self._db is None: +- log.warning("Database connection was lost...") +- +- self.reconnect() +- +- def _cursor(self): +- self._ensure_connected() +- return self._db.cursor() +- +- def _execute(self, cursor, query, parameters, kwparameters): +- log.debug("SQL Query: %s" % (query % (kwparameters or parameters))) +- +- try: +- return cursor.execute(query, kwparameters or parameters) +- except (OperationalError, psycopg2.ProgrammingError): +- log.error("Error connecting to database on %s", self.host) +- self.close() +- raise +- +- def transaction(self): +- return Transaction(self) +- +- +-class Row(dict): +- """A dict that allows for object-like property access syntax.""" +- def __getattr__(self, name): +- try: +- return self[name] +- except KeyError: +- raise AttributeError(name) +- +- +-class Transaction(object): +- def __init__(self, db): +- self.db = db +- +- self.db.execute("START TRANSACTION") +- +- def __enter__(self): +- return self +- +- def __exit__(self, exctype, excvalue, traceback): +- if exctype is not None: +- self.db.execute("ROLLBACK") +- else: +- self.db.execute("COMMIT") +- +- +-# Alias some common exceptions +-IntegrityError = psycopg2.IntegrityError +-OperationalError = psycopg2.OperationalError +diff --git a/src/python/downloader.py b/src/python/downloader.py +deleted file mode 100644 +index 05f7872..0000000 +--- a/src/python/downloader.py ++++ /dev/null +@@ -1,211 +0,0 @@ +-#!/usr/bin/python3 +-############################################################################### +-# # +-# libloc - A library to determine the location of someone on the Internet # +-# # +-# Copyright (C) 2020 IPFire Development Team # +-# # +-# This library is free software; you can redistribute it and/or # +-# modify it under the terms of the GNU Lesser General Public # +-# License as published by the Free Software Foundation; either # +-# version 2.1 of the License, or (at your option) any later version. # +-# # +-# This library is distributed in the hope that it will be useful, # +-# but WITHOUT ANY WARRANTY; without even the implied warranty of # +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +-# Lesser General Public License for more details. # +-# # +-############################################################################### +- +-import logging +-import lzma +-import os +-import random +-import stat +-import tempfile +-import time +-import urllib.error +-import urllib.parse +-import urllib.request +- +-from . 
import __version__ +-from _location import Database, DATABASE_VERSION_LATEST +- +-DATABASE_FILENAME = "location.db.xz" +-MIRRORS = ( +- "https://location.ipfire.org/databases/", +-) +- +-# Initialise logging +-log = logging.getLogger("location.downloader") +-log.propagate = 1 +- +-class Downloader(object): +- def __init__(self, version=DATABASE_VERSION_LATEST, mirrors=None): +- self.version = version +- +- # Set mirrors or use defaults +- self.mirrors = list(mirrors or MIRRORS) +- +- # Randomize mirrors +- random.shuffle(self.mirrors) +- +- # Get proxies from environment +- self.proxies = self._get_proxies() +- +- def _get_proxies(self): +- proxies = {} +- +- for protocol in ("https", "http"): +- proxy = os.environ.get("%s_proxy" % protocol, None) +- +- if proxy: +- proxies[protocol] = proxy +- +- return proxies +- +- def _make_request(self, url, baseurl=None, headers={}): +- if baseurl: +- url = urllib.parse.urljoin(baseurl, url) +- +- req = urllib.request.Request(url, method="GET") +- +- # Update headers +- headers.update({ +- "User-Agent" : "location/%s" % __version__, +- }) +- +- # Set headers +- for header in headers: +- req.add_header(header, headers[header]) +- +- # Set proxies +- for protocol in self.proxies: +- req.set_proxy(self.proxies[protocol], protocol) +- +- return req +- +- def _send_request(self, req, **kwargs): +- # Log request headers +- log.debug("HTTP %s Request to %s" % (req.method, req.host)) +- log.debug(" URL: %s" % req.full_url) +- log.debug(" Headers:") +- for k, v in req.header_items(): +- log.debug(" %s: %s" % (k, v)) +- +- try: +- res = urllib.request.urlopen(req, **kwargs) +- +- except urllib.error.HTTPError as e: +- # Log response headers +- log.debug("HTTP Response: %s" % e.code) +- log.debug(" Headers:") +- for header in e.headers: +- log.debug(" %s: %s" % (header, e.headers[header])) +- +- # Raise all other errors +- raise e +- +- # Log response headers +- log.debug("HTTP Response: %s" % res.code) +- log.debug(" Headers:") +- for k, v in res.getheaders(): +- log.debug(" %s: %s" % (k, v)) +- +- return res +- +- def download(self, public_key, timestamp=None, tmpdir=None, **kwargs): +- url = "%s/%s" % (self.version, DATABASE_FILENAME) +- +- headers = {} +- if timestamp: +- headers["If-Modified-Since"] = time.strftime( +- "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(timestamp), +- ) +- +- t = tempfile.NamedTemporaryFile(dir=tmpdir, delete=False) +- with t: +- # Try all mirrors +- for mirror in self.mirrors: +- # Prepare HTTP request +- req = self._make_request(url, baseurl=mirror, headers=headers) +- +- try: +- with self._send_request(req) as res: +- decompressor = lzma.LZMADecompressor() +- +- # Read all data +- while True: +- buf = res.read(1024) +- if not buf: +- break +- +- # Decompress data +- buf = decompressor.decompress(buf) +- if buf: +- t.write(buf) +- +- # Write all data to disk +- t.flush() +- +- # Catch decompression errors +- except lzma.LZMAError as e: +- log.warning("Could not decompress downloaded file: %s" % e) +- continue +- +- except urllib.error.HTTPError as e: +- # The file on the server was too old +- if e.code == 304: +- log.warning("%s is serving an outdated database. Trying next mirror..." % mirror) +- +- # Log any other HTTP errors +- else: +- log.warning("%s reported: %s" % (mirror, e)) +- +- # Throw away any downloaded content and try again +- t.truncate() +- +- else: +- # Check if the downloaded database is recent +- if not self._check_database(t, public_key, timestamp): +- log.warning("Downloaded database is outdated. 
Trying next mirror...") +- +- # Throw away the data and try again +- t.truncate() +- continue +- +- # Make the file readable for everyone +- os.chmod(t.name, stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH) +- +- # Return temporary file +- return t +- +- # Delete the temporary file after unsuccessful downloads +- os.unlink(t.name) +- +- raise FileNotFoundError(url) +- +- def _check_database(self, f, public_key, timestamp=None): +- """ +- Checks the downloaded database if it can be opened, +- verified and if it is recent enough +- """ +- log.debug("Opening downloaded database at %s" % f.name) +- +- db = Database(f.name) +- +- # Database is not recent +- if timestamp and db.created_at < timestamp: +- return False +- +- log.info("Downloaded new database from %s" % (time.strftime( +- "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at), +- ))) +- +- # Verify the database +- with open(public_key, "r") as f: +- if not db.verify(f): +- log.error("Could not verify database") +- return False +- +- return True +diff --git a/src/python/export.py b/src/python/export.py +deleted file mode 100644 +index 3cdece4..0000000 +--- a/src/python/export.py ++++ /dev/null +@@ -1,291 +0,0 @@ +-#!/usr/bin/python3 +-############################################################################### +-# # +-# libloc - A library to determine the location of someone on the Internet # +-# # +-# Copyright (C) 2020-2021 IPFire Development Team # +-# # +-# This library is free software; you can redistribute it and/or # +-# modify it under the terms of the GNU Lesser General Public # +-# License as published by the Free Software Foundation; either # +-# version 2.1 of the License, or (at your option) any later version. # +-# # +-# This library is distributed in the hope that it will be useful, # +-# but WITHOUT ANY WARRANTY; without even the implied warranty of # +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +-# Lesser General Public License for more details. 
# +-# # +-############################################################################### +- +-import functools +-import io +-import ipaddress +-import logging +-import math +-import os +-import socket +-import sys +- +-from .i18n import _ +-import _location +- +-# Initialise logging +-log = logging.getLogger("location.export") +-log.propagate = 1 +- +-FLAGS = { +- _location.NETWORK_FLAG_ANONYMOUS_PROXY : "A1", +- _location.NETWORK_FLAG_SATELLITE_PROVIDER : "A2", +- _location.NETWORK_FLAG_ANYCAST : "A3", +- _location.NETWORK_FLAG_DROP : "XD", +-} +- +-class OutputWriter(object): +- suffix = "networks" +- mode = "w" +- +- def __init__(self, name, family=None, directory=None, f=None): +- self.name = name +- self.family = family +- self.directory = directory +- +- # Open output file +- if f: +- self.f = f +- elif self.directory: +- self.f = open(self.filename, self.mode) +- elif "b" in self.mode: +- self.f = io.BytesIO() +- else: +- self.f = io.StringIO() +- +- # Call any custom initialization +- self.init() +- +- # Immediately write the header +- self._write_header() +- +- def init(self): +- """ +- To be overwritten by anything that inherits from this +- """ +- pass +- +- def __repr__(self): +- return "<%s %s f=%s>" % (self.__class__.__name__, self, self.f) +- +- @functools.cached_property +- def tag(self): +- families = { +- socket.AF_INET6 : "6", +- socket.AF_INET : "4", +- } +- +- return "%sv%s" % (self.name, families.get(self.family, "?")) +- +- @functools.cached_property +- def filename(self): +- if self.directory: +- return os.path.join(self.directory, "%s.%s" % (self.tag, self.suffix)) +- +- def _write_header(self): +- """ +- The header of the file +- """ +- pass +- +- def _write_footer(self): +- """ +- The footer of the file +- """ +- pass +- +- def write(self, network): +- self.f.write("%s\n" % network) +- +- def finish(self): +- """ +- Called when all data has been written +- """ +- self._write_footer() +- +- # Flush all output +- self.f.flush() +- +- def print(self): +- """ +- Prints the entire output line by line +- """ +- if isinstance(self.f, io.BytesIO): +- raise TypeError(_("Won't write binary output to stdout")) +- +- # Go back to the beginning +- self.f.seek(0) +- +- # Iterate over everything line by line +- for line in self.f: +- sys.stdout.write(line) +- +- +-class IpsetOutputWriter(OutputWriter): +- """ +- For ipset +- """ +- suffix = "ipset" +- +- # The value is being used if we don't know any better +- DEFAULT_HASHSIZE = 64 +- +- # We aim for this many networks in a bucket on average. This allows us to choose +- # how much memory we want to sacrifice to gain better performance. The lower the +- # factor, the faster a lookup will be, but it will use more memory. +- # We will aim for only using three quarters of all buckets to avoid any searches +- # through the linked lists. +- HASHSIZE_FACTOR = 0.75 +- +- def init(self): +- # Count all networks +- self.networks = 0 +- +- @property +- def hashsize(self): +- """ +- Calculates an optimized hashsize +- """ +- # Return the default value if we don't know the size of the set +- if not self.networks: +- return self.DEFAULT_HASHSIZE +- +- # Find the nearest power of two that is larger than the number of networks +- # divided by the hashsize factor. 
+- exponent = math.log(self.networks / self.HASHSIZE_FACTOR, 2) +- +- # Return the size of the hash (the minimum is 64) +- return max(2 ** math.ceil(exponent), 64) +- +- def _write_header(self): +- # This must have a fixed size, because we will write the header again in the end +- self.f.write("create %s hash:net family inet%s" % ( +- self.tag, +- "6" if self.family == socket.AF_INET6 else "" +- )) +- self.f.write(" hashsize %8d maxelem 1048576 -exist\n" % self.hashsize) +- self.f.write("flush %s\n" % self.tag) +- +- def write(self, network): +- self.f.write("add %s %s\n" % (self.tag, network)) +- +- # Increment network counter +- self.networks += 1 +- +- def _write_footer(self): +- # Jump back to the beginning of the file +- self.f.seek(0) +- +- # Rewrite the header with better configuration +- self._write_header() +- +- +-class NftablesOutputWriter(OutputWriter): +- """ +- For nftables +- """ +- suffix = "set" +- +- def _write_header(self): +- self.f.write("define %s = {\n" % self.tag) +- +- def _write_footer(self): +- self.f.write("}\n") +- +- def write(self, network): +- self.f.write(" %s,\n" % network) +- +- +-class XTGeoIPOutputWriter(OutputWriter): +- """ +- Formats the output in that way, that it can be loaded by +- the xt_geoip kernel module from xtables-addons. +- """ +- mode = "wb" +- +- @property +- def tag(self): +- return self.name +- +- @property +- def suffix(self): +- return "iv%s" % ("6" if self.family == socket.AF_INET6 else "4") +- +- def write(self, network): +- self.f.write(network._first_address) +- self.f.write(network._last_address) +- +- +-formats = { +- "ipset" : IpsetOutputWriter, +- "list" : OutputWriter, +- "nftables" : NftablesOutputWriter, +- "xt_geoip" : XTGeoIPOutputWriter, +-} +- +-class Exporter(object): +- def __init__(self, db, writer): +- self.db, self.writer = db, writer +- +- def export(self, directory, families, countries, asns): +- for family in families: +- log.debug("Exporting family %s" % family) +- +- writers = {} +- +- # Create writers for countries +- for country_code in countries: +- writers[country_code] = self.writer(country_code, family=family, directory=directory) +- +- # Create writers for ASNs +- for asn in asns: +- writers[asn] = self.writer("AS%s" % asn, family=family, directory=directory) +- +- # Filter countries from special country codes +- country_codes = [ +- country_code for country_code in countries if not country_code in FLAGS.values() +- ] +- +- # Get all networks that match the family +- networks = self.db.search_networks(family=family, +- country_codes=country_codes, asns=asns, flatten=True) +- +- # Walk through all networks +- for network in networks: +- # Write matching countries +- try: +- writers[network.country_code].write(network) +- except KeyError: +- pass +- +- # Write matching ASNs +- try: +- writers[network.asn].write(network) +- except KeyError: +- pass +- +- # Handle flags +- for flag in FLAGS: +- if network.has_flag(flag): +- # Fetch the "fake" country code +- country = FLAGS[flag] +- +- try: +- writers[country].write(network) +- except KeyError: +- pass +- +- # Write everything to the filesystem +- for writer in writers.values(): +- writer.finish() +- +- # Print to stdout +- if not directory: +- for writer in writers.values(): +- writer.print() +diff --git a/src/python/i18n.py b/src/python/i18n.py +deleted file mode 100644 +index 2161aa6..0000000 +--- a/src/python/i18n.py ++++ /dev/null +@@ -1,26 +0,0 @@ +-#!/usr/bin/python3 +-############################################################################### 
+-# # +-# libloc - A library to determine the location of someone on the Internet # +-# # +-# Copyright (C) 2020 IPFire Development Team # +-# # +-# This library is free software; you can redistribute it and/or # +-# modify it under the terms of the GNU Lesser General Public # +-# License as published by the Free Software Foundation; either # +-# version 2.1 of the License, or (at your option) any later version. # +-# # +-# This library is distributed in the hope that it will be useful, # +-# but WITHOUT ANY WARRANTY; without even the implied warranty of # +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +-# Lesser General Public License for more details. # +-# # +-############################################################################### +- +-import gettext +- +-def _(singular, plural=None, n=None): +- if plural: +- return gettext.dngettext("libloc", singular, plural, n) +- +- return gettext.dgettext("libloc", singular) +diff --git a/src/python/importer.py b/src/python/importer.py +deleted file mode 100644 +index dee36ed..0000000 +--- a/src/python/importer.py ++++ /dev/null +@@ -1,250 +0,0 @@ +-#!/usr/bin/python3 +-############################################################################### +-# # +-# libloc - A library to determine the location of someone on the Internet # +-# # +-# Copyright (C) 2020 IPFire Development Team # +-# # +-# This library is free software; you can redistribute it and/or # +-# modify it under the terms of the GNU Lesser General Public # +-# License as published by the Free Software Foundation; either # +-# version 2.1 of the License, or (at your option) any later version. # +-# # +-# This library is distributed in the hope that it will be useful, # +-# but WITHOUT ANY WARRANTY; without even the implied warranty of # +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +-# Lesser General Public License for more details. 
# +-# # +-############################################################################### +- +-import gzip +-import logging +-import urllib.request +- +-# Initialise logging +-log = logging.getLogger("location.importer") +-log.propagate = 1 +- +-WHOIS_SOURCES = { +- # African Network Information Centre +- "AFRINIC": [ +- "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz" +- ], +- +- # Asia Pacific Network Information Centre +- "APNIC": [ +- "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz", +- "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz", +- #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz", +- #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz", +- "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz", +- "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz" +- ], +- +- # American Registry for Internet Numbers +- # XXX there is nothing useful for us in here +- # ARIN: [ +- # "https://ftp.arin.net/pub/rr/arin.db" +- # ], +- +- # Japan Network Information Center +- "JPNIC": [ +- "https://ftp.nic.ad.jp/jpirr/jpirr.db.gz" +- ], +- +- # Latin America and Caribbean Network Information Centre +- "LACNIC": [ +- "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz" +- ], +- +- # Réseaux IP Européens +- "RIPE": [ +- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz", +- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz", +- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz", +- #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz", +- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz", +- "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz" +- ], +-} +- +-EXTENDED_SOURCES = { +- # African Network Information Centre +- # "ARIN": [ +- # "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest" +- # ], +- +- # Asia Pacific Network Information Centre +- # "APNIC": [ +- # "https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest" +- # ], +- +- # American Registry for Internet Numbers +- "ARIN": [ +- "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest" +- ], +- +- # Latin America and Caribbean Network Information Centre +- "LACNIC": [ +- "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest" +- ], +- +- # Réseaux IP Européens +- # "RIPE": [ +- # "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest" +- # ], +-} +- +-class Downloader(object): +- def __init__(self): +- self.proxy = None +- +- def set_proxy(self, url): +- """ +- Sets a HTTP proxy that is used to perform all requests +- """ +- log.info("Using proxy %s" % url) +- self.proxy = url +- +- def request(self, url, data=None, return_blocks=False): +- req = urllib.request.Request(url, data=data) +- +- # Configure proxy +- if self.proxy: +- req.set_proxy(self.proxy, "http") +- +- return DownloaderContext(self, req, return_blocks=return_blocks) +- +- +-class DownloaderContext(object): +- def __init__(self, downloader, request, return_blocks=False): +- self.downloader = downloader +- self.request = request +- +- # Should we return one block or a single line? +- self.return_blocks = return_blocks +- +- # Save the response object +- self.response = None +- +- def __enter__(self): +- log.info("Retrieving %s..." 
% self.request.full_url) +- +- # Send request +- self.response = urllib.request.urlopen(self.request) +- +- # Log the response headers +- log.debug("Response Headers:") +- for header in self.headers: +- log.debug(" %s: %s" % (header, self.get_header(header))) +- +- return self +- +- def __exit__(self, type, value, traceback): +- pass +- +- def __iter__(self): +- """ +- Makes the object iterable by going through each block +- """ +- if self.return_blocks: +- return iterate_over_blocks(self.body) +- +- return iterate_over_lines(self.body) +- +- @property +- def headers(self): +- if self.response: +- return self.response.headers +- +- def get_header(self, name): +- if self.headers: +- return self.headers.get(name) +- +- @property +- def body(self): +- """ +- Returns a file-like object with the decoded content +- of the response. +- """ +- content_type = self.get_header("Content-Type") +- +- # Decompress any gzipped response on the fly +- if content_type in ("application/x-gzip", "application/gzip"): +- return gzip.GzipFile(fileobj=self.response, mode="rb") +- +- # Return the response by default +- return self.response +- +- +-def read_blocks(f): +- for block in iterate_over_blocks(f): +- type = None +- data = {} +- +- for i, line in enumerate(block): +- key, value = line.split(":", 1) +- +- # The key of the first line defines the type +- if i == 0: +- type = key +- +- # Store value +- data[key] = value.strip() +- +- yield type, data +- +-def iterate_over_blocks(f, charsets=("utf-8", "latin1")): +- block = [] +- +- for line in f: +- # Convert to string +- for charset in charsets: +- try: +- line = line.decode(charset) +- except UnicodeDecodeError: +- continue +- else: +- break +- +- # Skip commented lines +- if line.startswith("#") or line.startswith("%"): +- continue +- +- # Strip line-endings +- line = line.rstrip() +- +- # Remove any comments at the end of line +- line, hash, comment = line.partition("#") +- +- if comment: +- # Strip any whitespace before the comment +- line = line.rstrip() +- +- # If the line is now empty, we move on +- if not line: +- continue +- +- if line: +- block.append(line) +- continue +- +- # End the block on an empty line +- if block: +- yield block +- +- # Reset the block +- block = [] +- +- # Return the last block +- if block: +- yield block +- +- +-def iterate_over_lines(f): +- for line in f: +- # Decode the line +- line = line.decode() +- +- # Strip the ending +- yield line.rstrip() +diff --git a/src/python/location-importer.in b/src/python/location-importer.in +deleted file mode 100644 +index bee9186..0000000 +--- a/src/python/location-importer.in ++++ /dev/null +@@ -1,1535 +0,0 @@ +-#!/usr/bin/python3 +-############################################################################### +-# # +-# libloc - A library to determine the location of someone on the Internet # +-# # +-# Copyright (C) 2020-2022 IPFire Development Team # +-# # +-# This library is free software; you can redistribute it and/or # +-# modify it under the terms of the GNU Lesser General Public # +-# License as published by the Free Software Foundation; either # +-# version 2.1 of the License, or (at your option) any later version. # +-# # +-# This library is distributed in the hope that it will be useful, # +-# but WITHOUT ANY WARRANTY; without even the implied warranty of # +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +-# Lesser General Public License for more details. 
# +-# # +-############################################################################### +- +-import argparse +-import ipaddress +-import json +-import logging +-import math +-import re +-import socket +-import sys +-import telnetlib +- +-# Load our location module +-import location +-import location.database +-import location.importer +-from location.i18n import _ +- +-# Initialise logging +-log = logging.getLogger("location.importer") +-log.propagate = 1 +- +-# Define constants +-VALID_ASN_RANGES = ( +- (1, 23455), +- (23457, 64495), +- (131072, 4199999999), +-) +- +- +-class CLI(object): +- def parse_cli(self): +- parser = argparse.ArgumentParser( +- description=_("Location Importer Command Line Interface"), +- ) +- subparsers = parser.add_subparsers() +- +- # Global configuration flags +- parser.add_argument("--debug", action="store_true", +- help=_("Enable debug output")) +- parser.add_argument("--quiet", action="store_true", +- help=_("Enable quiet mode")) +- +- # version +- parser.add_argument("--version", action="version", +- version="%(prog)s @VERSION@") +- +- # Database +- parser.add_argument("--database-host", required=True, +- help=_("Database Hostname"), metavar=_("HOST")) +- parser.add_argument("--database-name", required=True, +- help=_("Database Name"), metavar=_("NAME")) +- parser.add_argument("--database-username", required=True, +- help=_("Database Username"), metavar=_("USERNAME")) +- parser.add_argument("--database-password", required=True, +- help=_("Database Password"), metavar=_("PASSWORD")) +- +- # Write Database +- write = subparsers.add_parser("write", help=_("Write database to file")) +- write.set_defaults(func=self.handle_write) +- write.add_argument("file", nargs=1, help=_("Database File")) +- write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key")) +- write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key")) +- write.add_argument("--vendor", nargs="?", help=_("Sets the vendor")) +- write.add_argument("--description", nargs="?", help=_("Sets a description")) +- write.add_argument("--license", nargs="?", help=_("Sets the license")) +- write.add_argument("--version", type=int, help=_("Database Format Version")) +- +- # Update WHOIS +- update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information")) +- update_whois.set_defaults(func=self.handle_update_whois) +- +- # Update announcements +- update_announcements = subparsers.add_parser("update-announcements", +- help=_("Update BGP Annoucements")) +- update_announcements.set_defaults(func=self.handle_update_announcements) +- update_announcements.add_argument("server", nargs=1, +- help=_("Route Server to connect to"), metavar=_("SERVER")) +- +- # Update overrides +- update_overrides = subparsers.add_parser("update-overrides", +- help=_("Update overrides"), +- ) +- update_overrides.add_argument( +- "files", nargs="+", help=_("Files to import"), +- ) +- update_overrides.set_defaults(func=self.handle_update_overrides) +- +- # Import countries +- import_countries = subparsers.add_parser("import-countries", +- help=_("Import countries"), +- ) +- import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"), +- help=_("File to import")) +- import_countries.set_defaults(func=self.handle_import_countries) +- +- args = parser.parse_args() +- +- # Configure logging +- if args.debug: +- location.logger.set_level(logging.DEBUG) +- elif args.quiet: +- location.logger.set_level(logging.WARNING) +- +- # Print usage if no action was 
given +- if not "func" in args: +- parser.print_usage() +- sys.exit(2) +- +- return args +- +- def run(self): +- # Parse command line arguments +- args = self.parse_cli() +- +- # Initialise database +- self.db = self._setup_database(args) +- +- # Call function +- ret = args.func(args) +- +- # Return with exit code +- if ret: +- sys.exit(ret) +- +- # Otherwise just exit +- sys.exit(0) +- +- def _setup_database(self, ns): +- """ +- Initialise the database +- """ +- # Connect to database +- db = location.database.Connection( +- host=ns.database_host, database=ns.database_name, +- user=ns.database_username, password=ns.database_password, +- ) +- +- with db.transaction(): +- db.execute(""" +- -- announcements +- CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint, +- first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, +- last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP); +- CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network); +- CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network)); +- CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops); +- +- -- autnums +- CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL); +- ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text; +- CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number); +- +- -- countries +- CREATE TABLE IF NOT EXISTS countries( +- country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL); +- CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code); +- +- -- networks +- CREATE TABLE IF NOT EXISTS networks(network inet, country text); +- ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[]; +- ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text; +- CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); +- CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); +- CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops); +- +- -- overrides +- CREATE TABLE IF NOT EXISTS autnum_overrides( +- number bigint NOT NULL, +- name text, +- country text, +- is_anonymous_proxy boolean, +- is_satellite_provider boolean, +- is_anycast boolean +- ); +- CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number +- ON autnum_overrides(number); +- ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text; +- ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean; +- +- CREATE TABLE IF NOT EXISTS network_overrides( +- network inet NOT NULL, +- country text, +- is_anonymous_proxy boolean, +- is_satellite_provider boolean, +- is_anycast boolean +- ); +- CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network +- ON network_overrides(network); +- CREATE INDEX IF NOT EXISTS network_overrides_search +- ON network_overrides USING GIST(network inet_ops); +- ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text; +- ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean; +- """) +- +- return db +- +- def handle_write(self, ns): +- """ +- Compiles a database in libloc format out of what is in the database +- """ +- # Allocate a writer +- writer = location.Writer(ns.signing_key, ns.backup_signing_key) +- +- # Set all metadata +- if ns.vendor: +- writer.vendor = ns.vendor +- +- if ns.description: +- writer.description = ns.description +- +- if ns.license: +- writer.license = ns.license +- +- # 
Add all Autonomous Systems +- log.info("Writing Autonomous Systems...") +- +- # Select all ASes with a name +- rows = self.db.query(""" +- SELECT +- autnums.number AS number, +- COALESCE( +- (SELECT overrides.name FROM autnum_overrides overrides +- WHERE overrides.number = autnums.number), +- autnums.name +- ) AS name +- FROM autnums +- WHERE name <> %s ORDER BY number +- """, "") +- +- for row in rows: +- a = writer.add_as(row.number) +- a.name = row.name +- +- # Add all networks +- log.info("Writing networks...") +- +- # Select all known networks +- rows = self.db.query(""" +- WITH known_networks AS ( +- SELECT network FROM announcements +- UNION +- SELECT network FROM networks +- UNION +- SELECT network FROM network_overrides +- ), +- +- ordered_networks AS ( +- SELECT +- known_networks.network AS network, +- announcements.autnum AS autnum, +- networks.country AS country, +- +- -- Must be part of returned values for ORDER BY clause +- masklen(announcements.network) AS sort_a, +- masklen(networks.network) AS sort_b +- FROM +- known_networks +- LEFT JOIN +- announcements ON known_networks.network <<= announcements.network +- LEFT JOIN +- networks ON known_networks.network <<= networks.network +- ORDER BY +- known_networks.network, +- sort_a DESC, +- sort_b DESC +- ) +- +- -- Return a list of those networks enriched with all +- -- other information that we store in the database +- SELECT +- DISTINCT ON (network) +- network, +- autnum, +- +- -- Country +- COALESCE( +- ( +- SELECT country FROM network_overrides overrides +- WHERE networks.network <<= overrides.network +- ORDER BY masklen(overrides.network) DESC +- LIMIT 1 +- ), +- ( +- SELECT country FROM autnum_overrides overrides +- WHERE networks.autnum = overrides.number +- ), +- networks.country +- ) AS country, +- +- -- Flags +- COALESCE( +- ( +- SELECT is_anonymous_proxy FROM network_overrides overrides +- WHERE networks.network <<= overrides.network +- ORDER BY masklen(overrides.network) DESC +- LIMIT 1 +- ), +- ( +- SELECT is_anonymous_proxy FROM autnum_overrides overrides +- WHERE networks.autnum = overrides.number +- ), +- FALSE +- ) AS is_anonymous_proxy, +- COALESCE( +- ( +- SELECT is_satellite_provider FROM network_overrides overrides +- WHERE networks.network <<= overrides.network +- ORDER BY masklen(overrides.network) DESC +- LIMIT 1 +- ), +- ( +- SELECT is_satellite_provider FROM autnum_overrides overrides +- WHERE networks.autnum = overrides.number +- ), +- FALSE +- ) AS is_satellite_provider, +- COALESCE( +- ( +- SELECT is_anycast FROM network_overrides overrides +- WHERE networks.network <<= overrides.network +- ORDER BY masklen(overrides.network) DESC +- LIMIT 1 +- ), +- ( +- SELECT is_anycast FROM autnum_overrides overrides +- WHERE networks.autnum = overrides.number +- ), +- FALSE +- ) AS is_anycast, +- COALESCE( +- ( +- SELECT is_drop FROM network_overrides overrides +- WHERE networks.network <<= overrides.network +- ORDER BY masklen(overrides.network) DESC +- LIMIT 1 +- ), +- ( +- SELECT is_drop FROM autnum_overrides overrides +- WHERE networks.autnum = overrides.number +- ), +- FALSE +- ) AS is_drop +- FROM +- ordered_networks networks +- """) +- +- for row in rows: +- network = writer.add_network(row.network) +- +- # Save country +- if row.country: +- network.country_code = row.country +- +- # Save ASN +- if row.autnum: +- network.asn = row.autnum +- +- # Set flags +- if row.is_anonymous_proxy: +- network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY) +- +- if row.is_satellite_provider: +- 
network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER) +- +- if row.is_anycast: +- network.set_flag(location.NETWORK_FLAG_ANYCAST) +- +- if row.is_drop: +- network.set_flag(location.NETWORK_FLAG_DROP) +- +- # Add all countries +- log.info("Writing countries...") +- rows = self.db.query("SELECT * FROM countries ORDER BY country_code") +- +- for row in rows: +- c = writer.add_country(row.country_code) +- c.continent_code = row.continent_code +- c.name = row.name +- +- # Write everything to file +- log.info("Writing database to file...") +- for file in ns.file: +- writer.write(file) +- +- def handle_update_whois(self, ns): +- downloader = location.importer.Downloader() +- +- # Download all sources +- with self.db.transaction(): +- # Create some temporary tables to store parsed data +- self.db.execute(""" +- CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL) +- ON COMMIT DROP; +- CREATE UNIQUE INDEX _autnums_number ON _autnums(number); +- +- CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL) +- ON COMMIT DROP; +- CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle); +- +- CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL) +- ON COMMIT DROP; +- CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network)); +- CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network); +- """) +- +- # Remove all previously imported content +- self.db.execute(""" +- TRUNCATE TABLE networks; +- """) +- +- # Fetch all valid country codes to check parsed networks aganist... +- rows = self.db.query("SELECT * FROM countries ORDER BY country_code") +- validcountries = [] +- +- for row in rows: +- validcountries.append(row.country_code) +- +- for source_key in location.importer.WHOIS_SOURCES: +- for single_url in location.importer.WHOIS_SOURCES[source_key]: +- with downloader.request(single_url, return_blocks=True) as f: +- for block in f: +- self._parse_block(block, source_key, validcountries) +- +- # Process all parsed networks from every RIR we happen to have access to, +- # insert the largest network chunks into the networks table immediately... +- families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)") +- +- for family in (row.family for row in families): +- smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family) +- +- self.db.execute("INSERT INTO networks(network, country, original_countries, source) \ +- SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family) +- +- # ... determine any other prefixes for this network family, ... +- prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \ +- WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family) +- +- # ... and insert networks with this prefix in case they provide additional +- # information (i. e. 
subnet of a larger chunk with a different country) +- for prefix in (row.prefix for row in prefixes): +- self.db.execute(""" +- WITH candidates AS ( +- SELECT +- _rirdata.network, +- _rirdata.country, +- _rirdata.original_countries, +- _rirdata.source +- FROM +- _rirdata +- WHERE +- family(_rirdata.network) = %s +- AND +- masklen(_rirdata.network) = %s +- ), +- filtered AS ( +- SELECT +- DISTINCT ON (c.network) +- c.network, +- c.country, +- c.original_countries, +- c.source, +- masklen(networks.network), +- networks.country AS parent_country +- FROM +- candidates c +- LEFT JOIN +- networks +- ON +- c.network << networks.network +- ORDER BY +- c.network, +- masklen(networks.network) DESC NULLS LAST +- ) +- INSERT INTO +- networks(network, country, original_countries, source) +- SELECT +- network, +- country, +- original_countries, +- source +- FROM +- filtered +- WHERE +- parent_country IS NULL +- OR +- country <> parent_country +- ON CONFLICT DO NOTHING""", +- family, prefix, +- ) +- +- self.db.execute(""" +- INSERT INTO autnums(number, name, source) +- SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums +- JOIN _organizations ON _autnums.organization = _organizations.handle +- ON CONFLICT (number) DO UPDATE SET name = excluded.name; +- """) +- +- # Download all extended sources +- for source_key in location.importer.EXTENDED_SOURCES: +- for single_url in location.importer.EXTENDED_SOURCES[source_key]: +- with self.db.transaction(): +- # Download data +- with downloader.request(single_url) as f: +- for line in f: +- self._parse_line(line, source_key, validcountries) +- +- # Download and import (technical) AS names from ARIN +- self._import_as_names_from_arin() +- +- def _check_parsed_network(self, network): +- """ +- Assistive function to detect and subsequently sort out parsed +- networks from RIR data (both Whois and so-called "extended sources"), +- which are or have... +- +- (a) not globally routable (RFC 1918 space, et al.) +- (b) covering a too large chunk of the IP address space (prefix length +- is < 7 for IPv4 networks, and < 10 for IPv6) +- (c) "0.0.0.0" or "::" as a network address +- (d) are too small for being publicly announced (we have decided not to +- process them at the moment, as they significantly enlarge our +- database without providing very helpful additional information) +- +- This unfortunately is necessary due to brain-dead clutter across +- various RIR databases, causing mismatches and eventually disruptions. +- +- We will return False in case a network is not suitable for adding +- it to our database, and True otherwise. 
+- """ +- +- if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)): +- return False +- +- if not network.is_global: +- log.debug("Skipping non-globally routable network: %s" % network) +- return False +- +- if network.version == 4: +- if network.prefixlen < 7: +- log.debug("Skipping too big IP chunk: %s" % network) +- return False +- +- if network.prefixlen > 24: +- log.debug("Skipping network too small to be publicly announced: %s" % network) +- return False +- +- if str(network.network_address) == "0.0.0.0": +- log.debug("Skipping network based on 0.0.0.0: %s" % network) +- return False +- +- elif network.version == 6: +- if network.prefixlen < 10: +- log.debug("Skipping too big IP chunk: %s" % network) +- return False +- +- if network.prefixlen > 48: +- log.debug("Skipping network too small to be publicly announced: %s" % network) +- return False +- +- if str(network.network_address) == "::": +- log.debug("Skipping network based on '::': %s" % network) +- return False +- +- else: +- # This should not happen... +- log.warning("Skipping network of unknown family, this should not happen: %s" % network) +- return False +- +- # In case we have made it here, the network is considered to +- # be suitable for libloc consumption... +- return True +- +- def _check_parsed_asn(self, asn): +- """ +- Assistive function to filter Autonomous System Numbers not being suitable +- for adding to our database. Returns False in such cases, and True otherwise. +- """ +- +- for start, end in VALID_ASN_RANGES: +- if start <= asn and end >= asn: +- return True +- +- log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn) +- return False +- +- def _parse_block(self, block, source_key, validcountries = None): +- # Get first line to find out what type of block this is +- line = block[0] +- +- # aut-num +- if line.startswith("aut-num:"): +- return self._parse_autnum_block(block, source_key) +- +- # inetnum +- if line.startswith("inet6num:") or line.startswith("inetnum:"): +- return self._parse_inetnum_block(block, source_key, validcountries) +- +- # organisation +- elif line.startswith("organisation:"): +- return self._parse_org_block(block, source_key) +- +- def _parse_autnum_block(self, block, source_key): +- autnum = {} +- for line in block: +- # Split line +- key, val = split_line(line) +- +- if key == "aut-num": +- m = re.match(r"^(AS|as)(\d+)", val) +- if m: +- autnum["asn"] = m.group(2) +- +- elif key == "org": +- autnum[key] = val.upper() +- +- elif key == "descr": +- # Save the first description line as well... +- if not key in autnum: +- autnum[key] = val +- +- # Skip empty objects +- if not autnum or not "asn" in autnum: +- return +- +- # Insert a dummy organisation handle into our temporary organisations +- # table in case the AS does not have an organisation handle set, but +- # has a description (a quirk often observed in APNIC area), so we can +- # later display at least some string for this AS. 
+- if not "org" in autnum: +- if "descr" in autnum: +- autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn") +- +- self.db.execute("INSERT INTO _organizations(handle, name, source) \ +- VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING", +- autnum.get("org"), autnum.get("descr"), source_key, +- ) +- else: +- log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \ +- autnum.get("asn")) +- return +- +- # Insert into database +- self.db.execute("INSERT INTO _autnums(number, organization, source) \ +- VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \ +- organization = excluded.organization", +- autnum.get("asn"), autnum.get("org"), source_key, +- ) +- +- def _parse_inetnum_block(self, block, source_key, validcountries = None): +- log.debug("Parsing inetnum block:") +- +- inetnum = {} +- for line in block: +- log.debug(line) +- +- # Split line +- key, val = split_line(line) +- +- # Filter any inetnum records which are only referring to IP space +- # not managed by that specific RIR... +- if key == "netname": +- if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()): +- log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip()) +- return +- +- if key == "inetnum": +- start_address, delim, end_address = val.partition("-") +- +- # Strip any excess space +- start_address, end_address = start_address.rstrip(), end_address.strip() +- +- # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22") +- if start_address and not (delim or end_address): +- try: +- start_address = ipaddress.ip_network(start_address, strict=False) +- except ValueError: +- start_address = start_address.split("/") +- ldigits = start_address[0].count(".") +- +- # How many octets do we need to add? +- # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.) +- if ldigits == 1: +- start_address = start_address[0] + ".0.0/" + start_address[1] +- elif ldigits == 2: +- start_address = start_address[0] + ".0/" + start_address[1] +- else: +- log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line) +- return +- +- try: +- start_address = ipaddress.ip_network(start_address, strict=False) +- except ValueError: +- log.warning("Could not parse line in LACNIC DB format: %s" % line) +- return +- +- # Enumerate first and last IP address of this network +- end_address = start_address[-1] +- start_address = start_address[0] +- +- else: +- # Convert to IP address +- try: +- start_address = ipaddress.ip_address(start_address) +- end_address = ipaddress.ip_address(end_address) +- except ValueError: +- log.warning("Could not parse line: %s" % line) +- return +- +- inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address)) +- +- elif key == "inet6num": +- inetnum[key] = [ipaddress.ip_network(val, strict=False)] +- +- elif key == "country": +- val = val.upper() +- +- # Catch RIR data objects with more than one country code... +- if not key in inetnum: +- inetnum[key] = [] +- else: +- if val in inetnum.get("country"): +- # ... but keep this list distinct... +- continue +- +- # When people set country codes to "UK", they actually mean "GB" +- if val == "UK": +- val = "GB" +- +- inetnum[key].append(val) +- +- # Skip empty objects +- if not inetnum or not "country" in inetnum: +- return +- +- # Prepare skipping objects with unknown country codes... 
+- invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries] +- +- # Iterate through all networks enumerated from above, check them for plausibility and insert +- # them into the database, if _check_parsed_network() succeeded +- for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): +- if self._check_parsed_network(single_network): +- +- # Skip objects with unknown country codes if they are valid to avoid log spam... +- if validcountries and invalidcountries: +- log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \ +- (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) +- break +- +- # Everything is fine here, run INSERT statement... +- self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \ +- VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", +- "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key, +- ) +- +- def _parse_org_block(self, block, source_key): +- org = {} +- for line in block: +- # Split line +- key, val = split_line(line) +- +- if key == "organisation": +- org[key] = val.upper() +- elif key == "org-name": +- org[key] = val +- +- # Skip empty objects +- if not org: +- return +- +- self.db.execute("INSERT INTO _organizations(handle, name, source) \ +- VALUES(%s, %s, %s) ON CONFLICT (handle) DO \ +- UPDATE SET name = excluded.name", +- org.get("organisation"), org.get("org-name"), source_key, +- ) +- +- def _parse_line(self, line, source_key, validcountries = None): +- # Skip version line +- if line.startswith("2"): +- return +- +- # Skip comments +- if line.startswith("#"): +- return +- +- try: +- registry, country_code, type, line = line.split("|", 3) +- except: +- log.warning("Could not parse line: %s" % line) +- return +- +- # Skip any lines that are for stats only or do not have a country +- # code at all (avoids log spam below) +- if not country_code or country_code == '*': +- return +- +- # Skip objects with unknown country codes +- if validcountries and country_code not in validcountries: +- log.warning("Skipping line with bogus country '%s': %s" % \ +- (country_code, line)) +- return +- +- if type in ("ipv6", "ipv4"): +- return self._parse_ip_line(country_code, type, line, source_key) +- +- def _parse_ip_line(self, country, type, line, source_key): +- try: +- address, prefix, date, status, organization = line.split("|") +- except ValueError: +- organization = None +- +- # Try parsing the line without organization +- try: +- address, prefix, date, status = line.split("|") +- except ValueError: +- log.warning("Unhandled line format: %s" % line) +- return +- +- # Skip anything that isn't properly assigned +- if not status in ("assigned", "allocated"): +- return +- +- # Cast prefix into an integer +- try: +- prefix = int(prefix) +- except: +- log.warning("Invalid prefix: %s" % prefix) +- return +- +- # Fix prefix length for IPv4 +- if type == "ipv4": +- prefix = 32 - int(math.log(prefix, 2)) +- +- # Try to parse the address +- try: +- network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False) +- except ValueError: +- log.warning("Invalid IP address: %s" % address) +- return +- +- if not self._check_parsed_network(network): +- return +- +- self.db.execute("INSERT INTO networks(network, country, original_countries, source) \ +- VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \ +- UPDATE SET country = 
excluded.country", +- "%s" % network, country, [country], source_key, +- ) +- +- def _import_as_names_from_arin(self): +- downloader = location.importer.Downloader() +- +- # XXX: Download AS names file from ARIN (note that these names appear to be quite +- # technical, not intended for human consumption, as description fields in +- # organisation handles for other RIRs are - however, this is what we have got, +- # and in some cases, it might be still better than nothing) +- with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f: +- for line in f: +- # Convert binary line to string... +- line = str(line) +- +- # ... valid lines start with a space, followed by the number of the Autonomous System ... +- if not line.startswith(" "): +- continue +- +- # Split line and check if there is a valid ASN in it... +- asn, name = line.split()[0:2] +- +- try: +- asn = int(asn) +- except ValueError: +- log.debug("Skipping ARIN AS names line not containing an integer for ASN") +- continue +- +- # Filter invalid ASNs... +- if not self._check_parsed_asn(asn): +- continue +- +- # Skip any AS name that appears to be a placeholder for a different RIR or entity... +- if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(?:\d?$|\-)", name): +- continue +- +- # Bail out in case the AS name contains anything we do not expect here... +- if re.search(r"[^a-zA-Z0-9-_]", name): +- log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \ +- (asn, name)) +- +- # Things look good here, run INSERT statement and skip this one if we already have +- # a (better?) name for this Autonomous System... +- self.db.execute(""" +- INSERT INTO autnums( +- number, +- name, +- source +- ) VALUES (%s, %s, %s) +- ON CONFLICT (number) DO NOTHING""", +- asn, +- name, +- "ARIN", +- ) +- +- def handle_update_announcements(self, ns): +- server = ns.server[0] +- +- with self.db.transaction(): +- if server.startswith("/"): +- self._handle_update_announcements_from_bird(server) +- else: +- self._handle_update_announcements_from_telnet(server) +- +- # Purge anything we never want here +- self.db.execute(""" +- -- Delete default routes +- DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0'; +- +- -- Delete anything that is not global unicast address space +- DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3'; +- +- -- DELETE "current network" address space +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8'; +- +- -- DELETE local loopback address space +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8'; +- +- -- DELETE RFC 1918 address space +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8'; +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12'; +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16'; +- +- -- DELETE test, benchmark and documentation address space +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24'; +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24'; +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15'; +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24'; +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24'; +- +- -- DELETE CGNAT address space (RFC 6598) +- DELETE FROM 
announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10'; +- +- -- DELETE link local address space +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16'; +- +- -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068) +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24'; +- DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16'; +- +- -- DELETE multicast and reserved address space +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4'; +- DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4'; +- +- -- Delete networks that are too small to be in the global routing table +- DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48; +- DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24; +- +- -- Delete any non-public or reserved ASNs +- DELETE FROM announcements WHERE NOT ( +- (autnum >= 1 AND autnum <= 23455) +- OR +- (autnum >= 23457 AND autnum <= 64495) +- OR +- (autnum >= 131072 AND autnum <= 4199999999) +- ); +- +- -- Delete everything that we have not seen for 14 days +- DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days'; +- """) +- +- def _handle_update_announcements_from_bird(self, server): +- # Pre-compile the regular expression for faster searching +- route = re.compile(b"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$") +- +- log.info("Requesting routing table from Bird (%s)" % server) +- +- aggregated_networks = [] +- +- # Send command to list all routes +- for line in self._bird_cmd(server, "show route"): +- m = route.match(line) +- if not m: +- # Skip empty lines +- if not line: +- pass +- +- # Ignore any header lines with the name of the routing table +- elif line.startswith(b"Table"): +- pass +- +- # Log anything else +- else: +- log.debug("Could not parse line: %s" % line.decode()) +- +- continue +- +- # Fetch the extracted network and ASN +- network, autnum = m.groups() +- +- # Decode into strings +- if network: +- network = network.decode() +- if autnum: +- autnum = autnum.decode() +- +- # Collect all aggregated networks +- if not autnum: +- log.debug("%s is an aggregated network" % network) +- aggregated_networks.append(network) +- continue +- +- # Insert it into the database +- self.db.execute("INSERT INTO announcements(network, autnum) \ +- VALUES(%s, %s) ON CONFLICT (network) DO \ +- UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP", +- network, autnum, +- ) +- +- # Process any aggregated networks +- for network in aggregated_networks: +- log.debug("Processing aggregated network %s" % network) +- +- # Run "show route all" for each network +- for line in self._bird_cmd(server, "show route %s all" % network): +- # Try finding the path +- m = re.match(b"\s+BGP\.as_path:.* (\d+) {\d+}$", line) +- if m: +- # Select the last AS number in the path +- autnum = m.group(1).decode() +- +- # Insert it into the database +- self.db.execute("INSERT INTO announcements(network, autnum) \ +- VALUES(%s, %s) ON CONFLICT (network) DO \ +- UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP", +- network, autnum, +- ) +- +- # We don't need to process any more +- break +- +- def _handle_update_announcements_from_telnet(self, server): +- # Pre-compile regular expression for routes +- route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL) +- +- with telnetlib.Telnet(server) as t: +- # Enable debug mode 
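The pre-compiled route pattern above is meant to swallow one full "show bgp" table entry, capturing the announced prefix and the origin ASN; a quick sketch against a fabricated line:

    import re

    route = re.compile(rb"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n",
        re.MULTILINE | re.DOTALL)

    # Fabricated "show bgp" entry; real entries may span multiple lines,
    # which is why the caller reads until b"i\r\n".
    line = b"*>i192.0.2.0/24 198.51.100.1 0 100 0 64496 65001 i\r\n"

    m = route.match(line)
    if m:
        network, autnum = m.groups()
        print(network.decode(), int(autnum))  # 192.0.2.0/24 65001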
+- #if ns.debug: +- # t.set_debuglevel(10) +- +- # Wait for console greeting +- greeting = t.read_until(b"> ", timeout=30) +- if not greeting: +- log.error("Could not get a console prompt") +- return 1 +- +- # Disable pagination +- t.write(b"terminal length 0\n") +- +- # Wait for the prompt to return +- t.read_until(b"> ") +- +- # Fetch the routing tables +- for protocol in ("ipv6", "ipv4"): +- log.info("Requesting %s routing table" % protocol) +- +- # Request the full unicast routing table +- t.write(b"show bgp %s unicast\n" % protocol.encode()) +- +- # Read entire header which ends with "Path" +- t.read_until(b"Path\r\n") +- +- while True: +- # Try reading a full entry +- # Those might be broken across multiple lines but ends with i +- line = t.read_until(b"i\r\n", timeout=5) +- if not line: +- break +- +- # Show line for debugging +- #log.debug(repr(line)) +- +- # Try finding a route in here +- m = route.match(line) +- if m: +- network, autnum = m.groups() +- +- # Convert network to string +- network = network.decode() +- +- # Append /24 for IPv4 addresses +- if not "/" in network and not ":" in network: +- network = "%s/24" % network +- +- # Convert AS number to integer +- autnum = int(autnum) +- +- log.info("Found announcement for %s by %s" % (network, autnum)) +- +- self.db.execute("INSERT INTO announcements(network, autnum) \ +- VALUES(%s, %s) ON CONFLICT (network) DO \ +- UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP", +- network, autnum, +- ) +- +- log.info("Finished reading the %s routing table" % protocol) +- +- def _bird_cmd(self, socket_path, command): +- # Connect to the socket +- s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +- s.connect(socket_path) +- +- # Allocate some buffer +- buffer = b"" +- +- log.debug("Sending Bird command: %s" % command) +- +- # Send the command +- s.send(b"%s\n" % command.encode()) +- +- while True: +- # Fill up the buffer +- buffer += s.recv(4096) +- +- while True: +- # Search for the next newline +- pos = buffer.find(b"\n") +- +- # If we cannot find one, we go back and read more data +- if pos <= 0: +- break +- +- # Cut after the newline character +- pos += 1 +- +- # Split the line we want and keep the rest in buffer +- line, buffer = buffer[:pos], buffer[pos:] +- +- # Try parsing any status lines +- if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45): +- code, delim, line = int(line[:4]), line[4], line[5:] +- +- log.debug("Received response code %s from bird" % code) +- +- # End of output +- if code == 0: +- return +- +- # Ignore hello line +- elif code == 1: +- continue +- +- # Otherwise return the line +- yield line +- +- def handle_update_overrides(self, ns): +- with self.db.transaction(): +- # Drop all data that we have +- self.db.execute(""" +- TRUNCATE TABLE autnum_overrides; +- TRUNCATE TABLE network_overrides; +- """) +- +- # Update overrides for various cloud providers big enough to publish their own IP +- # network allocation lists in a machine-readable format... +- self._update_overrides_for_aws() +- +- # Update overrides for Spamhaus DROP feeds... +- self._update_overrides_for_spamhaus_drop() +- +- for file in ns.files: +- log.info("Reading %s..." 
% file) +- +- with open(file, "rb") as f: +- for type, block in location.importer.read_blocks(f): +- if type == "net": +- network = block.get("net") +- # Try to parse and normalise the network +- try: +- network = ipaddress.ip_network(network, strict=False) +- except ValueError as e: +- log.warning("Invalid IP network: %s: %s" % (network, e)) +- continue +- +- # Prevent that we overwrite all networks +- if network.prefixlen == 0: +- log.warning("Skipping %s: You cannot overwrite default" % network) +- continue +- +- self.db.execute(""" +- INSERT INTO network_overrides( +- network, +- country, +- source, +- is_anonymous_proxy, +- is_satellite_provider, +- is_anycast, +- is_drop +- ) VALUES (%s, %s, %s, %s, %s, %s, %s) +- ON CONFLICT (network) DO NOTHING""", +- "%s" % network, +- block.get("country"), +- "manual", +- self._parse_bool(block, "is-anonymous-proxy"), +- self._parse_bool(block, "is-satellite-provider"), +- self._parse_bool(block, "is-anycast"), +- self._parse_bool(block, "drop"), +- ) +- +- elif type == "aut-num": +- autnum = block.get("aut-num") +- +- # Check if AS number begins with "AS" +- if not autnum.startswith("AS"): +- log.warning("Invalid AS number: %s" % autnum) +- continue +- +- # Strip "AS" +- autnum = autnum[2:] +- +- self.db.execute(""" +- INSERT INTO autnum_overrides( +- number, +- name, +- country, +- source, +- is_anonymous_proxy, +- is_satellite_provider, +- is_anycast, +- is_drop +- ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s) +- ON CONFLICT DO NOTHING""", +- autnum, +- block.get("name"), +- block.get("country"), +- "manual", +- self._parse_bool(block, "is-anonymous-proxy"), +- self._parse_bool(block, "is-satellite-provider"), +- self._parse_bool(block, "is-anycast"), +- self._parse_bool(block, "drop"), +- ) +- +- else: +- log.warning("Unsupported type: %s" % type) +- +- def _update_overrides_for_aws(self): +- # Download Amazon AWS IP allocation file to create overrides... +- downloader = location.importer.Downloader() +- +- try: +- with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f: +- aws_ip_dump = json.load(f.body) +- except Exception as e: +- log.error("unable to preprocess Amazon AWS IP ranges: %s" % e) +- return +- +- # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately, +- # there seems to be no machine-readable version available of this other than +- # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html +- # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints +- # was helpful here as well. +- aws_region_country_map = { +- "af-south-1": "ZA", +- "ap-east-1": "HK", +- "ap-south-1": "IN", +- "ap-south-2": "IN", +- "ap-northeast-3": "JP", +- "ap-northeast-2": "KR", +- "ap-southeast-1": "SG", +- "ap-southeast-2": "AU", +- "ap-southeast-3": "MY", +- "ap-southeast-4": "AU", +- "ap-northeast-1": "JP", +- "ca-central-1": "CA", +- "eu-central-1": "DE", +- "eu-central-2": "CH", +- "eu-west-1": "IE", +- "eu-west-2": "GB", +- "eu-south-1": "IT", +- "eu-south-2": "ES", +- "eu-west-3": "FR", +- "eu-north-1": "SE", +- "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself +- "me-central-1": "AE", +- "me-south-1": "BH", +- "sa-east-1": "BR" +- } +- +- # Fetch all valid country codes to check parsed networks aganist... 
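Condensed, the region-to-country resolution used below amounts to two prefix checks, one anycast marker and a dictionary lookup; a hedged sketch (region_to_cc is a hypothetical helper, and the map is abbreviated from the table above):

    aws_region_country_map = {"eu-central-1": "DE"}  # abbreviated

    def region_to_cc(region):
        # "us-*" and "cn-*" regions map straight to their country,
        # while "GLOBAL" marks anycast-like networks without one.
        if region.startswith("us-"):
            return "US"
        if region.startswith("cn-"):
            return "CN"
        if region == "GLOBAL":
            return None
        return aws_region_country_map.get(region)

    print(region_to_cc("eu-central-1"))  # DE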
+- rows = self.db.query("SELECT * FROM countries ORDER BY country_code") +- validcountries = [] +- +- for row in rows: +- validcountries.append(row.country_code) +- +- with self.db.transaction(): +- for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]: +- try: +- network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False) +- except ValueError: +- log.warning("Unable to parse line: %s" % snetwork) +- continue +- +- # Sanitize parsed networks... +- if not self._check_parsed_network(network): +- continue +- +- # Determine region of this network... +- region = snetwork["region"] +- cc = None +- is_anycast = False +- +- # Any region name starting with "us-" will get "US" country code assigned straight away... +- if region.startswith("us-"): +- cc = "US" +- elif region.startswith("cn-"): +- # ... same goes for China ... +- cc = "CN" +- elif region == "GLOBAL": +- # ... funny region name for anycast-like networks ... +- is_anycast = True +- elif region in aws_region_country_map: +- # ... assign looked up country code otherwise ... +- cc = aws_region_country_map[region] +- else: +- # ... and bail out if we are missing something here +- log.warning("Unable to determine country code for line: %s" % snetwork) +- continue +- +- # Skip networks with unknown country codes +- if not is_anycast and validcountries and cc not in validcountries: +- log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \ +- (cc, network)) +- return +- +- # Conduct SQL statement... +- self.db.execute(""" +- INSERT INTO network_overrides( +- network, +- country, +- source, +- is_anonymous_proxy, +- is_satellite_provider, +- is_anycast +- ) VALUES (%s, %s, %s, %s, %s, %s) +- ON CONFLICT (network) DO NOTHING""", +- "%s" % network, +- cc, +- "Amazon AWS IP feed", +- None, +- None, +- is_anycast, +- ) +- +- +- def _update_overrides_for_spamhaus_drop(self): +- downloader = location.importer.Downloader() +- +- ip_urls = [ +- "https://www.spamhaus.org/drop/drop.txt", +- "https://www.spamhaus.org/drop/edrop.txt", +- "https://www.spamhaus.org/drop/dropv6.txt" +- ] +- +- asn_urls = [ +- "https://www.spamhaus.org/drop/asndrop.txt" +- ] +- +- for url in ip_urls: +- try: +- with downloader.request(url, return_blocks=False) as f: +- fcontent = f.body.readlines() +- except Exception as e: +- log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e)) +- return +- +- # Iterate through every line, filter comments and add remaining networks to +- # the override table in case they are valid... +- with self.db.transaction(): +- for sline in fcontent: +- +- # The response is assumed to be encoded in UTF-8... +- sline = sline.decode("utf-8") +- +- # Comments start with a semicolon... +- if sline.startswith(";"): +- continue +- +- # Extract network and ignore anything afterwards... +- try: +- network = ipaddress.ip_network(sline.split()[0], strict=False) +- except ValueError: +- log.error("Unable to parse line: %s" % sline) +- continue +- +- # Sanitize parsed networks... +- if not self._check_parsed_network(network): +- log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \ +- (url, network)) +- continue +- +- # Conduct SQL statement... 
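The DROP files themselves are plain text with one network per line and a ";"-prefixed comment tail, which is all the filtering above relies on; roughly, with a fabricated entry:

    import ipaddress

    sline = "192.0.2.0/24 ; SBL123456"  # fabricated DROP entry
    if not sline.startswith(";"):
        network = ipaddress.ip_network(sline.split()[0], strict=False)
        print(network)  # 192.0.2.0/24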
+- self.db.execute(""" +- INSERT INTO network_overrides( +- network, +- source, +- is_drop +- ) VALUES (%s, %s, %s) +- ON CONFLICT (network) DO UPDATE SET is_drop = True""", +- "%s" % network, +- "Spamhaus DROP lists", +- True +- ) +- +- for url in asn_urls: +- try: +- with downloader.request(url, return_blocks=False) as f: +- fcontent = f.body.readlines() +- except Exception as e: +- log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e)) +- return +- +- # Iterate through every line, filter comments and add remaining ASNs to +- # the override table in case they are valid... +- with self.db.transaction(): +- for sline in fcontent: +- +- # The response is assumed to be encoded in UTF-8... +- sline = sline.decode("utf-8") +- +- # Comments start with a semicolon... +- if sline.startswith(";"): +- continue +- +- # Throw away anything after the first space... +- sline = sline.split()[0] +- +- # ... strip the "AS" prefix from it ... +- sline = sline.strip("AS") +- +- # ... and convert it into an integer. Voila. +- asn = int(sline) +- +- # Filter invalid ASNs... +- if not self._check_parsed_asn(asn): +- log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \ +- (url, asn)) +- continue +- +- # Conduct SQL statement... +- self.db.execute(""" +- INSERT INTO autnum_overrides( +- number, +- source, +- is_drop +- ) VALUES (%s, %s, %s) +- ON CONFLICT (number) DO UPDATE SET is_drop = True""", +- "%s" % asn, +- "Spamhaus ASN-DROP list", +- True +- ) +- +- @staticmethod +- def _parse_bool(block, key): +- val = block.get(key) +- +- # There is no point to proceed when we got None +- if val is None: +- return +- +- # Convert to lowercase +- val = val.lower() +- +- # True +- if val in ("yes", "1"): +- return True +- +- # False +- if val in ("no", "0"): +- return False +- +- # Default to None +- return None +- +- def handle_import_countries(self, ns): +- with self.db.transaction(): +- # Drop all data that we have +- self.db.execute("TRUNCATE TABLE countries") +- +- for file in ns.file: +- for line in file: +- line = line.rstrip() +- +- # Ignore any comments +- if line.startswith("#"): +- continue +- +- try: +- country_code, continent_code, name = line.split(maxsplit=2) +- except: +- log.warning("Could not parse line: %s" % line) +- continue +- +- self.db.execute("INSERT INTO countries(country_code, name, continent_code) \ +- VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code) +- +- +-def split_line(line): +- key, colon, val = line.partition(":") +- +- # Strip any excess space +- key = key.strip() +- val = val.strip() +- +- return key, val +- +-def main(): +- # Run the command line interface +- c = CLI() +- c.run() +- +-main() +diff --git a/src/python/location.in b/src/python/location.in +deleted file mode 100644 +index 233cea0..0000000 +--- a/src/python/location.in ++++ /dev/null +@@ -1,644 +0,0 @@ +-#!/usr/bin/python3 +-############################################################################### +-# # +-# libloc - A library to determine the location of someone on the Internet # +-# # +-# Copyright (C) 2017-2021 IPFire Development Team # +-# # +-# This library is free software; you can redistribute it and/or # +-# modify it under the terms of the GNU Lesser General Public # +-# License as published by the Free Software Foundation; either # +-# version 2.1 of the License, or (at your option) any later version. 
# +-# # +-# This library is distributed in the hope that it will be useful, # +-# but WITHOUT ANY WARRANTY; without even the implied warranty of # +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +-# Lesser General Public License for more details. # +-# # +-############################################################################### +- +-import argparse +-import datetime +-import ipaddress +-import logging +-import os +-import re +-import shutil +-import socket +-import sys +-import time +- +-# Load our location module +-import location +-import location.downloader +-import location.export +- +-from location.i18n import _ +- +-# Setup logging +-log = logging.getLogger("location") +- +-# Output formatters +- +-class CLI(object): +- def parse_cli(self): +- parser = argparse.ArgumentParser( +- description=_("Location Database Command Line Interface"), +- ) +- subparsers = parser.add_subparsers() +- +- # Global configuration flags +- parser.add_argument("--debug", action="store_true", +- help=_("Enable debug output")) +- parser.add_argument("--quiet", action="store_true", +- help=_("Enable quiet mode")) +- +- # version +- parser.add_argument("--version", action="version", +- version="%(prog)s @VERSION@") +- +- # database +- parser.add_argument("--database", "-d", +- default="@databasedir@/database.db", help=_("Path to database"), +- ) +- +- # public key +- parser.add_argument("--public-key", "-k", +- default="@databasedir@/signing-key.pem", help=_("Public Signing Key"), +- ) +- +- # Show the database version +- version = subparsers.add_parser("version", +- help=_("Show database version")) +- version.set_defaults(func=self.handle_version) +- +- # lookup an IP address +- lookup = subparsers.add_parser("lookup", +- help=_("Lookup one or multiple IP addresses"), +- ) +- lookup.add_argument("address", nargs="+") +- lookup.set_defaults(func=self.handle_lookup) +- +- # Dump the whole database +- dump = subparsers.add_parser("dump", +- help=_("Dump the entire database"), +- ) +- dump.add_argument("output", nargs="?", type=argparse.FileType("w")) +- dump.set_defaults(func=self.handle_dump) +- +- # Update +- update = subparsers.add_parser("update", help=_("Update database")) +- update.add_argument("--cron", +- help=_("Update the library only once per interval"), +- choices=("daily", "weekly", "monthly"), +- ) +- update.set_defaults(func=self.handle_update) +- +- # Verify +- verify = subparsers.add_parser("verify", +- help=_("Verify the downloaded database")) +- verify.set_defaults(func=self.handle_verify) +- +- # Get AS +- get_as = subparsers.add_parser("get-as", +- help=_("Get information about one or multiple Autonomous Systems"), +- ) +- get_as.add_argument("asn", nargs="+") +- get_as.set_defaults(func=self.handle_get_as) +- +- # Search for AS +- search_as = subparsers.add_parser("search-as", +- help=_("Search for Autonomous Systems that match the string"), +- ) +- search_as.add_argument("query", nargs=1) +- search_as.set_defaults(func=self.handle_search_as) +- +- # List all networks in an AS +- list_networks_by_as = subparsers.add_parser("list-networks-by-as", +- help=_("Lists all networks in an AS"), +- ) +- list_networks_by_as.add_argument("asn", nargs=1, type=int) +- list_networks_by_as.add_argument("--family", choices=("ipv6", "ipv4")) +- list_networks_by_as.add_argument("--format", +- choices=location.export.formats.keys(), default="list") +- list_networks_by_as.set_defaults(func=self.handle_list_networks_by_as) +- +- # List all networks in a country +- 
list_networks_by_cc = subparsers.add_parser("list-networks-by-cc", +- help=_("Lists all networks in a country"), +- ) +- list_networks_by_cc.add_argument("country_code", nargs=1) +- list_networks_by_cc.add_argument("--family", choices=("ipv6", "ipv4")) +- list_networks_by_cc.add_argument("--format", +- choices=location.export.formats.keys(), default="list") +- list_networks_by_cc.set_defaults(func=self.handle_list_networks_by_cc) +- +- # List all networks with flags +- list_networks_by_flags = subparsers.add_parser("list-networks-by-flags", +- help=_("Lists all networks with flags"), +- ) +- list_networks_by_flags.add_argument("--anonymous-proxy", +- action="store_true", help=_("Anonymous Proxies"), +- ) +- list_networks_by_flags.add_argument("--satellite-provider", +- action="store_true", help=_("Satellite Providers"), +- ) +- list_networks_by_flags.add_argument("--anycast", +- action="store_true", help=_("Anycasts"), +- ) +- list_networks_by_flags.add_argument("--drop", +- action="store_true", help=_("Hostile Networks safe to drop"), +- ) +- list_networks_by_flags.add_argument("--family", choices=("ipv6", "ipv4")) +- list_networks_by_flags.add_argument("--format", +- choices=location.export.formats.keys(), default="list") +- list_networks_by_flags.set_defaults(func=self.handle_list_networks_by_flags) +- +- # List bogons +- list_bogons = subparsers.add_parser("list-bogons", +- help=_("Lists all bogons"), +- ) +- list_bogons.add_argument("--family", choices=("ipv6", "ipv4")) +- list_bogons.add_argument("--format", +- choices=location.export.formats.keys(), default="list") +- list_bogons.set_defaults(func=self.handle_list_bogons) +- +- # List countries +- list_countries = subparsers.add_parser("list-countries", +- help=_("Lists all countries"), +- ) +- list_countries.add_argument("--show-name", +- action="store_true", help=_("Show the name of the country"), +- ) +- list_countries.add_argument("--show-continent", +- action="store_true", help=_("Show the continent"), +- ) +- list_countries.set_defaults(func=self.handle_list_countries) +- +- # Export +- export = subparsers.add_parser("export", +- help=_("Exports data in many formats to load it into packet filters"), +- ) +- export.add_argument("--format", help=_("Output format"), +- choices=location.export.formats.keys(), default="list") +- export.add_argument("--directory", help=_("Output directory")) +- export.add_argument("--family", +- help=_("Specify address family"), choices=("ipv6", "ipv4"), +- ) +- export.add_argument("objects", nargs="*", help=_("List country codes or ASNs to export")) +- export.set_defaults(func=self.handle_export) +- +- args = parser.parse_args() +- +- # Configure logging +- if args.debug: +- location.logger.set_level(logging.DEBUG) +- elif args.quiet: +- location.logger.set_level(logging.WARNING) +- +- # Print usage if no action was given +- if not "func" in args: +- parser.print_usage() +- sys.exit(2) +- +- return args +- +- def run(self): +- # Parse command line arguments +- args = self.parse_cli() +- +- # Open database +- try: +- db = location.Database(args.database) +- except FileNotFoundError as e: +- # Allow continuing without a database +- if args.func == self.handle_update: +- db = None +- +- else: +- sys.stderr.write("location: Could not open database %s: %s\n" \ +- % (args.database, e)) +- sys.exit(1) +- +- # Translate family (if present) +- if "family" in args: +- if args.family == "ipv6": +- args.family = socket.AF_INET6 +- elif args.family == "ipv4": +- args.family = socket.AF_INET +- else: +- 
args.family = 0 +- +- # Call function +- try: +- ret = args.func(db, args) +- +- # Catch invalid inputs +- except ValueError as e: +- sys.stderr.write("%s\n" % e) +- ret = 2 +- +- # Catch any other exceptions +- except Exception as e: +- sys.stderr.write("%s\n" % e) +- ret = 1 +- +- # Return with exit code +- if ret: +- sys.exit(ret) +- +- # Otherwise just exit +- sys.exit(0) +- +- def handle_version(self, db, ns): +- """ +- Print the version of the database +- """ +- t = time.strftime( +- "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at), +- ) +- +- print(t) +- +- def handle_lookup(self, db, ns): +- ret = 0 +- +- format = " %-24s: %s" +- +- for address in ns.address: +- try: +- network = db.lookup(address) +- except ValueError: +- print(_("Invalid IP address: %s") % address, file=sys.stderr) +- return 2 +- +- args = { +- "address" : address, +- "network" : network, +- } +- +- # Nothing found? +- if not network: +- print(_("Nothing found for %(address)s") % args, file=sys.stderr) +- ret = 1 +- continue +- +- print("%s:" % address) +- print(format % (_("Network"), network)) +- +- # Print country +- if network.country_code: +- country = db.get_country(network.country_code) +- +- print(format % ( +- _("Country"), +- country.name if country else network.country_code), +- ) +- +- # Print AS information +- if network.asn: +- autonomous_system = db.get_as(network.asn) +- +- print(format % ( +- _("Autonomous System"), +- autonomous_system or "AS%s" % network.asn), +- ) +- +- # Anonymous Proxy +- if network.has_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY): +- print(format % ( +- _("Anonymous Proxy"), _("yes"), +- )) +- +- # Satellite Provider +- if network.has_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER): +- print(format % ( +- _("Satellite Provider"), _("yes"), +- )) +- +- # Anycast +- if network.has_flag(location.NETWORK_FLAG_ANYCAST): +- print(format % ( +- _("Anycast"), _("yes"), +- )) +- +- # Hostile Network +- if network.has_flag(location.NETWORK_FLAG_DROP): +- print(format % ( +- _("Hostile Network safe to drop"), _("yes"), +- )) +- +- return ret +- +- def handle_dump(self, db, ns): +- # Use output file or write to stdout +- f = ns.output or sys.stdout +- +- # Format everything like this +- format = "%-24s %s\n" +- +- # Write metadata +- f.write("#\n# Location Database Export\n#\n") +- +- f.write("# Generated: %s\n" % time.strftime( +- "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at), +- )) +- +- if db.vendor: +- f.write("# Vendor: %s\n" % db.vendor) +- +- if db.license: +- f.write("# License: %s\n" % db.license) +- +- f.write("#\n") +- +- if db.description: +- for line in db.description.splitlines(): +- line = "# %s" % line +- f.write("%s\n" % line.rstrip()) +- +- f.write("#\n") +- +- # Iterate over all ASes +- for a in db.ases: +- f.write("\n") +- f.write(format % ("aut-num:", "AS%s" % a.number)) +- f.write(format % ("name:", a.name)) +- +- flags = { +- location.NETWORK_FLAG_ANONYMOUS_PROXY : "is-anonymous-proxy:", +- location.NETWORK_FLAG_SATELLITE_PROVIDER : "is-satellite-provider:", +- location.NETWORK_FLAG_ANYCAST : "is-anycast:", +- location.NETWORK_FLAG_DROP : "drop:", +- } +- +- # Iterate over all networks +- for n in db.networks: +- f.write("\n") +- f.write(format % ("net:", n)) +- +- if n.country_code: +- f.write(format % ("country:", n.country_code)) +- +- if n.asn: +- f.write(format % ("aut-num:", n.asn)) +- +- # Print all flags +- for flag in flags: +- if n.has_flag(flag): +- f.write(format % (flags[flag], "yes")) +- +- def handle_get_as(self, db, ns): +- """ +- 
Gets information about Autonomous Systems +- """ +- ret = 0 +- +- for asn in ns.asn: +- try: +- asn = int(asn) +- except ValueError: +- print(_("Invalid ASN: %s") % asn, file=sys.stderr) +- ret = 1 +- continue +- +- # Fetch AS from database +- a = db.get_as(asn) +- +- # Nothing found +- if not a: +- print(_("Could not find AS%s") % asn, file=sys.stderr) +- ret = 1 +- continue +- +- print(_("AS%(asn)s belongs to %(name)s") % { "asn" : a.number, "name" : a.name }) +- +- return ret +- +- def handle_search_as(self, db, ns): +- for query in ns.query: +- # Print all matches ASes +- for a in db.search_as(query): +- print(a) +- +- def handle_update(self, db, ns): +- if ns.cron and db: +- now = time.time() +- +- if ns.cron == "daily": +- delta = datetime.timedelta(days=1) +- elif ns.cron == "weekly": +- delta = datetime.timedelta(days=7) +- elif ns.cron == "monthly": +- delta = datetime.timedelta(days=30) +- +- delta = delta.total_seconds() +- +- # Check if the database has recently been updated +- if db.created_at >= (now - delta): +- log.info( +- _("The database has been updated recently"), +- ) +- return 3 +- +- # Fetch the timestamp we need from DNS +- t = location.discover_latest_version() +- +- # Check the version of the local database +- if db and t and db.created_at >= t: +- log.info("Already on the latest version") +- return +- +- # Download the database into the correct directory +- tmpdir = os.path.dirname(ns.database) +- +- # Create a downloader +- d = location.downloader.Downloader() +- +- # Try downloading a new database +- try: +- t = d.download(public_key=ns.public_key, timestamp=t, tmpdir=tmpdir) +- +- # If no file could be downloaded, log a message +- except FileNotFoundError as e: +- log.error("Could not download a new database") +- return 1 +- +- # If we have not received a new file, there is nothing to do +- if not t: +- return 3 +- +- # Move temporary file to destination +- shutil.move(t.name, ns.database) +- +- return 0 +- +- def handle_verify(self, db, ns): +- # Verify the database +- with open(ns.public_key, "r") as f: +- if not db.verify(f): +- log.error("Could not verify database") +- return 1 +- +- # Success +- log.debug("Database successfully verified") +- return 0 +- +- def __get_output_formatter(self, ns): +- try: +- cls = location.export.formats[ns.format] +- except KeyError: +- cls = location.export.OutputFormatter +- +- return cls +- +- def handle_list_countries(self, db, ns): +- for country in db.countries: +- line = [ +- country.code, +- ] +- +- if ns.show_continent: +- line.append(country.continent_code) +- +- if ns.show_name: +- line.append(country.name) +- +- # Format the output +- line = " ".join(line) +- +- # Print the output +- print(line) +- +- def handle_list_networks_by_as(self, db, ns): +- writer = self.__get_output_formatter(ns) +- +- for asn in ns.asn: +- f = writer("AS%s" % asn, f=sys.stdout) +- +- # Print all matching networks +- for n in db.search_networks(asns=[asn], family=ns.family): +- f.write(n) +- +- f.finish() +- +- def handle_list_networks_by_cc(self, db, ns): +- writer = self.__get_output_formatter(ns) +- +- for country_code in ns.country_code: +- # Open standard output +- f = writer(country_code, f=sys.stdout) +- +- # Print all matching networks +- for n in db.search_networks(country_codes=[country_code], family=ns.family): +- f.write(n) +- +- f.finish() +- +- def handle_list_networks_by_flags(self, db, ns): +- flags = 0 +- +- if ns.anonymous_proxy: +- flags |= location.NETWORK_FLAG_ANONYMOUS_PROXY +- +- if ns.satellite_provider: +- flags 
|= location.NETWORK_FLAG_SATELLITE_PROVIDER +- +- if ns.anycast: +- flags |= location.NETWORK_FLAG_ANYCAST +- +- if ns.drop: +- flags |= location.NETWORK_FLAG_DROP +- +- if not flags: +- raise ValueError(_("You must at least pass one flag")) +- +- writer = self.__get_output_formatter(ns) +- f = writer("custom", f=sys.stdout) +- +- for n in db.search_networks(flags=flags, family=ns.family): +- f.write(n) +- +- f.finish() +- +- def handle_list_bogons(self, db, ns): +- writer = self.__get_output_formatter(ns) +- f = writer("bogons", f=sys.stdout) +- +- for n in db.list_bogons(family=ns.family): +- f.write(n) +- +- f.finish() +- +- def handle_export(self, db, ns): +- countries, asns = [], [] +- +- # Translate family +- if ns.family: +- families = [ ns.family ] +- else: +- families = [ socket.AF_INET6, socket.AF_INET ] +- +- for object in ns.objects: +- m = re.match("^AS(\d+)$", object) +- if m: +- object = int(m.group(1)) +- +- asns.append(object) +- +- elif location.country_code_is_valid(object) \ +- or object in ("A1", "A2", "A3", "XD"): +- countries.append(object) +- +- else: +- log.warning("Invalid argument: %s" % object) +- continue +- +- # Default to exporting all countries +- if not countries and not asns: +- countries = ["A1", "A2", "A3", "XD"] + [country.code for country in db.countries] +- +- # Select the output format +- writer = self.__get_output_formatter(ns) +- +- e = location.export.Exporter(db, writer) +- e.export(ns.directory, countries=countries, asns=asns, families=families) +- +- +-def format_timedelta(t): +- s = [] +- +- if t.days: +- s.append( +- _("One Day", "%(days)s Days", t.days) % { "days" : t.days, } +- ) +- +- hours = t.seconds // 3600 +- if hours: +- s.append( +- _("One Hour", "%(hours)s Hours", hours) % { "hours" : hours, } +- ) +- +- minutes = (t.seconds % 3600) // 60 +- if minutes: +- s.append( +- _("One Minute", "%(minutes)s Minutes", minutes) % { "minutes" : minutes, } +- ) +- +- seconds = t.seconds % 60 +- if t.seconds: +- s.append( +- _("One Second", "%(seconds)s Seconds", seconds) % { "seconds" : seconds, } +- ) +- +- if not s: +- return _("Now") +- +- return _("%s ago") % ", ".join(s) +- +-def main(): +- # Run the command line interface +- c = CLI() +- c.run() +- +-main() +diff --git a/src/python/location/__init__.py b/src/python/location/__init__.py +new file mode 100644 +index 0000000..9b570c7 +--- /dev/null ++++ b/src/python/location/__init__.py +@@ -0,0 +1,24 @@ ++#!/usr/bin/python3 ++############################################################################### ++# # ++# libloc - A library to determine the location of someone on the Internet # ++# # ++# Copyright (C) 2020 IPFire Development Team # ++# # ++# This library is free software; you can redistribute it and/or # ++# modify it under the terms of the GNU Lesser General Public # ++# License as published by the Free Software Foundation; either # ++# version 2.1 of the License, or (at your option) any later version. # ++# # ++# This library is distributed in the hope that it will be useful, # ++# but WITHOUT ANY WARRANTY; without even the implied warranty of # ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # ++# Lesser General Public License for more details. # ++# # ++############################################################################### ++ ++# Import everything from the C module ++from _location import * ++ ++# Initialise logging ++from . 
import logger +diff --git a/src/python/location/database.py b/src/python/location/database.py +new file mode 100644 +index 0000000..5d79941 +--- /dev/null ++++ b/src/python/location/database.py +@@ -0,0 +1,213 @@ ++#!/usr/bin/env python ++ ++""" ++ A lightweight wrapper around psycopg2. ++ ++ Originally part of the Tornado framework. The tornado.database module ++ is slated for removal in Tornado 3.0, and it is now available separately ++ as torndb. ++""" ++ ++import logging ++import psycopg2 ++ ++log = logging.getLogger("location.database") ++log.propagate = 1 ++ ++class Connection(object): ++ """ ++ A lightweight wrapper around MySQLdb DB-API connections. ++ ++ The main value we provide is wrapping rows in a dict/object so that ++ columns can be accessed by name. Typical usage:: ++ ++ db = torndb.Connection("localhost", "mydatabase") ++ for article in db.query("SELECT * FROM articles"): ++ print article.title ++ ++ Cursors are hidden by the implementation, but other than that, the methods ++ are very similar to the DB-API. ++ ++ We explicitly set the timezone to UTC and the character encoding to ++ UTF-8 on all connections to avoid time zone and encoding errors. ++ """ ++ def __init__(self, host, database, user=None, password=None): ++ self.host = host ++ self.database = database ++ ++ self._db = None ++ self._db_args = { ++ "host" : host, ++ "database" : database, ++ "user" : user, ++ "password" : password, ++ "sslmode" : "require", ++ } ++ ++ try: ++ self.reconnect() ++ except Exception: ++ log.error("Cannot connect to database on %s", self.host, exc_info=True) ++ ++ def __del__(self): ++ self.close() ++ ++ def close(self): ++ """ ++ Closes this database connection. ++ """ ++ if getattr(self, "_db", None) is not None: ++ self._db.close() ++ self._db = None ++ ++ def reconnect(self): ++ """ ++ Closes the existing database connection and re-opens it. ++ """ ++ self.close() ++ ++ self._db = psycopg2.connect(**self._db_args) ++ self._db.autocommit = True ++ ++ # Initialize the timezone setting. ++ self.execute("SET TIMEZONE TO 'UTC'") ++ ++ def query(self, query, *parameters, **kwparameters): ++ """ ++ Returns a row list for the given query and parameters. ++ """ ++ cursor = self._cursor() ++ try: ++ self._execute(cursor, query, parameters, kwparameters) ++ column_names = [d[0] for d in cursor.description] ++ return [Row(zip(column_names, row)) for row in cursor] ++ finally: ++ cursor.close() ++ ++ def get(self, query, *parameters, **kwparameters): ++ """ ++ Returns the first row returned for the given query. ++ """ ++ rows = self.query(query, *parameters, **kwparameters) ++ if not rows: ++ return None ++ elif len(rows) > 1: ++ raise Exception("Multiple rows returned for Database.get() query") ++ else: ++ return rows[0] ++ ++ def execute(self, query, *parameters, **kwparameters): ++ """ ++ Executes the given query, returning the lastrowid from the query. ++ """ ++ return self.execute_lastrowid(query, *parameters, **kwparameters) ++ ++ def execute_lastrowid(self, query, *parameters, **kwparameters): ++ """ ++ Executes the given query, returning the lastrowid from the query. ++ """ ++ cursor = self._cursor() ++ try: ++ self._execute(cursor, query, parameters, kwparameters) ++ return cursor.lastrowid ++ finally: ++ cursor.close() ++ ++ def execute_rowcount(self, query, *parameters, **kwparameters): ++ """ ++ Executes the given query, returning the rowcount from the query. 
++ """ ++ cursor = self._cursor() ++ try: ++ self._execute(cursor, query, parameters, kwparameters) ++ return cursor.rowcount ++ finally: ++ cursor.close() ++ ++ def executemany(self, query, parameters): ++ """ ++ Executes the given query against all the given param sequences. ++ ++ We return the lastrowid from the query. ++ """ ++ return self.executemany_lastrowid(query, parameters) ++ ++ def executemany_lastrowid(self, query, parameters): ++ """ ++ Executes the given query against all the given param sequences. ++ ++ We return the lastrowid from the query. ++ """ ++ cursor = self._cursor() ++ try: ++ cursor.executemany(query, parameters) ++ return cursor.lastrowid ++ finally: ++ cursor.close() ++ ++ def executemany_rowcount(self, query, parameters): ++ """ ++ Executes the given query against all the given param sequences. ++ ++ We return the rowcount from the query. ++ """ ++ cursor = self._cursor() ++ ++ try: ++ cursor.executemany(query, parameters) ++ return cursor.rowcount ++ finally: ++ cursor.close() ++ ++ def _ensure_connected(self): ++ if self._db is None: ++ log.warning("Database connection was lost...") ++ ++ self.reconnect() ++ ++ def _cursor(self): ++ self._ensure_connected() ++ return self._db.cursor() ++ ++ def _execute(self, cursor, query, parameters, kwparameters): ++ log.debug("SQL Query: %s" % (query % (kwparameters or parameters))) ++ ++ try: ++ return cursor.execute(query, kwparameters or parameters) ++ except (OperationalError, psycopg2.ProgrammingError): ++ log.error("Error connecting to database on %s", self.host) ++ self.close() ++ raise ++ ++ def transaction(self): ++ return Transaction(self) ++ ++ ++class Row(dict): ++ """A dict that allows for object-like property access syntax.""" ++ def __getattr__(self, name): ++ try: ++ return self[name] ++ except KeyError: ++ raise AttributeError(name) ++ ++ ++class Transaction(object): ++ def __init__(self, db): ++ self.db = db ++ ++ self.db.execute("START TRANSACTION") ++ ++ def __enter__(self): ++ return self ++ ++ def __exit__(self, exctype, excvalue, traceback): ++ if exctype is not None: ++ self.db.execute("ROLLBACK") ++ else: ++ self.db.execute("COMMIT") ++ ++ ++# Alias some common exceptions ++IntegrityError = psycopg2.IntegrityError ++OperationalError = psycopg2.OperationalError +diff --git a/src/python/location/downloader.py b/src/python/location/downloader.py +new file mode 100644 +index 0000000..05f7872 +--- /dev/null ++++ b/src/python/location/downloader.py +@@ -0,0 +1,211 @@ ++#!/usr/bin/python3 ++############################################################################### ++# # ++# libloc - A library to determine the location of someone on the Internet # ++# # ++# Copyright (C) 2020 IPFire Development Team # ++# # ++# This library is free software; you can redistribute it and/or # ++# modify it under the terms of the GNU Lesser General Public # ++# License as published by the Free Software Foundation; either # ++# version 2.1 of the License, or (at your option) any later version. # ++# # ++# This library is distributed in the hope that it will be useful, # ++# but WITHOUT ANY WARRANTY; without even the implied warranty of # ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # ++# Lesser General Public License for more details. 
# ++# # ++############################################################################### ++ ++import logging ++import lzma ++import os ++import random ++import stat ++import tempfile ++import time ++import urllib.error ++import urllib.parse ++import urllib.request ++ ++from . import __version__ ++from _location import Database, DATABASE_VERSION_LATEST ++ ++DATABASE_FILENAME = "location.db.xz" ++MIRRORS = ( ++ "https://location.ipfire.org/databases/", ++) ++ ++# Initialise logging ++log = logging.getLogger("location.downloader") ++log.propagate = 1 ++ ++class Downloader(object): ++ def __init__(self, version=DATABASE_VERSION_LATEST, mirrors=None): ++ self.version = version ++ ++ # Set mirrors or use defaults ++ self.mirrors = list(mirrors or MIRRORS) ++ ++ # Randomize mirrors ++ random.shuffle(self.mirrors) ++ ++ # Get proxies from environment ++ self.proxies = self._get_proxies() ++ ++ def _get_proxies(self): ++ proxies = {} ++ ++ for protocol in ("https", "http"): ++ proxy = os.environ.get("%s_proxy" % protocol, None) ++ ++ if proxy: ++ proxies[protocol] = proxy ++ ++ return proxies ++ ++ def _make_request(self, url, baseurl=None, headers={}): ++ if baseurl: ++ url = urllib.parse.urljoin(baseurl, url) ++ ++ req = urllib.request.Request(url, method="GET") ++ ++ # Update headers ++ headers.update({ ++ "User-Agent" : "location/%s" % __version__, ++ }) ++ ++ # Set headers ++ for header in headers: ++ req.add_header(header, headers[header]) ++ ++ # Set proxies ++ for protocol in self.proxies: ++ req.set_proxy(self.proxies[protocol], protocol) ++ ++ return req ++ ++ def _send_request(self, req, **kwargs): ++ # Log request headers ++ log.debug("HTTP %s Request to %s" % (req.method, req.host)) ++ log.debug(" URL: %s" % req.full_url) ++ log.debug(" Headers:") ++ for k, v in req.header_items(): ++ log.debug(" %s: %s" % (k, v)) ++ ++ try: ++ res = urllib.request.urlopen(req, **kwargs) ++ ++ except urllib.error.HTTPError as e: ++ # Log response headers ++ log.debug("HTTP Response: %s" % e.code) ++ log.debug(" Headers:") ++ for header in e.headers: ++ log.debug(" %s: %s" % (header, e.headers[header])) ++ ++ # Raise all other errors ++ raise e ++ ++ # Log response headers ++ log.debug("HTTP Response: %s" % res.code) ++ log.debug(" Headers:") ++ for k, v in res.getheaders(): ++ log.debug(" %s: %s" % (k, v)) ++ ++ return res ++ ++ def download(self, public_key, timestamp=None, tmpdir=None, **kwargs): ++ url = "%s/%s" % (self.version, DATABASE_FILENAME) ++ ++ headers = {} ++ if timestamp: ++ headers["If-Modified-Since"] = time.strftime( ++ "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(timestamp), ++ ) ++ ++ t = tempfile.NamedTemporaryFile(dir=tmpdir, delete=False) ++ with t: ++ # Try all mirrors ++ for mirror in self.mirrors: ++ # Prepare HTTP request ++ req = self._make_request(url, baseurl=mirror, headers=headers) ++ ++ try: ++ with self._send_request(req) as res: ++ decompressor = lzma.LZMADecompressor() ++ ++ # Read all data ++ while True: ++ buf = res.read(1024) ++ if not buf: ++ break ++ ++ # Decompress data ++ buf = decompressor.decompress(buf) ++ if buf: ++ t.write(buf) ++ ++ # Write all data to disk ++ t.flush() ++ ++ # Catch decompression errors ++ except lzma.LZMAError as e: ++ log.warning("Could not decompress downloaded file: %s" % e) ++ continue ++ ++ except urllib.error.HTTPError as e: ++ # The file on the server was too old ++ if e.code == 304: ++ log.warning("%s is serving an outdated database. Trying next mirror..." 
% mirror) ++ ++ # Log any other HTTP errors ++ else: ++ log.warning("%s reported: %s" % (mirror, e)) ++ ++ # Throw away any downloaded content and try again ++ t.truncate() ++ ++ else: ++ # Check if the downloaded database is recent ++ if not self._check_database(t, public_key, timestamp): ++ log.warning("Downloaded database is outdated. Trying next mirror...") ++ ++ # Throw away the data and try again ++ t.truncate() ++ continue ++ ++ # Make the file readable for everyone ++ os.chmod(t.name, stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH) ++ ++ # Return temporary file ++ return t ++ ++ # Delete the temporary file after unsuccessful downloads ++ os.unlink(t.name) ++ ++ raise FileNotFoundError(url) ++ ++ def _check_database(self, f, public_key, timestamp=None): ++ """ ++ Checks the downloaded database if it can be opened, ++ verified and if it is recent enough ++ """ ++ log.debug("Opening downloaded database at %s" % f.name) ++ ++ db = Database(f.name) ++ ++ # Database is not recent ++ if timestamp and db.created_at < timestamp: ++ return False ++ ++ log.info("Downloaded new database from %s" % (time.strftime( ++ "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at), ++ ))) ++ ++ # Verify the database ++ with open(public_key, "r") as f: ++ if not db.verify(f): ++ log.error("Could not verify database") ++ return False ++ ++ return True +diff --git a/src/python/location/export.py b/src/python/location/export.py +new file mode 100644 +index 0000000..3cdece4 +--- /dev/null ++++ b/src/python/location/export.py +@@ -0,0 +1,291 @@ ++#!/usr/bin/python3 ++############################################################################### ++# # ++# libloc - A library to determine the location of someone on the Internet # ++# # ++# Copyright (C) 2020-2021 IPFire Development Team # ++# # ++# This library is free software; you can redistribute it and/or # ++# modify it under the terms of the GNU Lesser General Public # ++# License as published by the Free Software Foundation; either # ++# version 2.1 of the License, or (at your option) any later version. # ++# # ++# This library is distributed in the hope that it will be useful, # ++# but WITHOUT ANY WARRANTY; without even the implied warranty of # ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # ++# Lesser General Public License for more details. 
# ++# # ++############################################################################### ++ ++import functools ++import io ++import ipaddress ++import logging ++import math ++import os ++import socket ++import sys ++ ++from .i18n import _ ++import _location ++ ++# Initialise logging ++log = logging.getLogger("location.export") ++log.propagate = 1 ++ ++FLAGS = { ++ _location.NETWORK_FLAG_ANONYMOUS_PROXY : "A1", ++ _location.NETWORK_FLAG_SATELLITE_PROVIDER : "A2", ++ _location.NETWORK_FLAG_ANYCAST : "A3", ++ _location.NETWORK_FLAG_DROP : "XD", ++} ++ ++class OutputWriter(object): ++ suffix = "networks" ++ mode = "w" ++ ++ def __init__(self, name, family=None, directory=None, f=None): ++ self.name = name ++ self.family = family ++ self.directory = directory ++ ++ # Open output file ++ if f: ++ self.f = f ++ elif self.directory: ++ self.f = open(self.filename, self.mode) ++ elif "b" in self.mode: ++ self.f = io.BytesIO() ++ else: ++ self.f = io.StringIO() ++ ++ # Call any custom initialization ++ self.init() ++ ++ # Immediately write the header ++ self._write_header() ++ ++ def init(self): ++ """ ++ To be overwritten by anything that inherits from this ++ """ ++ pass ++ ++ def __repr__(self): ++ return "<%s %s f=%s>" % (self.__class__.__name__, self, self.f) ++ ++ @functools.cached_property ++ def tag(self): ++ families = { ++ socket.AF_INET6 : "6", ++ socket.AF_INET : "4", ++ } ++ ++ return "%sv%s" % (self.name, families.get(self.family, "?")) ++ ++ @functools.cached_property ++ def filename(self): ++ if self.directory: ++ return os.path.join(self.directory, "%s.%s" % (self.tag, self.suffix)) ++ ++ def _write_header(self): ++ """ ++ The header of the file ++ """ ++ pass ++ ++ def _write_footer(self): ++ """ ++ The footer of the file ++ """ ++ pass ++ ++ def write(self, network): ++ self.f.write("%s\n" % network) ++ ++ def finish(self): ++ """ ++ Called when all data has been written ++ """ ++ self._write_footer() ++ ++ # Flush all output ++ self.f.flush() ++ ++ def print(self): ++ """ ++ Prints the entire output line by line ++ """ ++ if isinstance(self.f, io.BytesIO): ++ raise TypeError(_("Won't write binary output to stdout")) ++ ++ # Go back to the beginning ++ self.f.seek(0) ++ ++ # Iterate over everything line by line ++ for line in self.f: ++ sys.stdout.write(line) ++ ++ ++class IpsetOutputWriter(OutputWriter): ++ """ ++ For ipset ++ """ ++ suffix = "ipset" ++ ++ # The value is being used if we don't know any better ++ DEFAULT_HASHSIZE = 64 ++ ++ # We aim for this many networks in a bucket on average. This allows us to choose ++ # how much memory we want to sacrifice to gain better performance. The lower the ++ # factor, the faster a lookup will be, but it will use more memory. ++ # We will aim for only using three quarters of all buckets to avoid any searches ++ # through the linked lists. ++ HASHSIZE_FACTOR = 0.75 ++ ++ def init(self): ++ # Count all networks ++ self.networks = 0 ++ ++ @property ++ def hashsize(self): ++ """ ++ Calculates an optimized hashsize ++ """ ++ # Return the default value if we don't know the size of the set ++ if not self.networks: ++ return self.DEFAULT_HASHSIZE ++ ++ # Find the nearest power of two that is larger than the number of networks ++ # divided by the hashsize factor. 
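To put numbers on the hashsize comment above: the computation that follows rounds networks / HASHSIZE_FACTOR up to the next power of two, with 64 as the floor. A spot check with example values:

    import math

    networks, HASHSIZE_FACTOR = 1000, 0.75  # example values
    exponent = math.log(networks / HASHSIZE_FACTOR, 2)  # ~10.38
    print(max(2 ** math.ceil(exponent), 64))  # -> 2048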
++ exponent = math.log(self.networks / self.HASHSIZE_FACTOR, 2) ++ ++ # Return the size of the hash (the minimum is 64) ++ return max(2 ** math.ceil(exponent), 64) ++ ++ def _write_header(self): ++ # This must have a fixed size, because we will write the header again in the end ++ self.f.write("create %s hash:net family inet%s" % ( ++ self.tag, ++ "6" if self.family == socket.AF_INET6 else "" ++ )) ++ self.f.write(" hashsize %8d maxelem 1048576 -exist\n" % self.hashsize) ++ self.f.write("flush %s\n" % self.tag) ++ ++ def write(self, network): ++ self.f.write("add %s %s\n" % (self.tag, network)) ++ ++ # Increment network counter ++ self.networks += 1 ++ ++ def _write_footer(self): ++ # Jump back to the beginning of the file ++ self.f.seek(0) ++ ++ # Rewrite the header with better configuration ++ self._write_header() ++ ++ ++class NftablesOutputWriter(OutputWriter): ++ """ ++ For nftables ++ """ ++ suffix = "set" ++ ++ def _write_header(self): ++ self.f.write("define %s = {\n" % self.tag) ++ ++ def _write_footer(self): ++ self.f.write("}\n") ++ ++ def write(self, network): ++ self.f.write(" %s,\n" % network) ++ ++ ++class XTGeoIPOutputWriter(OutputWriter): ++ """ ++ Formats the output in that way, that it can be loaded by ++ the xt_geoip kernel module from xtables-addons. ++ """ ++ mode = "wb" ++ ++ @property ++ def tag(self): ++ return self.name ++ ++ @property ++ def suffix(self): ++ return "iv%s" % ("6" if self.family == socket.AF_INET6 else "4") ++ ++ def write(self, network): ++ self.f.write(network._first_address) ++ self.f.write(network._last_address) ++ ++ ++formats = { ++ "ipset" : IpsetOutputWriter, ++ "list" : OutputWriter, ++ "nftables" : NftablesOutputWriter, ++ "xt_geoip" : XTGeoIPOutputWriter, ++} ++ ++class Exporter(object): ++ def __init__(self, db, writer): ++ self.db, self.writer = db, writer ++ ++ def export(self, directory, families, countries, asns): ++ for family in families: ++ log.debug("Exporting family %s" % family) ++ ++ writers = {} ++ ++ # Create writers for countries ++ for country_code in countries: ++ writers[country_code] = self.writer(country_code, family=family, directory=directory) ++ ++ # Create writers for ASNs ++ for asn in asns: ++ writers[asn] = self.writer("AS%s" % asn, family=family, directory=directory) ++ ++ # Filter countries from special country codes ++ country_codes = [ ++ country_code for country_code in countries if not country_code in FLAGS.values() ++ ] ++ ++ # Get all networks that match the family ++ networks = self.db.search_networks(family=family, ++ country_codes=country_codes, asns=asns, flatten=True) ++ ++ # Walk through all networks ++ for network in networks: ++ # Write matching countries ++ try: ++ writers[network.country_code].write(network) ++ except KeyError: ++ pass ++ ++ # Write matching ASNs ++ try: ++ writers[network.asn].write(network) ++ except KeyError: ++ pass ++ ++ # Handle flags ++ for flag in FLAGS: ++ if network.has_flag(flag): ++ # Fetch the "fake" country code ++ country = FLAGS[flag] ++ ++ try: ++ writers[country].write(network) ++ except KeyError: ++ pass ++ ++ # Write everything to the filesystem ++ for writer in writers.values(): ++ writer.finish() ++ ++ # Print to stdout ++ if not directory: ++ for writer in writers.values(): ++ writer.print() +diff --git a/src/python/location/i18n.py b/src/python/location/i18n.py +new file mode 100644 +index 0000000..2161aa6 +--- /dev/null ++++ b/src/python/location/i18n.py +@@ -0,0 +1,26 @@ ++#!/usr/bin/python3 
++############################################################################### ++# # ++# libloc - A library to determine the location of someone on the Internet # ++# # ++# Copyright (C) 2020 IPFire Development Team # ++# # ++# This library is free software; you can redistribute it and/or # ++# modify it under the terms of the GNU Lesser General Public # ++# License as published by the Free Software Foundation; either # ++# version 2.1 of the License, or (at your option) any later version. # ++# # ++# This library is distributed in the hope that it will be useful, # ++# but WITHOUT ANY WARRANTY; without even the implied warranty of # ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # ++# Lesser General Public License for more details. # ++# # ++############################################################################### ++ ++import gettext ++ ++def _(singular, plural=None, n=None): ++ if plural: ++ return gettext.dngettext("libloc", singular, plural, n) ++ ++ return gettext.dgettext("libloc", singular) +diff --git a/src/python/location/importer.py b/src/python/location/importer.py +new file mode 100644 +index 0000000..dee36ed +--- /dev/null ++++ b/src/python/location/importer.py +@@ -0,0 +1,250 @@ ++#!/usr/bin/python3 ++############################################################################### ++# # ++# libloc - A library to determine the location of someone on the Internet # ++# # ++# Copyright (C) 2020 IPFire Development Team # ++# # ++# This library is free software; you can redistribute it and/or # ++# modify it under the terms of the GNU Lesser General Public # ++# License as published by the Free Software Foundation; either # ++# version 2.1 of the License, or (at your option) any later version. # ++# # ++# This library is distributed in the hope that it will be useful, # ++# but WITHOUT ANY WARRANTY; without even the implied warranty of # ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # ++# Lesser General Public License for more details. 
# ++# # ++############################################################################### ++ ++import gzip ++import logging ++import urllib.request ++ ++# Initialise logging ++log = logging.getLogger("location.importer") ++log.propagate = 1 ++ ++WHOIS_SOURCES = { ++ # African Network Information Centre ++ "AFRINIC": [ ++ "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz" ++ ], ++ ++ # Asia Pacific Network Information Centre ++ "APNIC": [ ++ "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz", ++ "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz", ++ #"https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz", ++ #"https://ftp.apnic.net/apnic/whois/apnic.db.route.gz", ++ "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz", ++ "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz" ++ ], ++ ++ # American Registry for Internet Numbers ++ # XXX there is nothing useful for us in here ++ # ARIN: [ ++ # "https://ftp.arin.net/pub/rr/arin.db" ++ # ], ++ ++ # Japan Network Information Center ++ "JPNIC": [ ++ "https://ftp.nic.ad.jp/jpirr/jpirr.db.gz" ++ ], ++ ++ # Latin America and Caribbean Network Information Centre ++ "LACNIC": [ ++ "https://ftp.lacnic.net/lacnic/dbase/lacnic.db.gz" ++ ], ++ ++ # Réseaux IP Européens ++ "RIPE": [ ++ "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz", ++ "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz", ++ #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz", ++ #"https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz", ++ "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz", ++ "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz" ++ ], ++} ++ ++EXTENDED_SOURCES = { ++ # African Network Information Centre ++ # "ARIN": [ ++ # "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest" ++ # ], ++ ++ # Asia Pacific Network Information Centre ++ # "APNIC": [ ++ # "https://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest" ++ # ], ++ ++ # American Registry for Internet Numbers ++ "ARIN": [ ++ "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest" ++ ], ++ ++ # Latin America and Caribbean Network Information Centre ++ "LACNIC": [ ++ "https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest" ++ ], ++ ++ # Réseaux IP Européens ++ # "RIPE": [ ++ # "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest" ++ # ], ++} ++ ++class Downloader(object): ++ def __init__(self): ++ self.proxy = None ++ ++ def set_proxy(self, url): ++ """ ++ Sets a HTTP proxy that is used to perform all requests ++ """ ++ log.info("Using proxy %s" % url) ++ self.proxy = url ++ ++ def request(self, url, data=None, return_blocks=False): ++ req = urllib.request.Request(url, data=data) ++ ++ # Configure proxy ++ if self.proxy: ++ req.set_proxy(self.proxy, "http") ++ ++ return DownloaderContext(self, req, return_blocks=return_blocks) ++ ++ ++class DownloaderContext(object): ++ def __init__(self, downloader, request, return_blocks=False): ++ self.downloader = downloader ++ self.request = request ++ ++ # Should we return one block or a single line? ++ self.return_blocks = return_blocks ++ ++ # Save the response object ++ self.response = None ++ ++ def __enter__(self): ++ log.info("Retrieving %s..." 
% self.request.full_url) ++ ++ # Send request ++ self.response = urllib.request.urlopen(self.request) ++ ++ # Log the response headers ++ log.debug("Response Headers:") ++ for header in self.headers: ++ log.debug(" %s: %s" % (header, self.get_header(header))) ++ ++ return self ++ ++ def __exit__(self, type, value, traceback): ++ pass ++ ++ def __iter__(self): ++ """ ++ Makes the object iterable by going through each block ++ """ ++ if self.return_blocks: ++ return iterate_over_blocks(self.body) ++ ++ return iterate_over_lines(self.body) ++ ++ @property ++ def headers(self): ++ if self.response: ++ return self.response.headers ++ ++ def get_header(self, name): ++ if self.headers: ++ return self.headers.get(name) ++ ++ @property ++ def body(self): ++ """ ++ Returns a file-like object with the decoded content ++ of the response. ++ """ ++ content_type = self.get_header("Content-Type") ++ ++ # Decompress any gzipped response on the fly ++ if content_type in ("application/x-gzip", "application/gzip"): ++ return gzip.GzipFile(fileobj=self.response, mode="rb") ++ ++ # Return the response by default ++ return self.response ++ ++ ++def read_blocks(f): ++ for block in iterate_over_blocks(f): ++ type = None ++ data = {} ++ ++ for i, line in enumerate(block): ++ key, value = line.split(":", 1) ++ ++ # The key of the first line defines the type ++ if i == 0: ++ type = key ++ ++ # Store value ++ data[key] = value.strip() ++ ++ yield type, data ++ ++def iterate_over_blocks(f, charsets=("utf-8", "latin1")): ++ block = [] ++ ++ for line in f: ++ # Convert to string ++ for charset in charsets: ++ try: ++ line = line.decode(charset) ++ except UnicodeDecodeError: ++ continue ++ else: ++ break ++ ++ # Skip commented lines ++ if line.startswith("#") or line.startswith("%"): ++ continue ++ ++ # Strip line-endings ++ line = line.rstrip() ++ ++ # Remove any comments at the end of line ++ line, hash, comment = line.partition("#") ++ ++ if comment: ++ # Strip any whitespace before the comment ++ line = line.rstrip() ++ ++ # If the line is now empty, we move on ++ if not line: ++ continue ++ ++ if line: ++ block.append(line) ++ continue ++ ++ # End the block on an empty line ++ if block: ++ yield block ++ ++ # Reset the block ++ block = [] ++ ++ # Return the last block ++ if block: ++ yield block ++ ++ ++def iterate_over_lines(f): ++ for line in f: ++ # Decode the line ++ line = line.decode() ++ ++ # Strip the ending ++ yield line.rstrip() +diff --git a/src/python/location/logger.py b/src/python/location/logger.py +new file mode 100644 +index 0000000..0bdf9ec +--- /dev/null ++++ b/src/python/location/logger.py +@@ -0,0 +1,46 @@ ++#!/usr/bin/python3 ++############################################################################### ++# # ++# libloc - A library to determine the location of someone on the Internet # ++# # ++# Copyright (C) 2020 IPFire Development Team # ++# # ++# This library is free software; you can redistribute it and/or # ++# modify it under the terms of the GNU Lesser General Public # ++# License as published by the Free Software Foundation; either # ++# version 2.1 of the License, or (at your option) any later version. # ++# # ++# This library is distributed in the hope that it will be useful, # ++# but WITHOUT ANY WARRANTY; without even the implied warranty of # ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # ++# Lesser General Public License for more details. 
# ++# # ++############################################################################### ++ ++import logging ++import logging.handlers ++ ++# Initialise root logger ++log = logging.getLogger("location") ++log.setLevel(logging.INFO) ++ ++# Log to console ++handler = logging.StreamHandler() ++handler.setLevel(logging.DEBUG) ++log.addHandler(handler) ++ ++# Log to syslog ++handler = logging.handlers.SysLogHandler(address="/dev/log", ++ facility=logging.handlers.SysLogHandler.LOG_DAEMON) ++handler.setLevel(logging.INFO) ++log.addHandler(handler) ++ ++# Format syslog messages ++formatter = logging.Formatter("%(message)s") ++handler.setFormatter(formatter) ++ ++def set_level(level): ++ """ ++ Sets the log level for the root logger ++ """ ++ log.setLevel(level) +diff --git a/src/python/logger.py b/src/python/logger.py +deleted file mode 100644 +index 0bdf9ec..0000000 +--- a/src/python/logger.py ++++ /dev/null +@@ -1,46 +0,0 @@ +-#!/usr/bin/python3 +-############################################################################### +-# # +-# libloc - A library to determine the location of someone on the Internet # +-# # +-# Copyright (C) 2020 IPFire Development Team # +-# # +-# This library is free software; you can redistribute it and/or # +-# modify it under the terms of the GNU Lesser General Public # +-# License as published by the Free Software Foundation; either # +-# version 2.1 of the License, or (at your option) any later version. # +-# # +-# This library is distributed in the hope that it will be useful, # +-# but WITHOUT ANY WARRANTY; without even the implied warranty of # +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # +-# Lesser General Public License for more details. # +-# # +-############################################################################### +- +-import logging +-import logging.handlers +- +-# Initialise root logger +-log = logging.getLogger("location") +-log.setLevel(logging.INFO) +- +-# Log to console +-handler = logging.StreamHandler() +-handler.setLevel(logging.DEBUG) +-log.addHandler(handler) +- +-# Log to syslog +-handler = logging.handlers.SysLogHandler(address="/dev/log", +- facility=logging.handlers.SysLogHandler.LOG_DAEMON) +-handler.setLevel(logging.INFO) +-log.addHandler(handler) +- +-# Format syslog messages +-formatter = logging.Formatter("%(message)s") +-handler.setFormatter(formatter) +- +-def set_level(level): +- """ +- Sets the log level for the root logger +- """ +- log.setLevel(level) +diff --git a/src/scripts/location-importer.in b/src/scripts/location-importer.in +new file mode 100644 +index 0000000..bee9186 +--- /dev/null ++++ b/src/scripts/location-importer.in +@@ -0,0 +1,1535 @@ ++#!/usr/bin/python3 ++############################################################################### ++# # ++# libloc - A library to determine the location of someone on the Internet # ++# # ++# Copyright (C) 2020-2022 IPFire Development Team # ++# # ++# This library is free software; you can redistribute it and/or # ++# modify it under the terms of the GNU Lesser General Public # ++# License as published by the Free Software Foundation; either # ++# version 2.1 of the License, or (at your option) any later version. # ++# # ++# This library is distributed in the hope that it will be useful, # ++# but WITHOUT ANY WARRANTY; without even the implied warranty of # ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # ++# Lesser General Public License for more details. 
#
++# #
++###############################################################################
++
++import argparse
++import ipaddress
++import json
++import logging
++import math
++import re
++import socket
++import sys
++import telnetlib
++
++# Load our location module
++import location
++import location.database
++import location.importer
++from location.i18n import _
++
++# Initialise logging
++log = logging.getLogger("location.importer")
++log.propagate = 1
++
++# Define constants
++VALID_ASN_RANGES = (
++ (1, 23455),
++ (23457, 64495),
++ (131072, 4199999999),
++)
++
++
++class CLI(object):
++ def parse_cli(self):
++ parser = argparse.ArgumentParser(
++ description=_("Location Importer Command Line Interface"),
++ )
++ subparsers = parser.add_subparsers()
++
++ # Global configuration flags
++ parser.add_argument("--debug", action="store_true",
++ help=_("Enable debug output"))
++ parser.add_argument("--quiet", action="store_true",
++ help=_("Enable quiet mode"))
++
++ # version
++ parser.add_argument("--version", action="version",
++ version="%(prog)s @VERSION@")
++
++ # Database
++ parser.add_argument("--database-host", required=True,
++ help=_("Database Hostname"), metavar=_("HOST"))
++ parser.add_argument("--database-name", required=True,
++ help=_("Database Name"), metavar=_("NAME"))
++ parser.add_argument("--database-username", required=True,
++ help=_("Database Username"), metavar=_("USERNAME"))
++ parser.add_argument("--database-password", required=True,
++ help=_("Database Password"), metavar=_("PASSWORD"))
++
++ # Write Database
++ write = subparsers.add_parser("write", help=_("Write database to file"))
++ write.set_defaults(func=self.handle_write)
++ write.add_argument("file", nargs=1, help=_("Database File"))
++ write.add_argument("--signing-key", nargs="?", type=open, help=_("Signing Key"))
++ write.add_argument("--backup-signing-key", nargs="?", type=open, help=_("Backup Signing Key"))
++ write.add_argument("--vendor", nargs="?", help=_("Sets the vendor"))
++ write.add_argument("--description", nargs="?", help=_("Sets a description"))
++ write.add_argument("--license", nargs="?", help=_("Sets the license"))
++ write.add_argument("--version", type=int, help=_("Database Format Version"))
++
++ # Update WHOIS
++ update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
++ update_whois.set_defaults(func=self.handle_update_whois)
++
++ # Update announcements
++ update_announcements = subparsers.add_parser("update-announcements",
++ help=_("Update BGP Announcements"))
++ update_announcements.set_defaults(func=self.handle_update_announcements)
++ update_announcements.add_argument("server", nargs=1,
++ help=_("Route Server to connect to"), metavar=_("SERVER"))
++
++ # Update overrides
++ update_overrides = subparsers.add_parser("update-overrides",
++ help=_("Update overrides"),
++ )
++ update_overrides.add_argument(
++ "files", nargs="+", help=_("Files to import"),
++ )
++ update_overrides.set_defaults(func=self.handle_update_overrides)
++
++ # Import countries
++ import_countries = subparsers.add_parser("import-countries",
++ help=_("Import countries"),
++ )
++ import_countries.add_argument("file", nargs=1, type=argparse.FileType("r"),
++ help=_("File to import"))
++ import_countries.set_defaults(func=self.handle_import_countries)
++
++ args = parser.parse_args()
++
++ # Configure logging
++ if args.debug:
++ location.logger.set_level(logging.DEBUG)
++ elif args.quiet:
++ location.logger.set_level(logging.WARNING)
++
++ # Print usage if no action was
given ++ if not "func" in args: ++ parser.print_usage() ++ sys.exit(2) ++ ++ return args ++ ++ def run(self): ++ # Parse command line arguments ++ args = self.parse_cli() ++ ++ # Initialise database ++ self.db = self._setup_database(args) ++ ++ # Call function ++ ret = args.func(args) ++ ++ # Return with exit code ++ if ret: ++ sys.exit(ret) ++ ++ # Otherwise just exit ++ sys.exit(0) ++ ++ def _setup_database(self, ns): ++ """ ++ Initialise the database ++ """ ++ # Connect to database ++ db = location.database.Connection( ++ host=ns.database_host, database=ns.database_name, ++ user=ns.database_username, password=ns.database_password, ++ ) ++ ++ with db.transaction(): ++ db.execute(""" ++ -- announcements ++ CREATE TABLE IF NOT EXISTS announcements(network inet, autnum bigint, ++ first_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, ++ last_seen_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP); ++ CREATE UNIQUE INDEX IF NOT EXISTS announcements_networks ON announcements(network); ++ CREATE INDEX IF NOT EXISTS announcements_family ON announcements(family(network)); ++ CREATE INDEX IF NOT EXISTS announcements_search ON announcements USING GIST(network inet_ops); ++ ++ -- autnums ++ CREATE TABLE IF NOT EXISTS autnums(number bigint, name text NOT NULL); ++ ALTER TABLE autnums ADD COLUMN IF NOT EXISTS source text; ++ CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number); ++ ++ -- countries ++ CREATE TABLE IF NOT EXISTS countries( ++ country_code text NOT NULL, name text NOT NULL, continent_code text NOT NULL); ++ CREATE UNIQUE INDEX IF NOT EXISTS countries_country_code ON countries(country_code); ++ ++ -- networks ++ CREATE TABLE IF NOT EXISTS networks(network inet, country text); ++ ALTER TABLE networks ADD COLUMN IF NOT EXISTS original_countries text[]; ++ ALTER TABLE networks ADD COLUMN IF NOT EXISTS source text; ++ CREATE UNIQUE INDEX IF NOT EXISTS networks_network ON networks(network); ++ CREATE INDEX IF NOT EXISTS networks_family ON networks USING BTREE(family(network)); ++ CREATE INDEX IF NOT EXISTS networks_search ON networks USING GIST(network inet_ops); ++ ++ -- overrides ++ CREATE TABLE IF NOT EXISTS autnum_overrides( ++ number bigint NOT NULL, ++ name text, ++ country text, ++ is_anonymous_proxy boolean, ++ is_satellite_provider boolean, ++ is_anycast boolean ++ ); ++ CREATE UNIQUE INDEX IF NOT EXISTS autnum_overrides_number ++ ON autnum_overrides(number); ++ ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS source text; ++ ALTER TABLE autnum_overrides ADD COLUMN IF NOT EXISTS is_drop boolean; ++ ++ CREATE TABLE IF NOT EXISTS network_overrides( ++ network inet NOT NULL, ++ country text, ++ is_anonymous_proxy boolean, ++ is_satellite_provider boolean, ++ is_anycast boolean ++ ); ++ CREATE UNIQUE INDEX IF NOT EXISTS network_overrides_network ++ ON network_overrides(network); ++ CREATE INDEX IF NOT EXISTS network_overrides_search ++ ON network_overrides USING GIST(network inet_ops); ++ ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS source text; ++ ALTER TABLE network_overrides ADD COLUMN IF NOT EXISTS is_drop boolean; ++ """) ++ ++ return db ++ ++ def handle_write(self, ns): ++ """ ++ Compiles a database in libloc format out of what is in the database ++ """ ++ # Allocate a writer ++ writer = location.Writer(ns.signing_key, ns.backup_signing_key) ++ ++ # Set all metadata ++ if ns.vendor: ++ writer.vendor = ns.vendor ++ ++ if ns.description: ++ writer.description = ns.description ++ ++ if ns.license: ++ writer.license = ns.license ++ ++ # 
Add all Autonomous Systems ++ log.info("Writing Autonomous Systems...") ++ ++ # Select all ASes with a name ++ rows = self.db.query(""" ++ SELECT ++ autnums.number AS number, ++ COALESCE( ++ (SELECT overrides.name FROM autnum_overrides overrides ++ WHERE overrides.number = autnums.number), ++ autnums.name ++ ) AS name ++ FROM autnums ++ WHERE name <> %s ORDER BY number ++ """, "") ++ ++ for row in rows: ++ a = writer.add_as(row.number) ++ a.name = row.name ++ ++ # Add all networks ++ log.info("Writing networks...") ++ ++ # Select all known networks ++ rows = self.db.query(""" ++ WITH known_networks AS ( ++ SELECT network FROM announcements ++ UNION ++ SELECT network FROM networks ++ UNION ++ SELECT network FROM network_overrides ++ ), ++ ++ ordered_networks AS ( ++ SELECT ++ known_networks.network AS network, ++ announcements.autnum AS autnum, ++ networks.country AS country, ++ ++ -- Must be part of returned values for ORDER BY clause ++ masklen(announcements.network) AS sort_a, ++ masklen(networks.network) AS sort_b ++ FROM ++ known_networks ++ LEFT JOIN ++ announcements ON known_networks.network <<= announcements.network ++ LEFT JOIN ++ networks ON known_networks.network <<= networks.network ++ ORDER BY ++ known_networks.network, ++ sort_a DESC, ++ sort_b DESC ++ ) ++ ++ -- Return a list of those networks enriched with all ++ -- other information that we store in the database ++ SELECT ++ DISTINCT ON (network) ++ network, ++ autnum, ++ ++ -- Country ++ COALESCE( ++ ( ++ SELECT country FROM network_overrides overrides ++ WHERE networks.network <<= overrides.network ++ ORDER BY masklen(overrides.network) DESC ++ LIMIT 1 ++ ), ++ ( ++ SELECT country FROM autnum_overrides overrides ++ WHERE networks.autnum = overrides.number ++ ), ++ networks.country ++ ) AS country, ++ ++ -- Flags ++ COALESCE( ++ ( ++ SELECT is_anonymous_proxy FROM network_overrides overrides ++ WHERE networks.network <<= overrides.network ++ ORDER BY masklen(overrides.network) DESC ++ LIMIT 1 ++ ), ++ ( ++ SELECT is_anonymous_proxy FROM autnum_overrides overrides ++ WHERE networks.autnum = overrides.number ++ ), ++ FALSE ++ ) AS is_anonymous_proxy, ++ COALESCE( ++ ( ++ SELECT is_satellite_provider FROM network_overrides overrides ++ WHERE networks.network <<= overrides.network ++ ORDER BY masklen(overrides.network) DESC ++ LIMIT 1 ++ ), ++ ( ++ SELECT is_satellite_provider FROM autnum_overrides overrides ++ WHERE networks.autnum = overrides.number ++ ), ++ FALSE ++ ) AS is_satellite_provider, ++ COALESCE( ++ ( ++ SELECT is_anycast FROM network_overrides overrides ++ WHERE networks.network <<= overrides.network ++ ORDER BY masklen(overrides.network) DESC ++ LIMIT 1 ++ ), ++ ( ++ SELECT is_anycast FROM autnum_overrides overrides ++ WHERE networks.autnum = overrides.number ++ ), ++ FALSE ++ ) AS is_anycast, ++ COALESCE( ++ ( ++ SELECT is_drop FROM network_overrides overrides ++ WHERE networks.network <<= overrides.network ++ ORDER BY masklen(overrides.network) DESC ++ LIMIT 1 ++ ), ++ ( ++ SELECT is_drop FROM autnum_overrides overrides ++ WHERE networks.autnum = overrides.number ++ ), ++ FALSE ++ ) AS is_drop ++ FROM ++ ordered_networks networks ++ """) ++ ++ for row in rows: ++ network = writer.add_network(row.network) ++ ++ # Save country ++ if row.country: ++ network.country_code = row.country ++ ++ # Save ASN ++ if row.autnum: ++ network.asn = row.autnum ++ ++ # Set flags ++ if row.is_anonymous_proxy: ++ network.set_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY) ++ ++ if row.is_satellite_provider: ++ 
network.set_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER)
++
++ if row.is_anycast:
++ network.set_flag(location.NETWORK_FLAG_ANYCAST)
++
++ if row.is_drop:
++ network.set_flag(location.NETWORK_FLAG_DROP)
++
++ # Add all countries
++ log.info("Writing countries...")
++ rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
++
++ for row in rows:
++ c = writer.add_country(row.country_code)
++ c.continent_code = row.continent_code
++ c.name = row.name
++
++ # Write everything to file
++ log.info("Writing database to file...")
++ for file in ns.file:
++ writer.write(file)
++
++ def handle_update_whois(self, ns):
++ downloader = location.importer.Downloader()
++
++ # Download all sources
++ with self.db.transaction():
++ # Create some temporary tables to store parsed data
++ self.db.execute("""
++ CREATE TEMPORARY TABLE _autnums(number integer NOT NULL, organization text NOT NULL, source text NOT NULL)
++ ON COMMIT DROP;
++ CREATE UNIQUE INDEX _autnums_number ON _autnums(number);
++
++ CREATE TEMPORARY TABLE _organizations(handle text NOT NULL, name text NOT NULL, source text NOT NULL)
++ ON COMMIT DROP;
++ CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
++
++ CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL, original_countries text[] NOT NULL, source text NOT NULL)
++ ON COMMIT DROP;
++ CREATE INDEX _rirdata_search ON _rirdata USING BTREE(family(network), masklen(network));
++ CREATE UNIQUE INDEX _rirdata_network ON _rirdata(network);
++ """)
++
++ # Remove all previously imported content
++ self.db.execute("""
++ TRUNCATE TABLE networks;
++ """)
++
++ # Fetch all valid country codes to check parsed networks against...
++ rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
++ validcountries = []
++
++ for row in rows:
++ validcountries.append(row.country_code)
++
++ for source_key in location.importer.WHOIS_SOURCES:
++ for single_url in location.importer.WHOIS_SOURCES[source_key]:
++ with downloader.request(single_url, return_blocks=True) as f:
++ for block in f:
++ self._parse_block(block, source_key, validcountries)
++
++ # Process all parsed networks from every RIR we happen to have access to,
++ # insert the largest network chunks into the networks table immediately...
++ families = self.db.query("SELECT DISTINCT family(network) AS family FROM _rirdata ORDER BY family(network)")
++
++ for family in (row.family for row in families):
++ smallest = self.db.get("SELECT MIN(masklen(network)) AS prefix FROM _rirdata WHERE family(network) = %s", family)
++
++ self.db.execute("INSERT INTO networks(network, country, original_countries, source) \
++ SELECT network, country, original_countries, source FROM _rirdata WHERE masklen(network) = %s AND family(network) = %s", smallest.prefix, family)
++
++ # ... determine any other prefixes for this network family, ...
++ prefixes = self.db.query("SELECT DISTINCT masklen(network) AS prefix FROM _rirdata \
++ WHERE family(network) = %s ORDER BY masklen(network) ASC OFFSET 1", family)
++
++ # ... and insert networks with this prefix in case they provide additional
++ # information (i.e.
subnet of a larger chunk with a different country) ++ for prefix in (row.prefix for row in prefixes): ++ self.db.execute(""" ++ WITH candidates AS ( ++ SELECT ++ _rirdata.network, ++ _rirdata.country, ++ _rirdata.original_countries, ++ _rirdata.source ++ FROM ++ _rirdata ++ WHERE ++ family(_rirdata.network) = %s ++ AND ++ masklen(_rirdata.network) = %s ++ ), ++ filtered AS ( ++ SELECT ++ DISTINCT ON (c.network) ++ c.network, ++ c.country, ++ c.original_countries, ++ c.source, ++ masklen(networks.network), ++ networks.country AS parent_country ++ FROM ++ candidates c ++ LEFT JOIN ++ networks ++ ON ++ c.network << networks.network ++ ORDER BY ++ c.network, ++ masklen(networks.network) DESC NULLS LAST ++ ) ++ INSERT INTO ++ networks(network, country, original_countries, source) ++ SELECT ++ network, ++ country, ++ original_countries, ++ source ++ FROM ++ filtered ++ WHERE ++ parent_country IS NULL ++ OR ++ country <> parent_country ++ ON CONFLICT DO NOTHING""", ++ family, prefix, ++ ) ++ ++ self.db.execute(""" ++ INSERT INTO autnums(number, name, source) ++ SELECT _autnums.number, _organizations.name, _organizations.source FROM _autnums ++ JOIN _organizations ON _autnums.organization = _organizations.handle ++ ON CONFLICT (number) DO UPDATE SET name = excluded.name; ++ """) ++ ++ # Download all extended sources ++ for source_key in location.importer.EXTENDED_SOURCES: ++ for single_url in location.importer.EXTENDED_SOURCES[source_key]: ++ with self.db.transaction(): ++ # Download data ++ with downloader.request(single_url) as f: ++ for line in f: ++ self._parse_line(line, source_key, validcountries) ++ ++ # Download and import (technical) AS names from ARIN ++ self._import_as_names_from_arin() ++ ++ def _check_parsed_network(self, network): ++ """ ++ Assistive function to detect and subsequently sort out parsed ++ networks from RIR data (both Whois and so-called "extended sources"), ++ which are or have... ++ ++ (a) not globally routable (RFC 1918 space, et al.) ++ (b) covering a too large chunk of the IP address space (prefix length ++ is < 7 for IPv4 networks, and < 10 for IPv6) ++ (c) "0.0.0.0" or "::" as a network address ++ (d) are too small for being publicly announced (we have decided not to ++ process them at the moment, as they significantly enlarge our ++ database without providing very helpful additional information) ++ ++ This unfortunately is necessary due to brain-dead clutter across ++ various RIR databases, causing mismatches and eventually disruptions. ++ ++ We will return False in case a network is not suitable for adding ++ it to our database, and True otherwise. 
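++
++ For instance (illustrative values): 10.0.0.0/8 would be rejected by (a),
++ 8.0.0.0/6 by (b), and 1.2.3.0/30 by (d).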
++ """ ++ ++ if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)): ++ return False ++ ++ if not network.is_global: ++ log.debug("Skipping non-globally routable network: %s" % network) ++ return False ++ ++ if network.version == 4: ++ if network.prefixlen < 7: ++ log.debug("Skipping too big IP chunk: %s" % network) ++ return False ++ ++ if network.prefixlen > 24: ++ log.debug("Skipping network too small to be publicly announced: %s" % network) ++ return False ++ ++ if str(network.network_address) == "0.0.0.0": ++ log.debug("Skipping network based on 0.0.0.0: %s" % network) ++ return False ++ ++ elif network.version == 6: ++ if network.prefixlen < 10: ++ log.debug("Skipping too big IP chunk: %s" % network) ++ return False ++ ++ if network.prefixlen > 48: ++ log.debug("Skipping network too small to be publicly announced: %s" % network) ++ return False ++ ++ if str(network.network_address) == "::": ++ log.debug("Skipping network based on '::': %s" % network) ++ return False ++ ++ else: ++ # This should not happen... ++ log.warning("Skipping network of unknown family, this should not happen: %s" % network) ++ return False ++ ++ # In case we have made it here, the network is considered to ++ # be suitable for libloc consumption... ++ return True ++ ++ def _check_parsed_asn(self, asn): ++ """ ++ Assistive function to filter Autonomous System Numbers not being suitable ++ for adding to our database. Returns False in such cases, and True otherwise. ++ """ ++ ++ for start, end in VALID_ASN_RANGES: ++ if start <= asn and end >= asn: ++ return True ++ ++ log.info("Supplied ASN %s out of publicly routable ASN ranges" % asn) ++ return False ++ ++ def _parse_block(self, block, source_key, validcountries = None): ++ # Get first line to find out what type of block this is ++ line = block[0] ++ ++ # aut-num ++ if line.startswith("aut-num:"): ++ return self._parse_autnum_block(block, source_key) ++ ++ # inetnum ++ if line.startswith("inet6num:") or line.startswith("inetnum:"): ++ return self._parse_inetnum_block(block, source_key, validcountries) ++ ++ # organisation ++ elif line.startswith("organisation:"): ++ return self._parse_org_block(block, source_key) ++ ++ def _parse_autnum_block(self, block, source_key): ++ autnum = {} ++ for line in block: ++ # Split line ++ key, val = split_line(line) ++ ++ if key == "aut-num": ++ m = re.match(r"^(AS|as)(\d+)", val) ++ if m: ++ autnum["asn"] = m.group(2) ++ ++ elif key == "org": ++ autnum[key] = val.upper() ++ ++ elif key == "descr": ++ # Save the first description line as well... ++ if not key in autnum: ++ autnum[key] = val ++ ++ # Skip empty objects ++ if not autnum or not "asn" in autnum: ++ return ++ ++ # Insert a dummy organisation handle into our temporary organisations ++ # table in case the AS does not have an organisation handle set, but ++ # has a description (a quirk often observed in APNIC area), so we can ++ # later display at least some string for this AS. 
++ if not "org" in autnum: ++ if "descr" in autnum: ++ autnum["org"] = "LIBLOC-%s-ORGHANDLE" % autnum.get("asn") ++ ++ self.db.execute("INSERT INTO _organizations(handle, name, source) \ ++ VALUES(%s, %s, %s) ON CONFLICT (handle) DO NOTHING", ++ autnum.get("org"), autnum.get("descr"), source_key, ++ ) ++ else: ++ log.warning("ASN %s neither has an organisation handle nor a description line set, omitting" % \ ++ autnum.get("asn")) ++ return ++ ++ # Insert into database ++ self.db.execute("INSERT INTO _autnums(number, organization, source) \ ++ VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \ ++ organization = excluded.organization", ++ autnum.get("asn"), autnum.get("org"), source_key, ++ ) ++ ++ def _parse_inetnum_block(self, block, source_key, validcountries = None): ++ log.debug("Parsing inetnum block:") ++ ++ inetnum = {} ++ for line in block: ++ log.debug(line) ++ ++ # Split line ++ key, val = split_line(line) ++ ++ # Filter any inetnum records which are only referring to IP space ++ # not managed by that specific RIR... ++ if key == "netname": ++ if re.match(r"^(ERX-NETBLOCK|(AFRINIC|ARIN|LACNIC|RIPE)-CIDR-BLOCK|IANA-NETBLOCK-\d{1,3}|NON-RIPE-NCC-MANAGED-ADDRESS-BLOCK|STUB-[\d-]{3,}SLASH\d{1,2})", val.strip()): ++ log.debug("Skipping record indicating historic/orphaned data: %s" % val.strip()) ++ return ++ ++ if key == "inetnum": ++ start_address, delim, end_address = val.partition("-") ++ ++ # Strip any excess space ++ start_address, end_address = start_address.rstrip(), end_address.strip() ++ ++ # Handle "inetnum" formatting in LACNIC DB (e.g. "24.152.8/22" instead of "24.152.8.0/22") ++ if start_address and not (delim or end_address): ++ try: ++ start_address = ipaddress.ip_network(start_address, strict=False) ++ except ValueError: ++ start_address = start_address.split("/") ++ ldigits = start_address[0].count(".") ++ ++ # How many octets do we need to add? ++ # (LACNIC does not seem to have a /8 or greater assigned, so the following should suffice.) ++ if ldigits == 1: ++ start_address = start_address[0] + ".0.0/" + start_address[1] ++ elif ldigits == 2: ++ start_address = start_address[0] + ".0/" + start_address[1] ++ else: ++ log.warning("Could not recover IPv4 address from line in LACNIC DB format: %s" % line) ++ return ++ ++ try: ++ start_address = ipaddress.ip_network(start_address, strict=False) ++ except ValueError: ++ log.warning("Could not parse line in LACNIC DB format: %s" % line) ++ return ++ ++ # Enumerate first and last IP address of this network ++ end_address = start_address[-1] ++ start_address = start_address[0] ++ ++ else: ++ # Convert to IP address ++ try: ++ start_address = ipaddress.ip_address(start_address) ++ end_address = ipaddress.ip_address(end_address) ++ except ValueError: ++ log.warning("Could not parse line: %s" % line) ++ return ++ ++ inetnum["inetnum"] = list(ipaddress.summarize_address_range(start_address, end_address)) ++ ++ elif key == "inet6num": ++ inetnum[key] = [ipaddress.ip_network(val, strict=False)] ++ ++ elif key == "country": ++ val = val.upper() ++ ++ # Catch RIR data objects with more than one country code... ++ if not key in inetnum: ++ inetnum[key] = [] ++ else: ++ if val in inetnum.get("country"): ++ # ... but keep this list distinct... ++ continue ++ ++ # When people set country codes to "UK", they actually mean "GB" ++ if val == "UK": ++ val = "GB" ++ ++ inetnum[key].append(val) ++ ++ # Skip empty objects ++ if not inetnum or not "country" in inetnum: ++ return ++ ++ # Prepare skipping objects with unknown country codes... 
++ invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in validcountries] ++ ++ # Iterate through all networks enumerated from above, check them for plausibility and insert ++ # them into the database, if _check_parsed_network() succeeded ++ for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"): ++ if self._check_parsed_network(single_network): ++ ++ # Skip objects with unknown country codes if they are valid to avoid log spam... ++ if validcountries and invalidcountries: ++ log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \ ++ (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum"))) ++ break ++ ++ # Everything is fine here, run INSERT statement... ++ self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \ ++ VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country", ++ "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key, ++ ) ++ ++ def _parse_org_block(self, block, source_key): ++ org = {} ++ for line in block: ++ # Split line ++ key, val = split_line(line) ++ ++ if key == "organisation": ++ org[key] = val.upper() ++ elif key == "org-name": ++ org[key] = val ++ ++ # Skip empty objects ++ if not org: ++ return ++ ++ self.db.execute("INSERT INTO _organizations(handle, name, source) \ ++ VALUES(%s, %s, %s) ON CONFLICT (handle) DO \ ++ UPDATE SET name = excluded.name", ++ org.get("organisation"), org.get("org-name"), source_key, ++ ) ++ ++ def _parse_line(self, line, source_key, validcountries = None): ++ # Skip version line ++ if line.startswith("2"): ++ return ++ ++ # Skip comments ++ if line.startswith("#"): ++ return ++ ++ try: ++ registry, country_code, type, line = line.split("|", 3) ++ except: ++ log.warning("Could not parse line: %s" % line) ++ return ++ ++ # Skip any lines that are for stats only or do not have a country ++ # code at all (avoids log spam below) ++ if not country_code or country_code == '*': ++ return ++ ++ # Skip objects with unknown country codes ++ if validcountries and country_code not in validcountries: ++ log.warning("Skipping line with bogus country '%s': %s" % \ ++ (country_code, line)) ++ return ++ ++ if type in ("ipv6", "ipv4"): ++ return self._parse_ip_line(country_code, type, line, source_key) ++ ++ def _parse_ip_line(self, country, type, line, source_key): ++ try: ++ address, prefix, date, status, organization = line.split("|") ++ except ValueError: ++ organization = None ++ ++ # Try parsing the line without organization ++ try: ++ address, prefix, date, status = line.split("|") ++ except ValueError: ++ log.warning("Unhandled line format: %s" % line) ++ return ++ ++ # Skip anything that isn't properly assigned ++ if not status in ("assigned", "allocated"): ++ return ++ ++ # Cast prefix into an integer ++ try: ++ prefix = int(prefix) ++ except: ++ log.warning("Invalid prefix: %s" % prefix) ++ return ++ ++ # Fix prefix length for IPv4 ++ if type == "ipv4": ++ prefix = 32 - int(math.log(prefix, 2)) ++ ++ # Try to parse the address ++ try: ++ network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False) ++ except ValueError: ++ log.warning("Invalid IP address: %s" % address) ++ return ++ ++ if not self._check_parsed_network(network): ++ return ++ ++ self.db.execute("INSERT INTO networks(network, country, original_countries, source) \ ++ VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \ ++ UPDATE SET country = 
excluded.country", ++ "%s" % network, country, [country], source_key, ++ ) ++ ++ def _import_as_names_from_arin(self): ++ downloader = location.importer.Downloader() ++ ++ # XXX: Download AS names file from ARIN (note that these names appear to be quite ++ # technical, not intended for human consumption, as description fields in ++ # organisation handles for other RIRs are - however, this is what we have got, ++ # and in some cases, it might be still better than nothing) ++ with downloader.request("https://ftp.arin.net/info/asn.txt", return_blocks=False) as f: ++ for line in f: ++ # Convert binary line to string... ++ line = str(line) ++ ++ # ... valid lines start with a space, followed by the number of the Autonomous System ... ++ if not line.startswith(" "): ++ continue ++ ++ # Split line and check if there is a valid ASN in it... ++ asn, name = line.split()[0:2] ++ ++ try: ++ asn = int(asn) ++ except ValueError: ++ log.debug("Skipping ARIN AS names line not containing an integer for ASN") ++ continue ++ ++ # Filter invalid ASNs... ++ if not self._check_parsed_asn(asn): ++ continue ++ ++ # Skip any AS name that appears to be a placeholder for a different RIR or entity... ++ if re.match(r"^(ASN-BLK|)(AFCONC|AFRINIC|APNIC|ASNBLK|DNIC|LACNIC|RIPE|IANA)(?:\d?$|\-)", name): ++ continue ++ ++ # Bail out in case the AS name contains anything we do not expect here... ++ if re.search(r"[^a-zA-Z0-9-_]", name): ++ log.debug("Skipping ARIN AS name for %s containing invalid characters: %s" % \ ++ (asn, name)) ++ ++ # Things look good here, run INSERT statement and skip this one if we already have ++ # a (better?) name for this Autonomous System... ++ self.db.execute(""" ++ INSERT INTO autnums( ++ number, ++ name, ++ source ++ ) VALUES (%s, %s, %s) ++ ON CONFLICT (number) DO NOTHING""", ++ asn, ++ name, ++ "ARIN", ++ ) ++ ++ def handle_update_announcements(self, ns): ++ server = ns.server[0] ++ ++ with self.db.transaction(): ++ if server.startswith("/"): ++ self._handle_update_announcements_from_bird(server) ++ else: ++ self._handle_update_announcements_from_telnet(server) ++ ++ # Purge anything we never want here ++ self.db.execute(""" ++ -- Delete default routes ++ DELETE FROM announcements WHERE network = '::/0' OR network = '0.0.0.0/0'; ++ ++ -- Delete anything that is not global unicast address space ++ DELETE FROM announcements WHERE family(network) = 6 AND NOT network <<= '2000::/3'; ++ ++ -- DELETE "current network" address space ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '0.0.0.0/8'; ++ ++ -- DELETE local loopback address space ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '127.0.0.0/8'; ++ ++ -- DELETE RFC 1918 address space ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '10.0.0.0/8'; ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '172.16.0.0/12'; ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.168.0.0/16'; ++ ++ -- DELETE test, benchmark and documentation address space ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.0.0/24'; ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.0.2.0/24'; ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.18.0.0/15'; ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '198.51.100.0/24'; ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '203.0.113.0/24'; ++ ++ -- DELETE CGNAT address space (RFC 6598) ++ DELETE FROM 
announcements WHERE family(network) = 4 AND network <<= '100.64.0.0/10'; ++ ++ -- DELETE link local address space ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '169.254.0.0/16'; ++ ++ -- DELETE IPv6 to IPv4 (6to4) address space (RFC 3068) ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '192.88.99.0/24'; ++ DELETE FROM announcements WHERE family(network) = 6 AND network <<= '2002::/16'; ++ ++ -- DELETE multicast and reserved address space ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '224.0.0.0/4'; ++ DELETE FROM announcements WHERE family(network) = 4 AND network <<= '240.0.0.0/4'; ++ ++ -- Delete networks that are too small to be in the global routing table ++ DELETE FROM announcements WHERE family(network) = 6 AND masklen(network) > 48; ++ DELETE FROM announcements WHERE family(network) = 4 AND masklen(network) > 24; ++ ++ -- Delete any non-public or reserved ASNs ++ DELETE FROM announcements WHERE NOT ( ++ (autnum >= 1 AND autnum <= 23455) ++ OR ++ (autnum >= 23457 AND autnum <= 64495) ++ OR ++ (autnum >= 131072 AND autnum <= 4199999999) ++ ); ++ ++ -- Delete everything that we have not seen for 14 days ++ DELETE FROM announcements WHERE last_seen_at <= CURRENT_TIMESTAMP - INTERVAL '14 days'; ++ """) ++ ++ def _handle_update_announcements_from_bird(self, server): ++ # Pre-compile the regular expression for faster searching ++ route = re.compile(b"^\s(.+?)\s+.+?\[(?:AS(.*?))?.\]$") ++ ++ log.info("Requesting routing table from Bird (%s)" % server) ++ ++ aggregated_networks = [] ++ ++ # Send command to list all routes ++ for line in self._bird_cmd(server, "show route"): ++ m = route.match(line) ++ if not m: ++ # Skip empty lines ++ if not line: ++ pass ++ ++ # Ignore any header lines with the name of the routing table ++ elif line.startswith(b"Table"): ++ pass ++ ++ # Log anything else ++ else: ++ log.debug("Could not parse line: %s" % line.decode()) ++ ++ continue ++ ++ # Fetch the extracted network and ASN ++ network, autnum = m.groups() ++ ++ # Decode into strings ++ if network: ++ network = network.decode() ++ if autnum: ++ autnum = autnum.decode() ++ ++ # Collect all aggregated networks ++ if not autnum: ++ log.debug("%s is an aggregated network" % network) ++ aggregated_networks.append(network) ++ continue ++ ++ # Insert it into the database ++ self.db.execute("INSERT INTO announcements(network, autnum) \ ++ VALUES(%s, %s) ON CONFLICT (network) DO \ ++ UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP", ++ network, autnum, ++ ) ++ ++ # Process any aggregated networks ++ for network in aggregated_networks: ++ log.debug("Processing aggregated network %s" % network) ++ ++ # Run "show route all" for each network ++ for line in self._bird_cmd(server, "show route %s all" % network): ++ # Try finding the path ++ m = re.match(b"\s+BGP\.as_path:.* (\d+) {\d+}$", line) ++ if m: ++ # Select the last AS number in the path ++ autnum = m.group(1).decode() ++ ++ # Insert it into the database ++ self.db.execute("INSERT INTO announcements(network, autnum) \ ++ VALUES(%s, %s) ON CONFLICT (network) DO \ ++ UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP", ++ network, autnum, ++ ) ++ ++ # We don't need to process any more ++ break ++ ++ def _handle_update_announcements_from_telnet(self, server): ++ # Pre-compile regular expression for routes ++ route = re.compile(b"^\*[\s\>]i([^\s]+).+?(\d+)\si\r\n", re.MULTILINE|re.DOTALL) ++ ++ with telnetlib.Telnet(server) as t: ++ # Enable debug mode 
++ #if ns.debug: ++ # t.set_debuglevel(10) ++ ++ # Wait for console greeting ++ greeting = t.read_until(b"> ", timeout=30) ++ if not greeting: ++ log.error("Could not get a console prompt") ++ return 1 ++ ++ # Disable pagination ++ t.write(b"terminal length 0\n") ++ ++ # Wait for the prompt to return ++ t.read_until(b"> ") ++ ++ # Fetch the routing tables ++ for protocol in ("ipv6", "ipv4"): ++ log.info("Requesting %s routing table" % protocol) ++ ++ # Request the full unicast routing table ++ t.write(b"show bgp %s unicast\n" % protocol.encode()) ++ ++ # Read entire header which ends with "Path" ++ t.read_until(b"Path\r\n") ++ ++ while True: ++ # Try reading a full entry ++ # Those might be broken across multiple lines but ends with i ++ line = t.read_until(b"i\r\n", timeout=5) ++ if not line: ++ break ++ ++ # Show line for debugging ++ #log.debug(repr(line)) ++ ++ # Try finding a route in here ++ m = route.match(line) ++ if m: ++ network, autnum = m.groups() ++ ++ # Convert network to string ++ network = network.decode() ++ ++ # Append /24 for IPv4 addresses ++ if not "/" in network and not ":" in network: ++ network = "%s/24" % network ++ ++ # Convert AS number to integer ++ autnum = int(autnum) ++ ++ log.info("Found announcement for %s by %s" % (network, autnum)) ++ ++ self.db.execute("INSERT INTO announcements(network, autnum) \ ++ VALUES(%s, %s) ON CONFLICT (network) DO \ ++ UPDATE SET autnum = excluded.autnum, last_seen_at = CURRENT_TIMESTAMP", ++ network, autnum, ++ ) ++ ++ log.info("Finished reading the %s routing table" % protocol) ++ ++ def _bird_cmd(self, socket_path, command): ++ # Connect to the socket ++ s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) ++ s.connect(socket_path) ++ ++ # Allocate some buffer ++ buffer = b"" ++ ++ log.debug("Sending Bird command: %s" % command) ++ ++ # Send the command ++ s.send(b"%s\n" % command.encode()) ++ ++ while True: ++ # Fill up the buffer ++ buffer += s.recv(4096) ++ ++ while True: ++ # Search for the next newline ++ pos = buffer.find(b"\n") ++ ++ # If we cannot find one, we go back and read more data ++ if pos <= 0: ++ break ++ ++ # Cut after the newline character ++ pos += 1 ++ ++ # Split the line we want and keep the rest in buffer ++ line, buffer = buffer[:pos], buffer[pos:] ++ ++ # Try parsing any status lines ++ if len(line) > 4 and line[:4].isdigit() and line[4] in (32, 45): ++ code, delim, line = int(line[:4]), line[4], line[5:] ++ ++ log.debug("Received response code %s from bird" % code) ++ ++ # End of output ++ if code == 0: ++ return ++ ++ # Ignore hello line ++ elif code == 1: ++ continue ++ ++ # Otherwise return the line ++ yield line ++ ++ def handle_update_overrides(self, ns): ++ with self.db.transaction(): ++ # Drop all data that we have ++ self.db.execute(""" ++ TRUNCATE TABLE autnum_overrides; ++ TRUNCATE TABLE network_overrides; ++ """) ++ ++ # Update overrides for various cloud providers big enough to publish their own IP ++ # network allocation lists in a machine-readable format... ++ self._update_overrides_for_aws() ++ ++ # Update overrides for Spamhaus DROP feeds... ++ self._update_overrides_for_spamhaus_drop() ++ ++ for file in ns.files: ++ log.info("Reading %s..." 
% file)
++
++ with open(file, "rb") as f:
++ for type, block in location.importer.read_blocks(f):
++ if type == "net":
++ network = block.get("net")
++ # Try to parse and normalise the network
++ try:
++ network = ipaddress.ip_network(network, strict=False)
++ except ValueError as e:
++ log.warning("Invalid IP network: %s: %s" % (network, e))
++ continue
++
++ # Prevent overwriting all networks
++ if network.prefixlen == 0:
++ log.warning("Skipping %s: You cannot overwrite default" % network)
++ continue
++
++ self.db.execute("""
++ INSERT INTO network_overrides(
++ network,
++ country,
++ source,
++ is_anonymous_proxy,
++ is_satellite_provider,
++ is_anycast,
++ is_drop
++ ) VALUES (%s, %s, %s, %s, %s, %s, %s)
++ ON CONFLICT (network) DO NOTHING""",
++ "%s" % network,
++ block.get("country"),
++ "manual",
++ self._parse_bool(block, "is-anonymous-proxy"),
++ self._parse_bool(block, "is-satellite-provider"),
++ self._parse_bool(block, "is-anycast"),
++ self._parse_bool(block, "drop"),
++ )
++
++ elif type == "aut-num":
++ autnum = block.get("aut-num")
++
++ # Check if AS number begins with "AS"
++ if not autnum.startswith("AS"):
++ log.warning("Invalid AS number: %s" % autnum)
++ continue
++
++ # Strip "AS"
++ autnum = autnum[2:]
++
++ self.db.execute("""
++ INSERT INTO autnum_overrides(
++ number,
++ name,
++ country,
++ source,
++ is_anonymous_proxy,
++ is_satellite_provider,
++ is_anycast,
++ is_drop
++ ) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
++ ON CONFLICT DO NOTHING""",
++ autnum,
++ block.get("name"),
++ block.get("country"),
++ "manual",
++ self._parse_bool(block, "is-anonymous-proxy"),
++ self._parse_bool(block, "is-satellite-provider"),
++ self._parse_bool(block, "is-anycast"),
++ self._parse_bool(block, "drop"),
++ )
++
++ else:
++ log.warning("Unsupported type: %s" % type)
++
++ def _update_overrides_for_aws(self):
++ # Download Amazon AWS IP allocation file to create overrides...
++ downloader = location.importer.Downloader()
++
++ try:
++ with downloader.request("https://ip-ranges.amazonaws.com/ip-ranges.json", return_blocks=False) as f:
++ aws_ip_dump = json.load(f.body)
++ except Exception as e:
++ log.error("Unable to preprocess Amazon AWS IP ranges: %s" % e)
++ return
++
++ # XXX: Set up a dictionary for mapping a region name to a country. Unfortunately,
++ # there seems to be no machine-readable version available of this other than
++ # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html
++ # (worse, it seems to be incomplete :-/ ); https://www.cloudping.cloud/endpoints
++ # was helpful here as well.
++ aws_region_country_map = {
++ "af-south-1": "ZA",
++ "ap-east-1": "HK",
++ "ap-south-1": "IN",
++ "ap-south-2": "IN",
++ "ap-northeast-3": "JP",
++ "ap-northeast-2": "KR",
++ "ap-southeast-1": "SG",
++ "ap-southeast-2": "AU",
++ "ap-southeast-3": "MY",
++ "ap-southeast-4": "AU",
++ "ap-northeast-1": "JP",
++ "ca-central-1": "CA",
++ "eu-central-1": "DE",
++ "eu-central-2": "CH",
++ "eu-west-1": "IE",
++ "eu-west-2": "GB",
++ "eu-south-1": "IT",
++ "eu-south-2": "ES",
++ "eu-west-3": "FR",
++ "eu-north-1": "SE",
++ "il-central-1": "IL", # XXX: This one is not documented anywhere except for ip-ranges.json itself
++ "me-central-1": "AE",
++ "me-south-1": "BH",
++ "sa-east-1": "BR"
++ }
++
++ # Fetch all valid country codes to check parsed networks against...
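++ # (validcountries ends up as a plain list of ISO 3166-1 alpha-2 codes,
++ # e.g. ["DE", "US", ...], as loaded earlier via the import-countries command)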
++ rows = self.db.query("SELECT * FROM countries ORDER BY country_code")
++ validcountries = []
++
++ for row in rows:
++ validcountries.append(row.country_code)
++
++ with self.db.transaction():
++ for snetwork in aws_ip_dump["prefixes"] + aws_ip_dump["ipv6_prefixes"]:
++ try:
++ network = ipaddress.ip_network(snetwork.get("ip_prefix") or snetwork.get("ipv6_prefix"), strict=False)
++ except ValueError:
++ log.warning("Unable to parse line: %s" % snetwork)
++ continue
++
++ # Sanitize parsed networks...
++ if not self._check_parsed_network(network):
++ continue
++
++ # Determine region of this network...
++ region = snetwork["region"]
++ cc = None
++ is_anycast = False
++
++ # Any region name starting with "us-" will get "US" country code assigned straight away...
++ if region.startswith("us-"):
++ cc = "US"
++ elif region.startswith("cn-"):
++ # ... same goes for China ...
++ cc = "CN"
++ elif region == "GLOBAL":
++ # ... funny region name for anycast-like networks ...
++ is_anycast = True
++ elif region in aws_region_country_map:
++ # ... assign looked up country code otherwise ...
++ cc = aws_region_country_map[region]
++ else:
++ # ... and bail out if we are missing something here
++ log.warning("Unable to determine country code for line: %s" % snetwork)
++ continue
++
++ # Skip networks with unknown country codes
++ if not is_anycast and validcountries and cc not in validcountries:
++ log.warning("Skipping Amazon AWS network with bogus country '%s': %s" % \
++ (cc, network))
++ continue
++
++ # Conduct SQL statement...
++ self.db.execute("""
++ INSERT INTO network_overrides(
++ network,
++ country,
++ source,
++ is_anonymous_proxy,
++ is_satellite_provider,
++ is_anycast
++ ) VALUES (%s, %s, %s, %s, %s, %s)
++ ON CONFLICT (network) DO NOTHING""",
++ "%s" % network,
++ cc,
++ "Amazon AWS IP feed",
++ None,
++ None,
++ is_anycast,
++ )
++
++
++ def _update_overrides_for_spamhaus_drop(self):
++ downloader = location.importer.Downloader()
++
++ ip_urls = [
++ "https://www.spamhaus.org/drop/drop.txt",
++ "https://www.spamhaus.org/drop/edrop.txt",
++ "https://www.spamhaus.org/drop/dropv6.txt"
++ ]
++
++ asn_urls = [
++ "https://www.spamhaus.org/drop/asndrop.txt"
++ ]
++
++ for url in ip_urls:
++ try:
++ with downloader.request(url, return_blocks=False) as f:
++ fcontent = f.body.readlines()
++ except Exception as e:
++ log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e))
++ return
++
++ # Iterate through every line, filter comments and add remaining networks to
++ # the override table in case they are valid...
++ with self.db.transaction():
++ for sline in fcontent:
++
++ # The response is assumed to be encoded in UTF-8...
++ sline = sline.decode("utf-8")
++
++ # Comments start with a semicolon...
++ if sline.startswith(";"):
++ continue
++
++ # Extract network and ignore anything afterwards...
++ try:
++ network = ipaddress.ip_network(sline.split()[0], strict=False)
++ except ValueError:
++ log.error("Unable to parse line: %s" % sline)
++ continue
++
++ # Sanitize parsed networks...
++ if not self._check_parsed_network(network):
++ log.warning("Skipping bogus network found in Spamhaus DROP URL %s: %s" % \
++ (url, network))
++ continue
++
++ # Conduct SQL statement...
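++ # (the ON CONFLICT clause below makes this idempotent: a network that already
++ # has an override row simply gets its is_drop flag set to True)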
++ self.db.execute(""" ++ INSERT INTO network_overrides( ++ network, ++ source, ++ is_drop ++ ) VALUES (%s, %s, %s) ++ ON CONFLICT (network) DO UPDATE SET is_drop = True""", ++ "%s" % network, ++ "Spamhaus DROP lists", ++ True ++ ) ++ ++ for url in asn_urls: ++ try: ++ with downloader.request(url, return_blocks=False) as f: ++ fcontent = f.body.readlines() ++ except Exception as e: ++ log.error("Unable to download Spamhaus DROP URL %s: %s" % (url, e)) ++ return ++ ++ # Iterate through every line, filter comments and add remaining ASNs to ++ # the override table in case they are valid... ++ with self.db.transaction(): ++ for sline in fcontent: ++ ++ # The response is assumed to be encoded in UTF-8... ++ sline = sline.decode("utf-8") ++ ++ # Comments start with a semicolon... ++ if sline.startswith(";"): ++ continue ++ ++ # Throw away anything after the first space... ++ sline = sline.split()[0] ++ ++ # ... strip the "AS" prefix from it ... ++ sline = sline.strip("AS") ++ ++ # ... and convert it into an integer. Voila. ++ asn = int(sline) ++ ++ # Filter invalid ASNs... ++ if not self._check_parsed_asn(asn): ++ log.warning("Skipping bogus ASN found in Spamhaus DROP URL %s: %s" % \ ++ (url, asn)) ++ continue ++ ++ # Conduct SQL statement... ++ self.db.execute(""" ++ INSERT INTO autnum_overrides( ++ number, ++ source, ++ is_drop ++ ) VALUES (%s, %s, %s) ++ ON CONFLICT (number) DO UPDATE SET is_drop = True""", ++ "%s" % asn, ++ "Spamhaus ASN-DROP list", ++ True ++ ) ++ ++ @staticmethod ++ def _parse_bool(block, key): ++ val = block.get(key) ++ ++ # There is no point to proceed when we got None ++ if val is None: ++ return ++ ++ # Convert to lowercase ++ val = val.lower() ++ ++ # True ++ if val in ("yes", "1"): ++ return True ++ ++ # False ++ if val in ("no", "0"): ++ return False ++ ++ # Default to None ++ return None ++ ++ def handle_import_countries(self, ns): ++ with self.db.transaction(): ++ # Drop all data that we have ++ self.db.execute("TRUNCATE TABLE countries") ++ ++ for file in ns.file: ++ for line in file: ++ line = line.rstrip() ++ ++ # Ignore any comments ++ if line.startswith("#"): ++ continue ++ ++ try: ++ country_code, continent_code, name = line.split(maxsplit=2) ++ except: ++ log.warning("Could not parse line: %s" % line) ++ continue ++ ++ self.db.execute("INSERT INTO countries(country_code, name, continent_code) \ ++ VALUES(%s, %s, %s) ON CONFLICT DO NOTHING", country_code, name, continent_code) ++ ++ ++def split_line(line): ++ key, colon, val = line.partition(":") ++ ++ # Strip any excess space ++ key = key.strip() ++ val = val.strip() ++ ++ return key, val ++ ++def main(): ++ # Run the command line interface ++ c = CLI() ++ c.run() ++ ++main() +diff --git a/src/scripts/location.in b/src/scripts/location.in +new file mode 100644 +index 0000000..233cea0 +--- /dev/null ++++ b/src/scripts/location.in +@@ -0,0 +1,644 @@ ++#!/usr/bin/python3 ++############################################################################### ++# # ++# libloc - A library to determine the location of someone on the Internet # ++# # ++# Copyright (C) 2017-2021 IPFire Development Team # ++# # ++# This library is free software; you can redistribute it and/or # ++# modify it under the terms of the GNU Lesser General Public # ++# License as published by the Free Software Foundation; either # ++# version 2.1 of the License, or (at your option) any later version. 
# ++# # ++# This library is distributed in the hope that it will be useful, # ++# but WITHOUT ANY WARRANTY; without even the implied warranty of # ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # ++# Lesser General Public License for more details. # ++# # ++############################################################################### ++ ++import argparse ++import datetime ++import ipaddress ++import logging ++import os ++import re ++import shutil ++import socket ++import sys ++import time ++ ++# Load our location module ++import location ++import location.downloader ++import location.export ++ ++from location.i18n import _ ++ ++# Setup logging ++log = logging.getLogger("location") ++ ++# Output formatters ++ ++class CLI(object): ++ def parse_cli(self): ++ parser = argparse.ArgumentParser( ++ description=_("Location Database Command Line Interface"), ++ ) ++ subparsers = parser.add_subparsers() ++ ++ # Global configuration flags ++ parser.add_argument("--debug", action="store_true", ++ help=_("Enable debug output")) ++ parser.add_argument("--quiet", action="store_true", ++ help=_("Enable quiet mode")) ++ ++ # version ++ parser.add_argument("--version", action="version", ++ version="%(prog)s @VERSION@") ++ ++ # database ++ parser.add_argument("--database", "-d", ++ default="@databasedir@/database.db", help=_("Path to database"), ++ ) ++ ++ # public key ++ parser.add_argument("--public-key", "-k", ++ default="@databasedir@/signing-key.pem", help=_("Public Signing Key"), ++ ) ++ ++ # Show the database version ++ version = subparsers.add_parser("version", ++ help=_("Show database version")) ++ version.set_defaults(func=self.handle_version) ++ ++ # lookup an IP address ++ lookup = subparsers.add_parser("lookup", ++ help=_("Lookup one or multiple IP addresses"), ++ ) ++ lookup.add_argument("address", nargs="+") ++ lookup.set_defaults(func=self.handle_lookup) ++ ++ # Dump the whole database ++ dump = subparsers.add_parser("dump", ++ help=_("Dump the entire database"), ++ ) ++ dump.add_argument("output", nargs="?", type=argparse.FileType("w")) ++ dump.set_defaults(func=self.handle_dump) ++ ++ # Update ++ update = subparsers.add_parser("update", help=_("Update database")) ++ update.add_argument("--cron", ++ help=_("Update the library only once per interval"), ++ choices=("daily", "weekly", "monthly"), ++ ) ++ update.set_defaults(func=self.handle_update) ++ ++ # Verify ++ verify = subparsers.add_parser("verify", ++ help=_("Verify the downloaded database")) ++ verify.set_defaults(func=self.handle_verify) ++ ++ # Get AS ++ get_as = subparsers.add_parser("get-as", ++ help=_("Get information about one or multiple Autonomous Systems"), ++ ) ++ get_as.add_argument("asn", nargs="+") ++ get_as.set_defaults(func=self.handle_get_as) ++ ++ # Search for AS ++ search_as = subparsers.add_parser("search-as", ++ help=_("Search for Autonomous Systems that match the string"), ++ ) ++ search_as.add_argument("query", nargs=1) ++ search_as.set_defaults(func=self.handle_search_as) ++ ++ # List all networks in an AS ++ list_networks_by_as = subparsers.add_parser("list-networks-by-as", ++ help=_("Lists all networks in an AS"), ++ ) ++ list_networks_by_as.add_argument("asn", nargs=1, type=int) ++ list_networks_by_as.add_argument("--family", choices=("ipv6", "ipv4")) ++ list_networks_by_as.add_argument("--format", ++ choices=location.export.formats.keys(), default="list") ++ list_networks_by_as.set_defaults(func=self.handle_list_networks_by_as) ++ ++ # List all networks in a country ++ 
list_networks_by_cc = subparsers.add_parser("list-networks-by-cc", ++ help=_("Lists all networks in a country"), ++ ) ++ list_networks_by_cc.add_argument("country_code", nargs=1) ++ list_networks_by_cc.add_argument("--family", choices=("ipv6", "ipv4")) ++ list_networks_by_cc.add_argument("--format", ++ choices=location.export.formats.keys(), default="list") ++ list_networks_by_cc.set_defaults(func=self.handle_list_networks_by_cc) ++ ++ # List all networks with flags ++ list_networks_by_flags = subparsers.add_parser("list-networks-by-flags", ++ help=_("Lists all networks with flags"), ++ ) ++ list_networks_by_flags.add_argument("--anonymous-proxy", ++ action="store_true", help=_("Anonymous Proxies"), ++ ) ++ list_networks_by_flags.add_argument("--satellite-provider", ++ action="store_true", help=_("Satellite Providers"), ++ ) ++ list_networks_by_flags.add_argument("--anycast", ++ action="store_true", help=_("Anycasts"), ++ ) ++ list_networks_by_flags.add_argument("--drop", ++ action="store_true", help=_("Hostile Networks safe to drop"), ++ ) ++ list_networks_by_flags.add_argument("--family", choices=("ipv6", "ipv4")) ++ list_networks_by_flags.add_argument("--format", ++ choices=location.export.formats.keys(), default="list") ++ list_networks_by_flags.set_defaults(func=self.handle_list_networks_by_flags) ++ ++ # List bogons ++ list_bogons = subparsers.add_parser("list-bogons", ++ help=_("Lists all bogons"), ++ ) ++ list_bogons.add_argument("--family", choices=("ipv6", "ipv4")) ++ list_bogons.add_argument("--format", ++ choices=location.export.formats.keys(), default="list") ++ list_bogons.set_defaults(func=self.handle_list_bogons) ++ ++ # List countries ++ list_countries = subparsers.add_parser("list-countries", ++ help=_("Lists all countries"), ++ ) ++ list_countries.add_argument("--show-name", ++ action="store_true", help=_("Show the name of the country"), ++ ) ++ list_countries.add_argument("--show-continent", ++ action="store_true", help=_("Show the continent"), ++ ) ++ list_countries.set_defaults(func=self.handle_list_countries) ++ ++ # Export ++ export = subparsers.add_parser("export", ++ help=_("Exports data in many formats to load it into packet filters"), ++ ) ++ export.add_argument("--format", help=_("Output format"), ++ choices=location.export.formats.keys(), default="list") ++ export.add_argument("--directory", help=_("Output directory")) ++ export.add_argument("--family", ++ help=_("Specify address family"), choices=("ipv6", "ipv4"), ++ ) ++ export.add_argument("objects", nargs="*", help=_("List country codes or ASNs to export")) ++ export.set_defaults(func=self.handle_export) ++ ++ args = parser.parse_args() ++ ++ # Configure logging ++ if args.debug: ++ location.logger.set_level(logging.DEBUG) ++ elif args.quiet: ++ location.logger.set_level(logging.WARNING) ++ ++ # Print usage if no action was given ++ if not "func" in args: ++ parser.print_usage() ++ sys.exit(2) ++ ++ return args ++ ++ def run(self): ++ # Parse command line arguments ++ args = self.parse_cli() ++ ++ # Open database ++ try: ++ db = location.Database(args.database) ++ except FileNotFoundError as e: ++ # Allow continuing without a database ++ if args.func == self.handle_update: ++ db = None ++ ++ else: ++ sys.stderr.write("location: Could not open database %s: %s\n" \ ++ % (args.database, e)) ++ sys.exit(1) ++ ++ # Translate family (if present) ++ if "family" in args: ++ if args.family == "ipv6": ++ args.family = socket.AF_INET6 ++ elif args.family == "ipv4": ++ args.family = socket.AF_INET ++ else: ++ 
args.family = 0 ++ ++ # Call function ++ try: ++ ret = args.func(db, args) ++ ++ # Catch invalid inputs ++ except ValueError as e: ++ sys.stderr.write("%s\n" % e) ++ ret = 2 ++ ++ # Catch any other exceptions ++ except Exception as e: ++ sys.stderr.write("%s\n" % e) ++ ret = 1 ++ ++ # Return with exit code ++ if ret: ++ sys.exit(ret) ++ ++ # Otherwise just exit ++ sys.exit(0) ++ ++ def handle_version(self, db, ns): ++ """ ++ Print the version of the database ++ """ ++ t = time.strftime( ++ "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at), ++ ) ++ ++ print(t) ++ ++ def handle_lookup(self, db, ns): ++ ret = 0 ++ ++ format = " %-24s: %s" ++ ++ for address in ns.address: ++ try: ++ network = db.lookup(address) ++ except ValueError: ++ print(_("Invalid IP address: %s") % address, file=sys.stderr) ++ return 2 ++ ++ args = { ++ "address" : address, ++ "network" : network, ++ } ++ ++ # Nothing found? ++ if not network: ++ print(_("Nothing found for %(address)s") % args, file=sys.stderr) ++ ret = 1 ++ continue ++ ++ print("%s:" % address) ++ print(format % (_("Network"), network)) ++ ++ # Print country ++ if network.country_code: ++ country = db.get_country(network.country_code) ++ ++ print(format % ( ++ _("Country"), ++ country.name if country else network.country_code), ++ ) ++ ++ # Print AS information ++ if network.asn: ++ autonomous_system = db.get_as(network.asn) ++ ++ print(format % ( ++ _("Autonomous System"), ++ autonomous_system or "AS%s" % network.asn), ++ ) ++ ++ # Anonymous Proxy ++ if network.has_flag(location.NETWORK_FLAG_ANONYMOUS_PROXY): ++ print(format % ( ++ _("Anonymous Proxy"), _("yes"), ++ )) ++ ++ # Satellite Provider ++ if network.has_flag(location.NETWORK_FLAG_SATELLITE_PROVIDER): ++ print(format % ( ++ _("Satellite Provider"), _("yes"), ++ )) ++ ++ # Anycast ++ if network.has_flag(location.NETWORK_FLAG_ANYCAST): ++ print(format % ( ++ _("Anycast"), _("yes"), ++ )) ++ ++ # Hostile Network ++ if network.has_flag(location.NETWORK_FLAG_DROP): ++ print(format % ( ++ _("Hostile Network safe to drop"), _("yes"), ++ )) ++ ++ return ret ++ ++ def handle_dump(self, db, ns): ++ # Use output file or write to stdout ++ f = ns.output or sys.stdout ++ ++ # Format everything like this ++ format = "%-24s %s\n" ++ ++ # Write metadata ++ f.write("#\n# Location Database Export\n#\n") ++ ++ f.write("# Generated: %s\n" % time.strftime( ++ "%a, %d %b %Y %H:%M:%S GMT", time.gmtime(db.created_at), ++ )) ++ ++ if db.vendor: ++ f.write("# Vendor: %s\n" % db.vendor) ++ ++ if db.license: ++ f.write("# License: %s\n" % db.license) ++ ++ f.write("#\n") ++ ++ if db.description: ++ for line in db.description.splitlines(): ++ line = "# %s" % line ++ f.write("%s\n" % line.rstrip()) ++ ++ f.write("#\n") ++ ++ # Iterate over all ASes ++ for a in db.ases: ++ f.write("\n") ++ f.write(format % ("aut-num:", "AS%s" % a.number)) ++ f.write(format % ("name:", a.name)) ++ ++ flags = { ++ location.NETWORK_FLAG_ANONYMOUS_PROXY : "is-anonymous-proxy:", ++ location.NETWORK_FLAG_SATELLITE_PROVIDER : "is-satellite-provider:", ++ location.NETWORK_FLAG_ANYCAST : "is-anycast:", ++ location.NETWORK_FLAG_DROP : "drop:", ++ } ++ ++ # Iterate over all networks ++ for n in db.networks: ++ f.write("\n") ++ f.write(format % ("net:", n)) ++ ++ if n.country_code: ++ f.write(format % ("country:", n.country_code)) ++ ++ if n.asn: ++ f.write(format % ("aut-num:", n.asn)) ++ ++ # Print all flags ++ for flag in flags: ++ if n.has_flag(flag): ++ f.write(format % (flags[flag], "yes")) ++ ++ def handle_get_as(self, db, ns): ++ """ ++ 
Gets information about Autonomous Systems
++		"""
++		ret = 0
++
++		for asn in ns.asn:
++			try:
++				asn = int(asn)
++			except ValueError:
++				print(_("Invalid ASN: %s") % asn, file=sys.stderr)
++				ret = 1
++				continue
++
++			# Fetch AS from database
++			a = db.get_as(asn)
++
++			# Nothing found
++			if not a:
++				print(_("Could not find AS%s") % asn, file=sys.stderr)
++				ret = 1
++				continue
++
++			print(_("AS%(asn)s belongs to %(name)s") % { "asn" : a.number, "name" : a.name })
++
++		return ret
++
++	def handle_search_as(self, db, ns):
++		for query in ns.query:
++			# Print all matching ASes
++			for a in db.search_as(query):
++				print(a)
++
++	def handle_update(self, db, ns):
++		if ns.cron and db:
++			now = time.time()
++
++			if ns.cron == "daily":
++				delta = datetime.timedelta(days=1)
++			elif ns.cron == "weekly":
++				delta = datetime.timedelta(days=7)
++			elif ns.cron == "monthly":
++				delta = datetime.timedelta(days=30)
++
++			delta = delta.total_seconds()
++
++			# Check if the database has recently been updated
++			if db.created_at >= (now - delta):
++				log.info(
++					_("The database has been updated recently"),
++				)
++				return 3
++
++		# Fetch the timestamp we need from DNS
++		t = location.discover_latest_version()
++
++		# Check the version of the local database
++		if db and t and db.created_at >= t:
++			log.info("Already on the latest version")
++			return
++
++		# Download the database into the correct directory
++		tmpdir = os.path.dirname(ns.database)
++
++		# Create a downloader
++		d = location.downloader.Downloader()
++
++		# Try downloading a new database
++		try:
++			t = d.download(public_key=ns.public_key, timestamp=t, tmpdir=tmpdir)
++
++		# If no file could be downloaded, log a message
++		except FileNotFoundError as e:
++			log.error("Could not download a new database")
++			return 1
++
++		# If we have not received a new file, there is nothing to do
++		if not t:
++			return 3
++
++		# Move temporary file to destination
++		shutil.move(t.name, ns.database)
++
++		return 0
++
++	def handle_verify(self, db, ns):
++		# Verify the database
++		with open(ns.public_key, "r") as f:
++			if not db.verify(f):
++				log.error("Could not verify database")
++				return 1
++
++		# Success
++		log.debug("Database successfully verified")
++		return 0
++
++	def __get_output_formatter(self, ns):
++		try:
++			cls = location.export.formats[ns.format]
++		except KeyError:
++			cls = location.export.OutputFormatter
++
++		return cls
++
++	def handle_list_countries(self, db, ns):
++		for country in db.countries:
++			line = [
++				country.code,
++			]
++
++			if ns.show_continent:
++				line.append(country.continent_code)
++
++			if ns.show_name:
++				line.append(country.name)
++
++			# Format the output
++			line = " ".join(line)
++
++			# Print the output
++			print(line)
++
++	def handle_list_networks_by_as(self, db, ns):
++		writer = self.__get_output_formatter(ns)
++
++		for asn in ns.asn:
++			f = writer("AS%s" % asn, f=sys.stdout)
++
++			# Print all matching networks
++			for n in db.search_networks(asns=[asn], family=ns.family):
++				f.write(n)
++
++			f.finish()
++
++	def handle_list_networks_by_cc(self, db, ns):
++		writer = self.__get_output_formatter(ns)
++
++		for country_code in ns.country_code:
++			# Open standard output
++			f = writer(country_code, f=sys.stdout)
++
++			# Print all matching networks
++			for n in db.search_networks(country_codes=[country_code], family=ns.family):
++				f.write(n)
++
++			f.finish()
++
++	def handle_list_networks_by_flags(self, db, ns):
++		flags = 0
++
++		if ns.anonymous_proxy:
++			flags |= location.NETWORK_FLAG_ANONYMOUS_PROXY
++
++		if ns.satellite_provider:
++			flags |= location.NETWORK_FLAG_SATELLITE_PROVIDER
++
++		if ns.anycast:
++			flags |= location.NETWORK_FLAG_ANYCAST
++
++		if ns.drop:
++			flags |= location.NETWORK_FLAG_DROP
++
++		if not flags:
++			raise ValueError(_("You must pass at least one flag"))
++
++		writer = self.__get_output_formatter(ns)
++		f = writer("custom", f=sys.stdout)
++
++		for n in db.search_networks(flags=flags, family=ns.family):
++			f.write(n)
++
++		f.finish()
++
++	def handle_list_bogons(self, db, ns):
++		writer = self.__get_output_formatter(ns)
++		f = writer("bogons", f=sys.stdout)
++
++		for n in db.list_bogons(family=ns.family):
++			f.write(n)
++
++		f.finish()
++
++	def handle_export(self, db, ns):
++		countries, asns = [], []
++
++		# Translate family
++		if ns.family:
++			families = [ ns.family ]
++		else:
++			families = [ socket.AF_INET6, socket.AF_INET ]
++
++		for object in ns.objects:
++			m = re.match(r"^AS(\d+)$", object)
++			if m:
++				object = int(m.group(1))
++
++				asns.append(object)
++
++			elif location.country_code_is_valid(object) \
++					or object in ("A1", "A2", "A3", "XD"):
++				countries.append(object)
++
++			else:
++				log.warning("Invalid argument: %s" % object)
++				continue
++
++		# Default to exporting all countries
++		if not countries and not asns:
++			countries = ["A1", "A2", "A3", "XD"] + [country.code for country in db.countries]
++
++		# Select the output format
++		writer = self.__get_output_formatter(ns)
++
++		e = location.export.Exporter(db, writer)
++		e.export(ns.directory, countries=countries, asns=asns, families=families)
++
++
++def format_timedelta(t):
++	s = []
++
++	if t.days:
++		s.append(
++			_("One Day", "%(days)s Days", t.days) % { "days" : t.days, }
++		)
++
++	hours = t.seconds // 3600
++	if hours:
++		s.append(
++			_("One Hour", "%(hours)s Hours", hours) % { "hours" : hours, }
++		)
++
++	minutes = (t.seconds % 3600) // 60
++	if minutes:
++		s.append(
++			_("One Minute", "%(minutes)s Minutes", minutes) % { "minutes" : minutes, }
++		)
++
++	seconds = t.seconds % 60
++	if seconds:
++		s.append(
++			_("One Second", "%(seconds)s Seconds", seconds) % { "seconds" : seconds, }
++		)
++
++	if not s:
++		return _("Now")
++
++	return _("%s ago") % ", ".join(s)
++
++def main():
++	# Run the command line interface
++	c = CLI()
++	c.run()
++
++main()
diff --git a/debian/patches/0002-downloader-Fetch-__version__-from-C-module.patch b/debian/patches/0002-downloader-Fetch-__version__-from-C-module.patch
new file mode 100644
index 0000000..3262d80
--- /dev/null
+++ b/debian/patches/0002-downloader-Fetch-__version__-from-C-module.patch
@@ -0,0 +1,39 @@
+From: Michael Tremer
+Date: Thu, 14 Apr 2022 20:44:29 +0000
+Subject: downloader: Fetch __version__ from C module
+
+Signed-off-by: Michael Tremer
+---
+ src/python/location/downloader.py | 3 +--
+ src/python/locationmodule.c       | 4 ++++
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/src/python/location/downloader.py b/src/python/location/downloader.py
+index 05f7872..b9e0c22 100644
+--- a/src/python/location/downloader.py
++++ b/src/python/location/downloader.py
+@@ -28,8 +28,7 @@ import urllib.error
+ import urllib.parse
+ import urllib.request
+
+-from . import __version__
+-from _location import Database, DATABASE_VERSION_LATEST
++from _location import Database, DATABASE_VERSION_LATEST, __version__
+
+ DATABASE_FILENAME = "location.db.xz"
+ MIRRORS = (
+diff --git a/src/python/locationmodule.c b/src/python/locationmodule.c
+index 15f661b..09cd5dd 100644
+--- a/src/python/locationmodule.c
++++ b/src/python/locationmodule.c
+@@ -117,6 +117,10 @@ PyMODINIT_FUNC PyInit__location(void) {
+ 	if (!m)
+ 		return NULL;
+
++	// Version
++	if (PyModule_AddStringConstant(m, "__version__", PACKAGE_VERSION))
++		return NULL;
++
+ 	// AS
+ 	if (PyType_Ready(&ASType) < 0)
+ 		return NULL;
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644
index 0000000..7e1ed4b
--- /dev/null
+++ b/debian/patches/series
@@ -0,0 +1,2 @@
+0001-Make-sources-around-that-we-can-run-tests-without-lo.patch
+0002-downloader-Fetch-__version__-from-C-module.patch