git.ipfire.org Git - people/ms/libloc.git/commitdiff
python: Import classic RIR importer from database repository
author Michael Tremer <michael.tremer@ipfire.org>
Tue, 12 May 2020 13:20:42 +0000 (13:20 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Tue, 12 May 2020 13:20:42 +0000 (13:20 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/python/importer.py
src/python/location-importer.in

index 4cf3aa598aa34d3e432c409fcfbf7bc6582a13fb..3abbdc5852b6e2f1b2d5259bc76b053f466ebe1a 100644 (file)
@@ -25,6 +25,33 @@ import urllib.request
 log = logging.getLogger("location.importer")
 log.propagate = 1
 
+# URLs of the WHOIS database dumps that the importer downloads,
+# grouped by the registry that publishes them.
+WHOIS_SOURCES = (
+       # African Network Information Centre
+       "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz",
+
+       # Asia Pacific Network Information Centre
+       "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
+       "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
+       "https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
+       "https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
+       "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
+       "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",
+
+       # American Registry for Internet Numbers
+       "https://ftp.arin.net/pub/rr/arin.db",
+
+       # Latin America and Caribbean Network Information Centre
+       # XXX ??? (no source is listed for this registry yet)
+
+       # Réseaux IP Européens
+       "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
+       "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
+       "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
+       "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
+       "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
+       "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
+)
+
 class Downloader(object):
        def __init__(self):
                self.proxy = None
@@ -36,21 +63,24 @@ class Downloader(object):
                log.info("Using proxy %s" % url)
                self.proxy = url
 
-       def request(self, url, data=None):
+       def request(self, url, data=None, return_blocks=False):
                req = urllib.request.Request(url, data=data)
 
                # Configure proxy
                if self.proxy:
                        req.set_proxy(self.proxy, "http")
 
-               return DownloaderContext(self, req)
+               return DownloaderContext(self, req, return_blocks=return_blocks)
 
 
 class DownloaderContext(object):
-       def __init__(self, downloader, request):
+       def __init__(self, downloader, request, return_blocks=False):
                self.downloader = downloader
                self.request = request
 
+               # Should we return one block or a single line?
+               self.return_blocks = return_blocks
+
                # Save the response object
                self.response = None
 
@@ -74,19 +104,23 @@ class DownloaderContext(object):
                """
                        Makes the object iterable by going through each block
                """
+               if self.return_blocks:
+                       return iterate_over_blocks(self.body)
+
                # Store body
-               body = self.body
+               #body = self.body
 
-               while True:
-                       line = body.readline()
-                       if not line:
-                               break
+               #while True:
+               #       line = body.readline()
+               #       if not line:
+               #               break
 
-                       # Decode the line
-                       line = line.decode()
+               #       # Decode the line
+               #       print(line)
+               #       line = line.decode()
 
-                       # Strip the ending
-                       yield line.rstrip()
+               #       # Strip the ending
+               #       yield line.rstrip()
 
        @property
        def headers(self):
@@ -103,5 +137,54 @@ class DownloaderContext(object):
                        Returns a file-like object with the decoded content
                        of the response.
                """
+               content_type = self.get_header("Content-Type")
+
+               # Decompress any gzipped response on the fly
+               if content_type in ("application/x-gzip", "application/gzip"):
+                       return gzip.GzipFile(fileobj=self.response, mode="rb")
+
                # Return the response by default
                return self.response
+
+
+def iterate_over_blocks(f, charsets=("utf-8", "latin1")):
+       """
+               Iterates over the lines of f and yields each block of
+               consecutive non-empty lines as a list of strings.
+
+               Lines are decoded with the first of charsets that works.
+               Lines starting with "#" or "%" are dropped, and anything
+               from a "#" on is removed from the remaining lines. An empty
+               line ends the current block.
+       """
+       block = []
+
+       for line in f:
+               # Convert to string (unless we have been given strings already)
+               if isinstance(line, bytes):
+                       for charset in charsets:
+                               try:
+                                       line = line.decode(charset)
+                               except UnicodeDecodeError:
+                                       continue
+                               else:
+                                       break
+                       else:
+                               # No charset matched, so replace what cannot be decoded
+                               # instead of carrying on with a bytes object
+                               line = line.decode("utf-8", errors="replace")
+
+               # Skip commented lines
+               if line.startswith(("#", "%")):
+                       continue
+
+               # Remove any comments at the end of line
+               line, delim, comment = line.partition("#")
+
+               # Strip line-endings and any whitespace before the comment
+               line = line.rstrip()
+
+               # If only a comment was on this line, we move on
+               # without ending the block
+               if delim and not line:
+                       continue
+
+               if line:
+                       block.append(line)
+                       continue
+
+               # End the block on an empty line
+               if block:
+                       yield block
+
+               # Reset the block
+               block = []
+
+       # Return the last block if the input did not end with an empty line
+       if block:
+               yield block
index 564ada9700cc7b9222cc96b19e6f5009dd24bd45..976f1549e6036268e156ef14a34592cd99c1788a 100644 (file)
 ###############################################################################
 
 import argparse
+import ipaddress
 import logging
+import math
+import re
 import sys
 
 # Load our location module
@@ -31,11 +34,18 @@ from location.i18n import _
 log = logging.getLogger("location.importer")
 log.propagate = 1
 
+# Values that mark an inetnum/inet6num object as unusable:
+# _parse_inetnum_block() drops any block whose start address (inetnum)
+# or whose prefix (inet6num) is one of these.
+INVALID_ADDRESSES = (
+       "0.0.0.0",
+       "::/0",
+       "0::/0",
+)
+
 class CLI(object):
        def parse_cli(self):
                parser = argparse.ArgumentParser(
                        description=_("Location Importer Command Line Interface"),
                )
+               subparsers = parser.add_subparsers()
 
                # Global configuration flags
                parser.add_argument("--debug", action="store_true",
@@ -55,12 +65,21 @@ class CLI(object):
                parser.add_argument("--database-password", required=True,
                        help=_("Database Password"), metavar=_("PASSWORD"))
 
+               # Update WHOIS
+               update_whois = subparsers.add_parser("update-whois", help=_("Update WHOIS Information"))
+               update_whois.set_defaults(func=self.handle_update_whois)
+
                args = parser.parse_args()
 
                # Enable debug logging
                if args.debug:
                        log.setLevel(logging.DEBUG)
 
+               # Print usage if no action was given
+               if not "func" in args:
+                       parser.print_usage()
+                       sys.exit(2)
+
                return args
 
        def run(self):
@@ -68,10 +87,10 @@ class CLI(object):
                args = self.parse_cli()
 
                # Initialise database
-               db = self._setup_database(args)
+               self.db = self._setup_database(args)
 
                # Call function
-               ret = self.handle_import(db, args)
+               ret = args.func(args)
 
                # Return with exit code
                if ret:
@@ -92,15 +111,248 @@ class CLI(object):
 
                with db.transaction():
                        db.execute("""
-                               CREATE TABLE IF NOT EXISTS asnums(number integer, name text);
-                               CREATE UNIQUE INDEX IF NOT EXISTS asnums_number ON asnums(number);
+                               -- autnums
+                               CREATE TABLE IF NOT EXISTS autnums(number integer, name text, organization text);
+                               CREATE UNIQUE INDEX IF NOT EXISTS autnums_number ON autnums(number);
+
+                               -- inetnums
+                               CREATE TABLE IF NOT EXISTS inetnums(network inet, name text, country text, description text);
+                               CREATE UNIQUE INDEX IF NOT EXISTS inetnums_networks ON inetnums(network);
+                               CREATE INDEX IF NOT EXISTS inetnums_family ON inetnums(family(network));
+
+                               -- organizations
+                               CREATE TABLE IF NOT EXISTS organizations(handle text, name text, country text);
+                               CREATE UNIQUE INDEX IF NOT EXISTS organizations_handle ON organizations(handle);
+
+                               -- routes
+                               CREATE TABLE IF NOT EXISTS routes(network inet, asn integer);
+                               CREATE UNIQUE INDEX IF NOT EXISTS routes_network ON routes(network);
+                               CREATE INDEX IF NOT EXISTS routes_family ON routes(family(network));
                        """)
 
                return db
 
-       def handle_import(self, db, ns):
-               pass
+       def handle_update_whois(self, ns):
+               """
+                       Downloads every source listed in WHOIS_SOURCES and
+                       passes each block of it on to the parser.
+
+                       Each source is imported in a database transaction
+                       of its own.
+               """
+               fetcher = location.importer.Downloader()
+
+               for url in location.importer.WHOIS_SOURCES:
+                       # Open the transaction first and then start the download
+                       with self.db.transaction(), fetcher.request(url, return_blocks=True) as blocks:
+                               for block in blocks:
+                                       self._parse_block(block)
+
+       def _parse_block(self, block):
+               """
+                       Hands a block over to the parser for its object type,
+                       which is taken from the first line of the block.
+
+                       Known object types that we are not interested in are
+                       dropped and anything else is logged as a warning.
+               """
+               first = block[0]
+
+               # inetnum
+               if first.startswith(("inet6num:", "inetnum:")):
+                       return self._parse_inetnum_block(block)
+
+               # route
+               if first.startswith(("route6:", "route:")):
+                       return self._parse_route_block(block)
+
+               # aut-num
+               if first.startswith("aut-num:"):
+                       return self._parse_autnum_block(block)
+
+               # organisation
+               if first.startswith("organisation:"):
+                       return self._parse_org_block(block)
+
+               # Object types that are known, but ignored
+               ignored = (
+                       "person:",
+                       "domain:",
+                       "mntner:",
+                       "as-block:",
+                       "as-set:",
+                       "route-set:",
+                       "role:",
+                       "key-cert:",
+                       "irt:",
+               )
+               if first.startswith(ignored):
+                       return
+
+               # Log any unknown blocks
+               log.warning("Unknown block:")
+               for entry in block:
+                       log.warning(entry)
+
+       def _parse_autnum_block(self, block):
+               """
+                       Imports an aut-num block into the autnums table.
+
+                       The AS number, name and organisation handle are taken
+                       from the "aut-num", "as-name" and "org" attributes.
+                       Blocks without a parsable AS number are skipped.
+               """
+               log.debug("Parsing autnum block:")
+
+               autnum = {}
+               for line in block:
+                       # Split line
+                       key, val = split_line(line)
+
+                       if key == "aut-num":
+                               m = re.match(r"^(AS|as)(\d+)", val)
+                               if m:
+                                       autnum["asn"] = m.group(2)
+
+                       elif key in ("as-name", "org"):
+                               autnum[key] = val
+
+               # Skip objects without an AS number. The number identifies the
+               # row and NULL would never match the ON CONFLICT clause, so such
+               # rows would pile up with every import.
+               if "asn" not in autnum:
+                       return
+
+               # Insert into database
+               self.db.execute("INSERT INTO autnums(number, name, organization) \
+                       VALUES(%s, %s, %s) ON CONFLICT (number) DO UPDATE SET \
+                               name = excluded.name, organization = excluded.organization",
+                       autnum.get("asn"), autnum.get("as-name"), autnum.get("org"),
+               )
+
+       def _parse_inetnum_block(self, block):
+               """
+                       Imports an inetnum or inet6num block into the inetnums table.
+
+                       inetnum objects are given as an address range
+                       ("first - last"), which is converted into the networks
+                       that cover exactly this range. That is more than one
+                       network if the range is not aligned to a prefix, and
+                       each of them is stored with the same name, country and
+                       description. inet6num objects come as a prefix already.
+
+                       Blocks with an address listed in INVALID_ADDRESSES or
+                       with an address that cannot be parsed are skipped.
+               """
+               inetnum = {}
+               networks = []
+
+               for line in block:
+                       # Split line
+                       key, val = split_line(line)
+
+                       if key == "inetnum":
+                               start_address, delim, end_address = val.partition("-")
+
+                               # Strip any excess space
+                               start_address, end_address = start_address.strip(), end_address.strip()
+
+                               # Skip invalid blocks
+                               if start_address in INVALID_ADDRESSES:
+                                       return
+
+                               # Convert the range into networks without guessing a prefix
+                               # (TypeError is raised if both addresses are of a different family)
+                               try:
+                                       networks = list(ipaddress.summarize_address_range(
+                                               ipaddress.ip_address(start_address),
+                                               ipaddress.ip_address(end_address),
+                                       ))
+                               except (TypeError, ValueError):
+                                       log.warning("Could not parse line: %s" % line)
+                                       return
+
+                       elif key == "inet6num":
+                               # Skip invalid blocks
+                               if val in INVALID_ADDRESSES:
+                                       return
+
+                               # Do not let one broken prefix abort the whole import
+                               try:
+                                       networks = [ipaddress.ip_network(val, strict=False)]
+                               except ValueError:
+                                       log.warning("Could not parse line: %s" % line)
+                                       return
+
+                       elif key == "netname":
+                               inetnum[key] = val
+
+                       elif key == "country":
+                               if val == "UNITED STATES":
+                                       val = "US"
+
+                               inetnum[key] = val.upper()
+
+                       elif key == "descr":
+                               # Multiple descriptions are joined line by line
+                               if key in inetnum:
+                                       inetnum[key] += "\n%s" % val
+                               else:
+                                       inetnum[key] = val
+
+               query = "INSERT INTO inetnums(network, name, country, description) \
+                       VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO \
+                       UPDATE SET name = excluded.name, country = excluded.country, description = excluded.description"
+
+               # Objects without any network are skipped as there is nothing to loop over
+               for network in networks:
+                       self.db.execute(query,
+                               "%s" % network, inetnum.get("netname"), inetnum.get("country"), inetnum.get("descr"),
+                       )
+
+       def _parse_org_block(self, block):
+               """
+                       Imports an organisation block into the organizations table.
+
+                       Only the "organisation", "org-name" and "country"
+                       attributes are kept. If an attribute shows up more than
+                       once, the last value is used.
+               """
+               wanted = ("organisation", "org-name", "country")
+
+               # Collect all attributes that we are interested in
+               org = dict(
+                       (key, val) for key, val in map(split_line, block) if key in wanted
+               )
+
+               # Nothing to import
+               if not org:
+                       return
+
+               self.db.execute("INSERT INTO organizations(handle, name, country) \
+                       VALUES(%s, %s, %s) ON CONFLICT (handle) DO \
+                       UPDATE SET name = excluded.name, country = excluded.country",
+                       org.get("organisation"), org.get("org-name"), org.get("country"),
+               )
+
+       def _parse_route_block(self, block):
+               """
+                       Imports a route or route6 block into the routes table.
+
+                       The prefix is taken from the "route6" or "route"
+                       attribute and the AS number from "origin". Blocks with
+                       a prefix that is missing or cannot be parsed are logged
+                       and skipped.
+               """
+               route = {}
+               for line in block:
+                       # Split line
+                       key, val = split_line(line)
+
+                       # Keep any significant data
+                       if key in ("route6", "route"):
+                               route[key] = val
+
+                       elif key == "origin":
+                               m = re.match(r"^(AS|as)(\d+)", val)
+                               if m:
+                                       route["asn"] = m.group(2)
+
+               # Skip empty objects
+               if not route:
+                       return
+
+               prefix = route.get("route6") or route.get("route")
+
+               # Do not let one broken prefix abort the whole import
+               try:
+                       network = ipaddress.ip_network(prefix, strict=False)
+               except ValueError:
+                       log.warning("Could not parse route: %s" % prefix)
+                       return
+
+               self.db.execute("INSERT INTO routes(network, asn) \
+                       VALUES(%s, %s) ON CONFLICT (network) DO UPDATE SET asn = excluded.asn",
+                       "%s" % network, route.get("asn"),
+               )
+
+
+def split_line(line):
+       """
+               Splits a line at its first colon and returns the key and
+               the value with any surrounding whitespace removed.
+
+               The value is an empty string if the line has no colon.
+       """
+       name, colon, rest = line.partition(":")
 
+       return name.strip(), rest.strip()
 
 def main():
        # Run the command line interface