git.ipfire.org Git - location/location-database.git/commitdiff
tools: Migrate to use extended format
author Michael Tremer <michael.tremer@ipfire.org>
Tue, 7 Jan 2020 17:10:31 +0000 (17:10 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Tue, 7 Jan 2020 17:10:31 +0000 (17:10 +0000)
This patch drops parsing the "whois" information and uses
the "new" "RIR Statistics Exchange Format":

  ftp://ftp.ripe.net/ripe/stats/RIR-Statistics-Exchange-Format.txt

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
tools/afrinic.py
tools/apnic.py
tools/arin.py
tools/base.py
tools/downloader.py
tools/lacnic.py
tools/ripe.py

index ad416f7deab3d35204c12399d2bdb72f7389c7db..26e526facbc01f8c7a9743c7359096f8f2cc2853 100644 (file)
@@ -26,5 +26,5 @@ class AFRINIC(base.RIR):
        name = "African Network Information Centre"
 
        database_urls = (
-        "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz",
+        "https://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-extended-latest",
     )
index a38246a30eef704b446ac140c24c6f94ee62d116..d34d6c916d574783dcec7796fcffb37daf3078e2 100644 (file)
@@ -26,10 +26,5 @@ class APNIC(base.RIR):
        name = "Asia Pacific Network Information Centre"
 
        database_urls = (
-               "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
-               "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
-               "https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
-               "https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
-               "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
-               "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",
+               "http://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-extended-latest",
        )
index 6ed669e2968a5c1c39b704c182508d95f781d401..8271062eb9250a7259de2ec3b773de3332b26a7e 100644 (file)
@@ -26,5 +26,5 @@ class ARIN(base.RIR):
        name = "American Registry for Internet Numbers"
 
        database_urls = (
-               "https://ftp.arin.net/pub/rr/arin.db",
+                "https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest",
        )
index 4d3da0d09f3eb387a35ff8bc03ed63d6e3c41b19..f469bc8ed50ddd924e5cafc79c8757f8ada403de 100644 (file)
@@ -98,21 +98,17 @@ class RIRParser(object):
 
        def _make_database(self, filename):
                db = sqlite3.connect(filename)
+               db.set_trace_callback(logging.debug)
 
                # Create database layout
                with db as cursor:
                        cursor.executescript("""
-                               CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, name TEXT, org TEXT);
+                               CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, country TEXT, org_id INTEGER);
+                               CREATE INDEX autnums_org_id ON autnums(org_id);
 
-                               CREATE TABLE IF NOT EXISTS inetnums(network TEXT, netname TEXT, country TEXT, description TEXT,
-                                       family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER);
-                               CREATE INDEX inetnums_search ON inetnums(family, prefix, address_start);
-
-                               CREATE TABLE IF NOT EXISTS organisations(handle TEXT, name TEXT, country TEXT);
-                               CREATE INDEX IF NOT EXISTS organisations_handle ON organisations(handle);
-
-                               CREATE TABLE IF NOT EXISTS routes(route TEXT, asn INTEGER,
+                               CREATE TABLE IF NOT EXISTS inetnums(network TEXT, country TEXT, org_id INTEGER,
                                        family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER);
+                               CREATE INDEX inetnums_sort ON inetnums(address_start);
                        """)
 
                return db
@@ -220,251 +216,135 @@ class RIRParser(object):
 
        def parse_url(self, url):
                with self.downloader.request(url) as r:
-                       for block in r:
-                               self.parse_block(block)
-
-       def parse_block(self, block):
-               # Get first line to find out what type of block this is
-               line = block[0]
-
-               # inetnum
-               if line.startswith("inet6num:") or line.startswith("inetnum:"):
-                       return self._parse_inetnum_block(block)
-
-               # route
-               elif line.startswith("route6:") or line.startswith("route:"):
-                       return self._parse_route_block(block)
-
-               # aut-num
-               elif line.startswith("aut-num:"):
-                       return self._parse_autnum_block(block)
-
-               # organisation
-               elif line.startswith("organisation:"):
-                       return self._parse_org_block(block)
-
-               # person (ignored)
-               elif line.startswith("person:"):
-                       return
-
-               # domain (ignored)
-               elif line.startswith("domain:"):
+                       for line in r:
+                               self.parse_line(line)
+
+       def parse_line(self, line):
+               try:
+                       lacnic, country_code, type, line = line.split("|", 3)
+               except:
+                       logging.warning("Could not parse line: %s" % line)
                        return
 
-               # mntner (ignored)
-               elif line.startswith("mntner:"):
+               # Skip any lines with addresses that are not allocated
+               if not "|allocated|" in line and not "|assigned|" in line:
                        return
 
-               # as-block (ignored)
-               elif line.startswith("as-block:"):
-                       return
-
-               # as-set (ignored)
-               elif line.startswith("as-set:"):
-                       return
+               if type in ("ipv6", "ipv4"):
+                       return self._parse_ip_line(country_code, type, line)
 
-               # route-set (ignored)
-               elif line.startswith("route-set:"):
-                       return
+               elif type == "asn":
+                       return self._parse_asn_line(country_code, line)
 
-               # role (ignored)
-               elif line.startswith("role:"):
+               else:
+                       logging.warning("Unknown line type: %s" % type)
                        return
 
-               # key-cert (ignored)
-               elif line.startswith("key-cert:"):
+       def _parse_ip_line(self, country_code, type, line):
+               try:
+                       address, prefix, date, status, org_id = line.split("|")
+               except:
+                       logging.warning("Unhandled line format: %s" % line)
                        return
 
-               # irt (ignored)
-               elif line.startswith("irt:"):
-                       return
-
-               # Log any unknown blocks
-               else:
-                       logging.warning("Unknown block:")
-                       for line in block:
-                               logging.warning(line)
-
-       def _parse_inetnum_block(self, block):
-               logging.debug("Parsing inetnum block:")
-
-               inetnum = {}
-               for line in block:
-                       logging.debug(line)
-
-                       # Split line
-                       key, val = util.split_line(line)
-
-                       if key == "inetnum":
-                               start_address, delim, end_address = val.partition("-")
-
-                               # Strip any excess space
-                               start_address, end_address = start_address.rstrip(), end_address.strip()
-
-                               # Skip invalid blocks
-                               if start_address in INVALID_ADDRESSES:
-                                       return
-
-                               # Convert to IP address
-                               try:
-                                       start_address = ipaddress.ip_address(start_address)
-                                       end_address   = ipaddress.ip_address(end_address)
-                               except ValueError:
-                                       logging.warning("Could not parse line: %s" % line)
-                                       return
-
-                               # Set prefix to default
-                               prefix = 32
-
-                               # Count number of addresses in this subnet
-                               num_addresses = int(end_address) - int(start_address)
-                               if num_addresses:
-                                       prefix -= math.log(num_addresses, 2)
-
-                               inetnum["inetnum"] = "%s/%.0f" % (start_address, prefix)
-
-                       elif key == "inet6num":
-                               # Skip invalid blocks
-                               if val in INVALID_ADDRESSES:
-                                       return
-
-                               inetnum[key] = val
-                       
-                       elif key == "netname":
-                               inetnum[key] = val
-
-                       elif key == "country":
-                               if val == "UNITED STATES":
-                                       val = "US"
-
-                               inetnum[key] = val.upper()
-
-                       elif key == "descr":
-                               if key in inetnum:
-                                       inetnum[key] += "\n%s" % val
-                               else:
-                                       inetnum[key] = val
-
-               # Skip empty objects
-               if not inetnum:
+               # Cast prefix into an integer
+               try:
+                       prefix = int(prefix)
+               except:
+                       logging.warning("Invalid prefix: %s" % prefix)
+
+               # Fix prefix length for IPv4
+               if type == "ipv4":
+                       prefix = 32 - int(math.log(prefix, 2))
+
+               # Try to parse the address
+               try:
+                       network = ipaddress.ip_network("%s/%s" % (address, prefix), strict=False)
+               except ValueError:
+                       raise
+                       logging.warning("Invalid IP address: %s" % address)
                        return
 
                with self.db as c:
-                       network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False)
-
                        # Get the first and last address of this network
                        address_start, address_end = int(network.network_address), int(network.broadcast_address)
 
                        args = (
                                "%s" % network,
-                               inetnum.get("netname"),
-                               inetnum.get("country"),
-                               inetnum.get("descr"),
+                               country_code,
+                               org_id,
                                network.version,
                                struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)),
                                struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)),
                                network.prefixlen,
                        )
 
-                       c.execute("INSERT INTO inetnums(network, netname, country, description, family, \
-                               address_start, address_end, prefix) VALUES(?, ?, ?, ?, ?, ?, ?, ?)", args)
-
-       def _parse_route_block(self, block):
-               logging.debug("Parsing route block:")
-
-               route = {}
-               for line in block:
-                       logging.debug(line)
+                       c.execute("INSERT INTO inetnums(network, country, org_id, \
+                               family, address_start, address_end, prefix) VALUES(?, ?, ?, ?, ?, ?, ?)", args)
 
-                       # Split line
-                       key, val = util.split_line(line)
-
-                       # Keep any significant data
-                       if key in ("route6", "route"):
-                               route[key] = val
-
-                       elif key == "origin":
-                               m = RE_AS.match(val)
-                               if m:
-                                       route["asn"] = m.group(2)
-
-               # Skip empty objects
-               if not route:
+       def _parse_asn_line(self, country_code, line):
+               try:
+                       asn, dunno, date, status, org_id = line.split("|")
+               except:
+                       logging.warning("Could not parse line: %s" % line)
                        return
 
                with self.db as c:
-                       network = ipaddress.ip_network(route.get("route6") or route.get("route"), strict=False)
-
-                       # Get the first and last address of this network
-                       address_start, address_end = int(network.network_address), int(network.broadcast_address)
-
                        args = (
-                               "%s" % network,
-                               route.get("asn"),
-                               network.version,
-                               struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)),
-                               struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)),
-                               network.prefixlen,
+                               asn,
+                               country_code,
+                               org_id,
                        )
 
-                       c.execute("INSERT INTO routes(route, asn, family, \
-                               address_start, address_end, prefix) VALUES(?, ?, ?, ?, ?, ?)", args)
-
-       def _parse_autnum_block(self, block):
-               logging.debug("Parsing autnum block:")
+                       c.execute("INSERT INTO autnums(asn, country, org_id) \
+                               VALUES(?, ?, ?)", args)
 
-               autnum = {}
-               for line in block:
-                       logging.debug(line)
+       def _export_networks(self, f):
+               # Write header
+               self._write_header(f)
 
-                       # Split line
-                       key, val = util.split_line(line)
+               with self.db as c:
+                       # Write all networks
+                       res = c.execute("""
+                               SELECT inetnums.network,
+                                       autnums.asn,
+                                       inetnums.address_start,
+                                       inetnums.country
+                                       FROM inetnums
+                                       LEFT JOIN autnums
+                                               WHERE inetnums.org_id = autnums.org_id
+                                       ORDER BY inetnums.address_start
+                       """)
 
-                       if key == "aut-num":
-                               m = RE_AS.match(val)
-                               if m:
-                                       autnum["asn"] = m.group(2)
+                       for row in res:
+                               net, asn, address_start, country = row
 
-                       elif key in ("as-name", "org"):
-                               autnum[key] = val
+                               f.write(FMT % ("net:", net))
 
-               # Skip empty objects
-               if not autnum:
-                       return
+                               if asn:
+                                       f.write(FMT % ("asnum:", "AS%s" % asn))
 
-               with self.db as c:
-                       args = (
-                               autnum.get("asn"),
-                               autnum.get("as-name"),
-                               autnum.get("org"),
-                       )
+                               if country:
+                                       f.write(FMT % ("country:", country))
 
-                       c.execute("INSERT INTO autnums(asn, name, org) \
-                               VALUES(?, ?, ?)", args)
-       
-       def _parse_org_block(self, block):
-               logging.debug("Parsing org block:")
+                               # End the block
+                               f.write("\n")
 
-               org = {}
-               for line in block:
-                       logging.debug(line)
+       def _export_asnums(self, f):
+               # Write header
+               self._write_header(f)
 
-                       # Split line
-                       key, val = util.split_line(line)
+               with self.db as c:
+                       res = c.execute("SELECT DISTINCT autnums.asn, autnums.country \
+                               FROM autnums ORDER BY autnums.asn")
 
-                       if key in ("organisation", "org-name", "country"):
-                               org[key] = val
+                       for row in res:
+                               asn, country = row
 
-               # Skip empty objects
-               if not org:
-                       return
+                               f.write(FMT % ("asnum:", "AS%s" % asn))
 
-               with self.db as c:
-                       args = (
-                               org.get("organisation"),
-                               org.get("org-name"),
-                               org.get("country"),
-                       )
+                               if country:
+                                       f.write(FMT % ("country:", country))
 
-                       c.execute("INSERT INTO organisations(handle, name, country) \
-                               VALUES(?, ?, ?)", args)
+                               # End block
+                               f.write("\n")
index 6cc738e94df6e86b3ee185486165ce8ee437eb68..02f91a9091cd12f78e391db68c43cb912d6b9cc0 100644 (file)
@@ -43,7 +43,7 @@ class Downloader(object):
                log.info("Using proxy %s" % url)
                self.proxy = url
 
-       def request(self, url, data=None, return_blocks=True):
+       def request(self, url, data=None):
                req = urllib.request.Request(url, data=data)
 
                # Configure proxy
@@ -54,17 +54,14 @@ class Downloader(object):
                if self.USER_AGENT:
                        req.add_header("User-Agent", self.USER_AGENT)
 
-               return DownloaderContext(self, req, return_blocks=return_blocks)
+               return DownloaderContext(self, req)
 
 
 class DownloaderContext(object):
-       def __init__(self, downloader, request, return_blocks=True):
+       def __init__(self, downloader, request):
                self.downloader = downloader
                self.request = request
 
-               # Should we return one block or a single line?
-               self.return_blocks = return_blocks
-
                # Save the response object
                self.response = None
 
@@ -88,12 +85,6 @@ class DownloaderContext(object):
                """
                        Makes the object iterable by going through each block
                """
-               if self.return_blocks:
-                       for b in util.iterate_over_blocks(self.body):
-                               yield b
-
-                       return
-
                # Store body
                body = self.body
 
@@ -123,12 +114,6 @@ class DownloaderContext(object):
                        Returns a file-like object with the decoded content
                        of the response.
                """
-               content_type = self.get_header("Content-Type")
-
-               # Decompress any gzipped response on the fly
-               if content_type in ("application/x-gzip", "application/gzip"):
-                       return gzip.GzipFile(fileobj=self.response, mode="rb")
-
                # Return the response by default
                return self.response
 
@@ -143,9 +128,7 @@ if __name__ == "__main__":
 
        for url in sys.argv[1:]:
                print("Downloading %s..." % url)
-               
+
                with d.request(url) as r:
-                       for block in r:
-                               for line in block:
-                                       print(line)
-                               print()
+                       for line in r:
+                               print(line)
index 13f0b8d14a646bba4523ec1d4c0b05c536579446..767e766ceb7a71e47632365d44bb666c5fc3f248 100644 (file)
@@ -34,161 +34,3 @@ class LACNIC(base.RIR):
        database_urls = (
                "http://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest",
        )
-
-       @property
-       def parser(self):
-               return LACNICParser
-
-
-class LACNICParser(base.RIRParser):
-       def _make_database(self, filename):
-               db = sqlite3.connect(filename)
-
-               # Create database layout
-               with db as cursor:
-                       cursor.executescript("""
-                               CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, country TEXT, org_id INTEGER);
-                               CREATE INDEX autnums_org_id ON autnums(org_id);
-
-                               CREATE TABLE IF NOT EXISTS inetnums(network TEXT, country TEXT, org_id INTEGER,
-                                       family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER);
-                               CREATE INDEX inetnums_sort ON inetnums(address_start);
-                       """)
-
-               return db
-
-       def parse_url(self, url):
-               with self.downloader.request(url, return_blocks=False) as r:
-                       for line in r:
-                               self.parse_line(line)
-
-       def parse_line(self, line):
-               try:
-                       lacnic, country_code, type, line = line.split("|", 3)
-               except:
-                       logging.warning("Could not parse line: %s" % line)
-                       return
-
-               # Skip any lines with addresses that are not allocated
-               if not "|allocated|" in line:
-                       return
-
-               if type in ("ipv6", "ipv4"):
-                       return self._parse_ip_line(country_code, type, line)
-
-               elif type == "asn":
-                       return self._parse_asn_line(country_code, line)
-
-               else:
-                       logging.warning("Unknown line type: %s" % type)
-                       return
-
-       def _parse_ip_line(self, country_code, type, line):
-               try:
-                       address, prefix, date, status, org_id = line.split("|")
-               except:
-                       logging.warning("Unhandled line format: %s" % line)
-                       return
-
-               # Cast prefix into an integer
-               try:
-                       prefix = int(prefix)
-               except:
-                       logging.warning("Invalid prefix: %s" % prefix)
-
-               # Fix prefix length for IPv4
-               if type == "ipv4":
-                       prefix = 32 - int(math.log(prefix, 2))
-
-               # Try to parse the address
-               try:
-                       network = ipaddress.ip_network("%s/%s" % (address, prefix))
-               except ValueError:
-                       raise
-                       logging.warning("Invalid IP address: %s" % address)
-                       return
-
-               with self.db as c:
-                       # Get the first and last address of this network
-                       address_start, address_end = int(network.network_address), int(network.broadcast_address)
-
-                       args = (
-                               "%s" % network,
-                               country_code,
-                               org_id,
-                               network.version,
-                               struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)),
-                               struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)),
-                               network.prefixlen,
-                       )
-
-                       c.execute("INSERT INTO inetnums(network, country, org_id, \
-                               family, address_start, address_end, prefix) VALUES(?, ?, ?, ?, ?, ?, ?)", args)
-
-       def _parse_asn_line(self, country_code, line):
-               try:
-                       asn, dunno, date, status, org_id = line.split("|")
-               except:
-                       logging.warning("Could not parse line: %s" % line)
-                       return
-
-               with self.db as c:
-                       args = (
-                               asn,
-                               country_code,
-                               org_id,
-                       )
-
-                       c.execute("INSERT INTO autnums(asn, country, org_id) VALUES(?, ?, ?)", args)
-
-       def _export_networks(self, f):
-               # Write header
-               self._write_header(f)
-
-               with self.db as c:
-                       # Write all networks
-                       res = c.execute("""
-                               SELECT inetnums.network,
-                                       autnums.asn,
-                                       inetnums.address_start,
-                                       inetnums.country
-                                       FROM inetnums
-                                       LEFT JOIN autnums
-                                               WHERE inetnums.org_id = autnums.org_id
-                                       ORDER BY inetnums.address_start
-                       """)
-
-                       for row in res:
-                               net, asn, address_start, country = row
-
-                               f.write(base.FMT % ("net:", net))
-
-                               if asn:
-                                       f.write(base.FMT % ("asnum:", "AS%s" % asn))
-
-                               if country:
-                                       f.write(base.FMT % ("country:", country))
-
-                               # End the block
-                               f.write("\n")
-
-       def _export_asnums(self, f):
-               # Write header
-               self._write_header(f)
-
-               with self.db as c:
-                       res = c.execute("SELECT DISTINCT autnums.asn, autnums.country \
-                               FROM autnums ORDER BY autnums.asn")
-
-                       for row in res:
-                               asn, country = row
-
-                               f.write(base.FMT % ("asnum:", "AS%s" % asn))
-
-                               if country:
-                                       f.write(base.FMT % ("country:", country))
-
-                               # End block
-                               f.write("\n")
-
-
index 99f4a8b019c8fc0b603b7701e42766c0c8f1480f..f2f6a42f7197d2e98627d4c2f68a5cec4549365d 100644 (file)
@@ -26,10 +26,5 @@ class RIPE(base.RIR):
        name = "Réseaux IP Européens"
 
        database_urls = (
-               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
-               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
-               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
-               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
-               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
-               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
+               "https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest",
        )