From: Michael Tremer
Date: Tue, 30 Jan 2018 14:29:35 +0000 (+0000)
Subject: Add parser to parse database formats that are in RIPE format
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=02b146bf30e1c57f0efb8ea734dd755db66274e6;p=location%2Flocation-database.git

Add parser to parse database formats that are in RIPE format

Signed-off-by: Michael Tremer
---

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6d9ebc1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+*.db
+*.gz
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 0000000..5ab9a11
--- /dev/null
+++ b/tools/__init__.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
+#                                                                             #
+###############################################################################
+
+import logging
+
+# Set up logging before the registry modules below are imported
+logging.basicConfig(level=logging.INFO)
+
+from .afrinic import AFRINIC
+from .apnic import APNIC
+from .arin import ARIN
+from .ripe import RIPE
+
+RIRS = (  # all registry classes imported above
+    AFRINIC, APNIC, ARIN, RIPE,
+)
diff --git a/tools/afrinic.py b/tools/afrinic.py
new file mode 100644
index 0000000..ad416f7
--- /dev/null
+++ b/tools/afrinic.py
@@ -0,0 +1,30 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+from .
import base
+
+class AFRINIC(base.RIR):
+    name = "African Network Information Centre"  # full name shown by RIR.__str__
+
+    database_urls = (  # every URL listed here is downloaded and parsed
+        "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz",
+    )
diff --git a/tools/apnic.py b/tools/apnic.py
new file mode 100644
index 0000000..a38246a
--- /dev/null
+++ b/tools/apnic.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+from .
import base
+
+class APNIC(base.RIR):
+    name = "Asia Pacific Network Information Centre"  # full name shown by RIR.__str__
+
+    database_urls = (  # every URL listed here is downloaded and parsed
+        "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
+        "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
+        "https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
+        "https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
+        "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
+        "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",
+    )
diff --git a/tools/arin.py b/tools/arin.py
new file mode 100644
index 0000000..6ed669e
--- /dev/null
+++ b/tools/arin.py
@@ -0,0 +1,30 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+from .
import base
+
+class ARIN(base.RIR):
+    name = "American Registry for Internet Numbers"
+
+    database_urls = (
+        "https://ftp.arin.net/pub/rr/arin.db",
+    )
diff --git a/tools/base.py b/tools/base.py
new file mode 100644
index 0000000..125fd40
--- /dev/null
+++ b/tools/base.py
@@ -0,0 +1,417 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+import datetime
+import ipaddress
+import logging
+import math
+import os.path
+import re
+import sqlite3
+
+from . import downloader
+
+FMT = "%-16s%s\n"
+
+RE_AS = re.compile(r"^(AS|as)(\d+)")
+
+INVALID_ADDRESSES = (
+    "0.0.0.0",
+    "::/0",
+    "0::/0",
+)
+
+class RIR(object):
+    """Base class describing one Regional Internet Registry and its database URLs."""
+
+    name = None
+    database_urls = []
+
+    def __init__(self):
+        pass
+
+    def __str__(self):
+        """Return the class name, followed by the registry's full name if it has one."""
+        if self.name:
+            return "%s - %s" % (self.__class__.__name__, self.name)
+
+        return self.__class__.__name__
+
+    @property
+    def parser(self):
+        """Return the parser class that update() instantiates for this registry."""
+        return RIRParser
+
+    def update(self, directory):
+        """Fetch all of this registry's data, then export it as text files into *directory*."""
+        p = self.parser(self)
+
+        # Download all data and store it in memory
+        p.fetch_data()
+
+        # Write the database to disk
+        p.export_database(directory)
+
+
+class RIRParser(object):
+    """Collects a registry's database objects in an in-memory SQLite database and exports them."""
+
+    def __init__(self, rir):
+        self.rir = rir
+
+        # Create a downloader to fetch data
+        self.downloader = downloader.Downloader()
+
+        # Create a database to hold temporary data
+        self.db = self._make_database(":memory:")
+
+        # Start time
+        self.start_time = datetime.datetime.utcnow()
+
+    def _make_database(self, filename):
+        """Open the SQLite database *filename*, create any missing tables and indexes, and return it."""
+        db = sqlite3.connect(filename)
+
+        # Create database layout
+        with db as cursor:
+            cursor.executescript("""
+                CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, name TEXT, org TEXT);
+
+                CREATE TABLE IF NOT EXISTS inetnums(network TEXT, netname TEXT, country TEXT, description TEXT);
+
+                CREATE TABLE IF NOT EXISTS organisations(handle TEXT, name TEXT, country TEXT);
+                CREATE INDEX IF NOT EXISTS organisations_handle ON organisations(handle);
+
+                CREATE TABLE IF NOT EXISTS routes(route TEXT, asn INTEGER);
+                CREATE INDEX IF NOT EXISTS routes_route ON routes(route);
+            """)
+
+        return db
+
+    def export_database(self, directory):
+        """Write asnums.txt and networks.txt into *directory*, which must already exist."""
+        # Write all ASes
+        fn_asnums = os.path.join(directory, "asnums.txt")
+        with open(fn_asnums, "w") as f:
+            self._export_asnums(f)
+
+        # Write all networks
+        fn_networks = os.path.join(directory, "networks.txt")
+        with open(fn_networks, "w") as f:
+            self._export_networks(f)
+
+    def _export_asnums(self, f):
+        """Write one block per known AS number to the open text file *f*."""
+        # Write header
+        self._write_header(f)
+
+        with self.db as c:
+            res = c.execute("""SELECT DISTINCT autnums.asn, autnums.name,
+                organisations.name, organisations.country FROM autnums
+                LEFT JOIN organisations ON autnums.org = organisations.handle
+                WHERE autnums.asn IS NOT NULL ORDER BY autnums.asn""")
+
+            for row in res:
+                f.write(FMT % ("asnum:", "AS%s" % row[0]))
+
+                if row[1]:
+                    f.write(FMT % ("name:", row[1]))
+
+                if row[2]:
+                    f.write(FMT % ("org:", row[2]))
+
+                if row[3]:
+                    f.write(FMT % ("country:", row[3]))
+
+                # End block
+                f.write("\n")
+
+    def _export_networks(self, f):
+        """Write one block per known network to the open text file *f*."""
+        # Write header
+        self._write_header(f)
+
+        with self.db as c:
+            # Write all networks
+            res = c.execute("""SELECT inetnums.network, routes.asn,
+                inetnums.country, inetnums.netname, inetnums.description
+                FROM inetnums LEFT JOIN routes ON inetnums.network = routes.route
+                ORDER BY routes.asn, inetnums.network""")
+
+            for row in res:
+                net, asn, country, name, description = row
+
+                f.write(FMT % ("net:", net))
+
+                if name:
+                    f.write(FMT % ("name:", name))
+
+                if asn:
+                    f.write(FMT % ("asnum:", "AS%s" % asn))
+
+                if country:
+                    f.write(FMT % ("country:", country))
+
+                if description:
+                    for line in description.splitlines():
+                        f.write(FMT % ("descr:", line))
+
+                # End the block
+                f.write("\n")
+
+    def _write_header(self, f):
+        """Write the comment header naming the registry and the start time to *f*."""
+        f.write("#\n")
+        f.write("# %s\n" % self.rir)
+        f.write("# Generated at %s\n" % self.start_time)
+        f.write("#\n\n")
+
+    def _split_line(self, line):
+        """Split *line* at its first colon and return the stripped (key, value) pair."""
+        key, colon, val = line.partition(":")
+
+        # Strip any excess space
+        key = key.strip()
+        val = val.strip()
+
+        return key, val
+
+    def fetch_data(self):
+        """Download and parse every database URL of the registry; raises NotImplementedError if none are set."""
+        if not self.rir.database_urls:
+            raise NotImplementedError("Database URLs not set")
+
+        # Parse entire database in one go
+        for url in self.rir.database_urls:
+            self.parse_url(url)
+
+    def parse_url(self, url):
+        """Request *url* through the downloader and parse every block the response yields."""
+        with self.downloader.request(url) as r:
+            for block in r:
+                self.parse_block(block)
+
+    def parse_block(self, block):
+        """Dispatch *block* (a sequence of lines) to the parser matching the key on its first line."""
+        # Nothing to classify in an empty block
+        if not block:
+            return
+
+        # Get first line to find out what type of block this is
+        line = block[0]
+
+        # inetnum
+        if line.startswith(("inet6num:", "inetnum:")):
+            return self._parse_inetnum_block(block)
+
+        # route
+        elif line.startswith(("route6:", "route:")):
+            return self._parse_route_block(block)
+
+        # aut-num
+        elif line.startswith("aut-num:"):
+            return self._parse_autnum_block(block)
+
+        # organisation
+        elif line.startswith("organisation:"):
+            return self._parse_org_block(block)
+
+        # person (ignored)
+        elif line.startswith("person:"):
+            return
+
+        # domain (ignored)
+        elif line.startswith("domain:"):
+            return
+
+        # Log any unknown blocks
+        else:
+            logging.warning("Unknown block:")
+            for line in block:
+                logging.warning(line)
+
+    def _parse_inetnum_block(self, block):
+        """Store the network, netname, country and description of an inetnum/inet6num block."""
+        logging.debug("Parsing inetnum block:")
+
+        inetnum = {}
+        for line in block:
+            logging.debug(line)
+
+            # Split line
+            key, val = self._split_line(line)
+
+            if key == "inetnum":
+                start_address, delim, end_address = val.partition("-")
+
+                # Strip any excess space
+                start_address, end_address = start_address.rstrip(), end_address.strip()
+
+                # Skip invalid blocks
+                if start_address in INVALID_ADDRESSES:
+                    return
+
+                # Convert to IP addresses. A value without "-" is treated as a
+                # single address; unparsable addresses skip the whole block
+                try:
+                    first = ipaddress.ip_address(start_address)
+                    last = ipaddress.ip_address(end_address or start_address)
+                except ValueError:
+                    logging.warning("Skipping invalid inetnum: %s", val)
+                    return
+
+                # Count number of addresses in this range (both ends inclusive)
+                num_addresses = int(last) - int(first) + 1
+                if num_addresses < 1:
+                    logging.warning("Skipping reversed inetnum: %s", val)
+                    return
+
+                # Smallest prefix covering the range. Exact for CIDR-aligned
+                # ranges, including two-address /31 networks
+                prefix = first.max_prefixlen - (num_addresses - 1).bit_length()
+
+                inetnum["inetnum"] = "%s/%d" % (first, prefix)
+
+            elif key == "inet6num":
+                # Skip invalid blocks
+                if val in INVALID_ADDRESSES:
+                    return
+
+                inetnum[key] = val
+
+            elif key in ("netname", "country"):
+                inetnum[key] = val
+
+            elif key == "descr":
+                if key in inetnum:
+                    inetnum[key] += "\n%s" % val
+                else:
+                    inetnum[key] = val
+
+        # Skip empty objects
+        if not inetnum:
+            return
+
+        with self.db as c:
+            args = (
+                inetnum.get("inet6num") or inetnum.get("inetnum"),
+                inetnum.get("netname"),
+                inetnum.get("country"),
+                inetnum.get("descr"),
+            )
+
+            c.execute("INSERT INTO inetnums(network, netname, country, description) \
+                VALUES(?, ?, ?, ?)", args)
+
+    def _parse_route_block(self, block):
+        """Store the prefix and origin AS number of a route/route6 block."""
+        logging.debug("Parsing route block:")
+
+        route = {}
+        for line in block:
+            logging.debug(line)
+
+            # Split line
+            key, val = self._split_line(line)
+
+            # Keep any significant data
+            if key in ("route6", "route"):
+                route[key] = val
+
+            elif key == "origin":
+                m = RE_AS.match(val)
+                if m:
+                    route["asn"] = m.group(2)
+
+        # Skip empty objects
+        if not route:
+            return
+
+        with self.db as c:
+            args = (
+                route.get("route6") or route.get("route"),
+                route.get("asn"),
+            )
+
+            c.execute("INSERT INTO routes(route, asn) \
+                VALUES(?, ?)", args)
+
+    def _parse_autnum_block(self, block):
+        """Store the AS number, name and organisation handle of an aut-num block."""
+        logging.debug("Parsing autnum block:")
+
+        autnum = {}
+        for line in block:
+            logging.debug(line)
+
+            # Split line
+            key, val = self._split_line(line)
+
+            if key == "aut-num":
+                m = RE_AS.match(val)
+                if m:
+                    autnum["asn"] = m.group(2)
+
+            elif key in ("as-name", "org"):
+                autnum[key] = val
+
+        # Skip empty objects
+        if not autnum:
+            return
+
+        with self.db as c:
+            args = (
+                autnum.get("asn"),
+                autnum.get("as-name"),
+                autnum.get("org"),
+            )
+
+            c.execute("INSERT INTO autnums(asn, name, org) \
+                VALUES(?, ?, ?)", args)
+
+    def _parse_org_block(self, block):
+        """Store the handle, name and country of an organisation block."""
+        logging.debug("Parsing org block:")
+
+        org = {}
+        for line in block:
+            logging.debug(line)
+
+            # Split line
+            key, val = self._split_line(line)
+
+            if key in ("organisation", "org-name", "country"):
+                org[key] = val
+
+        # Skip empty objects
+        if not org:
+            return
+
+        with self.db as c:
+            args = (
+                org.get("organisation"),
+                org.get("org-name"),
+                org.get("country"),
+            )
+
+            c.execute("INSERT INTO organisations(handle, name, country) \
+                VALUES(?, ?, ?)", args)
diff --git a/tools/ripe.py b/tools/ripe.py
new file mode 100644
index 0000000..99f4a8b
--- /dev/null
+++ b/tools/ripe.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# 
location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+from . import base
+
+class RIPE(base.RIR):
+    name = "Réseaux IP Européens"
+
+    database_urls = (
+        "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
+        "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
+        "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
+        "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
+        "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
+        "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
+    )
diff --git a/update-rirs b/update-rirs
new file mode 100755
index 0000000..bea2b01
--- /dev/null
+++ b/update-rirs
@@ -0,0 +1,44 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+import os
+import sys
+
+import tools
+
+# The user may pass a list of RIRs on the command line that should be updated
+update_only = sys.argv[1:]
+
+for RIR in tools.RIRS:
+    name = RIR.__name__
+
+    # Skip RIRs that should not be updated
+    if update_only and name not in update_only:
+        continue
+
+    rir = RIR()
+
+    # Create the output directory (named after the RIR class) if it is missing
+    os.makedirs(name, exist_ok=True)
+
+    # Update all records from the database
+    rir.update(name)