git.ipfire.org Git - location/location-database.git/commitdiff
Add parser to parse database formats that are in RIPE format
author Michael Tremer <michael.tremer@ipfire.org>
Tue, 30 Jan 2018 14:29:35 +0000 (14:29 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Tue, 30 Jan 2018 14:29:35 +0000 (14:29 +0000)
Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
.gitignore [new file with mode: 0644]
tools/__init__.py [new file with mode: 0644]
tools/afrinic.py [new file with mode: 0644]
tools/apnic.py [new file with mode: 0644]
tools/arin.py [new file with mode: 0644]
tools/base.py [new file with mode: 0644]
tools/ripe.py [new file with mode: 0644]
update-rirs [new file with mode: 0755]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..6d9ebc1
--- /dev/null
@@ -0,0 +1,3 @@
+__pycache__
+*.db
+*.gz
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644 (file)
index 0000000..5ab9a11
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+import logging
+
+# Configure the root logger before any of the RIR modules is imported
+logging.basicConfig(level=logging.INFO)
+
+from .afrinic import AFRINIC
+from .apnic import APNIC
+from .arin import ARIN
+from .ripe import RIPE
+
+# All registries that are known to this package
+RIRS = (AFRINIC, APNIC, ARIN, RIPE)
diff --git a/tools/afrinic.py b/tools/afrinic.py
new file mode 100644 (file)
index 0000000..ad416f7
--- /dev/null
@@ -0,0 +1,30 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+from . import base
+
+class AFRINIC(base.RIR):
+       """Registry definition for AFRINIC."""
+
+       name = "African Network Information Centre"
+
+       # Database dumps that are downloaded and parsed for this registry
+       database_urls = (
+               "https://ftp.afrinic.net/pub/pub/dbase/afrinic.db.gz",
+       )
diff --git a/tools/apnic.py b/tools/apnic.py
new file mode 100644 (file)
index 0000000..a38246a
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+from . import base
+
+class APNIC(base.RIR):
+       """Registry definition for APNIC."""
+
+       name = "Asia Pacific Network Information Centre"
+
+       # APNIC publishes one dump per object type
+       database_urls = (
+               "https://ftp.apnic.net/apnic/whois/apnic.db.inet6num.gz",
+               "https://ftp.apnic.net/apnic/whois/apnic.db.inetnum.gz",
+               "https://ftp.apnic.net/apnic/whois/apnic.db.route6.gz",
+               "https://ftp.apnic.net/apnic/whois/apnic.db.route.gz",
+               "https://ftp.apnic.net/apnic/whois/apnic.db.aut-num.gz",
+               "https://ftp.apnic.net/apnic/whois/apnic.db.organisation.gz",
+       )
diff --git a/tools/arin.py b/tools/arin.py
new file mode 100644 (file)
index 0000000..6ed669e
--- /dev/null
@@ -0,0 +1,30 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+from . import base
+
+class ARIN(base.RIR):
+       """Registry definition for ARIN."""
+
+       name = "American Registry for Internet Numbers"
+
+       # Database dumps that are downloaded and parsed for this registry
+       database_urls = (
+               "https://ftp.arin.net/pub/rr/arin.db",
+       )
diff --git a/tools/base.py b/tools/base.py
new file mode 100644 (file)
index 0000000..125fd40
--- /dev/null
@@ -0,0 +1,384 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+import datetime
+import ipaddress
+import logging
+import math
+import os.path
+import re
+import sqlite3
+
+from . import downloader
+
+# Format of one "key: value" line in the exported files.
+# The key is left-aligned and padded to 16 characters.
+FMT = "%-16s%s\n"
+
+# Matches an AS number like "AS64496"; the second group is the number itself
+RE_AS = re.compile(r"^(AS|as)(\d+)")
+
+# Objects that start at one of these addresses/networks are not imported
+INVALID_ADDRESSES = ("0.0.0.0", "::/0", "0::/0")
+
+class RIR(object):
+       """
+       Base class that describes a registry (RIR) whose database can be imported.
+
+       Subclasses set ``name`` to a human-readable name and ``database_urls``
+       to the URLs of the database dumps that should be fetched and parsed.
+       """
+
+       # Human-readable name of the registry (optional)
+       name = None
+
+       # URLs of the database dumps. This is an immutable tuple so that the
+       # default cannot be modified in place and leak into every subclass.
+       database_urls = ()
+
+       def __str__(self):
+               """Return "<class name> - <name>", or only the class name if no name is set."""
+               class_name = self.__class__.__name__
+
+               if self.name:
+                       return "%s - %s" % (class_name, self.name)
+
+               return class_name
+
+       @property
+       def parser(self):
+               """The parser class which is used to process the data of this registry."""
+               return RIRParser
+
+       def update(self, directory):
+               """
+               Fetch and parse all databases of this registry and export the
+               result as text files into *directory*.
+               """
+               p = self.parser(self)
+
+               # Download all data and store it in memory
+               p.fetch_data()
+
+               # Write the database to disk
+               p.export_database(directory)
+
+
+class RIRParser(object):
+       def __init__(self, rir):
+               """
+               Set up a parser for the registry *rir*.
+
+               The parser owns a downloader, an in-memory SQLite database that
+               collects all parsed objects, and the time at which it was created.
+               """
+               # The registry this parser works for
+               self.rir = rir
+
+               # Used to fetch the database dumps
+               self.downloader = downloader.Downloader()
+
+               # Parsed objects are collected in a temporary in-memory database
+               self.db = self._make_database(":memory:")
+
+               # Remember when we started (written into the file headers)
+               self.start_time = datetime.datetime.utcnow()
+
+       def _make_database(self, filename):
+               """
+               Open the SQLite database *filename*, create all tables and
+               indexes that do not exist yet and return the connection.
+               """
+               conn = sqlite3.connect(filename)
+
+               # The connection is its own context manager and commits on success
+               with conn:
+                       conn.executescript("""
+                               CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, name TEXT, org TEXT);
+
+                               CREATE TABLE IF NOT EXISTS inetnums(network TEXT, netname TEXT, country TEXT, description TEXT);
+
+                               CREATE TABLE IF NOT EXISTS organisations(handle TEXT, name TEXT, country TEXT);
+                               CREATE INDEX IF NOT EXISTS organisations_handle ON organisations(handle);
+
+                               CREATE TABLE IF NOT EXISTS routes(route TEXT, asn INTEGER);
+                               CREATE INDEX IF NOT EXISTS routes_route ON routes(route);
+                       """)
+
+               return conn
+
+       def export_database(self, directory):
+               """
+               Write everything that has been parsed into *directory*.
+
+               Two files are created: asnums.txt with all ASes and networks.txt
+               with all networks. The directory is created if it does not exist.
+               """
+               # The caller only passes a name, so the directory might not exist yet.
+               # An empty name means the current directory, which needs no creation.
+               if directory:
+                       os.makedirs(directory, exist_ok=True)
+
+               exports = (
+                       ("asnums.txt", self._export_asnums),
+                       ("networks.txt", self._export_networks),
+               )
+
+               for filename, export in exports:
+                       path = os.path.join(directory, filename)
+
+                       # Always write UTF-8: registry names and descriptions contain
+                       # non-ASCII characters which the locale's encoding might not support
+                       with open(path, "w", encoding="utf-8") as f:
+                               export(f)
+
+       def _export_asnums(self, f):
+               """
+               Write the file header followed by one block per AS to the file
+               object *f*. Each block ends with an empty line.
+               """
+               self._write_header(f)
+
+               with self.db as c:
+                       rows = c.execute("""SELECT DISTINCT autnums.asn, autnums.name,
+                               organisations.name, organisations.country FROM autnums
+                               LEFT JOIN organisations ON autnums.org = organisations.handle
+                               WHERE autnums.asn IS NOT NULL ORDER BY autnums.asn""")
+
+                       for asn, as_name, org_name, country in rows:
+                               f.write(FMT % ("asnum:", "AS%s" % asn))
+
+                               # All other attributes are only written when they are set
+                               optional = (
+                                       ("name:", as_name),
+                                       ("org:", org_name),
+                                       ("country:", country),
+                               )
+
+                               for key, value in optional:
+                                       if value:
+                                               f.write(FMT % (key, value))
+
+                               # End block
+                               f.write("\n")
+
+       def _export_networks(self, f):
+               """
+               Write the file header followed by one block per network to the
+               file object *f*. Each block ends with an empty line.
+               """
+               self._write_header(f)
+
+               with self.db as c:
+                       rows = c.execute("""SELECT inetnums.network, routes.asn,
+                               inetnums.country, inetnums.netname, inetnums.description
+                               FROM inetnums LEFT JOIN routes ON inetnums.network = routes.route
+                               ORDER BY routes.asn, inetnums.network""")
+
+                       for net, asn, country, name, description in rows:
+                               # Collect all lines of this block
+                               lines = [FMT % ("net:", net)]
+
+                               if name:
+                                       lines.append(FMT % ("name:", name))
+
+                               if asn:
+                                       lines.append(FMT % ("asnum:", "AS%s" % asn))
+
+                               if country:
+                                       lines.append(FMT % ("country:", country))
+
+                               # A description may have multiple lines; each one gets its own key
+                               if description:
+                                       lines.extend(FMT % ("descr:", d) for d in description.splitlines())
+
+                               # End the block
+                               lines.append("\n")
+
+                               f.writelines(lines)
+
+       def _write_header(self, f):
+               """
+               Write a comment header to the file object *f* which names the
+               registry and the start time of this parser.
+               """
+               header = (
+                       "#\n",
+                       "# %s\n" % self.rir,
+                       "# Generated at %s\n" % self.start_time,
+                       "#\n\n",
+               )
+
+               f.writelines(header)
+
+       def _split_line(self, line):
+               """
+               Split *line* at the first colon and return (key, value) with any
+               surrounding whitespace removed. The value is empty if the line
+               has no colon.
+               """
+               key, _, val = line.partition(":")
+
+               return key.strip(), val.strip()
+
+       def fetch_data(self):
+               """
+               Download and parse all databases of the registry.
+
+               Raises NotImplementedError if the registry has no database URLs.
+               """
+               urls = self.rir.database_urls
+
+               if not urls:
+                       raise NotImplementedError("Database URLs not set")
+
+               # Parse entire database in one go
+               for url in urls:
+                       self.parse_url(url)
+
+       def parse_url(self, url):
+               """
+               Request *url* from the downloader and pass every block that the
+               response yields to parse_block().
+               """
+               with self.downloader.request(url) as response:
+                       for block in response:
+                               self.parse_block(block)
+
+       def parse_block(self, block):
+               """
+               Dispatch *block* (a sequence of lines that form one object) to the
+               parser for its type.
+
+               The type is taken from the first line. person and domain objects
+               are ignored, empty blocks are skipped, and any other type is
+               logged as a warning.
+               """
+               # There is nothing to do for an empty block; without this check,
+               # reading the first line would fail with an IndexError
+               if not block:
+                       return
+
+               # Get first line to find out what type of block this is
+               line = block[0]
+
+               # inetnum
+               if line.startswith(("inet6num:", "inetnum:")):
+                       return self._parse_inetnum_block(block)
+
+               # route
+               if line.startswith(("route6:", "route:")):
+                       return self._parse_route_block(block)
+
+               # aut-num
+               if line.startswith("aut-num:"):
+                       return self._parse_autnum_block(block)
+
+               # organisation
+               if line.startswith("organisation:"):
+                       return self._parse_org_block(block)
+
+               # person and domain (ignored)
+               if line.startswith(("person:", "domain:")):
+                       return
+
+               # Log any unknown blocks
+               logging.warning("Unknown block:")
+               for line in block:
+                       logging.warning(line)
+
+       def _parse_inetnum_block(self, block):
+               """
+               Parse an inetnum or inet6num object and store it in the database.
+
+               inetnum objects give their range as "<first address> - <last address>".
+               The range is converted into the CIDR network(s) that cover it
+               exactly and one row is inserted for each of them. The value of an
+               inet6num object is stored as it is.
+
+               Objects that start at one of INVALID_ADDRESSES and objects with a
+               range that cannot be parsed are skipped.
+               """
+               logging.debug("Parsing inetnum block:")
+
+               networks = []
+               inetnum = {}
+
+               for line in block:
+                       logging.debug(line)
+
+                       # Split line
+                       key, val = self._split_line(line)
+
+                       if key == "inetnum":
+                               start_address, delim, end_address = val.partition("-")
+
+                               # Strip any excess space
+                               start_address, end_address = start_address.strip(), end_address.strip()
+
+                               # Skip invalid blocks
+                               if start_address in INVALID_ADDRESSES:
+                                       return
+
+                               try:
+                                       first_address = ipaddress.ip_address(start_address)
+                                       last_address  = ipaddress.ip_address(end_address)
+
+                                       # A range is not necessarily a single CIDR block, so split
+                                       # it into the networks that cover exactly this range.
+                                       # Deriving a prefix length from the size of the range is
+                                       # off by one and silently rounds ranges that do not fit.
+                                       networks = [
+                                               "%s" % network for network in
+                                                       ipaddress.summarize_address_range(first_address, last_address)
+                                       ]
+
+                               # Do not abort the whole import because of one malformed object
+                               except (TypeError, ValueError) as e:
+                                       logging.warning("Skipping inetnum with invalid range '%s': %s", val, e)
+                                       return
+
+                       elif key == "inet6num":
+                               # Skip invalid blocks
+                               if val in INVALID_ADDRESSES:
+                                       return
+
+                               networks = [val]
+
+                       elif key in ("netname", "country"):
+                               inetnum[key] = val
+
+                       elif key == "descr":
+                               # Multiple descr lines are joined into one multi-line value
+                               if key in inetnum:
+                                       inetnum[key] += "\n%s" % val
+                               else:
+                                       inetnum[key] = val
+
+               # Skip objects without any network
+               if not networks:
+                       return
+
+               rows = [
+                       (
+                               network,
+                               inetnum.get("netname"),
+                               inetnum.get("country"),
+                               inetnum.get("descr"),
+                       )
+                       for network in networks
+               ]
+
+               with self.db as c:
+                       c.executemany("INSERT INTO inetnums(network, netname, country, description) \
+                               VALUES(?, ?, ?, ?)", rows)
+
+       def _parse_route_block(self, block):
+               """
+               Parse a route or route6 object and store the route together with
+               the number of its origin AS (if any) in the database.
+               """
+               logging.debug("Parsing route block:")
+
+               route = {}
+               for line in block:
+                       logging.debug(line)
+
+                       key, val = self._split_line(line)
+
+                       # The origin is only kept if it is a well-formed AS number
+                       if key == "origin":
+                               m = RE_AS.match(val)
+                               if m:
+                                       route["asn"] = m.group(2)
+
+                       elif key in ("route6", "route"):
+                               route[key] = val
+
+               # Skip empty objects
+               if not route:
+                       return
+
+               prefix = route.get("route6") or route.get("route")
+               args = (prefix, route.get("asn"))
+
+               with self.db as c:
+                       c.execute("INSERT INTO routes(route, asn) \
+                               VALUES(?, ?)", args)
+
+       def _parse_autnum_block(self, block):
+               """
+               Parse an aut-num object and store its AS number, name and the
+               handle of its organisation in the database.
+               """
+               logging.debug("Parsing autnum block:")
+
+               autnum = {}
+               for line in block:
+                       logging.debug(line)
+
+                       key, val = self._split_line(line)
+
+                       if key in ("as-name", "org"):
+                               autnum[key] = val
+
+                       # Only the number of a well-formed AS is kept
+                       elif key == "aut-num":
+                               m = RE_AS.match(val)
+                               if m:
+                                       autnum["asn"] = m.group(2)
+
+               # Skip empty objects
+               if not autnum:
+                       return
+
+               args = tuple(autnum.get(k) for k in ("asn", "as-name", "org"))
+
+               with self.db as c:
+                       c.execute("INSERT INTO autnums(asn, name, org) \
+                               VALUES(?, ?, ?)", args)
+       
+       def _parse_org_block(self, block):
+               """
+               Parse an organisation object and store its handle, name and
+               country in the database.
+               """
+               logging.debug("Parsing org block:")
+
+               # The attributes that are kept, in the order of the table's columns
+               wanted = ("organisation", "org-name", "country")
+
+               org = {}
+               for line in block:
+                       logging.debug(line)
+
+                       key, val = self._split_line(line)
+
+                       if key in wanted:
+                               org[key] = val
+
+               # Skip empty objects
+               if not org:
+                       return
+
+               args = tuple(org.get(key) for key in wanted)
+
+               with self.db as c:
+                       c.execute("INSERT INTO organisations(handle, name, country) \
+                               VALUES(?, ?, ?)", args)
diff --git a/tools/ripe.py b/tools/ripe.py
new file mode 100644 (file)
index 0000000..99f4a8b
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+from . import base
+
+class RIPE(base.RIR):
+       """Registry definition for RIPE."""
+
+       name = "Réseaux IP Européens"
+
+       # RIPE publishes one dump per object type
+       database_urls = (
+               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inet6num.gz",
+               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.inetnum.gz",
+               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route6.gz",
+               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.route.gz",
+               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.aut-num.gz",
+               "https://ftp.ripe.net/ripe/dbase/split/ripe.db.organisation.gz",
+       )
diff --git a/update-rirs b/update-rirs
new file mode 100755 (executable)
index 0000000..bea2b01
--- /dev/null
@@ -0,0 +1,40 @@
+#!/usr/bin/python3
+###############################################################################
+#                                                                             #
+# location-database - A database to determine someone's                       #
+#                     location on the Internet                                #
+# Copyright (C) 2018 Michael Tremer                                           #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+import sys
+
+import tools
+
+# Names of the RIRs given on the command line; if there are none, all RIRs are updated
+update_only = sys.argv[1:]
+
+for RIR in tools.RIRS:
+    name = RIR.__name__
+
+    # Only process what the user has asked for
+    if update_only and name not in update_only:
+        continue
+
+    rir = RIR()
+
+    # Fetch all records and export them into a directory that is named like the RIR
+    rir.update(name)