]> git.ipfire.org Git - location/location-database.git/commitdiff
Migrate to a binary-encoded database format
authorMichael Tremer <michael.tremer@ipfire.org>
Mon, 14 Jan 2019 07:58:49 +0000 (07:58 +0000)
committerMichael Tremer <michael.tremer@ipfire.org>
Mon, 14 Jan 2019 07:58:49 +0000 (07:58 +0000)
This approach is very similar to the one that we used before, but instead of storing
IP addresses as two integers, they are encoded as a big-endian 128 bit long binary
"string".

This is then stored as the BLOB data type of sqlite which has fewer limitations in
length, can be indexed and uses memcmp() which should be at least as fast as comparing
integers.

This allows a slightly easier query and seems to me significantly faster than the
PostgreSQL approach.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
tools/base.py

index b57f485ee7a84e1d5c72e3229ae771f892340338..4d3da0d09f3eb387a35ff8bc03ed63d6e3c41b19 100644 (file)
@@ -27,6 +27,7 @@ import math
 import os.path
 import re
 import sqlite3
+import struct
 
 from . import downloader
 from . import util
@@ -104,16 +105,14 @@ class RIRParser(object):
                                CREATE TABLE IF NOT EXISTS autnums(asn INTEGER, name TEXT, org TEXT);
 
                                CREATE TABLE IF NOT EXISTS inetnums(network TEXT, netname TEXT, country TEXT, description TEXT,
-                                       family INTEGER, address_start0 INTEGER, address_start1 INTEGER,
-                                       address_end0 INTEGER, address_end1 INTEGER, prefix INTEGER);
-                               CREATE INDEX inetnums_search ON inetnums(family, prefix, address_start0, address_start1);
+                                       family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER);
+                               CREATE INDEX inetnums_search ON inetnums(family, prefix, address_start);
 
                                CREATE TABLE IF NOT EXISTS organisations(handle TEXT, name TEXT, country TEXT);
                                CREATE INDEX IF NOT EXISTS organisations_handle ON organisations(handle);
 
                                CREATE TABLE IF NOT EXISTS routes(route TEXT, asn INTEGER,
-                                       family INTEGER, address_start0 INTEGER, address_start1 INTEGER,
-                                       address_end0 INTEGER, address_end1 INTEGER, prefix INTEGER);
+                                       family INTEGER, address_start BLOB, address_end BLOB, prefix INTEGER);
                        """)
 
                return db
@@ -165,8 +164,7 @@ class RIRParser(object):
                                        networks.netname,
                                        networks.description FROM (
                                                SELECT DISTINCT routes.route,
-                                                       routes.address_start0,
-                                                       routes.address_start1,
+                                                       routes.address_start,
                                                        inetnums.network,
                                                        routes.asn,
                                                        inetnums.country,
@@ -176,22 +174,11 @@ class RIRParser(object):
                                                LEFT JOIN inetnums
                                                        WHERE routes.family = inetnums.family
                                                        AND routes.prefix >= inetnums.prefix
-                                                       AND (
-                                                               inetnums.address_start0 < routes.address_start0
-                                                               OR (
-                                                                       inetnums.address_start0 = routes.address_start0
-                                                                       AND inetnums.address_start1 <= routes.address_start1
-                                                               )
-                                                       ) AND (
-                                                               inetnums.address_end0 > routes.address_end0
-                                                               OR (
-                                                                       inetnums.address_end0 = routes.address_end0
-                                                                       AND inetnums.address_end1 >= routes.address_end1
-                                                               )
-                                                       )
+                                                       AND inetnums.address_start <= routes.address_start
+                                                       AND inetnums.address_end >= routes.address_end
                                                ORDER BY inetnums.prefix DESC
                                        ) networks
-                               ORDER BY networks.address_start0, networks.address_start1
+                               ORDER BY networks.address_start
                        """)
 
                        for row in res:
@@ -366,7 +353,7 @@ class RIRParser(object):
                        network = ipaddress.ip_network(inetnum.get("inet6num") or inetnum.get("inetnum"), strict=False)
 
                        # Get the first and last address of this network
-                       address_start, address_end = network.network_address, network.broadcast_address
+                       address_start, address_end = int(network.network_address), int(network.broadcast_address)
 
                        args = (
                                "%s" % network,
@@ -374,16 +361,13 @@ class RIRParser(object):
                                inetnum.get("country"),
                                inetnum.get("descr"),
                                network.version,
-                               int(address_start) >> 64,
-                               int(address_start) & 0xffffffff,
-                               int(address_end) >> 64,
-                               int(address_end) & 0xffffffff,
+                               struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)),
+                               struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)),
                                network.prefixlen,
                        )
 
                        c.execute("INSERT INTO inetnums(network, netname, country, description, family, \
-                               address_start0, address_start1, address_end0, address_end1, prefix) \
-                               VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", args)
+                               address_start, address_end, prefix) VALUES(?, ?, ?, ?, ?, ?, ?, ?)", args)
 
        def _parse_route_block(self, block):
                logging.debug("Parsing route block:")
@@ -412,22 +396,19 @@ class RIRParser(object):
                        network = ipaddress.ip_network(route.get("route6") or route.get("route"), strict=False)
 
                        # Get the first and last address of this network
-                       address_start, address_end = network.network_address, network.broadcast_address
+                       address_start, address_end = int(network.network_address), int(network.broadcast_address)
 
                        args = (
                                "%s" % network,
                                route.get("asn"),
                                network.version,
-                               int(address_start) >> 64,
-                               int(address_start) & 0xffffffff,
-                               int(address_end) >> 64,
-                               int(address_end) & 0xffffffff,
+                               struct.pack(">QQ", address_start >> 64, address_start % (2 ** 64)),
+                               struct.pack(">QQ", address_end >> 64, address_end % (2 ** 64)),
                                network.prefixlen,
                        )
 
                        c.execute("INSERT INTO routes(route, asn, family, \
-                               address_start0, address_start1, address_end0, address_end1, prefix) \
-                               VALUES(?, ?, ?, ?, ?, ?, ?, ?)", args)
+                               address_start, address_end, prefix) VALUES(?, ?, ?, ?, ?, ?)", args)
 
        def _parse_autnum_block(self, block):
                logging.debug("Parsing autnum block:")