]> git.ipfire.org Git - location/libloc.git/commitdiff
export: Flatten the tree before exporting it
authorMichael Tremer <michael.tremer@ipfire.org>
Tue, 27 Oct 2020 17:14:30 +0000 (17:14 +0000)
committerMichael Tremer <michael.tremer@ipfire.org>
Tue, 27 Oct 2020 17:32:56 +0000 (17:32 +0000)
This patch removes the possibility that any IP address ranges
might show up in multiple exported files.

If this was content from the database:

  * 10.0.0.0/16 - DE
  * 10.0.1.0/24 - FR

Then the IP address 10.0.1.1 would match for both countries.

The algorithm will now break the larger /16 subnet apart into
smaller subnets so that 10.0.1.0/24 is no longer overlapped.

There was some time spent on this to make this as efficient
as possible.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/python/export.py

index 5eaf43fe3980d3101d4473e19931a5853b30901b..dd44332c292ab771b46d1ee4525946de7f9cfefd 100644 (file)
@@ -89,28 +89,55 @@ class OutputWriter(object):
 
        def write(self, network, subnets):
                if self.flatten and self._flatten(network):
-                       log.debug("Skipping writing network %s" % network)
+                       log.debug("Skipping writing network %s (last one was %s)" % (network, self._last_network))
                        return
 
+               # Convert network into a Python object
+               _network = ipaddress.ip_network("%s" % network)
+
                # Write the network when it has no subnets
                if not subnets:
-                       network = ipaddress.ip_network("%s" % network)
-                       return self._write_network(network)
+                       log.debug("Writing %s to %s" % (_network, self.f))
+                       return self._write_network(_network)
+
+               # Convert subnets into Python objects
+               _subnets = [ipaddress.ip_network("%s" % subnet) for subnet in subnets]
+
+               # Split the network into smaller bits so that
+               # we can accomodate for any gaps in it later
+               to_check = set()
+               for _subnet in _subnets:
+                       to_check.update(
+                               _network.address_exclude(_subnet)
+                       )
+
+               # Clear the list of all subnets
+               subnets = []
+
+               # Check if all subnets to not overlap with anything else
+               while to_check:
+                       subnet_to_check = to_check.pop()
 
-               # Collect all matching subnets
-               matching_subnets = []
+                       for _subnet in _subnets:
+                               # Drop this subnet if it equals one of the subnets
+                               # or if it is subnet of one of them
+                               if subnet_to_check == _subnet or subnet_to_check.subnet_of(_subnet):
+                                       break
 
-               for subnet in sorted(subnets):
-                       # Try to find the subnet in the database
-                       n = self.db.lookup("%s" % subnet.network_address)
+                               # Break it down if it overlaps
+                               if subnet_to_check.overlaps(_subnet):
+                                       to_check.update(
+                                               subnet_to_check.address_exclude(_subnet)
+                                       )
+                                       break
 
-                       # No entry or matching country means those IP addresses belong here
-                       if not n or n.country_code == network.country_code:
-                               matching_subnets.append(subnet)
+                       # Add the subnet again as it passed the check
+                       else:
+                               subnets.append(subnet_to_check)
 
                # Write all networks as compact as possible
-               for network in ipaddress.collapse_addresses(matching_subnets):
-                       log.debug("Writing %s to database" % network)
+               for network in ipaddress.collapse_addresses(subnets):
+                       log.debug("Writing %s to %s" % (network, self.f))
                        self._write_network(network)
 
        def finish(self):
@@ -206,40 +233,40 @@ class Exporter(object):
                        # Get all networks that match the family
                        networks = self.db.search_networks(family=family)
 
-                       # Materialise the generator into a list (uses quite some memory)
-                       networks = list(networks)
+                       # Create a stack with all networks in order where we can put items back
+                       # again and retrieve them in the next iteration.
+                       networks = BufferedStack(networks)
 
                        # Walk through all networks
-                       for i, network in enumerate(networks):
-                               _network = ipaddress.ip_network("%s" % network)
-
-                               # Search for all subnets
-                               subnets = set()
-
-                               while i < len(networks):
-                                       subnet = networks[i+1]
-
-                                       if subnet.is_subnet_of(network):
-                                               _subnet = ipaddress.ip_network("%s" % subnet)
-
-                                               subnets.add(_subnet)
-                                               subnets.update(_network.address_exclude(_subnet))
-
-                                               i += 1
-                                       else:
+                       for network in networks:
+                               # Collect all networks which are a subnet of network
+                               subnets = []
+                               for subnet in networks:
+                                       # If the next subnet was not a subnet, we have to push
+                                       # it back on the stack and break this loop
+                                       if not subnet.is_subnet_of(network):
+                                               networks.push(subnet)
                                                break
 
+                                       subnets.append(subnet)
+
                                # Write matching countries
-                               try:
-                                       writers[network.country_code].write(network, subnets)
-                               except KeyError:
-                                       pass
+                               if network.country_code and network.country_code in writers:
+                                       # Mismatching subnets
+                                       gaps = [
+                                               subnet for subnet in subnets if not network.country_code == subnet.country_code
+                                       ]
+
+                                       writers[network.country_code].write(network, gaps)
 
                                # Write matching ASNs
-                               try:
-                                       writers[network.asn].write(network, subnets)
-                               except KeyError:
-                                       pass
+                               if network.asn and network.asn in writers:
+                                       # Mismatching subnets
+                                       gaps = [
+                                               subnet for subnet in subnets if not network.asn == subnet.asn
+                                       ]
+
+                                       writers[network.asn].write(network, gaps)
 
                                # Handle flags
                                for flag in flags:
@@ -247,10 +274,19 @@ class Exporter(object):
                                                # Fetch the "fake" country code
                                                country = flags[flag]
 
-                                               try:
-                                                       writers[country].write(network, subnets)
-                                               except KeyError:
-                                                       pass
+                                               if not country in writers:
+                                                       continue
+
+                                               gaps = [
+                                                       subnet for subnet in subnets
+                                                               if not subnet.has_flag(flag)
+                                               ]
+
+                                               writers[country].write(network, gaps)
+
+                               # Push all subnets back onto the stack
+                               for subnet in reversed(subnets):
+                                       networks.push(subnet)
 
                        # Write everything to the filesystem
                        for writer in writers.values():
@@ -262,3 +298,33 @@ class Exporter(object):
                )
 
                return os.path.join(directory, filename)
+
+
+class BufferedStack(object):
+       """
+               This class takes an iterator and when being iterated
+               over it returns objects from that iterator for as long
+               as there are any.
+
+               It additionally has a function to put an item back on
+               the back so that it will be returned again at the next
+               iteration.
+       """
+       def __init__(self, iterator):
+               self.iterator = iterator
+               self.stack = []
+
+       def __iter__(self):
+               return self
+
+       def __next__(self):
+               if self.stack:
+                       return self.stack.pop(0)
+
+               return next(self.iterator)
+
+       def push(self, elem):
+               """
+                       Takes an element and puts it on the stack
+               """
+               self.stack.insert(0, elem)