]> git.ipfire.org Git - people/ms/libloc.git/blobdiff - src/scripts/location-importer.in
importer: Improve checks for unspecified networks
[people/ms/libloc.git] / src / scripts / location-importer.in
index 9b83f3fcf244f6e6e9096c6981138c1abe65ff4c..4b340374226a0d70b971ccc354a4a55311dc7717 100644 (file)
@@ -713,7 +713,7 @@ class CLI(object):
                                                name text NOT NULL, source text NOT NULL) ON COMMIT DROP;
                                        CREATE UNIQUE INDEX _organizations_handle ON _organizations(handle);
 
-                                       CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text NOT NULL,
+                                       CREATE TEMPORARY TABLE _rirdata(network inet NOT NULL, country text,
                                                original_countries text[] NOT NULL, source text NOT NULL)
                                                ON COMMIT DROP;
                                        CREATE INDEX _rirdata_search ON _rirdata
@@ -914,14 +914,24 @@ class CLI(object):
                        We will return False in case a network is not suitable for adding
                        it to our database, and True otherwise.
                """
+               # Check input
+               if isinstance(network, ipaddress.IPv6Network):
+                       pass
+               elif isinstance(network, ipaddress.IPv4Network):
+                       pass
+               else:
+                       raise ValueError("Invalid network: %s (type %s)" % (network, type(network)))
 
-               if not network or not (isinstance(network, ipaddress.IPv4Network) or isinstance(network, ipaddress.IPv6Network)):
-                       return False
-
+               # Ignore anything that isn't globally routable
                if not network.is_global:
                        log.debug("Skipping non-globally routable network: %s" % network)
                        return False
 
+               # Ignore anything that is an unspecified IP range (See RFC 5735 for IPv4 or RFC 2373 for IPv6)
+               elif network.is_unspecified:
+                       log.debug("Skipping unspecified network: %s" % network)
+                       return False
+
                if network.version == 4:
                        if network.prefixlen < 7:
                                log.debug("Skipping too big IP chunk: %s" % network)
@@ -931,10 +941,6 @@ class CLI(object):
                                log.debug("Skipping network too small to be publicly announced: %s" % network)
                                return False
 
-                       if str(network.network_address) == "0.0.0.0":
-                               log.debug("Skipping network based on 0.0.0.0: %s" % network)
-                               return False
-
                elif network.version == 6:
                        if network.prefixlen < 10:
                                log.debug("Skipping too big IP chunk: %s" % network)
@@ -944,15 +950,6 @@ class CLI(object):
                                log.debug("Skipping network too small to be publicly announced: %s" % network)
                                return False
 
-                       if str(network.network_address) == "::":
-                               log.debug("Skipping network based on '::': %s" % network)
-                               return False
-
-               else:
-                       # This should not happen...
-                       log.warning("Skipping network of unknown family, this should not happen: %s" % network)
-                       return False
-
                # In case we have made it here, the network is considered to
                # be suitable for libloc consumption...
                return True
@@ -1099,28 +1096,28 @@ class CLI(object):
                                inetnum[key] = [ipaddress.ip_network(val, strict=False)]
 
                        elif key == "country":
-                               val = val.upper()
-
-                               # Catch RIR data objects with more than one country code...
-                               if not key in inetnum:
-                                       inetnum[key] = []
-                               else:
-                                       if val in inetnum.get("country"):
-                                               # ... but keep this list distinct...
-                                               continue
+                               cc = val.upper()
 
                                # Ignore certain country codes
-                               if val in IGNORED_COUNTRIES:
-                                       log.debug("Ignoring country code '%s'" % val)
+                               if cc in IGNORED_COUNTRIES:
+                                       log.debug("Ignoring country code '%s'" % cc)
                                        continue
 
                                # Translate country codes
                                try:
-                                       val = TRANSLATED_COUNTRIES[val]
+                                       cc = TRANSLATED_COUNTRIES[cc]
                                except KeyError:
                                        pass
 
-                               inetnum[key].append(val)
+                               # Do we know this country?
+                               if not cc in countries:
+                                       log.warning("Skipping invalid country code '%s'" % cc)
+                                       continue
+
+                               try:
+                                       inetnum[key].append(cc)
+                               except KeyError:
+                                       inetnum[key] = [cc]
 
                        # Parse the geofeed attribute
                        elif key == "geofeed":
@@ -1133,37 +1130,51 @@ class CLI(object):
                                        inetnum["geofeed"] = m.group(1)
 
                # Skip empty objects
-               if not inetnum or not "country" in inetnum:
+               if not inetnum:
                        return
 
-               # Prepare skipping objects with unknown country codes...
-               invalidcountries = [singlecountry for singlecountry in inetnum.get("country") if singlecountry not in countries]
-
                # Iterate through all networks enumerated from above, check them for plausibility and insert
                # them into the database, if _check_parsed_network() succeeded
                for single_network in inetnum.get("inet6num") or inetnum.get("inetnum"):
-                       if self._check_parsed_network(single_network):
-                               # Skip objects with unknown country codes if they are valid to avoid log spam...
-                               if invalidcountries:
-                                       log.warning("Skipping network with bogus countr(y|ies) %s (original countries: %s): %s" % \
-                                               (invalidcountries, inetnum.get("country"), inetnum.get("inet6num") or inetnum.get("inetnum")))
-                                       break
+                       if not self._check_parsed_network(single_network):
+                               continue
 
-                               # Everything is fine here, run INSERT statement...
-                               self.db.execute("INSERT INTO _rirdata(network, country, original_countries, source) \
-                                       VALUES(%s, %s, %s, %s) ON CONFLICT (network) DO UPDATE SET country = excluded.country",
-                                       "%s" % single_network, inetnum.get("country")[0], inetnum.get("country"), source_key,
+                       # Fetch the countries or use a list with an empty country
+                       countries = inetnum.get("country", [None])
+
+                       # Insert the network into the database but only use the first country code
+                       for cc in countries:
+                               self.db.execute("""
+                                       INSERT INTO
+                                               _rirdata
+                                       (
+                                               network,
+                                               country,
+                                               original_countries,
+                                               source
+                                       )
+                                       VALUES
+                                       (
+                                               %s, %s, %s, %s
+                                       )
+                                       ON CONFLICT (network)
+                                               DO UPDATE SET country = excluded.country
+                                       """, "%s" % single_network, cc, [cc for cc in countries if cc], source_key,
                                )
 
-                               # Update any geofeed information
-                               geofeed = inetnum.get("geofeed", None)
-                               if geofeed:
-                                       self._parse_geofeed(geofeed, single_network)
+                               # If there is more than one country, we will only use the first one
+                               break
 
-                               # Delete any previous geofeeds
-                               else:
-                                       self.db.execute("DELETE FROM network_geofeeds WHERE network = %s",
-                                               "%s" % single_network)
+                       # Update any geofeed information
+                       geofeed = inetnum.get("geofeed", None)
+                       if geofeed:
+                               self._parse_geofeed(geofeed, single_network)
+
+                       # Delete any previous geofeeds
+                       else:
+                               self.db.execute(
+                                       "DELETE FROM network_geofeeds WHERE network = %s", "%s" % single_network,
+                               )
 
        def _parse_geofeed(self, url, single_network):
                # Parse the URL